def fetch_all(cls, filter_expr=None, filter_value=None, sort=None):
    # TODO: Make this use less resources. Read options online and then use
    # profiler to test each one.
    assert (filter_expr is None) == (filter_value is None)
    import settings, helpers

    def get_query():
        query = cls.all()
        if filter_expr is not None:
            assert filter_value is not None
            query = query.filter(filter_expr, filter_value)
        if sort is not None:
            query.order(sort)
        return query

    items = get_query().fetch(EXPECTED_UPPER_BOUND)
    if len(items) >= EXPECTED_UPPER_BOUND:
        if settings.DEBUG:
            assert False, "Upper bound is apparently not big enough."
        else:
            helpers.log("ERROR: Upper bound is apparently not big enough.")
            items = list(get_query())
    if cls.default_sort_key_fn is not None:
        items.sort(key=cls.default_sort_key_fn)
    return tuple(items)

def set_up_environment(self, settings, ontodir):
    """
    Sets up the XDG_*_HOME variables and makes sure the directories exist.

    Settings should be a dict mapping schema names to dicts that hold the
    settings that should be changed in those schemas. The contents dicts
    should map key->value, where key is a key name and value is a suitable
    GLib.Variant instance.
    """
    helpers.log("[Conf] Setting test environment...")

    for var, directory in TEST_ENV_DIRS.iteritems():
        helpers.log("export %s=%s" % (var, directory))
        self.__recreate_directory(directory)
        os.environ[var] = directory

    for directory in EXTRA_DIRS:
        self.__recreate_directory(directory)

    if ontodir:
        helpers.log("export %s=%s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
        os.environ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir

    for var, value in TEST_ENV_VARS.iteritems():
        helpers.log("export %s=%s" % (var, value))
        os.environ[var] = value

    # Previous loop should have set DCONF_PROFILE to the test location
    if settings is not None:
        self._apply_settings(settings)

    helpers.log("[Conf] environment ready")

def extract_sold_by(product_page_lxml):
    """Extracts who the product is sold by, given a product page's HTML as unicode."""
    sold_by = u'Not Sold by Amazon'
    try:
        results = product_page_lxml.cssselect('div.buying')
        for search_result in results:
            search_result = search_result.text_content()
            if 'Fulfilled by Amazon' in search_result:
                sold_by = u'Fulfilled by Amazon'
                break
            elif 'Ships from and sold by Amazon.com' in search_result:
                sold_by = u'Sold by Amazon'
                break
            elif 'Ships from and sold by Amazon Digital Services' in search_result:
                sold_by = u'Sold by Amazon'
                break
            else:
                continue
    except Exception:
        helpers.log('Sold by error on page')
        sold_by = u'Error'
    return sold_by

def send_statistics():
    if GlobalVars.metasmoke_down:
        log('warning', "Metasmoke is down, not sending statistics")
        return

    GlobalVars.posts_scan_stats_lock.acquire()
    if GlobalVars.post_scan_time != 0:
        posts_per_second = GlobalVars.num_posts_scanned / GlobalVars.post_scan_time
        payload = {'key': GlobalVars.metasmoke_key,
                   'statistic': {'posts_scanned': GlobalVars.num_posts_scanned,
                                 'api_quota': GlobalVars.apiquota,
                                 'post_scan_rate': posts_per_second}}
    else:
        payload = {'key': GlobalVars.metasmoke_key,
                   'statistic': {'posts_scanned': GlobalVars.num_posts_scanned,
                                 'api_quota': GlobalVars.apiquota}}

    GlobalVars.post_scan_time = 0
    GlobalVars.num_posts_scanned = 0
    GlobalVars.posts_scan_stats_lock.release()

    headers = {'Content-type': 'application/json'}

    if GlobalVars.metasmoke_host is not None:
        log('info', 'Sent statistics to metasmoke: ', payload['statistic'])
        Metasmoke.post("/statistics.json", data=json.dumps(payload), headers=headers)

def _check_batch(saved):
    if time.time() < DeletionWatcher.next_request_time:
        time.sleep(DeletionWatcher.next_request_time - time.time())

    for site, posts in saved.items():
        ids = ";".join(post_id for post_id in posts if not DeletionWatcher._ignore((post_id, site)))
        uri = "https://api.stackexchange.com/2.2/posts/{}".format(ids)
        params = {
            'site': site,
            'key': 'IAkbitmze4B8KpacUfLqkw(('
        }
        res = requests.get(uri, params=params)
        json = res.json()

        if "items" not in json:
            log('warning',
                'DeletionWatcher API request received no items in response (code {})'.format(res.status_code))
            log('warning', res.text)
            return

        if 'backoff' in json:
            DeletionWatcher.next_request_time = time.time() + json['backoff']

        for post in json['items']:
            if time.time() - post["creation_date"] < 7200:
                yield to_protocol_relative(post["link"]).replace("/q/", "/questions/")

def getRandomIdeas(questionObj, ideas, size=5):
    numIdeas = len(ideas) if ideas else 0
    if numIdeas >= size:
        return random.sample(ideas, size)
    else:
        helpers.log("WARNING: Cannot return {0} random ideas since only {1} ideas available".format(size, numIdeas))
        return []

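# Hedged usage sketch for getRandomIdeas: the idea list and sample size below are
# illustrative assumptions, not values from the source; the question object is not
# used for the sampling itself.
def _example_get_random_ideas():
    ideas = ['idea-a', 'idea-b', 'idea-c']
    return getRandomIdeas(None, ideas, size=2)  # returns [] (and logs a warning) if size > len(ideas)
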
def getPerson(question=None, nickname=None):
    person = None

    # check if person id stored in session
    # if so use to retrieve logged in user
    session = gaesessions.get_current_session()
    person_id = session.pop("new_person_id") if session.has_key("new_person_id") else None
    if person_id:
        person = Person.get_by_id(person_id)
        # check if person id stored in session corresponds to inputs
        if not person:
            person = None
            helpers.log("WARNING: Person not found by id {0}".format(person_id))
        elif question and question != person.question:
            person = None
        elif nickname and nickname != person.nickname:
            person = None

    if not person:
        user = users.get_current_user()
        if question:
            if question.nicknameAuthentication:
                # if no nickname provided, check session
                if not nickname:
                    questionSessionValues = session.get(question.code)
                    nickname = questionSessionValues["nickname"] if questionSessionValues else None
                if nickname:
                    person = Person.all().filter("question =", question).filter("nickname =", nickname).get()
            elif user is not None:
                person = Person.all().filter("question =", question).filter("user =", user).get()
        elif user is not None:
            person = Person.all().filter("question =", None).filter("user =", user).get()

    return person

def _send_update(from_person, to_person, *updates):
    from model import Student, Teacher
    from client_id_utils import timestamp_for_client_id
    # from settings import CHANNEL_LIMIT_PER_STUDENT
    from helpers import log

    assert isinstance(to_person, (Student, Teacher)), repr(to_person)

    import datetime
    timestamp = datetime.datetime.now()
    updates_list = list(updates)
    updates_list[0]['timestamp'] = timestamp.strftime('%B %d, %Y %H:%M:%S')
    updates_json = json.dumps(updates_list)

    # Sort and dedupe client_ids by timestamp, descending
    client_ids = set(to_person.client_ids)
    key_fn = lambda client_id: timestamp_for_client_id(client_id)
    client_ids = sorted(client_ids, key=key_fn, reverse=True)

    # if CHANNEL_LIMIT_PER_STUDENT is not None and len(client_ids) > CHANNEL_LIMIT_PER_STUDENT and isinstance(to_person, Student):
    #     client_ids = client_ids[:CHANNEL_LIMIT_PER_STUDENT]
    #     log("=> WARNING: Found %d client IDs for %r but only using %d" % (len(to_person.client_ids), to_person, CHANNEL_LIMIT_PER_STUDENT))

    if len(client_ids) == 0:
        log("=> MESSAGE NOT SENT. No current client IDs for {0}.".format(to_person))

    for client_id in client_ids:
        log("client ID : %r : sent %s" % (client_id, " + ".join(u["type"] for u in updates)))
        channel.send_message(client_id, updates_json)

def main():
    while True:
        data = read(API_URL)
        result = parse(data)
        log(repr(result))
        save_to_db(result)
        sleep(TIMEOUT)

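# Hedged sketch of the helpers the polling loop above assumes. `read` matches the
# urlopen-based JSON fetcher shown later in this file; `parse` and `save_to_db` are
# placeholders whose field names ('items', 'id', 'value') are assumptions.
def parse(data):
    # keep only the fields we care about from the decoded JSON payload
    return [{'id': item.get('id'), 'value': item.get('value')}
            for item in data.get('items', [])]

def save_to_db(result):
    # stand-in persistence step; a real implementation would write to a datastore
    for row in result:
        log('saving {0!r}'.format(row))
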
def send_stats_on_post(title, link, reasons, body, username, user_link, why, owner_rep, post_score,
                       up_vote_count, down_vote_count):
    if GlobalVars.metasmoke_host is None:
        log('info', "Metasmoke location not defined, not reporting")
        return

    metasmoke_key = GlobalVars.metasmoke_key

    try:
        if len(why) > 1024:
            why = why[:512] + '...' + why[-512:]

        post = {'title': title, 'link': link, 'reasons': reasons,
                'body': body, 'username': username, 'user_link': user_link,
                'why': why, 'user_reputation': owner_rep, 'score': post_score,
                'upvote_count': up_vote_count, 'downvote_count': down_vote_count}

        # Remove None values (if they somehow manage to get through)
        post = dict((k, v) for k, v in post.items() if v)

        payload = {'post': post, 'key': metasmoke_key}
        headers = {'Content-type': 'application/json'}

        requests.post(GlobalVars.metasmoke_host + "/posts.json", data=json.dumps(payload), headers=headers)
    except Exception as e:
        log('error', e)

def _openFile(self):
    fileName = QtGui.QFileDialog.getOpenFileName(self, "OpenImage", "src", "Bitmaps (*.bmp)")
    if fileName == "":
        h.warn("No file selected.")
        return
    h.log("Loaded!")

    # self.pixmapItem = QtGui.QPixmap(fileName)
    # item = QtGui.QGraphicsPixmapItem(self.pixmapItem)
    # self.scene.addItem(item)

    item = QtGui.QStandardItem(os.path.basename(str(fileName)))
    item.imageFileName = str(fileName)
    orgImg = Image.open(str(fileName), mode='r').convert()
    item.image = MImage.pil_to_array(orgImg.convert('L'))
    item.pixmap = QtGui.QPixmap(fileName)
    # item.setCheckable(True)
    self.model.appendRow(item)
    self.ui.imageListView.setModel(self.model)
    self._checkButtons()

def reload():
    commit = git_commit_info()

    censored_committer_names = GlobalVars.censored_committer_names
    if md5(commit['author'][0].encode('utf-8')).hexdigest() in censored_committer_names:
        commit['author'] = censored_committer_names[md5(commit['author'][0].encode('utf-8')).hexdigest()]

    GlobalVars.commit = commit
    GlobalVars.commit_with_author = "`{}` ({}: {})".format(
        commit['id'],
        commit['author'][0] if type(commit['author']) in {list, tuple} else commit['author'],
        commit['message'])

    GlobalVars.on_master = "HEAD detached" not in git_status()
    GlobalVars.s = "[ {} ] SmokeDetector started at [rev {}]({}/commit/{}) (running on {}, Python {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit['id'], GlobalVars.location, platform.python_version())
    GlobalVars.s_reverted = \
        "[ {} ] SmokeDetector started in [reverted mode](" \
        "https://charcoal-se.org/smokey/SmokeDetector-Statuses#reverted-mode) " \
        "at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit['id'], GlobalVars.location)
    GlobalVars.s_norestart = "[ {} ] Blacklists reloaded at [rev {}]({}/commit/{}) (running on {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit['id'], GlobalVars.location)
    GlobalVars.s_norestart2 = "[ {} ] FindSpam module reloaded at [rev {}]({}/commit/{}) (running on {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit['id'], GlobalVars.location)
    GlobalVars.standby_message = \
        "[ {} ] SmokeDetector started in [standby mode](" \
        "https://charcoal-se.org/smokey/SmokeDetector-Statuses#standby-mode) " \
        "at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit['id'], GlobalVars.location)

    log('debug', "GlobalVars loaded")

def send(self, dstip, data):
    debug2('UDP: sending to %r port %d\n' % dstip)
    try:
        self.sock.sendto(data, dstip)
    except socket.error, e:
        log('UDP send to %r port %d: %s\n' % (dstip[0], dstip[1], e))
        return

def set_up_environment(self, gsettings, ontodir):
    """
    Sets up the XDG_*_HOME variables and makes sure the directories exist.

    gsettings is a list of triplets (schema, key, value) that will be
    set/unset in gsettings.
    """
    assert not gsettings or type(gsettings) is list

    helpers.log("[Conf] Setting test environment...")

    for var, directory in TEST_ENV_DIRS.iteritems():
        helpers.log("export %s=%s" % (var, directory))
        self.__recreate_directory(directory)
        os.environ[var] = directory

    for directory in EXTRA_DIRS:
        self.__recreate_directory(directory)

    if ontodir:
        helpers.log("export %s=%s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
        os.environ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir

    for var, value in TEST_ENV_VARS.iteritems():
        helpers.log("export %s=%s" % (var, value))
        os.environ[var] = value

    # Previous loop should have set DCONF_PROFILE to the test location
    if gsettings:
        self.dconf = DConfClient()
        self.dconf.reset()
        for (schema, key, value) in gsettings:
            self.dconf.write(schema, key, value)

    helpers.log("[Conf] environment ready")

def begin_crawl():
    # explode out all of our category `start_urls` into subcategories
    with open(settings.start_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue  # skip blank and commented out lines

            page, html = make_request(line)
            count = 0

            # look for subcategory links on this page
            subcategories = page.findAll("div", "bxc-grid__image")  # downward arrow graphics
            subcategories.extend(page.findAll("li", "sub-categories__list__item"))  # carousel hover menu
            sidebar = page.find("div", "browseBox")
            if sidebar:
                subcategories.extend(sidebar.findAll("li"))  # left sidebar

            for subcategory in subcategories:
                link = subcategory.find("a")
                if not link:
                    continue
                link = link["href"]
                count += 1
                enqueue_url(link)

            log("Found {} subcategories on {}".format(count, line))

def reload():
    GlobalVars.commit = commit = git_commit_info()

    GlobalVars.commit_with_author = "`{}` ({}: {})".format(
        commit.id, commit.author, commit.message)

    GlobalVars.on_branch = git_ref()
    GlobalVars.s = "[ {} ] SmokeDetector started at [rev {}]({}/commit/{}) (running on {}, Python {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit.id, GlobalVars.location, platform.python_version())
    GlobalVars.s_reverted = \
        "[ {} ] SmokeDetector started in [reverted mode](" \
        "https://charcoal-se.org/smokey/SmokeDetector-Statuses#reverted-mode) " \
        "at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit.id, GlobalVars.location)
    GlobalVars.s_norestart = "[ {} ] Blacklists reloaded at [rev {}]({}/commit/{}) (running on {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit.id, GlobalVars.location)
    GlobalVars.s_norestart2 = "[ {} ] FindSpam module reloaded at [rev {}]({}/commit/{}) (running on {})".format(
        GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
        GlobalVars.commit.id, GlobalVars.location)
    GlobalVars.standby_message = \
        "[ {} ] SmokeDetector started in [standby mode](" \
        "https://charcoal-se.org/smokey/SmokeDetector-Statuses#standby-mode) " \
        "at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit.id, GlobalVars.location)

    log('debug', "GlobalVars loaded")

def handle_slice_sampler_exception(exception, starting_point, proposal_measure, opt_compwise=False):
    '''
    Handles slice sampler exceptions. If the slice sampler shrank to zero it
    will be restarted a few times. If this fails, or if the exception was a
    different one, this method will raise the given exception.
    Args:
        exception: the exception that occurred
        starting_point: the starting point that was used
        proposal_measure: the used proposal measure
        opt_compwise: how to set the compwise option
    Returns:
        the output of the slice sampler
    Raises:
        Exception: the first argument
    '''
    if exception.message == "Slice sampler shrank to zero!":
        log("Slice sampler shrank to zero! Action: trying to restart "
            + str(NUMBER_OF_RESTARTS) + " times with same starting point")
        restarts_left = NUMBER_OF_RESTARTS
        while restarts_left > 0:
            try:
                return slice_sample(starting_point, proposal_measure, compwise=opt_compwise)
            except Exception as e:
                log("Restart failed. " + str(restarts_left) + " restarts left. Exception was: " + e.message)
                restarts_left = restarts_left - 1
    # if we leave the while loop we will raise the exception we got
    raise exception

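# Hedged usage sketch: call slice_sample directly and fall back to the handler above
# on failure; `current_point` and `log_prob_fn` are hypothetical stand-ins for a real
# starting point and proposal measure.
def _slice_sample_with_retry(current_point, log_prob_fn):
    try:
        return slice_sample(current_point, log_prob_fn, compwise=False)
    except Exception as exc:
        # re-raises unless the failure was the "shrank to zero" case
        return handle_slice_sampler_exception(exc, current_point, log_prob_fn, opt_compwise=False)
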
def __init__(self, progress, total, message, newline=True):
    if 'linux' in sys.platform:
        message = "{message:<20}".format(message=message)
    elif 'darwin' in sys.platform:
        message = """ \033[38;5;204m{message:<20}\033[0m """.format(message=message)

    import time
    time.sleep(0.01)

    progress += 1
    percentage = (progress * 10 / total)  # number of filled tenths of the bar
    percentage_left = 10 - percentage

    bar = '['
    bar += percentage * log('*', colour="white", ret=True)
    bar += percentage_left * log('*', colour="black", ret=True)
    bar += ']'
    bar += ' {}'.format(progress)

    string = ''
    if progress != total:
        if progress == 1 and newline == True:
            string = '\n\r {message} {bar}'.format(message=message, bar=bar)
        else:
            string = '\r {message} {bar}'.format(message=message, bar=bar)
    else:
        string = '\r {message} {bar}'.format(message=message, bar=bar)

    sys.stdout.write(string)
    sys.stdout.flush()

def sample_hyperparameters_gp(mcmc_iters, noiseless, input_points, func_values, cov_func, noise, amp2, ls):
    '''
    Samples hyper-parameters for Gaussian processes.
    Args:
        mcmc_iters: the number of hyper-parameter samples required
        noiseless: the modeled function is noiseless
        input_points: all the points that have been evaluated so far
        func_values: the corresponding observed function values
        cov_func: the covariance function the Gaussian process uses
        noise: a starting value for the noise
        amp2: a starting value for the amplitude
        ls: an array of starting values for the length scales
            (size has to be the dimension of the input points)
    Returns:
        a list of hyper-parameter tuples;
        the tuples are of the form (mean, noise, amplitude, [length-scales])
    '''
    mean = np.mean(func_values)
    hyper_samples = []

    # sample hyper parameters
    for i in xrange(0, mcmc_iters):
        if noiseless:
            noise = 1e-3
            [mean, amp2] = _sample_mean_amp_noise(input_points, func_values, cov_func,
                                                  np.array([mean, amp2]), ls)
        else:
            [mean, amp2, noise] = _sample_mean_amp_noise(input_points, func_values, cov_func,
                                                         np.array([mean, amp2, noise]), ls)
        ls = _sample_ls(input_points, func_values, cov_func, ls, mean, amp2, noise)
        # This is the order as expected
        # log("mean: " + str(mean) + ", noise: " + str(noise) + " amp: " + str(amp2) + ", ls: " + str(ls))
        hyper_samples.append((mean, noise, amp2, ls))

    samples = []
    for i in xrange(0, mcmc_iters - 1, mcmc_iters / 10):
        samples.append(hyper_samples[i])
        log("mean: " + str(hyper_samples[i][0]) + ", noise: " + str(hyper_samples[i][1])
            + " amp: " + str(hyper_samples[i][2]) + ", ls: " + str(hyper_samples[i][3]))
    return samples

def determine_if_autoflagged(post_url):
    """
    Given the URL for a post, determine whether or not it has been autoflagged.
    """
    payload = {
        'key': GlobalVars.metasmoke_key,
        'filter': 'GKNJKLILHNFMJLFKINGJJHJOLGFHJF',  # id and autoflagged
        'urls': post_url
    }
    try:
        response = Metasmoke.get("/api/v2.0/posts/urls", params=payload).json()
    except Exception as e:
        log('error', e)
        return False, []

    if len(response["items"]) > 0 and response["items"][0]["autoflagged"]:
        # get flagger names
        id = str(response["items"][0]["id"])
        payload = {'key': GlobalVars.metasmoke_key}

        flags = Metasmoke.get("/api/v2.0/posts/" + id + "/flags", params=payload).json()

        if len(flags["items"]) > 0:
            return True, [user["username"] for user in flags["items"][0]["autoflagged"]["users"]]

    return False, []

def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
    post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

    if post_site not in GlobalVars.site_id_dict:
        log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
        return

    if post_type == "answer":
        question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
        if question_id is None:
            return
    else:
        question_id = post_id

    site_id = GlobalVars.site_id_dict[post_site]
    action = "{}-question-{}".format(site_id, question_id)
    max_time = (time.time() + timeout) if timeout else None

    if action not in self.posts:
        self.posts[action] = (post_id, post_site, post_type, post_url,
                              [(callback, max_time)] if callback else [])
        try:
            self.socket.send(action)
        except websocket.WebSocketException:
            log('error', 'DeletionWatcher failed on sending {}'.format(action))
    elif callback:
        _, _, _, _, callbacks = self.posts[action]
        callbacks.append((callback, max_time))
    else:
        return

    if pickle:
        Tasks.do(self._save)

def determine_if_autoflagged(post_url):
    """
    Given the URL for a post, determine whether or not it has been autoflagged.
    """
    payload = {
        'key': GlobalVars.metasmoke_key,
        'filter': 'GFGJGHFMHGOLMMJMJJJGHIGOMKFKKILF',  # id and autoflagged
        'urls': post_url
    }
    try:
        response = Metasmoke.get("/api/v2.0/posts/urls", params=payload).json()
    except Exception as e:
        log('error', e)
        return False, []

    # The first report of a URL is the only one that will be autoflagged. MS responses to the
    # /posts/urls endpoint have the oldest report last.
    first_report_index = len(response["items"]) - 1
    if first_report_index > -1 and response["items"][first_report_index]["autoflagged"]:
        # get flagger names
        id = str(response["items"][first_report_index]["id"])
        payload = {'key': GlobalVars.metasmoke_key}

        flags = Metasmoke.get("/api/v2.0/posts/" + id + "/flags", params=payload).json()

        if len(flags["items"]) > 0:
            return True, [user["username"] for user in flags["items"][0]["autoflagged"]["users"]]

    return False, []

def setup_websocket(attempt, max_attempts):
    try:
        ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
        ws.send("155-questions-active")
        return ws
    except websocket.WebSocketException:
        log('warning', 'WS failed to create websocket connection. Attempt {} of {}.'.format(attempt, max_attempts))
        return None

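# Hedged usage sketch: retry the connection helper above a fixed number of times
# before giving up; `max_attempts` here is an assumed value, not from the source.
def create_websocket_with_retries(max_attempts=10):
    for attempt in range(1, max_attempts + 1):
        ws = setup_websocket(attempt, max_attempts)
        if ws is not None:
            return ws
    return None
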
def log_exception(exctype, value, tb):
    now = datetime.utcnow()
    tr = '\n'.join(traceback.format_tb(tb))
    exception_only = ''.join(traceback.format_exception_only(exctype, value)).strip()
    logged_msg = "{exception}\n{now} UTC\n{row}\n\n".format(exception=exception_only, now=now, row=tr)
    log('error', logged_msg)
    with open("errorLogs.txt", "a") as f:
        f.write(logged_msg)

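# Minimal wiring sketch, assuming the handler above is meant to catch otherwise
# unhandled exceptions process-wide: sys.excepthook expects exactly this
# (exctype, value, tb) signature.
import sys
sys.excepthook = log_exception
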
def reset(self):
    profile = os.environ["DCONF_PROFILE"]
    assert profile == "trackertest"
    # XDG_CONFIG_HOME is useless
    dconf_db = os.path.join(os.environ["HOME"], ".config", "dconf", profile)
    if os.path.exists(dconf_db):
        log("[Conf] Removing dconf-profile: " + dconf_db)
        os.remove(dconf_db)

def tracker_store_restart_with_new_ontologies(self, ontodir):
    self.store.stop()
    if ontodir:
        helpers.log("[Conf] Setting %s - %s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
        os.environ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir
    try:
        self.store.start()
    except dbus.DBusException, e:
        raise UnableToBootException("Unable to boot the store \n(" + str(e) + ")")

def send_message(self, text, length_check=True):
    if "no-chat" in sys.argv:
        log('info', "Blocked message to {0} due to no-chat setting: {1}".format(self.name, text))
        return
    if "charcoal-hq-only" not in sys.argv or int(self.id) == 11540:
        return rooms.Room.send_message(self, text, length_check)
    else:
        log('info', "Blocked message to {0} due to charcoal-hq-only setting: {1}".format(self.name, text))

def callback(self):
    log('--no callback defined-- %r\n' % self)
    (r, w, x) = select.select(self.socks, [], [], 0)
    for s in r:
        v = s.recv(4096)
        if not v:
            log('--closed-- %r\n' % self)
            self.socks = []
            self.ok = False

def check_if_spam_json(json_data):
    try:
        post = Post(json_data=json_data)
    except PostParseError as err:
        log('error', 'Parse error {0} when parsing json_data {1!r}'.format(err, json_data))
        return False, '', ''
    is_spam, reason, why = check_if_spam(post)
    return is_spam, reason, why

def read(url):
    try:
        response = urlopen(url)
        content = response.read()
        return json.loads(content.decode('utf8'))
    except URLError as e:
        log("Could not read data!", str(e))
        return {}

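# Hedged usage sketch for read(): the endpoint URL is a placeholder, not from the
# source; a failed fetch simply yields an empty dict to the caller.
def _example_read():
    data = read("https://example.com/api/items.json")
    if not data:
        log("Could not fetch items; using empty result")
    return data
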
def __init__(self, name, t):
    if name not in ('master', 'slave'):
        helpers.log("HA controller must either be 'master' or 'slave'")
    self.t = t
    self._name = name
    self.rest = HaBsnRestClient(name, t)

def make_api_call_for_site(self, site):
    if site not in self.queue:
        return

    self.queue_modify_lock.acquire()
    new_posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()

    new_post_ids = [int(k) for k in new_posts.keys()]

    if GlobalVars.flovis is not None:
        for post_id in new_post_ids:
            GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                    {'site': site, 'posts': list(new_posts.keys())})

    self.queue_timing_modify_lock.acquire()
    post_add_times = [v for k, v in new_posts.items()]
    pop_time = datetime.utcnow()

    for add_time in post_add_times:
        try:
            seconds_in_queue = (pop_time - add_time).total_seconds()
            if site in self.queue_timings:
                self.queue_timings[site].append(seconds_in_queue)
            else:
                self.queue_timings[site] = [seconds_in_queue]
        except KeyError:  # XXX: Any other possible exception?
            continue  # Skip to next item if we've got invalid data or missing values.

    store_queue_timings()
    self.queue_timing_modify_lock.release()

    self.max_ids_modify_lock.acquire()

    if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
        previous_max_id = self.previous_max_ids[site]
        intermediate_posts = range(previous_max_id + 1, max(new_post_ids))

        # We don't want to go over the 100-post API cutoff, so take the last
        # (100-len(new_post_ids)) from intermediate_posts
        intermediate_posts = intermediate_posts[(100 - len(new_post_ids)):]

        # new_post_ids could contain edited posts, so merge it back in
        combined = chain(intermediate_posts, new_post_ids)

        # Could be duplicates, so uniquify
        posts = list(set(combined))
    else:
        posts = new_post_ids

    try:
        if max(new_post_ids) > self.previous_max_ids[site]:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()
    except KeyError:
        self.previous_max_ids[site] = max(new_post_ids)
        store_bodyfetcher_max_ids()

    self.max_ids_modify_lock.release()

    log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
    if len(new_post_ids) > 30:
        log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
    else:
        log('debug', sorted(new_post_ids))
    if len(new_post_ids) == len(posts):
        log('debug', "[ *Identical* ]")
    elif len(posts) > 30:
        log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
    else:
        log('debug', sorted(posts))

    question_modifier = ""
    pagesize_modifier = {}

    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"

        pagesize_modifier = {
            'pagesize': pagesize,
            'min': str(self.last_activity_date)
        }
    else:
        question_modifier = "/{0}".format(";".join([str(post) for post in posts]))

    url = "https://api.stackexchange.com/2.2/questions{}".format(question_modifier)
    params = {
        'filter': '!*xq08dCDNr)PlxxXfaN8ntivx(BPlY_8XASyXLX-J7F-)VK*Q3KTJVkvp*',
        'key': 'IAkbitmze4B8KpacUfLqkw((',
        'site': site
    }
    params.update(pagesize_modifier)

    # wait to make sure API has/updates post data
    time.sleep(3)

    GlobalVars.api_request_lock.acquire()
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)

    try:
        time_request_made = datetime.now().strftime('%H:%M:%S')
        response = requests.get(url, params=params, timeout=20).json()
    except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
        # Any failure in the request being made (timeout or otherwise) should be added back to
        # the queue.
        self.queue_modify_lock.acquire()
        if site in self.queue:
            self.queue[site].update(new_posts)
        else:
            self.queue[site] = new_posts
        self.queue_modify_lock.release()

        GlobalVars.api_request_lock.release()
        return

    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()

    message_hq = ""
    if "quota_remaining" in response:
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            tell_rooms_with("debug", "API quota rolled over with {0} requests remaining. "
                                     "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                  response["quota_remaining"]))

            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()

            tell_rooms_with("debug", api_quota_used_per_site)
            clear_api_data()
        if response["quota_remaining"] == 0:
            tell_rooms_with("debug", "API reports no quota left! May be a glitch.")
            tell_rooms_with("debug", str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            tell_rooms_with("debug", "Restart: API quota is {quota}.".format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."

    if "error_message" in response:
        message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
        if "error_id" in response and response["error_id"] == 502:
            if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
        message_hq += " Previous URL: `{}`".format(url)

    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]

    GlobalVars.api_request_lock.release()

    if len(message_hq) > 0 and "site is required" not in message_hq:
        tell_rooms_with("debug", message_hq.strip())

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]

    num_scanned = 0
    start_time = time.time()

    for post in response["items"]:
        pnb = copy.deepcopy(post)
        if 'body' in pnb:
            pnb['body'] = 'Present, but truncated'
        if 'answers' in pnb:
            del pnb['answers']

        if "title" not in post or "body" not in post:
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], pnb)
            continue

        post['site'] = site
        try:
            post['edited'] = (post['creation_date'] != post['last_edit_date'])
        except KeyError:
            post['edited'] = False  # last_edit_date not present = not edited

        try:
            post_ = Post(api_response=post)
        except PostParseError as err:
            log('error', 'Error {0} when parsing post: {1!r}'.format(err, post_))
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], pnb)
            continue

        num_scanned += 1

        is_spam, reason, why = check_if_spam(post_)

        if is_spam:
            try:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                            {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
                handle_spam(post=post_, reasons=reason, why=why)
            except Exception as e:
                log('error', "Exception in handle_spam:", e)
        elif GlobalVars.flovis is not None and 'question_id' in post:
            GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                    {'post': pnb, 'check_if_spam': [is_spam, reason, why]})

        try:
            if "answers" not in post:
                pass
            else:
                for answer in post["answers"]:
                    anb = copy.deepcopy(answer)
                    if 'body' in anb:
                        anb['body'] = 'Present, but truncated'

                    num_scanned += 1
                    answer["IsAnswer"] = True  # Necessary for Post object
                    answer["title"] = ""  # Necessary for proper Post object creation
                    answer["site"] = site  # Necessary for proper Post object creation
                    try:
                        answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                    except KeyError:
                        answer['edited'] = False  # last_edit_date not present = not edited
                    answer_ = Post(api_response=answer, parent=post_)

                    is_spam, reason, why = check_if_spam(answer_)
                    if is_spam:
                        try:
                            if GlobalVars.flovis is not None and 'answer_id' in answer:
                                GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                        {'post': anb, 'check_if_spam': [is_spam, reason, why]})
                            handle_spam(answer_, reasons=reason, why=why)
                        except Exception as e:
                            log('error', "Exception in handle_spam:", e)
                    elif GlobalVars.flovis is not None and 'answer_id' in answer:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                {'post': anb, 'check_if_spam': [is_spam, reason, why]})
        except Exception as e:
            log('error', "Exception handling answers:", e)

    end_time = time.time()
    GlobalVars.posts_scan_stats_lock.acquire()
    GlobalVars.num_posts_scanned += num_scanned
    GlobalVars.post_scan_time += end_time - start_time
    GlobalVars.posts_scan_stats_lock.release()
    return