def test_get_by_id(praw, ids):
    # `a` is presumably the Actions module imported under an alias
    posts = a.get_by_ids(praw, ids)  # renamed from `list`, which shadowed the builtin
    if posts:
        posts = [p for p in posts]
        if len(posts):
            print "Passed"
            return True
    print "Failed"
    return False
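# A minimal sketch of driving the smoke test above; the praw session comes from
# the same helpers main() uses below, and the fullnames are hypothetical
# placeholders rather than real posts.
def run_smoke_test():
    cred = CRImport("credentials.cred")
    praw = utilitymethods.create_multiprocess_praw(cred)
    return test_get_by_id(praw, ["t3_000001", "t3_000002"])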
def get_posts(self, lim):
    # first ask for posts from the moderator removal log
    try:
        posts = self.sub.mod.log(action="removelink", limit=lim)
        # posts.next() raises StopIteration (caught below) if the log holds
        # fewer than lim entries
        posts = [posts.next().target_fullname for i in range(lim)]
        return Actions.get_by_ids(self.praw, posts)
    except Exception as e:
        logging.error(str(e))
        if __debug__:
            logging.exception(e)
        # falls through to an implicit return of None on failure
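# A hedged usage sketch for get_posts(), assuming `scanner` is an instance of
# the enclosing class; the limit of 25 is arbitrary.
def print_recent_removals(scanner, lim=25):
    removed = scanner.get_posts(lim)
    if removed is None:  # get_posts returns None when the mod-log fetch fails
        print "mod log fetch failed"
        return
    for post in removed:
        print post.name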
def get_historical_posts(self, goto):
    """Scans the sub with more intensive detection of previously found reddit posts

    Allows for mass processing of past posts
    """
    last_id = None
    last_seen = datetime.datetime.now()
    posts = []
    if self.policy.Use_Reddit_Analytics_For_Historical_Scan:
        while last_seen > goto:
            # this raise disables the Reddit-Analytics path; everything below
            # it in the loop body is unreachable until it is removed
            raise NotImplementedError
            if last_id:
                self.RA_params["after"] = last_id
            try:
                data = requests.get("http://api.redditanalytics.com/getPosts",
                                    params=self.RA_params, headers=self.RA_headers)
                json_data = json.loads(data.content)
                ids = [post["name"] for post in json_data["data"]]
                with DataBase.DataBaseWrapper(self.file) as db:
                    exists = db.reddit_exists(ids)
                if exists is not None:
                    ids = [ids[i] for i in range(len(ids)) if not exists[i]]
                else:
                    return None
                if not len(ids):
                    continue
                temp_posts = Actions.get_by_ids(self.praw, ids)
                if temp_posts is not None:
                    posts.extend(temp_posts)
                else:
                    return None
                last_id = json_data["metadata"]["oldest_id"]
                last_seen = datetime.datetime.fromtimestamp(json_data["metadata"]["oldest_date"])
            except ValueError as e:
                if str(e).startswith("No JSON object"):
                    logging.error("Reddit-Analytics is down, retrying historical scan after pause...")
                else:
                    logging.error(str(e))
                    if __debug__:
                        logging.exception(e)
                # temporary fix to avoid endless waiting while RA is down
                return []
            except Exception as e:
                logging.error(str(e))
                if __debug__:
                    logging.exception(e)
                # temporary fix to avoid endless waiting while RA is down
                return []
    return posts
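# The ValueError branch above logs "retrying historical scan after pause..."
# but currently returns []. A minimal sketch of what that retry might look
# like, assuming the caller owns the loop; the attempt count and delay are
# hypothetical values, not taken from this codebase.
def retry_historical_scan(scanner, goto, attempts=3, delay=300):
    """Re-run get_historical_posts, pausing between attempts while RA is down."""
    for _ in range(attempts):
        posts = scanner.get_historical_posts(goto)
        if posts:  # a non-empty result means the scan made progress
            return posts
        time.sleep(delay)  # pause before hitting Reddit-Analytics again
    return []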
def main():
    cred = CRImport("credentials.cred")
    db = sqlite3.connect('database.db',
                         detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    cursor = db.cursor()
    post_list = [post[0] for post in
                 cursor.execute('select short_url from reddit_record where submitter is null').fetchall()]
    praw = utilitymethods.create_multiprocess_praw(cred)
    reddit = utilitymethods.get_subreddit(cred, praw, 'listentothis')
    mods = [mod.name for mod in Actions.get_mods(praw, reddit)]
    stride = 100
    total_len = len(post_list)
    count = 0

    # ordered (detect, capture) regex pairs for the known removal-comment
    # formats; only the first detect pattern differs from its capture pattern.
    # Note: the ", your submission" pattern originally used [\w\d\*-_], where
    # the unescaped '-' creates a '*'-to-'_' codepoint range; corrected to
    # [\w\d_\-\*] here (see _demo_username_pattern_fix below).
    username_patterns = [
        (r'^(?:\*\*)?/u/', r'^(?:\*\*)?/u/([\w\d_\-\*]+)[,\s]'),
        (r'^All apologies /u/([\w\d_\-\*]+)[,\s]', r'^All apologies /u/([\w\d_\-\*]+)[,\s]'),
        (r'/u/([\w\d_\-\*]+), your submission', r'/u/([\w\d_\-\*]+), your submission'),
        (r'^Hey /u/([\w\d_\-\*]+)[,\s]', r'^Hey /u/([\w\d_\-\*]+)[,\s]'),
    ]

    while len(post_list):
        num_loaded = min(stride, len(post_list))
        reddit_posts = Actions.get_by_ids(praw, post_list[:num_loaded])
        update_list = []
        print "{} / {}".format(count, total_len)
        count += stride
        for i, post in enumerate(reddit_posts):
            # skip records that already have a submitter
            submitter = cursor.execute('select submitter from reddit_record where short_url = ?',
                                       (post_list[i],)).fetchone()[0]
            if submitter is not None:
                continue
            assert post_list[i] == post.name
            success = False
            while not success:
                try:
                    success = True
                    if Actions.is_deleted(post):
                        # the author is gone; recover the username from the
                        # distinguished removal comment
                        found = False
                        for comment in post.comments:
                            if comment.distinguished != 'moderator':
                                continue
                            handled = False
                            for detect, capture in username_patterns:
                                if re.search(detect, comment.body):
                                    handled = True
                                    search = re.search(capture, comment.body)
                                    if search:
                                        found = True
                                        update_list.append((search.group(1), post_list[i]))
                                    break
                            if found:
                                break
                            if handled:
                                continue
                            # last resort: print unrecognized mentions for manual review
                            search = re.search(r'/u/([\w\d_\-\*]+)[,\s]', comment.body)
                            if search and 'evilnight' not in search.group(1):
                                print comment.body
                                print search.group(1)
                        if not found:
                            update_list.append((None, post_list[i]))
                    else:
                        update_list.append((post.author.name, post_list[i]))
                    if update_list[-1][0] is not None and update_list[-1][0].endswith(','):
                        print update_list[-1]
                except Exception:
                    # transient reddit error; pause and retry this post
                    success = False
                    time.sleep(1)
        assert not any(val[0].endswith(',') for val in update_list if val[0] is not None)
        post_list = post_list[num_loaded:]
        cursor.executemany('update reddit_record set submitter = ? where short_url = ?', update_list)
        db.commit()
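# A runnable illustration of the character-class fix noted in main(): with the
# original class [\w\d\*-_], the unescaped '-' between '*' and '_' forms a
# codepoint range (0x2A-0x5F) that also admits '+', ',', '?', and more. The
# username below is a hypothetical example, not real data.
def _demo_username_pattern_fix():
    body = '/u/bad+name, your submission'
    buggy = re.search(r'/u/([\w\d\*-_]+), your submission', body)
    fixed = re.search(r'/u/([\w\d_\-\*]+), your submission', body)
    assert buggy is not None and buggy.group(1) == 'bad+name'  # over-matches '+'
    assert fixed is None  # '+' is rejected, as intended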
def scan(self):
    """Scans the previously collected reddit posts for deleted posts"""
    # scan old messages, see if deleted
    with DataBase.DataBaseWrapper(self.database_file, False) as db:
        now = datetime.datetime.now()
        global_strike_date = now - self.policy.Strike_Counter_Global_Strike_History
        history_date = now - self.policy.Strike_Counter_Scan_History
        entries = db.get_reddit(date_added=history_date, processed=0, return_dateadded=True)
        if entries is None:
            logging.warning(u"No reddit entries found in database...")
            return

        new_strike_channels = []

        # loop over entries in 100-post strides
        stride = 100
        while len(entries):
            num_loaded = min(stride, len(entries))
            (ids, channels, domains, add_dates) = zip(*entries[:num_loaded])
            ids = list(ids)
            # see if we already know the submitters
            have_submitters = db.have_submitter(ids)
            # any newly detected usernames go here
            new_submitters_list = []
            channels = list(channels)
            domains = list(domains)
            loaded = Actions.get_by_ids(self.praw, ids)
            if not loaded:
                logging.info(u"Historical posts not loaded...")
                return
            # make sure posts were actually retrieved
            posts = [post for post in loaded]
            if not posts:
                logging.info(u"Bad post retrieve")
                return

            # make sure the channels exist
            add_channels = []
            exists = db.channel_exists([(channel, domains[i]) for i, channel in enumerate(channels)])
            for i, e in enumerate(exists):
                if not e:
                    # pull up the url
                    add_channels.append((channels[i], domains[i]))

            # resolve all the added ids
            if add_channels:
                if not db.add_channels(add_channels):
                    # if there was an error adding the channels, don't mark as processed
                    logging.info(u"Error adding channels to channel_record, skipping processing of posts")
                    continue

            # check for deleted / excepted posts
            increment_posts = {}
            processed_posts = []
            excepted_posts = []
            for i, post in enumerate(posts):
                if Actions.is_deleted(post):
                    if not have_submitters[i]:
                        val = self.check_for_submitter(post)
                        if val is not None:
                            new_submitters_list.append((val, ids[i]))
                    if not self.check_exception(post):
                        #self.policy.info(u"Deleted post found {}".format(post.name), u"channel = {}, domain = {}".format(channels[i], domains[i]))
                        if add_dates[i] > global_strike_date or self.recount_strikes:
                            if (channels[i], domains[i]) not in increment_posts:
                                increment_posts[(channels[i], domains[i])] = 1
                            else:
                                increment_posts[(channels[i], domains[i])] += 1
                            if (channels[i], domains[i]) not in new_strike_channels:
                                new_strike_channels.append((channels[i], domains[i]))
                    else:
                        excepted_posts.append(post.name)
                    processed_posts.append(post.name)

            if len(increment_posts):
                # add strikes
                db.add_strike([(increment_posts[key],) + key for key in increment_posts])
                if __debug__:
                    logging.info(u"Strike Counter found {} new deleted posts...".format(len(increment_posts)))
            if len(increment_posts) or len(excepted_posts):
                # remove from consideration (so we don't count them over and over)
                db.set_processed(processed_posts)
                db.set_exception(excepted_posts)

            # update submitters
            if len(new_submitters_list):
                db.update_submitter(new_submitters_list)

            # forget old entries
            entries = entries[num_loaded:]

        # check for rule-breaking channels
        channels = db.get_channels(strike_count=self.policy.Strike_Count_Max,
                                   blacklist=Blacklist.BlacklistEnums.NotFound)
        if channels and len(channels):
            if __debug__:
                logging.info(u"{} new channels added to the blacklist".format(len(channels)))
            db.set_blacklist(channels, Blacklist.BlacklistEnums.Blacklisted,
                             self.owner.credentials['USERNAME'], u"Global strike count exceeded")

        # check for user strike counts
        user_strikes = db.max_processed_from_user(not_found_value=Blacklist.BlacklistEnums.NotFound,
                                                  strike_limit=self.policy.User_Strike_Count_Max)
        if len(user_strikes):
            reason_list = [u"User strike count exceeded by {} ({} strikes counted) for channel {} "
                           u"on domain {}".format(user[1], user[0], user[2], user[3])
                           for user in user_strikes]
            new_blacklist = [(user[2], user[3]) for user in user_strikes]
            db.set_blacklist(new_blacklist, Blacklist.BlacklistEnums.Blacklisted,
                             self.owner.credentials['USERNAME'], reason_list)

        # update global strike counts:
        # find posts older than the scan period that are marked as processed
        old_strikes = db.processed_older_than(global_strike_date, old_flag=0)
        if old_strikes is not None and len(old_strikes):
            decrement_count = {}
            for pair in old_strikes:
                if pair not in decrement_count:
                    decrement_count[pair] = 0
                decrement_count[pair] += 1
            # ...and remove them from the count
            db.subtract_strikes_and_mark([(decrement_count[pair],) + pair for pair in decrement_count],
                                         global_strike_date)

        # remove entries older than the scan period
        db.remove_reddit_older_than(history_date)

        # turn off recount if set
        if self.recount_strikes:
            self.recount_strikes = False
            logging.info(u'Strike recount completed successfully')

    if __debug__:
        logging.info(u"Strike count completed successfully at {}".format(datetime.datetime.now()))
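# Both main() and scan() walk their work lists in 100-item strides, presumably
# to match reddit's 100-fullname-per-request limit on batched lookups. A
# minimal stdlib-only sketch of that pattern as a reusable generator (the
# helper name is hypothetical):
def chunked(seq, stride=100):
    """Yield successive stride-sized slices of seq."""
    for start in range(0, len(seq), stride):
        yield seq[start:start + stride]

# usage sketch: for batch in chunked(post_list): Actions.get_by_ids(praw, batch)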