def main():
    global r
    logging.config.fileConfig(path_to_cfg)
    re.set_fallback_notification(re.FALLBACK_EXCEPTION)

    # which queues to check and the function to call
    queue_funcs = {'report': 'get_reports',
                   'spam': 'get_mod_queue',
                   'submission': 'get_new',
                   'comment': 'get_comments'}

    while True:
        try:
            r = praw.Reddit(user_agent=cfg_file.get('reddit', 'user_agent'))
            logging.info('Logging in as {0}'
                         .format(cfg_file.get('reddit', 'username')))
            r.login(cfg_file.get('reddit', 'username'),
                    cfg_file.get('reddit', 'password'))
            sr_dict, cond_dict = initialize(queue_funcs.keys())
            break
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))

    run_counter = 0
    while True:
        run_counter += 1
        try:
            # only check reports every 10 runs
            # sleep afterwards in case ^C is needed
            if run_counter % 10 == 0:
                check_queues(queue_funcs, sr_dict, cond_dict)
                Condition.clear_standard_cache()
                if process_messages():
                    sr_dict, cond_dict = initialize(queue_funcs.keys(),
                                                    reload_mod_subs=False)
                logging.info('Sleeping ({0})'.format(datetime.now()))
                sleep(5)
                run_counter = 0
            else:
                check_queues({q: queue_funcs[q]
                              for q in queue_funcs
                              if q != 'report'},
                             sr_dict, cond_dict)
                if process_messages():
                    sr_dict, cond_dict = initialize(queue_funcs.keys(),
                                                    reload_mod_subs=False)
        except (praw.errors.ModeratorRequired,
                praw.errors.ModeratorOrScopeRequired,
                HTTPError) as e:
            if not isinstance(e, HTTPError) or e.response.status_code == 403:
                logging.info('Re-initializing due to {0}'.format(e))
                sr_dict, cond_dict = initialize(queue_funcs.keys())
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            session.rollback()
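# ---------------------------------------------------------------------------
# Usage sketch (not part of the snippet above): what FALLBACK_EXCEPTION does.
# With pyre2, set_fallback_notification(FALLBACK_EXCEPTION) makes compile()
# raise when a pattern uses features RE2 does not support (e.g. backreferences)
# instead of silently falling back to the slower standard-library engine. The
# exact exception class varies between pyre2 forks, so this catches broadly.
import re2

re2.set_fallback_notification(re2.FALLBACK_EXCEPTION)

re2.compile(r'\d+')             # supported by RE2: compiles fine
try:
    re2.compile(r'(ab)\1')      # backreference: unsupported, triggers fallback
except Exception as e:          # exception type differs across pyre2 versions
    print('rejected under FALLBACK_EXCEPTION: {0}'.format(e))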
Requirements:
    - `treelib`: `pip install treelib`
"""
# Copyright (C) 2016 by
# Rion Brattig Correia <*****@*****.**>
# Ian B. Wood <*****@*****.**>
# All rights reserved.
# MIT license.
from treelib import Tree
try:
    import re2 as re
except ImportError:
    import re
else:
    re.set_fallback_notification(re.FALLBACK_WARNING)
from nltk.tokenize import PunktSentenceTokenizer, TweetTokenizer, sent_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

__author__ = """\n""".join(
    ['Rion Brattig Correia <*****@*****.**>',
     'Ian B. Wood <*****@*****.**>'])
__all__ = ['TermDictionaryParser']


class Match(object):
    """ """
    def __init__(self, id=None, tokens=(), si=None, wi=None, ti=None):
        self.id = id
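# ---------------------------------------------------------------------------
# Minimal sketch (the sample pattern and text are illustrative, not from the
# module above): because the try/except/else binds whichever engine imported
# successfully to the name `re`, downstream code is written once and runs
# unchanged on either backend.
tokens = re.findall(r'\w+', 'acute myocardial infarction')
assert tokens == ['acute', 'myocardial', 'infarction']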
import eventlet
urllib2 = eventlet.import_patched('urllib2')
eventlet.monkey_patch()
unionpool = eventlet.GreenPool(10)
"""
try:
    import re2 as re
except ImportError:
    import re
    print "Using standard library regexes"
else:
    print "Using Google RE2 regexes (fallback to standard library)"
    re.set_fallback_notification(re.FALLBACK_WARNING)

import sys, os, shutil, types, time, urllib2
from Cheetah.Template import Template
from glob import glob
from model import *
from sqlobject.dberrors import DuplicateEntryError
from BeautifulSoup import BeautifulSoup

defaultencoding = 'iso-8859-1'
#defaultencoding = 'utf-8'

prefix = "http://m.assetbar.com/achewood/"

# added the explicit match for question marks to keep the urlopen() in
# process_monthly() from 404ing
# maybe the fine men and women at 'asset bar' changed their url scheme?
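# ---------------------------------------------------------------------------
# Hypothetical sketch of how the disabled eventlet block above would be used
# if re-enabled: fetch pages concurrently on a pool of 10 green threads.
# `fetch` and `fetch_all` are assumptions; only import_patched('urllib2') and
# GreenPool(10) appear in the snippet.
def fetch(url):
    return url, urllib2.urlopen(url).read()

def fetch_all(urls):
    pool = eventlet.GreenPool(10)
    # imap yields results in order as each green thread finishes its fetch
    for url, body in pool.imap(fetch, urls):
        print "%s: %d bytes" % (url, len(body))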
import it.
"""
from timeit import Timer
import simplejson
import re2
import re
try:
    import regex
except ImportError:
    regex = None
import os
import gzip

re2.set_fallback_notification(re2.FALLBACK_EXCEPTION)

os.chdir(os.path.dirname(__file__) or '.')

tests = {}

setup_code = """\
import re2
import re
from __main__ import tests, current_re
test = tests[%r]
"""

current_re = [None]
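# ---------------------------------------------------------------------------
# Sketch of how this harness is presumably driven (the registration format
# `tests[name] = (pattern, text)` and the helper below are assumptions; only
# `tests`, `setup_code`, and Timer come from the snippet). setup_code pulls
# one registered case into the timed namespace as `test`, so the same
# statement can be timed against both re2 and re.
tests['literal'] = (r'Achewood', 'x' * 10000 + 'Achewood')

def time_stmt(name, stmt, number=1000):
    # best of three repeats, each running the statement `number` times
    return min(Timer(stmt, setup_code % name).repeat(3, number))

print(time_stmt('literal', 're2.search(test[0], test[1])'))
print(time_stmt('literal', 're.search(test[0], test[1])'))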
def main():
    global r
    logging.config.fileConfig(path_to_cfg)
    re.set_fallback_notification(re.FALLBACK_EXCEPTION)

    # which queues to check and the function to call
    queue_funcs = {'report': 'get_reports',
                   'spam': 'get_mod_queue',
                   'submission': 'get_new',
                   'comment': 'get_comments'}

    while True:
        try:
            r = praw.Reddit(user_agent=cfg_file.get('reddit', 'user_agent'))
            logging.info('Logging in as {0}'
                         .format(cfg_file.get('reddit', 'username')))
            r.login(cfg_file.get('reddit', 'username'),
                    cfg_file.get('reddit', 'password'))
            sr_dict = get_enabled_subreddits()
            Condition.update_standards()
            cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())
            break
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))

    reports_mins = int(cfg_file.get('reddit', 'reports_check_period_mins'))
    reports_check_period = timedelta(minutes=reports_mins)
    last_reports_check = time()

    while True:
        try:
            sr_dict = get_enabled_subreddits(reload_mod_subs=False)

            # if the standard conditions have changed, reinit all conditions
            if Condition.update_standards():
                logging.info('Updating standard conditions from database')
                cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())

            # check reports if past checking period
            if elapsed_since(last_reports_check) > reports_check_period:
                last_reports_check = time()
                check_queues({'report': queue_funcs['report']},
                             sr_dict, cond_dict)

            check_queues({q: queue_funcs[q]
                          for q in queue_funcs
                          if q != 'report'},
                         sr_dict, cond_dict)

            updated_srs = process_messages()
            if updated_srs:
                if any(sr not in sr_dict for sr in updated_srs):
                    sr_dict = get_enabled_subreddits(reload_mod_subs=True)
                else:
                    sr_dict = get_enabled_subreddits(reload_mod_subs=False)

                for sr in updated_srs:
                    update_conditions_for_sr(cond_dict,
                                             queue_funcs.keys(),
                                             sr_dict[sr])
        except (praw.errors.ModeratorRequired,
                praw.errors.ModeratorOrScopeRequired,
                HTTPError) as e:
            if not isinstance(e, HTTPError) or e.response.status_code == 403:
                logging.info('Re-initializing due to {0}'.format(e))
                sr_dict = get_enabled_subreddits()
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            session.rollback()
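# ---------------------------------------------------------------------------
# `elapsed_since` is called above but not shown; a minimal sketch consistent
# with the call site, where time() timestamps are compared against a timedelta:
from datetime import timedelta
from time import time

def elapsed_since(timestamp):
    """Wall-clock time since `timestamp` (a time() float), as a timedelta."""
    return timedelta(seconds=time() - timestamp)

assert elapsed_since(time() - 120) >= timedelta(minutes=2)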