def login():
    """
    Handles project account authentication.

    A request that already has a project loaded on `g` is redirected to that
    project's homepage. Otherwise the login form is validated, the
    credentials are checked through `DB.auth`, and on success the project id
    is stored in the session before redirecting home. On failure the
    message returned by the DB layer is flashed and the form is re-rendered.
    """
    # Nothing to authenticate when a project is already loaded
    if g.project is not None:
        return redirect(url_for('home', project_name=g.project['project_name']))

    form = LoginForm(request.form)
    if not form.validate_on_submit():
        return render_template('login.html', form=form)

    # On submit, grab name & password
    submitted_name = form.project_name.data
    submitted_password = form.password.data

    # Try login
    db = DB()
    auth_result = db.auth(submitted_name, submitted_password)
    if not auth_result['status']:
        flash(auth_result['message'])
        return render_template('login.html', form=form)

    session['project_id'] = auth_result['project_id']
    detail = db.get_project_detail(session['project_id'])
    return redirect(url_for('home', project_name=detail['project_name']))
def create():
    """
    Page to create a new project account.

    On a valid submission the project is created through `DB.create` (which
    receives both the raw and the hashed password). Success flashes a
    confirmation and redirects to the admin homepage; failure flashes the
    message returned by the DB layer and re-renders the form.
    """
    form = CreateForm(request.form)
    if not form.validate_on_submit():
        return render_template('create.html', form=form)

    # On submit, grab form information
    name = form.project_name.data
    contact_email = form.email.data
    raw_password = form.password.data
    password_hash = generate_password_hash(raw_password)
    summary = form.description.data

    # Create the account
    outcome = DB().create(name, raw_password, password_hash,
                          description=summary, email=contact_email)
    if not outcome['status']:
        flash(outcome['message'])
        return render_template('create.html', form=form)

    flash('Project successfully created!')
    return redirect(url_for('admin_home', admin_id=g.admin['project_id']))
def admin_login():
    """
    Login for an admin account.

    An already-loaded admin is redirected straight to the admin homepage.
    Otherwise the submitted credentials are checked through `DB.auth`; only
    a response that is both successful and flagged as admin logs in. A
    non-admin response flashes 'Invalid admin account!', any other failure
    flashes the message returned by the DB layer.
    """
    if g.admin is not None:
        return redirect(url_for('admin_home', admin_id=g.admin['project_id']))

    form = LoginForm(request.form)
    if form.validate_on_submit():
        # On submit, grab name & password
        submitted_name = form.project_name.data
        submitted_password = form.password.data

        # Try login
        db = DB()
        resp = db.auth(submitted_name, submitted_password)
        authenticated = resp['status']
        is_admin = resp['admin']

        if not is_admin:
            flash('Invalid admin account!')
        elif not authenticated:
            flash(resp['message'])
        else:
            session['admin_project_id'] = resp['project_id']
            admin_detail = db.get_project_detail(session['admin_project_id'])
            return redirect(
                url_for('admin_home', admin_id=admin_detail['project_id']))

    return render_template('admin_login.html', form=form)
def run_search(self):
    """
    One-time Graph API search for historical collections.

    Walks every id in `self.id_list`, following the feed's paging links
    until a page comes back without data or without a further page, and
    hands each page of data to `self.on_data`. When every id has been
    processed the collector status is set to 0 and the loop stops. If the
    shutdown event `self.e` is set when the loop starts, the method logs
    and returns without querying anything.
    """
    self.running = True
    while self.running:
        # First, check to see if the thread has been set to shut down. If so, break
        if self.e.isSet():
            self.c.log('Collection thread set to shut down. Shutting down.', thread=self.thread)
            self.running = False
            break

        # Loop thru each term and query the API
        for term_id in self.id_list:
            since = self.params['since']
            until = self.params['until']

            # A stored 'last' value earlier than 'until' means the previous run was
            # cut off mid-collection, so resume from there instead
            last = self.params['last']
            if last and (until is None or last < until):
                until = last

            # 'none' is the sentinel for "first request"; None means paging is finished
            paging_url = 'none'
            while paging_url is not None:
                try:
                    # Compare by value: identity of string literals is implementation-defined
                    if paging_url != 'none':
                        resp = requests.get(paging_url)
                        resp = json.loads(resp.content)
                    else:
                        resp = self.fb.get_object_feed(term_id, since=since, until=until)

                    if 'paging' not in resp or not resp['data']:
                        # No data or no paging info: the historical search is done for this term
                        self.c.log('Historical query collection completed for term: %s.' % str(term_id),
                                   thread=self.thread)
                        paging_url = None
                    else:
                        self.on_data(resp['data'])

                        # The last page carries no 'next' link; treat that as completion
                        # rather than letting a KeyError kill the thread
                        paging_url = resp['paging'].get('next')
                        if paging_url is None:
                            self.c.log('Historical query collection completed for term: %s.' % str(term_id),
                                       thread=self.thread)

                # On error, logs and records in DB
                # NOTE(review): paging_url is left unchanged here, so the same request is
                # retried immediately - confirm that an unbounded retry is intended
                except FacebookError as e:
                    self.c.log('Query for term ID %s failed.' % str(term_id), thread=self.thread, level='error')
                    now = time.strftime('%Y-%m-%d %H:%M:%S')
                    self.project_db.update(
                        {'_id': ObjectId(self.collector_id)},
                        {'$push': {'error_codes': {'code': e[0]['code'],
                                                   'message': e[0]['message'],
                                                   'date': now}}})

        self.c.log('Historical query collection completed for all terms. Shutting down.', thread=self.thread)
        db = DB()
        db.set_collector_status(self.c.project_id, self.collector_id, collector_status=0)
        self.running = False
def decorated_function(*args, **kwargs):
    """
    Populate `g.admin` from the session, then call the wrapped view `f`.

    `g.admin` is reset to None first. It is only filled in when the session
    holds an 'admin_project_id' and the project lookup reports success.
    """
    g.admin = None
    if 'admin_project_id' not in session:
        return f(*args, **kwargs)

    admin_detail = DB().get_project_detail(session['admin_project_id'])
    if admin_detail['status']:
        g.admin = admin_detail
    return f(*args, **kwargs)
def _aload_project(project_name):
    """
    Utility method to load an admin project detail if an admin is viewing
    their control page.

    Looks the project up by name in the stack config collection, stores its
    detail on `g.project` and its id (as a string) in the session.
    """
    db = DB()
    record = db.stack_config.find_one({'project_name': project_name})
    project_id = str(record['_id'])

    g.project = db.get_project_detail(project_id)
    session['project_id'] = project_id
def setUp(self): db = DB(Config.DATABASE_URI) # db.create_tables() self.db = db self.ticket_json = '''{ "message": "test_message", "subject": "test_subject", "email": "*****@*****.**" }''' self.comment_json = '''{
def __init__(self, db_dir, n_splits=None, initialize_db=False, **kwargs):
    """
    Divides all samples in k groups of samples.

    If n_splits is None, one fold is generated per rating
    (Leave-One-Out style). The jump size is the number of ratings per fold,
    rounded up.
    """
    self.db_dir = db_dir
    self.db = DB(db_dir=self.db_dir)

    rating_count = len(self.db.ratings)
    self.total_ratings = rating_count
    self._n_splits = rating_count if n_splits is None else n_splits

    self._index = 0
    self._jump_size = math.ceil(rating_count / self._n_splits)
    self._initialize_db = initialize_db
def admin_home(admin_id):
    """
    Homepage for an admin account.

    Lists every project that carries an 'admin' key with a falsy value,
    i.e. the non-admin project accounts. The list stays empty when the
    project lookup does not report success.
    """
    resp = DB().get_project_list()

    project_list = []
    if resp['status']:
        project_list = [
            project for project in resp['project_list']
            if 'admin' in project and not project['admin']
        ]

    return render_template('admin_home.html',
                           admin_detail=g.admin,
                           project_list=project_list)
def __init__(self, db_dir, recommender_class, top_n=None, **kwargs):
    """
    Evaluates prediction performance.

    Optional keyword arguments read here: `n_splits` (forwarded to the
    k-fold generator) and `init_recommender_params` (defaults to an empty
    dict).
    """
    self.initial_time = datetime.utcnow()

    fold_count = kwargs.get('n_splits')
    self._init_recommender_params = kwargs.get('init_recommender_params', {})

    self._k_fold_generator = KFoldGenerator(db_dir,
                                            n_splits=fold_count,
                                            initialize_db=False)
    self.db_dir = db_dir
    self.db = DB(db_dir=self.db_dir)
    self._recommender_class = recommender_class
    self.top_n = top_n
def setup():
    """
    Called on a new install to setup an admin account.

    A valid submission creates the account with `admin=True`. Success
    flashes a confirmation and redirects to the index; failure flashes the
    message returned by the DB layer and re-renders the form.
    """
    form = SetupForm(request.form)
    if not form.validate_on_submit():
        return render_template('setup.html', form=form)

    # On submit, grab form information
    name = form.project_name.data
    raw_password = form.password.data
    password_hash = generate_password_hash(raw_password)

    # Create the account
    outcome = DB().create(name, raw_password, password_hash, admin=True)
    if outcome['status']:
        flash('Project successfully created!')
        return redirect(url_for('index'))

    flash(outcome['message'])
    return render_template('setup.html', form=form)
def collector_control(collector_id):
    """
    POST control route for collector forms.

    Queues a start / stop / restart task for the collector when the control
    form validates, then redirects to the collector's detail page. The
    queued task id is included in the redirect only when a task was
    actually queued.
    """
    collector_form = ProcessControlForm(request.form)
    task = None

    # Look the collector up unconditionally: the redirect below always needs
    # its network, including when the form is invalid and nothing is queued
    db = DB()
    collector = db.get_collector_detail(g.project['project_id'], collector_id)
    network = collector['collector']['network']

    # On form submit controls the processor
    if request.method == 'POST' and collector_form.validate():
        command = request.form['control'].lower()

        task_args = {
            'process': 'collect',
            'project': g.project,
            'collector_id': collector_id
        }

        if command == 'start':
            task = start_daemon.apply_async(kwargs=task_args, queue='stack-start')
        elif command == 'stop':
            task = stop_daemon.apply_async(kwargs=task_args, queue='stack-stop')
        elif command == 'restart':
            task = restart_daemon.apply_async(kwargs=task_args, queue='stack-start')

    redirect_args = {
        'project_name': g.project['project_name'],
        'network': network,
        'collector_id': collector_id
    }
    # An unrecognised command or an invalid form leaves task as None; skip the
    # task id instead of failing on task.task_id
    if task is not None:
        redirect_args['task_id'] = task.task_id

    return redirect(url_for('collector', **redirect_args))
def collector(project_name, network, collector_id, task_id=None):
    """
    Loads the detail / control page for a collector.

    When a task id is supplied, the page also shows whether the matching
    start/stop/restart task is still pending.
    """
    # Redirects an admin back to the homepage b/c nothing is loaded into the session yet
    if g.project is None:
        flash('Please navigate to the New Collector page from your homepage panel.')
        return redirect(url_for('index'))

    form = ProcessControlForm(request.form)

    db = DB()

    # Loads collector info for the page
    detail = db.get_collector_detail(g.project['project_id'], collector_id)
    collector_info = detail['collector']

    # Loads active status
    process_state = db.check_process_status(g.project['project_id'],
                                            'collect',
                                            collector_id=collector_id)
    active_status = process_state['message']

    # If a start/stop/restart is in progress, display the status
    task_status = None
    if task_id:
        async_result = celery.AsyncResult(task_id)
        task_status = ('Collector start/shutdown still in progress...'
                       if async_result.state == 'PENDING'
                       else 'Collector start/shutdown completed.')

    return render_template('collector.html',
                           collector=collector_info,
                           active_status=active_status,
                           form=form,
                           task_status=task_status)
def insert_work(): title = raw_input('Work item title: ') description = raw_input('Work item description: ') image_name = raw_input('Main image name: ') main_link = raw_input('Main link URL: ') priority = raw_input('Priority: ') tags = [] flag = 1 while flag: tag_name = raw_input('Input tag name: ') tag_url = raw_input('Input tag link URL: ') tags.append({'tag_name': tag_name, 'tag_url': tag_url}) yn = raw_input('Add more tags? [y/n]: ') if yn == 'n': flag = 0 doc = { 'priority': priority, 'title': title, 'description': description, 'image_name': 'http://www.ceskavich.com/static/img/' + image_name, 'main_link': main_link, 'tags': tags } db = DB('main', 'work') resp = db.insert(doc) if resp['status']: print '\n' print 'SUCCESS!' else: print '\n' print 'Oops. Something went wrong. Please try again.'
def index():
    """
    Loads the STACK homepage w/ list of project accounts.

    Workers are started first. If no project with `admin == 1` exists, the
    request is redirected to the new-install setup page instead.
    """
    start_workers()

    resp = DB().get_project_list()

    project_list = None
    admins = None
    if resp and resp['project_list']:
        project_list = resp['project_list']
        admins = [
            project for project in project_list
            if 'admin' in project and project['admin'] == 1
        ]

    # Without at least one admin account this is a fresh install: run the setup
    if not admins:
        return redirect(url_for('setup'))

    return render_template('index.html', project_list=project_list)
def setup(self):
    """
    Open the DB stored under data/predictors and build a
    CollaborativePredictor on it with a neighbourhood size of 100.
    """
    fixture_dir = os.path.join(BASE_DIR, 'data', 'predictors')
    self.dir_name = fixture_dir

    database = DB(db_dir=fixture_dir)
    self.db = database

    self.predictor = CollaborativePredictor(database, neighbourhood_size=100)
import simplejson from email.utils import parsedate_tz from collections import defaultdict import sys import traceback import string import config from pymongo import errors as PymongoErrors from . import module_dir from app.models import DB PLATFORM_CONFIG_FILE = module_dir + '/platform.ini' BATCH_INSERT_SIZE = 1000 db = DB() db_central = DB(local=False) # function goes out and gets a list of raw tweet data files def get_processed_tweet_file_queue(Config, insertdir): insert_queue_path = insertdir + '/' if not os.path.exists(insert_queue_path): os.makedirs(insert_queue_path) tweetsOutFile = Config.get('files', 'tweets_file', 0) file_extension = os.path.splitext(tweetsOutFile)[1] tweetFileNamePattern = tweetsOutFile.replace(file_extension, '_processed' + file_extension) tweetFileNamePattern = insert_queue_path + '*' + tweetFileNamePattern
def setup(self):
    """
    Open the DB stored under data/simple for the tests in this class
    (per the original docstring, the matrix density computation).
    """
    simple_dir = os.path.join(BASE_DIR, 'data', 'simple')
    self.db = DB(db_dir=simple_dir)
def compute():
    """
    Computes different statistics relevant for Task 1.

    Prints a series of tables (general counts, per-user and per-movie
    rating statistics, rating totals, per-user and per-movie tag
    statistics) and draws the two rating distribution plots after the
    per-user table.
    """
    def three_sig(value):
        # Three significant digits, as used for every mean / sd / density cell
        return '{0:.3g}'.format(value)

    def summary_row(summary):
        # max / min / median are printed as-is, mean and sd are rounded
        return [
            summary['max'],
            summary['min'],
            summary['median'],
            three_sig(summary['mean']),
            three_sig(summary['sd'])
        ]

    def show(title, headers, row):
        # Print a titled, single-row table
        print(title)
        table = PrettyTable(headers)
        table.add_row(row)
        print(table)

    db = DB()

    # The general table is built by hand because it is the only one with
    # custom alignment and padding
    print('\nGeneral Statistics:')
    general = PrettyTable([
        'Total Users',
        'Total Movies',
        'Total Tags',
        'Matrix Density',
    ])
    for column in ('Total Users', 'Total Movies', 'Matrix Density'):
        general.align[column] = 'l'
    general.padding_width = 1
    general.add_row([
        len(db.users.keys()),
        len(db.movies.keys()),
        len(db.tags),
        three_sig(db.density)
    ])
    print(general)

    show('\nStatistics by User and Ratings:',
         ['Max Number of Ratings for User', 'Min Number of Ratings for User',
          'Median Ratings for User', 'Mean Ratings for User',
          'Standard Deviation Ratings for User'],
         summary_row(db.stats['users']))

    plot_ratings_distribution(db)
    plot_user_ratings_distribution(db)

    show('\nStatistics by Movie and Ratings:',
         ['Max Number of Ratings for Movie', 'Min Number of Ratings for Movie',
          'Median Ratings for Movie', 'Mean Ratings for Movie',
          'Standard Deviation Ratings for Movie'],
         summary_row(db.stats['movies']))

    # One column per rating value, in sorted order
    rating_totals = db.stats['ratings']
    rating_values = sorted(rating_totals.keys())
    show('\nTotal number of ratings for each of the 5 ratings',
         rating_values,
         [rating_totals[value] for value in rating_values])

    show('\nStatistics by Tags and User',
         ['Max Number of Tags by User', 'Min Number of Tags by User',
          'Median Number of Tags by User', 'Mean Number of Tags by User',
          'Standard Deviation Tags by User'],
         summary_row(db.stats['tags']['by_user']))

    show('\nStatistics by Tags and Movie',
         ['Max Number of Tags by Movie', 'Min Number of Tags by Movie',
          'Median Number of Tags by Movie', 'Mean Number of Tags by Movie',
          'Standard Deviation Tags by Movie'],
         summary_row(db.stats['tags']['by_movie']))
def __init__(self, project_id, collector_id, process_name):
    """
    Set up a collector: load its project and collector configuration, reset
    stream limits for realtime collectors, configure a daily-rotating log
    file, and make sure the raw data directory exists.
    """
    self.project_id = project_id
    self.collector_id = collector_id
    self.process_name = process_name
    self.collecting_data = False

    # Sets up connection w/ project config DB & loads in collector info
    self.db = DB()
    project = self.db.get_project_detail(self.project_id)
    if project['status']:
        self.project_name = project['project_name']
        config_connection = self.db.connection[project['project_config_db']]
        self.project_db = config_connection.config

    detail = self.db.get_collector_detail(self.project_id, self.collector_id)
    if detail['status']:
        collector_info = detail['collector']

        # (attribute on self, key in the stored collector document)
        attribute_keys = (
            ('collector_name', 'collector_name'),
            ('network', 'network'),
            ('api', 'api'),
            ('collection_type', 'collection_type'),
            ('params', 'params'),
            ('terms_list', 'terms_list'),
            ('languages', 'languages'),
            ('locations', 'location'),
            ('auth', 'api_auth'),
        )
        for attribute, key in attribute_keys:
            setattr(self, attribute, collector_info[key])

    # TODO - file format to Mongo
    # TODO - less then hour = warning
    self.file_format = '%Y%m%d-%H'

    # If this is a streaming collector, start from an empty stream_limits list
    if self.collection_type == 'realtime':
        self.project_db.update({'_id': ObjectId(self.collector_id)},
                               {'$set': {'stream_limits': []}})

    project_dirname = self.project_name + '-' + self.project_id

    # Sets up logdir and logging
    log_directory = app.config['LOGDIR'] + '/' + project_dirname + '/logs'
    if not os.path.exists(log_directory):
        os.makedirs(log_directory)

    # Sets logger w/ name collector_name and level INFO
    self.logger = logging.getLogger(self.collector_name)
    self.logger.setLevel(logging.INFO)

    # Sets up logging file handler
    # TODO - logging params
    # TODO - port logging rotation params to Mongo for user control later / these default values good
    log_path = log_directory + ('/%s.log' % self.process_name)
    file_handler = logging.handlers.TimedRotatingFileHandler(log_path, when='D', backupCount=30)
    file_handler.setLevel(logging.INFO)

    # Formats
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    date_format = '%m-%d %H:%M'
    file_handler.setFormatter(logging.Formatter(log_format, date_format))

    # Adds handler to logger to finish
    self.logger.addHandler(file_handler)
    self.log('STACK collector %s initiated.' % self.collector_name)

    # Sets up rawdir
    self.rawdir = app.config['DATADIR'] + '/' + project_dirname + '/' + self.network + '/raw'
    if not os.path.exists(self.rawdir):
        os.makedirs(self.rawdir)

    self.log('All raw files and directories set. Now starting collector...')
def __init__(self, process, cmdline=False, home_dir='.', umask=0o22, verbose=1, **kwargs):
    """
    Set up a process controller for a project.

    Project details come from the `project` keyword argument, or are looked
    up by the `project_id` keyword argument when `cmdline` is not False.
    For 'process' / 'insert' the `network` keyword argument names the
    module; for 'collect' the `collector_id` keyword argument is looked up.
    A failed lookup prints a usage message and exits with status 1. The
    pid / std directories and the std files are created if missing;
    log / raw / archive / insert directories are only created for the
    'twitter' module.
    """
    def exit_with_usage(problem):
        # Report a failed lookup plus the usage line, then exit with status 1
        print(problem)
        print('')
        print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
        sys.exit(1)

    self.db = DB()
    self.process = process
    self.cmdline = cmdline
    self.usage_message = 'controller collect|process|insert start|stop|restart project_id collector_id'

    self.home_dir = home_dir
    self.umask = umask
    self.verbose = verbose

    if self.cmdline is False:
        # Grab information from Flask user object
        self.project = kwargs['project']
        self.project_id = self.project['project_id']
        self.project_name = self.project['project_name']
    else:
        # Command is coming from the command line, look up info
        self.project_id = kwargs['project_id']

        lookup = self.db.get_project_detail(self.project_id)
        if not lookup['status']:
            exit_with_usage('Project w/ ID %s not found!' % self.project_id)
        self.project_name = lookup['project_name']

    # Project account DB connection
    project_info = self.db.get_project_detail(self.project_id)
    config_connection = self.db.connection[project_info['project_config_db']]
    self.projectdb = config_connection.config

    # Loads info for process based on type: collector, processor, inserter
    if self.process in ['process', 'insert']:
        # Only module type needed for processor / inserter
        self.module = kwargs['network']
        self.collector_id = None

        # Set name for worker based on gathered info
        self.process_name = '-'.join(
            [self.project_name, self.process, self.module, self.project_id])
    elif process == 'collect':
        # For collectors, also grabs: collector_id, api, collector_name
        self.collector_id = kwargs['collector_id']

        lookup = self.db.get_collector_detail(self.project_id, self.collector_id)
        if not lookup['status']:
            exit_with_usage('Collector (ID: %s) not found!' % self.collector_id)

        collector = lookup['collector']
        self.module = collector['network']
        self.api = collector['api']
        self.collector_name = collector['collector_name']

        # Set name for worker based on gathered info
        self.process_name = '-'.join(
            [self.project_name, self.collector_name, self.process, self.module, self.collector_id])

    project_dirname = self.project_name + '-' + self.project_id
    log_root = app.config['LOGDIR'] + '/' + project_dirname
    data_root = app.config['DATADIR'] + '/' + project_dirname + '/' + self.module

    # Sets out directories
    self.piddir = log_root + '/pid'
    self.logdir = log_root + '/logs'
    self.stddir = log_root + '/std'

    # Sets data dirs
    # TODO - deprecate w/ Facebook
    self.rawdir = data_root + '/raw'
    self.archdir = data_root + '/archive'
    self.insertdir = data_root + '/insert_queue'

    # Creates dirs if they don't already exist
    required_dirs = [self.piddir, self.stddir]

    # These directories only need be created for Twitter
    # TODO - deprecate w/ Facebook
    if self.module == 'twitter':
        required_dirs += [self.logdir, self.rawdir, self.archdir, self.insertdir]

    for directory in required_dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Sets outfiles
    self.pidfile = self.piddir + ('/%s.pid' % self.process_name)
    self.stdout = self.stddir + ('/%s-stdout.txt' % self.process_name)
    self.stderr = self.stddir + ('/%s-stderr.txt' % self.process_name)
    self.stdin = self.stddir + ('/%s-stdin.txt' % self.process_name)

    # Creates the std files for the daemon
    for std_path in (self.stdout, self.stdin, self.stderr):
        if not os.path.isfile(std_path):
            with open(std_path, 'w'):
                pass
def update_collector(collector_id):
    """
    Used to update a collector's details from the front-end.

    A GET renders the update form pre-populated from the stored collector,
    plus one small form per existing term. A POST reads the submitted JSON,
    collects the values that differ from the stored collector together
    with the full terms list, and writes them through
    `DB.update_collector_detail`.

    TODO - Terms & start / end dates
    """
    db = DB()
    resp = db.get_collector_detail(g.project['project_id'], collector_id)
    collector = resp['collector']

    # First, populate the main form w/ info
    form_params = {'collector_name': collector['collector_name']}

    if collector['network'] == 'twitter':
        form_params['api'] = collector['api']
        form_params['consumer_key'] = collector['api_auth']['consumer_key']
        form_params['consumer_secret'] = collector['api_auth']['consumer_secret']
        form_params['access_token'] = collector['api_auth']['access_token']
        form_params['access_token_secret'] = collector['api_auth']['access_token_secret']

        if collector['languages']:
            form_params['languages'] = '\r\n'.join(collector['languages'])

        if collector['location']:
            # Four comma-separated coordinates per row, one row per line. A trailing
            # incomplete row keeps a trailing comma, as the form always rendered it.
            coords = collector['location']
            rows = [','.join(coords[start:start + 4])
                    for start in range(0, len(coords), 4)]
            loc_string = '\r\n'.join(rows)
            if len(coords) % 4 != 0:
                loc_string += ','
            form_params['locations'] = loc_string

    elif collector['network'] == 'facebook':
        form_params['collection_type'] = collector['collection_type']
        form_params['client_id'] = collector['api_auth']['client_id']
        form_params['client_secret'] = collector['api_auth']['client_secret']

        # TODO - start & end dates

    form = UpdateCollectorForm(**form_params)

    # Next, create a form for each term -- if no terms, one form is needed & it's empty
    terms = collector['terms_list']
    terms_forms = []
    if terms:
        for term in terms:
            # Load form & set defaults
            form_params = {
                'term': term['term'],
                'collect': int(term['collect']),
            }
            # NOTE(review): 'prefx' looks like a typo for 'prefix'; left untouched because
            # changing it would change the rendered field names - confirm with the template
            tform = UpdateCollectorTermsForm(prefx=term['term'], **form_params)
            terms_forms.append(tform)

    # Finally, update on submission
    if request.method == 'POST':
        # Loads in the data from the form & sets initial param dict
        form_data = request.get_json()
        params = {}

        if form_data['collector_name'] != collector['collector_name']:
            params['collector_name'] = form_data['collector_name']

        # Twitter data
        if collector['network'] == 'twitter':
            if form_data['api'] != collector['api']:
                params['api'] = form_data['api']

            # If one auth param is updated, assume all are
            if form_data['consumer_key'] != collector['api_auth']['consumer_key']:
                params['api_auth'] = {
                    'consumer_key': form_data['consumer_key'],
                    'consumer_secret': form_data['consumer_secret'],
                    'access_token': form_data['access_token'],
                    'access_token_secret': form_data['access_token_secret']
                }

            languages = form_data['languages']
            if not languages:
                languages = None
            else:
                languages = languages.split('\r\n')

            if languages != collector['languages']:
                params['languages'] = languages

            # The emptiness check used to test `languages` here by mistake
            locations = form_data['locations']
            if not locations:
                locations = None
            else:
                locations = locations.replace('\r\n', ',').split(',')
                # Compare by value: `is not 0` tested object identity
                if len(locations) % 4 != 0:
                    flash('Location coordinates should be entered in pairs of 4. Please try again')
                    return redirect(url_for('update_collector', collector_id=collector_id))

            if locations != collector['location']:
                params['location'] = locations

        # Facebook Data
        elif collector['network'] == 'facebook':
            if form_data['collection_type'] != collector['collection_type']:
                # Take the submitted value; the pre-populated form still holds the
                # stored one, which made this update a no-op
                params['collection_type'] = form_data['collection_type']
            if form_data['client_id'] != collector['api_auth']['client_id']:
                params['api_auth'] = {
                    'client_id': form_data['client_id'],
                    'client_secret': form_data['client_secret']
                }

            # TODO - start and end dates

        # Final terms dict
        params['terms_list'] = []

        # Term type value
        if collector['network'] == 'twitter':
            if collector['api'] == 'follow':
                term_type = 'handle'
            else:
                term_type = 'term'
        else:
            term_type = 'page'

        # New terms (if any)
        new_terms = form_data['new_terms']
        if new_terms:
            for new_term in new_terms.split('\r\n'):
                params['terms_list'].append({
                    'term': new_term,
                    'collect': 1,
                    'type': term_type,
                    'id': None
                })

        # Updated terms (if any): a flat list of alternating term / collect values
        current_terms = form_data['terms']
        while current_terms:
            term_value = current_terms.pop(0)
            collect_flag = int(current_terms.pop(0))
            params['terms_list'].append({
                'term': term_value,
                'collect': collect_flag,
                'type': term_type,
                'id': None
            })

        # Now, try updating
        resp = db.update_collector_detail(g.project['project_id'], collector_id, **params)
        if resp['status']:
            flash('Collector updated successfully!')
            return redirect(
                url_for('collector',
                        project_name=g.project['project_name'],
                        network=collector['network'],
                        collector_id=collector_id))
        else:
            flash(resp['message'])
            return redirect(url_for('update_collector', collector_id=collector_id))

    return render_template('update_collector.html',
                           collector=collector,
                           form=form,
                           terms_forms=terms_forms)
def network_home(project_name, network, task_id=None):
    """
    Renders a project account's homepage for one network.

    Shows the project's collectors for `network`, the processor and
    inserter status messages, the storage count, and - when `task_id` is
    given - whether that start/stop/restart task is still pending.
    """
    # Loads project details if an admin
    if g.admin is not None:
        _aload_project(project_name)

    # Grabs collectors for the given network
    if not g.project['collectors']:
        collectors = None
    else:
        collectors = [
            c for c in g.project['collectors'] if c['network'] == network
        ]
        for collector in collectors:
            collector['num_terms'] = 0
            if collector['terms_list'] is not None:
                collector['num_terms'] = len(collector['terms_list'])

        g.project['num_collectors'] = len(collectors)

    processor_form = ProcessControlForm(request.form)
    inserter_form = ProcessControlForm(request.form)

    db = DB()

    # Loads processor active status
    resp = db.check_process_status(g.project['project_id'], 'process', module=network)
    processor_active_status = resp['message']

    # Loads inserter active status
    resp = db.check_process_status(g.project['project_id'], 'insert', module=network)
    inserter_active_status = resp['message']

    # Loads the count from the storage DB for this network
    count = db.get_storage_counts(g.project['project_id'], network)

    # If a start/stop/restart is in progress, display the status.
    # The message must land in task_status: that is the name handed to the
    # template, and it used to stay None because another variable was assigned.
    task_status = None
    if task_id:
        resp = celery.AsyncResult(task_id)
        if resp.state == 'PENDING':
            task_status = 'Processor/Inserter start/shutdown still in progress...'
        else:
            task_status = 'Processor/Inserter start/shutdown completed.'

    return render_template('network_home.html',
                           network=network,
                           collectors=collectors,
                           project_detail=g.project,
                           processor_active_status=processor_active_status,
                           inserter_active_status=inserter_active_status,
                           task_status=task_status,
                           count=count,
                           processor_form=processor_form,
                           inserter_form=inserter_form)
def __init__(self, project_id, process_name, network):
    """
    Set up an inserter for a project / network: connect to the project's
    config DB and insertion DB, configure a daily-rotating log file, and
    make sure the raw / archive / queue / error data directories exist.
    """
    self.project_id = project_id
    self.process_name = process_name
    self.network = network

    # Sets up connection w/ project config DB & loads in collector info
    self.db = DB()
    project = self.db.get_project_detail(self.project_id)
    self.project_name = project['project_name']

    # Grabs connection to project config DB
    config_connection = self.db.connection[project['project_config_db']]
    self.project_db = config_connection.config

    # Grabs connection to insertion DB
    # NOTE - on init, need to connect to appropriate network collection
    insert_db_name = self.project_name + '_' + self.project_id
    self.insert_db = self.db.connection[insert_db_name]

    project_dirname = self.project_name + '-' + self.project_id

    # Sets up logdir and logging
    log_directory = app.config['LOGDIR'] + '/' + project_dirname + '/logs'
    if not os.path.exists(log_directory):
        os.makedirs(log_directory)

    # Sets logger w/ name 'Inserter' and level INFO
    self.logger = logging.getLogger('Inserter')
    self.logger.setLevel(logging.INFO)

    # Sets up logging file handler
    # TODO - port logging rotation params to Mongo for user control later / these default values good
    log_path = log_directory + ('/%s.log' % self.process_name)
    file_handler = logging.handlers.TimedRotatingFileHandler(log_path, when='D', backupCount=30)
    file_handler.setLevel(logging.INFO)

    # Formats
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    date_format = '%m-%d %H:%M'
    file_handler.setFormatter(logging.Formatter(log_format, date_format))

    # Adds handler to logger to finish
    self.logger.addHandler(file_handler)
    self.log('STACK inserter for project %s initiated.' % self.project_name)

    # Sets up data directory
    self.datadir = app.config['DATADIR'] + '/' + project_dirname

    # Establish connections to data directories
    network_root = self.datadir + '/' + self.network
    self.raw = network_root + '/raw'
    self.archive = network_root + '/archive'
    self.queue = network_root + '/queue'
    self.error = network_root + '/error'

    for directory in (self.raw, self.archive, self.queue, self.error):
        if not os.path.exists(directory):
            os.makedirs(directory)

    self.log('STACK processor setup completed. Now starting...')
def new_collector():
    """
    Route for a project account to create a new STACK collector.

    On a valid POST the network-specific form values are gathered, the
    collector is stored through `DB.set_collector_detail`, and the request
    is redirected to the new collector's page. If that redirect cannot be
    built, the project homepage is used instead. Failures reported by the
    DB layer are flashed and the form is re-rendered.
    """
    # Redirects an admin back to the homepage b/c nothing is loaded into the session yet
    if g.project is None:
        flash('Please navigate to the New Collector page from your homepage panel.')
        return redirect(url_for('index'))

    form = NewCollectorForm(request.form)

    # On submit, get info which varies by network
    if request.method == 'POST' and form.validate():
        collector_name = form.collector_name.data
        network = form.network.data

        api = None
        languages = None
        locations = None
        start_date = None
        end_date = None

        # TODO - historical for Twitter as well
        collection_type = 'realtime'

        if network == 'twitter':
            api = form.api.data
            oauth_dict = {
                'consumer_key': form.consumer_key.data,
                'consumer_secret': form.consumer_secret.data,
                'access_token': form.access_token.data,
                'access_token_secret': form.access_token_secret.data
            }

            # Optional form values are assigned 'None' if not filled out
            languages = form.languages.data
            if not languages:
                languages = None
            else:
                languages = languages.split('\r\n')

            # The emptiness check used to test `languages` here by mistake
            locations = form.locations.data
            if not locations:
                locations = None
            else:
                locations = locations.replace('\r\n', ',').split(',')
                # Compare by value: `is not 0` tested object identity
                if len(locations) % 4 != 0:
                    flash('Location coordinates should be entered in pairs of 4. Please try again')
                    return redirect(url_for('new_collector'))

            terms = form.twitter_terms.data
            if not terms:
                terms = None
            else:
                terms = terms.split('\r\n')

        elif network == 'facebook':
            collection_type = form.collection_type.data
            oauth_dict = {
                'client_id': form.client_id.data,
                'client_secret': form.client_secret.data
            }

            # Optional start & end date params
            start_date = form.start_date.data
            if not start_date:
                start_date = None
            else:
                start_date = str(start_date)

            end_date = form.end_date.data
            if not end_date:
                end_date = None
            else:
                end_date = str(end_date)

            terms = form.facebook_terms.data
            terms = terms.split('\r\n')

        # NOTE(review): any other network value leaves oauth_dict / terms unbound -
        # presumably the form restricts `network` to the two values above; confirm

        # Create collector w/ form data
        db = DB()
        resp = db.set_collector_detail(g.project['project_id'],
                                       collector_name,
                                       network,
                                       collection_type,
                                       oauth_dict,
                                       terms,
                                       api=api,
                                       languages=languages,
                                       location=locations,
                                       start_date=start_date,
                                       end_date=end_date)

        # If successful, redirect to collector page
        if resp['status']:
            project_config_db = db.connection[g.project['project_config_db']]
            coll = project_config_db.config
            try:
                collector = coll.find_one({'collector_name': collector_name})
                collector_id = str(collector['_id'])
                network = str(collector['network'])

                return redirect(
                    url_for('collector',
                            project_name=g.project['project_name'],
                            network=network,
                            collector_id=collector_id))
            # Best-effort redirect: fall back to the homepage on any ordinary error,
            # without also trapping SystemExit / KeyboardInterrupt as a bare except did
            except Exception:
                flash('Collector created, but cannot redirect to collector page!')
                return redirect(
                    url_for('home', project_name=g.project['project_name']))
        else:
            flash(resp['message'])

    return render_template('new_collector.html', form=form)
def setup(self):
    """
    Open the DB stored under data/predictors and keep both the directory
    and the DB handle on the test instance.
    """
    predictors_dir = os.path.join(BASE_DIR, 'data', 'predictors')
    self.db_dir = predictors_dir
    self.db = DB(db_dir=predictors_dir)
def before_request():
    """
    Create a DB handle from the configured DATABASE_URI and store it on
    `g.db`.
    """
    database_uri = app.config['DATABASE_URI']
    g.db = DB(database_uri)
import ConfigParser import datetime import logging import logging.config import time from time import sleep import traceback from bson.objectid import ObjectId # Config file includes paths, parameters, and oauth information for this module # Complete the directions in "example_platform.ini" for configuration before proceeding PLATFORM_CONFIG_FILE = module_dir + '/platform.ini' # Connect to Mongo db = DB() # Program thread e = threading.Event() class fileOutListener(StreamListener): """ This listener handles tweets as they come in by converting them to JSON and sending them to a file. Each line in the file is a tweet. """ def __init__(self, tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id): self.logger = logger self.logger.info( 'COLLECTION LISTENER: Initializing Stream Listener...') self.buffer = ''
def setup(self):
    """
    Open the DB stored under data/recommenders for the tests in this class.
    """
    recommenders_dir = os.path.join(BASE_DIR, 'data', 'recommenders')
    self.db = DB(db_dir=recommenders_dir)