Ejemplo n.º 1
0
def login():
    """
    Authenticate a project account.

    A request that already has a project on ``g`` is redirected straight to
    that project's homepage. Otherwise the submitted login form is checked
    through the DB layer: on success the project ID is stored in the session
    and the user is redirected home; on failure the returned message is
    flashed and the login page is rendered again.
    """
    # Already logged in -- nothing to authenticate
    if g.project is not None:
        return redirect(
            url_for('home', project_name=g.project['project_name']))

    form = LoginForm(request.form)
    if not form.validate_on_submit():
        return render_template('login.html', form=form)

    submitted_name = form.project_name.data
    submitted_password = form.password.data

    db = DB()
    auth_result = db.auth(submitted_name, submitted_password)
    if not auth_result['status']:
        flash(auth_result['message'])
        return render_template('login.html', form=form)

    # Remember the project for later requests, then use its stored name
    # (not the submitted one) to build the homepage URL
    session['project_id'] = auth_result['project_id']
    detail = db.get_project_detail(session['project_id'])
    return redirect(url_for('home', project_name=detail['project_name']))
Ejemplo n.º 2
0
def create():
    """
    Render and process the form that creates a new project account.

    A valid submission is handed to the DB layer together with a hash of the
    password. Success flashes a confirmation and redirects to the admin
    homepage; failure flashes the message returned by the DB layer and the
    form is rendered again.
    """
    form = CreateForm(request.form)
    if not form.validate_on_submit():
        return render_template('create.html', form=form)

    # Pull the submitted values off the form
    name = form.project_name.data
    contact_email = form.email.data
    plain_password = form.password.data
    password_hash = generate_password_hash(plain_password)
    summary = form.description.data

    # Create the account
    outcome = DB().create(name,
                          plain_password,
                          password_hash,
                          description=summary,
                          email=contact_email)
    if not outcome['status']:
        flash(outcome['message'])
        return render_template('create.html', form=form)

    flash('Project successfully created!')
    return redirect(url_for('admin_home', admin_id=g.admin['project_id']))
Ejemplo n.º 3
0
def admin_login():
    """
    Authenticate an admin account.

    A request that already has an admin on ``g`` is redirected to the admin
    homepage. Otherwise the submitted credentials are checked through the DB
    layer; the login only succeeds when the account both authenticates and is
    flagged as admin.
    """
    if g.admin is not None:
        return redirect(url_for('admin_home', admin_id=g.admin['project_id']))

    form = LoginForm(request.form)
    if form.validate_on_submit():
        account_name = form.project_name.data
        account_password = form.password.data

        db = DB()
        resp = db.auth(account_name, account_password)
        authenticated = resp['status']
        is_admin = resp['admin']

        if not is_admin:
            # Non-admin accounts are rejected whether or not the password matched
            flash('Invalid admin account!')
        elif not authenticated:
            flash(resp['message'])
        else:
            session['admin_project_id'] = resp['project_id']
            detail = db.get_project_detail(session['admin_project_id'])
            return redirect(
                url_for('admin_home', admin_id=detail['project_id']))

    return render_template('admin_login.html', form=form)
Ejemplo n.º 4
0
    def run_search(self):
        """
        One-time Graph API search for historical collections.

        For every ID in ``self.id_list`` the feed is requested between the
        ``since`` / ``until`` bounds found in ``self.params`` and the paging
        links are followed until no further page is available. Each page of
        data is handed to ``self.on_data``. Once all IDs have been handled the
        collector status is set to 0 through the DB layer and the loop stops.

        A ``FacebookError`` raised while querying is logged and recorded in
        the project DB under ``error_codes``.
        """
        self.running = True
        while self.running:

            # First, check to see if the thread has been set to shut down. If so, break
            if self.e.isSet():
                self.c.log('Collection thread set to shut down. Shutting down.', thread=self.thread)
                self.running = False
                break

            # Loop thru each term and query the API
            for term_id in self.id_list:
                since = self.params['since']
                until = self.params['until']

                # Check to see if there's a last value less than until. If so, replace b/c got cut off
                # mid collection last time
                last = self.params['last']
                if last and (until is None or last < until):
                    until = last

                # 'none' marks the first request for this ID; None ends the paging loop
                paging_url = 'none'
                while paging_url is not None:
                    try:
                        # Compare by value: an identity test against a string literal only
                        # works by accident of interning
                        if paging_url != 'none':
                            resp = json.loads(requests.get(paging_url).content)
                        else:
                            resp = self.fb.get_object_feed(term_id, since=since, until=until)

                        # If there is no data or no paging info, the historical search has completed
                        if 'paging' not in resp or not resp['data']:
                            self.c.log('Historical query collection completed for term: %s.' % str(term_id), thread=self.thread)
                            paging_url = None
                        else:
                            self.on_data(resp['data'])
                            # The final page may carry paging info without a 'next' link;
                            # treat that as the end instead of raising KeyError
                            paging_url = resp['paging'].get('next')
                            if paging_url is None:
                                self.c.log('Historical query collection completed for term: %s.' % str(term_id), thread=self.thread)

                    # On error, logs and records in DB
                    # NOTE(review): paging_url is left untouched here, so the same request is
                    # retried on the next pass with no limit or back-off -- confirm this is intended
                    except FacebookError as e:
                        self.c.log('Query for term ID %s failed.' % str(term_id), thread=self.thread, level='error')

                        now = time.strftime('%Y-%m-%d %H:%M:%S')
                        # NOTE(review): e[0] assumes the exception is indexable -- confirm against FacebookError
                        self.project_db.update({'_id': ObjectId(self.collector_id)},
                            {'$push': {'error_codes': {'code': e[0]['code'], 'message': e[0]['message'], 'date': now}}})

            self.c.log('Historical query collection completed for all terms. Shutting down.', thread=self.thread)
            db = DB()
            db.set_collector_status(self.c.project_id, self.collector_id, collector_status=0)
            self.running = False
Ejemplo n.º 5
0
 def decorated_function(*args, **kwargs):
     # Start with no admin; only a session-stored admin project ID that
     # resolves successfully in the DB populates g.admin
     g.admin = None
     if 'admin_project_id' in session:
         admin_detail = DB().get_project_detail(session['admin_project_id'])
         if admin_detail['status']:
             g.admin = admin_detail
     # Hand over to the wrapped callable with its arguments untouched
     return f(*args, **kwargs)
Ejemplo n.º 6
0
def _aload_project(project_name):
    """
    Utility to load a project's detail while an admin views its control page.

    Looks the project up by name in the stack config collection, stores its
    detail on ``g.project`` and records its ID in the session.
    """
    db = DB()
    record = db.stack_config.find_one({'project_name': project_name})
    project_id = str(record['_id'])

    g.project = db.get_project_detail(project_id)
    session['project_id'] = project_id
Ejemplo n.º 7
0
 def setUp(self):
     db = DB(Config.DATABASE_URI)
     # db.create_tables()
     self.db = db
     self.ticket_json = '''{
         "message": "test_message",
         "subject": "test_subject",
         "email": "*****@*****.**"
     }'''
     self.comment_json = '''{
Ejemplo n.º 8
0
    def __init__(self, db_dir, n_splits=None, initialize_db=False, **kwargs):
        """
        Prepare a generator that divides all samples into k groups.

        When ``n_splits`` is None, one fold is produced per rating
        (Leave-One-Out style). The jump size is the number of ratings divided
        by the number of splits, rounded up.
        """
        self.db_dir = db_dir
        self.db = DB(db_dir=self.db_dir)

        rating_count = len(self.db.ratings)
        self.total_ratings = rating_count
        self._n_splits = rating_count if n_splits is None else n_splits

        self._index = 0
        self._jump_size = math.ceil(rating_count / self._n_splits)
        self._initialize_db = initialize_db
Ejemplo n.º 9
0
def admin_home(admin_id):
    """
    Homepage for an admin account.

    Lists every project that carries an 'admin' key with a falsy value;
    projects without the key are left out.
    """
    resp = DB().get_project_list()

    project_list = []
    if resp['status']:
        project_list = [
            project for project in resp['project_list']
            if 'admin' in project and not project['admin']
        ]

    return render_template('admin_home.html',
                           admin_detail=g.admin,
                           project_list=project_list)
Ejemplo n.º 10
0
    def __init__(self, db_dir, recommender_class, top_n=None, **kwargs):
        """
        Evaluates Prediction performance.

        Keyword arguments read here: ``n_splits`` (forwarded to the k-fold
        generator, None when absent) and ``init_recommender_params`` (stored
        as given, an empty dict when absent).
        """
        self.initial_time = datetime.utcnow()

        self._init_recommender_params = kwargs.get(
            'init_recommender_params', {})

        self._k_fold_generator = KFoldGenerator(
            db_dir, n_splits=kwargs.get('n_splits'), initialize_db=False)

        self.db_dir = db_dir
        self.db = DB(db_dir=self.db_dir)
        self._recommender_class = recommender_class
        self.top_n = top_n
Ejemplo n.º 11
0
def setup():
    """
    Called on a new install to set up an admin account.

    A valid submission creates the account with ``admin=True``. Success
    redirects to the index page; failure flashes the message returned by the
    DB layer and renders the form again.
    """
    form = SetupForm(request.form)
    if not form.validate_on_submit():
        return render_template('setup.html', form=form)

    account_name = form.project_name.data
    plain_password = form.password.data
    password_hash = generate_password_hash(plain_password)

    outcome = DB().create(account_name,
                          plain_password,
                          password_hash,
                          admin=True)
    if outcome['status']:
        flash('Project successfully created!')
        return redirect(url_for('index'))

    flash(outcome['message'])
    return render_template('setup.html', form=form)
Ejemplo n.º 12
0
def collector_control(collector_id):
    """
    POST control route for collector forms.

    A valid POST whose ``control`` field is start / stop / restart queues the
    matching daemon task and redirects to the collector page with the task's
    ID attached. Any other request (non-POST, invalid form, unrecognised
    command) queues nothing and redirects to the collector page without a
    task ID, so the view always returns a response.
    """
    collector_form = ProcessControlForm(request.form)

    # The network is needed to build the redirect URL in every case
    db = DB()
    collector = db.get_collector_detail(g.project['project_id'],
                                        collector_id)
    network = collector['collector']['network']

    redirect_args = {
        'project_name': g.project['project_name'],
        'network': network,
        'collector_id': collector_id
    }

    # On form submit controls the processor
    if request.method == 'POST' and collector_form.validate():
        command = request.form['control'].lower()

        task_args = {
            'process': 'collect',
            'project': g.project,
            'collector_id': collector_id
        }

        # command -> (task, queue the task is sent to)
        dispatch = {
            'start': (start_daemon, 'stack-start'),
            'stop': (stop_daemon, 'stack-stop'),
            'restart': (restart_daemon, 'stack-start')
        }

        if command in dispatch:
            daemon_task, queue = dispatch[command]
            task = daemon_task.apply_async(kwargs=task_args, queue=queue)
            # Only attach a task ID when something was actually queued
            redirect_args['task_id'] = task.task_id

    return redirect(url_for('collector', **redirect_args))
Ejemplo n.º 13
0
def collector(project_name, network, collector_id, task_id=None):
    """
    Loads the detail / control page for a collector.

    When ``task_id`` is given, the page also shows whether that
    start/stop/restart task is still pending.
    """
    # Redirects an admin back to the homepage b/c nothing is loaded into the session yet
    if g.project is None:
        flash(
            'Please navigate to the New Collector page from your homepage panel.'
        )
        return redirect(url_for('index'))

    form = ProcessControlForm(request.form)

    db = DB()
    project_id = g.project['project_id']

    # Collector info for the page
    collector_detail = db.get_collector_detail(project_id,
                                               collector_id)['collector']

    # Active status of the collect process
    active_status = db.check_process_status(
        project_id, 'collect', collector_id=collector_id)['message']

    # If a start/stop/restart is in progress, display the status
    task_status = None
    if task_id:
        still_pending = celery.AsyncResult(task_id).state == 'PENDING'
        task_status = ('Collector start/shutdown still in progress...'
                       if still_pending else
                       'Collector start/shutdown completed.')

    return render_template('collector.html',
                           collector=collector_detail,
                           active_status=active_status,
                           form=form,
                           task_status=task_status)
Ejemplo n.º 14
0
def insert_work():
    """
    Interactively prompt for a work item and insert it through DB('main', 'work').

    Asks for the item's fields, then for one or more tags (at least one tag
    is always requested), and reports whether the insert succeeded.
    """
    title = raw_input('Work item title: ')
    description = raw_input('Work item description: ')
    image_name = raw_input('Main image name: ')
    main_link = raw_input('Main link URL: ')
    priority = raw_input('Priority: ')

    # Keep collecting tags until the user answers 'n'
    tags = []
    while True:
        name = raw_input('Input tag name: ')
        url = raw_input('Input tag link URL: ')
        tags.append({'tag_name': name, 'tag_url': url})

        if raw_input('Add more tags? [y/n]: ') == 'n':
            break

    doc = {
        'priority': priority,
        'title': title,
        'description': description,
        'image_name': 'http://www.ceskavich.com/static/img/' + image_name,
        'main_link': main_link,
        'tags': tags
    }

    resp = DB('main', 'work').insert(doc)

    if resp['status']:
        outcome = 'SUCCESS!'
    else:
        outcome = 'Oops. Something went wrong. Please try again.'

    print('\n')
    print(outcome)
Ejemplo n.º 15
0
def index():
    """
    Loads the STACK homepage w/ list of project accounts.

    Falls back to the new-install setup page when no admin account is found.
    """
    start_workers()

    resp = DB().get_project_list()

    project_list = None
    admins = None
    if resp and resp['project_list']:
        project_list = resp['project_list']
        admins = [
            project for project in project_list
            if project.get('admin') == 1
        ]

    # Without at least one admin account this is a new install
    if not admins:
        return redirect(url_for('setup'))

    return render_template('index.html', project_list=project_list)
Ejemplo n.º 16
0
 def setup(self):
     """
     Open the DB over the 'predictors' data directory and build a
     collaborative predictor on it with a neighbourhood size of 100.
     """
     predictors_dir = os.path.join(BASE_DIR, 'data', 'predictors')
     self.dir_name = predictors_dir
     self.db = DB(db_dir=predictors_dir)
     self.predictor = CollaborativePredictor(
         self.db, neighbourhood_size=100)
Ejemplo n.º 17
0
import simplejson
from email.utils import parsedate_tz
from collections import defaultdict
import sys
import traceback
import string
import config
from pymongo import errors as PymongoErrors

from . import module_dir
from app.models import DB

# Path of the platform.ini file inside this module's directory
PLATFORM_CONFIG_FILE = module_dir + '/platform.ini'
# NOTE(review): presumably the number of documents per bulk insert -- not used in the visible code, confirm
BATCH_INSERT_SIZE = 1000

# DB handles created once, at import time
db = DB()
# NOTE(review): local=False presumably selects a central (non-local) database -- confirm against DB
db_central = DB(local=False)


# function goes out and gets a list of raw tweet data files
def get_processed_tweet_file_queue(Config, insertdir):
    """
    Ensure the insert-queue directory exists and build the file-name pattern
    for processed tweet files.

    The pattern is the configured ``tweets_file`` name with ``_processed``
    inserted before its extension, prefixed by the insert directory and a
    ``*`` wildcard.

    NOTE(review): the visible body stops after building the pattern and
    returns nothing; the rest of the function is presumably outside this
    excerpt -- confirm.
    """
    insert_queue_path = insertdir + '/'
    # Create the queue directory on first use
    if not os.path.exists(insert_queue_path):
        os.makedirs(insert_queue_path)

    tweetsOutFile = Config.get('files', 'tweets_file', 0)
    file_extension = os.path.splitext(tweetsOutFile)[1]
    # e.g. 'name.ext' -> 'name_processed.ext'
    tweetFileNamePattern = tweetsOutFile.replace(file_extension,
                                                 '_processed' + file_extension)
    tweetFileNamePattern = insert_queue_path + '*' + tweetFileNamePattern
Ejemplo n.º 18
0
 def setup(self):
     """
     Open the DB over the 'simple' data directory for the matrix
     density tests.
     """
     self.db = DB(db_dir=os.path.join(BASE_DIR, 'data', 'simple'))
Ejemplo n.º 19
0
def compute():
    """
    Print the statistics relevant for Task 1 and plot the rating distributions.

    Output order: general counts, per-user rating stats (followed by the two
    plots), per-movie rating stats, rating totals, then tag stats by user and
    by movie.
    """

    def summary_row(stats):
        # max / min / median as stored; mean and sd to 3 significant digits
        return [
            stats['max'], stats['min'], stats['median'],
            '{0:.3g}'.format(stats['mean']),
            '{0:.3g}'.format(stats['sd'])
        ]

    db = DB()

    print('\nGeneral Statistics:')
    general = PrettyTable([
        'Total Users',
        'Total Movies',
        'Total Tags',
        'Matrix Density',
    ])
    for column in ('Total Users', 'Total Movies', 'Matrix Density'):
        general.align[column] = 'l'
    general.padding_width = 1
    general.add_row([
        len(db.users.keys()),
        len(db.movies.keys()),
        len(db.tags),
        '{0:.3g}'.format(db.density)
    ])
    print(general)

    print('\nStatistics by User and Ratings:')
    by_user = PrettyTable([
        'Max Number of Ratings for User', 'Min Number of Ratings for User',
        'Median Ratings for User', 'Mean Ratings for User',
        'Standard Deviation Ratings for User'
    ])
    by_user.add_row(summary_row(db.stats['users']))
    print(by_user)

    plot_ratings_distribution(db)
    plot_user_ratings_distribution(db)

    print('\nStatistics by Movie and Ratings:')
    by_movie = PrettyTable([
        'Max Number of Ratings for Movie', 'Min Number of Ratings for Movie',
        'Median Ratings for Movie', 'Mean Ratings for Movie',
        'Standard Deviation Ratings for Movie'
    ])
    by_movie.add_row(summary_row(db.stats['movies']))
    print(by_movie)

    print('\nTotal number of ratings for each of the 5 ratings')
    rating_values = sorted(db.stats['ratings'].keys())
    totals = PrettyTable(rating_values)
    totals.add_row([db.stats['ratings'][value] for value in rating_values])
    print(totals)

    print('\nStatistics by Tags and User')
    tags_by_user = PrettyTable([
        'Max Number of Tags by User', 'Min Number of Tags by User',
        'Median Number of Tags by User', 'Mean Number of Tags by User',
        'Standard Deviation Tags by User'
    ])
    tags_by_user.add_row(summary_row(db.stats['tags']['by_user']))
    print(tags_by_user)

    print('\nStatistics by Tags and Movie')
    tags_by_movie = PrettyTable([
        'Max Number of Tags by Movie', 'Min Number of Tags by Movie',
        'Median Number of Tags by Movie', 'Mean Number of Tags by Movie',
        'Standard Deviation Tags by Movie'
    ])
    tags_by_movie.add_row(summary_row(db.stats['tags']['by_movie']))
    print(tags_by_movie)
Ejemplo n.º 20
0
    def __init__(self, project_id, collector_id, process_name):
        """
        Load project and collector configuration and prepare logging and the
        raw data directory for a collector.

        Project and collector details are read through the DB layer. A
        'realtime' collector has its ``stream_limits`` reset to an empty list
        in the project config DB. A daily-rotating log file named after
        ``process_name`` is attached to a logger named after the collector.
        """
        self.project_id = project_id
        self.collector_id = collector_id
        self.process_name = process_name
        self.collecting_data = False

        # Sets up connection w/ project config DB & loads in collector info
        self.db = DB()

        project = self.db.get_project_detail(self.project_id)
        if project['status']:
            self.project_name = project['project_name']
            config_db_name = project['project_config_db']
            self.project_db = self.db.connection[config_db_name].config

        resp = self.db.get_collector_detail(self.project_id, self.collector_id)
        if resp['status']:
            info = resp['collector']

            # attribute on self <- key in the stored collector document
            attribute_keys = (
                ('collector_name', 'collector_name'),
                ('network', 'network'),
                ('api', 'api'),
                ('collection_type', 'collection_type'),
                ('params', 'params'),
                ('terms_list', 'terms_list'),
                ('languages', 'languages'),
                ('locations', 'location'),
                ('auth', 'api_auth'),
            )
            for attribute, key in attribute_keys:
                setattr(self, attribute, info[key])

            # TODO - file format to Mongo
            # TODO - less then hour = warning
            self.file_format = '%Y%m%d-%H'

        # If this is a streaming collector
        if self.collection_type == 'realtime':
            self.project_db.update({'_id': ObjectId(self.collector_id)},
                                   {'$set': {'stream_limits': []}})

        # Directory name shared by the log and data trees
        project_dir = self.project_name + '-' + self.project_id

        # Sets up logdir and logging
        logdir = app.config['LOGDIR'] + '/' + project_dir + '/logs'
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        # Sets logger w/ name collector_name and level INFO
        self.logger = logging.getLogger(self.collector_name)
        self.logger.setLevel(logging.INFO)

        # Sets up logging file handler
        # TODO - logging params
        # TODO - port logging rotation params to Mongo for user control later / these default values good
        handler = logging.handlers.TimedRotatingFileHandler(
            logdir + '/%s.log' % self.process_name,
            when='D',
            backupCount=30)
        handler.setLevel(logging.INFO)
        handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                '%m-%d %H:%M'))
        self.logger.addHandler(handler)

        self.log('STACK collector %s initiated.' % self.collector_name)

        # Sets up rawdir
        self.rawdir = (app.config['DATADIR'] + '/' + project_dir + '/' +
                       self.network + '/raw')
        if not os.path.exists(self.rawdir):
            os.makedirs(self.rawdir)

        self.log(
            'All raw files and directories set. Now starting collector...')
Ejemplo n.º 21
0
    def __init__(self, process, cmdline=False, home_dir='.', umask=0o22, verbose=1, **kwargs):
        """
        Resolve the project / collector a daemon process works on and prepare
        its directories and std files.

        Keyword arguments read: ``project`` when ``cmdline`` is False,
        ``project_id`` otherwise; ``network`` for the 'process' / 'insert'
        processes; ``collector_id`` for 'collect'. Prints a usage message and
        exits with status 1 when the command-line project lookup or the
        collector lookup fails.
        """

        def exit_with_usage(problem):
            # Report the problem, show the usage line and stop
            print(problem)
            print('')
            print('USAGE: python %s %s' % (sys.argv[0], self.usage_message))
            sys.exit(1)

        self.db = DB()
        self.process = process
        self.cmdline = cmdline
        self.usage_message = 'controller collect|process|insert start|stop|restart project_id collector_id'

        self.home_dir = home_dir
        self.umask = umask
        self.verbose = verbose

        if self.cmdline is False:
            # Grab information from Flask user object
            self.project = kwargs['project']
            self.project_id = self.project['project_id']
            self.project_name = self.project['project_name']
        else:
            # Command is coming from the command line, look up info
            self.project_id = kwargs['project_id']

            resp = self.db.get_project_detail(self.project_id)
            if not resp['status']:
                exit_with_usage('Project w/ ID %s not found!' % self.project_id)
            self.project_name = resp['project_name']

        # Project account DB connection
        project_info = self.db.get_project_detail(self.project_id)
        config_db_name = project_info['project_config_db']
        self.projectdb = self.db.connection[config_db_name].config

        # Loads info for process based on type: collector, processor, inserter
        if self.process in ['process', 'insert']:
            # Only module type needed for processor / inserter
            self.module = kwargs['network']
            self.collector_id = None
            # Set name for worker based on gathered info
            self.process_name = '-'.join(
                [self.project_name, self.process, self.module, self.project_id])
        elif self.process == 'collect':
            # For collectors, also grabs: collector_id, api, collector_name
            self.collector_id = kwargs['collector_id']

            resp = self.db.get_collector_detail(self.project_id, self.collector_id)
            if not resp['status']:
                exit_with_usage('Collector (ID: %s) not found!' % self.collector_id)

            collector = resp['collector']
            self.module = collector['network']
            self.api = collector['api']
            self.collector_name = collector['collector_name']

            # Set name for worker based on gathered info
            self.process_name = '-'.join(
                [self.project_name, self.collector_name, self.process,
                 self.module, self.collector_id])

        # Sets out directories
        log_root = app.config['LOGDIR'] + '/' + self.project_name + '-' + self.project_id
        self.piddir = log_root + '/pid'
        self.logdir = log_root + '/logs'
        self.stddir = log_root + '/std'

        # Sets data dirs
        # TODO - deprecate w/ Facebook
        data_root = (app.config['DATADIR'] + '/' + self.project_name + '-' +
                     self.project_id + '/' + self.module)
        self.rawdir = data_root + '/raw'
        self.archdir = data_root + '/archive'
        self.insertdir = data_root + '/insert_queue'

        # Creates dirs if they don't already exist
        for directory in (self.piddir, self.stddir):
            if not os.path.exists(directory):
                os.makedirs(directory)

        # These directories only need be created for Twitter
        # TODO - deprecate w/ Facebook
        if self.module == 'twitter':
            for directory in (self.logdir, self.rawdir, self.archdir, self.insertdir):
                if not os.path.exists(directory):
                    os.makedirs(directory)

        # Sets outfiles
        self.pidfile = self.piddir + '/%s.pid' % self.process_name
        self.stdout = self.stddir + '/%s-stdout.txt' % self.process_name
        self.stderr = self.stddir + '/%s-stderr.txt' % self.process_name
        self.stdin = self.stddir + '/%s-stdin.txt' % self.process_name

        # Creates the std files for the daemon
        for std_path in (self.stdout, self.stdin, self.stderr):
            if not os.path.isfile(std_path):
                open(std_path, 'w').close()
Ejemplo n.º 22
0
def update_collector(collector_id):
    """
    Used to update a collector's details from the front-end.

    Every request first builds the update form (and one sub-form per existing
    term) pre-populated from the stored collector. A POST then reads the JSON
    payload, collects the fields that differ from the stored collector into
    ``params``, always rebuilds ``terms_list`` from the submitted new and
    current terms, and writes the result through ``db.update_collector_detail``.
    Success redirects to the collector page; a failed update or a location
    list that is not a multiple of 4 flashes a message and redirects back to
    this page.

    TODO - Terms & start / end dates
    """
    db = DB()
    resp = db.get_collector_detail(g.project['project_id'], collector_id)
    collector = resp['collector']

    # First, populate the main form w/ info
    form_params = {'collector_name': collector['collector_name']}

    if collector['network'] == 'twitter':
        api_auth = collector['api_auth']
        form_params['api'] = collector['api']
        form_params['consumer_key'] = api_auth['consumer_key']
        form_params['consumer_secret'] = api_auth['consumer_secret']
        form_params['access_token'] = api_auth['access_token']
        form_params['access_token_secret'] = api_auth['access_token_secret']

        if collector['languages']:
            form_params['languages'] = '\r\n'.join(collector['languages'])
        if collector['location']:
            # Four comma-separated coordinates per line, mirroring how the
            # POST branch below parses the field back
            coords = collector['location']
            form_params['locations'] = '\r\n'.join(
                ','.join(coords[start:start + 4])
                for start in range(0, len(coords), 4))

    elif collector['network'] == 'facebook':
        form_params['collection_type'] = collector['collection_type']
        form_params['client_id'] = collector['api_auth']['client_id']
        form_params['client_secret'] = collector['api_auth']['client_secret']

        # TODO - start & end dates

    form = UpdateCollectorForm(**form_params)

    # Next, create a form for each term -- if no terms, one form is needed & it's empty
    terms = collector['terms_list']
    terms_forms = []
    if terms:
        for term in terms:
            # Load form & set defaults
            form_params = {
                'term': term['term'],
                'collect': int(term['collect']),
            }
            # NOTE(review): 'prefx' looks like a typo for the form 'prefix'
            # argument; left unchanged because fixing it would alter the
            # rendered field names the template may rely on -- confirm
            tform = UpdateCollectorTermsForm(prefx=term['term'], **form_params)
            terms_forms.append(tform)

    # Finally, update on submission
    if request.method == 'POST':
        # Loads in the data from the form & sets initial param dict
        form_data = request.get_json()
        params = {}

        if form_data['collector_name'] != collector['collector_name']:
            params['collector_name'] = form_data['collector_name']

        # Twitter data
        if collector['network'] == 'twitter':
            if form_data['api'] != collector['api']:
                params['api'] = form_data['api']
            # If one auth param is updated, assume all are
            if form_data['consumer_key'] != collector['api_auth'][
                    'consumer_key']:
                params['api_auth'] = {
                    'consumer_key': form_data['consumer_key'],
                    'consumer_secret': form_data['consumer_secret'],
                    'access_token': form_data['access_token'],
                    'access_token_secret': form_data['access_token_secret']
                }

            languages = form_data['languages']
            if not languages:
                languages = None
            else:
                languages = languages.split('\r\n')

            if languages != collector['languages']:
                params['languages'] = languages

            locations = form_data['locations']
            if not locations:
                locations = None
            else:
                locations = locations.replace('\r\n', ',').split(',')
                # Value comparison; an identity test against an int literal is unreliable
                if len(locations) % 4 != 0:
                    flash(
                        'Location coordinates should be entered in pairs of 4. Please try again'
                    )
                    return redirect(
                        url_for('update_collector', collector_id=collector_id))

            if locations != collector['location']:
                params['location'] = locations

        # Facebook Data
        elif collector['network'] == 'facebook':
            if form_data['collection_type'] != collector['collection_type']:
                # Take the submitted value; the pre-populated form only holds
                # what is already stored
                params['collection_type'] = form_data['collection_type']
            if form_data['client_id'] != collector['api_auth']['client_id']:
                params['api_auth'] = {
                    'client_id': form_data['client_id'],
                    'client_secret': form_data['client_secret']
                }

            # TODO - start and end dates

        # Final terms dict
        params['terms_list'] = []

        # Term type value
        if collector['network'] == 'twitter':
            if collector['api'] == 'follow':
                term_type = 'handle'
            else:
                term_type = 'term'
        else:
            term_type = 'page'

        # New terms (if any)
        terms = form_data['new_terms']
        if terms:
            for t in terms.split('\r\n'):
                params['terms_list'].append({
                    'term': t,
                    'collect': 1,
                    'type': term_type,
                    'id': None
                })

        # Updated terms (if any) -- the payload is consumed as a flat list of
        # alternating term / collect values
        current_terms = form_data['terms']
        while current_terms:
            params['terms_list'].append({
                'term': current_terms.pop(0),
                'collect': int(current_terms.pop(0)),
                'type': term_type,
                'id': None
            })

        # Now, try updating
        resp = db.update_collector_detail(g.project['project_id'],
                                          collector_id, **params)
        if resp['status']:
            flash('Collector updated successfully!')
            return redirect(
                url_for('collector',
                        project_name=g.project['project_name'],
                        network=collector['network'],
                        collector_id=collector_id))
        else:
            flash(resp['message'])
            return redirect(
                url_for('update_collector', collector_id=collector_id))

    return render_template('update_collector.html',
                           collector=collector,
                           form=form,
                           terms_forms=terms_forms)
Ejemplo n.º 23
0
def network_home(project_name, network, task_id=None):
    """
    Renders a project account's homepage for a single network

    Args:
        project_name: name of the project; passed to `_aload_project` when
            the request comes from an admin.
        network: network name used to filter the project's collectors and
            to query process status and storage counts.
        task_id: optional Celery task id of a start/stop/restart request.
            When given, the task state is turned into the `task_status`
            message handed to the template.

    Returns:
        The rendered 'network_home.html' template.
    """
    # Loads project details if an admin
    if g.admin is not None:
        _aload_project(project_name)

    # Grabs collectors for the given network
    if not g.project['collectors']:
        collectors = None
    else:
        collectors = [
            c for c in g.project['collectors'] if c['network'] == network
        ]
        for collector in collectors:
            # A collector without a terms list counts as having zero terms
            terms_list = collector['terms_list']
            collector['num_terms'] = (
                0 if terms_list is None else len(terms_list))

        g.project['num_collectors'] = len(collectors)

    processor_form = ProcessControlForm(request.form)
    inserter_form = ProcessControlForm(request.form)

    db = DB()
    project_id = g.project['project_id']

    # Loads processor active status
    processor_resp = db.check_process_status(project_id,
                                             'process',
                                             module=network)
    processor_active_status = processor_resp['message']

    # Loads inserter active status
    inserter_resp = db.check_process_status(project_id,
                                            'insert',
                                            module=network)
    inserter_active_status = inserter_resp['message']

    # Loads count of tweets in the storage DB
    count = db.get_storage_counts(project_id, network)

    # If a start/stop/restart is in progress, display the status.
    # The message must land in `task_status`, which is the name handed to the
    # template; it was previously stored in an unused local, so the template
    # always received None.
    task_status = None
    if task_id:
        task = celery.AsyncResult(task_id)
        if task.state == 'PENDING':
            task_status = 'Processor/Inserter start/shutdown still in progress...'
        else:
            task_status = 'Processor/Inserter start/shutdown completed.'

    return render_template('network_home.html',
                           network=network,
                           collectors=collectors,
                           project_detail=g.project,
                           processor_active_status=processor_active_status,
                           inserter_active_status=inserter_active_status,
                           task_status=task_status,
                           count=count,
                           processor_form=processor_form,
                           inserter_form=inserter_form)
Ejemplo n.º 24
0
    def __init__(self, project_id, process_name, network):
        """
        Prepares DB handles, file logging and data directories for one
        project/network process.

        Args:
            project_id: id used to look up the project via
                `DB.get_project_detail`; also concatenated into DB and
                directory names, so it must be a string.
            process_name: used as the base name of the log file.
            network: name of the per-network sub-directory under the
                project's data directory.
        """
        self.project_id = project_id
        self.process_name = process_name
        self.network = network

        # One DB wrapper serves both the config DB and the insertion DB
        self.db = DB()

        project = self.db.get_project_detail(self.project_id)
        self.project_name = project['project_name']

        # Config collection of the project's config DB
        self.project_db = self.db.connection[project['project_config_db']].config

        # Insertion DB, named "<project_name>_<project_id>"
        # NOTE - on init, need to connect to appropriate network collection
        self.insert_db = self.db.connection[self.project_name + '_' +
                                            self.project_id]

        # "<project_name>-<project_id>" is shared by the log and data paths
        project_slug = self.project_name + '-' + self.project_id

        # Log directory is created on first use
        logdir = app.config['LOGDIR'] + '/' + project_slug + '/logs'
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        # Logger named 'Inserter', level INFO
        self.logger = logging.getLogger('Inserter')
        self.logger.setLevel(logging.INFO)

        # Daily-rotating file handler keeping 30 backups
        # TODO - port logging rotation params to Mongo for user control later / these default values good
        file_handler = logging.handlers.TimedRotatingFileHandler(
            logdir + '/%s.log' % self.process_name,
            when='D',
            backupCount=30)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                '%m-%d %H:%M'))
        self.logger.addHandler(file_handler)

        self.log('STACK inserter for project %s initiated.' %
                 self.project_name)

        # Per-project data directory and its per-network working directories
        self.datadir = app.config['DATADIR'] + '/' + project_slug
        network_dir = self.datadir + '/' + self.network
        self.raw = network_dir + '/raw'
        self.archive = network_dir + '/archive'
        self.queue = network_dir + '/queue'
        self.error = network_dir + '/error'

        # Create whichever working directories do not exist yet
        for path in (self.raw, self.archive, self.queue, self.error):
            if not os.path.exists(path):
                os.makedirs(path)

        # NOTE(review): this message says "processor" although the logger is
        # named 'Inserter' -- confirm which wording is intended
        self.log('STACK processor setup completed. Now starting...')
Ejemplo n.º 25
0
def new_collector():
    """
    Route for a project account to create a new STACK collector

    A GET, or a POST that fails validation, renders 'new_collector.html'.
    A valid POST reads the network-specific form fields and calls
    `DB.set_collector_detail`. On success it redirects to the new collector's
    page (or to the project home if the new collector cannot be looked up);
    on failure the DB's message is flashed and the form is rendered again.
    """
    # Redirects an admin back to the homepage b/c nothing is loaded into the session yet
    if g.project is None:
        flash(
            'Please navigate to the New Collector page from your homepage panel.'
        )
        return redirect(url_for('index'))

    form = NewCollectorForm(request.form)

    # On submit, get info which varies by network
    if request.method == 'POST' and form.validate():
        collector_name = form.collector_name.data
        network = form.network.data

        # Optional values stay None unless the network branch fills them in
        api = None
        languages = None
        locations = None
        start_date = None
        end_date = None

        # TODO - historical for Twitter as well
        collection_type = 'realtime'

        # NOTE(review): oauth_dict and terms are only bound for 'twitter' and
        # 'facebook'; presumably the form restricts `network` to those two
        # values -- confirm against NewCollectorForm
        if network == 'twitter':
            api = form.api.data
            oauth_dict = {
                'consumer_key': form.consumer_key.data,
                'consumer_secret': form.consumer_secret.data,
                'access_token': form.access_token.data,
                'access_token_secret': form.access_token_secret.data
            }

            # Optional form values are assigned None if not filled out
            # (an empty string is falsy, so one truthiness check suffices)
            languages = form.languages.data
            languages = languages.split('\r\n') if languages else None

            # This check previously tested `languages` instead of `locations`
            locations = form.locations.data
            if not locations:
                locations = None
            else:
                locations = locations.replace('\r\n', ',').split(',')
                # Compare by value; `is not 0` relied on int identity
                if len(locations) % 4 != 0:
                    flash(
                        'Location coordinates should be entered in pairs of 4. Please try again'
                    )
                    return redirect(url_for('new_collector'))

            terms = form.twitter_terms.data
            terms = terms.split('\r\n') if terms else None

        elif network == 'facebook':
            collection_type = form.collection_type.data
            oauth_dict = {
                'client_id': form.client_id.data,
                'client_secret': form.client_secret.data
            }

            # Optional start & end date params, passed on as strings
            start_date = form.start_date.data
            start_date = str(start_date) if start_date else None

            end_date = form.end_date.data
            end_date = str(end_date) if end_date else None

            terms = form.facebook_terms.data
            terms = terms.split('\r\n')

        # Create collector w/ form data
        db = DB()
        resp = db.set_collector_detail(g.project['project_id'],
                                       collector_name,
                                       network,
                                       collection_type,
                                       oauth_dict,
                                       terms,
                                       api=api,
                                       languages=languages,
                                       location=locations,
                                       start_date=start_date,
                                       end_date=end_date)

        # If successful, redirect to collector page
        if resp['status']:
            project_config_db = db.connection[g.project['project_config_db']]
            coll = project_config_db.config
            try:
                collector = coll.find_one({'collector_name': collector_name})
                collector_id = str(collector['_id'])
                network = str(collector['network'])

                return redirect(
                    url_for('collector',
                            project_name=g.project['project_name'],
                            network=network,
                            collector_id=collector_id))
            except Exception:
                # Best-effort lookup: the collector exists, so fall back to
                # the project home. `Exception` rather than a bare except so
                # SystemExit / KeyboardInterrupt are not swallowed.
                flash(
                    'Collector created, but cannot redirect to collector page!'
                )
                return redirect(
                    url_for('home', project_name=g.project['project_name']))
        else:
            flash(resp['message'])

    return render_template('new_collector.html', form=form)
Ejemplo n.º 26
0
 def setup(self):
     """Record the predictors data directory and build a DB on it."""
     # <BASE_DIR>/data/predictors
     predictors_dir = os.path.join(BASE_DIR, 'data', 'predictors')
     self.db_dir = predictors_dir
     self.db = DB(db_dir=predictors_dir)
Ejemplo n.º 27
0
 def before_request():
     """Store a DB built from the configured DATABASE_URI on `g`."""
     database_uri = app.config['DATABASE_URI']
     g.db = DB(database_uri)
Ejemplo n.º 28
0
import ConfigParser
import datetime
import logging
import logging.config
import time
from time import sleep
import traceback
from bson.objectid import ObjectId

# Path to this module's platform config file, built from `module_dir`
# (defined outside this excerpt). Per the original note, the file includes
# paths, parameters, and oauth information for this module; complete the
# directions in "example_platform.ini" for configuration before proceeding.

PLATFORM_CONFIG_FILE = module_dir + '/platform.ini'

# Module-level DB handle, created at import time (original note: connects to Mongo)
db = DB()

# Module-level threading.Event (an event object, not a thread)
# NOTE(review): originally labelled "Program thread"; presumably used to signal
# the collection thread -- confirm against the code that sets/waits on `e`
e = threading.Event()


class fileOutListener(StreamListener):
    """Stream listener that, per its original description, handles incoming
    tweets by converting them to JSON and writing them to a file, one tweet
    per line.

    Only the start of the constructor is visible in this excerpt: it stores
    the logger, logs an initialization message and starts with an empty
    string buffer. The remaining constructor arguments are not used here.
    """
    def __init__(self, tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile,
                 logger, collection_type, project_id, collector_id):
        # Start with an empty text buffer
        self.buffer = ''
        self.logger = logger
        logger.info('COLLECTION LISTENER: Initializing Stream Listener...')
Ejemplo n.º 29
0
 def setup(self):
     """Build a DB on the recommenders data directory."""
     # <BASE_DIR>/data/recommenders
     recommenders_dir = os.path.join(BASE_DIR, 'data', 'recommenders')
     self.db = DB(db_dir=recommenders_dir)