Example #1
0
    def mount(self):
        if self.mounted:
            return

        query = self._ensure_table_query(self.db_query, self.files_table_name)
        file_index_func = lambda row: rethinkdb.args([
            row[STATUS_JSON_NAME], row[FILE_NAME_JSON_NAME], row[
                FINISHED_DATE_JSON_NAME]
        ])
        file_prefix_index_func = lambda row: rethinkdb.expr(row[
            STATUS_JSON_NAME] == "completed").branch(
                rethinkdb.args([
                    row[FILE_NAME_JSON_NAME].split("/").slice(1, -1), row[
                        FINISHED_DATE_JSON_NAME]
                ]), rethinkdb.error("File is still uploading."))
        query = query.do(lambda result: rethinkdb.expr(result[
            "tables_created"] == 1).branch(
                self._create_index(
                    self.db_query, self.files_table_name, self.file_index,
                    file_index_func).do(lambda _: self._create_index(
                        self.db_query, self.files_table_name, self.
                        file_prefix_index, file_prefix_index_func)), None))

        query = query.do(lambda _: self._ensure_table_query(
            self.db_query, self.chunks_table_name))
        chunk_index_func = lambda row: rethinkdb.args(
            [row[FILE_ID_JSON_NAME], row[NUM_JSON_NAME]])
        query = query.do(lambda result: rethinkdb.expr(result[
            "tables_created"] == 1).branch(
                self._create_index(self.db_query, self.chunks_table_name, self.
                                   chunk_index, chunk_index_func), None))
        query = query.do(lambda _: self._confirm_mount())
        return query
Example #2
0
    def get(self):
        """
        GET /profile

        Logged in User Profile

        :return:
        """
        # TODO: id must be a valid URN
        current_user = self.get_current_user()
        if not current_user:
            self.userError("not authenticated")
            return

        try:
            pQuery = r.table('users')\
                    .get(self.get_current_user()['id']) \
                    .pluck(self.fields['profile']) \
                    .merge(lambda user: {
                    'authority': r.table('authorities').get(user['authority']) \
                                                           .pluck(self.fields_short['authorities']) \
                                                           .default({'id': user['authority']})
                     }) \
                    .merge(lambda user: {
                        'projects': r.table('projects') \
                               .get_all(r.args(user['projects'])) \
                               .distinct() \
                               .pluck(self.fields_short['projects']) \
                               .coerce_to('array')
                    }) \
                    .merge(lambda user: {
                        'slices': r.table('slices') \
                               .get_all(r.args(user['slices'])) \
                               .distinct() \
                               .pluck(self.fields_short['slices']) \
                               .coerce_to('array')
                    })
            if self.isAdmin():
                profile = yield pQuery.merge(lambda user: {
                    'pi_authorities': r.expr(user['pi_authorities']).map(lambda a: {'id':a})
                    }) \
                    .run(self.dbconnection)
            else:
                profile = yield pQuery.merge(lambda user: {
                    'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \
                                                           .distinct() \
                                                           .pluck(self.fields_short['authorities']) \
                                                           .coerce_to('array')
                    }) \
                    .run(self.dbconnection)
        except Exception:
            import traceback
            traceback.print_exc()
            self.userError("not authenticated")
            return
        self.write(json.dumps({"result": profile}, cls=myJSONEncoder))
Example #3
0
    def get_key_frames(self, weighted_nodes, page, include_total_pages=False):
        """
        Holt zu den übergebenen Konzepten die Keyframe-Metadaten aus der Datenbank.
        :param weighted_nodes: übergebene Konzepte mit Gewichtung
        :param page: Seite die abgefragt werden soll
        :return: Keyframe-Metadaten
        """

        total_pages = None

        nodes = weighted_nodes.map(lambda item: item[0])
        weighted_nodes_dict = weighted_nodes.to_dict()

        if include_total_pages:
            total_count = (
                r.table('key_frame_predictions')
                .get_all(r.args(nodes), index='node')
                .group('key_frame_id')
                .ungroup()
                .count()
                .run(r.connect(self.db_host, self.db_port, 'vor'))
            )

            total_pages = math.ceil(float(total_count)/PAGE_LENGTH)

        key_frames = (
            r.table('key_frame_predictions')
            .get_all(r.args(nodes), index='node')  # fetch all keyframe predictions associated with one of the given concepts
            .map(lambda row: {
                'key_frame_id': row['key_frame_id'],
                'weighted_score': r.expr(weighted_nodes_dict)[row['node']].mul(row['score'].coerce_to('number'))  # multiply the supplied concept weight by the keyframe's score
            })
            .group('key_frame_id').reduce(lambda left, right: {  # group by keyframe
                'weighted_score': left['weighted_score'].add(right['weighted_score'])  # sum the scores across multiple concepts
            })
            .ungroup()
            .map(lambda row: {
                'key_frame_id': row['group'],
                'weighted_score_sum': row['reduction']['weighted_score']
            })
            .order_by(r.desc('weighted_score_sum'))  # sort in descending order
            .slice(*KeyFrameRepository.__pagination(page))  # skip to the requested page
            .eq_join('key_frame_id', r.table('key_frames'))  # fetch the metadata from the keyframes table
            .without({'right': 'id'})
            .zip()
            .order_by(r.desc('weighted_score_sum'))  # sort again, since the join changes the order
            .run(r.connect(self.db_host, self.db_port, 'vor'))
        )

        return key_frames, total_pages
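The query above multiplies each prediction's score by the weight supplied for its concept and sums those products per keyframe before ranking. A minimal plain-Python sketch of that same aggregation, with made-up rows and weights, may help when reading the group/reduce chain:

    # Hypothetical prediction rows and concept weights, mirroring the ReQL pipeline above.
    predictions = [
        {'key_frame_id': 'kf1', 'node': 'car', 'score': '0.9'},
        {'key_frame_id': 'kf1', 'node': 'road', 'score': '0.5'},
        {'key_frame_id': 'kf2', 'node': 'car', 'score': '0.4'},
    ]
    weights = {'car': 1.0, 'road': 0.5}

    scores = {}
    for p in predictions:
        # weighted score = concept weight * prediction score, summed per keyframe
        scores[p['key_frame_id']] = scores.get(p['key_frame_id'], 0.0) + weights[p['node']] * float(p['score'])

    # Rank keyframes by their summed weighted score, best first.
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)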
Example #4
0
 def get_fcq_data(self, fcq_ids):
     db = self.application.settings['database_name']
     conn = self.application.settings['conn']
     fcq_data = list(r.db(db).table('Fcq').get_all(r.args(fcq_ids)).run(conn))
     return list(map(lambda fcq:
                 dict(fcq_title=self.fcq_title(fcq), **fcq),
                 fcq_data))
Example #5
0
 def get_fcq_data(self, fcq_ids):
     db = self.application.settings['database_name']
     conn = self.application.settings['conn']
     fcq_data = list(r.db(db).table('Fcq').get_all(r.args(fcq_ids)).run(conn))
     return list(map(lambda fcq:
                 dict(fcq_title=self.fcq_title(fcq), **fcq),
                 fcq_data))
Example #6
0
def create_table(progress, conn, db, table, create_args, sindexes):
    # Make sure that the table is ready if it exists, or create it
    r.branch(
        r.db(db).table_list().contains(table),
        r.db(db).table(table).wait(timeout=30),
        r.db(db).table_create(table, **create_args)).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    try:
        for sindex in sindexes[progress[0]:]:
            if isinstance(sindex, dict) and all(
                    k in sindex for k in ('index', 'function')):
                if sindex['index'] in indexes:
                    r.db(db).table(table).index_drop(sindex['index']).run(conn)
                r.db(db).table(table).index_create(
                    sindex['index'], sindex['function']).run(conn)
                created_indexes.append(sindex['index'])
            progress[0] += 1
        r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
    except RuntimeError:
        raise RuntimeError("Sindex warning")
Example #7
0
def user_feed(user_id):
    # TODO paginate
    key = "user:#{0}".format(user_id)
    feed = redis.zquery(key, 0, 20)
    conn = r.connect(**rethink_conn.conn())
    events = r.table("company_events").get_all(r.args(feed)).run(conn)
    return make_response(json.dumps(events))
Example #8
0
    def find(cls, id, listing=False):
        file_ref = r.table(cls._table).get(id).run(conn)
        if file_ref is not None:
            if file_ref['is_folder'] and listing and file_ref['objects'] is not None:
                file_ref['objects'] = list(r.table(cls._table).get_all(r.args(file_ref['objects'])).run(conn))

        return file_ref
Example #9
0
 def ldap_users_exists(self, commit=False):
     cfg = r.table('config').get(1).run(db.conn)['auth']
     users = list(
         r.table('users').filter({
             'active': True,
             'kind': 'ldap'
         }).pluck('id', 'name', 'accessed').run(db.conn))
     nonvalid = []
     valid = []
     for u in users:
         conn = ldap.initialize(cfg['ldap']['ldap_server'])
         id_conn = conn.search(cfg['ldap']['bind_dn'], ldap.SCOPE_SUBTREE,
                               "uid=%s" % u['id'])
         tmp, info = conn.result(id_conn, 0)
         if len(info):
             valid.append(u)
         else:
             nonvalid.append(u)
     if commit:
         nonvalid_list = [u['id'] for u in nonvalid]
         return r.table('users').get_all(r.args(nonvalid_list)).update({
             'active':
             False
         }).run(db.conn)
     else:
         return {'nonvalid': nonvalid, 'valid': valid}
Example #10
0
    def claim_sites(self, n=1):
        result = (
            self.rr.table('sites').get_all(
                r.args(
                    r.db(self.rr.dbname).table(
                        'sites', read_mode='majority').between(
                            ['ACTIVE', r.minval], ['ACTIVE', r.maxval],
                            index='sites_last_disclaimed').order_by(
                                r.desc('claimed'), 'last_disclaimed').
                    fold({},
                         lambda acc, site: acc.merge(
                             r.branch(
                                 site.has_fields('job_id'),
                                 r.object(
                                     site['job_id'].coerce_to('string'), acc[
                                         site['job_id'].coerce_to('string')].
                                     default(0).add(1)), {})),
                         emit=lambda acc, site, new_acc: r.branch(
                             r.and_(
                                 r.or_(
                                     site['claimed'].not_(), site[
                                         'last_claimed'].lt(r.now().sub(60 * 60
                                                                        ))),
                                 r.or_(
                                     site.has_fields('max_claimed_sites').not_(
                                     ), new_acc[site['job_id'].coerce_to(
                                         'string')].le(site['max_claimed_sites'
                                                            ]))), [site['id']],
                             [])).limit(n))).
            update(
                # try to avoid a race condition resulting in multiple
                # brozzler-workers claiming the same site
                # see https://github.com/rethinkdb/rethinkdb/issues/3235#issuecomment-60283038
                r.branch(
                    r.or_(r.row['claimed'].not_(),
                          r.row['last_claimed'].lt(r.now().sub(60 * 60))), {
                              'claimed': True,
                              'last_claimed': r.now()
                          }, {}),
                return_changes=True)).run()

        self._vet_result(result,
                         replaced=list(range(n + 1)),
                         unchanged=list(range(n + 1)))
        sites = []
        for i in range(result["replaced"]):
            if result["changes"][i]["old_val"]["claimed"]:
                self.logger.warn(
                    "re-claimed site that was still marked 'claimed' "
                    "because it was last claimed a long time ago "
                    "at %s, and presumably some error stopped it from "
                    "being disclaimed",
                    result["changes"][i]["old_val"]["last_claimed"])
            site = brozzler.Site(self.rr, result["changes"][i]["new_val"])
            sites.append(site)
        if sites:
            return sites
        else:
            raise brozzler.NothingToClaim
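The inline comment above is the heart of this query: the claim check and the write happen inside a single update, with r.branch evaluated server-side, so two workers cannot both observe a site as unclaimed and then claim it. A stripped-down sketch of just that conditional-update pattern, using an assumed connection and document id for illustration:

    import rethinkdb as r

    conn = r.connect(host='localhost', port=28015)

    # Only claim the site if it is unclaimed or its previous claim is older than an hour;
    # the branch runs as part of the update itself, avoiding a check-then-set race.
    result = r.table('sites').get('some-site-id').update(
        r.branch(
            r.or_(r.row['claimed'].not_(),
                  r.row['last_claimed'].lt(r.now().sub(60 * 60))),
            {'claimed': True, 'last_claimed': r.now()},
            {}),
        return_changes=True).run(conn)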
Example #11
0
 def add_present_command(self, command, presents=[], **kwargs):
     """
     Add present fields check to command
     """
     if len(presents) > 0:
         print("Only downloading documents with fields: " + str(presents))
         command = command.has_fields(r.args(presents))
     return command
Example #12
0
 def add_present_command(self, command, presents=[], **kwargs):
     '''
     Add present fields check to command
     '''
     if len(presents) > 0:
         print("Only downloading documents with fields: " + str(presents))
         command = command.has_fields(r.args(presents))
     return command
Example #13
0
    def claim_sites(self, n=1):
        self.logger.trace('claiming up to %s sites to brozzle', n)
        result = (
            self.rr.table('sites').get_all(r.args(
                r.db(self.rr.dbname).table('sites', read_mode='majority')
                .between(
                    ['ACTIVE', r.minval], ['ACTIVE', r.maxval],
                    index='sites_last_disclaimed')
                .order_by(r.desc('claimed'), 'last_disclaimed')
                .fold(
                    {}, lambda acc, site: acc.merge(
                        r.branch(
                            site.has_fields('job_id'),
                            r.object(
                                site['job_id'].coerce_to('string'),
                                acc[site['job_id'].coerce_to('string')].default(0).add(1)),
                            {})),
                    emit=lambda acc, site, new_acc: r.branch(
                        r.and_(
                            r.or_(
                                site['claimed'].not_(),
                                site['last_claimed'].lt(r.now().sub(60*60))),
                            r.or_(
                                site.has_fields('max_claimed_sites').not_(),
                                new_acc[site['job_id'].coerce_to('string')].le(site['max_claimed_sites']))),
                            [site['id']], []))
                .limit(n)))
            .update(
                # try to avoid a race condition resulting in multiple
                # brozzler-workers claiming the same site
                # see https://github.com/rethinkdb/rethinkdb/issues/3235#issuecomment-60283038
                r.branch(
                    r.or_(
                      r.row['claimed'].not_(),
                      r.row['last_claimed'].lt(r.now().sub(60*60))),
                    {'claimed': True, 'last_claimed': r.now()},
                    {}),
                return_changes=True)).run()

        self._vet_result(
                result, replaced=list(range(n+1)),
                unchanged=list(range(n+1)))
        sites = []
        for i in range(result["replaced"]):
            if result["changes"][i]["old_val"]["claimed"]:
                self.logger.warn(
                        "re-claimed site that was still marked 'claimed' "
                        "because it was last claimed a long time ago "
                        "at %s, and presumably some error stopped it from "
                        "being disclaimed",
                        result["changes"][i]["old_val"]["last_claimed"])
            site = brozzler.Site(self.rr, result["changes"][i]["new_val"])
            sites.append(site)
        self.logger.debug('claimed %s sites', len(sites))
        if sites:
            return sites
        else:
            raise brozzler.NothingToClaim
Example #14
0
def get_domains_with_transitional_status(list_status=TRANSITIONAL_STATUS):
    r_conn = new_rethink_connection()
    rtable = r.table('domains')
    # ~ l = list(rtable.filter(lambda d: r.expr(list_status).
    # ~ contains(d['status'])).pluck('status', 'id', 'hyp_started').
    # ~ run
    l = list(rtable.get_all(r.args(list_status), index='status').pluck('status', 'id', 'hyp_started').run(r_conn))
    close_rethink_connection(r_conn)
    return l
Example #15
0
 def filter(self, ids=None, **kwargs):
     if ids:
         try:
             query = self.query.get_all(r.args(ids)).filter(kwargs)
         except AttributeError:
             # self.query already has a get_all applied
             query = (self.query.filter(lambda doc: r.expr(ids).contains(doc['id']))
                                .filter(kwargs))
     else:
         query = self.query.filter(kwargs)
     return ObjectSet(self, query)
Example #16
0
 def filter(self, ids=None, **kwargs):
     if ids:
         try:
             query = self.query.get_all(r.args(ids)).filter(kwargs)
         except AttributeError:
             # self.query already has a get_all applied
             query = (self.query.filter(lambda doc: r.expr(ids).contains(
                 doc['id'])).filter(kwargs))
     else:
         query = self.query.filter(kwargs)
     return ObjectSet(self, query)
Example #17
0
def fetch_holdings(holding_ids):
    return r.table('holdings')\
        .get_all(r.args(holding_ids), index='id')\
        .filter(lambda holding: (
            fetch_latest_block_num() >= holding['start_block_num'])
                & (fetch_latest_block_num() < holding['end_block_num']))\
        .map(lambda holding: (holding['label'] == "").branch(
            holding.without('label'), holding))\
        .map(lambda holding: (holding['description'] == "").branch(
            holding.without('description'), holding))\
        .without('start_block_num', 'end_block_num', 'delta_id', 'account')\
        .coerce_to('array')
Example #18
0
def get_downloads_in_progress():
    r_conn = new_rethink_connection()
    try:
        d = r.table('media').get_all(r.args(['DownloadStarting', 'Downloading']), index='status'). \
            pluck('id',
                  'path',
                  'isard-web',
                  'status').run(r_conn)
    except r.ReqlNonExistenceError:
        d = []

    close_rethink_connection(r_conn)
    return d
Example #19
0
def get_downloads_in_progress():
    r_conn = new_rethink_connection()
    try:
        d = r.table('media').get_all(r.args(['DownloadStarting', 'Downloading']), index='status'). \
            pluck('id',
                  'path',
                  'isard-web',
                  'status').run(r_conn)
    except r.ReqlNonExistenceError:
        d = []

    close_rethink_connection(r_conn)
    return d
Example #20
0
    def handle_entitiesImported(self, work):
        entities = work['data']['entities']
        entityType = work['work']['configType']
        pageCount = self.countsPerType[entityType]['pageCount']

        self.countsPerType[entityType].setdefault('importCount', 0)
        self.countsPerType[entityType]['importCount'] += len(entities)
        self.idsPerType.setdefault(entityType, []).extend([e['id'] for e in entities])
        LOG.info("Imported {currCount}/{totalCount} entities for type '{typ}' on page {page}/{pageCount}".format(
            currCount=self.countsPerType[entityType]['importCount'],
            totalCount=self.countsPerType[entityType]['entityCount'],
            typ=entityType,
            page=work['work']['page'],
            pageCount=pageCount,
        ))

        entityConfig = self.controller.entityConfigManager.getConfigForType(entityType)
        self.controller.post_entities(entityConfig, entities)

        # Store the timestamp for the import
        # We'll use this to discard old EventLogEntities that happened before the import
        # However, eventlogentry's that are created while importing will still be applied
        timestamps = self.importTimestampsPerType.setdefault(entityType, {})
        timestamps.setdefault('startImportTimestamp', work['data']['startImportTimestamp'])

        if not len(self.activeWorkItemsPerType[entityType]):
            LOG.info("Imported all entities for type '{0}'".format(entityType))

            # Get a list of entity IDs currently stored in the cache table
            cachedEntityIDs = set(rethinkdb
                .table(entityConfig['table'])
                .map(lambda asset: asset['id'])
                .coerce_to('array')
                .run(self.controller.rethink)
            )
            importedEntityIDs = set(self.idsPerType[entityType])
            diffIDs = cachedEntityIDs.difference(importedEntityIDs)

            if len(diffIDs):
                # Delete these extra entities
                # This allows us to update the cache in place without
                # having to drop the table before the import, allowing for
                # a more seamless import / update process
                LOG.info("Deleting extra entities found in cache with IDs: {0}".format(diffIDs))
                rethinkdb.db('shotguncache').table(entityConfig['table']).get_all(rethinkdb.args(diffIDs)).delete().run(self.controller.rethink)

            self.config.history.setdefault('config_hashes', {})[entityType] = entityConfig.hash
            self.config.history.setdefault('cached_entity_types', {})[entityType] = self.importTimestampsPerType[entityType]
            self.config.history.save()

            self.activeWorkItemsPerType.pop(entityType)
Example #21
0
 def ldap_users_exists(self,commit=False):
     cfg=r.table('config').get(1).run(db.conn)['auth']
     users=list(r.table('users').filter({'active':True,'kind':'ldap'}).pluck('id','name','accessed').run(db.conn))
     nonvalid=[]
     valid=[]
     for u in users:
         conn = ldap.initialize(cfg['ldap']['ldap_server'])
         id_conn = conn.search(cfg['ldap']['bind_dn'],ldap.SCOPE_SUBTREE,"uid=%s" % u['id'])
         tmp,info=conn.result(id_conn, 0)
         if len(info):
             valid.append(u)
         else:
             nonvalid.append(u)
     if commit:
         nonvalid_list= [ u['id'] for u in nonvalid ]
         return r.table('users').get_all(r.args(nonvalid_list)).update({'active':False}).run(db.conn)
     else:
         return {'nonvalid':nonvalid,'valid':valid}
Example #22
0
        def remove(self, *objs):
            old_keys = set()
            for obj in objs:
                if not isinstance(obj, model_cls):
                    raise TypeError('%s instance expected, got %r' %
                                    (model_cls.__name__, obj))
                obj_key = getattr(obj.fields, rkey, None)
                if obj_key is not None:
                    old_keys.add(obj_key)

            existing_keys = {doc[rkey]
                            for doc in self.query.run()}
            # Remove nonexistent keys from old_keys
            old_keys &= existing_keys

            if old_keys:
                (join_model_cls.table.get_all(r.args(list(old_keys)), index=mrkey)
                                     .delete()
                                     .run())
Example #23
0
        def remove(self, *objs):
            old_keys = set()
            for obj in objs:
                if not isinstance(obj, model_cls):
                    raise TypeError('%s instance expected, got %r' %
                                    (model_cls.__name__, obj))
                obj_key = getattr(obj.fields, rkey, None)
                if obj_key is not None:
                    old_keys.add(obj_key)

            existing_keys = {doc[rkey]
                            for doc in self.query.run()}
            # Remove nonexistent keys from old_keys
            old_keys &= existing_keys

            if old_keys:
                (join_model_cls.table.get_all(r.args(list(old_keys)), index=mrkey)
                                     .delete()
                                     .run())
Example #24
0
def create_table(progress, conn, db, table, pkey, sindexes):
    if table not in r.db(db).table_list().run(conn):
        r.db(db).table_create(table, primary_key=pkey).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    for sindex in sindexes[progress[0]:]:
        if isinstance(sindex, dict) and all(k in sindex for k in ('index', 'function')):
            if sindex['index'] in indexes:
                r.db(db).table(table).index_drop(sindex['index']).run(conn)
            r.db(db).table(table).index_create(sindex['index'], sindex['function']).run(conn)
            created_indexes.append(sindex['index'])
        progress[0] += 1
    r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
Example #25
0
def create_table(progress, conn, db, table, pkey, sindexes):
    if table not in r.db(db).table_list().run(conn):
        r.db(db).table_create(table, primary_key=pkey).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    for sindex in sindexes[progress[0]:]:
        if isinstance(sindex, dict) and all(k in sindex for k in ('index', 'function')):
            if sindex['index'] in indexes:
                r.db(db).table(table).index_drop(sindex['index']).run(conn)
            r.db(db).table(table).index_create(sindex['index'], sindex['function']).run(conn)
            created_indexes.append(sindex['index'])
        progress[0] += 1
    r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
Example #26
0
def create_table(progress, conn, db, table, create_args, sindexes):
    # Make sure that the table is ready if it exists, or create it
    r.branch(r.db(db).table_list().contains(table),
        r.db(db).table(table).wait(timeout=30),
        r.db(db).table_create(table, **create_args)).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    for sindex in sindexes[progress[0]:]:
        if isinstance(sindex, dict) and all(k in sindex for k in ('index', 'function')):
            if sindex['index'] in indexes:
                r.db(db).table(table).index_drop(sindex['index']).run(conn)
            r.db(db).table(table).index_create(sindex['index'], sindex['function']).run(conn)
            created_indexes.append(sindex['index'])
        progress[0] += 1
    r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
Example #27
0
    def get(self, id=None, o=None):
        """
            - GET /authorities
                (public) Authorities list

            - GET /authorities/<id>
                (public) Authority with <id>

            - GET /authorities/(users|projects)
                (auth) Users/Projects list of the authority of the
                logged in user

            - GET /authorities/<id>/(users|projects)
                (auth) Users/Projects list of the authority with <id>

            :return:
            """

        response = []
        current_user = self.get_current_user()

        # GET /authorities
        if not id and not o:
            cursor = yield r.table('authorities') \
                            .pluck(self.fields['authorities']) \
                            .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                authority = yield cursor.next()
                if authority['name'] is None:
                    authority['name'] = authority['shortname'].title()
                response.append(authority)

        # GET /authorities/<id>
        elif not o and id and self.isUrn(id):
            if not current_user:
                self.userError('permission denied')
                return

            try:
                # Check if the user has the right to GET an authority (must be a PI of an upper authority)
                a = yield r.table('authorities').get(id).run(self.dbconnection)
                if not a:
                    self.userError("this authority %s does not exist" % id)
                    return
                # Check if the user isAdmin
                admin = self.isAdmin()
                if self.current_user['id'] not in a['pi_users'] and not admin:
                    self.userError("your user has no rights on authority: %s" %
                                   id)
                    return
            except Exception:
                import traceback
                traceback.print_exc()
                self.userError("not authenticated")
                return

            cursor = yield r.table('authorities') \
                            .pluck(self.fields['authorities']) \
                            .filter({'id': id}) \
                            .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                authority = yield cursor.next()
                if authority['name'] is None:
                    authority['name'] = authority['shortname'].title()
                response.append(authority)

        # GET /authorities/(users|projects)
        elif not id and o in ['users', 'projects']:
            if not current_user:
                self.userError('permission denied')
                return

            cursor = yield r.table(o) \
                            .pluck(self.fields[o]) \
                            .filter({"authority": current_user['authority']}) \
                            .merge(lambda user: {
                                'authority': r.table('authorities').get(user['authority']) \
                                       .pluck(self.fields_short['authorities']) \
                                       .default({'id': user['authority']})
                            }) \
                            .merge(lambda user: {
                                'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \
                                       .distinct() \
                                       .pluck(self.fields_short['authorities']) \
                                       .coerce_to('array')
                            }) \
                                .merge(lambda user: {
                                'projects': r.table('projects') \
                                       .get_all(r.args(user['projects'])) \
                                       .distinct() \
                                       .pluck(self.fields_short['projects']) \
                                       .coerce_to('array')
                            }) \
                                .merge(lambda user: {
                                'slices': r.table('slices') \
                                       .get_all(r.args(user['slices'])) \
                                       .distinct() \
                                       .pluck(self.fields_short['slices']) \
                                       .coerce_to('array')
                            }) \
                            .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                if 'name' in item and item['name'] is None:
                    item['name'] = item['shortname'].title()
                response.append(item)

        # GET /authorities/<id>/(users|projects)
        elif id and self.isUrn(id) and o in ['users', 'projects']:
            try:
                # Check if the user has the right to GET an authority (must be a PI of an upper authority)
                a = yield r.table('authorities').get(id).run(self.dbconnection)
                if not a:
                    self.userError("this authority %s does not exist" % id)
                    return
                if self.current_user['id'] not in a[
                        'pi_users'] and not self.isAdmin():
                    self.userError("your user has no rights on authority: %s" %
                                   id)
                    return
            except Exception:
                import traceback
                traceback.print_exc()
                self.userError("not authenticated")
                return

            if o == 'users':
                cursor = yield r.table(o) \
                            .pluck(self.fields[o]) \
                            .filter({"authority": id}) \
                            .merge(lambda user: {
                                'authority': r.table('authorities').get(user['authority']) \
                                                            .pluck(self.fields_short['authorities']) \
                                                            .default({'id' : user['authority']})
                            }) \
                            .merge(lambda user: {
                            'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \
                                                                   .distinct() \
                                                                   .pluck(self.fields_short['authorities']) \
                                                                   .coerce_to('array')
                             }) \
                            .merge(lambda user: {
                                'projects': r.table('projects') \
                                       .get_all(r.args(user['projects'])) \
                                       .distinct() \
                                       .pluck(self.fields_short['projects']) \
                                       .coerce_to('array')
                            }) \
                            .merge(lambda user: {
                                'slices': r.table('slices') \
                                       .get_all(r.args(user['slices'])) \
                                       .distinct() \
                                       .pluck(self.fields_short['slices']) \
                                       .coerce_to('array')
                            }) \
                            .run(self.dbconnection)
            else:
                cursor = yield r.table(o) \
                            .pluck(self.fields[o]) \
                            .filter({"authority": id}) \
                            .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                if 'name' in item and item['name'] is None:
                    item['name'] = item['shortname'].title()
                response.append(item)

        else:
            self.userError("invalid request {} {}".format(id, o))
            return

        self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
Example #28
0
    def run(self):
        self.tid = get_tid()
        logs.downloads.debug(
            'RUN-DOWNLOAD-THREAD-------------------------------------')
        pool_id = 'default'
        first_loop = True
        if self.stop is False:
            if first_loop is True:
                # if domains or media have status Downloading when the engine restarts,
                # we need to reset the download, deleting the partial file
                first_loop = False
                # wait for a hypervisor to come online to handle downloads
                next_hyp = False
                while next_hyp is False:
                    logs.downloads.info(
                        'waiting an hypervisor online to launch downloading actions'
                    )
                    if pool_id in self.manager.pools.keys():
                        next_hyp = self.manager.pools[pool_id].get_next()
                    sleep(1)

                for hyp_id in get_hypers_in_pool():
                    self.killall_curl(hyp_id)

                domains_status_downloading = get_domains_with_status(
                    'Downloading')
                medias_status_downloading = get_media_with_status(
                    'Downloading')

                for id_domain in domains_status_downloading:
                    create_dict = get_domain(id_domain)['create_dict']
                    dict_changes = {
                        'id': id_domain,
                        'table': 'domains',
                        'create_dict': create_dict
                    }
                    update_domain_status('ResetDownloading', id_domain)
                    self.abort_download(dict_changes,
                                        final_status='DownloadFailed')

                for id_media in medias_status_downloading:
                    dict_media = get_media(id_media)
                    dict_changes = {
                        'id': id_media,
                        'table': 'media',
                        'path': dict_media['path'],
                        'hypervisors_pools': dict_media['hypervisors_pools']
                    }
                    update_status_table('media', 'ResetDownloading', id_media)
                    self.abort_download(dict_changes,
                                        final_status='DownloadFailed')

            self.r_conn = new_rethink_connection()
            update_table_field('hypervisors_pools', pool_id,
                               'download_changes', 'Started')
            for c in r.table('media').get_all(r.args(
                    ['Deleting', 'Deleted', 'Downloaded', 'DownloadFailed', 'DownloadStarting', 'Downloading', 'Download',
                     'DownloadAborting','ResetDownloading']), index='status'). \
                    pluck('id',
                          'path',
                          'url-isard',
                          'url-web',
                          'status'
                          ).merge(
                {'table': 'media'}).changes(include_initial=True).union(
                r.table('domains').get_all(
                    r.args(['Downloaded', 'DownloadFailed','DownloadStarting', 'Downloading', 'DownloadAborting','ResetDownloading']), index='status'). \
                        pluck('id',
                              'create_dict',
                              'url-isard',
                              'url-web',
                              'status').merge(
                    {"table": "domains"}).changes(include_initial=True)).union(
                r.table('engine').pluck('threads', 'status_all_threads').merge({'table': 'engine'}).changes()).run(
                self.r_conn):

                if self.stop:
                    break
                if c.get('new_val', None) is not None:
                    if c['new_val'].get('table', False) == 'engine':
                        if c['new_val']['status_all_threads'] == 'Stopping':
                            break
                        else:
                            continue

                logs.downloads.debug('DOWNLOAD CHANGES DETECTED:')
                logs.downloads.debug(pprint.pformat(c))

                if c.get('old_val', None) is None:
                    if c['new_val']['status'] == 'DownloadStarting':
                        self.start_download(c['new_val'])
                elif c.get('new_val', None) is None:
                    if c['old_val']['status'] in ['DownloadAborting']:
                        self.remove_download_thread(c['old_val'])

                elif 'old_val' in c and 'new_val' in c:
                    if c['old_val']['status'] == 'DownloadFailed' and c[
                            'new_val']['status'] == 'DownloadStarting':
                        self.start_download(c['new_val'])

                    elif c['old_val']['status'] == 'Downloaded' and c[
                            'new_val']['status'] == 'Deleting':
                        if c['new_val']['table'] == 'media':
                            self.delete_media(c['new_val'])

                    elif c['old_val']['status'] == 'Deleting' and c['new_val'][
                            'status'] == 'Deleted':
                        if c['new_val']['table'] == 'media':
                            remove_media(c['new_val']['id'])

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'DownloadFailed':
                        pass

                    elif c['old_val']['status'] == 'DownloadStarting' and c[
                            'new_val']['status'] == 'Downloading':
                        pass

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'Downloaded':
                        pass

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'DownloadAborting':
                        self.abort_download(c['new_val'])

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'ResetDownloading':
                        self.abort_download(c['new_val'],
                                            final_status='DownloadFailed')
Example #29
0
    slug_map = {}

    for owner_repo, plugins in grouped_plugins.iteritems():

        print '\nPlugin with GitHub path %s occurs %s times' % (
                owner_repo,
                len(plugins))

        canonical = merge_plugins(plugins)

        print "Using %s as canonical" % canonical['slug']

        # db.plugins.insert normalizes the owner/repo to lower case
        db.plugins.insert(canonical, conflict='replace')
        updated += 1

        dupes = [dupe for dupe in plugins if dupe['slug'] != canonical['slug']]
        if dupes:
            dupe_slugs = [dupe['slug'] for dupe in dupes]
            # Store deleted slugs for logging
            slug_map[canonical['slug']] = dupe_slugs
            print 'Deleting duplicate rows: %s' % ', '.join(dupe_slugs)
            r.table('plugins').get_all(r.args(dupe_slugs)).delete().run(r_conn())
            deleted += len(dupes)

    with open(LOG_FILE, 'w') as log:
        print 'Writing deleted slug names to %s' % LOG_FILE
        log.writelines(dupe_log_line(c, d) for c, d in slug_map.iteritems())

    print "Updated %d rows and deleted %d" % (updated, deleted)
Example #30
0
    def get(self, id=None, o=None):
        """
            - GET /slices
                (public) Slices list

            - GET /slices/<urn|hrn>
                (public) Slice with <urn|hrn>

            - GET /slices/<id|hrn>/(users|resources)
                (auth) Users/Resources list of the slice with <id|hrn>

            :return:
            """

        slice = None
        response = []

        if not self.get_current_user():
            self.userError('permission denied user not logged in')
            return
        ##
        # if id (hrn|urn) is set we get the slice with id <urn|hrn>
        #
        if id:
            if self.isUrn(id):
                filter = {'id': id}

            elif self.isHrn(id):
                filter = {'hrn': id}
            else:
                self.userError('id or hrn format error')
                return

            cursor = yield r.table('slices') \
                .filter(filter) \
                .merge(lambda slice: {
                    'authority': r.table('authorities').get(slice['authority']) \
                       .pluck(self.fields_short['authorities']) \
                       .default({'id': slice['authority']})
                }) \
                .merge(lambda slice: {
                    'project': r.table('projects').get(slice['project']) \
                       .pluck(self.fields_short['projects']) \
                       .default({'id': slice['project']})
                }) \
                .merge(lambda slice: {
                    'users': r.table('users').get_all(r.args(slice['users']), index="id") \
                           .distinct() \
                           .pluck(self.fields_short['users']).coerce_to('array')
                }) \
                .merge(lambda slice: {
                    'leases': r.table('leases').filter({'slice_id':slice['id']}).merge(lambda l: {
                        'resources': l['resources'].map(lambda res: r.table('resources').get(res)) \
                           .coerce_to('array')
                        }).coerce_to('array')
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                slice = yield cursor.next()

        ##
        # GET /slices
        #
        # returns list of slices
        #
        if not id and not o:
            cursor = yield r.table('slices') \
                .pluck(self.fields['slices']) \
                .merge(lambda slice: {
                    'authority': r.table('authorities').get(slice['authority']) \
                           .pluck(self.fields_short['authorities']) \
                           .default({'id': slice['authority']})
                }) \
                .merge(lambda slice: {
                    'project': r.table('projects').get(slice['project']) \
                           .pluck(self.fields_short['projects']) \
                           .default({'id': slice['project']})
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                slice = yield cursor.next()
                response.append(slice)
        ##
        # GET /slices/<urn|hrn>
        #
        # returns slice with <hrn|urn>
        #
        elif not o and id:
            if not self.get_current_user():
                self.userError('permission denied user not logged in')
                return

            response.append(slice)

        ##
        # GET /slice/<urn|hrn>/users
        #
        # returns a list of users of slice with id urn|hrn
        #
        elif id and slice and o == 'users':

            response = yield r.table('users') \
                .get_all(r.args(slice['users'])['id'], index='id') \
                .distinct() \
                .pluck(self.fields['users']) \
                .merge(lambda user: {
                    'authority': r.table('authorities').get(user['authority']) \
                           .pluck(self.fields_short['authorities']) \
                           .default({'id': user['authority']})
                }) \
                .coerce_to('array').run(self.dbconnection)

        ##
        # GET /slice/<urn|hrn>/resources
        #
        # returns a list of resources in the slice with id urn|hrn
        #
        elif id and slice and o == 'resources':

            response = yield r.table('resources') \
                .get_all(r.args(slice['resources'])['id'], index='id') \
                .distinct() \
                .merge(lambda resource: {
                    'testbed': r.table('testbeds').get(resource['testbed']) \
                           .pluck(self.fields_short['testbeds']) \
                           .default({'id': resource['testbed']})
                }) \
                .coerce_to('array').run(self.dbconnection)

        else:
            self.userError("invalid request")
            return

        self.finish(json.dumps({'result': response}, cls=myJSONEncoder))
Example #31
0
    def handle_entitiesImported(self, work):
        entities = work['data']['entities']
        entityType = work['work']['configType']
        pageCount = self.countsPerType[entityType]['pageCount']

        self.countsPerType[entityType].setdefault('importCount', 0)
        self.countsPerType[entityType]['importCount'] += len(entities)
        self.idsPerType.setdefault(entityType,
                                   []).extend([e['id'] for e in entities])
        LOG.info(
            "Imported {currCount}/{totalCount} entities for type '{typ}' on page {page}/{pageCount}"
            .format(
                currCount=self.countsPerType[entityType]['importCount'],
                totalCount=self.countsPerType[entityType]['entityCount'],
                typ=entityType,
                page=work['work']['page'],
                pageCount=pageCount,
            ))

        entityConfig = self.controller.entityConfigManager.getConfigForType(
            entityType)
        self.controller.post_entities(entityConfig, entities)

        # Store the timestamp for the import
        # We'll use this to discard old EventLogEntities that happened before the import
        # However, eventlogentry's that are created while importing will still be applied
        timestamps = self.importTimestampsPerType.setdefault(entityType, {})
        timestamps.setdefault('startImportTimestamp',
                              work['data']['startImportTimestamp'])

        if not len(self.activeWorkItemsPerType[entityType]):
            LOG.info("Imported all entities for type '{0}'".format(entityType))

            # Get a list of entity IDs currently stored in the cache table
            cachedEntityIDs = set(
                rethinkdb.table(entityConfig['table']).map(
                    lambda asset: asset['id']).coerce_to('array').run(
                        self.controller.rethink))
            importedEntityIDs = set(self.idsPerType[entityType])
            diffIDs = cachedEntityIDs.difference(importedEntityIDs)

            if len(diffIDs):
                # Delete these extra entities
                # This allows us to update the cache in place without
                # having to drop the table before the import, allowing for
                # a more seamless import / update process
                LOG.info(
                    "Deleting extra entities found in cache with IDs: {0}".
                    format(diffIDs))
                rethinkdb.db('shotguncache').table(
                    entityConfig['table']).get_all(
                        rethinkdb.args(diffIDs)).delete().run(
                            self.controller.rethink)

            self.config.history.setdefault('config_hashes',
                                           {})[entityType] = entityConfig.hash
            self.config.history.setdefault(
                'cached_entity_types',
                {})[entityType] = self.importTimestampsPerType[entityType]
            self.config.history.save()

            self.activeWorkItemsPerType.pop(entityType)
Example #32
0
    def get_word_list_syns_and_related(cls,
                                       base_word_list: [],
                                       add_base_word=True):

        # Get Base Word Records
        all_syn_ids = set()
        # {base word : [id, base word]}
        base_word_records_dict: {
            str: [str, str]
        } = cls.keyword.get_all(r.args(base_word_list),
                                index="word").group("word").run(cls.connection)
        # {base word id: {word, syns: { syn_id: weight }}}
        base_word_dict: {str: {str, {str: float}}} = {}

        for key, base_word_record in base_word_records_dict.items():
            base_word_id = base_word_record[0]["id"]
            base_word_dict[base_word_id] = {
                "word": base_word_record[0]["word"],
                "syns": {}
            }
            if add_base_word:
                base_word_dict[base_word_id]["syns"][base_word_id] = 1
                all_syn_ids.add(base_word_id)

        # Get all relevant syns and bases
        _id_args = r.args(list(base_word_dict.keys()))
        # [{"base_id", "syn_id", "weight"}]
        syn_ids = cls.synlinks.get_all(
            _id_args, index="base_id").coerce_to("array").run(cls.connection)
        # [{"base_id", "syn_id", "weight"}]
        base_ids = cls.synlinks.get_all(
            _id_args, index="syn_id").coerce_to("array").run(cls.connection)
        all_syn_ids |= (set([syn_id["syn_id"] for syn_id in syn_ids] +
                            [base_id["base_id"] for base_id in base_ids]))

        for syn_id_record in syn_ids:
            # add to base_word syns_id list
            if syn_id_record["syn_id"] in base_word_dict[
                    syn_id_record["base_id"]]["syns"]:
                # if synlink is already added, take larger weight
                original = base_word_dict[syn_id_record["base_id"]]["syns"][
                    syn_id_record["syn_id"]]
                base_word_dict[syn_id_record["base_id"]]["syns"][
                    syn_id_record["syn_id"]] = max(
                        syn_id_record["weight"] * cls.syn_decay, original)
            else:
                base_word_dict[syn_id_record["base_id"]]["syns"][syn_id_record[
                    "syn_id"]] = syn_id_record["weight"] * cls.syn_decay
        for base_id_record in base_ids:
            # add to base_word syns_id list
            if base_id_record["base_id"] in base_word_dict[
                    base_id_record["syn_id"]]["syns"]:
                # if synlink is already added, take larger weight
                original = base_word_dict[base_id_record["syn_id"]]["syns"][
                    base_id_record["base_id"]]
                base_word_dict[base_id_record["syn_id"]]["syns"][
                    base_id_record["base_id"]] = max(
                        base_id_record["weight"] * cls.syn_decay, original)
            else:
                base_word_dict[base_id_record["syn_id"]]["syns"][
                    base_id_record[
                        "base_id"]] = base_id_record["weight"] * cls.syn_decay

        # {"id": { "id", "word" }}
        syn_records = cls.keyword.get_all(r.args(all_syn_ids)).group("id").run(
            cls.connection)

        # [{"parent_id", "child_id"}]
        related_word_ids = cls.keyword_relations.get_all(
            r.args(all_syn_ids), index="parent_id").pluck(
                "parent_id", "child_id").coerce_to("array").run(cls.connection)

        # key: id of the root synonym
        # value: ids of the related words
        related_words_dict = {}
        for item in related_word_ids:
            if item["parent_id"] not in related_words_dict:
                related_words_dict[item["parent_id"]] = []
            related_words_dict[item["parent_id"]].append(item["child_id"])

        related_word_records = dict(
            cls.keyword.get_all(
                r.args([
                    related_word_id_item["child_id"]
                    for related_word_id_item in related_word_ids
                ])).group("id").run(cls.connection))

        base_word_weight_dict = {}
        for base_word_id, val in base_word_dict.items():
            # for each base word
            base_word_word = val["word"]
            syns_dict = val["syns"]
            weight_dict = {}
            for syn_id, weight in syns_dict.items():
                # for synonyms:
                syn_record = syn_records[syn_id][0]
                weight_dict[syn_record["word"]] = {
                    "id": syn_record["id"],
                    "weight": weight,
                    "parent": None
                }
                if syn_id in related_words_dict:
                    # If there are related words for the synonym
                    for syn_related_word_id in related_words_dict[syn_id]:
                        related_word_record = related_word_records[
                            syn_related_word_id][0]
                        if related_word_record["word"] not in weight_dict:
                            weight_dict[related_word_record["word"]] = {
                                "id": related_word_record["id"],
                                "weight": weight * cls.related_word_decay,
                                "parent": syn_record["word"]
                            }
                base_word_weight_dict[base_word_word] = weight_dict
        return base_word_weight_dict
Example #33
0
    def search_items(self, searchstring, searchfields=['tags'], returnfields=['id']):
        """
        Searches through the Table
        1) search all alternate_titles for full searchstring
        2) if searchstring contains a number > 99, search course_numbers for that number
        3) if searchstring contains a ngram with four or fewer characters, looks through course_subject
        """

        DB = self.DB
        table = self.__class__.__name__
        lowercase_searchstring = searchstring.lower()
        lowercase_searchstring = lowercase_searchstring.replace('-', ' ')
        splitwords = lowercase_searchstring.split(' ')
        words = list(filter(lambda word: word not in STOPWORDS, splitwords))

        if not len(words):
            return []

        logging.info(words)

        def sequence_search(searchfield, words):
            return r.expr(words).concat_map(
                lambda word: r.db(DB).table(table).filter(
                    lambda doc: doc[searchfield].map(
                        lambda title: title.do(
                            lambda matcher: matcher.coerce_to('STRING').match('(?i)' + word)
                        )
                    ).reduce(lambda left, right: left | right)
                ).coerce_to('array').map(lambda doc: doc['id'])
            )

        def static_search(searchfield, words):
            return r.expr(words).concat_map(
                lambda word: r.db(DB).table(table).filter(
                    lambda doc: doc[searchfield].coerce_to('STRING').match('(?i)' + word)
                ).coerce_to('array').map(lambda doc: doc['id'])
            )

        def search(searchfield, words):
            if isinstance(self.default()[searchfield], (list, tuple)):
                return sequence_search(searchfield, words)
            return static_search(searchfield, words)

        searches = [search(searchfield, words) for searchfield in searchfields]
        total_results = r.add(r.args(searches)).run(self.conn)

        searchresults = (r.expr(total_results)).group(r.row).count().ungroup().order_by('reduction').run(self.conn)

        if not len(searchresults):
            return []

        best_score = searchresults[-1]['reduction']

        best_ids = r.expr(searchresults).filter({'reduction': best_score}).get_field('group').run(self.conn)

        if 'id' not in returnfields:
            logging.warning("'id' is not in the listed returnfields; including it is recommended")
        if not len(returnfields):
            logging.error("returnfields cannot be empty")
            return []
        try:
            return list(r.db(DB).table(table).get_all(r.args(best_ids)).pluck(r.args(returnfields)).run(self.conn))
        except Exception as err:
            logging.error(err)
            return []
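The ranking step above can be read in isolation: r.add(r.args(searches)) concatenates the per-word id lists into one array, and grouping that array by value counts how many words matched each document. A small sketch with made-up ids and an assumed local connection:

import rethinkdb as r

conn = r.connect("localhost", 28015)  # assumed connection details

# Each inner list stands in for the ids matched by one search word.
matches_per_word = [["c1", "c2"], ["c2"], ["c2", "c3"]]

# r.add on arrays concatenates them, so r.args() splats the lists into one add().
combined = r.add(r.args(matches_per_word)).run(conn)  # ["c1", "c2", "c2", "c2", "c3"]

# Group by value, count occurrences, and sort ascending; the best match comes last.
ranked = (r.expr(combined)
          .group(r.row).count()
          .ungroup()
          .order_by("reduction")
          .run(conn))
# e.g. [..., {"group": "c2", "reduction": 3}]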
Example #34
0
def model_overtime(db, conn):
    def _general_overtime(doc, val):
        return {
            'total_fcqs': val['reduction'].count(),
            'total_forms_requested': val['reduction'].sum('forms_requested'),
            'total_forms_returned': val['reduction'].sum('forms_returned'),
            'denver_data_averages': r.branch(((doc.get_field('campus').default(None) == 'DN') & (val['group'] <= 20144)), {
                'r_fairness': val['reduction'].get_field('denver_data').get_field('r_fairness').avg().default(None),
                'r_presentation': val['reduction'].get_field('denver_data').get_field('r_presentation').avg().default(None),
                'r_workload': val['reduction'].get_field('denver_data').get_field('r_workload').avg().default(None),
                'r_diversity': val['reduction'].get_field('denver_data').get_field('r_diversity').avg().default(None),
                'r_accessibility': val['reduction'].get_field('denver_data').get_field('r_accessibility').avg().default(None),
                'r_learning': val['reduction'].get_field('denver_data').get_field('r_learning').avg().default(None),
            }, None)
        }

    def _general_stats(doc):
        return {
            'total_fcqs': doc['fcq_data'].count(),
            'total_forms_requested': doc['fcq_data'].sum('forms_requested'),
            'total_forms_returned': doc['fcq_data'].sum('forms_returned'),
        }

    def _instructor_overtime(doc, val, unchained=False):
        iot = {
            'GR_fcqs': val['reduction'].filter({'level': 'GR'}).count(),
            'UD_fcqs': val['reduction'].filter({'level': 'UD'}).count(),
            'LD_fcqs': val['reduction'].filter({'level': 'LD'}).count(),
            'total_courses': val['reduction'].get_field('course_id').distinct().count(),
            'instructoroverall_average': val['reduction'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': val['reduction'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': val['reduction'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': val['reduction'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': val['reduction'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        iot.update(chain)
        return iot

    def _instructor_stats(doc, unchained=False):
        iot = {
            'GR_fcqs': doc['fcq_data'].filter({'level': 'GR'}).count(),
            'UD_fcqs': doc['fcq_data'].filter({'level': 'UD'}).count(),
            'LD_fcqs': doc['fcq_data'].filter({'level': 'LD'}).count(),
            'total_courses': doc['fcq_data'].get_field('course_id').distinct().count(),
            'instructoroverall_average': doc['fcq_data'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': doc['fcq_data'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': doc['fcq_data'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': doc['fcq_data'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': doc['fcq_data'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        iot.update(chain)
        return iot

    def _course_overtime(doc, val, unchained=False):
        cot = {
            'total_instructors': val['reduction'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': val['reduction'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': val['reduction'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': val['reduction'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': val['reduction'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': val['reduction'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        cot.update(chain)
        return cot

    def _course_stats(doc, unchained=False):
        cot = {
            'total_instructors': doc['fcq_data'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': doc['fcq_data'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': doc['fcq_data'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': doc['fcq_data'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': doc['fcq_data'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': doc['fcq_data'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        cot.update(chain)
        return cot

    def _department_overtime(doc, val):
        iot = _instructor_overtime(doc, val, unchained=True)
        cot = _course_overtime(doc, val, unchained=True)
        got = _general_overtime(doc, val)
        dot = {
            'GR_courses': val['reduction'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': val['reduction'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': val['reduction'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count(),
            'TA_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructoroverall').avg().default(None),
            'OTH_instructoroverall_average': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructoroverall').avg().default(None),
            'TTT_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructoroverall').avg().default(None),
            'GR_courseoverall_average': val['reduction'].filter({'level': 'GR'}).get_field('courseoverall').avg().default(None),
            'UD_courseoverall_average': val['reduction'].filter({'level': 'UD'}).get_field('courseoverall').avg().default(None),
            'LD_courseoverall_average': val['reduction'].filter({'level': 'LD'}).get_field('courseoverall').avg().default(None),
            'GR_forms_requested': val['reduction'].filter({'level': 'GR'}).sum('forms_requested'),
            'UD_forms_requested': val['reduction'].filter({'level': 'UD'}).sum('forms_requested'),
            'LD_forms_requested': val['reduction'].filter({'level': 'LD'}).sum('forms_requested')
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    def _department_stats(doc):
        iot = _instructor_stats(doc, unchained=True)
        cot = _course_stats(doc, unchained=True)
        got = _general_stats(doc)
        dot = {
            'GR_courses': doc['fcq_data'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': doc['fcq_data'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': doc['fcq_data'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': doc['fcq_data'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': doc['fcq_data'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': doc['fcq_data'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count()
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    # model_overtime
    for model in ['Instructor', 'Department', 'Course']:
        _model_overtime = {
            'Instructor': _instructor_overtime,
            'Department': _department_overtime,
            'Course': _course_overtime
        }[model]
        _model_stats = {
            'Instructor': _instructor_stats,
            'Department': _department_stats,
            'Course': _course_stats
        }[model]
        overtime_query = r.db(db).table(model).merge(
            lambda doc: {'fcq_data': r.db(db).table('Fcq').get_all(r.args(doc['fcqs'])).coerce_to('array')}
        ).for_each(
            lambda doc: r.db(db).table(model).get(doc['id']).update({'overtime': doc['fcq_data'].group('yearterm').ungroup().map(
                lambda val: [val['group'].coerce_to('string'), _model_overtime(doc, val)]
            ).coerce_to('object'), 'stats': _model_stats(doc)})
        ).run(conn, array_limit=200000)
        logging.info(overtime_query)
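The per-term history written above follows one pattern: group the embedded fcq_data by yearterm, ungroup into {"group", "reduction"} pairs, map each pair to [term, stats], and coerce the pairs back into a single object. A condensed sketch of that shape, with assumed connection and database names and only one statistic kept:

import rethinkdb as r

conn = r.connect("localhost", 28015)  # assumed connection details
db = "fcq"                            # assumed database name

overtime = list(r.db(db).table("Course").limit(1)
                # embed the related Fcq rows as an array, as the query above does
                .merge(lambda doc: {
                    "fcq_data": r.db(db).table("Fcq")
                                 .get_all(r.args(doc["fcqs"])).coerce_to("array")})
                .map(lambda doc: doc["fcq_data"]
                     .group("yearterm").ungroup()
                     # each val is {"group": yearterm, "reduction": [fcq, ...]}
                     .map(lambda val: [val["group"].coerce_to("string"),
                                       {"total_fcqs": val["reduction"].count()}])
                     .coerce_to("object"))
                .run(conn))
# e.g. [{"20144": {"total_fcqs": 9}, "20151": {"total_fcqs": 12}}]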
Example #35
0
def args(key):
    '''Create a rethinkdb ARGS object out of a list'''
    if type(key) is str:
        return key
    return r.args(key)
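A possible call site for this helper, assuming the args() function above is in scope: it lets callers hand get_all() either a single key or a list of keys without branching themselves. Table name and connection details are hypothetical.

import rethinkdb as r

conn = r.connect("localhost", 28015, db="test")  # assumed connection details

one = list(r.table("users").get_all(args("user-1")).run(conn))               # single key passes through
many = list(r.table("users").get_all(args(["user-1", "user-2"])).run(conn))  # list becomes r.args(...)

Note that the helper only special-cases str; r.args() expects an array, so other scalar key types would need the same pass-through treatment.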
Example #36
0
def model_overtime(db, conn):
    def _grades_overtime(doc, val):
        return {
            "grade_data_averages": r.branch(
                ((doc.get_field("grades").count() > 0) & ((val["group"] % 10) != 4)),
                {
                    "percent_a": val["reduction"].get_field("percent_a").avg().default(None),
                    "percent_b": val["reduction"].get_field("percent_b").avg().default(None),
                    "percent_c": val["reduction"].get_field("percent_c").avg().default(None),
                    "percent_d": val["reduction"].get_field("percent_d").avg().default(None),
                    "percent_f": val["reduction"].get_field("percent_f").avg().default(None),
                    "percent_incomplete": val["reduction"].get_field("percent_incomplete").avg().default(None),
                    "percent_c_minus_or_below": val["reduction"]
                    .get_field("percent_c_minus_or_below")
                    .avg()
                    .default(None),
                    "average_grade": val["reduction"].get_field("average_grade").avg().default(None),
                },
                None,
            )
        }

    def _general_overtime(doc, val):
        return {
            "total_fcqs": val["reduction"].count(),
            "total_forms_requested": val["reduction"].sum("forms_requested"),
            "total_forms_returned": val["reduction"].sum("forms_returned"),
            "denver_data_averages": r.branch(
                ((doc.get_field("campus").default(None) == "DN") & (val["group"] <= 20144)),
                {
                    "r_fairness": val["reduction"].get_field("denver_data").get_field("r_fairness").avg().default(None),
                    "r_presentation": val["reduction"]
                    .get_field("denver_data")
                    .get_field("r_presentation")
                    .avg()
                    .default(None),
                    "r_workload": val["reduction"].get_field("denver_data").get_field("r_workload").avg().default(None),
                    "r_diversity": val["reduction"]
                    .get_field("denver_data")
                    .get_field("r_diversity")
                    .avg()
                    .default(None),
                    "r_accessibility": val["reduction"]
                    .get_field("denver_data")
                    .get_field("r_accessibility")
                    .avg()
                    .default(None),
                    "r_learning": val["reduction"].get_field("denver_data").get_field("r_learning").avg().default(None),
                },
                None,
            ),
        }

    def _general_stats(doc):
        return {
            "total_fcqs": doc["fcq_data"].count(),
            "total_forms_requested": doc["fcq_data"].sum("forms_requested"),
            "total_forms_returned": doc["fcq_data"].sum("forms_returned"),
        }

    def _instructor_overtime(doc, val, unchained=False):
        iot = {
            "GR_fcqs": val["reduction"].filter({"level": "GR"}).count(),
            "UD_fcqs": val["reduction"].filter({"level": "UD"}).count(),
            "LD_fcqs": val["reduction"].filter({"level": "LD"}).count(),
            "total_courses": val["reduction"].get_field("course_id").distinct().count(),
            "instructoroverall_average": val["reduction"].get_field("instructoroverall").avg().default(None),
            "instructoroverall_sd_average": val["reduction"].get_field("instructoroverall_sd").avg().default(None),
            "instructor_effectiveness_average": val["reduction"]
            .get_field("instructor_effectiveness")
            .avg()
            .default(None),
            "instructor_availability_average": val["reduction"]
            .get_field("instructor_availability")
            .avg()
            .default(None),
            "instructor_respect_average": val["reduction"].get_field("instructor_respect").avg().default(None),
        }
        chain = {} if unchained else _general_overtime(doc, val)
        iot.update(chain)
        return iot

    def _instructor_stats(doc, unchained=False):
        iot = {
            "GR_fcqs": doc["fcq_data"].filter({"level": "GR"}).count(),
            "UD_fcqs": doc["fcq_data"].filter({"level": "UD"}).count(),
            "LD_fcqs": doc["fcq_data"].filter({"level": "LD"}).count(),
            "total_courses": doc["fcq_data"].get_field("course_id").distinct().count(),
            "instructoroverall_average": doc["fcq_data"].get_field("instructoroverall").avg().default(None),
            "instructoroverall_sd_average": doc["fcq_data"].get_field("instructoroverall_sd").avg().default(None),
            "instructor_effectiveness_average": doc["fcq_data"]
            .get_field("instructor_effectiveness")
            .avg()
            .default(None),
            "instructor_availability_average": doc["fcq_data"].get_field("instructor_availability").avg().default(None),
            "instructor_respect_average": doc["fcq_data"].get_field("instructor_respect").avg().default(None),
        }
        chain = {} if unchained else _general_stats(doc)
        iot.update(chain)
        return iot

    def _course_overtime(doc, val, unchained=False):
        cot = {
            "total_instructors": val["reduction"].get_field("instructor_id").distinct().count(),
            "courseoverall_average": val["reduction"].get_field("courseoverall").avg().default(None),
            "courseoverall_sd_average": val["reduction"].get_field("courseoverall_sd").avg().default(None),
            "course_challenge_average": val["reduction"].get_field("course_challenge").avg().default(None),
            "course_howmuchlearned_average": val["reduction"].get_field("course_howmuchlearned").avg().default(None),
            "course_priorinterest_average": val["reduction"].get_field("course_priorinterest").avg().default(None),
        }
        chain = {} if unchained else _general_overtime(doc, val)
        cot.update(chain)
        return cot

    def _course_stats(doc, unchained=False):
        cot = {
            "total_instructors": doc["fcq_data"].get_field("instructor_id").distinct().count(),
            "courseoverall_average": doc["fcq_data"].get_field("courseoverall").avg().default(None),
            "courseoverall_sd_average": doc["fcq_data"].get_field("courseoverall_sd").avg().default(None),
            "course_challenge_average": doc["fcq_data"].get_field("course_challenge").avg().default(None),
            "course_howmuchlearned_average": doc["fcq_data"].get_field("course_howmuchlearned").avg().default(None),
            "course_priorinterest_average": doc["fcq_data"].get_field("course_priorinterest").avg().default(None),
        }
        chain = {} if unchained else _general_stats(doc)
        cot.update(chain)
        return cot

    def _department_overtime(doc, val):
        iot = _instructor_overtime(doc, val, unchained=True)
        cot = _course_overtime(doc, val, unchained=True)
        got = _general_overtime(doc, val)
        dot = {
            "GR_courses": val["reduction"].filter({"level": "GR"}).get_field("course_id").distinct().count(),
            "UD_courses": val["reduction"].filter({"level": "UD"}).get_field("course_id").distinct().count(),
            "LD_courses": val["reduction"].filter({"level": "LD"}).get_field("course_id").distinct().count(),
            "TA_instructors": val["reduction"]
            .filter({"instructor_group": "TA"})
            .get_field("instructor_id")
            .distinct()
            .count(),
            "OTH_instructors": val["reduction"]
            .filter({"instructor_group": "OTH"})
            .get_field("instructor_id")
            .distinct()
            .count(),
            "TTT_instructors": val["reduction"]
            .filter({"instructor_group": "TTT"})
            .get_field("instructor_id")
            .distinct()
            .count(),
            "TA_instructoroverall_average": val["reduction"]
            .filter({"instructor_group": "TA"})
            .get_field("instructoroverall")
            .avg()
            .default(None),
            "OTH_instructoroverall_average": val["reduction"]
            .filter({"instructor_group": "OTH"})
            .get_field("instructoroverall")
            .avg()
            .default(None),
            "TTT_instructoroverall_average": val["reduction"]
            .filter({"instructor_group": "TTT"})
            .get_field("instructoroverall")
            .avg()
            .default(None),
            "GR_courseoverall_average": val["reduction"]
            .filter({"level": "GR"})
            .get_field("courseoverall")
            .avg()
            .default(None),
            "UD_courseoverall_average": val["reduction"]
            .filter({"level": "UD"})
            .get_field("courseoverall")
            .avg()
            .default(None),
            "LD_courseoverall_average": val["reduction"]
            .filter({"level": "LD"})
            .get_field("courseoverall")
            .avg()
            .default(None),
            "GR_forms_requested": val["reduction"].filter({"level": "GR"}).sum("forms_requested"),
            "UD_forms_requested": val["reduction"].filter({"level": "UD"}).sum("forms_requested"),
            "LD_forms_requested": val["reduction"].filter({"level": "LD"}).sum("forms_requested"),
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    def _department_stats(doc):
        iot = _instructor_stats(doc, unchained=True)
        cot = _course_stats(doc, unchained=True)
        got = _general_stats(doc)
        dot = {
            "GR_courses": doc["fcq_data"].filter({"level": "GR"}).get_field("course_id").distinct().count(),
            "UD_courses": doc["fcq_data"].filter({"level": "UD"}).get_field("course_id").distinct().count(),
            "LD_courses": doc["fcq_data"].filter({"level": "LD"}).get_field("course_id").distinct().count(),
            "TA_instructors": doc["fcq_data"]
            .filter({"instructor_group": "TA"})
            .get_field("instructor_id")
            .distinct()
            .count(),
            "OTH_instructors": doc["fcq_data"]
            .filter({"instructor_group": "OTH"})
            .get_field("instructor_id")
            .distinct()
            .count(),
            "TTT_instructors": doc["fcq_data"]
            .filter({"instructor_group": "TTT"})
            .get_field("instructor_id")
            .distinct()
            .count(),
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    # model_overtime
    for model in ["Instructor", "Department", "Course"]:
        _model_overtime = {
            "Instructor": _instructor_overtime,
            "Department": _department_overtime,
            "Course": _course_overtime,
        }[model]
        _model_stats = {"Instructor": _instructor_stats, "Department": _department_stats, "Course": _course_stats}[
            model
        ]
        overtime_query = (
            r.db(db)
            .table(model)
            .merge(
                lambda doc: {
                    "fcq_data": r.branch(
                        doc["fcqs"].count() > 0,
                        r.db(db).table("Fcq").get_all(r.args(doc["fcqs"])).coerce_to("array"),
                        [],
                    ),
                    "grade_data": r.branch(
                        doc["grades"].count() > 0,
                        r.db(db).table("Grade").get_all(r.args(doc["grades"])).coerce_to("array"),
                        [],
                    ),
                }
            )
            .for_each(
                lambda doc: r.db(db)
                .table(model)
                .get(doc["id"])
                .update(
                    {
                        "fcq_data": None,
                        "fcqs_overtime": doc["fcq_data"]
                        .group("yearterm")
                        .ungroup()
                        .map(lambda val: [val["group"].coerce_to("string"), _model_overtime(doc, val)])
                        .coerce_to("object"),
                        "fcqs_stats": _model_stats(doc),
                        "grades_overtime": doc["grade_data"]
                        .group("yearterm")
                        .ungroup()
                        .map(lambda val: [val["group"].coerce_to("string"), _grades_overtime(doc, val)])
                        .coerce_to("object"),
                        "grades_stats": None,
                    }
                )
            )
            .run(conn, array_limit=200000)
        )
        logging.info(overtime_query)
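Compared with the earlier model_overtime, the merge here wraps each lookup in r.branch() so documents whose fcqs or grades arrays are empty fall back to [] instead of handing get_all() an empty argument list, which would otherwise error. A stripped-down sketch of that guard, with assumed connection and database names:

import rethinkdb as r

conn = r.connect("localhost", 28015)  # assumed connection details
db = "fcq"                            # assumed database name

guarded = list(r.db(db).table("Course").limit(1)
               .merge(lambda doc: {
                   "fcq_data": r.branch(
                       doc["fcqs"].count() > 0,
                       r.db(db).table("Fcq").get_all(r.args(doc["fcqs"])).coerce_to("array"),
                       [])})  # empty join instead of an erroring get_all()
               .run(conn))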
Example #37
0
    def run(self):
        self.tid = get_tid()
        logs.downloads.debug(
            'RUN-DOWNLOAD-THREAD-------------------------------------')
        if self.stop is False:
            self.r_conn = new_rethink_connection()
            for c in r.table('media').get_all(r.args(['Deleting','Deleted','Downloaded','DownloadStarting', 'Downloading','Download','DownloadAborting']), index='status').\
                    pluck('id',
                          'path',
                          'url-isard',
                          'url-web',
                          'status'
                          ).merge(
                {'table': 'media'}).changes(include_initial=True).union(
                r.table('domains').get_all(r.args(['Downloaded','DownloadStarting', 'Downloading','DownloadAborting']), index='status').\
                        pluck('id',
                              'create_dict',
                              'url-isard',
                              'url-web',
                              'status').merge(
                    {"table": "domains"}).changes(include_initial=True)).union(
                r.table('engine').pluck('threads', 'status_all_threads').merge({'table': 'engine'}).changes()).run(self.r_conn):

                if self.stop:
                    break
                if c.get('new_val', None) is not None:
                    if c['new_val'].get('table', False) == 'engine':
                        if c['new_val']['status_all_threads'] == 'Stopping':
                            break
                        else:
                            continue

                logs.downloads.debug('DOWNLOAD CHANGES DETECTED:')
                logs.downloads.debug(pprint.pformat(c))

                if c.get('old_val', None) is None:
                    if c['new_val']['status'] == 'DownloadStarting':
                        self.start_download(c['new_val'])
                elif c.get('new_val', None) is None:
                    if c['old_val']['status'] in ['DownloadAborting']:
                        self.remove_download_thread(c['old_val'])

                elif 'old_val' in c and 'new_val' in c:
                    if c['old_val']['status'] == 'FailedDownload' and c[
                            'new_val']['status'] == 'DownloadStarting':
                        self.start_download(c['new_val'])

                    elif c['old_val']['status'] == 'Downloaded' and c[
                            'new_val']['status'] == 'Deleting':
                        if c['new_val']['table'] == 'media':
                            self.delete_media(c['new_val'])

                    elif c['old_val']['status'] == 'Deleting' and c['new_val'][
                            'status'] == 'Deleted':
                        if c['new_val']['table'] == 'media':
                            remove_media(c['new_val']['id'])

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'FailedDownload':
                        pass

                    elif c['old_val']['status'] == 'DownloadStarting' and c[
                            'new_val']['status'] == 'Downloading':
                        pass

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'Downloaded':
                        pass

                    elif c['old_val']['status'] == 'Downloading' and c[
                            'new_val']['status'] == 'DownloadAborting':
                        self.abort_download(c['new_val'])
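The feed driving this loop combines several changefeeds into one: each status-filtered table is tagged with a 'table' field via merge() and the changes() streams are unioned, so a single for-loop can dispatch media, domain, and engine events. A minimal two-table sketch, assuming a local connection, hypothetical database name, and the same "status" secondary index:

import rethinkdb as r

conn = r.connect("localhost", 28015, db="isard")  # assumed connection/db names

feed = (r.table("media")
        .get_all(r.args(["DownloadStarting", "Downloading"]), index="status")
        .pluck("id", "status").merge({"table": "media"})
        .changes(include_initial=True)
        .union(r.table("domains")
               .get_all(r.args(["DownloadStarting", "Downloading"]), index="status")
               .pluck("id", "status").merge({"table": "domains"})
               .changes(include_initial=True))
        .run(conn))

for change in feed:  # blocks until the next change arrives
    print(change.get("new_val"))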
Example #38
0
        print '\nRepo with GitHub path %s occurs %s times' % (
                owner_repo,
                len(repos))

        # Use the most recently scraped row as the canonical row
        canonical = repos.pop()

        assert canonical
        print 'Using %s as the canonical row' % canonical['id']

        canonical_owner_repo = (canonical['owner'], canonical['repo_name'])

        if canonical_owner_repo != owner_repo:
            print "Normalizing %s to %s for our canonical row" % (
                    canonical_owner_repo,
                    owner_repo)

            r.table(table).get(canonical['id']).update({
                'owner': canonical['owner'].lower(),
                'repo_name': canonical['repo_name'].lower()}).run(r_conn())
            updated += 1

        if repos:
            dupe_ids = [dupe['id'] for dupe in repos]
            print 'Deleting duplicate rows: %s' % ', '.join(dupe_ids)
            r.table(table).get_all(r.args(dupe_ids)).delete().run(r_conn())
            deleted += len(repos)

    print "Updated %d rows and deleted %d" % (updated, deleted)
Example #39
0
    def get(self, id=None, o=None):
        """
            - GET /projects
                (public) Projects list

            - GET /projects/<id>
                (public) Project with <id>

            - GET /projects/<id>/(users|slices)
                (auth) Users/Slices list of the project with <id>

            :return:
            """

        response = []
        current_user = self.get_current_user()

        # GET /projects
        if not id and not o:
            if not current_user:
                self.userError('permission denied')
                return
            if self.isAdmin():
                f = {}
            else:
                f = lambda project: project["pi_users"].contains(current_user[
                    'id'])
            cursor = yield r.table('projects') \
                .pluck(self.fields['projects']) \
                .filter(f) \
                .merge(lambda project: {
                    'authority': r.table('authorities').get(project['authority']) \
                                                        .pluck(self.fields_short['authorities']) \
                                                        .default({'id': project['authority']})
                }) \
                .merge(lambda project: {
                    'slices': r.table('slices') \
                       .get_all(r.args(project['slices'])) \
                       .distinct() \
                       .pluck(self.fields_short['slices']) \
                       .coerce_to('array')
                }) \
                .merge(lambda project: {
                    'pi_users': r.table('users') \
                           .get_all(r.args(project['pi_users'])) \
                           .distinct() \
                           .pluck(self.fields_short['users']) \
                           .coerce_to('array')
                }) \
                .merge(lambda project: {
                    'users': r.table('users') \
                           .get_all(r.args(project['users'])) \
                           .distinct() \
                           .pluck(self.fields_short['users']) \
                           .coerce_to('array')
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                project = yield cursor.next()
                response.append(project)

        # GET /projects/<id>
        elif not o and id and self.isUrn(id):
            if not current_user:
                self.userError('permission denied')
                return

            cursor = yield r.table('projects') \
                .pluck(self.fields['projects']) \
                .filter({'id': id}) \
                .filter(lambda project:
                        project["pi_users"].contains(current_user['id']) or
                        project["users"].contains(current_user['id'])) \
                .merge(lambda project: {
                    'authority': r.table('authorities').get(project['authority']) \
                                                        .pluck(self.fields_short['authorities']) \
                                                        .default({'id': project['authority']})
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                project = yield cursor.next()
                response.append(project)

        # GET /projects/<id>/users
        elif id and self.isUrn(id) and o == 'users':
            cursor = yield r.table(o) \
                .pluck(self.fields[o]) \
                .filter(lambda user: user["projects"].contains(id)) \
                .merge(lambda user: {
                    'authority': r.table('authorities').get(user['authority']) \
                                                        .pluck(self.fields_short['authorities']) \
                                                        .default({'id': user['authority']})
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                response.append(item)

        # GET /projects/<id>/slices
        elif id and self.isUrn(id) and o == 'slices':
            cursor = yield r.table(o) \
                .pluck(self.fields[o]) \
                .filter({ "project": id }) \
                .merge(lambda slice: {
                    'project': r.table('projects').get(slice['project']) \
                                                .pluck(self.fields_short['projects']) \
                                                .default({'id': slice['project']})
                }) \
                .merge(lambda slice: {
                    'authority': r.table('authorities').get(slice['authority']) \
                           .pluck(self.fields_short['authorities']) \
                           .default({'id': slice['authority']})
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                response.append(item)

        else:
            self.userError("invalid request")
            return

        self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
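Each branch of this handler builds its response the same way: pluck a field list, then merge() per-row lookups, using get(...).default(...) for a single foreign row and get_all(r.args(...)).coerce_to('array') for embedded lists. A trimmed sketch of that join, with short field lists standing in for self.fields_short and an assumed connection:

import rethinkdb as r

conn = r.connect("localhost", 28015, db="myops")  # assumed connection/db names

projects = list(r.table("projects")
                .pluck("id", "authority", "slices")
                .merge(lambda project: {
                    # single foreign row, with a stub when the lookup misses
                    "authority": r.table("authorities").get(project["authority"])
                                  .pluck("id", "name")
                                  .default({"id": project["authority"]}),
                    # embedded list joined through r.args()
                    "slices": r.table("slices")
                               .get_all(r.args(project["slices"]))
                               .pluck("id", "name")
                               .coerce_to("array")})
                .run(conn))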
Example #40
0
    def get(cls,
            _id=None,
            index=None,
            filter=None,
            contains=None,
            raw=None,
            orderby=None,
            limit=0,
            db={}):
        """Get

		Returns one or more records from the table. Send no ID to fetch all
		records in the table. Set an index to look for something other than the
		primary key

		Args:
			_id (str|str[]): The ID(s) to fetch from the table
			index (str): If set, used as the index to search instead of the
				primary key
			filter (dict): If set, used as an additional filter to the ID or
				index lookup
			raw (bool|list): If set to true, raw dicts will be returned instead
				of Document instances. If set to a list or tuple, only those
				fields listed will be returned
			orderby (str|str[]): The field(s) to order the result by
			limit (uint): The number of records to return
			db (dict): Optional DB info
				'server' for the name of the host info passed to server()
				'postfix' for the postfix added to the DB name

		Returns:
			Table|Table[]|dict|dict[]

		Raises:
			StorageException
		"""

        # Assume multiple records
        bMultiple = True

        # Get the info
        dInfo = cls.info(db)

        # If there is an index passed
        if index:

            # If the index doesn't exist
            if index not in dInfo['conf']['indexes']:
                raise StorageException('no index', index, 'tree')

        # Get a connection to the server
        with connect_with(dInfo['server']) as oCon:

            # Create a cursor for all records
            oCur = r \
             .db(dInfo['db']) \
             .table(dInfo['tree']._name)

            # If all records must be returned, we don't need to modify the
            #	cursor any further
            if _id is None:
                pass

            # Else, if there's an index
            elif index:

                # If it's a tuple
                if isinstance(_id, tuple):

                    # Check if one of the values is None
                    iNone = -1
                    for i in range(len(_id)):

                        # If a value is None
                        if _id[i] is None:

                            # If we already have an index
                            if iNone != -1:
                                raise StorageException(
                                    'can\'t list more than one None in an index tuple'
                                )

                            # Store the index
                            iNone = i

                    # If we have one
                    if iNone > -1:

                        # Copy the tuples
                        idMax = list(_id)
                        idMin = list(_id)

                        # Change the None accordingly
                        idMax[iNone] = r.maxval
                        idMin[iNone] = r.minval

                        # Call between instead of get_all
                        oCur = oCur.between(idMin, idMax, index=index)

                    # Else we have no Nones, pass it through
                    else:
                        oCur = oCur.get_all(_id, index=index)

                # Else if it's a list
                elif isinstance(_id, list):
                    oCur = oCur.get_all(r.args(_id), index=index)

                # Else just pass it through
                else:
                    oCur = oCur.get_all(_id, index=index)

            # Else, we are dealing with the primary key
            else:

                # If we got multiple IDs
                if isinstance(_id, (tuple, list)):

                    # Continue to filter using get all
                    oCur = oCur.get_all(*_id)

                # Else we want one record
                else:

                    # Turn off the multiple flag
                    bMultiple = False

                    # Filter to a single ID
                    oCur = oCur.get(_id)

            # If an additional filter was passed
            if filter:
                oCur = oCur.filter(filter)

            # If there's a contains
            if contains:

                # If we don't have a list
                if not isinstance(contains[1], (tuple, list)):
                    contains = [contains[0], [contains[1]]]

                # Add the contains filter
                oCur = oCur.filter(
                    lambda obj: obj[contains[0]].contains(*contains[1]))

            # If there's a limit
            if limit > 0:
                oCur = oCur.limit(limit)

            # If a raw request was done with specific fields
            if isinstance(raw, (tuple, list)):
                oCur = oCur.pluck(*raw).default(None)

            # If an order by list was sent
            if isinstance(orderby, (tuple, list)):
                oCur = oCur.order_by(*orderby)
            # Else if an order field was sent
            elif isinstance(orderby, basestring):
                oCur = oCur.order_by(orderby)

            try:
                # Run the request
                itRes = oCur.run(oCon)

            except r.errors.ReqlOpFailedError as e:

                # The index doesn't exist
                if e.args[0][:5] == 'Index':
                    raise StorageException('no index', index, 'table')

                # Else, re-raise
                raise e

            # If we are expecting a single record
            if limit == 1:

                # Try to get one row
                try:
                    dRow = itRes.next()
                except r.net.DefaultCursorEmpty as e:
                    return None

                # If it's raw, don't instantiate it
                return (raw and dRow or cls(dRow, db))

            # If there's no data
            if not itRes:
                if bMultiple:
                    if limit == 1: return None
                    else: return []
                else: return None

            # If multiple records are expected
            if bMultiple:

                # If Raw requested, return as is
                if raw:
                    return [d for d in itRes]

                # Else create instances for each
                else:
                    return [cls(d, db) for d in itRes]

            # Else, one record requested
            else:
                return raw and itRes or cls(itRes, db)
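The tuple handling in this method is its least obvious part: a None inside a compound-index key acts as a wildcard by switching from get_all() to between() with r.minval / r.maxval substituted in that position. A small sketch with a hypothetical compound index and connection:

import rethinkdb as r

conn = r.connect("localhost", 28015, db="storage")  # assumed connection/db names

key = ("order-17", None)  # None in the second position means "any value"
id_min = [v if v is not None else r.minval for v in key]
id_max = [v if v is not None else r.maxval for v in key]

# "order_line" is a hypothetical compound index on (order_id, line_no)
rows = list(r.table("line_items")
            .between(id_min, id_max, index="order_line")
            .run(conn))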
Example #41
0
    def get(self, id=None, o=None):
        """
            - GET /users
                (auth) Users list

            - GET /users/<id>
                (auth) User with <id>

            - GET /users/<email>
                (auth) User with <email>

            - GET /users/(projects|slices|authorities)
                (auth) Projects/Slices/Authorities list of the authenticated user

            - GET /users/<id>/(projects|slices|authorities)
                (auth) Projects/Slices list of the user with <id>

            :return:
            """

        response = []
        current_user = self.get_current_user()
        if not current_user:
            self.userError("not authenticated")
            return

        # GET /users
        if not id and not o:
            cursor = yield r.table('users') \
                .pluck(self.fields['users']) \
                .merge(lambda user: {
                    'authority': r.table('authorities').get(user['authority']) \
                                                       .pluck(self.fields_short['authorities']) \
                                                       .default({'id' : user['authority']})
                }) \
                .merge(lambda user: {
                'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \
                                                       .distinct() \
                                                       .pluck(self.fields_short['authorities']) \
                                                       .coerce_to('array')
                 }) \
                .merge(lambda user: {
                    'projects': r.table('projects') \
                           .get_all(r.args(user['projects'])) \
                           .distinct() \
                           .pluck(self.fields_short['projects']) \
                           .coerce_to('array')
                }) \
                .merge(lambda user: {
                    'slices': r.table('slices') \
                           .get_all(r.args(user['slices'])) \
                           .distinct() \
                           .pluck(self.fields_short['slices']) \
                           .coerce_to('array')
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                users = yield cursor.next()
                response.append(users)


        # GET /users/<id> or /users/<email>
        elif not o and id:
            if self.isUrn(id):
                f = {'id': id}
            elif self.isEmail(id):
                f = {'email': id}
            else:
                self.userError('invalid user id or email')
                return
            if not current_user:
                self.userError('permission denied')
                return

            cursor = yield r.table('users') \
                .pluck(self.fields['users']) \
                .filter(f) \
                .merge(lambda user: {
                    'authority': r.table('authorities').get(user['authority']) \
                                                       .pluck(self.fields_short['authorities']) \
                                                       .default({'id': user['authority']})
                }) \
                .merge(lambda user: {
                    'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \
                                                       .distinct() \
                                                       .pluck(self.fields_short['authorities']) \
                                                       .coerce_to('array')
                 }) \
                .merge(lambda user: {
                    'projects': r.table('projects') \
                           .get_all(r.args(user['projects'])) \
                           .distinct() \
                           .pluck(self.fields_short['projects']) \
                           .coerce_to('array')
                }) \
                .merge(lambda user: {
                    'slices': r.table('slices') \
                           .get_all(r.args(user['slices'])) \
                           .distinct() \
                           .pluck(self.fields_short['slices']) \
                           .coerce_to('array')
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                user = yield cursor.next()
                response.append(user)

        # GET /users/[<id>/]projects
        elif o == 'projects':
            if not id or not self.isUrn(id):
                try:
                    id = current_user['id']
                except Exception as e:
                    self.serverError(" user is not logged in")
                    return


            cursor = yield r.table(o) \
                .pluck(self.fields[o]) \
                .filter(lambda project: project["pi_users"].contains(id)) \
                .merge(lambda project: {
                    'authority': r.table('authorities').get(project['authority']) \
                           .pluck(self.fields_short['authorities']) \
                           .default({'id': project['authority']})
                }) \
                .merge(lambda project: {
                    'slices': r.table('slices') \
                           .get_all(r.args(project['slices'])) \
                           .distinct() \
                           .pluck(self.fields_short['slices']) \
                           .coerce_to('array')
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                response.append(item)

        # GET /users/[<id>/]slices
        elif o == 'slices':
            if not id or not self.isUrn(id):
                try:
                    id = current_user['id']
                except Exception as e:
                    self.serverError(" user is not logged in")
                    return


            cursor = yield r.table(o) \
                .pluck(self.fields[o]) \
                .filter(lambda slice: slice["users"].contains(id)) \
                .merge(lambda slice: {
                    'project': r.table('projects').get(slice['project']) \
                           .pluck(self.fields_short['projects']) \
                           .default({'id': slice['project']})
                }) \
                .merge(lambda slice: {
                    'authority': r.table('authorities').get(slice['authority']) \
                           .pluck(self.fields_short['authorities']) \
                           .default({'id': slice['authority']})
                }) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                item = yield cursor.next()
                response.append(item)

        # GET /users/[<id>/]authorities
        elif o == 'authorities':
            if not id or not self.isUrn(id):
                try:
                    id = current_user['id']
                except Exception as e:
                    self.serverError(" user is not logged in")
                    return


            cursor = yield r.table('authorities') \
                .pluck(self.fields['authorities']) \
                .filter(lambda authority:
                        authority["pi_users"].contains(current_user['id'])
                        or
                        authority["users"].contains(current_user['id'])) \
                .run(self.dbconnection)
            while (yield cursor.fetch_next()):
                authority = yield cursor.next()
                response.append(authority)

        else:
            self.userError("invalid request")
            return

        self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
Example #42
0
        print '\nRepo with GitHub path %s occurs %s times' % (owner_repo,
                                                              len(repos))

        # Use the most recently scraped row as the canonical row
        canonical = repos.pop()

        assert canonical
        print 'Using %s as the canonical row' % canonical['id']

        canonical_owner_repo = (canonical['owner'], canonical['repo_name'])

        if canonical_owner_repo != owner_repo:
            print "Normalizing %s to %s for our canonical row" % (
                canonical_owner_repo, owner_repo)

            r.table(table).get(canonical['id']).update({
                'owner':
                canonical['owner'].lower(),
                'repo_name':
                canonical['repo_name'].lower()
            }).run(r_conn())
            updated += 1

        if repos:
            dupe_ids = [dupe['id'] for dupe in repos]
            print 'Deleting duplicate rows: %s' % ', '.join(dupe_ids)
            r.table(table).get_all(r.args(dupe_ids)).delete().run(r_conn())
            deleted += len(repos)

    print "Updated %d rows and deleted %d" % (updated, deleted)
Example #43
0
def model_overtime(db, conn):

    def _grades_overtime(doc, val):
        return r.branch(((doc.get_field('grades').count() > 0) & ((val['group'] % 10) != 4)), {
                'percent_a': val['reduction'].get_field('percent_a').avg().default(None),
                'percent_b': val['reduction'].get_field('percent_b').avg().default(None),
                'percent_c': val['reduction'].get_field('percent_c').avg().default(None),
                'percent_d': val['reduction'].get_field('percent_d').avg().default(None),
                'percent_f': val['reduction'].get_field('percent_f').avg().default(None),
                'percent_incomplete': val['reduction'].get_field('percent_incomplete').avg().default(None),
                'percent_c_minus_or_below': val['reduction'].get_field('percent_c_minus_or_below').avg().default(None),
                'average_grade': val['reduction'].get_field('average_grade').avg().default(None),
            }, None)

    def _expanded_grades_overtime(doc, val):
        return r.branch(((doc.get_field('grades').count() > 0) & ((val['group'] % 10) != 4)), {
                    'percent_a': val['reduction'].get_field('percent_a').avg().default(None),
                    'percent_b': val['reduction'].get_field('percent_b').avg().default(None),
                    'percent_c': val['reduction'].get_field('percent_c').avg().default(None),
                    'percent_d': val['reduction'].get_field('percent_d').avg().default(None),
                    'percent_f': val['reduction'].get_field('percent_f').avg().default(None),
                    'percent_incomplete': val['reduction'].get_field('percent_incomplete').avg().default(None),
                    'percent_c_minus_or_below': val['reduction'].get_field('percent_c_minus_or_below').avg().default(None),
                    'average_grade': val['reduction'].get_field('average_grade').avg().default(None),
                    'GR_percent_a': val['reduction'].filter({'level': 'GR'}).get_field('percent_a').avg().default(None),
                    'GR_percent_b': val['reduction'].filter({'level': 'GR'}).get_field('percent_b').avg().default(None),
                    'GR_percent_c': val['reduction'].filter({'level': 'GR'}).get_field('percent_c').avg().default(None),
                    'GR_percent_d': val['reduction'].filter({'level': 'GR'}).get_field('percent_d').avg().default(None),
                    'GR_percent_f': val['reduction'].filter({'level': 'GR'}).get_field('percent_f').avg().default(None),
                    'GR_percent_incomplete': val['reduction'].filter({'level': 'GR'}).get_field('percent_incomplete').avg().default(None),
                    'GR_percent_c_minus_or_below': val['reduction'].filter({'level': 'GR'}).get_field('percent_c_minus_or_below').avg().default(None),
                    'GR_average_grade': val['reduction'].filter({'level': 'GR'}).get_field('average_grade').avg().default(None),
                    'UD_percent_a': val['reduction'].filter({'level': 'UD'}).get_field('percent_a').avg().default(None),
                    'UD_percent_b': val['reduction'].filter({'level': 'UD'}).get_field('percent_b').avg().default(None),
                    'UD_percent_c': val['reduction'].filter({'level': 'UD'}).get_field('percent_c').avg().default(None),
                    'UD_percent_d': val['reduction'].filter({'level': 'UD'}).get_field('percent_d').avg().default(None),
                    'UD_percent_f': val['reduction'].filter({'level': 'UD'}).get_field('percent_f').avg().default(None),
                    'UD_percent_incomplete': val['reduction'].filter({'level': 'UD'}).get_field('percent_incomplete').avg().default(None),
                    'UD_percent_c_minus_or_below': val['reduction'].filter({'level': 'UD'}).get_field('percent_c_minus_or_below').avg().default(None),
                    'UD_average_grade': val['reduction'].filter({'level': 'UD'}).get_field('average_grade').avg().default(None),
                    'LD_percent_a': val['reduction'].filter({'level': 'LD'}).get_field('percent_a').avg().default(None),
                    'LD_percent_b': val['reduction'].filter({'level': 'LD'}).get_field('percent_b').avg().default(None),
                    'LD_percent_c': val['reduction'].filter({'level': 'LD'}).get_field('percent_c').avg().default(None),
                    'LD_percent_d': val['reduction'].filter({'level': 'LD'}).get_field('percent_d').avg().default(None),
                    'LD_percent_f': val['reduction'].filter({'level': 'LD'}).get_field('percent_f').avg().default(None),
                    'LD_percent_incomplete': val['reduction'].filter({'level': 'LD'}).get_field('percent_incomplete').avg().default(None),
                    'LD_percent_c_minus_or_below': val['reduction'].filter({'level': 'LD'}).get_field('percent_c_minus_or_below').avg().default(None),
                    'LD_average_grade': val['reduction'].filter({'level': 'LD'}).get_field('average_grade').avg().default(None),
                }, None)

    def _grades_stats(doc):
        return r.branch(((doc.get_field('grades').count() > 0)), {
                'percent_a': doc['grade_data'].get_field('percent_a').avg().default(None),
                'percent_b': doc['grade_data'].get_field('percent_b').avg().default(None),
                'percent_c': doc['grade_data'].get_field('percent_c').avg().default(None),
                'percent_d': doc['grade_data'].get_field('percent_d').avg().default(None),
                'percent_f': doc['grade_data'].get_field('percent_f').avg().default(None),
                'percent_incomplete': doc['grade_data'].get_field('percent_incomplete').avg().default(None),
                'percent_c_minus_or_below': doc['grade_data'].get_field('percent_c_minus_or_below').avg().default(None),
                'average_grade': doc['grade_data'].get_field('average_grade').avg().default(None),
            }, None)

    def _expanded_grades_stats(doc):
        return r.branch(((doc.get_field('grades').count() > 0)), {
                'percent_a': doc['grade_data'].get_field('percent_a').avg().default(None),
                'percent_b': doc['grade_data'].get_field('percent_b').avg().default(None),
                'percent_c': doc['grade_data'].get_field('percent_c').avg().default(None),
                'percent_d': doc['grade_data'].get_field('percent_d').avg().default(None),
                'percent_f': doc['grade_data'].get_field('percent_f').avg().default(None),
                'percent_incomplete': doc['grade_data'].get_field('percent_incomplete').avg().default(None),
                'percent_c_minus_or_below': doc['grade_data'].get_field('percent_c_minus_or_below').avg().default(None),
                'average_grade': doc['grade_data'].get_field('average_grade').avg().default(None),
                'GR_percent_a': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_a').avg().default(None),
                'GR_percent_b': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_b').avg().default(None),
                'GR_percent_c': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_c').avg().default(None),
                'GR_percent_d': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_d').avg().default(None),
                'GR_percent_f': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_f').avg().default(None),
                'GR_percent_incomplete': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_incomplete').avg().default(None),
                'GR_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_c_minus_or_below').avg().default(None),
                'GR_average_grade': doc['grade_data'].filter({'level': 'GR'}).get_field('average_grade').avg().default(None),
                'UD_percent_a': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_a').avg().default(None),
                'UD_percent_b': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_b').avg().default(None),
                'UD_percent_c': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_c').avg().default(None),
                'UD_percent_d': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_d').avg().default(None),
                'UD_percent_f': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_f').avg().default(None),
                'UD_percent_incomplete': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_incomplete').avg().default(None),
                'UD_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_c_minus_or_below').avg().default(None),
                'UD_average_grade': doc['grade_data'].filter({'level': 'UD'}).get_field('average_grade').avg().default(None),
                'LD_percent_a': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_a').avg().default(None),
                'LD_percent_b': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_b').avg().default(None),
                'LD_percent_c': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_c').avg().default(None),
                'LD_percent_d': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_d').avg().default(None),
                'LD_percent_f': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_f').avg().default(None),
                'LD_percent_incomplete': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_incomplete').avg().default(None),
                'LD_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_c_minus_or_below').avg().default(None),
                'LD_average_grade': doc['grade_data'].filter({'level': 'LD'}).get_field('average_grade').avg().default(None),
            }, None)

    def _general_overtime(doc, val):
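        """Per-yearterm totals for one {group, reduction} pair produced by
        group('yearterm').ungroup(): FCQ count and forms requested/returned,
        plus Denver-campus rating averages for yearterms up to 20144."""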
        return {
            'total_fcqs': val['reduction'].count(),
            'total_forms_requested': val['reduction'].sum('forms_requested'),
            'total_forms_returned': val['reduction'].sum('forms_returned'),
            'denver_data_averages': r.branch((doc.get_field('campus').default(None) == 'DN') & (val['group'] <= 20144), {
                'r_fairness': val['reduction'].get_field('denver_data').get_field('r_fairness').avg().default(None),
                'r_presentation': val['reduction'].get_field('denver_data').get_field('r_presentation').avg().default(None),
                'r_workload': val['reduction'].get_field('denver_data').get_field('r_workload').avg().default(None),
                'r_diversity': val['reduction'].get_field('denver_data').get_field('r_diversity').avg().default(None),
                'r_accessibility': val['reduction'].get_field('denver_data').get_field('r_accessibility').avg().default(None),
                'r_learning': val['reduction'].get_field('denver_data').get_field('r_learning').avg().default(None),
            }, None)
        }

    def _general_stats(doc):
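        """All-time FCQ count and form totals over the doc's merged fcq_data."""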
        return {
            'total_fcqs': doc['fcq_data'].count(),
            'total_forms_requested': doc['fcq_data'].sum('forms_requested'),
            'total_forms_returned': doc['fcq_data'].sum('forms_returned'),
        }

    def _instructor_overtime(doc, val, unchained=False):
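        """Per-yearterm instructor rating averages, per-level FCQ counts and
        distinct-course count; merges in _general_overtime unless unchained=True."""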
        iot = {
            'GR_fcqs': val['reduction'].filter({'level': 'GR'}).count(),
            'UD_fcqs': val['reduction'].filter({'level': 'UD'}).count(),
            'LD_fcqs': val['reduction'].filter({'level': 'LD'}).count(),
            'total_courses': val['reduction'].get_field('course_id').distinct().count(),
            'instructoroverall_average': val['reduction'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': val['reduction'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': val['reduction'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': val['reduction'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': val['reduction'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        iot.update(chain)
        return iot

    def _instructor_stats(doc, unchained=False):
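        """All-time version of _instructor_overtime, computed over the doc's
        merged fcq_data; merges in _general_stats unless unchained=True."""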
        iot = {
            'GR_fcqs': doc['fcq_data'].filter({'level': 'GR'}).count(),
            'UD_fcqs': doc['fcq_data'].filter({'level': 'UD'}).count(),
            'LD_fcqs': doc['fcq_data'].filter({'level': 'LD'}).count(),
            'total_courses': doc['fcq_data'].get_field('course_id').distinct().count(),
            'instructoroverall_average': doc['fcq_data'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': doc['fcq_data'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': doc['fcq_data'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': doc['fcq_data'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': doc['fcq_data'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        iot.update(chain)
        return iot

    def _course_overtime(doc, val, unchained=False):
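        """Per-yearterm course rating averages and distinct-instructor count;
        merges in _general_overtime unless unchained=True."""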
        cot = {
            'total_instructors': val['reduction'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': val['reduction'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': val['reduction'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': val['reduction'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': val['reduction'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': val['reduction'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        cot.update(chain)
        return cot

    def _course_stats(doc, unchained=False):
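        """All-time version of _course_overtime over the doc's merged fcq_data;
        merges in _general_stats unless unchained=True."""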
        cot = {
            'total_instructors': doc['fcq_data'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': doc['fcq_data'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': doc['fcq_data'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': doc['fcq_data'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': doc['fcq_data'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': doc['fcq_data'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        cot.update(chain)
        return cot

    def _department_overtime(doc, val):
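        """Per-yearterm department rollup: combines the instructor, course and
        general overtime figures with per-level course counts, per-group
        instructor counts, and per-level/per-group averages and form totals.
        Also used for the College and Campus models."""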
        iot = _instructor_overtime(doc, val, unchained=True)
        cot = _course_overtime(doc, val, unchained=True)
        got = _general_overtime(doc, val)
        dot = {
            'GR_courses': val['reduction'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': val['reduction'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': val['reduction'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count(),
            'TA_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructoroverall').avg().default(None),
            'OTH_instructoroverall_average': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructoroverall').avg().default(None),
            'TTT_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructoroverall').avg().default(None),
            'GR_courseoverall_average': val['reduction'].filter({'level': 'GR'}).get_field('courseoverall').avg().default(None),
            'UD_courseoverall_average': val['reduction'].filter({'level': 'UD'}).get_field('courseoverall').avg().default(None),
            'LD_courseoverall_average': val['reduction'].filter({'level': 'LD'}).get_field('courseoverall').avg().default(None),
            'GR_forms_requested': val['reduction'].filter({'level': 'GR'}).sum('forms_requested'),
            'UD_forms_requested': val['reduction'].filter({'level': 'UD'}).sum('forms_requested'),
            'LD_forms_requested': val['reduction'].filter({'level': 'LD'}).sum('forms_requested')
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    def _department_stats(doc):
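        """All-time department rollup combining instructor, course and general
        stats with per-level course counts and per-group instructor counts.
        Also used for the College and Campus models."""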
        iot = _instructor_stats(doc, unchained=True)
        cot = _course_stats(doc, unchained=True)
        got = _general_stats(doc)
        dot = {
            'GR_courses': doc['fcq_data'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': doc['fcq_data'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': doc['fcq_data'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': doc['fcq_data'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': doc['fcq_data'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': doc['fcq_data'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count()
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    # model_overtime: for each model table, merge in the referenced Fcq and
    # Grade rows, then write per-yearterm ("overtime") breakdowns and all-time
    # stats back onto every document, using the per-model helpers selected below.
    for model in ['Instructor', 'Department', 'Course', 'College', 'Campus']:
        _model_overtime = {
            'Instructor': _instructor_overtime,
            'Department': _department_overtime,
            'College': _department_overtime,
            'Campus': _department_overtime,
            'Course': _course_overtime
        }[model]
        _model_stats = {
            'Instructor': _instructor_stats,
            'Department': _department_stats,
            'College': _department_stats,
            'Campus': _department_stats,
            'Course': _course_stats
        }[model]
        _model_grades_overtime = {
            'Instructor': _expanded_grades_overtime,
            'Department': _expanded_grades_overtime,
            'College': _expanded_grades_overtime,
            'Campus': _expanded_grades_overtime,
            'Course': _grades_overtime
        }[model]
        _model_grades_stats = {
            'Instructor': _expanded_grades_stats,
            'Department': _expanded_grades_stats,
            'College': _expanded_grades_stats,
            'Campus': _expanded_grades_stats,
            'Course': _grades_stats
        }[model]
        overtime_query = r.db(db).table(model).merge(
            lambda doc: {
                'fcq_data': r.branch(
                    doc['fcqs'].count() > 0,
                    r.db(db).table('Fcq').get_all(r.args(doc['fcqs'])).coerce_to('array'),
                    []
                ),
                'grade_data': r.branch(
                    doc['grades'].count() > 0,
                    r.db(db).table('Grade').get_all(r.args(doc['grades'])).coerce_to('array'),
                    []
                )
            }
        ).for_each(
            lambda doc: r.db(db).table(model).get(doc['id']).update({
                'fcq_data': None,
                'fcqs_overtime': doc['fcq_data'].group('yearterm').ungroup().map(
                    lambda val: [val['group'].coerce_to('string'), _model_overtime(doc, val)]
                ).coerce_to('object'),
                'fcqs_stats': _model_stats(doc),
                'grades_overtime': doc['grade_data'].group('yearterm').ungroup().map(
                    lambda val: [val['group'].coerce_to('string'), _model_grades_overtime(doc, val)]
                ).coerce_to('object'),
                'grades_stats': _model_grades_stats(doc)
            })
        ).run(conn, array_limit=200000)
        logging.info(overtime_query)