def mount(self):
    if self.mounted:
        return

    query = self._ensure_table_query(self.db_query, self.files_table_name)

    file_index_func = lambda row: rethinkdb.args([
        row[STATUS_JSON_NAME],
        row[FILE_NAME_JSON_NAME],
        row[FINISHED_DATE_JSON_NAME]
    ])
    file_prefix_index_func = lambda row: rethinkdb.expr(
        row[STATUS_JSON_NAME] == "completed").branch(
            rethinkdb.args([
                row[FILE_NAME_JSON_NAME].split("/").slice(1, -1),
                row[FINISHED_DATE_JSON_NAME]
            ]),
            rethinkdb.error("File is still uploading."))
    query = query.do(lambda result: rethinkdb.expr(
        result["tables_created"] == 1).branch(
            self._create_index(
                self.db_query, self.files_table_name,
                self.file_index, file_index_func).do(
                    lambda _: self._create_index(
                        self.db_query, self.files_table_name,
                        self.file_prefix_index, file_prefix_index_func)),
            None))

    query = query.do(lambda _: self._ensure_table_query(
        self.db_query, self.chunks_table_name))
    chunk_index_func = lambda row: rethinkdb.args(
        [row[FILE_ID_JSON_NAME], row[NUM_JSON_NAME]])
    query = query.do(lambda result: rethinkdb.expr(
        result["tables_created"] == 1).branch(
            self._create_index(
                self.db_query, self.chunks_table_name,
                self.chunk_index, chunk_index_func),
            None))

    query = query.do(lambda _: self._confirm_mount())
    return query
def get(self):
    """
    GET /profile

    Logged in User Profile

    :return:
    """
    # TODO: id must be a valid URN
    current_user = self.get_current_user()
    if not current_user:
        self.userError("not authenticated")
        return

    try:
        pQuery = r.table('users') \
            .get(self.get_current_user()['id']) \
            .pluck(self.fields['profile']) \
            .merge(lambda user: {
                'authority': r.table('authorities').get(user['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': user['authority']})
            }) \
            .merge(lambda user: {
                'projects': r.table('projects')
                    .get_all(r.args(user['projects']))
                    .distinct()
                    .pluck(self.fields_short['projects'])
                    .coerce_to('array')
            }) \
            .merge(lambda user: {
                'slices': r.table('slices')
                    .get_all(r.args(user['slices']))
                    .distinct()
                    .pluck(self.fields_short['slices'])
                    .coerce_to('array')
            })

        if self.isAdmin():
            profile = yield pQuery.merge(lambda user: {
                'pi_authorities': r.expr(user['pi_authorities'])
                    .map(lambda a: {'id': a})
            }).run(self.dbconnection)
        else:
            profile = yield pQuery.merge(lambda user: {
                'pi_authorities': r.table('authorities')
                    .get_all(r.args(user['pi_authorities']))
                    .distinct()
                    .pluck(self.fields_short['authorities'])
                    .coerce_to('array')
            }).run(self.dbconnection)
    except Exception:
        import traceback
        traceback.print_exc()
        self.userError("not authenticated")
        return

    self.write(json.dumps({"result": profile}, cls=myJSONEncoder))
def get_key_frames(self, weighted_nodes, page, include_total_pages=False):
    """
    Fetches the keyframe metadata for the given concepts from the database.

    :param weighted_nodes: the given concepts with their weights
    :param page: the page to query
    :return: keyframe metadata
    """
    total_pages = None
    nodes = weighted_nodes.map(lambda item: item[0])
    weighted_nodes_dict = weighted_nodes.to_dict()

    if include_total_pages:
        total_count = (
            r.table('key_frame_predictions')
            .get_all(r.args(nodes), index='node')
            .group('key_frame_id')
            .ungroup()
            .count()
            .run(r.connect(self.db_host, self.db_port, 'vor'))
        )
        total_pages = math.ceil(float(total_count) / PAGE_LENGTH)

    key_frames = (
        r.table('key_frame_predictions')
        .get_all(r.args(nodes), index='node')  # fetch all keyframe predictions associated with one of the given concepts
        .map(lambda row: {
            'key_frame_id': row['key_frame_id'],
            'weighted_score': r.expr(weighted_nodes_dict)[row['node']].mul(row['score'].coerce_to('number'))  # multiply the given weights by the keyframe scores
        })
        .group('key_frame_id').reduce(lambda left, right: {  # group by keyframe
            'weighted_score': left['weighted_score'].add(right['weighted_score'])  # sum the scores across multiple concepts
        })
        .ungroup()
        .map(lambda row: {
            'key_frame_id': row['group'],
            'weighted_score_sum': row['reduction']['weighted_score']
        })
        .order_by(r.desc('weighted_score_sum'))  # sort in descending order
        .slice(*KeyFrameRepository.__pagination(page))  # skip to the requested page
        .eq_join('key_frame_id', r.table('key_frames'))  # fetch the metadata from the keyframes table
        .without({'right': 'id'})
        .zip()
        .order_by(r.desc('weighted_score_sum'))  # sort again, since the join changes the ordering
        .run(r.connect(self.db_host, self.db_port, 'vor'))
    )
    return key_frames, total_pages
def get_fcq_data(self, fcq_ids):
    db = self.application.settings['database_name']
    conn = self.application.settings['conn']
    fcq_data = list(r.db(db).table('Fcq').get_all(r.args(fcq_ids)).run(conn))
    return list(map(lambda fcq: dict(fcq_title=self.fcq_title(fcq), **fcq), fcq_data))
def create_table(progress, conn, db, table, create_args, sindexes):
    # Make sure that the table is ready if it exists, or create it
    r.branch(
        r.db(db).table_list().contains(table),
        r.db(db).table(table).wait(timeout=30),
        r.db(db).table_create(table, **create_args)).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    try:
        for sindex in sindexes[progress[0]:]:
            if isinstance(sindex, dict) and all(
                    k in sindex for k in ('index', 'function')):
                if sindex['index'] in indexes:
                    r.db(db).table(table).index_drop(sindex['index']).run(conn)
                r.db(db).table(table).index_create(
                    sindex['index'], sindex['function']).run(conn)
                created_indexes.append(sindex['index'])
            progress[0] += 1
        r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
    except RuntimeError:
        raise RuntimeError("Sindex warning")
def user_feed(user_id):
    # TODO paginate
    key = "user:#{0}".format(user_id)
    feed = redis.zquery(key, 0, 20)
    conn = r.connect(**rethink_conn.conn())
    # materialize the cursor so it can be JSON-serialized
    events = list(r.table("company_events").get_all(r.args(feed)).run(conn))
    return make_response(json.dumps(events))
def find(cls, id, listing=False):
    file_ref = r.table(cls._table).get(id).run(conn)
    if file_ref is not None:
        if file_ref['is_folder'] and listing and file_ref['objects'] is not None:
            file_ref['objects'] = list(
                r.table(cls._table).get_all(r.args(file_ref['objects'])).run(conn))
    return file_ref
def ldap_users_exists(self, commit=False):
    cfg = r.table('config').get(1).run(db.conn)['auth']
    users = list(
        r.table('users').filter({
            'active': True,
            'kind': 'ldap'
        }).pluck('id', 'name', 'accessed').run(db.conn))
    nonvalid = []
    valid = []
    for u in users:
        conn = ldap.initialize(cfg['ldap']['ldap_server'])
        id_conn = conn.search(cfg['ldap']['bind_dn'], ldap.SCOPE_SUBTREE,
                              "uid=%s" % u['id'])
        tmp, info = conn.result(id_conn, 0)
        if len(info):
            valid.append(u)
        else:
            nonvalid.append(u)
    if commit:
        nonvalid_list = [u['id'] for u in nonvalid]
        return r.table('users').get_all(r.args(nonvalid_list)).update({
            'active': False
        }).run(db.conn)
    else:
        return {'nonvalid': nonvalid, 'valid': valid}
def claim_sites(self, n=1): result = ( self.rr.table('sites').get_all( r.args( r.db(self.rr.dbname).table( 'sites', read_mode='majority').between( ['ACTIVE', r.minval], ['ACTIVE', r.maxval], index='sites_last_disclaimed').order_by( r.desc('claimed'), 'last_disclaimed'). fold({}, lambda acc, site: acc.merge( r.branch( site.has_fields('job_id'), r.object( site['job_id'].coerce_to('string'), acc[ site['job_id'].coerce_to('string')]. default(0).add(1)), {})), emit=lambda acc, site, new_acc: r.branch( r.and_( r.or_( site['claimed'].not_(), site[ 'last_claimed'].lt(r.now().sub(60 * 60 ))), r.or_( site.has_fields('max_claimed_sites').not_( ), new_acc[site['job_id'].coerce_to( 'string')].le(site['max_claimed_sites' ]))), [site['id']], [])).limit(n))). update( # try to avoid a race condition resulting in multiple # brozzler-workers claiming the same site # see https://github.com/rethinkdb/rethinkdb/issues/3235#issuecomment-60283038 r.branch( r.or_(r.row['claimed'].not_(), r.row['last_claimed'].lt(r.now().sub(60 * 60))), { 'claimed': True, 'last_claimed': r.now() }, {}), return_changes=True)).run() self._vet_result(result, replaced=list(range(n + 1)), unchanged=list(range(n + 1))) sites = [] for i in range(result["replaced"]): if result["changes"][i]["old_val"]["claimed"]: self.logger.warn( "re-claimed site that was still marked 'claimed' " "because it was last claimed a long time ago " "at %s, and presumably some error stopped it from " "being disclaimed", result["changes"][i]["old_val"]["last_claimed"]) site = brozzler.Site(self.rr, result["changes"][i]["new_val"]) sites.append(site) if sites: return sites else: raise brozzler.NothingToClaim
def add_present_command(self, command, presents=[], **kwargs):
    """ Add present fields check to command """
    if len(presents) > 0:
        print("Only downloading documents with fields: " + str(presents))
        command = command.has_fields(r.args(presents))
    return command
def add_present_command(self, command, presents=[], **kwargs):
    ''' Add present fields check to command '''
    if len(presents) > 0:
        print("Only downloading documents with fields: " + str(presents))
        command = command.has_fields(r.args(presents))
    return command
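A hedged usage sketch of the hook above; the table name, connection settings, and field list are illustrative, not taken from the original project. `has_fields(r.args(presents))` keeps only documents that carry every listed field, which is what `add_present_command` adds to the download query.

import rethinkdb as r  # older driver style, matching the snippets above

conn = r.connect(host='localhost', port=28015)   # assumed connection settings
presents = ['title', 'body', 'author']           # hypothetical required fields

command = r.table('documents')                   # hypothetical table
command = command.has_fields(r.args(presents))   # same call the hook performs
rows = list(command.run(conn))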
def claim_sites(self, n=1): self.logger.trace('claiming up to %s sites to brozzle', n) result = ( self.rr.table('sites').get_all(r.args( r.db(self.rr.dbname).table('sites', read_mode='majority') .between( ['ACTIVE', r.minval], ['ACTIVE', r.maxval], index='sites_last_disclaimed') .order_by(r.desc('claimed'), 'last_disclaimed') .fold( {}, lambda acc, site: acc.merge( r.branch( site.has_fields('job_id'), r.object( site['job_id'].coerce_to('string'), acc[site['job_id'].coerce_to('string')].default(0).add(1)), {})), emit=lambda acc, site, new_acc: r.branch( r.and_( r.or_( site['claimed'].not_(), site['last_claimed'].lt(r.now().sub(60*60))), r.or_( site.has_fields('max_claimed_sites').not_(), new_acc[site['job_id'].coerce_to('string')].le(site['max_claimed_sites']))), [site['id']], [])) .limit(n))) .update( # try to avoid a race condition resulting in multiple # brozzler-workers claiming the same site # see https://github.com/rethinkdb/rethinkdb/issues/3235#issuecomment-60283038 r.branch( r.or_( r.row['claimed'].not_(), r.row['last_claimed'].lt(r.now().sub(60*60))), {'claimed': True, 'last_claimed': r.now()}, {}), return_changes=True)).run() self._vet_result( result, replaced=list(range(n+1)), unchanged=list(range(n+1))) sites = [] for i in range(result["replaced"]): if result["changes"][i]["old_val"]["claimed"]: self.logger.warn( "re-claimed site that was still marked 'claimed' " "because it was last claimed a long time ago " "at %s, and presumably some error stopped it from " "being disclaimed", result["changes"][i]["old_val"]["last_claimed"]) site = brozzler.Site(self.rr, result["changes"][i]["new_val"]) sites.append(site) self.logger.debug('claimed %s sites', len(sites)) if sites: return sites else: raise brozzler.NothingToClaim
def get_domains_with_transitional_status(list_status=TRANSITIONAL_STATUS):
    r_conn = new_rethink_connection()
    rtable = r.table('domains')
    # ~ l = list(rtable.filter(lambda d: r.expr(list_status).
    # ~         contains(d['status'])).pluck('status', 'id', 'hyp_started').
    # ~         run
    l = list(rtable.get_all(r.args(list_status), index='status')
             .pluck('status', 'id', 'hyp_started')
             .run(r_conn))
    close_rethink_connection(r_conn)
    return l
def filter(self, ids=None, **kwargs):
    if ids:
        try:
            query = self.query.get_all(r.args(ids)).filter(kwargs)
        except AttributeError:
            # self.query already has a get_all applied
            query = (self.query.filter(lambda doc: r.expr(ids).contains(doc['id']))
                     .filter(kwargs))
    else:
        query = self.query.filter(kwargs)
    return ObjectSet(self, query)
def filter(self, ids=None, **kwargs):
    if ids:
        try:
            query = self.query.get_all(r.args(ids)).filter(kwargs)
        except AttributeError:
            # self.query already has a get_all applied
            query = (self.query.filter(lambda doc: r.expr(ids).contains(doc['id']))
                     .filter(kwargs))
    else:
        query = self.query.filter(kwargs)
    return ObjectSet(self, query)
def fetch_holdings(holding_ids):
    return r.table('holdings')\
        .get_all(r.args(holding_ids), index='id')\
        .filter(lambda holding: (
            fetch_latest_block_num() >= holding['start_block_num'])
            & (fetch_latest_block_num() < holding['end_block_num']))\
        .map(lambda holding: (holding['label'] == "").branch(
            holding.without('label'), holding))\
        .map(lambda holding: (holding['description'] == "").branch(
            holding.without('description'), holding))\
        .without('start_block_num', 'end_block_num', 'delta_id', 'account')\
        .coerce_to('array')
def get_downloads_in_progress():
    r_conn = new_rethink_connection()
    try:
        d = r.table('media').get_all(
            r.args(['DownloadStarting', 'Downloading']), index='status'). \
            pluck('id', 'path', 'isard-web', 'status').run(r_conn)
    except r.ReqlNonExistenceError:
        d = []
    close_rethink_connection(r_conn)
    return d
def handle_entitiesImported(self, work):
    entities = work['data']['entities']
    entityType = work['work']['configType']
    pageCount = self.countsPerType[entityType]['pageCount']
    self.countsPerType[entityType].setdefault('importCount', 0)
    self.countsPerType[entityType]['importCount'] += len(entities)
    self.idsPerType.setdefault(entityType, []).extend([e['id'] for e in entities])
    LOG.info("Imported {currCount}/{totalCount} entities for type '{typ}' on page {page}/{pageCount}".format(
        currCount=self.countsPerType[entityType]['importCount'],
        totalCount=self.countsPerType[entityType]['entityCount'],
        typ=entityType,
        page=work['work']['page'],
        pageCount=pageCount,
    ))

    entityConfig = self.controller.entityConfigManager.getConfigForType(entityType)
    self.controller.post_entities(entityConfig, entities)

    # Store the timestamp for the import
    # We'll use this to discard old EventLogEntities that happened before the import
    # However, eventlogentry's that are created while importing will still be applied
    timestamps = self.importTimestampsPerType.setdefault(entityType, {})
    timestamps.setdefault('startImportTimestamp', work['data']['startImportTimestamp'])

    if not len(self.activeWorkItemsPerType[entityType]):
        LOG.info("Imported all entities for type '{0}'".format(entityType))

        # Get a list of entity IDs already stored in the cache table
        cachedEntityIDs = set(rethinkdb
                              .table(entityConfig['table'])
                              .map(lambda asset: asset['id'])
                              .coerce_to('array')
                              .run(self.controller.rethink))
        importedEntityIDs = set(self.idsPerType[entityType])
        diffIDs = cachedEntityIDs.difference(importedEntityIDs)

        if len(diffIDs):
            # Delete these extra entities
            # This allows us to update the cache in place without
            # having to drop the table before the import, allowing for
            # a more seamless import / update process
            LOG.info("Deleting extra entities found in cache with IDs: {0}".format(diffIDs))
            rethinkdb.db('shotguncache').table(entityConfig['table']).get_all(
                rethinkdb.args(diffIDs)).delete().run(self.controller.rethink)

        self.config.history.setdefault('config_hashes', {})[entityType] = entityConfig.hash
        self.config.history.setdefault('cached_entity_types', {})[entityType] = self.importTimestampsPerType[entityType]
        self.config.history.save()
        self.activeWorkItemsPerType.pop(entityType)
def ldap_users_exists(self, commit=False):
    cfg = r.table('config').get(1).run(db.conn)['auth']
    users = list(r.table('users').filter({'active': True, 'kind': 'ldap'})
                 .pluck('id', 'name', 'accessed').run(db.conn))
    nonvalid = []
    valid = []
    for u in users:
        conn = ldap.initialize(cfg['ldap']['ldap_server'])
        id_conn = conn.search(cfg['ldap']['bind_dn'], ldap.SCOPE_SUBTREE,
                              "uid=%s" % u['id'])
        tmp, info = conn.result(id_conn, 0)
        if len(info):
            valid.append(u)
        else:
            nonvalid.append(u)
    if commit:
        nonvalid_list = [u['id'] for u in nonvalid]
        return r.table('users').get_all(r.args(nonvalid_list)) \
            .update({'active': False}).run(db.conn)
    else:
        return {'nonvalid': nonvalid, 'valid': valid}
def remove(self, *objs):
    old_keys = set()
    for obj in objs:
        if not isinstance(obj, model_cls):
            raise TypeError('%s instance expected, got %r'
                            % (model_cls.__name__, obj))
        obj_key = getattr(obj.fields, rkey, None)
        if obj_key is not None:
            old_keys.add(obj_key)
    existing_keys = {doc[rkey] for doc in self.query.run()}
    # Drop keys that no longer exist from old_keys
    old_keys &= existing_keys
    if old_keys:
        (join_model_cls.table.get_all(r.args(list(old_keys)), index=mrkey)
         .delete()
         .run())
def create_table(progress, conn, db, table, pkey, sindexes):
    if table not in r.db(db).table_list().run(conn):
        r.db(db).table_create(table, primary_key=pkey).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    for sindex in sindexes[progress[0]:]:
        if isinstance(sindex, dict) and all(k in sindex for k in ('index', 'function')):
            if sindex['index'] in indexes:
                r.db(db).table(table).index_drop(sindex['index']).run(conn)
            r.db(db).table(table).index_create(sindex['index'], sindex['function']).run(conn)
            created_indexes.append(sindex['index'])
        progress[0] += 1

    r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
def create_table(progress, conn, db, table, create_args, sindexes):
    # Make sure that the table is ready if it exists, or create it
    r.branch(r.db(db).table_list().contains(table),
             r.db(db).table(table).wait(timeout=30),
             r.db(db).table_create(table, **create_args)).run(conn)

    if progress[0] is None:
        progress[0] = 0

    # Recreate secondary indexes - assume that any indexes that already exist are wrong
    # and create them from scratch
    indexes = r.db(db).table(table).index_list().run(conn)
    created_indexes = list()
    for sindex in sindexes[progress[0]:]:
        if isinstance(sindex, dict) and all(k in sindex for k in ('index', 'function')):
            if sindex['index'] in indexes:
                r.db(db).table(table).index_drop(sindex['index']).run(conn)
            r.db(db).table(table).index_create(sindex['index'], sindex['function']).run(conn)
            created_indexes.append(sindex['index'])
        progress[0] += 1

    r.db(db).table(table).index_wait(r.args(created_indexes)).run(conn)
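For context, a hedged sketch of how a restore helper like this might be invoked; the database, table, and index names below are made up. Only dict entries carrying both 'index' and 'function' keys are recreated, anything else is skipped, and `progress` is a one-element list so a caller can resume a partially completed run.

import rethinkdb as r

conn = r.connect(host='localhost', port=28015)   # assumed connection settings
progress = [None]                                # resumable progress marker
sindexes = [
    {'index': 'author_date',                     # hypothetical compound index
     'function': lambda row: [row['author'], row['created_at']]},
]
create_table(progress, conn, 'blog', 'posts', {'primary_key': 'id'}, sindexes)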
def get(self, id=None, o=None): """ - GET /authorities (public) Authorities list - GET /authorities/<id> (public) Authority with <id> - GET /authorities/(users|projects) (auth) Users/Projects list of the authority of the logged in user - GET /authorities/<id>/(users|projects) (auth) Users/Projects list of the authority with <id> :return: """ response = [] current_user = self.get_current_user() # GET /authorities if not id and not o: cursor = yield r.table('authorities') \ .pluck(self.fields['authorities']) \ .run(self.dbconnection) while (yield cursor.fetch_next()): authority = yield cursor.next() if authority['name'] is None: authority['name'] = authority['shortname'].title() response.append(authority) # GET /authorities/<id> elif not o and id and self.isUrn(id): if not current_user: self.userError('permission denied') return try: # Check if the user has the right to GET an authority, PI of an upper authority a = yield r.table('authorities').get(id).run(self.dbconnection) if not a: self.userError("this authority %s does not exist" % id) return # Check if the user isAdmin admin = self.isAdmin() if self.current_user['id'] not in a['pi_users'] and not admin: self.userError("your user has no rights on authority: %s" % id) return except Exception: import traceback traceback.print_exc() self.userError("not authenticated") return cursor = yield r.table('authorities') \ .pluck(self.fields['authorities']) \ .filter({'id': id}) \ .run(self.dbconnection) while (yield cursor.fetch_next()): authority = yield cursor.next() if authority['name'] is None: authority['name'] = authority['shortname'].title() response.append(authority) # GET /authorities/(users|projects) elif not id and o in ['users', 'projects']: if not current_user: self.userError('permission denied') return cursor = yield r.table(o) \ .pluck(self.fields[o]) \ .filter({"authority": current_user['authority']}) \ .merge(lambda user: { 'authority': r.table('authorities').get(user['authority']) \ .pluck(self.fields_short['authorities']) \ .default({'id': user['authority']}) }) \ .merge(lambda user: { 'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \ .distinct() \ .pluck(self.fields_short['authorities']) \ .coerce_to('array') }) \ .merge(lambda user: { 'projects': r.table('projects') \ .get_all(r.args(user['projects'])) \ .distinct() \ .pluck(self.fields_short['projects']) \ .coerce_to('array') }) \ .merge(lambda user: { 'slices': r.table('slices') \ .get_all(r.args(user['slices'])) \ .distinct() \ .pluck(self.fields_short['slices']) \ .coerce_to('array') }) \ .run(self.dbconnection) while (yield cursor.fetch_next()): item = yield cursor.next() if 'name' in item and item['name'] is None: item['name'] = item['shortname'].title() response.append(item) # GET /authorities/<id>/(users|projects) elif id and self.isUrn(id) and o in ['users', 'projects']: try: # Check if the user has the right to GET an authority, PI of an upper authority a = yield r.table('authorities').get(id).run(self.dbconnection) if not a: self.userError("this authority %s does not exist" % id) return if self.current_user['id'] not in a[ 'pi_users'] and not self.isAdmin(): self.userError("your user has no rights on authority: %s" % id) return except Exception: import traceback traceback.print_exc() self.userError("not authenticated") return if o == 'users': cursor = yield r.table(o) \ .pluck(self.fields[o]) \ .filter({"authority": id}) \ .merge(lambda user: { 'authority': r.table('authorities').get(user['authority']) \ 
.pluck(self.fields_short['authorities']) \ .default({'id' : user['authority']}) }) \ .merge(lambda user: { 'pi_authorities': r.table('authorities').get_all(r.args(user['pi_authorities'])) \ .distinct() \ .pluck(self.fields_short['authorities']) \ .coerce_to('array') }) \ .merge(lambda user: { 'projects': r.table('projects') \ .get_all(r.args(user['projects'])) \ .distinct() \ .pluck(self.fields_short['projects']) \ .coerce_to('array') }) \ .merge(lambda user: { 'slices': r.table('slices') \ .get_all(r.args(user['slices'])) \ .distinct() \ .pluck(self.fields_short['slices']) \ .coerce_to('array') }) \ .run(self.dbconnection) else: cursor = yield r.table(o) \ .pluck(self.fields[o]) \ .filter({"authority": id}) \ .run(self.dbconnection) while (yield cursor.fetch_next()): item = yield cursor.next() if 'name' in item and item['name'] is None: item['name'] = item['shortname'].title() response.append(item) else: self.userError("invalid request {} {}".format(id, o)) return self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
def run(self): self.tid = get_tid() logs.downloads.debug( 'RUN-DOWNLOAD-THREAD-------------------------------------') pool_id = 'default' first_loop = True if self.stop is False: if first_loop is True: # if domains or media have status Downloading when engine restart # we need to resetdownloading deleting file and first_loop = False # wait a hyp to downloads next_hyp = False while next_hyp is False: logs.downloads.info( 'waiting an hypervisor online to launch downloading actions' ) if pool_id in self.manager.pools.keys(): next_hyp = self.manager.pools[pool_id].get_next() sleep(1) for hyp_id in get_hypers_in_pool(): self.killall_curl(hyp_id) domains_status_downloading = get_domains_with_status( 'Downloading') medias_status_downloading = get_media_with_status( 'Downloading') for id_domain in domains_status_downloading: create_dict = get_domain(id_domain)['create_dict'] dict_changes = { 'id': id_domain, 'table': 'domains', 'create_dict': create_dict } update_domain_status('ResetDownloading', id_domain) self.abort_download(dict_changes, final_status='DownloadFailed') for id_media in medias_status_downloading: dict_media = get_media(id_media) dict_changes = { 'id': id_media, 'table': 'media', 'path': dict_media['path'], 'hypervisors_pools': dict_media['hypervisors_pools'] } update_status_table('media', 'ResetDownloading', id_media) self.abort_download(dict_changes, final_status='DownloadFailed') self.r_conn = new_rethink_connection() update_table_field('hypervisors_pools', pool_id, 'download_changes', 'Started') for c in r.table('media').get_all(r.args( ['Deleting', 'Deleted', 'Downloaded', 'DownloadFailed', 'DownloadStarting', 'Downloading', 'Download', 'DownloadAborting','ResetDownloading']), index='status'). \ pluck('id', 'path', 'url-isard', 'url-web', 'status' ).merge( {'table': 'media'}).changes(include_initial=True).union( r.table('domains').get_all( r.args(['Downloaded', 'DownloadFailed','DownloadStarting', 'Downloading', 'DownloadAborting','ResetDownloading']), index='status'). 
\ pluck('id', 'create_dict', 'url-isard', 'url-web', 'status').merge( {"table": "domains"}).changes(include_initial=True)).union( r.table('engine').pluck('threads', 'status_all_threads').merge({'table': 'engine'}).changes()).run( self.r_conn): if self.stop: break if c.get('new_val', None) is not None: if c['new_val'].get('table', False) == 'engine': if c['new_val']['status_all_threads'] == 'Stopping': break else: continue logs.downloads.debug('DOWNLOAD CHANGES DETECTED:') logs.downloads.debug(pprint.pformat(c)) if c.get('old_val', None) is None: if c['new_val']['status'] == 'DownloadStarting': self.start_download(c['new_val']) elif c.get('new_val', None) is None: if c['old_val']['status'] in ['DownloadAborting']: self.remove_download_thread(c['old_val']) elif 'old_val' in c and 'new_val' in c: if c['old_val']['status'] == 'DownloadFailed' and c[ 'new_val']['status'] == 'DownloadStarting': self.start_download(c['new_val']) elif c['old_val']['status'] == 'Downloaded' and c[ 'new_val']['status'] == 'Deleting': if c['new_val']['table'] == 'media': self.delete_media(c['new_val']) elif c['old_val']['status'] == 'Deleting' and c['new_val'][ 'status'] == 'Deleted': if c['new_val']['table'] == 'media': remove_media(c['new_val']['id']) elif c['old_val']['status'] == 'Downloading' and c[ 'new_val']['status'] == 'DownloadFailed': pass elif c['old_val']['status'] == 'DownloadStarting' and c[ 'new_val']['status'] == 'Downloading': pass elif c['old_val']['status'] == 'Downloading' and c[ 'new_val']['status'] == 'Downloaded': pass elif c['old_val']['status'] == 'Downloading' and c[ 'new_val']['status'] == 'DownloadAborting': self.abort_download(c['new_val']) elif c['old_val']['status'] == 'Downloading' and c[ 'new_val']['status'] == 'ResetDownloading': self.abort_download(c['new_val'], final_status='DownloadFailed')
slug_map = {}
for owner_repo, plugins in grouped_plugins.iteritems():
    print '\nPlugin with GitHub path %s occurs %s times' % (
        owner_repo, len(plugins))
    canonical = merge_plugins(plugins)
    print "Using %s as canonical" % canonical['slug']

    # db.plugins.insert normalizes the owner/repo to lower case
    db.plugins.insert(canonical, conflict='replace')
    updated += 1

    dupes = [dupe for dupe in plugins if dupe['slug'] != canonical['slug']]
    if dupes:
        dupe_slugs = [dupe['slug'] for dupe in dupes]
        # Store deleted slugs for logging
        slug_map[canonical['slug']] = dupe_slugs
        print 'Deleting duplicate rows: %s' % ', '.join(dupe_slugs)
        r.table('plugins').get_all(r.args(dupe_slugs)).delete().run(r_conn())
        deleted += len(dupes)

with open(LOG_FILE, 'w') as log:
    print 'Writing deleted slug names to %s' % LOG_FILE
    log.writelines(dupe_log_line(c, d) for c, d in slug_map.iteritems())

print "Updated %d rows and deleted %d" % (updated, deleted)
def get(self, id=None, o=None): """ - GET /slices (public) Slices list - GET /slices/<urn|hrn> (public) Slice with <urn|hrn> - GET /slices/<id|hrn>/(users|resources) (auth) Users/Resources list of the slice with <id|hrn> :return: """ slice = None response = [] if not self.get_current_user(): self.userError('permission denied user not logged in') return ## # if id (hrn|urn) is set we get the slice with id <urn|hrn> # if id: if self.isUrn(id): filter = {'id': id} elif self.isHrn(id): filter = {'hrn': id} else: self.userError('id or hrn format error') return cursor = yield r.table('slices') \ .filter(filter) \ .merge(lambda slice: { 'authority': r.table('authorities').get(slice['authority']) \ .pluck(self.fields_short['authorities']) \ .default({'id': slice['authority']}) }) \ .merge(lambda slice: { 'project': r.table('projects').get(slice['project']) \ .pluck(self.fields_short['projects']) \ .default({'id': slice['project']}) }) \ .merge(lambda slice: { 'users': r.table('users').get_all(r.args(slice['users']), index="id") \ .distinct() \ .pluck(self.fields_short['users']).coerce_to('array') }) \ .merge(lambda slice: { 'leases': r.table('leases').filter({'slice_id':slice['id']}).merge(lambda l: { 'resources': l['resources'].map(lambda res: r.table('resources').get(res)) \ .coerce_to('array') }).coerce_to('array') }) \ .run(self.dbconnection) while (yield cursor.fetch_next()): slice = yield cursor.next() ## # GET /slices # # returns list of slices # if not id and not o: cursor = yield r.table('slices') \ .pluck(self.fields['slices']) \ .merge(lambda slice: { 'authority': r.table('authorities').get(slice['authority']) \ .pluck(self.fields_short['authorities']) \ .default({'id': slice['authority']}) }) \ .merge(lambda slice: { 'project': r.table('projects').get(slice['project']) \ .pluck(self.fields_short['projects']) \ .default({'id': slice['project']}) }) \ .run(self.dbconnection) while (yield cursor.fetch_next()): slice = yield cursor.next() response.append(slice) ## # GET /slices/<urn|hrn> # # returns slice with <hrn|urn> # elif not o and id: if not self.get_current_user(): self.userError('permission denied user not logged in') return response.append(slice) ## # GET /slice/<urn|hrn>/users # # returns a list of users of slice with id urn|hrn # elif id and slice and o == 'users': response = yield r.table('users') \ .get_all(r.args(slice['users'])['id'], index='id') \ .distinct() \ .pluck(self.fields['users']) \ .merge(lambda user: { 'authority': r.table('authorities').get(user['authority']) \ .pluck(self.fields_short['authorities']) \ .default({'id': user['authority']}) }) \ .coerce_to('array').run(self.dbconnection) ## # GET /slice/<urn|hrn>/resources # # returns a list of resources in the slice with id urn|hrn # elif id and slice and o == 'resources': response = yield r.table('resources') \ .get_all(r.args(slice['resources'])['id'], index='id') \ .distinct() \ .merge(lambda resource: { 'testbed': r.table('testbeds').get(resource['testbed']) \ .pluck(self.fields_short['testbeds']) \ .default({'id': resource['testbed']}) }) \ .coerce_to('array').run(self.dbconnection) else: self.userError("invalid request") return self.finish(json.dumps({'result': response}, cls=myJSONEncoder))
def handle_entitiesImported(self, work): entities = work['data']['entities'] entityType = work['work']['configType'] pageCount = self.countsPerType[entityType]['pageCount'] self.countsPerType[entityType].setdefault('importCount', 0) self.countsPerType[entityType]['importCount'] += len(entities) self.idsPerType.setdefault(entityType, []).extend([e['id'] for e in entities]) LOG.info( "Imported {currCount}/{totalCount} entities for type '{typ}' on page {page}/{pageCount}" .format( currCount=self.countsPerType[entityType]['importCount'], totalCount=self.countsPerType[entityType]['entityCount'], typ=entityType, page=work['work']['page'], pageCount=pageCount, )) entityConfig = self.controller.entityConfigManager.getConfigForType( entityType) self.controller.post_entities(entityConfig, entities) # Store the timestamp for the import # We'll use this to discard old EventLogEntities that happened before the import # However, eventlogentry's that are created while importing will still be applied timestamps = self.importTimestampsPerType.setdefault(entityType, {}) timestamps.setdefault('startImportTimestamp', work['data']['startImportTimestamp']) if not len(self.activeWorkItemsPerType[entityType]): LOG.info("Imported all entities for type '{0}'".format(entityType)) # Get a list of cachedEntityIDs = set( rethinkdb.table(entityConfig['table']).map( lambda asset: asset['id']).coerce_to('array').run( self.controller.rethink)) importedEntityIDs = set(self.idsPerType[entityType]) diffIDs = cachedEntityIDs.difference(importedEntityIDs) if len(diffIDs): # Delete these extra entities # This allows us to update the cache in place without # having the drop the table before the import, allowing for # a more seamless import / update process LOG.info( "Deleting extra entities found in cache with IDs: {0}". format(diffIDs)) rethinkdb.db('shotguncache').table( entityConfig['table']).get_all( rethinkdb.args(diffIDs)).delete().run( self.controller.rethink) self.config.history.setdefault('config_hashes', {})[entityType] = entityConfig.hash self.config.history.setdefault( 'cached_entity_types', {})[entityType] = self.importTimestampsPerType[entityType] self.config.history.save() self.activeWorkItemsPerType.pop(entityType)
def get_word_list_syns_and_related(cls, base_word_list: [], add_base_word=True): # Get Base Word Records all_syn_ids = set() # {base word : [id, base word]} base_word_records_dict: { str: [str, str] } = cls.keyword.get_all(r.args(base_word_list), index="word").group("word").run(cls.connection) # {base word id: {word, syns: { syn_id: weight }}} base_word_dict: {str: {str, {str: float}}} = {} for key, base_word_record in base_word_records_dict.items(): base_word_id = base_word_record[0]["id"] base_word_dict[base_word_id] = { "word": base_word_record[0]["word"], "syns": {} } if add_base_word: base_word_dict[base_word_id]["syns"][base_word_id] = 1 all_syn_ids.add(base_word_id) # Get all relevant syns and bases _id_args = r.args(list(base_word_dict.keys())) # [{"base_id", "syn_id", "weight"}] syn_ids = cls.synlinks.get_all( _id_args, index="base_id").coerce_to("array").run(cls.connection) # [{"base_id", "syn_id", "weight"}] base_ids = cls.synlinks.get_all( _id_args, index="syn_id").coerce_to("array").run(cls.connection) all_syn_ids |= (set([syn_id["syn_id"] for syn_id in syn_ids] + [base_id["base_id"] for base_id in base_ids])) for syn_id_record in syn_ids: # add to base_word syns_id list if syn_id_record["syn_id"] in base_word_dict[ syn_id_record["base_id"]]["syns"]: # if synlink is already added, take larger weight original = base_word_dict[syn_id_record["base_id"]]["syns"][ syn_id_record["syn_id"]] base_word_dict[syn_id_record["base_id"]]["syns"][ syn_id_record["syn_id"]] = max( syn_id_record["weight"] * cls.syn_decay, original) else: base_word_dict[syn_id_record["base_id"]]["syns"][syn_id_record[ "syn_id"]] = syn_id_record["weight"] * cls.syn_decay for base_id_record in base_ids: # add to base_word syns_id list if base_id_record["base_id"] in base_word_dict[ base_id_record["syn_id"]]["syns"]: # if synlink is already added, take larger weight original = base_word_dict[base_id_record["syn_id"]]["syns"][ base_id_record["base_id"]] base_word_dict[base_id_record["syn_id"]]["syns"][ base_id_record["base_id"]] = max( base_id_record["weight"] * cls.syn_decay, original) else: base_word_dict[base_id_record["syn_id"]]["syns"][ base_id_record[ "base_id"]] = base_id_record["weight"] * cls.syn_decay # {"id": { "id", "word" }} syn_records = cls.keyword.get_all(r.args(all_syn_ids)).group("id").run( cls.connection) # [{"parent_id", "child_id"}] related_word_ids = cls.keyword_relations.get_all( r.args(all_syn_ids), index="parent_id").pluck( "parent_id", "child_id").coerce_to("array").run(cls.connection) # key: id of the root synonym # value: ids of the related words related_words_dict = {} for item in related_word_ids: if item["parent_id"] not in related_words_dict: related_words_dict[item["parent_id"]] = [] related_words_dict[item["parent_id"]].append(item["child_id"]) related_word_records = dict( cls.keyword.get_all( r.args([ related_word_id_item["child_id"] for related_word_id_item in related_word_ids ])).group("id").run(cls.connection)) base_word_weight_dict = {} for base_word_id, val in base_word_dict.items(): # for each base word base_word_word = val["word"] syns_dict = val["syns"] weight_dict = {} for syn_id, weight in syns_dict.items(): # for synonyms: syn_record = syn_records[syn_id][0] weight_dict[syn_record["word"]] = { "id": syn_record["id"], "weight": weight, "parent": None } if syn_id in related_words_dict: # If there are related words for the synonym for syn_related_word_id in related_words_dict[syn_id]: related_word_record = related_word_records[ syn_related_word_id][0] if 
related_word_record["word"] not in weight_dict: weight_dict[related_word_record["word"]] = { "id": related_word_record["id"], "weight": weight * cls.related_word_decay, "parent": syn_record["word"] } base_word_weight_dict[base_word_word] = weight_dict return base_word_weight_dict
def search_items(self, searchstring, searchfields=['tags'], returnfields=['id']):
    """
    Searches through the Table

    1) search all alternate_titles for full searchstring
    2) if searchstring contains a number > 99, search course_numbers for that number
    3) if searchstring contains a ngram with four or fewer characters, looks through course_subject
    """
    DB = self.DB
    table = self.__class__.__name__
    lowercase_searchstring = searchstring.lower()
    # str.replace returns a new string, so keep the result
    lowercase_searchstring = lowercase_searchstring.replace('-', ' ')
    splitwords = lowercase_searchstring.split(' ')
    words = list(filter(lambda word: word not in STOPWORDS, splitwords))
    if not len(words):
        return []
    logging.info(words)

    def sequence_search(searchfield, words):
        return r.expr(words).concat_map(
            lambda word: r.db(DB).table(table).filter(
                lambda doc: doc[searchfield].map(
                    lambda title: title.do(
                        lambda matcher: matcher.coerce_to('STRING').match('(?i)' + word)
                    )
                ).reduce(lambda left, right: left | right)
            ).coerce_to('array').map(lambda doc: doc['id'])
        )

    def static_search(searchfield, words):
        return r.expr(words).concat_map(
            lambda word: r.db(DB).table(table).filter(
                lambda doc: doc[searchfield].coerce_to('STRING').match('(?i)' + word)
            ).coerce_to('array').map(lambda doc: doc['id'])
        )

    def search(searchfield, words):
        if isinstance(self.default()[searchfield], (list, tuple)):
            return sequence_search(searchfield, words)
        return static_search(searchfield, words)

    searches = [search(searchfield, words) for searchfield in searchfields]
    total_results = r.add(r.args(searches)).run(self.conn)
    searchresults = (r.expr(total_results)).group(r.row).count().ungroup().order_by('reduction').run(self.conn)
    if not len(searchresults):
        return []
    best_score = searchresults[-1]['reduction']
    best_ids = r.expr(searchresults).filter({'reduction': best_score}).get_field('group').run(self.conn)
    if 'id' not in returnfields:
        logging.warn("'id' is not in listed returnfields. It's recommended this field is included")
    if not len(returnfields):
        logging.error("returnfields cannot be empty")
        return []
    try:
        return list(r.db(DB).table(table).get_all(r.args(best_ids)).pluck(r.args(returnfields)).run(self.conn))
    except Exception as err:
        logging.error(err)
        return []
def model_overtime(db, conn): def _general_overtime(doc, val): return { 'total_fcqs': val['reduction'].count(), 'total_forms_requested': val['reduction'].sum('forms_requested'), 'total_forms_returned': val['reduction'].sum('forms_returned'), 'denver_data_averages': r.branch(((doc.get_field('campus').default(None) == 'DN') & (val['group'] <= 20144)), { 'r_fairness': val['reduction'].get_field('denver_data').get_field('r_fairness').avg().default(None), 'r_presentation': val['reduction'].get_field('denver_data').get_field('r_presentation').avg().default(None), 'r_workload': val['reduction'].get_field('denver_data').get_field('r_workload').avg().default(None), 'r_diversity': val['reduction'].get_field('denver_data').get_field('r_diversity').avg().default(None), 'r_accessibility': val['reduction'].get_field('denver_data').get_field('r_accessibility').avg().default(None), 'r_learning': val['reduction'].get_field('denver_data').get_field('r_learning').avg().default(None), }, None) } def _general_stats(doc): return { 'total_fcqs': doc['fcq_data'].count(), 'total_forms_requested': doc['fcq_data'].sum('forms_requested'), 'total_forms_returned': doc['fcq_data'].sum('forms_returned'), } def _instructor_overtime(doc, val, unchained=False): iot = { 'GR_fcqs': val['reduction'].filter({'level': 'GR'}).count(), 'UD_fcqs': val['reduction'].filter({'level': 'UD'}).count(), 'LD_fcqs': val['reduction'].filter({'level': 'LD'}).count(), 'total_courses': val['reduction'].get_field('course_id').distinct().count(), 'instructoroverall_average': val['reduction'].get_field('instructoroverall').avg().default(None), 'instructoroverall_sd_average': val['reduction'].get_field('instructoroverall_sd').avg().default(None), 'instructor_effectiveness_average': val['reduction'].get_field('instructor_effectiveness').avg().default(None), 'instructor_availability_average': val['reduction'].get_field('instructor_availability').avg().default(None), 'instructor_respect_average': val['reduction'].get_field('instructor_respect').avg().default(None) } chain = {} if unchained else _general_overtime(doc, val) iot.update(chain) return iot def _instructor_stats(doc, unchained=False): iot = { 'GR_fcqs': doc['fcq_data'].filter({'level': 'GR'}).count(), 'UD_fcqs': doc['fcq_data'].filter({'level': 'UD'}).count(), 'LD_fcqs': doc['fcq_data'].filter({'level': 'LD'}).count(), 'total_courses': doc['fcq_data'].get_field('course_id').distinct().count(), 'instructoroverall_average': doc['fcq_data'].get_field('instructoroverall').avg().default(None), 'instructoroverall_sd_average': doc['fcq_data'].get_field('instructoroverall_sd').avg().default(None), 'instructor_effectiveness_average': doc['fcq_data'].get_field('instructor_effectiveness').avg().default(None), 'instructor_availability_average': doc['fcq_data'].get_field('instructor_availability').avg().default(None), 'instructor_respect_average': doc['fcq_data'].get_field('instructor_respect').avg().default(None) } chain = {} if unchained else _general_stats(doc) iot.update(chain) return iot def _course_overtime(doc, val, unchained=False): cot = { 'total_instructors': val['reduction'].get_field('instructor_id').distinct().count(), 'courseoverall_average': val['reduction'].get_field('courseoverall').avg().default(None), 'courseoverall_sd_average': val['reduction'].get_field('courseoverall_sd').avg().default(None), 'course_challenge_average': val['reduction'].get_field('course_challenge').avg().default(None), 'course_howmuchlearned_average': 
val['reduction'].get_field('course_howmuchlearned').avg().default(None), 'course_priorinterest_average': val['reduction'].get_field('course_priorinterest').avg().default(None) } chain = {} if unchained else _general_overtime(doc, val) cot.update(chain) return cot def _course_stats(doc, unchained=False): cot = { 'total_instructors': doc['fcq_data'].get_field('instructor_id').distinct().count(), 'courseoverall_average': doc['fcq_data'].get_field('courseoverall').avg().default(None), 'courseoverall_sd_average': doc['fcq_data'].get_field('courseoverall_sd').avg().default(None), 'course_challenge_average': doc['fcq_data'].get_field('course_challenge').avg().default(None), 'course_howmuchlearned_average': doc['fcq_data'].get_field('course_howmuchlearned').avg().default(None), 'course_priorinterest_average': doc['fcq_data'].get_field('course_priorinterest').avg().default(None) } chain = {} if unchained else _general_stats(doc) cot.update(chain) return cot def _department_overtime(doc, val): iot = _instructor_overtime(doc, val, unchained=True) cot = _course_overtime(doc, val, unchained=True) got = _general_overtime(doc, val) dot = { 'GR_courses': val['reduction'].filter({'level': 'GR'}).get_field('course_id').distinct().count(), 'UD_courses': val['reduction'].filter({'level': 'UD'}).get_field('course_id').distinct().count(), 'LD_courses': val['reduction'].filter({'level': 'LD'}).get_field('course_id').distinct().count(), 'TA_instructors': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(), 'OTH_instructors': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(), 'TTT_instructors': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count(), 'TA_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructoroverall').avg().default(None), 'OTH_instructoroverall_average': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructoroverall').avg().default(None), 'TTT_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructoroverall').avg().default(None), 'GR_courseoverall_average': val['reduction'].filter({'level': 'GR'}).get_field('courseoverall').avg().default(None), 'UD_courseoverall_average': val['reduction'].filter({'level': 'UD'}).get_field('courseoverall').avg().default(None), 'LD_courseoverall_average': val['reduction'].filter({'level': 'LD'}).get_field('courseoverall').avg().default(None), 'GR_forms_requested': val['reduction'].filter({'level': 'GR'}).sum('forms_requested'), 'UD_forms_requested': val['reduction'].filter({'level': 'UD'}).sum('forms_requested'), 'LD_forms_requested': val['reduction'].filter({'level': 'LD'}).sum('forms_requested') } dot.update(iot) dot.update(cot) dot.update(got) return dot def _department_stats(doc): iot = _instructor_stats(doc, unchained=True) cot = _course_stats(doc, unchained=True) got = _general_stats(doc) dot = { 'GR_courses': doc['fcq_data'].filter({'level': 'GR'}).get_field('course_id').distinct().count(), 'UD_courses': doc['fcq_data'].filter({'level': 'UD'}).get_field('course_id').distinct().count(), 'LD_courses': doc['fcq_data'].filter({'level': 'LD'}).get_field('course_id').distinct().count(), 'TA_instructors': doc['fcq_data'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(), 'OTH_instructors': doc['fcq_data'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(), 
'TTT_instructors': doc['fcq_data'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count() } dot.update(iot) dot.update(cot) dot.update(got) return dot # model_overtime for model in ['Instructor', 'Department', 'Course']: _model_overtime = { 'Instructor': _instructor_overtime, 'Department': _department_overtime, 'Course': _course_overtime }[model] _model_stats = { 'Instructor': _instructor_stats, 'Department': _department_stats, 'Course': _course_stats }[model] overtime_query = r.db(db).table(model).merge( lambda doc: {'fcq_data': r.db(db).table('Fcq').get_all(r.args(doc['fcqs'])).coerce_to('array')} ).for_each( lambda doc: r.db(db).table(model).get(doc['id']).update({'overtime': doc['fcq_data'].group('yearterm').ungroup().map( lambda val: [val['group'].coerce_to('string'), _model_overtime(doc, val)] ).coerce_to('object'), 'stats': _model_stats(doc)}) ).run(conn, array_limit=200000) logging.info(overtime_query)
def args(key):
    '''Create a rethinkdb ARGS object out of a list'''
    if type(key) is str:
        return key
    return r.args(key)
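A small illustration of why the helper distinguishes strings from lists (the table and key names are hypothetical): `r.args` splats a Python list into separate ReQL arguments, so `get_all` sees each key individually instead of a single array-valued key.

import rethinkdb as r

conn = r.connect(host='localhost', port=28015)   # assumed connection settings

one = r.table('users').get_all(args('alice')).run(conn)             # single key passed through unchanged
many = r.table('users').get_all(args(['alice', 'bob'])).run(conn)   # list wrapped in r.args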
def model_overtime(db, conn): def _grades_overtime(doc, val): return { "grade_data_averages": r.branch( ((doc.get_field("grades").count() > 0) & ((val["group"] % 10) != 4)), { "percent_a": val["reduction"].get_field("percent_a").avg().default(None), "percent_b": val["reduction"].get_field("percent_b").avg().default(None), "percent_c": val["reduction"].get_field("percent_c").avg().default(None), "percent_d": val["reduction"].get_field("percent_d").avg().default(None), "percent_f": val["reduction"].get_field("percent_f").avg().default(None), "percent_incomplete": val["reduction"].get_field("percent_incomplete").avg().default(None), "percent_c_minus_or_below": val["reduction"] .get_field("percent_c_minus_or_below") .avg() .default(None), "average_grade": val["reduction"].get_field("average_grade").avg().default(None), }, None, ) } def _general_overtime(doc, val): return { "total_fcqs": val["reduction"].count(), "total_forms_requested": val["reduction"].sum("forms_requested"), "total_forms_returned": val["reduction"].sum("forms_returned"), "denver_data_averages": r.branch( ((doc.get_field("campus").default(None) == "DN") & (val["group"] <= 20144)), { "r_fairness": val["reduction"].get_field("denver_data").get_field("r_fairness").avg().default(None), "r_presentation": val["reduction"] .get_field("denver_data") .get_field("r_presentation") .avg() .default(None), "r_workload": val["reduction"].get_field("denver_data").get_field("r_workload").avg().default(None), "r_diversity": val["reduction"] .get_field("denver_data") .get_field("r_diversity") .avg() .default(None), "r_accessibility": val["reduction"] .get_field("denver_data") .get_field("r_accessibility") .avg() .default(None), "r_learning": val["reduction"].get_field("denver_data").get_field("r_learning").avg().default(None), }, None, ), } def _general_stats(doc): return { "total_fcqs": doc["fcq_data"].count(), "total_forms_requested": doc["fcq_data"].sum("forms_requested"), "total_forms_returned": doc["fcq_data"].sum("forms_returned"), } def _instructor_overtime(doc, val, unchained=False): iot = { "GR_fcqs": val["reduction"].filter({"level": "GR"}).count(), "UD_fcqs": val["reduction"].filter({"level": "UD"}).count(), "LD_fcqs": val["reduction"].filter({"level": "LD"}).count(), "total_courses": val["reduction"].get_field("course_id").distinct().count(), "instructoroverall_average": val["reduction"].get_field("instructoroverall").avg().default(None), "instructoroverall_sd_average": val["reduction"].get_field("instructoroverall_sd").avg().default(None), "instructor_effectiveness_average": val["reduction"] .get_field("instructor_effectiveness") .avg() .default(None), "instructor_availability_average": val["reduction"] .get_field("instructor_availability") .avg() .default(None), "instructor_respect_average": val["reduction"].get_field("instructor_respect").avg().default(None), } chain = {} if unchained else _general_overtime(doc, val) iot.update(chain) return iot def _instructor_stats(doc, unchained=False): iot = { "GR_fcqs": doc["fcq_data"].filter({"level": "GR"}).count(), "UD_fcqs": doc["fcq_data"].filter({"level": "UD"}).count(), "LD_fcqs": doc["fcq_data"].filter({"level": "LD"}).count(), "total_courses": doc["fcq_data"].get_field("course_id").distinct().count(), "instructoroverall_average": doc["fcq_data"].get_field("instructoroverall").avg().default(None), "instructoroverall_sd_average": doc["fcq_data"].get_field("instructoroverall_sd").avg().default(None), "instructor_effectiveness_average": doc["fcq_data"] 
.get_field("instructor_effectiveness") .avg() .default(None), "instructor_availability_average": doc["fcq_data"].get_field("instructor_availability").avg().default(None), "instructor_respect_average": doc["fcq_data"].get_field("instructor_respect").avg().default(None), } chain = {} if unchained else _general_stats(doc) iot.update(chain) return iot def _course_overtime(doc, val, unchained=False): cot = { "total_instructors": val["reduction"].get_field("instructor_id").distinct().count(), "courseoverall_average": val["reduction"].get_field("courseoverall").avg().default(None), "courseoverall_sd_average": val["reduction"].get_field("courseoverall_sd").avg().default(None), "course_challenge_average": val["reduction"].get_field("course_challenge").avg().default(None), "course_howmuchlearned_average": val["reduction"].get_field("course_howmuchlearned").avg().default(None), "course_priorinterest_average": val["reduction"].get_field("course_priorinterest").avg().default(None), } chain = {} if unchained else _general_overtime(doc, val) cot.update(chain) return cot def _course_stats(doc, unchained=False): cot = { "total_instructors": doc["fcq_data"].get_field("instructor_id").distinct().count(), "courseoverall_average": doc["fcq_data"].get_field("courseoverall").avg().default(None), "courseoverall_sd_average": doc["fcq_data"].get_field("courseoverall_sd").avg().default(None), "course_challenge_average": doc["fcq_data"].get_field("course_challenge").avg().default(None), "course_howmuchlearned_average": doc["fcq_data"].get_field("course_howmuchlearned").avg().default(None), "course_priorinterest_average": doc["fcq_data"].get_field("course_priorinterest").avg().default(None), } chain = {} if unchained else _general_stats(doc) cot.update(chain) return cot def _department_overtime(doc, val): iot = _instructor_overtime(doc, val, unchained=True) cot = _course_overtime(doc, val, unchained=True) got = _general_overtime(doc, val) dot = { "GR_courses": val["reduction"].filter({"level": "GR"}).get_field("course_id").distinct().count(), "UD_courses": val["reduction"].filter({"level": "UD"}).get_field("course_id").distinct().count(), "LD_courses": val["reduction"].filter({"level": "LD"}).get_field("course_id").distinct().count(), "TA_instructors": val["reduction"] .filter({"instructor_group": "TA"}) .get_field("instructor_id") .distinct() .count(), "OTH_instructors": val["reduction"] .filter({"instructor_group": "OTH"}) .get_field("instructor_id") .distinct() .count(), "TTT_instructors": val["reduction"] .filter({"instructor_group": "TTT"}) .get_field("instructor_id") .distinct() .count(), "TA_instructoroverall_average": val["reduction"] .filter({"instructor_group": "TA"}) .get_field("instructoroverall") .avg() .default(None), "OTH_instructoroverall_average": val["reduction"] .filter({"instructor_group": "OTH"}) .get_field("instructoroverall") .avg() .default(None), "TTT_instructoroverall_average": val["reduction"] .filter({"instructor_group": "TTT"}) .get_field("instructoroverall") .avg() .default(None), "GR_courseoverall_average": val["reduction"] .filter({"level": "GR"}) .get_field("courseoverall") .avg() .default(None), "UD_courseoverall_average": val["reduction"] .filter({"level": "UD"}) .get_field("courseoverall") .avg() .default(None), "LD_courseoverall_average": val["reduction"] .filter({"level": "LD"}) .get_field("courseoverall") .avg() .default(None), "GR_forms_requested": val["reduction"].filter({"level": "GR"}).sum("forms_requested"), "UD_forms_requested": val["reduction"].filter({"level": 
"UD"}).sum("forms_requested"), "LD_forms_requested": val["reduction"].filter({"level": "LD"}).sum("forms_requested"), } dot.update(iot) dot.update(cot) dot.update(got) return dot def _department_stats(doc): iot = _instructor_stats(doc, unchained=True) cot = _course_stats(doc, unchained=True) got = _general_stats(doc) dot = { "GR_courses": doc["fcq_data"].filter({"level": "GR"}).get_field("course_id").distinct().count(), "UD_courses": doc["fcq_data"].filter({"level": "UD"}).get_field("course_id").distinct().count(), "LD_courses": doc["fcq_data"].filter({"level": "LD"}).get_field("course_id").distinct().count(), "TA_instructors": doc["fcq_data"] .filter({"instructor_group": "TA"}) .get_field("instructor_id") .distinct() .count(), "OTH_instructors": doc["fcq_data"] .filter({"instructor_group": "OTH"}) .get_field("instructor_id") .distinct() .count(), "TTT_instructors": doc["fcq_data"] .filter({"instructor_group": "TTT"}) .get_field("instructor_id") .distinct() .count(), } dot.update(iot) dot.update(cot) dot.update(got) return dot # model_overtime for model in ["Instructor", "Department", "Course"]: _model_overtime = { "Instructor": _instructor_overtime, "Department": _department_overtime, "Course": _course_overtime, }[model] _model_stats = {"Instructor": _instructor_stats, "Department": _department_stats, "Course": _course_stats}[ model ] overtime_query = ( r.db(db) .table(model) .merge( lambda doc: { "fcq_data": r.branch( doc["fcqs"].count() > 0, r.db(db).table("Fcq").get_all(r.args(doc["fcqs"])).coerce_to("array"), [], ), "grade_data": r.branch( doc["grades"].count() > 0, r.db(db).table("Grade").get_all(r.args(doc["grades"])).coerce_to("array"), [], ), } ) .for_each( lambda doc: r.db(db) .table(model) .get(doc["id"]) .update( { "fcq_data": None, "fcqs_overtime": doc["fcq_data"] .group("yearterm") .ungroup() .map(lambda val: [val["group"].coerce_to("string"), _model_overtime(doc, val)]) .coerce_to("object"), "fcqs_stats": _model_stats(doc), "grades_overtime": doc["grade_data"] .group("yearterm") .ungroup() .map(lambda val: [val["group"].coerce_to("string"), _grades_overtime(doc, val)]) .coerce_to("object"), "grades_stats": None, } ) ) .run(conn, array_limit=200000) ) logging.info(overtime_query)
def run(self):
    self.tid = get_tid()
    logs.downloads.debug('RUN-DOWNLOAD-THREAD-------------------------------------')

    if self.stop is False:
        self.r_conn = new_rethink_connection()
        for c in r.table('media').get_all(
                r.args(['Deleting', 'Deleted', 'Downloaded', 'DownloadStarting',
                        'Downloading', 'Download', 'DownloadAborting']),
                index='status'). \
                pluck('id', 'path', 'url-isard', 'url-web', 'status').merge(
                    {'table': 'media'}).changes(include_initial=True).union(
                        r.table('domains').get_all(
                            r.args(['Downloaded', 'DownloadStarting',
                                    'Downloading', 'DownloadAborting']),
                            index='status').
                        pluck('id', 'create_dict', 'url-isard', 'url-web', 'status').merge(
                            {"table": "domains"}).changes(include_initial=True)).union(
                    r.table('engine').pluck('threads', 'status_all_threads').merge(
                        {'table': 'engine'}).changes()).run(self.r_conn):

            if self.stop:
                break
            if c.get('new_val', None) is not None:
                if c['new_val'].get('table', False) == 'engine':
                    if c['new_val']['status_all_threads'] == 'Stopping':
                        break
                    else:
                        continue

            logs.downloads.debug('DOWNLOAD CHANGES DETECTED:')
            logs.downloads.debug(pprint.pformat(c))

            if c.get('old_val', None) is None:
                if c['new_val']['status'] == 'DownloadStarting':
                    self.start_download(c['new_val'])

            elif c.get('new_val', None) is None:
                if c['old_val']['status'] in ['DownloadAborting']:
                    self.remove_download_thread(c['old_val'])

            elif 'old_val' in c and 'new_val' in c:
                if c['old_val']['status'] == 'FailedDownload' and c['new_val']['status'] == 'DownloadStarting':
                    self.start_download(c['new_val'])

                elif c['old_val']['status'] == 'Downloaded' and c['new_val']['status'] == 'Deleting':
                    if c['new_val']['table'] == 'media':
                        self.delete_media(c['new_val'])

                elif c['old_val']['status'] == 'Deleting' and c['new_val']['status'] == 'Deleted':
                    if c['new_val']['table'] == 'media':
                        remove_media(c['new_val']['id'])

                elif c['old_val']['status'] == 'Downloading' and c['new_val']['status'] == 'FailedDownload':
                    pass

                elif c['old_val']['status'] == 'DownloadStarting' and c['new_val']['status'] == 'Downloading':
                    pass

                elif c['old_val']['status'] == 'Downloading' and c['new_val']['status'] == 'Downloaded':
                    pass

                elif c['old_val']['status'] == 'Downloading' and c['new_val']['status'] == 'DownloadAborting':
                    self.abort_download(c['new_val'])
print '\nRepo with GitHub path %s occurs %s times' % (owner_repo, len(repos))

# Use the most recently scraped row as the canonical row
canonical = repos.pop()
assert canonical
print 'Using %s as the canonical row' % canonical['id']

canonical_owner_repo = (canonical['owner'], canonical['repo_name'])
if canonical_owner_repo != owner_repo:
    print "Normalizing %s to %s for our canonical row" % (
        canonical_owner_repo, owner_repo)
    r.table(table).get(canonical['id']).update({
        'owner': canonical['owner'].lower(),
        'repo_name': canonical['repo_name'].lower()}).run(r_conn())
    updated += 1

if repos:
    dupe_ids = [dupe['id'] for dupe in repos]
    print 'Deleting duplicate rows: %s' % ', '.join(dupe_ids)
    r.table(table).get_all(r.args(dupe_ids)).delete().run(r_conn())
    deleted += len(repos)

print "Updated %d rows and deleted %d" % (updated, deleted)
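The cleanup above removes many rows in one round trip by expanding a Python list of primary keys with r.args. A minimal standalone sketch of that bulk delete, assuming a 'repos' table (the connection details and id values are illustrative):

import rethinkdb as r

conn = r.connect('localhost', 28015, db='example')  # hypothetical connection

dupe_ids = ['abc123', 'def456']  # primary keys of the rows to drop (illustrative values)
result = (
    r.table('repos')
    .get_all(r.args(dupe_ids))  # expand the list into individual key arguments
    .delete()
    .run(conn)
)
print('deleted %d duplicate rows' % result['deleted'])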
def get(self, id=None, o=None):
    """
    - GET /projects (public) Projects list
    - GET /projects/<id> (public) Project with <id>
    - GET /projects/<id>/(users|slices) (auth) Users/Slices list of the project with <id>

    :return:
    """
    response = []
    current_user = self.get_current_user()

    # GET /projects
    if not id and not o:
        if not current_user:
            self.userError('permission denied')
            return
        if self.isAdmin():
            f = {}
        else:
            f = lambda project: project["pi_users"].contains(current_user['id'])
        cursor = yield r.table('projects') \
            .pluck(self.fields['projects']) \
            .filter(f) \
            .merge(lambda project: {
                'authority': r.table('authorities').get(project['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': project['authority']})
            }) \
            .merge(lambda project: {
                'slices': r.table('slices')
                    .get_all(r.args(project['slices']))
                    .distinct()
                    .pluck(self.fields_short['slices'])
                    .coerce_to('array')
            }) \
            .merge(lambda project: {
                'pi_users': r.table('users')
                    .get_all(r.args(project['pi_users']))
                    .distinct()
                    .pluck(self.fields_short['users'])
                    .coerce_to('array')
            }) \
            .merge(lambda project: {
                'users': r.table('users')
                    .get_all(r.args(project['users']))
                    .distinct()
                    .pluck(self.fields_short['users'])
                    .coerce_to('array')
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            project = yield cursor.next()
            response.append(project)

    # GET /projects/<id>
    elif not o and id and self.isUrn(id):
        if not current_user:
            self.userError('permission denied')
            return
        # ReQL OR must be built with |, not Python's "or"
        cursor = yield r.table('projects') \
            .pluck(self.fields['projects']) \
            .filter({'id': id}) \
            .filter(lambda project: project["pi_users"].contains(current_user['id'])
                    | project["users"].contains(current_user['id'])) \
            .merge(lambda project: {
                'authority': r.table('authorities').get(project['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': project['authority']})
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            project = yield cursor.next()
            response.append(project)

    # GET /projects/<id>/users
    elif id and self.isUrn(id) and o == 'users':
        cursor = yield r.table(o) \
            .pluck(self.fields[o]) \
            .filter(lambda user: user["projects"].contains(id)) \
            .merge(lambda user: {
                'authority': r.table('authorities').get(user['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': user['authority']})
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            item = yield cursor.next()
            response.append(item)

    # GET /projects/<id>/slices
    elif id and self.isUrn(id) and o == 'slices':
        cursor = yield r.table(o) \
            .pluck(self.fields[o]) \
            .filter({"project": id}) \
            .merge(lambda slice: {
                'project': r.table('projects').get(slice['project'])
                    .pluck(self.fields_short['projects'])
                    .default({'id': slice['project']})
            }) \
            .merge(lambda slice: {
                'authority': r.table('authorities').get(slice['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': slice['authority']})
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            item = yield cursor.next()
            response.append(item)

    else:
        self.userError("invalid request")
        return

    self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
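Each merge above embeds a sub-query that expands an array of foreign keys into full documents, which is how these handlers fake a join. A minimal sketch of that join-by-merge pattern, assuming 'projects' rows hold a non-empty array of slice ids in 'slices' (the field names and connection are illustrative):

import rethinkdb as r

conn = r.connect('localhost', 28015, db='example')  # hypothetical connection

projects = (
    r.table('projects')
    .merge(lambda project: {
        'slices': r.table('slices')
        .get_all(r.args(project['slices']))  # expand the id array into key arguments
        .pluck('id', 'name')                 # assumed summary fields
        .coerce_to('array')                  # merge() needs a concrete array, not a stream
    })
    .run(conn)
)
for project in projects:
    print(project)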
def get(cls, _id=None, index=None, filter=None, contains=None, raw=None, orderby=None, limit=0, db={}):
    """Get

    Returns one or more records from the table. Send no ID to fetch all
    records in the table. Set an index to look for something other than the
    primary key.

    Args:
        _id (str|str[]): The ID(s) to fetch from the table
        index (str): If set, used as the index to search instead of the primary key
        filter (dict): If set, used as an additional filter to the ID or index lookup
        raw (bool|list): If set to true, raw dicts will be returned instead of
            Document instances. If set to a list or tuple, only those fields
            listed will be returned
        orderby (str|str[]): The field(s) to order the result by
        limit (uint): The number of records to return
        db (dict): Optional DB info
            'server' for the name of the host info passed to server()
            'postfix' for the postfix added to the DB name

    Returns:
        Table|Table[]|dict|dict[]

    Raises:
        StorageException
    """
    # Assume multiple records
    bMultiple = True

    # Get the info
    dInfo = cls.info(db)

    # If there is an index passed
    if index:
        # If the index doesn't exist
        if index not in dInfo['conf']['indexes']:
            raise StorageException('no index', index, 'tree')

    # Get a connection to the server
    with connect_with(dInfo['server']) as oCon:

        # Create a cursor for all records
        oCur = r \
            .db(dInfo['db']) \
            .table(dInfo['tree']._name)

        # If all records must be returned, we don't need to modify the
        # cursor any further
        if _id is None:
            pass

        # Else, if there's an index
        elif index:
            # If it's a tuple
            if isinstance(_id, tuple):
                # Check if one of the values is None
                iNone = -1
                for i in range(len(_id)):
                    # If a value is None
                    if _id[i] is None:
                        # If we already have an index
                        if iNone != -1:
                            raise StorageException(
                                'can\'t list more than one None in an index tuple'
                            )
                        # Store the index
                        iNone = i

                # If we have one
                if iNone > -1:
                    # Copy the tuples
                    idMax = list(_id)
                    idMin = list(_id)
                    # Change the None accordingly
                    idMax[iNone] = r.maxval
                    idMin[iNone] = r.minval
                    # Call between instead of get_all
                    oCur = oCur.between(idMin, idMax, index=index)
                # Else we have no Nones, pass it through
                else:
                    oCur = oCur.get_all(_id, index=index)

            # Else if it's a list
            elif isinstance(_id, list):
                oCur = oCur.get_all(r.args(_id), index=index)

            # Else just pass it through
            else:
                oCur = oCur.get_all(_id, index=index)

        # Else, we are dealing with the primary key
        else:
            # If we got multiple IDs
            if isinstance(_id, (tuple, list)):
                # Continue to filter using get all
                oCur = oCur.get_all(*_id)
            # Else we want one record
            else:
                # Turn off the multiple flag
                bMultiple = False
                # Filter to a single ID
                oCur = oCur.get(_id)

        # If an additional filter was passed
        if filter:
            oCur = oCur.filter(filter)

        # If there's a contains
        if contains:
            # If we don't have a list
            if not isinstance(contains[1], (tuple, list)):
                contains = [contains[0], [contains[1]]]
            # Add the contains filter
            oCur = oCur.filter(
                lambda obj: obj[contains[0]].contains(*contains[1]))

        # If there's a limit
        if limit > 0:
            oCur = oCur.limit(limit)

        # If a raw request was done with specific fields
        if isinstance(raw, (tuple, list)):
            oCur = oCur.pluck(*raw).default(None)

        # If an order by list was sent
        if isinstance(orderby, (tuple, list)):
            oCur = oCur.order_by(*orderby)
        # Else if an order field was sent
        elif isinstance(orderby, basestring):
            oCur = oCur.order_by(orderby)

        try:
            # Run the request
            itRes = oCur.run(oCon)
        except r.errors.ReqlOpFailedError as e:
            # The index doesn't exist
            if e.args[0][:5] == 'Index':
                raise StorageException('no index', index, 'table')
            # Else, re-raise
            raise e

        # If we are expecting a single record
        if limit == 1:
            # Try to get one row
            try:
                dRow = itRes.next()
            except r.net.DefaultCursorEmpty as e:
                return None
            # If it's raw, don't instantiate it
            return (raw and dRow or cls(dRow, db))

        # If there's no data
        if not itRes:
            if bMultiple:
                if limit == 1:
                    return None
                else:
                    return []
            else:
                return None

        # If multiple records are expected
        if bMultiple:
            # If Raw requested, return as is
            if raw:
                return [d for d in itRes]
            # Else create instances for each
            else:
                return [cls(d, db) for d in itRes]
        # Else, one record requested
        else:
            return raw and itRes or cls(itRes, db)
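When one element of a compound-index tuple is None, the classmethod above widens that position with r.minval/r.maxval and switches from get_all to between. A standalone sketch of that trick, assuming a table with a compound index on ('status', 'created') (the table, index, and connection names are illustrative):

import rethinkdb as r

conn = r.connect('localhost', 28015, db='example')  # hypothetical connection

# Match every row whose compound index value is ('active', <anything>),
# i.e. the second position is left open by bounding it with minval/maxval.
rows = (
    r.table('tree')
    .between(['active', r.minval], ['active', r.maxval], index='status_created')
    .run(conn)
)
for row in rows:
    print(row['id'])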
def get(self, id=None, o=None):
    """
    - GET /users (auth) Users list
    - GET /users/<id> (auth) User with <id>
    - GET /users/<email> (auth) User with <email>
    - GET /users/(projects|slices|authorities) (auth) Projects/Slices/Authorities list of the authenticated user
    - GET /users/<id>/(projects|slices|authorities) (auth) Projects/Slices list of the user with <id>

    :return:
    """
    response = []
    current_user = self.get_current_user()
    if not current_user:
        self.userError("not authenticated")
        return

    # GET /users
    if not id and not o:
        cursor = yield r.table('users') \
            .pluck(self.fields['users']) \
            .merge(lambda user: {
                'authority': r.table('authorities').get(user['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': user['authority']})
            }) \
            .merge(lambda user: {
                'pi_authorities': r.table('authorities')
                    .get_all(r.args(user['pi_authorities']))
                    .distinct()
                    .pluck(self.fields_short['authorities'])
                    .coerce_to('array')
            }) \
            .merge(lambda user: {
                'projects': r.table('projects')
                    .get_all(r.args(user['projects']))
                    .distinct()
                    .pluck(self.fields_short['projects'])
                    .coerce_to('array')
            }) \
            .merge(lambda user: {
                'slices': r.table('slices')
                    .get_all(r.args(user['slices']))
                    .distinct()
                    .pluck(self.fields_short['slices'])
                    .coerce_to('array')
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            users = yield cursor.next()
            response.append(users)

    # GET /users/<id> or /users/<email>
    elif not o and id:
        if self.isUrn(id):
            f = {'id': id}
        elif self.isEmail(id):
            f = {'email': id}
        if not current_user:
            self.userError('permission denied')
            return
        cursor = yield r.table('users') \
            .pluck(self.fields['users']) \
            .filter(f) \
            .merge(lambda user: {
                'authority': r.table('authorities').get(user['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': user['authority']})
            }) \
            .merge(lambda user: {
                'pi_authorities': r.table('authorities')
                    .get_all(r.args(user['pi_authorities']))
                    .distinct()
                    .pluck(self.fields_short['authorities'])
                    .coerce_to('array')
            }) \
            .merge(lambda user: {
                'projects': r.table('projects')
                    .get_all(r.args(user['projects']))
                    .distinct()
                    .pluck(self.fields_short['projects'])
                    .coerce_to('array')
            }) \
            .merge(lambda user: {
                'slices': r.table('slices')
                    .get_all(r.args(user['slices']))
                    .distinct()
                    .pluck(self.fields_short['slices'])
                    .coerce_to('array')
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            user = yield cursor.next()
            response.append(user)

    # GET /users/[<id>/]projects
    elif o == 'projects':
        if not id or not self.isUrn(id):
            try:
                id = current_user['id']
            except Exception as e:
                self.serverError("user is not logged in")
                return
        cursor = yield r.table(o) \
            .pluck(self.fields[o]) \
            .filter(lambda project: project["pi_users"].contains(id)) \
            .merge(lambda project: {
                'authority': r.table('authorities').get(project['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': project['authority']})
            }) \
            .merge(lambda project: {
                'slices': r.table('slices')
                    .get_all(r.args(project['slices']))
                    .distinct()
                    .pluck(self.fields_short['slices'])
                    .coerce_to('array')
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            item = yield cursor.next()
            response.append(item)

    # GET /users/[<id>/]slices
    elif o == 'slices':
        if not id or not self.isUrn(id):
            try:
                id = current_user['id']
            except Exception as e:
                self.serverError("user is not logged in")
                return
        cursor = yield r.table(o) \
            .pluck(self.fields[o]) \
            .filter(lambda slice: slice["users"].contains(id)) \
            .merge(lambda slice: {
                'project': r.table('projects').get(slice['project'])
                    .pluck(self.fields_short['projects'])
                    .default({'id': slice['project']})
            }) \
            .merge(lambda slice: {
                'authority': r.table('authorities').get(slice['authority'])
                    .pluck(self.fields_short['authorities'])
                    .default({'id': slice['authority']})
            }) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            item = yield cursor.next()
            response.append(item)

    # GET /users/[<id>/]authorities
    elif o == 'authorities':
        if not id or not self.isUrn(id):
            try:
                id = current_user['id']
            except Exception as e:
                self.serverError("user is not logged in")
                return
        # ReQL OR must be built with |, not Python's "or"
        cursor = yield r.table('authorities') \
            .pluck(self.fields['authorities']) \
            .filter(lambda authority: authority["pi_users"].contains(current_user['id'])
                    | authority["users"].contains(current_user['id'])) \
            .run(self.dbconnection)
        while (yield cursor.fetch_next()):
            authority = yield cursor.next()
            response.append(authority)

    else:
        self.userError("invalid request")
        return

    self.finish(json.dumps({"result": response}, cls=myJSONEncoder))
def model_overtime(db, conn):
    def _grades_overtime(doc, val):
        return r.branch(((doc.get_field('grades').count() > 0) & ((val['group'] % 10) != 4)), {
            'percent_a': val['reduction'].get_field('percent_a').avg().default(None),
            'percent_b': val['reduction'].get_field('percent_b').avg().default(None),
            'percent_c': val['reduction'].get_field('percent_c').avg().default(None),
            'percent_d': val['reduction'].get_field('percent_d').avg().default(None),
            'percent_f': val['reduction'].get_field('percent_f').avg().default(None),
            'percent_incomplete': val['reduction'].get_field('percent_incomplete').avg().default(None),
            'percent_c_minus_or_below': val['reduction'].get_field('percent_c_minus_or_below').avg().default(None),
            'average_grade': val['reduction'].get_field('average_grade').avg().default(None),
        }, None)

    def _expanded_grades_overtime(doc, val):
        return r.branch(((doc.get_field('grades').count() > 0) & ((val['group'] % 10) != 4)), {
            'percent_a': val['reduction'].get_field('percent_a').avg().default(None),
            'percent_b': val['reduction'].get_field('percent_b').avg().default(None),
            'percent_c': val['reduction'].get_field('percent_c').avg().default(None),
            'percent_d': val['reduction'].get_field('percent_d').avg().default(None),
            'percent_f': val['reduction'].get_field('percent_f').avg().default(None),
            'percent_incomplete': val['reduction'].get_field('percent_incomplete').avg().default(None),
            'percent_c_minus_or_below': val['reduction'].get_field('percent_c_minus_or_below').avg().default(None),
            'average_grade': val['reduction'].get_field('average_grade').avg().default(None),
            'GR_percent_a': val['reduction'].filter({'level': 'GR'}).get_field('percent_a').avg().default(None),
            'GR_percent_b': val['reduction'].filter({'level': 'GR'}).get_field('percent_b').avg().default(None),
            'GR_percent_c': val['reduction'].filter({'level': 'GR'}).get_field('percent_c').avg().default(None),
            'GR_percent_d': val['reduction'].filter({'level': 'GR'}).get_field('percent_d').avg().default(None),
            'GR_percent_f': val['reduction'].filter({'level': 'GR'}).get_field('percent_f').avg().default(None),
            'GR_percent_incomplete': val['reduction'].filter({'level': 'GR'}).get_field('percent_incomplete').avg().default(None),
            'GR_percent_c_minus_or_below': val['reduction'].filter({'level': 'GR'}).get_field('percent_c_minus_or_below').avg().default(None),
            'GR_average_grade': val['reduction'].filter({'level': 'GR'}).get_field('average_grade').avg().default(None),
            'UD_percent_a': val['reduction'].filter({'level': 'UD'}).get_field('percent_a').avg().default(None),
            'UD_percent_b': val['reduction'].filter({'level': 'UD'}).get_field('percent_b').avg().default(None),
            'UD_percent_c': val['reduction'].filter({'level': 'UD'}).get_field('percent_c').avg().default(None),
            'UD_percent_d': val['reduction'].filter({'level': 'UD'}).get_field('percent_d').avg().default(None),
            'UD_percent_f': val['reduction'].filter({'level': 'UD'}).get_field('percent_f').avg().default(None),
            'UD_percent_incomplete': val['reduction'].filter({'level': 'UD'}).get_field('percent_incomplete').avg().default(None),
            'UD_percent_c_minus_or_below': val['reduction'].filter({'level': 'UD'}).get_field('percent_c_minus_or_below').avg().default(None),
            'UD_average_grade': val['reduction'].filter({'level': 'UD'}).get_field('average_grade').avg().default(None),
            'LD_percent_a': val['reduction'].filter({'level': 'LD'}).get_field('percent_a').avg().default(None),
            'LD_percent_b': val['reduction'].filter({'level': 'LD'}).get_field('percent_b').avg().default(None),
            'LD_percent_c': val['reduction'].filter({'level': 'LD'}).get_field('percent_c').avg().default(None),
            'LD_percent_d': val['reduction'].filter({'level': 'LD'}).get_field('percent_d').avg().default(None),
            'LD_percent_f': val['reduction'].filter({'level': 'LD'}).get_field('percent_f').avg().default(None),
            'LD_percent_incomplete': val['reduction'].filter({'level': 'LD'}).get_field('percent_incomplete').avg().default(None),
            'LD_percent_c_minus_or_below': val['reduction'].filter({'level': 'LD'}).get_field('percent_c_minus_or_below').avg().default(None),
            'LD_average_grade': val['reduction'].filter({'level': 'LD'}).get_field('average_grade').avg().default(None),
        }, None)

    def _grades_stats(doc):
        return r.branch((doc.get_field('grades').count() > 0), {
            'percent_a': doc['grade_data'].get_field('percent_a').avg().default(None),
            'percent_b': doc['grade_data'].get_field('percent_b').avg().default(None),
            'percent_c': doc['grade_data'].get_field('percent_c').avg().default(None),
            'percent_d': doc['grade_data'].get_field('percent_d').avg().default(None),
            'percent_f': doc['grade_data'].get_field('percent_f').avg().default(None),
            'percent_incomplete': doc['grade_data'].get_field('percent_incomplete').avg().default(None),
            'percent_c_minus_or_below': doc['grade_data'].get_field('percent_c_minus_or_below').avg().default(None),
            'average_grade': doc['grade_data'].get_field('average_grade').avg().default(None),
        }, None)

    def _expanded_grades_stats(doc):
        return r.branch((doc.get_field('grades').count() > 0), {
            'percent_a': doc['grade_data'].get_field('percent_a').avg().default(None),
            'percent_b': doc['grade_data'].get_field('percent_b').avg().default(None),
            'percent_c': doc['grade_data'].get_field('percent_c').avg().default(None),
            'percent_d': doc['grade_data'].get_field('percent_d').avg().default(None),
            'percent_f': doc['grade_data'].get_field('percent_f').avg().default(None),
            'percent_incomplete': doc['grade_data'].get_field('percent_incomplete').avg().default(None),
            'percent_c_minus_or_below': doc['grade_data'].get_field('percent_c_minus_or_below').avg().default(None),
            'average_grade': doc['grade_data'].get_field('average_grade').avg().default(None),
            'GR_percent_a': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_a').avg().default(None),
            'GR_percent_b': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_b').avg().default(None),
            'GR_percent_c': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_c').avg().default(None),
            'GR_percent_d': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_d').avg().default(None),
            'GR_percent_f': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_f').avg().default(None),
            'GR_percent_incomplete': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_incomplete').avg().default(None),
            'GR_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'GR'}).get_field('percent_c_minus_or_below').avg().default(None),
            'GR_average_grade': doc['grade_data'].filter({'level': 'GR'}).get_field('average_grade').avg().default(None),
            'UD_percent_a': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_a').avg().default(None),
            'UD_percent_b': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_b').avg().default(None),
            'UD_percent_c': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_c').avg().default(None),
            'UD_percent_d': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_d').avg().default(None),
            'UD_percent_f': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_f').avg().default(None),
            'UD_percent_incomplete': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_incomplete').avg().default(None),
            'UD_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'UD'}).get_field('percent_c_minus_or_below').avg().default(None),
            'UD_average_grade': doc['grade_data'].filter({'level': 'UD'}).get_field('average_grade').avg().default(None),
            'LD_percent_a': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_a').avg().default(None),
            'LD_percent_b': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_b').avg().default(None),
            'LD_percent_c': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_c').avg().default(None),
            'LD_percent_d': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_d').avg().default(None),
            'LD_percent_f': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_f').avg().default(None),
            'LD_percent_incomplete': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_incomplete').avg().default(None),
            'LD_percent_c_minus_or_below': doc['grade_data'].filter({'level': 'LD'}).get_field('percent_c_minus_or_below').avg().default(None),
            'LD_average_grade': doc['grade_data'].filter({'level': 'LD'}).get_field('average_grade').avg().default(None),
        }, None)

    def _general_overtime(doc, val):
        return {
            'total_fcqs': val['reduction'].count(),
            'total_forms_requested': val['reduction'].sum('forms_requested'),
            'total_forms_returned': val['reduction'].sum('forms_returned'),
            'denver_data_averages': r.branch(((doc.get_field('campus').default(None) == 'DN') & (val['group'] <= 20144)), {
                'r_fairness': val['reduction'].get_field('denver_data').get_field('r_fairness').avg().default(None),
                'r_presentation': val['reduction'].get_field('denver_data').get_field('r_presentation').avg().default(None),
                'r_workload': val['reduction'].get_field('denver_data').get_field('r_workload').avg().default(None),
                'r_diversity': val['reduction'].get_field('denver_data').get_field('r_diversity').avg().default(None),
                'r_accessibility': val['reduction'].get_field('denver_data').get_field('r_accessibility').avg().default(None),
                'r_learning': val['reduction'].get_field('denver_data').get_field('r_learning').avg().default(None),
            }, None)
        }

    def _general_stats(doc):
        return {
            'total_fcqs': doc['fcq_data'].count(),
            'total_forms_requested': doc['fcq_data'].sum('forms_requested'),
            'total_forms_returned': doc['fcq_data'].sum('forms_returned'),
        }

    def _instructor_overtime(doc, val, unchained=False):
        iot = {
            'GR_fcqs': val['reduction'].filter({'level': 'GR'}).count(),
            'UD_fcqs': val['reduction'].filter({'level': 'UD'}).count(),
            'LD_fcqs': val['reduction'].filter({'level': 'LD'}).count(),
            'total_courses': val['reduction'].get_field('course_id').distinct().count(),
            'instructoroverall_average': val['reduction'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': val['reduction'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': val['reduction'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': val['reduction'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': val['reduction'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        iot.update(chain)
        return iot

    def _instructor_stats(doc, unchained=False):
        iot = {
            'GR_fcqs': doc['fcq_data'].filter({'level': 'GR'}).count(),
            'UD_fcqs': doc['fcq_data'].filter({'level': 'UD'}).count(),
            'LD_fcqs': doc['fcq_data'].filter({'level': 'LD'}).count(),
            'total_courses': doc['fcq_data'].get_field('course_id').distinct().count(),
            'instructoroverall_average': doc['fcq_data'].get_field('instructoroverall').avg().default(None),
            'instructoroverall_sd_average': doc['fcq_data'].get_field('instructoroverall_sd').avg().default(None),
            'instructor_effectiveness_average': doc['fcq_data'].get_field('instructor_effectiveness').avg().default(None),
            'instructor_availability_average': doc['fcq_data'].get_field('instructor_availability').avg().default(None),
            'instructor_respect_average': doc['fcq_data'].get_field('instructor_respect').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        iot.update(chain)
        return iot

    def _course_overtime(doc, val, unchained=False):
        cot = {
            'total_instructors': val['reduction'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': val['reduction'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': val['reduction'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': val['reduction'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': val['reduction'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': val['reduction'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_overtime(doc, val)
        cot.update(chain)
        return cot

    def _course_stats(doc, unchained=False):
        cot = {
            'total_instructors': doc['fcq_data'].get_field('instructor_id').distinct().count(),
            'courseoverall_average': doc['fcq_data'].get_field('courseoverall').avg().default(None),
            'courseoverall_sd_average': doc['fcq_data'].get_field('courseoverall_sd').avg().default(None),
            'course_challenge_average': doc['fcq_data'].get_field('course_challenge').avg().default(None),
            'course_howmuchlearned_average': doc['fcq_data'].get_field('course_howmuchlearned').avg().default(None),
            'course_priorinterest_average': doc['fcq_data'].get_field('course_priorinterest').avg().default(None)
        }
        chain = {} if unchained else _general_stats(doc)
        cot.update(chain)
        return cot

    def _department_overtime(doc, val):
        iot = _instructor_overtime(doc, val, unchained=True)
        cot = _course_overtime(doc, val, unchained=True)
        got = _general_overtime(doc, val)
        dot = {
            'GR_courses': val['reduction'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': val['reduction'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': val['reduction'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count(),
            'TA_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TA'}).get_field('instructoroverall').avg().default(None),
            'OTH_instructoroverall_average': val['reduction'].filter({'instructor_group': 'OTH'}).get_field('instructoroverall').avg().default(None),
            'TTT_instructoroverall_average': val['reduction'].filter({'instructor_group': 'TTT'}).get_field('instructoroverall').avg().default(None),
            'GR_courseoverall_average': val['reduction'].filter({'level': 'GR'}).get_field('courseoverall').avg().default(None),
            'UD_courseoverall_average': val['reduction'].filter({'level': 'UD'}).get_field('courseoverall').avg().default(None),
            'LD_courseoverall_average': val['reduction'].filter({'level': 'LD'}).get_field('courseoverall').avg().default(None),
            'GR_forms_requested': val['reduction'].filter({'level': 'GR'}).sum('forms_requested'),
            'UD_forms_requested': val['reduction'].filter({'level': 'UD'}).sum('forms_requested'),
            'LD_forms_requested': val['reduction'].filter({'level': 'LD'}).sum('forms_requested')
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    def _department_stats(doc):
        iot = _instructor_stats(doc, unchained=True)
        cot = _course_stats(doc, unchained=True)
        got = _general_stats(doc)
        dot = {
            'GR_courses': doc['fcq_data'].filter({'level': 'GR'}).get_field('course_id').distinct().count(),
            'UD_courses': doc['fcq_data'].filter({'level': 'UD'}).get_field('course_id').distinct().count(),
            'LD_courses': doc['fcq_data'].filter({'level': 'LD'}).get_field('course_id').distinct().count(),
            'TA_instructors': doc['fcq_data'].filter({'instructor_group': 'TA'}).get_field('instructor_id').distinct().count(),
            'OTH_instructors': doc['fcq_data'].filter({'instructor_group': 'OTH'}).get_field('instructor_id').distinct().count(),
            'TTT_instructors': doc['fcq_data'].filter({'instructor_group': 'TTT'}).get_field('instructor_id').distinct().count()
        }
        dot.update(iot)
        dot.update(cot)
        dot.update(got)
        return dot

    # model_overtime
    for model in ['Instructor', 'Department', 'Course', 'College', 'Campus']:
        _model_overtime = {
            'Instructor': _instructor_overtime,
            'Department': _department_overtime,
            'College': _department_overtime,
            'Campus': _department_overtime,
            'Course': _course_overtime
        }[model]
        _model_stats = {
            'Instructor': _instructor_stats,
            'Department': _department_stats,
            'College': _department_stats,
            'Campus': _department_stats,
            'Course': _course_stats
        }[model]
        _model_grades_overtime = {
            'Instructor': _expanded_grades_overtime,
            'Department': _expanded_grades_overtime,
            'College': _expanded_grades_overtime,
            'Campus': _expanded_grades_overtime,
            'Course': _grades_overtime
        }[model]
        _model_grades_stats = {
            'Instructor': _expanded_grades_stats,
            'Department': _expanded_grades_stats,
            'College': _expanded_grades_stats,
            'Campus': _expanded_grades_stats,
            'Course': _grades_stats
        }[model]
        overtime_query = r.db(db).table(model).merge(
            lambda doc: {
                'fcq_data': r.branch(
                    doc['fcqs'].count() > 0,
                    r.db(db).table('Fcq').get_all(r.args(doc['fcqs'])).coerce_to('array'),
                    []
                ),
                'grade_data': r.branch(
                    doc['grades'].count() > 0,
                    r.db(db).table('Grade').get_all(r.args(doc['grades'])).coerce_to('array'),
                    []
                )
            }
        ).for_each(
            lambda doc: r.db(db).table(model).get(doc['id']).update({
                'fcq_data': None,
                'fcqs_overtime': doc['fcq_data'].group('yearterm').ungroup().map(
                    lambda val: [val['group'].coerce_to('string'), _model_overtime(doc, val)]
                ).coerce_to('object'),
                'fcqs_stats': _model_stats(doc),
                'grades_overtime': doc['grade_data'].group('yearterm').ungroup().map(
                    lambda val: [val['group'].coerce_to('string'), _model_grades_overtime(doc, val)]
                ).coerce_to('object'),
                'grades_stats': _model_grades_stats(doc)
            })
        ).run(conn, array_limit=200000)
        logging.info(overtime_query)
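Both versions of model_overtime build their *_overtime fields with the same group → ungroup → map → coerce_to('object') pipeline, turning per-yearterm groups into a single object keyed by term. A stripped-down sketch of that shape, assuming an 'Fcq' table whose rows carry a numeric 'yearterm' and a 'forms_returned' count (connection details are hypothetical):

import rethinkdb as r

conn = r.connect('localhost', 28015, db='fcqs')  # hypothetical connection

forms_by_term = (
    r.table('Fcq')
    .group('yearterm')                        # {yearterm: [rows]}
    .ungroup()                                # [{'group': yearterm, 'reduction': [rows]}, ...]
    .map(lambda val: [
        val['group'].coerce_to('string'),     # object keys must be strings
        val['reduction'].sum('forms_returned')
    ])
    .coerce_to('object')                      # [[key, value], ...] -> {key: value}
    .run(conn)
)
print(forms_by_term)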