def crawler_stats(cls, crawler_id):
    stats = {}
    col = func.count(func.distinct(cls.crawler_run))
    q = db.session.query(col)
    q = q.filter(cls.crawler_id == crawler_id)
    stats['run_count'] = q.scalar()

    last_run_id, last_run_time = cls.crawler_last_run(crawler_id)

    # Check if the crawler was active very recently; if so, don't
    # allow the user to execute a new run right now.
    timeout = (datetime.utcnow() - CrawlerState.TIMEOUT)
    stats['running'] = last_run_time > timeout if last_run_time else False

    q = db.session.query(func.count(func.distinct(cls.foreign_id)))
    q = q.filter(cls.crawler_id == crawler_id)
    for section in ['last', 'all']:
        data = {}
        sq = q
        if section == 'last':
            sq = sq.filter(cls.crawler_run == last_run_id)
        okq = sq.filter(cls.status == cls.STATUS_OK)
        data['ok'] = okq.scalar() if last_run_id else 0
        failq = sq.filter(cls.status == cls.STATUS_FAIL)
        data['fail'] = failq.scalar() if last_run_id else 0
        stats[section] = data

    stats['last']['updated'] = last_run_time
    stats['last']['run_id'] = last_run_id
    return stats
def _infer_hypothesis_id(self):
    try:
        hypothesis_id = self.parameters['hypothesis_id']
        return hypothesis_id
    except KeyError:
        session = object_session(self)
        if self.analysis_type == AnalysisTypeEnum.glycopeptide_lc_msms:
            from . import IdentifiedGlycopeptide, GlycopeptideHypothesis, Glycopeptide
            hypothesis_id = session.query(
                func.distinct(GlycopeptideHypothesis.id)).join(Glycopeptide).join(
                IdentifiedGlycopeptide,
                Glycopeptide.id == IdentifiedGlycopeptide.structure_id).filter(
                IdentifiedGlycopeptide.analysis_id == self.id).scalar()
            return hypothesis_id
        elif self.analysis_type == AnalysisTypeEnum.glycan_lc_ms:
            from . import GlycanComposition, GlycanCompositionChromatogram, GlycanHypothesis
            hypothesis_id = session.query(
                func.distinct(GlycanHypothesis.id)).join(GlycanComposition).join(
                GlycanCompositionChromatogram,
                GlycanCompositionChromatogram.glycan_composition_id == GlycanComposition.id).filter(
                GlycanCompositionChromatogram.analysis_id == self.id).scalar()
            return hypothesis_id
        else:
            raise ValueError(self.analysis_type)
def resolve_pipelines_by_stage_id_and_tag_id_and_field_id(
        self, info, stage_id=None, tag_id=None, field_id=None, **args):
    sub_query = db_session.query(
        func.distinct(models.Pipelines.pipeline_id).label('pipeline_id'),
        models.Pipelines.name.label('pipeline_name'),
        models.Pipelines.display_name.label('pipeline_display_name'),
        models.Pipelines.pipeline_status_id.label('pipeline_status_id'),
        models.PipelineStage.display_name.label('stage_display_name'),
        func.count(func.distinct(models.Processes.process_id)).label('process_count'),
        func.max(models.Processes.process_id).label('last_process_id'),
    ).select_from(
        models.Pipelines
    ).join(
        models.Pipelines.processes
    ).join(
        models.PipelineStage
    ).join(
        models.ProcessStatus
    ).group_by(
        models.Pipelines.pipeline_id,
        models.PipelineStage.pipeline_stage_id
    ).order_by(
        models.PipelineStage.display_name,
        models.Pipelines.display_name
    )

    _filters = list()
    # _filters.append(models.Processes.flag_removed == False)
    _filters.append(models.Processes.instance == INSTANCE)

    # The link between the processes table and the release_tag table
    # goes through the fields table.
    if field_id or tag_id:
        sub_query = sub_query.join(models.ProcessFields).join(models.Fields)
        if field_id:
            _filters.append(models.ProcessFields.field_id == field_id)
        if tag_id:
            sub_query = sub_query.join(models.ReleaseTag)
            _filters.append(models.ReleaseTag.tag_id == tag_id)

    if stage_id:
        _filters.append(models.Pipelines.pipeline_stage_id == stage_id)

    sub_query = sub_query.filter(and_(*_filters)).subquery()

    query = db_session.query(
        sub_query,
        models.Processes.start_time.label('last_process_start_time'),
        models.Processes.end_time.label('last_process_end_time'),
        models.ProcessStatus.name.label('last_process_status'),
    ).join(
        sub_query,
        models.Processes.process_id == sub_query.c.last_process_id
    ).join(models.ProcessStatus).all()

    result = list()
    for row in query:
        result.append(schemas.PipelinesExecution(**row._asdict()))
    return result
def select_tz(self):
    """Select time zones and other related fields from database.

    Selects count of messages, count of distinct senders, time zone.

    Returns
    -------
    Query object

    """
    query = self.add_columns(
        label("tz", ((DB.Messages.first_date_tz.op('div')(3600) + 36) % 24) - 12),
        label("messages", func.count(func.distinct(DB.Messages.message_ID))),
        label("authors", func.count(func.distinct(DB.MessagesPeople.email_address))))
    self.joined.append(DB.Messages)
    if DB.MessagesPeople not in self.joined:
        query = query.join(DB.MessagesPeople)
        self.joined.append(DB.MessagesPeople)
    query = query.filter(DB.MessagesPeople.type_of_recipient == "From")
    return query
def nearest_neighbors(self, limit=10):
    '''
    Returns a list of (user, score) tuples with the closest matching
    skills. If they haven't answered the equivalent skill question,
    we consider that a very big difference (12).

    Order is closest to least close, which is an ascending score.
    '''
    my_skills = aliased(UserSkill, name='my_skills', adapt_on_names=True)
    their_skills = aliased(UserSkill, name='their_skills', adapt_on_names=True)

    # difference we assume for user that has not answered question
    unanswered_difference = (LEVELS['LEVEL_I_CAN_DO_IT']['score'] -
                             LEVELS['LEVEL_I_WANT_TO_LEARN']['score']) * 2

    # Same expression is used both as the selected score column and as
    # the sort key.
    score = (((len(self.skills) - func.count(func.distinct(their_skills.id))) *
              unanswered_difference) +
             func.sum(func.abs(their_skills.level - my_skills.level)))

    return User.query_in_deployment().\
        add_column(score).\
        filter(their_skills.user_id != my_skills.user_id).\
        filter(User.id == their_skills.user_id).\
        filter(their_skills.name == my_skills.name).\
        filter(my_skills.user_id == self.id).\
        group_by(User).\
        order_by(score).\
        limit(limit)
def general_stats(info_role):
    """Return stats about synthese.

    .. :quickref: Synthese;

    - nb of observations
    - nb of distinct species
    - nb of distinct observers
    - nb of datasets
    """
    allowed_datasets = get_datasets_cruved(info_role)
    q = select(
        [
            func.count(Synthese.id_synthese),
            func.count(func.distinct(Synthese.cd_nom)),
            func.count(func.distinct(Synthese.observers)),
        ]
    )
    synthese_query_obj = SyntheseQuery(Synthese, q, {})
    synthese_query_obj.filter_query_with_cruved(info_role)
    result = DB.session.execute(synthese_query_obj.query)
    synthese_counts = result.fetchone()
    data = {
        "nb_data": synthese_counts[0],
        "nb_species": synthese_counts[1],
        "nb_observers": synthese_counts[2],
        "nb_dataset": len(allowed_datasets),
    }
    return data
def get_phenotype_range_summary(
        self, phenotype: str, chromosome_range: ChromosomeRange,
        flags: typing.Dict[str, typing.Any] = {}) -> SearchSummary:
    session = self.Session()
    flags = {
        **{
            "phenotype1": phenotype,
            "locus_id1_chromosome": chromosome_range.chromosome,
            "locus_id1_position.gte": chromosome_range.start,
            "locus_id1_position.lte": chromosome_range.stop
        },
        **flags
    }
    _, count = self.support.create_filter(
        session.query(self.support.clazz), flags=flags)
    count = count.count()
    unique_phenotype2 = session.query(
        func.count(func.distinct(getattr(self.support.clazz, "phenotype2"))))
    warnings, unique_phenotype2 = self.support.create_filter(
        unique_phenotype2, flags=flags)
    unique_phenotype2 = unique_phenotype2.scalar()
    unique_tissue2 = session.query(
        func.count(func.distinct(getattr(self.support.clazz, "tissue2"))))
    warnings, unique_tissue2 = self.support.create_filter(
        unique_tissue2, flags=flags)
    unique_tissue2 = unique_tissue2.scalar()
    return SearchSummary(count=count,
                         unique_phenotype2=unique_phenotype2,
                         unique_tissue2=unique_tissue2)
def member_quiz_statistics(member_id: int):
    '''Returns information about all quizzes and whether the given member
    has completed them.'''
    quizzes = db_session.query(Quiz).filter(Quiz.deleted_at == None).all()

    answered_questions_per_quiz_query = (
        db_session.query(
            QuizQuestion.quiz_id,
            func.count(func.distinct(QuizAnswer.option_id)))
        .join(QuizAnswer, QuizQuestion.id == QuizAnswer.question_id)
        .filter(QuizAnswer.member_id == member_id)
        .filter(((QuizAnswer.id == None) | QuizAnswer.correct)
                & (QuizAnswer.deleted_at == None)
                & (QuizQuestion.deleted_at == None))
        .group_by(QuizQuestion.quiz_id))

    answered_questions_per_quiz = mapify(
        answered_questions_per_quiz_query.all())

    total_questions_in_quiz = mapify(
        (db_session.query(QuizQuestion.quiz_id,
                          func.count(func.distinct(QuizQuestion.id)))
         .filter(QuizQuestion.deleted_at == None)
         .group_by(QuizQuestion.quiz_id)).all())

    # .get avoids a KeyError for quizzes with no questions or no answers.
    return [{
        "quiz": quiz_entity.to_obj(quiz),
        "total_questions_in_quiz": total_questions_in_quiz.get(quiz.id, 0),
        "correctly_answered_questions": answered_questions_per_quiz.get(quiz.id, 0),
    } for quiz in quizzes]
def general_stats(info_role):
    """Return stats about synthese.

    .. :quickref: Synthese;

    - nb of observations
    - nb of distinct species
    - nb of distinct observers
    - nb of datasets
    """
    allowed_datasets = get_datasets_cruved(info_role)
    q = DB.session.query(
        func.count(Synthese.id_synthese),
        func.count(func.distinct(Synthese.cd_nom)),
        func.count(func.distinct(Synthese.observers)),
    )
    q = synthese_query.filter_query_with_cruved(Synthese, q, info_role)
    data = q.one()
    data = {
        "nb_data": data[0],
        "nb_species": data[1],
        "nb_observers": data[2],
        "nb_dataset": len(allowed_datasets),
    }
    return data
def test_query_macro(self):
    query = sql[(
        func.distinct(x.continent)
        for x in db.country
        if (func.sum(w.population)
            for w in db.country
            if w.continent == x.continent).as_scalar() > 100000000)]
    sql_results = engine.execute(query).fetchall()

    query_macro_results = query[(
        func.distinct(x.continent)
        for x in db.country
        if (func.sum(w.population)
            for w in db.country
            if w.continent == x.continent).as_scalar() > 100000000)]

    assert sql_results == query_macro_results
def connections(self):
    '''
    Count the number of distinct email addresses this person has
    sent or received messages from in the deployment.
    '''
    sent = db.session.query(func.count(func.distinct(Email.to_user_id))).\
        filter(Email.to_user_id != self.id).\
        filter(Email.from_user_id == self.id).first()[0]

    received = db.session.query(func.count(func.distinct(Email.from_user_id))).\
        filter(Email.from_user_id != self.id).\
        filter(Email.to_user_id == self.id).first()[0]

    return sent + received
def last_datasets(self, nb_dataset=1):
    """
    Return the n last datasets of each family type loaded for this instance.
    """
    family_types = (
        db.session.query(func.distinct(DataSet.family_type))
        .filter(AutocompleteParameter.id == self.id)
        .all()
    )

    result = []
    for family_type in family_types:
        data_sets = (
            db.session.query(DataSet)
            .join(Job)
            .join(AutocompleteParameter)
            .filter(
                AutocompleteParameter.id == self.id,
                DataSet.family_type == family_type,
                Job.state == 'done'
            )
            .order_by(Job.created_at.desc())
            .limit(nb_dataset)
            .all()
        )
        result += data_sets
    return result
def last_datasets(self, nb_dataset=1, family_type=None):
    """
    Return the n last datasets of each family type loaded for this instance.
    """
    query = db.session.query(func.distinct(DataSet.family_type)).filter(
        Instance.id == self.id, DataSet.family_type != 'mimir'
    )
    if family_type:
        query = query.filter(DataSet.family_type == family_type)
    family_types = query.all()

    result = []
    for family_type in family_types:
        data_sets = (
            db.session.query(DataSet)
            .join(Job)
            .join(Instance)
            .filter(Instance.id == self.id,
                    DataSet.family_type == family_type,
                    Job.state == 'done')
            .order_by(Job.created_at.desc())
            .limit(nb_dataset)
            .all()
        )
        result += data_sets
    return result
def stations():
    station_query = session.query(func.distinct(Measurement.station)).all()
    station_list = list(np.ravel(station_query))
    return jsonify(station_list)
async def query(payload: Query):
    # print("q: ", payload.query)
    # print("q_opt: ", optimiseQuery(payload.query))
    srcs, stmt = basicOrGroupQuery(eavs.c.data)(optimiseQuery(payload.query))
    # print(payload.query)
    srcs = [eavs] + srcs
    query = select([func.distinct(eavs.c.subject_id), eavs.c.data])
    for s in srcs:
        query = query.select_from(s)
    query = query.where(stmt)
    query = simQuery(query, payload.query)
    print(
        query.compile(compile_kwargs={"literal_binds": True},
                      dialect=postgresql.dialect()))
    res = await database.fetch_all(query=query)

    if payload.result_type and payload.result_type == 'full':
        return {'full': [row['data'] for row in res]}
    else:
        return {'count': len(res)}
def portmap_portstat_route(port):
    """generate port statistics fragment"""

    stats = db.session.query(Service.proto, func.count(Service.id)) \
        .filter(Service.port == port) \
        .group_by(Service.proto).order_by(Service.proto)

    infos = db.session.query(Service.info, func.count(Service.id).label('info_count')) \
        .filter(Service.port == port, Service.info != '', Service.info != None) \
        .group_by(Service.info).order_by(desc('info_count'))  # noqa: E711  pylint: disable=singleton-comparison

    comments = db.session.query(func.distinct(Service.comment)) \
        .filter(Service.port == port, Service.comment != '') \
        .order_by(Service.comment)

    hosts = db.session.query(Host.address, Host.hostname, Host.id) \
        .select_from(Service).outerjoin(Host) \
        .filter(Service.port == port).order_by(Host.address)

    if 'filter' in request.values:
        parsed_filter = filter_parser.parse(request.values.get('filter'))
        stats = apply_filters(stats, parsed_filter, do_auto_join=False)
        infos = apply_filters(infos, parsed_filter, do_auto_join=False)
        comments = apply_filters(comments, parsed_filter, do_auto_join=False)
        hosts = apply_filters(hosts, parsed_filter, do_auto_join=False)

    try:
        portname = getservbyport(int(port))
    except OSError:
        portname = ''

    return render_template(
        'visuals/portmap_portstat.html', port=port, portname=portname,
        stats=stats.all(), infos=infos.all(), hosts=hosts.all(),
        comments=comments.all())
def get_recipes_without_images(*args):
    app = create_app()
    import os
    if len(os.listdir('/home/ubuntu/eaterator/spool')) > 300:
        return
    with app.app_context():
        default = -1
        recipes = Recipe.query.\
            filter(
                not_(
                    Recipe.pk.in_(
                        db.session.query(func.distinct(RecipeImage.recipe))
                    )
                ),
                Recipe.title.isnot(None)
            ).limit(55).all()
        if len(recipes) <= 0:
            app.logger.debug(
                "CLICKR CRON | Added recipes from failed searches")
            default = -2
            recipes = Recipe.query.filter(
                Recipe.pk.in_(
                    db.session.query(RecipeImage.recipe).filter(
                        RecipeImage.secret == 'default',
                        RecipeImage.farm_id != '-2'))).limit(55).all()
        for recipe in recipes:
            if recipe.title:
                uwsgi.spool({
                    b'pk': str(recipe.pk).encode('utf-8'),
                    b'title': recipe.title.encode('utf-8'),
                    b'default': str(default).encode('utf-8')
                })
        db.session.close()
        db.session.remove()
    return
def get_total_unique_skills():
    return db.session.query(func.count(
        func.distinct(cast(models.UserSkill.level, String) + '-' +
                      cast(models.UserSkill.name, String))
    )).\
        filter(models.UserSkill.level !=
               LEVELS['LEVEL_I_WANT_TO_LEARN']['score']).scalar()
def repo_contributors(repo_name: str):
    all_repo = get_repo_collection()
    if repo_name not in all_repo:
        raise HTTPException(status_code=404,
                            detail=f'Repo {repo_name} not found')
    # Renamed from `id` to avoid shadowing the builtin.
    repo_id = all_repo[repo_name]

    all_commits_select = select([
        authors_table.c.mapping_id.label('author_id'),
        repo_table.c.commit_hash.label('commit_hash')
    ]).select_from(
        repo_table.join(authors_table,
                        authors_table.c.id == repo_table.c.author_id)).where(
        repo_table.c.repo_id == repo_id).alias('all_commits__')

    aggregate_stmt = select([
        all_commits_select.c.author_id.label('author_id'),
        func.count(func.distinct(
            all_commits_select.c.commit_hash)).label('commit_count')
    ]).select_from(all_commits_select).group_by(
        all_commits_select.c.author_id).alias('aggregated_commits__')

    mapped_total_stmt = select([
        authors_table.c.author_name.label('author_name'),
        aggregate_stmt.c.commit_count.label('commit_count')
    ]).select_from(
        aggregate_stmt.join(
            authors_table,
            authors_table.c.id == aggregate_stmt.c.author_id)).order_by(
        desc(aggregate_stmt.c.commit_count)).alias('mapped_total__')

    with engine.connect() as connection:
        total = connection.execute(mapped_total_stmt)
        return [{r.author_name: r.commit_count} for r in total]
def index():
    date_ = db.session.query(
        func.distinct(CourseCount.update_at).label('update_at')).order_by(
        CourseCount.update_at.asc()  # noqa
    ).all()
    name_ = db.session.query(Course.name).order_by(
        Course.id.asc()  # noqa
    ).all()
    course_count_ = db.session.query(
        CourseCount.count, CourseCount.update_at, Course.name).join(
        Course, CourseCount.course_id == Course.id).order_by(
        CourseCount.update_at.asc(),  # noqa
        CourseCount.course_id.asc()  # noqa
    ).all()

    x_date = [_.update_at for _ in date_]
    y_name = [_.name for _ in name_]
    data = []
    for name in y_name:
        d = list()
        for course_count in course_count_:
            if course_count.name == name:
                d.append(course_count.count)
        data.append((name, d))

    # Chart title reads "Tencent Classroom - course count trend".
    c = Line().add_xaxis(x_date).set_global_opts(title_opts=opts.TitleOpts(
        title="腾讯课堂-课程数量走势"))
    for d in data:
        c.add_yaxis(d[0], d[1], is_connect_nones=True)
    return Markup(c.render_embed())
def index(self):
    from gviz_data_table import Table
    from rockpack.mainsite.services.user.models import (
        User, UserActivity, UserAccountEvent)

    if request.args.get('activity') == 'activity':
        activity_model, activity_date = UserActivity, UserActivity.date_actioned
    else:
        activity_model, activity_date = UserAccountEvent, UserAccountEvent.event_date

    try:
        interval_count = int(request.args['interval_count'])
    except Exception:
        interval_count = 10

    interval = request.args.get('interval')
    if interval not in ('week', 'month'):
        interval = 'week'

    cohort = func.date_part(interval, User.date_joined)
    cohort_label = func.min(func.date(User.date_joined))
    active_interval = (func.date_part(interval, activity_date) - cohort).label('active_interval')

    q = readonly_session.query(User).filter(
        User.date_joined > LAUNCHDATE, User.refresh_token != '')
    if request.args.get('gender') in ('m', 'f'):
        q = q.filter(User.gender == request.args['gender'])
    if request.args.get('locale') in app.config['ENABLED_LOCALES']:
        q = q.filter(User.locale == request.args['locale'])
    if request.args.get('age') in ('13-18', '18-25', '25-35', '35-45', '45-55'):
        age1, age2 = map(int, request.args['age'].split('-'))
        q = q.filter(between(
            func.age(User.date_of_birth),
            text("interval '%d years'" % age1),
            text("interval '%d years'" % age2)
        ))

    active_users = dict(
        ((c, int(w)), u) for c, w, u in q.join(
            activity_model,
            (activity_model.user == User.id) &
            (activity_date >= User.date_joined)
        ).group_by(cohort, active_interval).values(
            cohort, active_interval, func.count(func.distinct(activity_model.user))
        )
    )

    table = Table(
        [dict(id='cohort', type=date)] +
        [dict(id='%s%d' % (interval, i), type=str) for i in range(interval_count)]
    )
    totals = q.group_by(cohort).order_by(cohort)
    for c, l, t in totals.values(cohort, cohort_label, func.count()):
        data = []
        for i in range(interval_count):
            a = active_users.get((c, i), '')
            data.append(a and '%d%% (%d)' % (ceil(a * 100.0 / t), a))
        table.append([l] + data)

    return self.render('admin/retention_stats.html', data=table.encode())
def index_old(self):
    from gviz_data_table import Table
    from rockpack.mainsite.services.user.models import User, UserActivity

    user_count = readonly_session.query(func.count(User.id)).\
        filter(User.refresh_token != '').scalar()

    header = ('user count', 'max lifetime', 'avg lifetime', 'stddev lifetime',
              'max active days', 'avg active days', 'stddev active days')
    lifetime = func.date_part('days', func.max(UserActivity.date_actioned) -
                              func.min(UserActivity.date_actioned)).label('lifetime')
    active_days = func.count(func.distinct(func.date(
        UserActivity.date_actioned))).label('active_days')
    activity = readonly_session.query(UserActivity.user, lifetime, active_days).\
        group_by(UserActivity.user)

    ctx = {}
    for key, having_expr in ('all', None), ('1day', lifetime > 1), ('7day', lifetime > 7):
        data = activity.having(having_expr).from_self(
            func.count('*'),
            func.max(lifetime),
            func.avg(lifetime),
            func.stddev_samp(lifetime),
            func.max(active_days),
            func.avg(active_days),
            func.stddev_samp(active_days)
        ).one()
        table = Table([
            dict(id='metric', type=str),
            dict(id='value', type=float),
            dict(id='%', type=str),
        ])
        pdata = ('%d%%' % (data[0] * 100 / user_count),) + ('',) * 6
        table.extend(zip(*(header, map(float, data), pdata)))
        ctx['ret_%s_data' % key] = table.encode()

    return self.render('admin/retention_stats_old.html', **ctx)
def select_nscmlog(self, variables):
    """Select a variable which is a field in Scmlog.

    - variables (list): variables to select
      Currently supported: "commits", "authors", "committers"
    """
    if not isinstance(variables, (list, tuple)):
        raise Exception("select_nscmlog: Argument is not list or tuple")
    elif len(variables) == 0:
        raise Exception("select_nscmlog: No variables")
    fields = []
    for variable in variables:
        if variable == "commits":
            name = "nocommits"
            field = SCMLog.id
        elif variable == "authors":
            name = "nauthors"
            field = SCMLog.author_id
        elif variable == "committers":
            name = "ncommitters"
            field = SCMLog.committer_id
        else:
            raise Exception("select_nscmlog: Unknown variable %s." % variable)
        fields.append(label(name, func.count(func.distinct(field))))
    return self.add_columns(*fields)
def all_statements(cls, dataset=None, canonical_id=None, inverted_ids=None):
    table = cls.__table__
    q = select(table)
    if canonical_id is not None:
        q = q.filter(table.c.canonical_id == canonical_id)
    if inverted_ids is not None:
        alias = table.alias()
        sq = select(func.distinct(alias.c.canonical_id))
        sq = sq.filter(alias.c.prop_type == registry.entity.name)
        sq = sq.filter(alias.c.value.in_(inverted_ids))
        # sq = sq.subquery()
        # cte = select(func.distinct(cls.canonical_id).label("canonical_id"))
        # cte = cte.where(cls.prop_type == registry.entity.name)
        # cte = cte.where(cls.value.in_(inverted_ids))
        # cte = cte.cte("inverted")
        # Find entities which refer to the given entity in one of their
        # property values.
        # inverted = aliased(cls)
        q = q.filter(table.c.canonical_id.in_(sq))
        # q = q.filter(inverted.prop_type == registry.entity.name)
        # q = q.filter(inverted.value.in_(inverted_ids))
    if dataset is not None:
        q = q.filter(table.c.dataset.in_(dataset.source_names))
    q = q.order_by(table.c.canonical_id.asc())
    res = db.session.execute(q)
    while True:
        batch = res.fetchmany(10000)
        if not batch:
            break
        yield from batch
def resolve_pipelines_by_stage_id(self, info, stage_id=None, **args):
    query = db_session.query(
        func.distinct(models.Pipelines.pipeline_id).label('pipeline_id'),
        models.Pipelines.name.label('pipeline_name'),
        models.Pipelines.display_name.label('pipeline_display_name'),
        models.Pipelines.pipeline_status_id.label('pipeline_status_id'),
        models.PipelineStage.display_name.label('stage_display_name'),
    ).select_from(
        models.Pipelines
    ).join(
        models.PipelineStage
    ).group_by(
        models.Pipelines.pipeline_id,
        models.PipelineStage.pipeline_stage_id
    ).filter(
        models.Pipelines.pipeline_stage_id == stage_id
    ).order_by(
        models.PipelineStage.display_name,
        models.Pipelines.display_name
    )

    result = list()
    for row in query.all():
        result.append(schemas.PipelinesStage(**row._asdict()))
    return result
def landing():
    """
    Show a landing page giving a short intro blurb to unregistered users
    and very basic metrics such as total users.
    """
    # Create a list of total project counts in the form
    # [(day, count), ...].
    projects_graph_data = []
    now = datetime.datetime.utcnow()
    for day_ago in range(30):
        limit = now - datetime.timedelta(days=day_ago)
        projects_graph_data.append((
            time.mktime(limit.timetuple()) * 1000,
            Project.query.filter(Project.created <= limit).count()
        ))

    # Find the 10 latest public projects.
    new_projects = (
        Project.visible(Project.query, user=g.user)
        .order_by(False)
        .order_by(Project.created.desc())
    ).paginate(1, 10, False)

    # Sum the total number of messages across all projects, caching
    # it for the next two minutes.
    total_messages = g.redis.get('cache_message_count')
    if total_messages is None:
        total_messages = g.db.session.query(
            func.sum(Project.message_count)
        ).scalar()
        if total_messages is None:
            total_messages = 0
        g.redis.setex('cache_message_count', 120, total_messages)

    # Total # of users.
    total_users = User.query.count()

    # Find the 10 most popular networks.
    top_networks = (
        Channel.visible(g.db.session.query(
            Channel.host,
            func.count(func.distinct(Channel.channel)).label('count')
        ), user=g.user)
        .group_by(Channel.host)
        .order_by('count desc')
    )
    total_networks = top_networks.count()
    top_networks = top_networks.limit(10)

    return render_template(
        'landing.html',
        projects_graph_data=projects_graph_data,
        new_projects=new_projects,
        top_networks=top_networks,
        total_networks=total_networks,
        total_messages=total_messages,
        total_users=total_users
    )
def count_by_time(start, end, t='task'):
    filter_group = (
        CobraTaskInfo.created_at >= '{0} 00:00:00'.format(start),
        CobraTaskInfo.created_at <= '{0} 23:59:59'.format(end),
        # Active project
        CobraProjects.status > 0,
        CobraProjects.repository == CobraTaskInfo.target)
    count = 0
    if t == 'task':
        count = db.session.query(
            func.count(CobraTaskInfo.id).label('count')).filter(
            *filter_group).first()
    elif t == 'project':
        count = db.session.query(
            func.count(func.distinct(CobraTaskInfo.target)).label('count')).filter(
            *filter_group).first()
    elif t == 'line':
        count = db.session.query(
            func.sum(CobraTaskInfo.code_number).label('count')).filter(
            *filter_group).first()
    elif t == 'file':
        count = db.session.query(
            func.sum(CobraTaskInfo.file_count).label('count')).filter(
            *filter_group).first()
    if count[0] is None:
        return 0
    else:
        logging.debug('SD {t} {start} {end} {count}'.format(
            start=start, end=end, t=t, count=int(count[0])))
        return int(count[0])
def get_by_date_verb_ratio(session):
    # type: (Session) -> OrderedDict
    results = OrderedDict()
    for day in session.query(func.distinct(LogMsg.time_received_date)).all():
        day: str = day[0]
        day_counter = OrderedDict()
        for method, os, count in session.query(
                LogMsg.request_method, LogMsg.operating_system, func.count('*')) \
                .filter(LogMsg.time_received_date == day) \
                .group_by(LogMsg.operating_system, LogMsg.request_method).all():
            try:
                day_counter[os][method]: int = count
            except KeyError:
                day_counter[os] = OrderedDict()
                day_counter[os][method] = count
        results[day] = []
        for os in day_counter:
            if 'GET' not in day_counter[os].keys():
                os_ratio = 0
            elif 'POST' not in day_counter[os].keys():
                os_ratio = 'NAN'
            else:
                ratio = float(day_counter[os]["GET"]) / day_counter[os]["POST"]
                os_ratio = '{:.4}'.format(ratio)
            results[day].append([os, os_ratio])
    _logger.debug(f'results: {results}')
    return results
def select_listpersons(self, kind="all"): """Select a list of persons (authors, committers) - kind: kind of person to select authors: authors of commits committers: committers of commits all: authors and committers Returns a Query object, with (id, name, email) selected. """ query = self.add_columns(label("id", func.distinct(DB.People.id)), label("name", DB.People.name), label('email', DB.People.email)) if kind == "authors": return query \ .join (DB.SCMLog, DB.People.id == DB.SCMLog.author_id) elif kind == "committers": return query \ .join (DB.SCMLog, DB.People.id == DB.SCMLog.committer_id) elif kind == "all": return query \ .join (DB.SCMLog, DB.People.id == DB.SCMLog.author_id or DB.People.id == DB.SCMLog.committer_id) else: raise Exception ("select_listpersons: Unknown kind %s." \ % kind)
def select_listpersons(self, kind = "all"): """Select a list of persons (authors, committers) - kind: kind of person to select authors: authors of commits committers: committers of commits all: authors and committers Returns a SCMQuery object, with (id, name, email) selected. """ query = self.add_columns (label("id", func.distinct(People.id)), label("name", People.name), label('email', People.email)) if kind == "authors": return query.join (SCMLog, People.id == SCMLog.author_id) elif kind == "committers": return query.join (SCMLog, People.id == SCMLog.committer_id) elif kind == "all": return query.join (SCMLog, People.id == SCMLog.author_id or People.id == SCMLog.committer_id) else: raise Exception ("select_listpersons: Unknown kind %s." \ % kind)
def select_listpersons_uid(self, kind="all"):
    """Select a list of persons (authors, committers), using uids

    - kind: kind of person to select
       authors: authors of commits
       committers: committers of commits
       all: authors and committers

    Returns a SCMQuery object, with (id, name, email) selected.
    """
    query = self.add_columns(label("id", func.distinct(UPeople.id)),
                             label("name", UPeople.identifier)) \
        .join(PeopleUPeople, UPeople.id == PeopleUPeople.upeople_id)
    if kind == "authors":
        return query.join(SCMLog, PeopleUPeople.people_id == SCMLog.author_id)
    elif kind == "committers":
        return query.join(SCMLog, PeopleUPeople.people_id == SCMLog.committer_id)
    elif kind == "all":
        # `|` keeps both conditions in the generated SQL; Python's `or`
        # would silently discard the first expression.
        return query.join(SCMLog,
                          (PeopleUPeople.people_id == SCMLog.author_id) |
                          (PeopleUPeople.people_id == SCMLog.committer_id))
    else:
        raise Exception("select_listpersons_uid: Unknown kind %s." % kind)
def get_content_item_ids(self, org, type, **kw):
    content_items = db.session.query(func.distinct(ContentItem.id))\
        .filter_by(org_id=org.id)\
        .filter_by(type=type)\
        .all()
    return [c[0] for c in content_items]
def update_user_interests(date_from, date_to):
    active_users = readonly_session.query(UserActivity.user).filter(
        UserActivity.date_actioned.between(date_from, date_to)).subquery()
    activity_categories = readonly_session.query(
        UserActivity.user, Channel.category,
        func.count(func.distinct(Channel.id))
    ).outerjoin(
        VideoInstance,
        (UserActivity.object_type == 'video_instance') &
        (UserActivity.object_id == VideoInstance.id)
    ).filter(
        ((UserActivity.object_type == 'channel') &
         (UserActivity.object_id == Channel.id)) |
        (VideoInstance.channel == Channel.id)
    ).filter(
        UserActivity.user.in_(active_users),
        Channel.category != None
    ).group_by('1, 2').order_by('1, 3 desc')

    for user, categories in groupby(activity_categories, lambda x: x[0]):
        UserInterest.query.filter_by(user=user).delete()
        db.session.execute(UserInterest.__table__.insert(), [
            dict(user=user, explicit=False, category=category, weight=weight)
            for user, category, weight in categories
        ][:10])
def get(self):
    # Fetch the currently active (main) lecture roundtable.
    main_roundtable = db_session.query(Roundtable).filter(
        Roundtable.is_active == True).first()

    area_opened_library = db_session.query(func.distinct(
        Library.area)).join(RoundtableAndLibrary).filter(
        RoundtableAndLibrary.roundtable_id == main_roundtable.id)
    opened_area = [entry[0] for entry in area_opened_library]

    # Fetch the list of libraries where lectures are held.
    opened_library = RoundtableAndLibrary.query.filter(
        RoundtableAndLibrary.roundtable == main_roundtable)

    records = []
    for library_match in opened_library:
        # For each library, take its total lecture rounds and count how
        # many lectures and hosts are matched to it.
        records.append(
            dict(area=library_match.library.area,
                 library_id=library_match.library.id,
                 library_name=library_match.library.library_name,
                 round_num=library_match.round_num,
                 lecture_cnt=len(library_match.library.lecture),
                 host_cnt=len(library_match.library.host)))

    return render_template("admin/dashboard.html",
                           opened_area=opened_area,
                           opened_library=records)
def movie_detailed_info():
    movie = request.args.get('movie_name')
    if not movie:
        return jsonify({"msg": "Please provide movie_name"})
    movies_detailed_info = []
    today_date = datetime.datetime.now().date()
    no = get_records_count()
    try:
        query = db.session.query(
            func.max(Cinemalevel.movie_name).label("movie"),
            func.max(Cinemalevel.crawl_hour).label("crawl_hour"),
            func.max(Cinemalevel.crawl_date).label("crawl_date"),
            func.avg(Cinemalevel.percent_occupancy).label('percent_occupancy'),
            func.sum(Cinemalevel.category_occupied_seats).label('tickets_sold'),
            func.abs(
                func.sum(Cinemalevel.category_occupied_seats *
                         Cinemalevel.category_price) /
                func.sum(Cinemalevel.category_occupied_seats)).label("avg_price"),
            func.count(func.distinct(Cinemalevel.show_datetime)).label("shows"))\
            .filter_by(movie_name=movie, show_date=today_date)\
            .group_by(Cinemalevel.movie_name, Cinemalevel.crawl_date,
                      Cinemalevel.crawl_hour)\
            .order_by(Cinemalevel.crawl_date.desc(),
                      Cinemalevel.crawl_hour.desc())\
            .limit(no).all()
        movies_detailed_info = [each._asdict() for each in query]
        # Drop the latest crawl since it may still be running; serve the
        # data from one hour ago instead.
        movies_detailed_info = movies_detailed_info[1:]
    except Exception as err_msg:
        print(err_msg)
    return jsonify({'data': movies_detailed_info, "movie_name": movie})
def top_networks(limit=20):
    return (db.session.query(
        Channel.host,
        func.count(func.distinct(Channel.channel)).label('count'),
    ).join(Channel.project).filter(
        Project.public == True,
        Channel.public == True).group_by(
        Channel.host).order_by(text('count desc')).limit(limit)).all()
def query3():
    if request.method == 'POST':
        sala = request.form.get('sale')
        film = request.form.get('film')
        if sala != 'Seleziona...' and film != 'Seleziona...':
            settimana = date.today() - timedelta(days=7)
            duesettimane = date.today() - timedelta(days=14)
            mese = date.today() - timedelta(days=30)
            conn = choiceEngine()
            # Number of booked seats per theater per film.
            unasettimana = select([func.count(booking.c.id).label('count')]).\
                select_from(booking.join(movieSchedule,
                                         booking.c.idmovieSchedule == movieSchedule.c.id).
                            join(movies, movieSchedule.c.idMovie == movies.c.id)).\
                where(and_(movieSchedule.c.idMovie == bindparam('film'),
                           # Check that the where clause behaves correctly; datetime.now()???
                           movieSchedule.c.theater == bindparam('sala'),
                           movieSchedule.c.dateTime.between(bindparam('tempo'),
                                                            datetime.datetime.now())))
            titolo = select([movies]).where(movies.c.id == film)
            ristitolo = conn.execute(titolo).fetchone()
            ris1 = conn.execute(unasettimana, {
                'sala': sala,
                'film': film,
                'tempo': settimana
            }).fetchone()
            ris2 = conn.execute(unasettimana, {
                'sala': sala,
                'film': film,
                'tempo': duesettimane
            }).fetchone()
            ris3 = conn.execute(unasettimana, {
                'sala': sala,
                'film': film,
                'tempo': mese
            }).fetchone()
            conn.close()
            return render_template("/manager/statistiche/resultOccupazioneSala.html",
                                   sala=sala, film=ristitolo['title'],
                                   settimana=ris1['count'],
                                   duesettimane=ris2['count'], mese=ris3['count'])

    # Needed to display the possible choices of films and theaters.
    s3 = select([theaters])  # find all theaters
    s41 = movieSchedule.join(movies, movieSchedule.c.idMovie == movies.c.id)
    # Find only films that have bookings; distinct avoids duplicates.
    s4 = select([func.distinct(movies.c.id).label('id'),
                 movies.c.title]).select_from(s41).order_by(movies.c.title)
    conn = choiceEngine()
    sale = conn.execute(s3)
    film = conn.execute(s4)
    resp = make_response(
        render_template("/manager/statistiche/occupazioneSala.html",
                        theaters=sale, movies=film))
    conn.close()
    return resp
def select_listcommits(self):
    """Select a list of commits"""
    if DB.SCMLog not in self.joined:
        self.joined.append(DB.SCMLog)
    return self \
        .add_columns(label("id", func.distinct(DB.SCMLog.id)),
                     label("date", DB.SCMLog.date))
def domains(self):
    """
    Domains which an organization manages.
    """
    domains = db.session.query(func.distinct(ContentItem.domain))\
        .filter_by(org_id=self.id)\
        .all()
    return [d[0] for d in domains]
def projects_total(self, **kwargs):
    """Total number of projects"""
    filters = list(self.get_filters(**kwargs))
    res = db.session.query(func.count(func.distinct(self.project_id))) \
        .filter(*filters).scalar()
    if res is None:
        res = 0
    return res
def get_voted_users_num(self):
    from models.vote import Vote
    result = db.session\
        .query(func.count(func.distinct(Vote.user_id)))\
        .filter_by(voting_id=self.id)\
        .first()
    return result[0] if len(result) > 0 else 0
def test_query_macro(self):
    query = sql[(
        func.distinct(x.continent)
        for x in db.country
        if (
            func.sum(w.population)
            for w in db.country
            if w.continent == x.continent
        ) > 100000000
    )]
    sql_results = engine.execute(query).fetchall()

    query_macro_results = query[(
        func.distinct(x.continent)
        for x in db.country
        if (
            func.sum(w.population)
            for w in db.country
            if w.continent == x.continent
        ) > 100000000
    )]

    assert sql_results == query_macro_results
def find_running_or_queued_action_workflow_ids(datastore_id):
    resultset = db.session\
        .query(func.distinct(ActionDao.data['workflow_id'].astext))\
        .filter(ActionDao.data['datastore_id'].astext == datastore_id)\
        .filter(ActionDao.data['state'].astext.in_(
            [ActionState.RUNNING, ActionState.PENDING, ActionState.QUEUED]))\
        .filter(ActionDao.data['workflow_id'] != 'null')\
        .all()
    return [r[0] for r in resultset]
def connections_in_deployment(cls):
    '''
    Count total number of distinct connections in deployment. This must
    be done after committing the emails originally associated with this
    event.
    '''
    return db.session.query(func.count(func.distinct(
        cast(Email.to_user_id, String) + '-' +
        cast(Email.from_user_id, String)))).first()[0]
def page_backlinks(self, title):
    """Gives a list of pages linking to specified page."""
    backlinks = self.db.query(func.distinct(Title.title)).\
        join((Link, Link.src == Title.id)).\
        filter(Link.target == title).\
        order_by(Title.title)
    for (backlink,) in backlinks:
        yield unicode(backlink)
def history__github():
    grain = _get_grain()

    # Filtered list of github IDs
    repo = request.args.get('repo', None)
    repoFilter = None
    if repo is not None:
        repo = repo.split(',')
        repoFilter = SnapshotOfGithub.repo_name.in_(repo)

    # Date filter
    date_group = func.date_trunc(grain, SnapshotOfGithub.timestamp)

    # Query: Range of dates
    q1 = Session.query()\
        .add_column(func.distinct(date_group).label('d'))\
        .order_by(date_group.desc())
    response = _prepare(q1.count())
    q1 = q1.offset(response['offset'])\
        .limit(response['per_page'])
    if q1.count():
        date_column = q1.subquery().columns.d
        (min_date, max_date) = Session.query(
            func.min(date_column), func.max(date_column)).first()
    else:
        # Impossible date range
        (min_date, max_date) = datetime.now() + timedelta(days=1), datetime.now()

    # Grouped query
    S = SnapshotOfGithub
    q = Session.query()\
        .add_column(func.sum(S.watchers))\
        .add_column(func.max(S.forks))\
        .add_column(func.max(S.open_issues))\
        .add_column(func.max(S.size))\
        .add_column(date_group)\
        .add_column(S.repo_name)\
        .group_by(date_group)\
        .group_by(S.repo_name)\
        .order_by(date_group.desc())\
        .filter(date_group >= min_date)\
        .filter(date_group <= max_date)\
        .filter(repoFilter)

    results = {}
    # Inner function transforms SELECT tuple into recognizable format
    _dictize = lambda x: {
        'watchers': x[0],
        'forks': x[1],
        'issues': x[2],
        'size': x[3],
        'timestamp': x[4].date().isoformat(),
    }
    for x in q:
        repo_name = x[5]
        results[repo_name] = results.get(repo_name, {'repo': repo_name, 'data': []})
        results[repo_name]['data'].append(_dictize(x))

    response['grain'] = grain
    response['data'] = results
    response['repos'] = repo
    response['min_date'] = min_date.date().isoformat()
    response['max_date'] = max_date.date().isoformat()
    return response
def attribute_keys(dataset):
    entities = Entity.__table__
    col = func.distinct(func.skeys(entities.c.attributes)).label('keys')
    q = select([col], entities.c.dataset_id == dataset.id, [entities])
    rp = db.engine.execute(q)
    keys = set()
    for row in rp.fetchall():
        keys.add(row[0])
    return sorted(keys)
def get_content_item_ids(self, org, tag_id, **kw):
    """
    Get all content item ids for a Tag.
    """
    content_items = db.session\
        .query(func.distinct(content_items_tags.c.content_item_id))\
        .filter(content_items_tags.c.tag_id == tag_id)\
        .all()
    return [c[0] for c in content_items]
def subscriber_count_for_userid(cls, userid):
    from rockpack.mainsite.services.video import models
    value = Subscription.query.join(
        models.Channel,
        (models.Channel.id == Subscription.channel) &
        (models.Channel.deleted == False) &
        (models.Channel.owner == userid)
    ).with_entities(func.count(func.distinct(Subscription.user))).first()
    return value[0] if value else 0
def collaborators_total(self, **kwargs):
    """Total number of collaborators"""
    filters = list(self.get_filters(**kwargs))
    filters.append(self.thread == None)
    filters.append(self.private == 0)
    res = db.session.query(func.count(func.distinct(self.user_id))) \
        .filter(*filters).scalar()
    if res is None:
        res = 0
    return res
def get_movie_api_ids(city: str) -> List[namedtuple]:
    query = DB.session.query(
        func.min(Screening.date_time).label('next_screening'),
        func.count().label('screenings'),
        func.count(func.distinct(Screening.cinema_api_id)).label('cinemas'),
        Screening.movie_api_id)
    query = query.filter(Screening.city == city)
    query = query.filter(Screening.date_time > get_now(city))
    query = query.group_by(Screening.movie_api_id)
    return query.all()
def history__mailman():
    grain = _get_grain()

    # Filtered list of mailman IDs
    lists = request.args.get('list')
    listFilter = None
    if lists is not None:
        lists = lists.split(',')
        listFilter = SnapshotOfMailman.list_name.in_(lists)

    # Date filter
    date_group = func.date_trunc(grain, SnapshotOfMailman.timestamp)

    # Query: Range of dates
    q1 = Session.query()\
        .add_column(func.distinct(date_group).label('d'))\
        .order_by(date_group.desc())
    response = _prepare(q1.count())
    q1 = q1.offset(response['offset'])\
        .limit(response['per_page'])
    if q1.count():
        subquery = q1.subquery()
        (min_date, max_date) = Session.query(
            func.min(subquery.columns.d), func.max(subquery.columns.d)).first()
    else:
        # Impossible date range
        (min_date, max_date) = datetime.now() + timedelta(days=1), datetime.now()

    # Grouped query
    S = SnapshotOfMailman
    q = Session.query()\
        .add_column(func.sum(S.posts_today))\
        .add_column(func.max(S.subscribers))\
        .add_column(date_group)\
        .add_column(S.list_name)\
        .group_by(date_group)\
        .group_by(S.list_name)\
        .order_by(date_group.desc())\
        .filter(date_group >= min_date)\
        .filter(date_group <= max_date)\
        .filter(listFilter)

    results = {}
    # Inner function transforms SELECT tuple into recognizable format
    _dictize = lambda x: {
        'posts': x[0],
        'subscribers': x[1],
        'timestamp': x[2].isoformat(),
    }
    # Build output datastructure from rows
    for x in q:
        list_name = x[3]
        results[list_name] = results.get(list_name, {'list_name': list_name, 'data': []})
        results[list_name]['data'].append(_dictize(x))

    # Write response
    response['grain'] = grain
    response['data'] = results
    response['list'] = lists
    response['min_date'] = min_date.isoformat()
    response['max_date'] = max_date.isoformat()
    return response
def distinct_key(self):
    dist = func.distinct(self.key)
    q = select(columns=[dist], from_obj=self.from_clause)
    q = self.apply_filters(q)
    q = q.where(self.key != None)  # noqa
    rp = self.config.engine.execute(q)
    while True:
        row = rp.fetchone()
        if not row:
            break
        yield row[0]
def example1():
    """
    Find all customers who never order anything.
    """
    query = ses.query(Customer.name) \
        .filter(
            Customer.id.notin_(
                ses.query(func.distinct(Order.customer_id))
            )
        )
    pprint(query, engine)