def query_experiment_sets():
    """This queries all the replicates of an experiment and groups them for further processing"""
    experiment_sets = {}
    ome = base.Session()
    RNASeqExperiment = datasets.RNASeqExperiment
    ArrayExperiment = datasets.ArrayExperiment
    ChIPExperiment = datasets.ChIPExperiment

    experiment_sets['RNAseq'] = ome.query(func.array_agg(RNASeqExperiment.name),
                                          RNASeqExperiment.group_name).\
        group_by(RNASeqExperiment.group_name, RNASeqExperiment.strain_id,
                 RNASeqExperiment.environment_id, RNASeqExperiment.machine_id,
                 RNASeqExperiment.sequencing_type).all()

    experiment_sets['array'] = ome.query(func.array_agg(ArrayExperiment.name)).\
        group_by(ArrayExperiment.strain_id, ArrayExperiment.environment_id,
                 ArrayExperiment.platform).all()

    experiment_sets['ChIP'] = ome.query(func.array_agg(ChIPExperiment.name)).\
        group_by(ChIPExperiment.strain_id, ChIPExperiment.environment_id,
                 ChIPExperiment.antibody, ChIPExperiment.protocol_type,
                 ChIPExperiment.target).all()

    ome.close()
    return experiment_sets
def query_person_contacts():
    return (
        DBSession.query(
            Person.id.label('person_id'),
            func.array_to_string(
                func.array_agg(
                    case([(Contact.contact_type == 'phone', Contact.contact)])
                ), ', '
            ).label('phone'),
            func.array_to_string(
                func.array_agg(
                    case([(Contact.contact_type == 'email', Contact.contact)])
                ), ', '
            ).label('email'),
            func.array_to_string(
                func.array_agg(
                    case([(Contact.contact_type == 'skype', Contact.contact)])
                ), ', '
            ).label('skype'),
        )
        .join(Contact, Person.contacts)
        .group_by(Person.id)
    )
def _get_permissions_query(self, session, identifier):
    """
    select domain, json_agg(parts) as permissions from
    (select domain, row_to_json(r) as parts from
        (select domain, action, array_agg(distinct target) as target from
            (select (case when domain is null then '*' else domain end) as domain,
                    (case when target is null then '*' else target end) as target,
                    array_agg(distinct (case when action is null then '*' else action end)) as action
             from permission
             group by domain, target) x
         group by domain, action) r) parts
    group by domain;
    """
    thedomain = case([(Domain.name == None, "*")], else_=Domain.name)
    theaction = case([(Action.name == None, "*")], else_=Action.name)
    theresource = case([(Resource.name == None, "*")], else_=Resource.name)

    action_agg = func.array_agg(theaction.distinct())

    stmt1 = (
        session.query(
            Permission.domain_id,
            thedomain.label("domain"),
            Permission.resource_id,
            theresource.label("resource"),
            action_agg.label("action"),
        )
        .select_from(User)
        .join(role_membership_table, User.pk_id == role_membership_table.c.user_id)
        .join(role_permission_table,
              role_membership_table.c.role_id == role_permission_table.c.role_id)
        .join(Permission, role_permission_table.c.permission_id == Permission.pk_id)
        .outerjoin(Domain, Permission.domain_id == Domain.pk_id)
        .outerjoin(Action, Permission.action_id == Action.pk_id)
        .outerjoin(Resource, Permission.resource_id == Resource.pk_id)
        .filter(User.identifier == identifier)
        .group_by(Permission.domain_id, Domain.name, Permission.resource_id, Resource.name)
    ).subquery()

    stmt2 = (
        session.query(stmt1.c.domain,
                      stmt1.c.action,
                      func.array_agg(stmt1.c.resource.distinct()).label("resource"))
        .select_from(stmt1)
        .group_by(stmt1.c.domain, stmt1.c.action)
    ).subquery()

    stmt3 = (
        session.query(stmt2.c.domain,
                      func.row_to_json(as_row(stmt2)).label("parts"))
        .select_from(stmt2)
    ).subquery()

    final = (
        session.query(stmt3.c.domain, cast(func.json_agg(stmt3.c.parts), Text))
        .select_from(stmt3)
        .group_by(stmt3.c.domain)
    )

    return final
def percentage_by_district(self):
    """ Returns the percentage of votes aggregated by district.

    Includes uncounted districts and districts with no results available.

    """
    results = self.election.results
    results = results.join(ElectionResult.candidate_results)
    results = results.filter(CandidateResult.candidate_id == self.id)
    results = results.with_entities(
        ElectionResult.district.label('name'),
        func.array_agg(ElectionResult.entity_id).label('entities'),
        func.coalesce(
            func.bool_and(ElectionResult.counted), False
        ).label('counted'),
        func.sum(ElectionResult.accounted_ballots).label('total'),
        func.sum(CandidateResult.votes).label('votes'),
    )
    results = results.group_by(ElectionResult.district)
    results = results.order_by(None)
    results = results.all()

    percentage = {
        r.name: {
            'counted': r.counted,
            'entities': r.entities,
            'percentage': 100 * (r.votes / r.total) if r.total else 0.0
        } for r in results
    }

    empty = self.election.results
    empty = empty.with_entities(
        ElectionResult.district.label('name'),
        func.array_agg(ElectionResult.entity_id).label('entities'),
        func.coalesce(
            func.bool_and(ElectionResult.counted), False
        ).label('counted')
    )
    empty = empty.group_by(ElectionResult.district)
    empty = empty.order_by(None)
    for result in empty:
        update = (
            result.name not in percentage
            or set(percentage[result.name]['entities']) != set(result.entities)
        )
        if update:
            percentage[result.name] = {
                'counted': result.counted,
                'entities': result.entities,
                'percentage': 0.0
            }

    return percentage
def _get_nonldap_sources(session):
    sources = session.query(
        CtiDirectories.name,
        CtiDirectories.uri,
        Directories.dirtype,
        Directories.xivo_username,
        Directories.xivo_password,
        Directories.xivo_verify_certificate,
        Directories.xivo_custom_ca_path,
        CtiDirectories.delimiter,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
        func.array_agg(CtiDirectoryFields.fieldname).label('fields'),
        func.array_agg(CtiDirectoryFields.value).label('values'),
    ).join(
        Directories, Directories.uri == CtiDirectories.uri
    ).outerjoin(
        CtiDirectoryFields, CtiDirectoryFields.dir_id == CtiDirectories.id
    ).group_by(
        CtiDirectories.name,
        CtiDirectories.uri,
        Directories.dirtype,
        Directories.xivo_username,
        Directories.xivo_password,
        Directories.xivo_verify_certificate,
        Directories.xivo_custom_ca_path,
        CtiDirectories.delimiter,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
    )

    source_configs = []
    for source in sources.all():
        source_config = {
            'name': source.name,
            'type': source.dirtype,
            'uri': source.uri,
            'delimiter': source.delimiter,
            'searched_columns': json.loads(source.match_direct or '[]'),
            'first_matched_columns': json.loads(source.match_reverse or '[]'),
            'format_columns': _format_columns(source.fields, source.values),
        }
        if source.dirtype == 'xivo':
            source_config['xivo_username'] = source.xivo_username
            source_config['xivo_password'] = source.xivo_password
            source_config['xivo_verify_certificate'] = source.xivo_verify_certificate
            source_config['xivo_custom_ca_path'] = source.xivo_custom_ca_path
        source_configs.append(source_config)

    return source_configs
def query_profile_tag_classes(config_tag_classes, public_tag_classes):
    config_tag_queries = [
        query_value_with_option_type_label(tag_class.query, tag_class, name)
        for tag_class, name in config_tag_classes
    ]
    public_tag_queries = [
        query_value_with_option_type_label(tag_class.query, tag_class, name).filter(
            tag_class.public.is_(True))
        for tag_class, name in public_tag_classes
    ]
    queries = config_tag_queries + public_tag_queries
    select = union_queries(queries).order_by("value")
    cte = select.cte("tags")
    result = db.session.query(
        cte.columns.option_type,
        func.array_agg(cte.columns.value)).group_by(cte.columns.option_type)

    # Need to default to empty list for each tag type, since tags with no
    # searchable values will not be in the result
    empty_values = {
        name: []
        for _, name in [*config_tag_classes, *public_tag_classes]
    }

    return {**empty_values, **dict(result)}
def results_by_district(self):
    """ Returns the results aggregated by district. """
    counted = func.coalesce(func.bool_and(BallotResult.counted), False)
    yeas = func.sum(BallotResult.yeas)
    nays = func.sum(BallotResult.nays)
    yeas_percentage = 100 * yeas / (
        cast(func.coalesce(func.nullif(yeas + nays, 0), 1), Float)
    )
    nays_percentage = 100 - yeas_percentage
    accepted = case({True: yeas > nays}, counted)
    results = self.results.with_entities(
        BallotResult.district.label('name'),
        counted.label('counted'),
        accepted.label('accepted'),
        yeas.label('yeas'),
        nays.label('nays'),
        yeas_percentage.label('yeas_percentage'),
        nays_percentage.label('nays_percentage'),
        func.sum(BallotResult.empty).label('empty'),
        func.sum(BallotResult.invalid).label('invalid'),
        func.sum(BallotResult.eligible_voters).label('eligible_voters'),
        func.array_agg(BallotResult.entity_id).label('entity_ids')
    )
    results = results.group_by(BallotResult.district)
    results = results.order_by(None).order_by(BallotResult.district)
    return results
def set_available_langs(documents, loaded=False):
    """Load and set the available langs for the given documents."""
    if len(documents) == 0:
        return

    if loaded:
        # all locales are already loaded, so simply set the attribute
        for document in documents:
            document.available_langs = [
                locale.lang for locale in document.locales
            ]
    else:
        document_ids = [doc.document_id for doc in documents]
        documents_for_id = {doc.document_id: doc for doc in documents}

        # aggregate the langs per document into an array
        lang_agg = func.array_agg(
            DocumentLocale.lang,
            type_=postgresql.ARRAY(String)).label('langs')

        langs_per_doc = DBSession.query(
            DocumentLocale.document_id, lang_agg). \
            filter(DocumentLocale.document_id.in_(document_ids)). \
            group_by(DocumentLocale.document_id). \
            all()

        for document_id, langs in langs_per_doc:
            document = documents_for_id.get(document_id)
            document.available_langs = langs
def start_requests(self):
    summary_utc = datetime.utcnow() - timedelta(days=1)
    db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI'))
    db_session = sessionmaker(bind=db_engine)()
    db_query = db_session.query(LiveTVSite.id.label('site_id'),
                                LiveTVRoom.id.label('room_id'),
                                LiveTVRoom.url.label('room_url'),
                                LiveTVRoomPresent.crawl_date_format.label('summary_date'),
                                func.array_agg(LiveTVRoomPresent.online).label('online_list'))\
        .join(LiveTVSite, LiveTVRoom, LiveTVRoomPresent)\
        .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT))\
        .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url,
                  LiveTVRoomPresent.crawl_date_format)
    for group_row in db_query:
        meta_info = {
            'site_id': group_row.site_id,
            'room_id': group_row.room_id,
            'summary_date': group_row.summary_date,
            'online': numpy.median(group_row.online_list)
        }
        room = self.session.query(LiveTVRoom).filter_by(id=meta_info['room_id']).one_or_none()
        if room:
            yield DailyItem(site_id=group_row.site_id, room_id=group_row.room_id,
                            summary_date=group_row.summary_date,
                            online=numpy.median(group_row.online_list),
                            followers=room.followers, description=room.description,
                            announcement=room.announcement, fallback=False)
    db_session.close()
def build_query_to_populate(self, query, full_table, aggregate_table):
    insert_columns = [aggregate_table.c.rtt_samples]
    mean = full_table.c.sumrtt / full_table.c.countrtt
    is_safe = full_table.c.countrtt > 0
    safe_mean = case([(is_safe, mean)], else_=None)
    select_query = query.column(func.array_agg(safe_mean))
    return insert_columns, select_query
def get_rates_by_dates_for_currency_in_period(self, currency, start_date, end_date):
    """
    :type currency: str
    :type start_date: datetime.date
    :type end_date: datetime.date
    :rtype: dict[datetime.date, list[decimal.Decimal]]
    """
    result = self.db_session\
        .query(
            ExchangeRate.date,
            func.array_agg(aggregate_order_by(ExchangeRate.rate,
                                              ExchangeRate.provider_id.asc()))
        )\
        .filter(
            and_(
                ExchangeRate.date >= start_date,
                ExchangeRate.date <= end_date,
                ExchangeRate.currency == currency,
                ExchangeRate.rate.isnot(None)
            )
        )\
        .group_by(ExchangeRate.date)\
        .order_by(ExchangeRate.date)\
        .all()

    return {r[0]: list(r[1]) for r in result}
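# Illustrative sketch (standalone; table and column names are invented, not taken
# from the snippet above): what the aggregate_order_by construct renders to on the
# PostgreSQL dialect.
from sqlalchemy import Column, Integer, MetaData, Numeric, Table, func, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import aggregate_order_by

metadata = MetaData()
rates = Table('rates', metadata,
              Column('rate', Numeric),
              Column('provider_id', Integer))

stmt = select([func.array_agg(aggregate_order_by(rates.c.rate, rates.c.provider_id.asc()))])
print(stmt.compile(dialect=postgresql.dialect()))
# SELECT array_agg(rates.rate ORDER BY rates.provider_id ASC) AS array_agg_1 FROM rates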
def upgrade():
    if not is_sqlite:
        connection = op.get_bind()
        attendees = connection.execute(select([
            attendee_table.c.hotel_pin,
            func.count(attendee_table.c.id),
            func.array_agg(attendee_table.c.id),
        ]).where(and_(
            attendee_table.c.hotel_pin != None,
            attendee_table.c.hotel_pin != '',
        )).group_by(
            attendee_table.c.hotel_pin,
        ).having(
            func.count(attendee_table.c.id) > 1,
        ))

        for hotel_pin, count, ids in attendees:
            hotel_pin_template = '{{:0{}d}}{{}}'.format(len(str(count))) if count > 9 else '{}{}'
            for i, id in enumerate(ids):
                new_hotel_pin = hotel_pin_template.format(i, hotel_pin)
                connection.execute(
                    attendee_table.update().where(attendee_table.c.id == id).values({
                        'hotel_pin': new_hotel_pin,
                    })
                )

        op.create_unique_constraint(op.f('uq_attendee_hotel_pin'), 'attendee', ['hotel_pin'])
def get_published_briefs(self):
    subquery = (
        db
        .session
        .query(
            BriefUser.brief_id,
            func.array_agg(func.substring(User.email_address, '@(.*)')).label('domain')
        )
        .join(User)
        .group_by(BriefUser.brief_id)
        .subquery()
    )
    result = (
        db
        .session
        .query(
            Brief.id,
            Brief.data['organisation'].astext.label('organisation'),
            Brief.published_at,
            Brief.withdrawn_at,
            Brief.data['title'].astext.label('title'),
            Brief.data['sellerSelector'].astext.label('openTo'),
            Brief.data['areaOfExpertise'].astext.label('brief_category'),
            Lot.name.label('brief_type'),
            subquery.columns.domain[1].label('publisher_domain')
        )
        .join(subquery, Brief.id == subquery.columns.brief_id)
        .join(Lot)
        .filter(Brief.published_at.isnot(None))
        .order_by(Brief.id)
        .all()
    )
    return [r._asdict() for r in result]
def build_query_to_populate(self, query, full_table, aggregate_table):
    insert_columns = [aggregate_table.c.upload_samples]
    mean = full_table.c.upload_octets / full_table.c.upload_time
    is_safe = and_(full_table.c.upload_time > 0,
                   full_table.c.download_flag == 'f')
    safe_mean = case([(is_safe, mean)], else_=None)
    select_query = query.column(func.array_agg(safe_mean))
    return insert_columns, select_query
def _daily_stats_agg_query(session):
    agg = lambda f, c: f(c).label(c.name)

    class DailyStatsBundle(Bundle):
        def create_row_processor(self, query, procs, labels):
            """Override create_row_processor to return values as dictionaries"""
            from pyroaring import BitMap

            def proc(row):
                row = dict(zip(labels, (proc(row) for proc in procs)))
                bm = BitMap.union(
                    *map(BitMap.deserialize, row.pop('_active_users')))
                row['active_users'] = len(bm)
                return row
            return proc

    stats = DailyStatsBundle(
        'stats',
        agg(func.sum, DailyStats.post_count),
        agg(func.sum, DailyStats.edit_count),
        agg(func.sum, DailyStats.threads_created),
        func.array_agg(DailyStats.active_users).label('_active_users'),
        cast(
            func.sum(DailyStats.posts_length) / func.sum(DailyStats.post_count),
            Integer).label('avg_post_length'))

    return session.query(stats)
def search_datasets_query(expressions, source_exprs=None, select_fields=None, with_source_ids=False):
    # type: (Tuple[Expression], Tuple[Expression], Iterable[PgField], bool) -> sqlalchemy.Expression
    if select_fields:
        select_columns = tuple(
            f.alchemy_expression.label(f.name) for f in select_fields)
    else:
        select_columns = _DATASET_SELECT_W_LOCAL

    if with_source_ids:
        # Include the IDs of source datasets
        select_columns += (
            select(
                (func.array_agg(DATASET_SOURCE.c.source_dataset_ref),)
            ).select_from(DATASET_SOURCE).where(
                DATASET_SOURCE.c.dataset_ref == DATASET.c.id
            ).group_by(
                DATASET_SOURCE.c.dataset_ref
            ).label('dataset_refs'),
        )

    raw_expressions = PostgresDbAPI._alchemify_expressions(expressions)
    from_expression = PostgresDbAPI._from_expression(DATASET, expressions, select_fields)
    where_expr = and_(DATASET.c.archived == None, *raw_expressions)

    if not source_exprs:
        return (select(select_columns).select_from(from_expression).where(where_expr))

    base_query = (
        select(select_columns + (
            DATASET_SOURCE.c.source_dataset_ref,
            literal(1).label('distance'),
            DATASET_SOURCE.c.classifier.label('path'))
        ).select_from(
            from_expression.join(
                DATASET_SOURCE,
                DATASET.c.id == DATASET_SOURCE.c.dataset_ref)
        ).where(where_expr)
    ).cte(name="base_query", recursive=True)

    recursive_query = base_query.union_all(
        select([
            col for col in base_query.columns
            if col.name not in ['source_dataset_ref', 'distance', 'path']
        ] + [
            DATASET_SOURCE.c.source_dataset_ref,
            (base_query.c.distance + 1).label('distance'),
            (base_query.c.path + '.' + DATASET_SOURCE.c.classifier).label('path')
        ]).select_from(
            base_query.join(
                DATASET_SOURCE,
                base_query.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref)))

    return (
        select([distinct(recursive_query.c.id)] + [
            col for col in recursive_query.columns
            if col.name not in ['id', 'source_dataset_ref', 'distance', 'path']
        ]).select_from(
            recursive_query.join(
                DATASET,
                DATASET.c.id == recursive_query.c.source_dataset_ref)
        ).where(
            and_(DATASET.c.archived == None,
                 *PostgresDbAPI._alchemify_expressions(source_exprs))))
def _light_no_filter_query(self, processed_filters, limit=JR_PAGE_LENGTH):
    """If there's no filter to shorten the query (e.g. we're just selecting
    all feeds with no category) we make a request more adapted to the task.
    """
    sub_query = session.query(*JR_SQLA_FIELDS)\
        .filter(*processed_filters)\
        .order_by(Cluster.main_date.desc())\
        .cte('clu')
    aggreg_feed = func.array_agg(Article.feed_id).label('feeds_id')
    aggreg_cat = func.array_agg(Article.category_id).label('categories_id')
    query = (session.query(sub_query, aggreg_feed, aggreg_cat)
             .join(Article, Article.cluster_id == sub_query.c.id)
             .filter(Article.user_id == self.user_id))
    yield from self._iter_on_query(
        query.group_by(*sub_query.c)
             .order_by(sub_query.c.main_date.desc())
             .limit(limit))
def test_funcfilter_arrayagg_subscript(self):
    num = column("q")
    self.assert_compile(
        func.array_agg(num).filter(num % 2 == 0)[1],
        "(array_agg(q) FILTER (WHERE q %% %(q_1)s = "
        "%(param_1)s))[%(param_2)s]",
        dialect="postgresql",
    )
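# Illustrative sketch (column name invented, not from the test above): the same
# FILTER construct used in an ordinary SELECT, restricting which rows feed each
# aggregate. FILTER is PostgreSQL-specific syntax.
from sqlalchemy import Integer, column, func, select
from sqlalchemy.dialects import postgresql

value = column('value', Integer)
stmt = select([
    func.array_agg(value).filter(value % 2 == 0).label('evens'),
    func.array_agg(value).filter(value % 2 == 1).label('odds'),
])
# Renders "array_agg(value) FILTER (WHERE ...)" once per labeled column.
print(stmt.compile(dialect=postgresql.dialect()))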
def get_correct_answers(cls, lesson_id, session):
    answers_agg = func.array_agg(ChoiceModel.id, type_=ARRAY(UUID)).label('answers')
    return session.query(ChoiceModel.question_id,
                         QuestionModel.score,
                         QuestionModel.type_question,
                         answers_agg). \
        join(QuestionModel.choices) \
        .filter(QuestionModel.lesson_id == lesson_id, ChoiceModel.is_correct) \
        .group_by(ChoiceModel.question_id).group_by(QuestionModel.score) \
        .group_by(QuestionModel.type_question)\
        .all()
def y(cls):
    s = database.Session.object_session(cls)
    arrunnest = func.unnest(cls.indices)
    xarr = (func.unnest(cls.indices) / cls.size).label('xarrind')
    arrind = (arrunnest - xarr * cls.size).label('yarrind')
    y = s.query(arrind).select_from(cls).subquery('yarr')
    yagg = s.query(func.array_agg(y.c.yarrind))
    return yagg.as_scalar()
def get_fields_query(self):
    labels_query = select([
        models.session_label.c.session_id,
        models.session_label.c.label_id
    ]).where(models.session_label.c.session_id == models.Session.id).distinct().correlate(
        models.Session).alias()

    return select([
        label("_type", text("'session'")),
        label("_index", text("'session'")),
        label("_id", models.Session.id),
        *[
            getattr(models.Session, column_name)
            for column_name in models.Session.__table__.columns.keys()
            if column_name not in {'timespan', 'parameters'}
        ],
        models.User.email.label('user_email'),
        select([
            func.array_agg(
                func.json_build_object('timestamp', models.Error.timestamp,
                                       'message', models.Error.message))
        ]).where(models.Error.session_id == models.Session.id).label('session_errors'),
        select([
            func.array_agg(
                func.json_build_object('timestamp', models.Warning.timestamp,
                                       'message', models.Warning.message))
        ]).where(models.Warning.session_id == models.Session.id).label('session_warnings'),
        select([
            func.array_agg(
                func.json_build_object("name", models.Label.name))
        ]).select_from(
            labels_query.join(
                models.Label,
                models.Label.id == labels_query.c.label_id)).label('session_labels'),
        select([
            func.json_object_agg(models.SessionMetadata.key,
                                 models.SessionMetadata.metadata_item)
        ]).where(models.SessionMetadata.session_id == models.Session.id).label('session_metadata'),
    ]).select_from(
        models.Session.__table__.outerjoin(
            models.User.__table__, models.Session.user_id == models.User.id))
def blacklist_filter(self):
    db = object_session(self)
    if db.query(BlacklistedTag).filter(BlacklistedTag.user_id == self.id).first() is None:
        return None
    return ~Request.tag_ids.overlap(
        db.query(func.array_agg(BlacklistedTag.tag_id))
        .filter(BlacklistedTag.user_id == self.id)
    )
def _get_ldap_sources(session):
    ldap_cti_directories = session.query(
        CtiDirectories.name,
        CtiDirectories.uri,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
        func.array_agg(CtiDirectoryFields.fieldname).label('fields'),
        func.array_agg(CtiDirectoryFields.value).label('values'),
    ).outerjoin(
        CtiDirectoryFields,
        CtiDirectoryFields.dir_id == CtiDirectories.id
    ).filter(
        CtiDirectories.uri.like('ldapfilter://%%')
    ).group_by(
        CtiDirectories.name,
        CtiDirectories.uri,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
    )

    source_configs = []
    for dir in ldap_cti_directories.all():
        _, _, name = dir.uri.partition('ldapfilter://')
        try:
            ldap_config = ldap_dao.build_ldapinfo_from_ldapfilter(name)
        except LookupError:
            logger.warning('Skipping LDAP source %s', dir.name)
            continue

        custom_filter = ldap_config.get('filter') or ''
        if custom_filter:
            custom_filter = '({})'.format(custom_filter)

        source_configs.append({'type': 'ldap',
                               'name': dir.name,
                               'searched_columns': json.loads(dir.match_direct or '[]'),
                               'first_matched_columns': json.loads(dir.match_reverse or '[]'),
                               'format_columns': _format_columns(dir.fields, dir.values),
                               'ldap_uri': ldap_config['uri'],
                               'ldap_base_dn': ldap_config['basedn'],
                               'ldap_username': ldap_config['username'],
                               'ldap_password': ldap_config['password'],
                               'ldap_custom_filter': custom_filter})

    return source_configs
def get_advanced_search_query(employer_id, params, status):
    skills = params.get('skills')
    locations = params.get('locations')
    role = params.get('role')
    name = params.get('name')
    salary = params.get('salary')

    query = DBSession.query(Candidate.id).filter(Candidate.status == status)
    if employer_id:
        query = query.outerjoin(
            V_CANDIDATE_CURRENT_EMPLOYERS,
            and_(V_CANDIDATE_CURRENT_EMPLOYERS.c.candidate_id == Candidate.id,
                 V_CANDIDATE_CURRENT_EMPLOYERS.c.employer_id == employer_id)) \
            .filter(V_CANDIDATE_CURRENT_EMPLOYERS.c.candidate_id == None)
    if locations:
        query = query.join(PreferredLocation, Candidate.id == PreferredLocation.candidate_id)
        country_filter = set([c['country_iso'] for c in locations])
        city_filter = [and_(City.name == loc['city'], City.country_iso == loc['country_iso'])
                       for loc in locations]
        city_ids = DBSession.query(City.id).filter(or_(*city_filter)).all()
        query = query.filter(or_(PreferredLocation.city_id.in_(city_ids),
                                 PreferredLocation.country_iso.in_(country_filter)))
    if salary or role:
        query = query.join(TargetPosition)
    if salary:
        query = query.filter(TargetPosition.minimum_salary <= salary)
    if role:
        role = get_by_name_or_raise(Role, role)
        query = query.filter(TargetPosition.role_id == role.id)
    if name and employer_id:
        name = name.lower()
        employer_ids = func.array_agg(Offer.employer_id, type_=ARRAY(TEXT)).label('employer_ids')
        offer_query = DBSession.query(Offer.candidate_id, employer_ids) \
            .filter(Offer.accepted != None) \
            .group_by(Offer.candidate_id).subquery()
        query = query.outerjoin(offer_query, offer_query.c.candidate_id == Candidate.id).filter(
            or_(cast(Candidate.id, TEXT).startswith(name),
                and_(
                    or_(func.lower(Candidate.first_name).startswith(name),
                        func.lower(Candidate.last_name).startswith(name)),
                    or_(
                        offer_query.c.employer_ids.any(str(employer_id)),
                        Candidate.anonymous == False
                    )
                )
                )
        )
    query = query.group_by(Candidate.id)
    if skills:
        query = query.join(CandidateSkill).join(Skill).filter(Skill.name.in_(skills)) \
            .having(func.count(Skill.name) == len(skills))
    return query
class OptimizationProcedureORM(ProcedureMixin, BaseResultORM):
    """
    An Optimization procedure
    """

    __tablename__ = 'optimization_procedure'

    id = Column(Integer, ForeignKey('base_result.id', ondelete='cascade'), primary_key=True)

    def __init__(self, **kwargs):
        kwargs.setdefault("version", 1)
        self.procedure = "optimization"
        super().__init__(**kwargs)

    schema_version = Column(Integer, default=1)
    initial_molecule = Column(Integer, ForeignKey('molecule.id'))
    initial_molecule_obj = relationship(MoleculeORM, lazy='select', foreign_keys=initial_molecule)

    # Results
    energies = Column(JSON)  # Column(ARRAY(Float))
    final_molecule = Column(Integer, ForeignKey('molecule.id'))
    final_molecule_obj = relationship(MoleculeORM, lazy='select', foreign_keys=final_molecule)

    # ids, calculated not stored in this table
    # NOTE: this won't work in SQLite since it returns ARRAYS, aggregate_order_by
    trajectory = column_property(
        select([func.array_agg(aggregate_order_by(Trajectory.result_id, Trajectory.position))])
        .where(Trajectory.opt_id == id))

    # array of objects (results) - Lazy - raise error if accessed
    trajectory_obj = relationship(Trajectory,
                                  cascade="all, delete-orphan",
                                  # backref="optimization_procedure",
                                  order_by=Trajectory.position,
                                  collection_class=ordering_list('position'))

    __mapper_args__ = {
        'polymorphic_identity': 'optimization_procedure',
        # to have separate select when querying BaseResultsORM
        'polymorphic_load': 'selectin',
    }

    __table_args__ = (
        Index('ix_optimization_program', 'program'),  # todo: needed for procedures?
    )

    def update_relations(self, trajectory=None, **kwarg):
        # update optimization_results relations
        # self._update_many_to_many(opt_result_association, 'opt_id', 'result_id',
        #                           self.id, trajectory, self.trajectory)

        self.trajectory_obj = []
        trajectory = [] if not trajectory else trajectory
        for result_id in trajectory:
            traj = Trajectory(opt_id=int(self.id), result_id=int(result_id))
            self.trajectory_obj.append(traj)
def add_outcomes(
    self,
    feature_query,
):
    # The events table holds all the events, not just conversion ones
    relevant_events = self.bq_session.query(
        self.events.c['time'].cast(DATE).label('date'),
        self.events.c['type'].label('outcome'),
        self.events.c['user_id'].label('user_id')).filter(
            and_(
                self.events.c['type'].in_(list(LABELS.keys())),
                cast(self.events.c['time'], DATE) > cast(self.aggregation_time, DATE),
                cast(self.events.c['time'], DATE) <= cast(
                    self.aggregation_time + timedelta(days=EVENT_LOOKAHEAD),
                    DATE))).subquery()

    # TODO: Remove deduplication, once the event table doesn't contain any
    relevant_events_deduplicated = self.bq_session.query(
        relevant_events.c['date'],
        relevant_events.c['user_id'],
        # This case provides logic for dealing with multiple outcomes during the same time period;
        # an example is user_id 195379 during 4/2020 where the user renews, but then cancels and
        # gets a refund (the current pipeline provides both labels)
        case(
            [
                # If there is at least one churn event, we identify the user as churned
                (literal(self.negative_label()).in_(
                    func.unnest(func.array_agg(relevant_events.c['outcome']))),
                 self.negative_label())
            ],
            # Any number of positive-only events is considered a renewal
            else_=self.positive_label()).label('outcome')).group_by(
                relevant_events.c['date'].label('date'),
                relevant_events.c['user_id'].label('user_id')).subquery()

    feature_query_w_outcome = self.bq_session.query(
        *[
            column.label(column.name) for column in feature_query.columns
            if column.name not in ['user_id', 'date', 'outcome_date']
        ],
        func.coalesce(feature_query.c['user_id'],
                      relevant_events_deduplicated.c['user_id']).label('user_id'),
        func.coalesce(feature_query.c['date'],
                      self.aggregation_time.date()).label('date'),
        relevant_events_deduplicated.c['outcome'].label('outcome'),
        func.coalesce(relevant_events_deduplicated.c['date'],
                      feature_query.c['outcome_date'].cast(DATE)).label('outcome_date')
    ).outerjoin(
        relevant_events_deduplicated,
        feature_query.c['user_id'] == relevant_events_deduplicated.c['user_id']
    ).subquery('feature_query_w_outcome')

    return feature_query_w_outcome
def get_group_team_stats(self, replay_ids, session=None):
    query = session.query(
        PlayerGame.game,
        func.array_agg(
            aggregate_order_by(PlayerGame.player, PlayerGame.player)).label("team")).filter(
        PlayerGame.game.in_(replay_ids)).group_by(PlayerGame.game).group_by(
        PlayerGame.is_orange).subquery()
    teams = session.query(query.c.team, func.array_agg(query.c.game)).group_by(query.c.team).all()
    return {
        "teamStats": [
            {
                "team": team[0],
                "games": team[1],
                "names": [name for (name,) in session.query(func.min(PlayerGame.name)).filter(
                    PlayerGame.game.in_(team[1])).filter(
                    PlayerGame.player.in_(team[0])).order_by(
                    PlayerGame.player).group_by(PlayerGame.player).all()],
                **self._create_group_stats(session, player_filter=team[0], replay_ids=team[1])
            }
            for team in teams]
    }
def get_recipe_aggregated_ingredients(self):
    t_i = aliased(Ingredient)
    t_r = aliased(Recipe)
    t_ri = aliased(RecipeIngredient)
    return (self.session.query(
                t_r.id,
                t_r.title,
                func.array_agg(t_i.ing_name).label('ings'))
            .join(t_ri, t_ri.recipe_id == t_r.id)
            .join(t_i, t_ri.ingredient_id == t_i.id)
            .group_by(t_r.id)
            .group_by(t_r.title)
            .all())
def get_brands_summary():
    '''Prints out each brand name, and each model name for that brand
    using only ONE database query.'''

    list_of_all = db.session.query(
        Model.brand_name,
        label("models", func.array_agg(Model.name))).group_by(Model.brand_name).all()

    for item in list_of_all:
        print
        print "Brand: %s, Model %s, " % (item.brand_name, set(item.models))
def query_callback(from_query):
    filtered_persons = from_query.cte('filtered_persons')

    with_memberships = self.context.session.query(
        filtered_persons,
        func.count(Membership.id.distinct()).label('membership_count'),
        func.array_agg(Group.id.distinct()).label('group_ids'),
        func.array_agg(Group.name.distinct()).label('group_names'),
        func.count(Contributor.work_id.distinct()).label('work_count'))
    with_memberships = with_memberships.outerjoin(
        Contributor, Contributor.person_id == filtered_persons.c.id)
    with_memberships = with_memberships.outerjoin(
        Membership, Membership.person_id == filtered_persons.c.id)
    with_memberships = with_memberships.outerjoin(
        Group, Group.id == Membership.group_id).group_by(
            filtered_persons.c.id, filtered_persons.c.name)

    return with_memberships.order_by(filtered_persons.c.name)
class CustomArtist(Artist):
    area = relationship('CustomArea', foreign_keys=[Artist.area_id])
    begin_area = relationship('CustomArea', foreign_keys=[Artist.begin_area_id])
    end_area = relationship('CustomArea', foreign_keys=[Artist.end_area_id])
    tags = relationship('ArtistTag')
    artist_credit_names = relationship("ArtistCreditName", innerjoin=True)
    primary_aliases = column_property(
        select([func.array_agg(ArtistAlias.name)]).where(
            and_(ArtistAlias.artist_id == Artist.id,
                 ArtistAlias.primary_for_locale == True)))
def test_aggregate_order_by_one(self):
    m = MetaData()
    table = Table('table1', m, Column('a', Integer), Column('b', Integer))
    expr = func.array_agg(aggregate_order_by(table.c.a, table.c.b.desc()))
    stmt = select([expr])

    # note this tests that the object exports FROM objects
    # correctly
    self.assert_compile(
        stmt,
        "SELECT array_agg(table1.a ORDER BY table1.b DESC) "
        "AS array_agg_1 FROM table1")
async def select_all_resource_contracts(conn: SAConn) -> RowProxy:
    cursor = await conn.execute(
        select([mainresource,
                func.array_agg(contract.c.id),
                func.array_agg(contract.c.name),
                association_t.c.mainresource_id]).apply_labels()
        .where(and_(association_t.c.mainresource_id == mainresource.c.id,
                    association_t.c.contract_id == contract.c.id))
        .group_by(mainresource.c.id,
                  mainresource.c.name,
                  association_t.c.mainresource_id))
    item = await cursor.fetchall()
    return item
def test_array_agg_array_literal_explicit_type(self):
    from sqlalchemy.dialects.postgresql import array

    expr = array([column('data', Integer), column('d2', Integer)])
    agg_expr = func.array_agg(expr, type_=ARRAY(Integer))
    is_(agg_expr.type._type_affinity, ARRAY)
    is_(agg_expr.type.item_type._type_affinity, Integer)

    self.assert_compile(agg_expr,
                        "array_agg(ARRAY[data, d2])",
                        dialect="postgresql")
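# Illustrative sketch (column name invented): why pass type_ explicitly. Declaring
# the aggregate's type as ARRAY(String) lets downstream operations such as 1-based
# element access compile with the expected element type on PostgreSQL.
from sqlalchemy import String, column, func, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import ARRAY

name = column('name', String)
agg = func.array_agg(name, type_=ARRAY(String))
stmt = select([agg[1].label('first_name_in_group')])
# Compiles to something like "(array_agg(name))[%(param_1)s]" on the PG dialect.
print(stmt.compile(dialect=postgresql.dialect()))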
def correlation(min_racers=5):
    """Calculates the Spearman correlation for races with the given
    minimum number of racers."""
    results = Results.query \
        .filter(Results.Place != None) \
        .with_entities(Results.RaceName,
                       Results.RaceCategoryName,
                       func.array_agg(Results.Place),
                       func.array_agg(Results.prior_mu)) \
        .group_by(Results.RaceName, Results.RaceCategoryName)

    print('Calculating Spearman correlation...')
    correlations = []
    for name, category, places, mus in results:
        if len(places) >= min_racers:
            correlations.append(spearmanr(places, mus).correlation)
    print('Done calculating Spearman correlation!')
    return correlations
def synchronize(self, engine, firstid):
    # cache routing information, so we don't have to get it every time
    route_cache = {}

    with engine.begin() as conn:
        h = self.t_hier.data
        s = self.t_segment.data
        sel = select([s.c.id, func.array_agg(h.c.parent).label('rels')])\
            .where(s.c.rels.any(h.c.child)).group_by(s.c.id)
        if firstid > 0:
            sel = sel.where(s.c.id >= firstid)

        for seg in conn.execute(sel):
            self._update_segment_style(conn, seg, route_cache)

        # and copy geometries
        sel = self.data.update().where(self.data.c.id == s.c.id)\
            .values(geom=ST_Simplify(s.c.geom, 1),
                    geom100=ST_Simplify(s.c.geom, 100))
        if firstid > 0:
            sel = sel.where(self.data.c.id >= firstid)
        conn.execute(sel)

        # now synchronize all segments where a hierarchical relation has changed
        if firstid > 0:
            segs = select([s.c.id, s.c.rels], distinct=True)\
                .where(s.c.rels.any(h.c.child))\
                .where(h.c.depth > 1)\
                .where(s.c.id < firstid)\
                .where(h.c.parent.in_(select([self.t_relchange.c.id])))\
                .alias()
            h2 = self.t_hier.data.alias()
            sel = select([segs.c.id, func.array_agg(h2.c.parent).label('rels')])\
                .where(segs.c.rels.any(h2.c.child)).group_by(segs.c.id)

            for seg in conn.execute(sel):
                self._update_segment_style(conn, seg, route_cache, update=True)
def get_available_langs(document_id):
    """Return the available languages (e.g. ['en', 'fr']) for a single
    document.
    """
    return DBSession. \
        query(
            func.array_agg(
                DocumentLocale.lang,
                type_=postgresql.ARRAY(String))). \
        filter(DocumentLocale.document_id == document_id). \
        group_by(DocumentLocale.document_id). \
        scalar()
def get_resume_query(session):
    user_data = func.array_agg(func.row(User.id, User.name)).label('user')
    query = session.query(
        Delivery.status,
        Address.city,
        user_data). \
        join(User, Delivery.user_id == User.id). \
        join(Address, Delivery.user_id == Address.user_id)
    deliveries = query.group_by(Delivery.status, Address.city)
    return deliveries
def test_aggregate_order_by_adapt(self):
    m = MetaData()
    table = Table('table1', m, Column('a', Integer), Column('b', Integer))
    expr = func.array_agg(aggregate_order_by(table.c.a, table.c.b.desc()))
    stmt = select([expr])

    a1 = table.alias('foo')
    stmt2 = sql_util.ClauseAdapter(a1).traverse(stmt)
    self.assert_compile(
        stmt2,
        "SELECT array_agg(foo.a ORDER BY foo.b DESC) "
        "AS array_agg_1 FROM table1 AS foo"
    )
def add_categories():
    """Update the Races table with the categories represented in the
    Results table.
    """
    # results of query are (arbitrary ResultID (so we can order), Race ID,
    # category name, number of racers in that category)
    cat_counts = (Results.query
                  .with_entities(
                      Results.race_id,
                      Results.RaceCategoryName,
                      func.count(Results.RaceCategoryName).label('count'))
                  .group_by(Results.race_id, Results.RaceCategoryName)
                  .order_by(Results.RaceCategoryName)
                  .subquery('cat_counts'))
    collected = (db.session
                 .query(cat_counts.c.race_id,
                        func.array_agg(cat_counts.c.RaceCategoryName).label('categories'),
                        func.array_agg(cat_counts.c.count).label('num_racers'))
                 .group_by(cat_counts.c.race_id))
    Races.update([c._asdict() for c in collected])
def get_consumer_roles(dbengine):
    query = select([
        service.c.id,
        func.array_agg(role.c.id, type_=UUIDArray()).label('role_ids'),
    ])
    query = query.where(service.c.is_consumer)
    query = query.select_from(
        join(service, role, role.c.service_id == service.c.id))
    query = query.group_by(service.c.id)
    cr = yield from dbengine.execute(query)
    res = yield from cr.fetchall()
    return res
def query_person_passports():
    return (
        DBSession.query(
            Person.id.label('person_id'),
            func.array_to_string(
                func.array_agg(
                    case([(Passport.passport_type == 'citizen', Passport.num)])
                ), ', '
            ).label('citizen'),
            func.array_to_string(
                func.array_agg(
                    case([(Passport.passport_type == 'foreign', Passport.num)])
                ), ', '
            ).label('foreign'),
        )
        .join(Passport, Person.passports)
        .group_by(Person.id)
    )
def get_dataset_sources(self, dataset_id):
    # recursively build the list of (dataset_ref, source_dataset_ref) pairs starting from dataset_id
    # include (dataset_ref, NULL) [hence the left join]
    sources = select(
        [DATASET.c.id.label('dataset_ref'),
         DATASET_SOURCE.c.source_dataset_ref,
         DATASET_SOURCE.c.classifier]
    ).select_from(
        DATASET.join(DATASET_SOURCE,
                     DATASET.c.id == DATASET_SOURCE.c.dataset_ref,
                     isouter=True)
    ).where(
        DATASET.c.id == dataset_id
    ).cte(name="sources", recursive=True)

    sources = sources.union_all(
        select(
            [sources.c.source_dataset_ref.label('dataset_ref'),
             DATASET_SOURCE.c.source_dataset_ref,
             DATASET_SOURCE.c.classifier]
        ).select_from(
            sources.join(DATASET_SOURCE,
                         sources.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref,
                         isouter=True)
        ).where(sources.c.source_dataset_ref != None))

    # turn the list of pairs into an adjacency list (dataset_ref, [source_dataset_ref, ...])
    # some source_dataset_ref's will be NULL
    aggd = select(
        [sources.c.dataset_ref,
         func.array_agg(sources.c.source_dataset_ref).label('sources'),
         func.array_agg(sources.c.classifier).label('classes')]
    ).group_by(sources.c.dataset_ref).alias('aggd')

    # join the adjacency list with datasets table
    query = select(
        _DATASET_SELECT_FIELDS + (aggd.c.sources, aggd.c.classes)
    ).select_from(aggd.join(DATASET, DATASET.c.id == aggd.c.dataset_ref))

    return self._connection.execute(query).fetchall()
def y(cls):
    # arrind = func.unnest(cls.indices).label('arrind')
    # x = arrind / cls.size
    # y = func.array_agg(arrind - x*cls.size).label('y')
    # return y
    s = db.Session()
    arrunnest = func.unnest(cls.indices)
    xarr = (func.unnest(cls.indices) / cls.size).label('xarrind')
    arrind = (arrunnest - xarr * cls.size).label('yarrind')
    # n.arrind - (n.arrind / n.size) * n.size
    y = s.query(arrind).select_from(cls).subquery('yarr')
    yagg = s.query(func.array_agg(y.c.yarrind))
    return yagg.as_scalar()
async def get_most_active_user(year: int = None, month: int = None, day=None):
    query_params = {
        "year": year,
        "month": month,
        "day": day
    }
    filter_spec = [{'field': k, 'op': '==', 'value': v}
                   for k, v in query_params.items() if v]
    query = db.query(
        UserContributes.user,
        func.count(UserContributes.user),
        func.array_agg(UserContributes.year),
        func.array_agg(UserContributes.month),
        func.array_agg(UserContributes.day),
    ).group_by(
        UserContributes.user,
    ).order_by(
        func.count(UserContributes.user).desc())
    filtered_query = apply_filters(query, filter_spec)
    return filtered_query.first()
def add_outcomes(
    feature_query,
    start_time: datetime,
    positive_event_lookahead: int = 1,
):
    # The events table holds all the events, not just conversion ones
    relevant_events = bq_session.query(
        events.c['time'].cast(DATE).label('date'),
        events.c['type'].label('outcome'),
        events.c['user_id'].label('user_id')).filter(
            events.c['type'].in_(list(LABELS.keys())),
            cast(events.c['time'], DATE) > cast(start_time, DATE),
            cast(events.c['time'], DATE) <= cast(
                start_time + timedelta(days=positive_event_lookahead), DATE)).subquery()

    # TODO: Remove deduplication, once the event table doesn't contain any
    relevant_events_deduplicated = bq_session.query(
        relevant_events.c['date'],
        relevant_events.c['user_id'],
        # This case provides logic for dealing with multiple outcomes during the same time period;
        # an example is user_id 195379 during 4/2020 where the user renews, but then cancels and
        # gets a refund (the current pipeline provides both labels)
        case(
            [
                # If there is at least one churn event, we identify the user as churned
                (literal(negative_label()).in_(
                    func.unnest(func.array_agg(relevant_events.c['outcome']))),
                 negative_label())
            ],
            # Any number of positive-only events is considered a renewal
            else_=positive_labels()).label('outcome')).group_by(
                relevant_events.c['date'].label('date'),
                relevant_events.c['user_id'].label('user_id')).subquery()

    feature_query_w_outcome = bq_session.query(
        feature_query,
        relevant_events_deduplicated.c['outcome'].label('outcome'),
        relevant_events_deduplicated.c['date'].label('outcome_date')
    ).outerjoin(
        relevant_events_deduplicated,
        and_(
            feature_query.c['user_id'] == relevant_events_deduplicated.c['user_id'],
            feature_query.c['date'] >= func.date_sub(
                relevant_events_deduplicated.c['date'],
                text(f'interval {positive_event_lookahead} day')),
            feature_query.c['date'] <= relevant_events_deduplicated.c['date'])
    ).subquery('feature_query_w_outcome')

    return feature_query_w_outcome
def search_datasets(self, expressions, select_fields=None, with_source_ids=False):
    """
    :type with_source_ids: bool
    :type select_fields: tuple[datacube.index.postgres._fields.PgField]
    :type expressions: tuple[datacube.index.postgres._fields.PgExpression]
    :rtype: dict
    """
    if select_fields:
        select_columns = tuple(
            f.alchemy_expression.label(f.name)
            for f in select_fields
        )
    else:
        select_columns = _DATASET_SELECT_FIELDS

    if with_source_ids:
        # Include the IDs of source datasets
        select_columns += (
            select(
                (func.array_agg(DATASET_SOURCE.c.source_dataset_ref),)
            ).select_from(
                DATASET_SOURCE
            ).where(
                DATASET_SOURCE.c.dataset_ref == DATASET.c.id
            ).group_by(
                DATASET_SOURCE.c.dataset_ref
            ).label('dataset_refs'),
        )

    def raw_expr(expression):
        if isinstance(expression, OrExpression):
            return or_(raw_expr(expr) for expr in expression.exprs)
        return expression.alchemy_expression

    raw_expressions = [raw_expr(expression) for expression in expressions]

    from_tables = DATASET.join(DATASET_TYPE).join(METADATA_TYPE)
    select_query = (
        select(
            select_columns
        ).select_from(
            from_tables
        ).where(
            and_(DATASET.c.archived == None, *raw_expressions)
        )
    )
    results = self._connection.execute(select_query)
    for result in results:
        yield result
def read_rolepermissions():
    q_session = Session()
    query = q_session.query(
        Roles.name,
        Roles.id,
        func.array_agg(RolesPermissions.permissions_id),
    ).join(
        RolesPermissions, Roles.id == RolesPermissions.role_id
    ).group_by(
        Roles.id
    ).order_by(
        asc(Roles.name)
    ).all()
    return query
def test_array_agg_array_literal_implicit_type(self):
    from sqlalchemy.dialects.postgresql import array, ARRAY as PG_ARRAY

    expr = array([column('data', Integer), column('d2', Integer)])
    assert isinstance(expr.type, PG_ARRAY)

    agg_expr = func.array_agg(expr)
    assert isinstance(agg_expr.type, PG_ARRAY)
    is_(agg_expr.type._type_affinity, ARRAY)
    is_(agg_expr.type.item_type._type_affinity, Integer)

    self.assert_compile(
        agg_expr, "array_agg(ARRAY[data, d2])", dialect="postgresql"
    )
def user_roles():
    if session["logged_in"] and ("Read users" in session or "Edit users" in session):
        roles = read_roles()
        q_session = Session()
        query = (
            q_session.query(UserRoles.email, func.array_agg(UserRoles.user_role_id))
            .group_by(UserRoles.email)
            .order_by(asc(UserRoles.email))
            .all()
        )
        if query:
            return render_template("manageusers.html", roles=roles, usersandroles=query)
        else:
            errormsg = "No roles found. Please add roles and assign them to users."
            return render_template("manageusers.html", error=errormsg)
    else:
        session["no-access"] = True
        session["tried"] = "Users"
        return redirect(url_for("login"))
def get_tags(db, entity_model, tag_model, entity_ids):
    """Get tags associated with entities.

    Args:
        db (Session object): Session object.
        entity_model (mbdata.models): Model of the entity.
        tag_model (mbdata.models): Tag model of the entity.
        entity_ids (list): IDs of the entities whose tags are to be fetched.

    Returns:
        List of tuples containing the entity_ids and the list of associated tags.
    """
    tags = db.query(entity_model.id, func.array_agg(Tag.name)).\
        join(tag_model).\
        join(Tag).\
        filter(entity_model.id.in_(entity_ids)).\
        group_by(entity_model.id).\
        all()
    return tags
def start_requests(self):
    summary_utc = datetime.utcnow()  # - timedelta(days=1)
    db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI'))
    db_session = sessionmaker(bind=db_engine)()
    db_query = db_session.query(LiveTVSite.id.label('site_id'),
                                LiveTVRoom.id.label('room_id'),
                                LiveTVRoom.url.label('room_url'),
                                LiveTVRoomPresent.crawl_date_format.label('summary_date'),
                                func.array_agg(LiveTVRoomPresent.online).label('online_list')) \
        .join(LiveTVRoom, LiveTVRoomPresent) \
        .filter(LiveTVSite.code == 'bilibili') \
        .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT)) \
        .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url,
                  LiveTVRoomPresent.crawl_date_format)
    for group_row in db_query:
        meta_info = {
            'site_id': group_row.site_id,
            'room_id': group_row.room_id,
            'summary_date': group_row.summary_date,
            'online': numpy.median(group_row.online_list)
        }
        yield Request(group_row.room_url, callback=self.parse, meta=meta_info)
    db_session.close()