Beispiel #1
0
def query_experiment_sets():
    """Query all replicates of each experiment type and group them for further processing.

    Returns a dict with keys 'RNAseq', 'array' and 'ChIP', each holding
    rows of aggregated experiment names (one array per replicate group).
    """
    session = base.Session()

    rnaseq = datasets.RNASeqExperiment
    array = datasets.ArrayExperiment
    chip = datasets.ChIPExperiment

    experiment_sets = {}

    # RNAseq replicates share group/strain/environment/machine/sequencing type.
    experiment_sets['RNAseq'] = session.query(
        func.array_agg(rnaseq.name), rnaseq.group_name,
    ).group_by(
        rnaseq.group_name, rnaseq.strain_id, rnaseq.environment_id,
        rnaseq.machine_id, rnaseq.sequencing_type,
    ).all()

    # Array replicates share strain/environment/platform.
    experiment_sets['array'] = session.query(
        func.array_agg(array.name),
    ).group_by(
        array.strain_id, array.environment_id, array.platform,
    ).all()

    # ChIP replicates share strain/environment/antibody/protocol/target.
    experiment_sets['ChIP'] = session.query(
        func.array_agg(chip.name),
    ).group_by(
        chip.strain_id, chip.environment_id, chip.antibody,
        chip.protocol_type, chip.target,
    ).all()

    session.close()
    return experiment_sets
Beispiel #2
0
def query_person_contacts():
    """Return one row per person with phone/email/skype contacts as strings."""
    def contacts_of(kind):
        # Aggregate only contacts of the given type; rows of other types
        # contribute NULL, which array_to_string silently drops.
        return func.array_to_string(
            func.array_agg(
                case([(Contact.contact_type == kind, Contact.contact)])
            ),
            ', ',
        ).label(kind)

    query = DBSession.query(
        Person.id.label('person_id'),
        contacts_of('phone'),
        contacts_of('email'),
        contacts_of('skype'),
    )
    return query.join(Contact, Person.contacts).group_by(Person.id)
    def _get_permissions_query(self, session, identifier):
        """
        Build the aggregated permission query for one user.

        Equivalent SQL:

        select domain, json_agg(parts) as permissions from
            (select domain, row_to_json(r) as parts from
                    (select domain, action, array_agg(distinct target) as target from
                        (select (case when domain is null then '*' else domain end) as domain,
                                (case when target is null then '*' else target end) as target,
                                array_agg(distinct (case when action is null then '*' else action end)) as action
                           from permission
                          group by domain, target
                         ) x
                      group by domain, action)
              r) parts
        group by domain;
        """
        # A NULL domain/action/resource means "any"; normalize to the
        # wildcard '*' so the JSON output is uniform.
        thedomain = case([(Domain.name == None, "*")], else_=Domain.name)
        theaction = case([(Action.name == None, "*")], else_=Action.name)
        theresource = case([(Resource.name == None, "*")], else_=Resource.name)

        action_agg = func.array_agg(theaction.distinct())

        # Step 1: permissions reachable from the user through role membership,
        # with distinct actions collapsed into an array per (domain, resource).
        stmt1 = (
            session.query(
                Permission.domain_id,
                thedomain.label("domain"),
                Permission.resource_id,
                theresource.label("resource"),
                action_agg.label("action"),
            )
            .select_from(User)
            .join(role_membership_table, User.pk_id == role_membership_table.c.user_id)
            .join(role_permission_table, role_membership_table.c.role_id == role_permission_table.c.role_id)
            .join(Permission, role_permission_table.c.permission_id == Permission.pk_id)
            .outerjoin(Domain, Permission.domain_id == Domain.pk_id)
            .outerjoin(Action, Permission.action_id == Action.pk_id)
            .outerjoin(Resource, Permission.resource_id == Resource.pk_id)
            .filter(User.identifier == identifier)
            .group_by(Permission.domain_id, Domain.name, Permission.resource_id, Resource.name)
        ).subquery()

        # Step 2: collapse the distinct resources per (domain, action) pair.
        stmt2 = (
            session.query(stmt1.c.domain, stmt1.c.action, func.array_agg(stmt1.c.resource.distinct()).label("resource"))
            .select_from(stmt1)
            .group_by(stmt1.c.domain, stmt1.c.action)
        ).subquery()

        # Step 3: serialize each (domain, action, resource) row to JSON.
        stmt3 = (
            session.query(stmt2.c.domain, func.row_to_json(as_row(stmt2)).label("parts")).select_from(stmt2)
        ).subquery()

        # Final: one row per domain, all of its permission parts aggregated
        # into a single JSON array rendered as text.
        final = (
            session.query(stmt3.c.domain, cast(func.json_agg(stmt3.c.parts), Text))
            .select_from(stmt3)
            .group_by(stmt3.c.domain)
        )

        return final
Beispiel #4
0
    def percentage_by_district(self):
        """ Returns the percentage of votes aggregated by the distict. Includes
        uncounted districts and districts with no results available.

        """

        # First pass: aggregate this candidate's votes per district.
        results = self.election.results
        results = results.join(ElectionResult.candidate_results)
        results = results.filter(CandidateResult.candidate_id == self.id)
        results = results.with_entities(
            ElectionResult.district.label('name'),
            func.array_agg(ElectionResult.entity_id).label('entities'),
            # A district counts as counted only if *all* its entities are
            # counted (bool_and); coalesce covers districts without rows.
            func.coalesce(
                func.bool_and(ElectionResult.counted), False
            ).label('counted'),
            func.sum(ElectionResult.accounted_ballots).label('total'),
            func.sum(CandidateResult.votes).label('votes'),
        )
        results = results.group_by(ElectionResult.district)
        results = results.order_by(None)
        results = results.all()
        percentage = {
            r.name: {
                'counted': r.counted,
                'entities': r.entities,
                # Guard against division by zero for districts with no ballots.
                'percentage': 100 * (r.votes / r.total) if r.total else 0.0
            } for r in results
        }

        # Second pass over *all* districts (not only those where the candidate
        # has results) so uncounted/empty districts show up with 0.0 percent.
        empty = self.election.results
        empty = empty.with_entities(
            ElectionResult.district.label('name'),
            func.array_agg(ElectionResult.entity_id).label('entities'),
            func.coalesce(
                func.bool_and(ElectionResult.counted), False
            ).label('counted')
        )
        empty = empty.group_by(ElectionResult.district)
        empty = empty.order_by(None)
        for result in empty:
            # Replace the entry when it is missing or covers a different set
            # of entities (i.e. only part of the district had results).
            update = (
                result.name not in percentage
                or (
                    set(percentage[result.name]['entities'])
                    != set(result.entities)
                )
            )
            if update:
                percentage[result.name] = {
                    'counted': result.counted,
                    'entities': result.entities,
                    'percentage': 0.0
                }

        return percentage
Beispiel #5
0
def _get_nonldap_sources(session):
    """Build a source config dict for every non-LDAP CTI directory."""
    # The same columns are both selected and grouped on; keep them in one
    # tuple so the two lists can never drift apart.
    grouping = (
        CtiDirectories.name,
        CtiDirectories.uri,
        Directories.dirtype,
        Directories.xivo_username,
        Directories.xivo_password,
        Directories.xivo_verify_certificate,
        Directories.xivo_custom_ca_path,
        CtiDirectories.delimiter,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
    )
    columns = grouping + (
        func.array_agg(CtiDirectoryFields.fieldname).label('fields'),
        func.array_agg(CtiDirectoryFields.value).label('values'),
    )
    sources = session.query(*columns).join(
        Directories,
        Directories.uri == CtiDirectories.uri
    ).outerjoin(
        CtiDirectoryFields,
        CtiDirectoryFields.dir_id == CtiDirectories.id
    ).group_by(*grouping)

    source_configs = []
    for row in sources.all():
        config = {
            'name': row.name,
            'type': row.dirtype,
            'uri': row.uri,
            'delimiter': row.delimiter,
            'searched_columns': json.loads(row.match_direct or '[]'),
            'first_matched_columns': json.loads(row.match_reverse or '[]'),
            'format_columns': _format_columns(row.fields, row.values),
        }
        if row.dirtype == 'xivo':
            # Only XiVO-backed directories carry credentials/TLS settings.
            config['xivo_username'] = row.xivo_username
            config['xivo_password'] = row.xivo_password
            config['xivo_verify_certificate'] = row.xivo_verify_certificate
            config['xivo_custom_ca_path'] = row.xivo_custom_ca_path
        source_configs.append(config)

    return source_configs
Beispiel #6
0
def query_profile_tag_classes(config_tag_classes, public_tag_classes):
    """Collect searchable tag values grouped by option type.

    Every known tag type is present in the result, defaulting to an empty
    list when it has no searchable values.
    """
    queries = []
    for tag_class, name in config_tag_classes:
        queries.append(
            query_value_with_option_type_label(tag_class.query, tag_class, name))
    for tag_class, name in public_tag_classes:
        # Public tag classes only expose values flagged as public.
        queries.append(
            query_value_with_option_type_label(
                tag_class.query, tag_class, name
            ).filter(tag_class.public.is_(True)))

    tags_cte = union_queries(queries).order_by("value").cte("tags")

    grouped = db.session.query(
        tags_cte.columns.option_type,
        func.array_agg(tags_cte.columns.value),
    ).group_by(tags_cte.columns.option_type)

    # Need to default to empty list for each tag type, since tags with no
    # searchable values will not be in the result
    defaults = {
        name: []
        for _, name in [*config_tag_classes, *public_tag_classes]
    }

    return {**defaults, **dict(grouped)}
Beispiel #7
0
    def results_by_district(self):
        """ Returns the results aggregated by the distict.  """

        # A district is counted only if every entity in it is counted
        # (bool_and); coalesce covers districts without any result rows.
        counted = func.coalesce(func.bool_and(BallotResult.counted), False)
        yeas = func.sum(BallotResult.yeas)
        nays = func.sum(BallotResult.nays)
        # nullif/coalesce guard against division by zero when yeas + nays == 0.
        yeas_percentage = 100 * yeas / (
            cast(func.coalesce(func.nullif(yeas + nays, 0), 1), Float)
        )
        nays_percentage = 100 - yeas_percentage
        # SQL CASE on `counted`: accepted is yeas > nays once counted,
        # NULL while the district is still uncounted.
        accepted = case({True: yeas > nays}, counted)
        results = self.results.with_entities(
            BallotResult.district.label('name'),
            counted.label('counted'),
            accepted.label('accepted'),
            yeas.label('yeas'),
            nays.label('nays'),
            yeas_percentage.label('yeas_percentage'),
            nays_percentage.label('nays_percentage'),
            func.sum(BallotResult.empty).label('empty'),
            func.sum(BallotResult.invalid).label('invalid'),
            func.sum(BallotResult.eligible_voters).label('eligible_voters'),
            func.array_agg(BallotResult.entity_id).label('entity_ids')
        )
        results = results.group_by(BallotResult.district)
        # Drop any inherited ordering before imposing district order.
        results = results.order_by(None).order_by(BallotResult.district)
        return results
Beispiel #8
0
def set_available_langs(documents, loaded=False):
    """Load and set the available langs for the given documents."""
    if not documents:
        return

    if loaded:
        # All locales are already loaded; read them off the documents directly.
        for document in documents:
            document.available_langs = [
                locale.lang for locale in document.locales
            ]
        return

    document_ids = [doc.document_id for doc in documents]
    by_id = {doc.document_id: doc for doc in documents}

    # Collapse each document's locale langs into a single array column.
    lang_agg = func.array_agg(
        DocumentLocale.lang, type_=postgresql.ARRAY(String)).label('langs')

    rows = (
        DBSession.query(DocumentLocale.document_id, lang_agg)
        .filter(DocumentLocale.document_id.in_(document_ids))
        .group_by(DocumentLocale.document_id)
        .all())

    for document_id, langs in rows:
        by_id.get(document_id).available_langs = langs
Beispiel #9
0
 def start_requests(self):
     """Aggregate yesterday's per-room online counts into DailyItem requests.

     Groups LiveTVRoomPresent rows by site/room/date, takes the median of
     the online counts and yields one DailyItem per room that still exists.
     """
     summary_utc = datetime.utcnow() - timedelta(days=1)
     db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI'))
     db_session = sessionmaker(bind=db_engine)()
     db_query = db_session.query(LiveTVSite.id.label('site_id'), LiveTVRoom.id.label('room_id'),
                                 LiveTVRoom.url.label('room_url'),
                                 LiveTVRoomPresent.crawl_date_format.label('summary_date'),
                                 func.array_agg(LiveTVRoomPresent.online).label('online_list'))\
         .join(LiveTVSite, LiveTVRoom, LiveTVRoomPresent)\
         .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT))\
         .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url, LiveTVRoomPresent.crawl_date_format)
     for group_row in db_query:
         room = self.session.query(LiveTVRoom).filter_by(id=group_row.room_id).one_or_none()
         if room:
             # Median is robust against short crawl gaps within the day.
             # (A dead `meta_info` dict that recomputed the median but was
             # never used has been removed.)
             yield DailyItem(site_id=group_row.site_id, room_id=group_row.room_id,
                             summary_date=group_row.summary_date,
                             online=numpy.median(group_row.online_list),
                             followers=room.followers, description=room.description,
                             announcement=room.announcement, fallback=False)
     db_session.close()
 def build_query_to_populate(self, query, full_table, aggregate_table):
     """Append an array_agg of per-row mean RTT to *query*.

     Rows with no RTT samples (countrtt == 0) aggregate as NULL instead of
     triggering a division by zero.
     """
     mean_rtt = full_table.c.sumrtt / full_table.c.countrtt
     guarded_mean = case([(full_table.c.countrtt > 0, mean_rtt)], else_=None)
     return [aggregate_table.c.rtt_samples], query.column(func.array_agg(guarded_mean))
    def get_rates_by_dates_for_currency_in_period(self, currency, start_date, end_date):
        """Map each date in [start_date, end_date] to the currency's rates,
        ordered by provider id ascending.

        :type currency: str
        :type start_date: datetime.date
        :type end_date: datetime.date
        :rtype: dict[datetime.date, list[decimal.Decimal]]
        """
        ordered_rates = func.array_agg(
            aggregate_order_by(ExchangeRate.rate, ExchangeRate.provider_id.asc()))
        rows = (
            self.db_session
            .query(ExchangeRate.date, ordered_rates)
            .filter(
                and_(
                    ExchangeRate.date >= start_date,
                    ExchangeRate.date <= end_date,
                    ExchangeRate.currency == currency,
                    ExchangeRate.rate.isnot(None)
                )
            )
            .group_by(ExchangeRate.date)
            .order_by(ExchangeRate.date)
            .all())

        return {day: list(rates) for day, rates in rows}
def upgrade():
    # SQLite doesn't support the aggregate/array machinery used below, so the
    # de-duplication pass only runs on other backends.
    if not is_sqlite:
        connection = op.get_bind()
        # All non-empty hotel_pins shared by more than one attendee, together
        # with the count and the ids of the attendees sharing each pin.
        attendees = connection.execute(select([
            attendee_table.c.hotel_pin,
            func.count(attendee_table.c.id),
            func.array_agg(attendee_table.c.id),
        ]).where(and_(
            attendee_table.c.hotel_pin != None,
            attendee_table.c.hotel_pin != '',
        )).group_by(
            attendee_table.c.hotel_pin,
        ).having(
            func.count(attendee_table.c.id) > 1,
        ))
        for hotel_pin, count, ids in attendees:
            # Disambiguate duplicates by prefixing an index; the index is
            # zero-padded to a fixed width once there are 10+ duplicates.
            hotel_pin_template = '{{:0{}d}}{{}}'.format(len(str(count))) if count > 9 else '{}{}'

            for i, id in enumerate(ids):
                # NOTE(review): i == 0 also gets a prefix, so *every* duplicated
                # pin is rewritten — presumably intended; confirm before reuse.
                new_hotel_pin = hotel_pin_template.format(i, hotel_pin)
                connection.execute(
                    attendee_table.update().where(attendee_table.c.id == id).values({
                        'hotel_pin': new_hotel_pin,
                    })
                )

    # Safe to enforce uniqueness now that duplicates have been rewritten.
    op.create_unique_constraint(op.f('uq_attendee_hotel_pin'), 'attendee', ['hotel_pin'])
    def get_published_briefs(self):
        """Return all published briefs with their publisher's email domain."""
        # One row per brief holding the email domains of its associated users.
        domains = (
            db
            .session
            .query(
                BriefUser.brief_id,
                func.array_agg(func.substring(User.email_address, '@(.*)')).label('domain')
            )
            .join(User)
            .group_by(BriefUser.brief_id)
            .subquery()
        )
        rows = (
            db
            .session
            .query(
                Brief.id,
                Brief.data['organisation'].astext.label('organisation'),
                Brief.published_at,
                Brief.withdrawn_at,
                Brief.data['title'].astext.label('title'),
                Brief.data['sellerSelector'].astext.label('openTo'),
                Brief.data['areaOfExpertise'].astext.label('brief_category'),
                Lot.name.label('brief_type'),
                # Postgres arrays are 1-based: take the first user's domain.
                domains.columns.domain[1].label('publisher_domain')
            )
            .join(domains, Brief.id == domains.columns.brief_id)
            .join(Lot)
            .filter(Brief.published_at.isnot(None))
            .order_by(Brief.id)
            .all()
        )
        return [row._asdict() for row in rows]
Beispiel #14
0
def set_available_langs(documents, loaded=False):
    """Load and set the available langs for the given documents."""
    if not documents:
        return

    if loaded:
        # The locales are loaded already; derive the langs without querying.
        for doc in documents:
            doc.available_langs = [loc.lang for loc in doc.locales]
        return

    ids = [doc.document_id for doc in documents]
    lookup = {doc.document_id: doc for doc in documents}

    # Collapse each document's locale langs into one array column.
    aggregated = func.array_agg(
        DocumentLocale.lang,
        type_=postgresql.ARRAY(String)).label('langs')

    per_document = (
        DBSession.query(DocumentLocale.document_id, aggregated)
        .filter(DocumentLocale.document_id.in_(ids))
        .group_by(DocumentLocale.document_id)
        .all())

    for doc_id, langs in per_document:
        lookup.get(doc_id).available_langs = langs
 def build_query_to_populate(self, query, full_table, aggregate_table):
     """Append an array_agg of per-row mean upload throughput to *query*.

     Rows with no upload time, or flagged as downloads, aggregate as NULL
     rather than dividing by zero.
     """
     throughput = full_table.c.upload_octets / full_table.c.upload_time
     valid = and_(full_table.c.upload_time > 0, full_table.c.download_flag == 'f')
     guarded = case([(valid, throughput)], else_=None)
     return [aggregate_table.c.upload_samples], query.column(func.array_agg(guarded))
Beispiel #16
0
def _daily_stats_agg_query(session):
    # Shorthand: apply aggregate *f* to column *c*, keeping the column's name.
    agg = lambda f, c: f(c).label(c.name)

    class DailyStatsBundle(Bundle):
        def create_row_processor(self, query, procs, labels):
            """Override create_row_processor to return values as dictionaries"""
            from pyroaring import BitMap

            def proc(row):
                # Apply each column's processor to the row, keyed by label.
                row = dict(zip(labels, (proc(row) for proc in procs)))
                # '_active_users' holds one serialized roaring bitmap of user
                # ids per day; union them and report the distinct-user count.
                bm = BitMap.union(
                    *map(BitMap.deserialize, row.pop('_active_users')))
                row['active_users'] = len(bm)
                return row

            return proc

    # Sum the per-day counters, collect the raw bitmaps for the processor
    # above, and compute average post length as total length / total posts.
    stats = DailyStatsBundle(
        'stats', agg(func.sum, DailyStats.post_count),
        agg(func.sum, DailyStats.edit_count),
        agg(func.sum, DailyStats.threads_created),
        func.array_agg(DailyStats.active_users).label('_active_users'),
        cast(
            func.sum(DailyStats.posts_length) /
            func.sum(DailyStats.post_count), Integer).label('avg_post_length'))

    return session.query(stats)
Beispiel #17
0
    def search_datasets_query(expressions,
                              source_exprs=None,
                              select_fields=None,
                              with_source_ids=False):
        # type: (Tuple[Expression], Tuple[Expression], Iterable[PgField], bool) -> sqlalchemy.Expression
        """Build a dataset search query, optionally recursing through sources.

        Without ``source_exprs`` this is a plain filtered select; with them, a
        recursive CTE walks the dataset provenance graph and ``source_exprs``
        are applied to the source datasets reached through it.
        """
        if select_fields:
            select_columns = tuple(
                f.alchemy_expression.label(f.name) for f in select_fields)
        else:
            select_columns = _DATASET_SELECT_W_LOCAL

        if with_source_ids:
            # Include the IDs of source datasets as an aggregated array column.
            select_columns += (select(
                (func.array_agg(DATASET_SOURCE.c.source_dataset_ref),
                 )).select_from(DATASET_SOURCE).where(
                     DATASET_SOURCE.c.dataset_ref == DATASET.c.id).group_by(
                         DATASET_SOURCE.c.dataset_ref).label('dataset_refs'), )

        raw_expressions = PostgresDbAPI._alchemify_expressions(expressions)
        from_expression = PostgresDbAPI._from_expression(
            DATASET, expressions, select_fields)
        # Only live (non-archived) datasets are ever returned.
        where_expr = and_(DATASET.c.archived == None, *raw_expressions)

        if not source_exprs:
            return (select(select_columns).select_from(from_expression).where(
                where_expr))

        # Seed of the recursive walk: matching datasets joined to their direct
        # sources, at distance 1, with the classifier as the initial path.
        base_query = (select(select_columns + (
            DATASET_SOURCE.c.source_dataset_ref, literal(1).label('distance'),
            DATASET_SOURCE.c.classifier.label('path'))).select_from(
                from_expression.join(
                    DATASET_SOURCE,
                    DATASET.c.id == DATASET_SOURCE.c.dataset_ref)).where(
                        where_expr)).cte(name="base_query", recursive=True)

        # Recursive step: follow source links one level deeper, incrementing
        # the distance and extending the dotted classifier path.
        recursive_query = base_query.union_all(
            select([
                col for col in base_query.columns
                if col.name not in ['source_dataset_ref', 'distance', 'path']
            ] + [
                DATASET_SOURCE.c.source_dataset_ref, (base_query.c.distance +
                                                      1).label('distance'),
                (base_query.c.path + '.' +
                 DATASET_SOURCE.c.classifier).label('path')
            ]).select_from(
                base_query.join(
                    DATASET_SOURCE, base_query.c.source_dataset_ref ==
                    DATASET_SOURCE.c.dataset_ref)))

        # Final select: distinct source datasets reached by the walk, filtered
        # by the caller's source expressions; bookkeeping columns are dropped.
        return (select([distinct(recursive_query.c.id)] + [
            col for col in recursive_query.columns if col.name not in
            ['id', 'source_dataset_ref', 'distance', 'path']
        ]).select_from(
            recursive_query.join(
                DATASET,
                DATASET.c.id == recursive_query.c.source_dataset_ref)).where(
                    and_(DATASET.c.archived == None,
                         *PostgresDbAPI._alchemify_expressions(source_exprs))))
Beispiel #18
0
    def _light_no_filter_query(self, processed_filters, limit=JR_PAGE_LENGTH):
        """If there's no filter to shorten the query (eg we're just selecting
        all feed with no category) we make a request more adapted to the task.
        """
        clusters = (session.query(*JR_SQLA_FIELDS)
                    .filter(*processed_filters)
                    .order_by(Cluster.main_date.desc())
                    .cte('clu'))

        # Attach, per cluster, the ids of the feeds and categories of its
        # articles for the current user.
        feeds_id = func.array_agg(Article.feed_id).label('feeds_id')
        categories_id = func.array_agg(Article.category_id).label('categories_id')
        enriched = (session.query(clusters, feeds_id, categories_id)
                    .join(Article, Article.cluster_id == clusters.c.id)
                    .filter(Article.user_id == self.user_id)
                    .group_by(*clusters.c)
                    .order_by(clusters.c.main_date.desc())
                    .limit(limit))
        yield from self._iter_on_query(enriched)
Beispiel #19
0
 def test_funcfilter_arrayagg_subscript(self):
     """Subscripting a filtered array_agg must parenthesize the FILTER."""
     col = column("q")
     expr = func.array_agg(col).filter(col % 2 == 0)[1]
     self.assert_compile(
         expr,
         "(array_agg(q) FILTER (WHERE q %% %(q_1)s = "
         "%(param_1)s))[%(param_2)s]",
         dialect="postgresql",
     )
Beispiel #20
0
 def get_correct_answers(cls, lesson_id, session):
     """Return (question_id, score, type_question, correct choice ids) rows
     for every question of the given lesson.
     """
     correct_ids = func.array_agg(ChoiceModel.id, type_=ARRAY(UUID)).label('answers')
     query = session.query(
         ChoiceModel.question_id,
         QuestionModel.score,
         QuestionModel.type_question,
         correct_ids,
     ).join(QuestionModel.choices)
     # Only correct choices of this lesson's questions are aggregated.
     query = query.filter(QuestionModel.lesson_id == lesson_id, ChoiceModel.is_correct)
     query = query.group_by(ChoiceModel.question_id) \
         .group_by(QuestionModel.score) \
         .group_by(QuestionModel.type_question)
     return query.all()
Beispiel #21
0
 def y(cls):
     """Scalar subquery yielding the y-component of every stored flat index."""
     s = database.Session.object_session(cls)
     flat = func.unnest(cls.indices)
     # x component: integer division of the flat index by the row size.
     x_component = (func.unnest(cls.indices) / cls.size).label('xarrind')
     # y component: remainder after removing x * size from the flat index.
     y_component = (flat - x_component * cls.size).label('yarrind')
     y_rows = s.query(y_component).select_from(cls).subquery('yarr')
     return s.query(func.array_agg(y_rows.c.yarrind)).as_scalar()
Beispiel #22
0
 def get_fields_query(self):
     """Build the flat select used to index sessions.

     One row per session with the user's email plus errors, warnings,
     labels and metadata attached as correlated scalar subqueries.
     """
     # Distinct (session_id, label_id) pairs for the current session; used
     # below to attach label names without duplicating rows.
     labels_query = select([
         models.session_label.c.session_id, models.session_label.c.label_id
     ]).where(models.session_label.c.session_id ==
              models.Session.id).distinct().correlate(
                  models.Session).alias()
     return select([
         label("_type", text("'session'")),
         label("_index", text("'session'")),
         label("_id", models.Session.id),
         # All plain session columns except the two handled elsewhere.
         *[
             getattr(models.Session, column_name)
             for column_name in models.Session.__table__.columns.keys()
             if column_name not in {'timespan', 'parameters'}
         ],
         models.User.email.label('user_email'),
         # Errors of the session as an array of {timestamp, message} objects.
         select([
             func.array_agg(
                 func.json_build_object('timestamp', models.Error.timestamp,
                                        'message', models.Error.message))
         ]).where(models.Error.session_id == models.Session.id).label(
             'session_errors'),
         # Warnings, same shape as the errors above.
         select([
             func.array_agg(
                 func.json_build_object('timestamp',
                                        models.Warning.timestamp, 'message',
                                        models.Warning.message))
         ]).where(models.Warning.session_id == models.Session.id).label(
             'session_warnings'),
         # Label names attached via the de-duplicated pairs from labels_query.
         select([
             func.array_agg(
                 func.json_build_object("name", models.Label.name))
         ]).select_from(
             labels_query.join(
                 models.Label,
                 models.Label.id == labels_query.c.label_id)).label(
                     'session_labels'),
         # Session metadata folded into a single JSON object.
         select([
             func.json_object_agg(models.SessionMetadata.key,
                                  models.SessionMetadata.metadata_item)
         ]).where(models.SessionMetadata.session_id ==
                  models.Session.id).label('session_metadata'),
     ]).select_from(
         models.Session.__table__.outerjoin(
             models.User.__table__,
             models.Session.user_id == models.User.id))
Beispiel #23
0
 def blacklist_filter(self):
     """SQLA criterion excluding requests tagged with this user's blacklist.

     Returns None when the user has no blacklisted tags at all.
     """
     db = object_session(self)
     has_blacklist = db.query(BlacklistedTag).filter(
         BlacklistedTag.user_id == self.id).first()
     if has_blacklist is None:
         return None
     blacklisted_ids = db.query(func.array_agg(BlacklistedTag.tag_id)).filter(
         BlacklistedTag.user_id == self.id)
     return ~Request.tag_ids.overlap(blacklisted_ids)
Beispiel #24
0
def _get_ldap_sources(session):
    """Build a source config dict for every LDAP-backed CTI directory."""
    # NOTE(review): in a bound LIKE pattern '%%' acts as two consecutive
    # wildcards (same effect as one '%'); this matches every URI starting
    # with 'ldapfilter://' — confirm the doubling isn't a leftover escape.
    ldap_cti_directories = session.query(
        CtiDirectories.name,
        CtiDirectories.uri,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
        func.array_agg(CtiDirectoryFields.fieldname).label('fields'),
        func.array_agg(CtiDirectoryFields.value).label('values'),
    ).outerjoin(
        CtiDirectoryFields,
        CtiDirectoryFields.dir_id == CtiDirectories.id
    ).filter(
        CtiDirectories.uri.like('ldapfilter://%%')
    ).group_by(
        CtiDirectories.name,
        CtiDirectories.uri,
        CtiDirectories.match_direct,
        CtiDirectories.match_reverse,
    )

    source_configs = []
    for dir in ldap_cti_directories.all():
        # The ldapfilter name is everything after the 'ldapfilter://' scheme.
        _, _, name = dir.uri.partition('ldapfilter://')
        try:
            ldap_config = ldap_dao.build_ldapinfo_from_ldapfilter(name)
        except LookupError:
            # Unresolvable filters are skipped rather than failing the batch.
            logger.warning('Skipping LDAP source %s', dir.name)
            continue

        # Wrap a non-empty custom filter in parentheses for LDAP syntax.
        custom_filter = ldap_config.get('filter') or ''
        if custom_filter:
            custom_filter = '({})'.format(custom_filter)

        source_configs.append({'type': 'ldap',
                               'name': dir.name,
                               'searched_columns': json.loads(dir.match_direct or '[]'),
                               'first_matched_columns': json.loads(dir.match_reverse or '[]'),
                               'format_columns': _format_columns(dir.fields, dir.values),
                               'ldap_uri': ldap_config['uri'],
                               'ldap_base_dn': ldap_config['basedn'],
                               'ldap_username': ldap_config['username'],
                               'ldap_password': ldap_config['password'],
                               'ldap_custom_filter': custom_filter})

    return source_configs
Beispiel #25
0
def get_advanced_search_query(employer_id, params, status):
    """Build the candidate advanced-search query.

    Filters candidates by status and, depending on *params*, by skills,
    preferred locations, target role/salary and name; *employer_id*
    additionally hides the employer's current candidates and controls
    whether anonymous candidates are findable by name.
    """
    skills = params.get('skills')
    locations = params.get('locations')
    role = params.get('role')
    name = params.get('name')
    salary = params.get('salary')

    query = DBSession.query(Candidate.id).filter(Candidate.status == status)

    if employer_id:
        # Anti-join: exclude candidates currently employed by this employer.
        query = query.outerjoin(V_CANDIDATE_CURRENT_EMPLOYERS,
                                and_(V_CANDIDATE_CURRENT_EMPLOYERS.c.candidate_id == Candidate.id,
                                     V_CANDIDATE_CURRENT_EMPLOYERS.c.employer_id == employer_id)) \
            .filter(V_CANDIDATE_CURRENT_EMPLOYERS.c.candidate_id == None)

    if locations:
        query = query.join(PreferredLocation, Candidate.id == PreferredLocation.candidate_id)

        # Match either the exact requested city or anywhere in one of the
        # requested countries.
        country_filter = set([c['country_iso'] for c in locations])
        city_filter = [and_(City.name == loc['city'], City.country_iso == loc['country_iso']) for loc in locations]
        city_ids = DBSession.query(City.id).filter(or_(*city_filter)).all()

        query = query.filter(or_(PreferredLocation.city_id.in_(city_ids),
                                 PreferredLocation.country_iso.in_(country_filter)))

    if salary or role:
        query = query.join(TargetPosition)
        if salary:
            query = query.filter(TargetPosition.minimum_salary <= salary)
        if role:
            role = get_by_name_or_raise(Role, role)
            query = query.filter(TargetPosition.role_id == role.id)

    if name and employer_id:
        name = name.lower()
        # Per candidate: employers whose offers they accepted, so an
        # anonymous candidate is only name-findable by employers they dealt
        # with (or by exact candidate-id prefix).
        employer_ids = func.array_agg(Offer.employer_id, type_=ARRAY(TEXT)).label('employer_ids')
        offer_query = DBSession.query(Offer.candidate_id, employer_ids).filter(Offer.accepted != None) \
            .group_by(Offer.candidate_id).subquery()
        query = query.outerjoin(offer_query, offer_query.c.candidate_id == Candidate.id).filter(
            or_(cast(Candidate.id, TEXT).startswith(name),
                and_(
                    or_(func.lower(Candidate.first_name).startswith(name),
                        func.lower(Candidate.last_name).startswith(name)),
                    or_(
                        offer_query.c.employer_ids.any(str(employer_id)),
                        Candidate.anonymous == False
                    )
                )
            )
        )

    query = query.group_by(Candidate.id)

    if skills:
        # Require *every* requested skill: after filtering to the requested
        # names, the per-candidate count must equal the number requested.
        query = query.join(CandidateSkill).join(Skill).filter(Skill.name.in_(skills)) \
            .having(func.count(Skill.name) == len(skills))
    return query
Beispiel #26
0
class OptimizationProcedureORM(ProcedureMixin, BaseResultORM):
    """ORM record for a geometry-optimization procedure.

    Joined-table-inheritance child of BaseResultORM (shares its primary key)
    that stores the initial/final molecules, the per-step energies, and an
    ordered trajectory of per-step result ids.
    """

    __tablename__ = 'optimization_procedure'

    # Shared PK with base_result; deleting the base row cascades here.
    id = Column(Integer, ForeignKey('base_result.id', ondelete='cascade'), primary_key=True)

    def __init__(self, **kwargs):
        # Pin the procedure discriminator and a default schema version so
        # callers don't have to supply them.
        kwargs.setdefault("version", 1)
        self.procedure = "optimization"
        super().__init__(**kwargs)

    schema_version = Column(Integer, default=1)
    initial_molecule = Column(Integer, ForeignKey('molecule.id'))
    initial_molecule_obj = relationship(MoleculeORM, lazy='select', foreign_keys=initial_molecule)

    # # Results
    # Stored as JSON rather than a native array for portability.
    energies = Column(JSON)  #Column(ARRAY(Float))
    final_molecule = Column(Integer, ForeignKey('molecule.id'))
    final_molecule_obj = relationship(MoleculeORM, lazy='select', foreign_keys=final_molecule)

    # ids, calculated not stored in this table
    # NOTE: this won't work in SQLite since it returns ARRAYS, aggregate_order_by
    # Loaded eagerly with each row as a correlated scalar subquery, ordered
    # by trajectory position.
    trajectory = column_property(
        select([func.array_agg(aggregate_order_by(Trajectory.result_id,
                                                  Trajectory.position))]).where(Trajectory.opt_id == id))

    # array of objects (results) - Lazy - raise error of accessed
    trajectory_obj = relationship(Trajectory, cascade="all, delete-orphan",
                                  # backref="optimization_procedure",
                                  order_by=Trajectory.position,
                                  collection_class=ordering_list('position'))

    __mapper_args__ = {
        'polymorphic_identity': 'optimization_procedure',
        # to have separate select when querying BaseResultsORM
        'polymorphic_load': 'selectin',
    }

    __table_args__ = (
        Index('ix_optimization_program', 'program'),  # todo: needed for procedures?
    )

    def update_relations(self, trajectory=None, **kwarg):
        """Replace the trajectory association rows with the given result ids."""

        # update optimization_results relations
        # self._update_many_to_many(opt_result_association, 'opt_id', 'result_id',
        #                 self.id, trajectory, self.trajectory)

        # Rebuild from scratch; ordering_list assigns 'position' from list order.
        self.trajectory_obj = []
        trajectory = [] if not trajectory else trajectory
        for result_id in trajectory:
            traj = Trajectory(opt_id=int(self.id), result_id=int(result_id))
            self.trajectory_obj.append(traj)
Beispiel #27
0
    def add_outcomes(
        self,
        feature_query,
    ):
        """Attach churn/renewal outcome labels to a feature query.

        Builds a deduplicated (date, user_id) -> outcome mapping from the
        events table, then left-joins it onto ``feature_query`` on user_id.
        Returns the combined subquery labeled 'feature_query_w_outcome'.
        """
        # The events table holds all the events, not just conversion ones
        # Restrict to label-relevant events inside the lookahead window
        # (aggregation_time, aggregation_time + EVENT_LOOKAHEAD].
        relevant_events = self.bq_session.query(
            self.events.c['time'].cast(DATE).label('date'),
            self.events.c['type'].label('outcome'),
            self.events.c['user_id'].label('user_id')).filter(
                and_(
                    self.events.c['type'].in_(list(LABELS.keys())),
                    cast(self.events.c['time'], DATE) > cast(
                        self.aggregation_time, DATE),
                    cast(self.events.c['time'], DATE) <= cast(
                        self.aggregation_time +
                        timedelta(days=EVENT_LOOKAHEAD), DATE))).subquery()

        # TODO: Remove deduplication, once the event table doesn't contain any
        relevant_events_deduplicated = self.bq_session.query(
            relevant_events.c['date'],
            relevant_events.c['user_id'],
            # This case when provides logic for dealing with multiple outcomes during the same time period
            # an example is user_id 195379 during the 4/2020 where the user renews, but then cancels and gets
            # a refund (the current pipeline provides both labels)
            case(
                [
                    # If there is at least one churn event, we identify the user as churned
                    (literal(self.negative_label()).in_(
                        func.unnest(
                            func.array_agg(relevant_events.c['outcome']))),
                     self.negative_label())
                ],
                # In case of any number of any positive only events we consider the event as a renewal
                else_=self.positive_label()).label('outcome')).group_by(
                    relevant_events.c['date'].label('date'),
                    relevant_events.c['user_id'].label('user_id')).subquery()

        # Outer join keeps feature rows with no outcome event; coalesce fills
        # user_id/date from whichever side is present.
        feature_query_w_outcome = self.bq_session.query(
            *[
                column.label(column.name) for column in feature_query.columns
                if column.name not in ['user_id', 'date', 'outcome_date']
            ],
            func.coalesce(
                feature_query.c['user_id'],
                relevant_events_deduplicated.c['user_id']).label('user_id'),
            func.coalesce(feature_query.c['date'],
                          self.aggregation_time.date()).label('date'),
            relevant_events_deduplicated.c['outcome'].label('outcome'),
            func.coalesce(relevant_events_deduplicated.c['date'],
                          feature_query.c['outcome_date'].cast(DATE)).
            label('outcome_date')).outerjoin(
                relevant_events_deduplicated,
                feature_query.c['user_id'] == relevant_events_deduplicated.
                c['user_id']).subquery('feature_query_w_outcome')

        return feature_query_w_outcome
Beispiel #28
0
 def get_group_team_stats(self, replay_ids, session=None):
     """Aggregate stats for each distinct team roster found in the replays.

     Groups the given replays by their (ordered) team rosters and returns,
     per roster, the games it appeared in, the player names, and the group
     stats computed over those games.
     """
     roster = func.array_agg(
         aggregate_order_by(PlayerGame.player, PlayerGame.player)).label("team")
     per_game_teams = (session.query(PlayerGame.game, roster)
                       .filter(PlayerGame.game.in_(replay_ids))
                       .group_by(PlayerGame.game)
                       .group_by(PlayerGame.is_orange)
                       .subquery())
     # Collapse identical rosters across games into (roster, [game ids]).
     teams = (session.query(per_game_teams.c.team,
                            func.array_agg(per_game_teams.c.game))
              .group_by(per_game_teams.c.team)
              .all())

     team_stats = []
     for players, games in teams:
         names = [name for (name,) in
                  session.query(func.min(PlayerGame.name))
                  .filter(PlayerGame.game.in_(games))
                  .filter(PlayerGame.player.in_(players))
                  .order_by(PlayerGame.player)
                  .group_by(PlayerGame.player).all()]
         team_stats.append({
             "team": players,
             "games": games,
             "names": names,
             **self._create_group_stats(session, player_filter=players,
                                        replay_ids=games)
         })
     return {"teamStats": team_stats}
Beispiel #29
0
 def get_recipe_aggregated_ingredients(self):
     """Return (recipe id, title, ingredient-name array) rows for every recipe."""
     ing = aliased(Ingredient)
     rec = aliased(Recipe)
     link = aliased(RecipeIngredient)
     query = self.session.query(
         rec.id, rec.title,
         func.array_agg(ing.ing_name).label('ings'))
     query = query.join(link, link.recipe_id == rec.id)
     query = query.join(ing, link.ingredient_id == ing.id)
     query = query.group_by(rec.id).group_by(rec.title)
     return query.all()
Beispiel #30
0
def get_brands_summary():
    '''Prints out each brand name, and each model name for that brand
     using only ONE database query.'''

    # One query: one row per brand with all its model names aggregated.
    list_of_all = db.session.query(
        Model.brand_name,
        label("models", func.array_agg(Model.name))).group_by(Model.brand_name).all()

    for item in list_of_all:
        # Parenthesized print calls are valid on both Python 2 and 3;
        # the original bare `print` statements are a SyntaxError on Python 3.
        print("")
        # set() removes duplicate model names from the aggregated array.
        print("Brand: %s, Model %s, " % (item.brand_name, set(item.models)))
Beispiel #31
0
 def query_callback(from_query):
     """Extend filtered person rows with membership, group and work aggregates."""
     filtered = from_query.cte('filtered_persons')
     enriched = (self.context.session.query(
                     filtered,
                     func.count(
                         Membership.id.distinct()).label('membership_count'),
                     func.array_agg(Group.id.distinct()).label('group_ids'),
                     func.array_agg(Group.name.distinct()).label('group_names'),
                     func.count(
                         Contributor.work_id.distinct()).label('work_count'))
                 .outerjoin(Contributor,
                            Contributor.person_id == filtered.c.id)
                 .outerjoin(Membership,
                            Membership.person_id == filtered.c.id)
                 .outerjoin(Group, Group.id == Membership.group_id)
                 .group_by(filtered.c.id, filtered.c.name))
     return enriched.order_by(filtered.c.name)
Beispiel #32
0
class CustomArtist(Artist):
    # Extends the base Artist model with relationships used by this project.
    # The three area relationships need explicit foreign_keys because Artist
    # carries three separate FKs pointing at the area table.
    area = relationship('CustomArea', foreign_keys=[Artist.area_id])
    begin_area = relationship('CustomArea',
                              foreign_keys=[Artist.begin_area_id])
    end_area = relationship('CustomArea', foreign_keys=[Artist.end_area_id])
    tags = relationship('ArtistTag')
    artist_credit_names = relationship("ArtistCreditName", innerjoin=True)
    # Scalar column property: array of this artist's alias names that are
    # marked primary for their locale (correlated subquery per row).
    primary_aliases = column_property(
        select([func.array_agg(ArtistAlias.name)]).where(
            and_(ArtistAlias.artist_id == Artist.id,
                 ArtistAlias.primary_for_locale == True)))
Beispiel #33
0
    def test_aggregate_order_by_one(self):
        """array_agg + aggregate_order_by renders ORDER BY inside the call."""
        metadata = MetaData()
        tbl = Table('table1', metadata,
                    Column('a', Integer), Column('b', Integer))
        ordered_agg = func.array_agg(
            aggregate_order_by(tbl.c.a, tbl.c.b.desc()))

        # the aggregate expression must also export table1 as a FROM object
        self.assert_compile(
            select([ordered_agg]),
            "SELECT array_agg(table1.a ORDER BY table1.b DESC) "
            "AS array_agg_1 FROM table1")
Beispiel #34
0
async def select_all_resource_contracts(conn: SAConn) -> RowProxy:
    """Fetch every main resource with its aggregated contract ids and names."""
    stmt = select([
        mainresource,
        func.array_agg(contract.c.id),
        func.array_agg(contract.c.name),
        association_t.c.mainresource_id,
    ]).apply_labels().where(
        and_(association_t.c.mainresource_id == mainresource.c.id,
             association_t.c.contract_id == contract.c.id)
    ).group_by(mainresource.c.id, mainresource.c.name,
               association_t.c.mainresource_id)

    cursor = await conn.execute(stmt)
    return await cursor.fetchall()
Beispiel #35
0
    def test_array_agg_array_literal_explicit_type(self):
        """An explicit type_ on array_agg over an ARRAY literal is honored."""
        from sqlalchemy.dialects.postgresql import array

        arr_literal = array([column('data', Integer), column('d2', Integer)])
        agg = func.array_agg(arr_literal, type_=ARRAY(Integer))

        is_(agg.type._type_affinity, ARRAY)
        is_(agg.type.item_type._type_affinity, Integer)
        self.assert_compile(
            agg, "array_agg(ARRAY[data, d2])", dialect="postgresql")
Beispiel #36
0
def correlation(min_racers=5):
    """Calculates the Spearman correlation for races with the given minimum
    number of racers."""
    grouped = (Results.query
               .filter(Results.Place != None)
               .with_entities(Results.RaceName,
                              Results.RaceCategoryName,
                              func.array_agg(Results.Place),
                              func.array_agg(Results.prior_mu))
               .group_by(Results.RaceName, Results.RaceCategoryName))

    print('Calculating Spearman correlation...')
    # One correlation per sufficiently large race/category group.
    correlations = [
        spearmanr(places, mus).correlation
        for _name, _category, places, mus in grouped
        if len(places) >= min_racers
    ]
    print('Done calculating Spearman correlation!')

    return correlations
Beispiel #37
0
    def synchronize(self, engine, firstid):
        """Refresh style and geometry rows from the segment/hierarchy tables.

        Processes segments with id >= firstid (all segments when firstid
        is 0), then re-styles older segments whose parent relations changed.
        """
        # cache routing information, so we don't have to get it every time
        route_cache = {}

        with engine.begin() as conn:
            h = self.t_hier.data
            s = self.t_segment.data
            # For each segment, aggregate the parent relation ids it belongs to.
            sel = select([s.c.id, func.array_agg(h.c.parent).label('rels')])\
                     .where(s.c.rels.any(h.c.child)).group_by(s.c.id)

            if firstid > 0:
                sel = sel.where(s.c.id >= firstid)

            for seg in conn.execute(sel):
                self._update_segment_style(conn, seg, route_cache)

            # and copy geometries
            sel = self.data.update().where(self.data.c.id == s.c.id)\
                          .values(geom=ST_Simplify(s.c.geom, 1),
                                  geom100=ST_Simplify(s.c.geom, 100))
            if firstid > 0:
                sel = sel.where(self.data.c.id >= firstid)
            conn.execute(sel)

            # now synchronize all segments where a hierarchical relation has changed
            if firstid > 0:
                # Older segments (id < firstid) whose ancestors appear in the
                # relation-change table.
                segs = select([s.c.id, s.c.rels], distinct=True)\
                        .where(s.c.rels.any(h.c.child))\
                        .where(h.c.depth > 1)\
                        .where(s.c.id < firstid)\
                        .where(h.c.parent.in_(select([self.t_relchange.c.id])))\
                        .alias()
                h2 = self.t_hier.data.alias()
                sel = select([segs.c.id, func.array_agg(h2.c.parent).label('rels')])\
                         .where(segs.c.rels.any(h2.c.child)).group_by(segs.c.id)

                for seg in conn.execute(sel):
                    self._update_segment_style(conn,
                                               seg,
                                               route_cache,
                                               update=True)
Beispiel #38
0
def get_available_langs(document_id):
    """Return the available languages (e.g. ['en', 'fr']) for a single
    document.
    """
    lang_array = func.array_agg(
        DocumentLocale.lang, type_=postgresql.ARRAY(String))
    query = DBSession.query(lang_array)
    query = query.filter(DocumentLocale.document_id == document_id)
    query = query.group_by(DocumentLocale.document_id)
    return query.scalar()
Beispiel #39
0
    def test_array_agg_array_literal_explicit_type(self):
        """array_agg keeps ARRAY affinity when given an explicit type_."""
        from sqlalchemy.dialects.postgresql import array

        literal_arr = array([column("data", Integer), column("d2", Integer)])
        agg = func.array_agg(literal_arr, type_=ARRAY(Integer))
        is_(agg.type._type_affinity, ARRAY)
        is_(agg.type.item_type._type_affinity, Integer)

        self.assert_compile(
            agg, "array_agg(ARRAY[data, d2])", dialect="postgresql")
Beispiel #40
0
def get_resume_query(session):
    """Group deliveries by (status, city), aggregating (id, name) user rows."""
    user_rows = func.array_agg(func.row(User.id, User.name)).label('user')
    base = (session.query(Delivery.status, Address.city, user_rows)
            .join(User, Delivery.user_id == User.id)
            .join(Address, Delivery.user_id == Address.user_id))
    return base.group_by(Delivery.status, Address.city)
Beispiel #41
0
def get_available_langs(document_id):
    """Return the available languages (e.g. ['en', 'fr']) for a single
    document.
    """
    return (
        DBSession.query(
            func.array_agg(DocumentLocale.lang,
                           type_=postgresql.ARRAY(String)))
        .filter(DocumentLocale.document_id == document_id)
        .group_by(DocumentLocale.document_id)
        .scalar())
Beispiel #42
0
    def test_aggregate_order_by_adapt(self):
        """ClauseAdapter rewrites table refs inside aggregate_order_by."""
        metadata = MetaData()
        tbl = Table('table1', metadata,
                    Column('a', Integer), Column('b', Integer))
        stmt = select([func.array_agg(
            aggregate_order_by(tbl.c.a, tbl.c.b.desc()))])

        aliased_tbl = tbl.alias('foo')
        adapted = sql_util.ClauseAdapter(aliased_tbl).traverse(stmt)
        self.assert_compile(
            adapted,
            "SELECT array_agg(foo.a ORDER BY foo.b DESC) AS array_agg_1 FROM table1 AS foo"
        )
Beispiel #43
0
def add_categories():
    """Update the Races table with the categories represented in the
    Results table.
    """
    # Per (race, category): how many racers finished in that category.
    per_category = (Results.query
                    .with_entities(
                        Results.race_id,
                        Results.RaceCategoryName,
                        func.count(Results.RaceCategoryName).label('count'))
                    .group_by(Results.race_id, Results.RaceCategoryName)
                    .order_by(Results.RaceCategoryName)
                    .subquery('cat_counts'))
    # Per race: parallel arrays of category names and their racer counts.
    per_race = (db.session
                .query(per_category.c.race_id,
                       func.array_agg(per_category.c.RaceCategoryName).label('categories'),
                       func.array_agg(per_category.c.count).label('num_racers'))
                .group_by(per_category.c.race_id))
    Races.update([row._asdict() for row in per_race])
Beispiel #44
0
    def test_aggregate_order_by_adapt(self):
        """Adapting a statement onto an alias rewrites the ordered aggregate."""
        meta = MetaData()
        t1 = Table('table1', meta, Column('a', Integer), Column('b', Integer))
        original = select([func.array_agg(
            aggregate_order_by(t1.c.a, t1.c.b.desc()))])

        foo = t1.alias('foo')
        rewritten = sql_util.ClauseAdapter(foo).traverse(original)
        self.assert_compile(
            rewritten,
            "SELECT array_agg(foo.a ORDER BY foo.b DESC) AS array_agg_1 FROM table1 AS foo"
        )
Beispiel #45
0
def get_consumer_roles(dbengine):
    """Coroutine: consumer services with their aggregated role ids."""
    role_ids = func.array_agg(role.c.id, type_=UUIDArray()).label('role_ids')
    query = (select([service.c.id, role_ids])
             .where(service.c.is_consumer)
             .select_from(join(service, role,
                               role.c.service_id == service.c.id))
             .group_by(service.c.id))
    result_proxy = yield from dbengine.execute(query)
    rows = yield from result_proxy.fetchall()
    return rows
Beispiel #46
0
def query_person_passports():
    """Query person ids with comma-joined citizen and foreign passport numbers."""
    def _joined_nums(passport_type, label):
        # Aggregate passport numbers of one type into a comma-separated string;
        # the case() leaves other types as NULL, which array_to_string drops.
        return func.array_to_string(
            func.array_agg(
                case([(Passport.passport_type == passport_type, Passport.num)])
            ),
            ', '
        ).label(label)

    return (
        DBSession.query(
            Person.id.label('person_id'),
            _joined_nums('citizen', 'citizen'),
            _joined_nums('foreign', 'foreign'),
        )
        .join(Passport, Person.passports)
        .group_by(Person.id)
    )
    
Beispiel #47
0
    def get_dataset_sources(self, dataset_id):
        """Return dataset rows for ``dataset_id`` and all transitive sources.

        Walks the provenance graph with a recursive CTE, aggregates each
        dataset's direct source refs/classifiers into parallel arrays, and
        joins the result back onto the datasets table.
        """
        # recursively build the list of (dataset_ref, source_dataset_ref) pairs starting from dataset_id
        # include (dataset_ref, NULL) [hence the left join]
        sources = select(
            [DATASET.c.id.label('dataset_ref'),
             DATASET_SOURCE.c.source_dataset_ref,
             DATASET_SOURCE.c.classifier]
        ).select_from(
            DATASET.join(DATASET_SOURCE,
                         DATASET.c.id == DATASET_SOURCE.c.dataset_ref,
                         isouter=True)
        ).where(
            DATASET.c.id == dataset_id
        ).cte(name="sources", recursive=True)

        # Recursive step: follow each source_dataset_ref to its own sources.
        sources = sources.union_all(
            select(
                [sources.c.source_dataset_ref.label('dataset_ref'),
                 DATASET_SOURCE.c.source_dataset_ref,
                 DATASET_SOURCE.c.classifier]
            ).select_from(
                sources.join(DATASET_SOURCE,
                             sources.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref,
                             isouter=True)
            ).where(sources.c.source_dataset_ref != None))

        # turn the list of pairs into adjacency list (dataset_ref, [source_dataset_ref, ...])
        # some source_dataset_ref's will be NULL
        aggd = select(
            [sources.c.dataset_ref,
             func.array_agg(sources.c.source_dataset_ref).label('sources'),
             func.array_agg(sources.c.classifier).label('classes')]
        ).group_by(sources.c.dataset_ref).alias('aggd')

        # join the adjacency list with datasets table
        query = select(
            _DATASET_SELECT_FIELDS + (aggd.c.sources, aggd.c.classes)
        ).select_from(aggd.join(DATASET, DATASET.c.id == aggd.c.dataset_ref))

        return self._connection.execute(query).fetchall()
Beispiel #48
0
    def get_dataset_sources(self, dataset_id):
        """Return dataset rows for ``dataset_id`` and all transitive sources.

        Uses a recursive CTE over the dataset_source table, then aggregates
        each dataset's direct sources and classifiers into parallel arrays
        before joining back onto the datasets table.
        """
        # recursively build the list of (dataset_ref, source_dataset_ref) pairs starting from dataset_id
        # include (dataset_ref, NULL) [hence the left join]
        sources = select(
            [DATASET.c.id.label('dataset_ref'),
             DATASET_SOURCE.c.source_dataset_ref,
             DATASET_SOURCE.c.classifier]
        ).select_from(
            DATASET.join(DATASET_SOURCE,
                         DATASET.c.id == DATASET_SOURCE.c.dataset_ref,
                         isouter=True)
        ).where(
            DATASET.c.id == dataset_id
        ).cte(name="sources", recursive=True)

        # Recursive step: expand each source_dataset_ref into its own sources.
        sources = sources.union_all(
            select(
                [sources.c.source_dataset_ref.label('dataset_ref'),
                 DATASET_SOURCE.c.source_dataset_ref,
                 DATASET_SOURCE.c.classifier]
            ).select_from(
                sources.join(DATASET_SOURCE,
                             sources.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref,
                             isouter=True)
            ).where(sources.c.source_dataset_ref != None))

        # turn the list of pairs into adjacency list (dataset_ref, [source_dataset_ref, ...])
        # some source_dataset_ref's will be NULL
        aggd = select(
            [sources.c.dataset_ref,
             func.array_agg(sources.c.source_dataset_ref).label('sources'),
             func.array_agg(sources.c.classifier).label('classes')]
        ).group_by(sources.c.dataset_ref).alias('aggd')

        # join the adjacency list with datasets table
        query = select(
            _DATASET_SELECT_FIELDS + (aggd.c.sources, aggd.c.classes)
        ).select_from(aggd.join(DATASET, DATASET.c.id == aggd.c.dataset_ref))

        return self._connection.execute(query).fetchall()
Beispiel #49
0
 def y(cls):
     """Return a scalar subquery: array of per-index 'column' offsets.

     For each value in ``cls.indices`` this computes ind - (ind/size)*size
     (i.e. the remainder after removing whole rows of width ``cls.size``)
     and aggregates the results into one array.
     """
     # Earlier, simpler attempt kept for reference:
     #arrind = func.unnest(cls.indices).label('arrind')
     #x = arrind / cls.size
     #y = func.array_agg(arrind - x*cls.size).label('y')
     #return y
     # NOTE(review): opens a fresh Session per call — confirm this matches
     # the project's intended session lifecycle.
     s = db.Session()
     arrunnest = func.unnest(cls.indices)
     # Row index for each unnested value (integer division by size).
     xarr = (func.unnest(cls.indices) / cls.size).label('xarrind')
     # Column index: original value minus row*size.
     arrind = (arrunnest - xarr * cls.size).label('yarrind')
     #n.arrind-(n.arrind/n.size)*n.size
     y = s.query(arrind).select_from(cls).subquery('yarr')
     yagg = s.query(func.array_agg(y.c.yarrind))
     return yagg.as_scalar()
Beispiel #50
0
    def test_aggregate_order_by_one(self):
        """Ordered array_agg compiles with ORDER BY inside the aggregate."""
        meta = MetaData()
        t1 = Table('table1', meta, Column('a', Integer), Column('b', Integer))
        ordered = aggregate_order_by(t1.c.a, t1.c.b.desc())
        stmt = select([func.array_agg(ordered)])

        # the aggregate must export table1 as a FROM object
        self.assert_compile(
            stmt,
            "SELECT array_agg(table1.a ORDER BY table1.b DESC) "
            "AS array_agg_1 FROM table1"
        )
Beispiel #51
0
async def get_most_active_user(year: int = None, month: int = None, day=None):
    """Return the single most active user, optionally filtered by date parts."""
    params = {"year": year, "month": month, "day": day}
    # Build dynamic equality filters only for the date parts actually given.
    filter_spec = [
        {'field': field, 'op': '==', 'value': value}
        for field, value in params.items() if value
    ]

    query = (db.query(UserContributes.user,
                      func.count(UserContributes.user),
                      func.array_agg(UserContributes.year),
                      func.array_agg(UserContributes.month),
                      func.array_agg(UserContributes.day))
             .group_by(UserContributes.user)
             .order_by(func.count(UserContributes.user).desc()))

    return apply_filters(query, filter_spec).first()
Beispiel #52
0
def add_outcomes(
    feature_query,
    start_time: datetime,
    positive_event_lookahead: int = 1,
):
    """Left-join churn/renewal outcome labels onto ``feature_query``.

    Labels are derived from events inside the lookahead window
    (start_time, start_time + positive_event_lookahead days].
    Returns a subquery labeled 'feature_query_w_outcome'.
    """
    # The events table holds all the events, not just conversion ones
    relevant_events = bq_session.query(
        events.c['time'].cast(DATE).label('date'),
        events.c['type'].label('outcome'),
        events.c['user_id'].label('user_id')).filter(
            events.c['type'].in_(list(LABELS.keys())),
            cast(events.c['time'], DATE) > cast(start_time, DATE),
            cast(events.c['time'], DATE) <= cast(
                start_time + timedelta(days=positive_event_lookahead),
                DATE)).subquery()

    # TODO: Remove deduplication, once the event table doesn't contain any
    relevant_events_deduplicated = bq_session.query(
        relevant_events.c['date'],
        relevant_events.c['user_id'],
        # This case when provides logic for dealing with multiple outcomes during the same time period
        # an example is user_id 195379 during the 4/2020 where the user renews, but then cancels and gets
        # a refund (the current pipeline provides both labels)
        case(
            [
                # If there is at least one churn event, we identify the user as churned
                (literal(negative_label()).in_(
                    func.unnest(func.array_agg(
                        relevant_events.c['outcome']))), negative_label())
            ],
            # In case of any number of any positive only events we consider the event as a renewal
            else_=positive_labels()).label('outcome')).group_by(
                relevant_events.c['date'].label('date'),
                relevant_events.c['user_id'].label('user_id')).subquery()

    # Outer join keeps feature rows without an outcome event; the date-range
    # conditions match outcomes within the lookahead window of the feature date.
    feature_query_w_outcome = bq_session.query(
        feature_query,
        relevant_events_deduplicated.c['outcome'].label('outcome'),
        relevant_events_deduplicated.c['date'].label('outcome_date')
    ).outerjoin(
        relevant_events_deduplicated,
        and_(
            feature_query.c['user_id'] ==
            relevant_events_deduplicated.c['user_id'],
            feature_query.c['date'] >= func.date_sub(
                relevant_events_deduplicated.c['date'],
                text(f'interval {positive_event_lookahead} day')),
            feature_query.c['date'] <= relevant_events_deduplicated.c['date'])
    ).subquery('feature_query_w_outcome')

    return feature_query_w_outcome
Beispiel #53
0
    def search_datasets(self, expressions, select_fields=None, with_source_ids=False):
        """Yield dataset rows matching the given field expressions.

        Archived datasets are always excluded.

        :type with_source_ids: bool
        :type select_fields: tuple[datacube.index.postgres._fields.PgField]
        :type expressions: tuple[datacube.index.postgres._fields.PgExpression]
        :rtype: dict
        """

        if select_fields:
            select_columns = tuple(
                f.alchemy_expression.label(f.name)
                for f in select_fields
            )
        else:
            # Fall back to the module-level default column set.
            select_columns = _DATASET_SELECT_FIELDS

        if with_source_ids:
            # Include the IDs of source datasets
            # Correlated scalar subquery: array of source refs per dataset.
            select_columns += (
                select(
                    (func.array_agg(DATASET_SOURCE.c.source_dataset_ref),)
                ).select_from(
                    DATASET_SOURCE
                ).where(
                    DATASET_SOURCE.c.dataset_ref == DATASET.c.id
                ).group_by(
                    DATASET_SOURCE.c.dataset_ref
                ).label('dataset_refs'),
            )

        def raw_expr(expression):
            # OR-expressions are expanded recursively; leaves carry their own
            # SQLAlchemy expression.
            if isinstance(expression, OrExpression):
                return or_(raw_expr(expr) for expr in expression.exprs)
            return expression.alchemy_expression

        raw_expressions = [raw_expr(expression) for expression in expressions]

        from_tables = DATASET.join(DATASET_TYPE).join(METADATA_TYPE)
        select_query = (
            select(
                select_columns
            ).select_from(
                from_tables
            ).where(
                and_(DATASET.c.archived == None, *raw_expressions)
            )
        )

        # Stream results rather than materializing them all at once.
        results = self._connection.execute(select_query)
        for result in results:
            yield result
    def synchronize(self, engine, firstid):
        """Refresh style and geometry rows from the segment/hierarchy tables.

        Processes segments with id >= firstid (all segments when firstid
        is 0), then re-styles older segments whose parent relations changed.
        """
        # cache routing information, so we don't have to get it every time
        route_cache = {}

        with engine.begin() as conn:
            h = self.t_hier.data
            s = self.t_segment.data
            # For each segment, aggregate the parent relation ids it belongs to.
            sel = select([s.c.id, func.array_agg(h.c.parent).label('rels')])\
                     .where(s.c.rels.any(h.c.child)).group_by(s.c.id)

            if firstid > 0:
                sel = sel.where(s.c.id >= firstid)

            for seg in conn.execute(sel):
                self._update_segment_style(conn, seg, route_cache)

            # and copy geometries
            sel = self.data.update().where(self.data.c.id == s.c.id)\
                          .values(geom=ST_Simplify(s.c.geom, 1),
                                  geom100=ST_Simplify(s.c.geom, 100))
            if firstid > 0:
                sel = sel.where(self.data.c.id >= firstid)
            conn.execute(sel)

            # now synchronize all segments where a hierarchical relation has changed
            if firstid > 0:
                # Older segments (id < firstid) whose ancestors appear in the
                # relation-change table.
                segs = select([s.c.id, s.c.rels], distinct=True)\
                        .where(s.c.rels.any(h.c.child))\
                        .where(h.c.depth > 1)\
                        .where(s.c.id < firstid)\
                        .where(h.c.parent.in_(select([self.t_relchange.c.id])))\
                        .alias()
                h2 = self.t_hier.data.alias()
                sel = select([segs.c.id, func.array_agg(h2.c.parent).label('rels')])\
                         .where(segs.c.rels.any(h2.c.child)).group_by(segs.c.id)

                for seg in conn.execute(sel):
                    self._update_segment_style(conn, seg, route_cache, update=True)
Beispiel #55
0
def read_rolepermissions():
    """Return (role name, role id, permission-id array) rows sorted by name."""
    db_session = Session()
    rows = (db_session.query(
                Roles.name,
                Roles.id,
                func.array_agg(RolesPermissions.permissions_id))
            .join(RolesPermissions, Roles.id == RolesPermissions.role_id)
            .group_by(Roles.id)
            .order_by(asc(Roles.name))
            .all())
    return rows
Beispiel #56
0
    def test_array_agg_array_literal_implicit_type(self):
        """array_agg inherits the PG ARRAY type from an array literal."""
        from sqlalchemy.dialects.postgresql import array, ARRAY as PG_ARRAY

        arr_literal = array([column('data', Integer), column('d2', Integer)])
        assert isinstance(arr_literal.type, PG_ARRAY)

        agg = func.array_agg(arr_literal)
        assert isinstance(agg.type, PG_ARRAY)
        is_(agg.type._type_affinity, ARRAY)
        is_(agg.type.item_type._type_affinity, Integer)

        self.assert_compile(
            agg, "array_agg(ARRAY[data, d2])", dialect="postgresql")
Beispiel #57
0
def user_roles():
    """Render the manage-users page showing each user's role ids."""
    authorized = session["logged_in"] and (
        "Read users" in session or "Edit users" in session)
    if not authorized:
        # Remember the denied target so the login page can report it.
        session["no-access"] = True
        session["tried"] = "Users"
        return redirect(url_for("login"))

    roles = read_roles()
    q_session = Session()
    usersandroles = (
        q_session.query(UserRoles.email, func.array_agg(UserRoles.user_role_id))
        .group_by(UserRoles.email)
        .order_by(asc(UserRoles.email))
        .all()
    )
    if usersandroles:
        return render_template("manageusers.html", roles=roles,
                               usersandroles=usersandroles)
    errormsg = "No roles found. Please add roles and assign them to users."
    return render_template("manageusers.html", error=errormsg)
def get_tags(db, entity_model, tag_model, entity_ids):
    """Get tags associated with entities.

    Args:
        db (Session object): Session object.
        entity_model (mbdata.models): Model of the entity.
        tag_model (mbdata.models): Tag of the model.
        entity_ids (list): IDs of the entity whose tags are to be fetched

    Returns:
        List of tuples containing the entity_ids and the list of associated tags.
    """
    query = (db.query(entity_model.id, func.array_agg(Tag.name))
             .join(tag_model)
             .join(Tag)
             .filter(entity_model.id.in_(entity_ids))
             .group_by(entity_model.id))
    return query.all()
Beispiel #59
0
 def start_requests(self):
     """Yield one crawl Request per room summarized today, with median online count.

     Groups today's presence rows per (site, room) and attaches the median
     of the aggregated online counts as request metadata.
     """
     summary_utc = datetime.utcnow() #- timedelta(days=1)
     # NOTE(review): a new engine/session is created per call — presumably
     # acceptable for a spider start-up; confirm against the crawler setup.
     db_engine = create_engine(self.settings.get('SQLALCHEMY_DATABASE_URI'))
     db_session = sessionmaker(bind=db_engine)()
     db_query = db_session.query(LiveTVSite.id.label('site_id'), LiveTVRoom.id.label('room_id'),
                                 LiveTVRoom.url.label('room_url'),
                                 LiveTVRoomPresent.crawl_date_format.label('summary_date'),
                                 func.array_agg(LiveTVRoomPresent.online).label('online_list')) \
         .join(LiveTVRoom, LiveTVRoomPresent) \
         .filter(LiveTVSite.code == 'bilibili') \
         .filter(LiveTVRoomPresent.crawl_date_format == summary_utc.strftime(DAILY_DATE_FORMAT)) \
         .group_by(LiveTVSite.id, LiveTVRoom.id, LiveTVRoom.url, LiveTVRoomPresent.crawl_date_format)
     for group_row in db_query:
         meta_info = {
             'site_id': group_row.site_id,
             'room_id': group_row.room_id,
             'summary_date': group_row.summary_date,
             # Median of the day's sampled online counts for this room.
             'online': numpy.median(group_row.online_list)
         }
         yield Request(group_row.room_url, callback=self.parse, meta=meta_info)
     db_session.close()