Python array_agg Examples, sqlalchemy.dialects.postgresql.array_agg Python Examples

Example #1

0

Show file

File: counts.py Project: vkelk/factiva-rtf-db

def get_articles(gvkey, date=None):
    session = Session()
    q = session.query(
            array_agg(CompanyArticle.article_id).label('article_ids'),
            array_agg(CompanyArticle.main_category).label('main_cats'),
            array_agg(CompanyArticle.sub_category).label('sub_cats'),
            Articles.PD
        ) \
        .join(Articles, Articles.id == CompanyArticle.article_id) \
        .filter(CompanyArticle.gvkey == gvkey)
    if date is not None:
        q = q.filter(Articles.PD == date)
    q = q.group_by(Articles.PD).order_by(Articles.PD)
    return q.all()

Example #2

0

Show file

    def _update_handle_new_ways(self, engine):
        w = self.way_src.data
        r = self.relway_view
        wold = self.data

        sub = sa.select([r.c.way_id, array_agg(r.c.relation_id).label('rels')])\
                .where(r.c.way_id.notin_(sa.select([wold.c.id])))\
                .group_by(r.c.way_id).alias('aggway')

        cols = [sub.c.way_id, sub.c.rels, w.c.nodes]
        if hasattr(self, 'transform_tags'):
            cols.append(w.c.tags)

        sql = sa.select(cols).where(w.c.id == sub.c.way_id)

        changeset = {}
        inserts = []
        for obj in engine.execute(sql):
            cols = self._construct_row(obj, engine)
            if cols is not None:
                changeset[obj['way_id']] = 'A'
                inserts.append(cols)

        if len(inserts):
            engine.execute(self.data.insert().values(inserts))

        return changeset

Example #3

0

Show file

File: app.py Project: maryjac/FSND

def venues():
  # TODO: replace with real venues data.
  #       num_shows should be aggregated based on number of upcoming shows per venue.  
  areas = []
  venues_agg = array_agg(Venue.id).label('venues')
  areas_data = db.session.query(
    Venue.city,
    Venue.state,
    venues_agg
  ).group_by(Venue.city, Venue.state).all()

  for (city, state, venue_ids) in areas_data:
    area_venues = []
    for vid in venue_ids:
      v = {}
      v['name'] = Venue.query.get(vid).name
      v['id'] = vid
      area_venues.append(v)

    area = {}
    area['city'] = city
    area['state'] = state
    area['venues'] = area_venues

    areas.append(area)

  return render_template('pages/venues.html', areas=areas)

Example #4

0

Show file

File: build_tests.py Project: robopsi/zeus

    def get(self, build: Build):
        """
        Return a list of test cases for a given build.
        """
        job_ids = db.session.query(Job.id).filter(Job.build_id == build.id).subquery()

        query = db.session.query(
            TestCase.hash,
            TestCase.name,
            array_agg_row(
                TestCase.id, TestCase.job_id, TestCase.duration, TestCase.result
            ).label(
                "runs"
            ),
        ).filter(
            TestCase.job_id.in_(job_ids)
        ).group_by(
            TestCase.hash, TestCase.name
        )

        result = request.args.get("result")
        if result:
            try:
                query = query.filter(TestCase.result == getattr(Result, result))
            except AttributeError:
                raise NotImplementedError

        query = query.order_by(
            (
                array_agg(TestCase.result).label("results").contains([Result.failed])
            ).desc(),
            TestCase.name.asc(),
        )

        return self.paginate_with_schema(testcases_schema, query)

Example #5

0

Show file

def _group_lines_stops(list_stops):
    """ Groups lines and stops such that each distinct line and direction has
        a group of stops associated with it.
    """
    stops = [s.atco_code for s in list_stops]

    separator = db.literal_column("' / '")
    destinations = db.func.string_agg(
        db.distinct(models.JourneyPattern.destination),
        pg.aggregate_order_by(separator, models.JourneyPattern.destination)
    )
    array_stops = pg.array_agg(db.distinct(models.JourneyLink.stop_point_ref))
    groups = (
        db.session.query(
            models.Service.code.label("code"),
            models.JourneyPattern.direction.label("direction"),
            models.Service.line.label("line"),
            destinations.label("destination"),
            array_stops.label("stops")
        )
        .select_from(models.Service)
        .join(models.Service.patterns)
        .join(models.JourneyPattern.links)
        .filter(models.JourneyLink.stop_point_ref.in_(stops))
        .group_by(models.Service.code, models.Service.line,
                  models.JourneyPattern.direction)
        .order_by(models.Service.line, models.JourneyPattern.direction)
        .all()
    )

    return [g._asdict() for g in groups]

Example #6

0

Show file

File: views.py Project: MFinkendei/oeplatform

    def post(self, request):
        results = []
        engine = actions._get_engine()
        metadata = sqla.MetaData(bind=engine)
        Session = sessionmaker()
        session = Session(bind=engine)
        search_view = sqla.Table("meta_search", metadata, autoload=True)

        filter_tags = [
            int(key[len('select_'):]) for key in request.POST
            if key.startswith('select_')
        ]

        tag_agg = array_agg(Table_tags.tag)
        query = session.query(
            search_view.c.schema.label('schema'),
            search_view.c.table.label('table'), tag_agg).outerjoin(
                Table_tags, (search_view.c.table == Table_tags.table_name)
                and (search_view.c.table == Table_tags.table_name))
        if filter_tags:
            query = query.having(tag_agg.contains(filter_tags))

        query = query.group_by(search_view.c.schema, search_view.c.table)
        results = session.execute(query)

        session.commit()
        ret = [{'schema': r.schema, 'table': r.table} for r in results]
        print(ret)
        return render(request, 'dataedit/search.html', {
            'results': ret,
            'tags': get_all_tags(),
            'selected': filter_tags
        })

Example #7

0

Show file

 def check_tags(offers, tags_ids):
     if tags_ids is None:
         return offers
     elif not tags_ids:
         return offers
     elif tags_ids is not None:
         return offers\
             .join(OffersTags)\
             .filter(OffersTags.tag_id.in_(tags_ids))\
             .group_by(Offer.id)\
             .having(array_agg(OffersTags.tag_id).contains(tags_ids))

Example #8

0

Show file

File: derived.py Project: macph/nextbus

    def matching_groups(cls, query, admin_areas=None):
        """ Finds all admin areas and table names covering matching results for
            a query.

            The areas and groups are sorted into two sets, to be used for
            filtering.

            :param query: web search query as string.
            :param admin_areas: Filter by admin areas to get matching groups
            :returns: A tuple with a dict of groups and a dict with
            administrative area references and names
        """
        if not _test_query(query):
            return None

        array_t = pg.array_agg(db.distinct(cls.table_name))
        if admin_areas is not None:
            # Filter array of tables by whether aggregated rows' admin areas
            # match those already selected
            array_t = array_t.filter(cls.admin_areas.overlap(admin_areas))

        array_areas = pg.array((AdminArea.code, AdminArea.name))
        array_a = pg.array_agg(db.distinct(array_areas))

        result = (db.session.query(
            array_t.label("tables"), array_a.label("areas")).select_from(
                db.func.unnest(cls.admin_areas).alias("unnest_areas")).join(
                    AdminArea,
                    db.column("unnest_areas") == AdminArea.code).filter(
                        cls.match(query)).one())

        # All data should have been aggregated into one row
        tables = set(result.tables) if result.tables is not None else set()
        groups = {
            g: n
            for g, n in cls.GROUP_NAMES.items() if tables & cls.GROUPS[g]
        }
        areas = dict(result.areas) if result.areas is not None else {}

        return groups, areas

Example #9

0

Show file

File: app.py Project: sophieq/mnemosyne-image-repo

def index():
    name = current_user.first_name + "'s"
    # get previously uploaded photos
    image_objects = db.session.query(Image.month_uploaded, postgresql.array_agg(Image.path))\
                    .join(User)\
                    .filter(Image.user_id==current_user.id)\
                    .group_by(Image.month_uploaded)\
                    .order_by(desc(Image.month_uploaded))\
                    .all()
    image_sections = ImageStorageService.get_user_images(image_objects)
    return render_template('index.html',
                           name=name,
                           image_sections=image_sections)

Example #10

0

Show file

    def _update_handle_changed_rels(self, engine):
        w = self.data
        r = self.relation_src.data
        rs = self.relway_view.alias('relsrc')

        # Recreate the relation set for all ways in changed relations.
        sub = sa.select([array_agg(r.c.id)])\
                .where(r.c.members.contains(
                         sa.func.jsonb_build_array(
                           sa.func.jsonb_build_object('type', 'W', 'id', w.c.id))))
        sql = sa.select([w.c.id, w.c.rels, sub.label('new_rels')])\
                .where(sa.or_(
                    w.c.id.in_(
                         sa.select([rs.c.way_id]).
                           where(rs.c.relation_id.in_(
                               self.relation_src.select_add_modify()))),
                    w.c.rels.op('&& ARRAY')(sa.select([self.relation_src.cc.id]))
                           ))

        inserts = []
        deletes = []
        changeset = {}
        for obj in engine.execute(sql):
            oid = obj['id']
            # If the new set is empty, the way has been removed from the set.
            if obj['new_rels'] is None:
                deletes.append({'oid': oid})
                changeset[oid] = 'D'
            # If the relation set differs, there was a relevant change.
            # (Only update the way set here. geometry and tag changes have
            #  already been done during the first pass.)
            else:
                rels = sorted(obj['new_rels'])
                if rels != obj['rels']:
                    inserts.append({'oid': oid, 'rels': rels})
                    changeset[oid] = 'M'

        if len(inserts):
            engine.execute(
                self.data.update().where(
                    self.c.id == sa.bindparam('oid')).values(
                        rels=sa.bindparam('rels')), inserts)

        if len(deletes):
            engine.execute(
                self.data.delete().where(self.c.id == sa.bindparam('oid')),
                deletes)

        return changeset

Example #11

0

Show file

    def get_services(self):
        """ Queries and returns two datasets for services and operators at this
            stoplist including the origin and destination of these services,
            grouped by service ID and direction. Services are also checked for
            whether they terminate at this stop or not. Operators are returned
            as a dict of local operator codes and operator names.
        """
        # Checks if associated link is not last in sequence
        link = db.aliased(JourneyLink)
        next_link = (
            db.session.query(link.id)
            .filter(link.pattern_ref == JourneyLink.pattern_ref,
                    link.sequence == JourneyLink.sequence + 1)
            .as_scalar()
        )

        # Give service instance name in keyed tuple object
        service = db.aliased(Service, name="service")
        operator = pg.array((
            LocalOperator.code,
            db.func.coalesce(Operator.name, LocalOperator.name)
        ))
        query_services = (
            db.session.query(
                service,
                JourneyPattern.direction,
                db.func.string_agg(JourneyPattern.origin.distinct(), ' / ')
                .label("origin"),
                db.func.string_agg(JourneyPattern.destination.distinct(), ' / ')
                .label("destination"),
                (db.func.count(next_link) == 0).label("terminates"),
                pg.array_agg(db.distinct(operator)).label("operators")
            )
            .join(service.patterns)
            .join(JourneyPattern.links)
            .join(JourneyPattern.local_operator)
            .outerjoin(LocalOperator.operator)
            .filter(JourneyLink.stop_point_ref == self.atco_code)
            .group_by(service.id, JourneyPattern.direction)
            .order_by(service.line, service.description,
                      JourneyPattern.direction)
        )

        services = query_services.all()
        operators = {}
        for sv in services:
            operators.update(sv.operators)

        return services, operators

Example #12

0

Show file

def describe_skill(skill_name):
    skill = Skill.query.filter_by(name=skill_name).first_or_404()
    city_counts = db.session.query(
            City.coords,
            db.func.count(User.id),
            array_agg(City.name)) \
        .join(User, City.name == User.city) \
        .join(users_skills_table) \
        .join(Skill) \
        .filter(Skill.id == skill.id) \
        .group_by(City.coords)

    c1 = db.alias(City, 'c1')
    c2 = db.alias(City, 'c2')
    u1 = db.alias(User, 'u1')
    u2 = db.alias(User, 'u2')
    us1 = db.alias(users_skills_table, 'us1')
    us2 = db.alias(users_skills_table, 'us2')
    city_connections = db.session.query(
            c1.c.coords, c2.c.coords) \
        .distinct(c1.c.coords, c2.c.coords) \
        .filter(
            c1.c.name == u1.c.city,
            c2.c.name == u2.c.city,
            user_friends_table.c.user_id == u1.c.user_id,
            user_friends_table.c.friend_id == u2.c.user_id,
            u1.c.user_id == us1.c.user_id,
            u2.c.user_id == us2.c.user_id,
            us1.c.skill_id == skill.id,
            us2.c.skill_id == skill.id)

    city_connections_json = [[
        _wkb_to_json(coords1.data),
        _wkb_to_json(coords2.data)
    ] for coords1, coords2 in city_connections]

    return jsonify({
        'name':
        skill.name,
        'city_counts': [{
            'name': names[0],
            'count': ccount,
            'coords': _wkb_to_json(coords.data)
        } for coords, ccount, names in city_counts],
        'city_connections':
        city_connections_json,
    })

Example #13

0

Show file

    def get(self, revision: Revision):
        """
        Return a list of test cases for a given revision.
        """
        build = fetch_build_for_revision(revision)
        if not build:
            return self.respond(status=404)

        build_ids = [original.id for original in build.original]

        job_query = db.session.query(Job.id).filter(
            Job.build_id.in_(build_ids))

        result = request.args.get("allowed_failures")
        if result == "false":
            job_query = job_query.filter(Job.allow_failure == False)  # NOQA
        job_ids = job_query.subquery()

        query = (db.session.query(
            TestCase.hash,
            TestCase.name,
            array_agg_row(TestCase.id, TestCase.job_id, TestCase.duration,
                          TestCase.result).label("runs"),
        ).filter(TestCase.job_id.in_(job_ids)).group_by(
            TestCase.hash, TestCase.name))

        result = request.args.get("result")
        if result:
            try:
                query = query.filter(
                    TestCase.result == getattr(Result, result))
            except AttributeError:
                raise NotImplementedError

        query = query.order_by(
            (array_agg(TestCase.result).label("results").contains(
                [Result.failed])).desc(),
            TestCase.name.asc(),
        )

        schema = AggregateTestCaseSummarySchema(many=True,
                                                strict=True,
                                                exclude=("build", ))
        return self.paginate_with_schema(schema, query)

Example #14

0

Show file

 def by_role(cls, role, since=None):
     columns = array_agg(Subscription.channel).label('channels')
     sq = db.session.query(columns)
     sq = sq.filter(Subscription.deleted_at == None)  # noqa
     sq = sq.filter(Subscription.role_id == role.id)
     sq = sq.cte('sq')
     q = cls.all()
     q = q.filter(or_(
         cls.actor_id != role.id,
         cls.actor_id == None  # noqa
     ))
     q = q.filter(cls.channels.overlap(sq.c.channels))
     q = q.filter(cls._event.in_(Events.names()))
     if since is not None:
         q = q.filter(cls.created_at >= since)
     if role.notified_at is not None:
         q = q.filter(cls.created_at >= role.notified_at)
     q = q.order_by(cls.created_at.desc())
     q = q.order_by(cls.id.desc())
     return q

Example #15

0

Show file

    def get(self, build: Build):
        """
        Return a list of test cases for a given build.
        """
        job_query = db.session.query(Job.id).filter(Job.build_id == build.id)

        result = request.args.get("allowed_failures")
        if result == "false":
            job_query = job_query.filter(Job.allow_failure == False)  # NOQA
        job_ids = job_query.subquery()

        query = (
            db.session.query(
                TestCase.hash,
                TestCase.name,
                array_agg_row(
                    TestCase.id, TestCase.job_id, TestCase.duration, TestCase.result
                ).label("runs"),
            )
            .filter(TestCase.job_id.in_(job_ids))
            .group_by(TestCase.hash, TestCase.name)
        )

        result = request.args.get("result")
        if result:
            try:
                query = query.filter(TestCase.result == getattr(Result, result))
            except AttributeError:
                raise NotImplementedError

        query = query.order_by(
            (
                array_agg(TestCase.result).label("results").contains([Result.failed])
            ).desc(),
            TestCase.name.asc(),
        )

        schema = AggregateTestCaseSummarySchema(
            many=True, strict=True, context={"build": build}
        )
        return self.paginate_with_schema(schema, query)

Example #16

0

Show file

    def build_movement_list_lookup(self):
        """List the unreconciled movements that could be part of bundles."""
        dbsession = self.dbsession
        owner = self.owner
        period = self.period

        movement_rows = (dbsession.query(
            FileMovement.issuer_id,
            TransferRecord.transfer_id,
            func.sum(file_movement_delta).label('delta'),
            array_agg(FileMovement.movement_id).label('movement_ids'),
        ).select_from(FileMovement).join(
            TransferRecord,
            TransferRecord.id == FileMovement.transfer_record_id).join(
                Period, Period.id == FileMovement.period_id).filter(
                    FileMovement.owner_id == owner.id,
                    FileMovement.file_id == period.file_id,
                    FileMovement.reco_id == null,
                    file_movement_delta != 0,
                    TransferRecord.bundle_transfer_id != null,
                    ~Period.closed,
                ).group_by(
                    FileMovement.issuer_id,
                    TransferRecord.transfer_id,
                ).all())

        log.info(
            "BundleFinder: %s unreconciled bundled movement(s) for period %s",
            len(movement_rows), period.id)

        # movement_list_lookup: {
        #     (bundled_transfer_id, issuer_id): (delta, movement_ids)
        # }
        movement_list_lookup = {}
        for row in movement_rows:
            key = (row.transfer_id, row.issuer_id)
            movement_list_lookup[key] = (row.delta, row.movement_ids)

        return movement_list_lookup

Example #17

0

Show file

File: views.py Project: openego/oeplatform

    def post(self, request):
        results = []
        engine = actions._get_engine()
        metadata = sqla.MetaData(bind=engine)
        Session = sessionmaker()
        session = Session(bind=engine)
        search_view = sqla.Table("meta_search", metadata, autoload=True)

        filter_tags = [int(key[len('select_'):]) for key in request.POST if key.startswith('select_')]

        tag_agg = array_agg(Table_tags.tag)
        query = session.query(search_view.c.schema.label('schema'), search_view.c.table.label('table'), tag_agg).outerjoin(Table_tags, (search_view.c.table == Table_tags.table_name) and (search_view.c.table == Table_tags.table_name))
        if filter_tags:
            query = query.having(tag_agg.contains(filter_tags))

        query = query.group_by(search_view.c.schema, search_view.c.table)
        results = session.execute(query)

        session.commit()
        ret = [{'schema': r.schema, 'table':r.table} for r in results]
        print(ret)
        return render(request, 'dataedit/search.html', {'results': ret, 'tags':get_all_tags(), 'selected': filter_tags})

Example #18

0

Show file

    def get(self, repo: Repository, test_hash: str):
        query = (
            db.session.query(
                Build.id.label("build_id"),
                Build.number,
                TestCase.hash,
                TestCase.name,
                array_agg_row(TestCase.id, TestCase.job_id, TestCase.duration,
                              TestCase.result).label("runs"),
            )
            # .join(Job, Job.id == TestCase.job_id)
            # .join(Build, Build.id == Job.build_id)
            .filter(
                TestCase.hash == test_hash,
                TestCase.repository_id == repo.id,
                Build.id == Job.build_id,
                Job.id == TestCase.job_id,
            ).group_by(Build.id, Build.number, TestCase.hash, TestCase.name))

        result = request.args.get("result")
        if result:
            try:
                query = query.filter(
                    TestCase.result == getattr(Result, result))
            except AttributeError:
                raise NotImplementedError

        query = query.order_by(
            Build.number.desc(),
            (array_agg(TestCase.result).label("results").contains(
                [Result.failed])).desc(),
            TestCase.name.asc(),
        )

        schema = AggregateTestCaseSummarySchema(many=True,
                                                strict=True,
                                                context={"repo": repo})
        return self.paginate_with_schema(schema, query)

Example #19

0

Show file

    def construct(self, engine):
        self.truncate(engine)

        # manual indexes
        relidx = sa.Index(self.data.name + "_rels_idx",
                          self.data.c.rels,
                          postgresql_using='gin')
        ndsidx = sa.Index(self.data.name + "_nodes_idx",
                          self.data.c.nodes,
                          postgresql_using='gin')
        # drop indexes if any
        engine.execute(DropIndexIfExists(relidx))
        engine.execute(DropIndexIfExists(ndsidx))

        w = self.way_src.data
        r = self.relway_view

        sub = sa.select([r.c.way_id, array_agg(r.c.relation_id).label('rels')])\
                .group_by(r.c.way_id).alias('aggway')

        cols = [sub.c.way_id, sub.c.rels, w.c.nodes]
        if hasattr(self, 'transform_tags'):
            cols.append(w.c.tags)

        sql = sa.select(cols).where(w.c.id == sub.c.way_id)

        res = engine.execution_options(stream_results=True).execute(sql)
        workers = self.create_worker_queue(engine,
                                           self._process_construct_next)
        for obj in res:
            workers.add_task(obj)

        workers.finish()

        # need reverse lookup indexes for rels and nodes
        relidx.create(engine)
        ndsidx.create(engine)

Example #20

0

Show file

    def __init__(self, tables, *, column=None, order=None, label=None,
                 join_on=None):
        self._tables = list(always_iterable(tables))
        self._label = label

        self._from = reduce(lambda x, y: x.join(y), self._tables)
        if self._label is not None:
            self._from = self._from.alias(self._label)

        if column is not None:
            self.column = column
        else:
            if len(self._tables) > 1:
                raise ValueError('For multi-table-joined relations `column` should be specified')
            self.column = self._tables[0].c.id

        if order is not None:
            self._order = order
        else:
            self._order = self.column.asc().nullslast()

        self._join_on = join_on

        self.aggregated = array_agg(aggregate_order_by(self.column, self._order))

Example #21

0

Show file

File: views.py Project: riahtu/oeplatform

def listtables(request, schema_name):
    """
    :param request: A HTTP-request object sent by the Django framework
    :param schema_name: Name of a schema
    :return: Renders the list of all tables in the specified schema
    """

    searchedQueryString = request.GET.get("query")
    searchedTagIds = list(map(
        int,
        request.GET.getlist("tags"),
    ))

    for tag_id in searchedTagIds:
        increment_usage_count(tag_id)

    labels = get_readable_table_names(schema_name)
    filter_kwargs = dict(
        search=SearchQuery(" & ".join(
            p + ":*" for p in re.findall("[\w]+", searchedQueryString)),
                           search_type="raw")) if searchedQueryString else {}

    engine = actions._get_engine()
    conn = engine.connect()
    Session = sessionmaker()
    session = Session(bind=conn)
    tag_query = session.query(TableTags.table_name,
                              array_agg(sqla.distinct(TableTags.tag)),
                              array_agg(sqla.distinct(Tag.name)),
                              array_agg(sqla.distinct(Tag.color))).filter(
                                  TableTags.schema_name == schema_name,
                                  TableTags.tag == Tag.id).group_by(
                                      TableTags.table_name)
    tags = {
        r[0]: [
            dict(id=ident, name=label, color="#" + format(color, "06X"))
            for ident, label, color in zip(r[1], r[2], r[3])
        ]
        for r in tag_query
    }

    tables = [(table.name, labels.get(table.name), tags.get(table.name, []))
              for table in Table.objects.filter(schema__name=schema_name,
                                                **filter_kwargs)]

    # Apply tag filter later on, because I am not smart enough to do it inline.
    tables = [
        tableEntry for tableEntry in tables
        if {tag["id"]
            for tag in tableEntry[2]}.issuperset(searchedTagIds or set())
    ]

    tables = sorted(tables, key=lambda x: x[0])
    return render(
        request,
        "dataedit/dataedit_tablelist.html",
        {
            "schema": schema_name,
            "tables": tables,
            "query": searchedQueryString,
            "tags": searchedTagIds
        },
    )

Example #22

0

Show file

File: suppliers.py Project: AusDTO/dto-digitalmarketplace-api

def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(
                CaseStudy.data['approach'].astext,
                ' ',
                CaseStudy.data['role'].astext),
            tsquery,
            'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
        ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [desc(func.ts_rank_cd(func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)), tsquery))] + ob

        condition = func.to_tsvector(func.concat(Supplier.name,
                                                 CaseStudy.data['title'].astext,
                                                 CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{'_source': r} for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response

Example #23

0

Show file

    def fetchMore(self):
        logger.debug('TaskItem.fetchMore() is started for item: %s' %
                     self.text())

        if self.canFetchMore():
            from sqlalchemy.orm import aliased
            from sqlalchemy.dialects.postgresql import array_agg
            from stalker import Task, User
            from stalker.models.task import Task_Resources
            from stalker.db.session import DBSession

            inner_tasks = aliased(Task)
            subquery = DBSession.query(Task.id) \
                .filter(Task.id == inner_tasks.parent_id)

            query = DBSession.query(
                Task.id,
                Task.name,
                Task.entity_type,
                Task.status_id,
                subquery.exists().label('has_children'),
                array_agg(User.name).label('resources')
            ) \
                .outerjoin(Task_Resources, Task.__table__.c.id == Task_Resources.c.task_id) \
                .outerjoin(User, Task_Resources.c.resource_id == User.id) \
                .group_by(
                    Task.id,
                    Task.name,
                    Task.entity_type,
                    Task.status_id,
                    subquery.exists().label('has_children')
                )

            if self.task.entity_type != 'Project':
                # query child tasks
                query = query.filter(Task.parent_id == self.task.id)
            else:
                # query only root tasks
                query = query.filter(Task.project_id == self.task.id)\
                    .filter(Task.parent_id==None)

            tasks = query.order_by(Task.name).all()

            # # model = self.model() # This will cause a SEGFAULT
            # # TODO: update it later on

            # start = time.time()
            from anima import defaults
            task_items = []
            for task in tasks:
                task_item = TaskItem(0, 4, task=task)
                task_item.parent = self

                # color with task status
                task_item.setData(
                    QtGui.QColor(
                        *defaults.status_colors_by_id[task.status_id]),
                    QtCore.Qt.BackgroundRole)

                # use black text
                task_item.setForeground(QtGui.QBrush(QtGui.QColor(0, 0, 0)))

                task_items.append(task_item)

            if task_items:
                # self.appendRows(task_items)
                for task_item in task_items:
                    # TODO: Create a custom QStandardItem for each data type in different columns
                    entity_type_item = QtGui.QStandardItem()
                    entity_type_item.setData(task_item.task.entity_type,
                                             QtCore.Qt.DisplayRole)

                    resources_item = QtGui.QStandardItem()
                    if task_item.task.resources != [None]:
                        resources_item.setData(
                            ', '.join(map(str, task_item.task.resources)),
                            QtCore.Qt.DisplayRole)

                    self.appendRow(
                        [task_item, entity_type_item, resources_item])

            self.fetched_all = True

        logger.debug('TaskItem.fetchMore() is finished for item: %s' %
                     self.text())

Example #24

0

Show file

File: suppliers.py Project: AusDTO/dto-digitalmarketplace-api

def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
                 or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(Supplier.summary,
                         ' ',
                         Supplier.data['tools'].astext,
                         ' ',
                         Supplier.data['methodologies'].astext,
                         ' ',
                         Supplier.data['technologies'].astext, ''),
            tsquery,
            'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
        ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)

Example #25

0

Show file

def casestudies_search():
    search_query = get_json_from_request()

    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(
        request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])

    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')

    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(
        Supplier.status != 'deleted',
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(CaseStudy.data['approach'].astext, ' ',
                            CaseStudy.data['role'].astext), tsquery,
                'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " '
            ))
    else:
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)

    if domains:
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))

    if seller_types:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types)))

    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]

    if search_term:
        ob = [
            desc(
                func.ts_rank_cd(
                    func.to_tsvector(
                        func.concat(
                            Supplier.name, CaseStudy.data['title'].astext,
                            CaseStudy.data['approach'].astext)), tsquery))
        ] + ob

        condition = func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)).op('@@')(tsquery)

        q = q.filter(condition)
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)

    total_results = len(results)

    sliced_results = results[offset:(offset + result_count)]

    result = {
        'hits': {
            'total': total_results,
            'hits': [{
                '_source': r
            } for r in sliced_results]
        }
    }

    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500

    return response

Example #26

0

Show file

File: task.py Project: eoyilmaz/anima

    def fetchMore(self):
        logger.debug(
            'TaskItem.fetchMore() is started for item: %s' % self.text()
        )

        if self.canFetchMore():
            from sqlalchemy import alias
            from sqlalchemy.dialects.postgresql import array_agg
            from stalker import Task, User
            from stalker.models.task import Task_Resources
            from stalker.db.session import DBSession

            inner_tasks = alias(Task.__table__)
            subquery = DBSession.query(Task.id)\
                .filter(Task.id == inner_tasks.c.parent_id)
            query = DBSession.query(
                Task.id,
                Task.name,
                Task.entity_type,
                Task.status_id,
                subquery.exists().label('has_children'),
                array_agg(User.name).label('resources')
            )\
                .outerjoin(Task_Resources, Task.id == Task_Resources.c.task_id)\
                .outerjoin(User, Task_Resources.c.resource_id == User.id) \
                .group_by(
                    Task.id,
                    Task.name,
                    Task.entity_type,
                    Task.status_id,
                    subquery.exists().label('has_children')
                )

            if self.task.entity_type != 'Project':
                # query child tasks
                query = query.filter(Task.parent_id == self.task.id)
            else:
                # query only root tasks
                query = query.filter(Task.project_id == self.task.id)\
                    .filter(Task.parent_id==None)

            tasks = query.order_by(Task.name).all()

            # # model = self.model() # This will cause a SEGFAULT
            # # TODO: update it later on

            # start = time.time()
            from anima import defaults
            task_items = []
            for task in tasks:
                task_item = TaskItem(0, 4, entity=task)
                task_item.parent = self

                # color with task status
                task_item.setData(
                    QtGui.QColor(
                        *defaults.status_colors_by_id[task.status_id]
                    ),
                    QtCore.Qt.BackgroundRole
                )

                # use black text
                task_item.setForeground(
                    QtGui.QBrush(QtGui.QColor(0, 0, 0))
                )

                task_items.append(task_item)

            if task_items:
                # self.appendRows(task_items)
                for task_item in task_items:
                    # TODO: Create a custom QStandardItem for each data type in different columns
                    entity_type_item = QtGui.QStandardItem()
                    entity_type_item.setData(task_item.task.entity_type, QtCore.Qt.DisplayRole)

                    resources_item = QtGui.QStandardItem()
                    if task_item.task.resources != [None]:
                        resources_item.setData(', '.join(map(str, task_item.task.resources)), QtCore.Qt.DisplayRole)

                    self.appendRow([task_item, entity_type_item, resources_item])

            self.fetched_all = True

        logger.debug(
            'TaskItem.fetchMore() is finished for item: %s' % self.text()
        )

Example #27

0

Show file

def do_search(search_query, offset, result_count, new_domains, framework_slug):
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'

    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None

    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        new_domains = 'prices.serviceRole.role' not in terms

        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass

        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(
        Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN,
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(Supplier.summary, ' ',
                            Supplier.data['tools'].astext, ' ',
                            Supplier.data['methodologies'].astext, ' ',
                            Supplier.data['technologies'].astext, ''), tsquery,
                'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
            ))

    q = q.group_by(Supplier.id)

    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.filter(SupplierDomain.status == 'assessed')
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(
                cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()

        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]

    if search_term:
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob

        q = q.filter(Supplier.text_vector.op('@@')(tsquery))

    q = q.order_by(*ob)

    raw_results = list(q)
    results = []

    for x in range(len(raw_results)):
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter,
                         Supplier.data, Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)

    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        supplier = group.next()

        supplier['seller_type'] = supplier.get(
            'data') and supplier['data'].get('seller_type')

        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)

        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)

        sliced_results.append(supplier)

    return sliced_results, len(results)

Example #28

0

Show file

File: views.py Project: sandhose/obs-matcher

class AttributesView(Base, ViewMixin):
    __tablename__ = "vw_attributes"

    __materialized__ = True
    __table_args__ = {
        "selectable":
        select([
            Column("external_object_id", primary_key=True),
            _titles,
            Column("dates", ARRAY(Integer)),
            Column("genres", ARRAY(Text)),
            Column("durations", ARRAY(Float)),
            Column("names", ARRAY(Text)),
            Column("countries", ARRAY(CHAR(length=2))),
            # Build a search vector from the first four titles
            func.setweight(func.to_tsvector(func.coalesce(_titles[0], "")),
                           "A").op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[1], "")),
                            "B")).op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[2], "")),
                            "C")).op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[3], "")),
                            "D")).label("search_vector"),
        ]).select_from(
            crosstab(
                select([
                    Value.external_object_id,
                    Value.type,
                    func.coalesce(
                        array_agg(
                            aggregate_order_by(Value.text,
                                               ValueScoreView.score.desc())),
                        cast(text("'{}'"), ARRAY(Text)),
                    ),
                ]).select_from(
                    join(Value, ValueScoreView,
                         Value.id == ValueScoreView.value_id).join(
                             ValueSource, Value.id == ValueSource.value_id)).
                where(
                    and_(
                        _attribute_filter,
                        tuple_(
                            Value.external_object_id,
                            Value.type,
                            ValueSource.platform_id,
                        ).in_(
                            select([
                                PlatformSourceOrderByValueType.val_eo_id,
                                PlatformSourceOrderByValueType.val_type,
                                PlatformSourceOrderByValueType.pl_id,
                            ]).select_from(PlatformSourceOrderByValueType).
                            where(
                                and_(
                                    PlatformSourceOrderByValueType.pl_order ==
                                    1,
                                    or_(
                                        PlatformSourceOrderByValueType.pl_type
                                        == PlatformType.INFO,
                                        PlatformSourceOrderByValueType.val_type
                                        == ValueType.TITLE,
                                    ),
                                ))),
                    )).group_by(Value.external_object_id, Value.type),
                table(
                    "ct",
                    column("external_object_id", Integer),
                    column("titles", ARRAY(Text)),
                    column("dates", ARRAY(Integer)),
                    column("genres", ARRAY(Text)),
                    column("durations", ARRAY(Float)),
                    column("names", ARRAY(Text)),
                    column("countries", ARRAY(CHAR(length=2))),
                ),
                categories=select(
                    [func.unnest(array([v.name for v in ValueType]))]),
                auto_order=False,
            )),
        "dependencies": (
            Value,
            ValueScoreView,
            ValueSource,
            PlatformSourceOrderByValueType,
        ),
        "materialized":
        True,
        "indexes": (Index("pk_vw_attributes",
                          "external_object_id",
                          unique=True), ),
    }

Example #29

0

Show file

File: app.py Project: jungleBadger/udacity_artist_booking_site

def venues():
    data = Venue.query.with_entities(Venue.city, Venue.state,
                                     postgresql.array_agg(
                                         func.json_build_object('id', Venue.id, 'name', Venue.name)).label('venues')) \
        .group_by(Venue.city, Venue.state).all()
    return render_template('pages/venues.html', areas=data)

Example #30

0

Show file

File: views.py Project: riahtu/oeplatform

def listschemas(request):
    """
    Loads all schemas that are present in the external database specified in
    oeplatform/securitysettings.py. Only schemas that are present in the
    whitelist are processed that do not start with an underscore.

    :param request: A HTTP-request object sent by the Django framework

    :return: Renders the schema list
    """

    searchedQueryString = request.GET.get("query")
    searchedTagIds = list(map(
        lambda t: int(t),
        request.GET.getlist("tags"),
    ))

    for tag_id in searchedTagIds:
        increment_usage_count(tag_id)

    filter_kwargs = dict(
        search=SearchQuery(" & ".join(
            p + ":*" for p in re.findall("[\w]+", searchedQueryString)),
                           search_type="raw")) if searchedQueryString else {}
    response = Table.objects.filter(
        **filter_kwargs).values("schema__name").annotate(
            tables_count=Count("name"))

    engine = actions._get_engine()
    conn = engine.connect()
    Session = sessionmaker()
    session = Session(bind=conn)
    tags = {
        r[0]: set(r[1])
        for r in session.query(TableTags.schema_name, array_agg(
            TableTags.tag)).group_by(TableTags.schema_name)
    }

    description = {
        "boundaries":
        "Data that depicts boundaries, such as geographic, administrative or political boundaries. Such data comes as polygons.",
        "climate":
        "Data related to climate and weather. This includes, for example, precipitation, temperature, cloud cover and atmospheric conditions.",
        "economy":
        "Data related to economic activities. Examples: sectoral value added, sectoral inputs and outputs, GDP, prices of commodities etc.",
        "demand":
        "Data on demand. Demand can relate to commodities but also to services.",
        "grid":
        "Energy transmission infrastructure. examples: power lines, substation, pipelines",
        "supply":
        "Data on supply. Supply can relate to commodities but also to services.",
        "environment":
        "environmental resources, protection and conservation. examples: environmental pollution, waste storage and treatment, environmental impact assessment, monitoring environmental risk, nature reserves, landscape",
        "society":
        "Demographic data such as population statistics and projections, fertility, mortality etc.",
        "model_draft":
        "Unfinished data of any kind. Note: there is no version control and data is still volatile.",
        "scenario":
        "Scenario data in the broadest sense. Includes input and output data from models that project scenarios into the future. Example inputs: assumptions made about future developments of key parameters such as energy prices and GDP. Example outputs: projected electricity transmission, projected greenhouse gas emissions. Note that inputs to one model could be an output of another model and the other way around.",
        "reference":
        "Contains sources, literature and auxiliary/helper tables that can help you with your work.",
        "emission":
        "Data on emissions. Examples: total greenhouse gas emissions, CO2-emissions, energy-related CO2-emissions, methane emissions, air pollutants etc.",
        "openstreetmap":
        "OpenStreetMap is a open project that collects and structures freely usable geodata and keeps them in a database for use by anyone. This data is available under a free license, the Open Database License.",
        "policy":
        "Data on policies and measures. This could, for example, include a list of renewable energy policies per European Member State. It could also be a list of climate related policies and measures in a specific country."
    }

    schemas = sorted(
        [(row["schema__name"],
          description.get(row["schema__name"], "No description"),
          row["tables_count"], tags.get(row["schema__name"], []))
         for row in response
         if row["schema__name"] in schema_whitelist and tags.get(
             row["schema__name"], set()).issuperset(searchedTagIds or set())],
        key=lambda x: x[0],
    )

    return render(request, "dataedit/dataedit_schemalist.html", {
        "schemas": schemas,
        "query": searchedQueryString,
        "tags": searchedTagIds
    })

Example #31

0

Show file

def make_submission_dataframe(query, form, selected_tags=None,
                              excluded_tags=None):
    if not db.session.query(query.exists()).scalar():
        return pd.DataFrame()

    # get column headers by getting all ancestor names
    # of the submission location's location type
    sample_submission = query.first()
    location_type = sample_submission.location.location_type
    ancestor_names = [a.name for a in location_type.ancestors()] + \
        [location_type.name]

    # excluded tags have higher priority than selected tags
    fields = set(form.tags)
    if selected_tags:
        fields = fields.intersection(selected_tags)
    if excluded_tags:
        fields = fields.difference(excluded_tags)

    # the 'updated' field is required for results analysis
    columns = [
        Submission.data[tag].label(tag) for tag in fields] + [
            Submission.updated
        ]

    # alias just in case the query is already joined to the tables below
    loc = aliased(Location)
    loc_path = aliased(LocationPath)
    own_loc = aliased(Location)

    # add registered voters and path extraction to the columns
    columns.append(own_loc.registered_voters.label(
        'registered_voters'))
    columns.append(
        array_agg(aggregate_order_by(loc.name, loc_path.depth.desc())).label(
            'location_data'))

    query2 = query.join(
        loc_path, Submission.location_id == loc_path.descendant_id
    ).join(loc, loc.id == loc_path.ancestor_id).join(
        own_loc, own_loc.id == Submission.location_id)

    # type coercion is necessary for numeric columns
    # if we allow Pandas to infer the column type for these,
    # there's a risk that it will guess wrong, then it might
    # raise exceptions when we're calculating the mean and
    # standard deviation on those columns
    type_coercions = {
        tag: np.float64
        for tag in form.tags
        if form.get_field_by_tag(tag)['type'] == 'integer'}

    dataframe_query = query2.with_entities(*columns).group_by(
        Submission.id, own_loc.registered_voters)

    df = pd.read_sql(dataframe_query.statement, dataframe_query.session.bind)
    df = df.astype(type_coercions)

    df_locations = df['location_data'].apply(pd.Series).rename(
        columns=dict(enumerate(ancestor_names)))

    return pd.concat(
        [df, df_locations], axis=1, join_axes=[df.index])