def get_articles(gvkey, date=None):
    """Return per-publication-date aggregates of a company's articles.

    :param gvkey: company identifier to filter ``CompanyArticle`` rows by.
    :param date: optional ``Articles.PD`` value; when given, restrict the
        result to that single publication date.
    :returns: list of rows ``(article_ids, main_cats, sub_cats, PD)`` —
        one row per distinct ``Articles.PD``, ordered by date.
    """
    session = Session()
    try:
        q = session.query(
            array_agg(CompanyArticle.article_id).label('article_ids'),
            array_agg(CompanyArticle.main_category).label('main_cats'),
            array_agg(CompanyArticle.sub_category).label('sub_cats'),
            Articles.PD
        ) \
            .join(Articles, Articles.id == CompanyArticle.article_id) \
            .filter(CompanyArticle.gvkey == gvkey)
        if date is not None:
            q = q.filter(Articles.PD == date)
        q = q.group_by(Articles.PD).order_by(Articles.PD)
        return q.all()
    finally:
        # The original leaked the session; always release it back to the pool.
        session.close()
def _update_handle_new_ways(self, engine):
    """Insert ways that are referenced by relations but missing from our
    data table.

    Returns a changeset dict mapping way_id -> 'A' for every row added.
    """
    w = self.way_src.data    # source table of ways
    r = self.relway_view     # relation-id <-> way-id membership view
    wold = self.data         # our current (pre-update) data table
    # Ways we do not have yet, each with the aggregated array of
    # relation ids it belongs to.
    sub = sa.select([r.c.way_id, array_agg(r.c.relation_id).label('rels')])\
        .where(r.c.way_id.notin_(sa.select([wold.c.id])))\
        .group_by(r.c.way_id).alias('aggway')
    cols = [sub.c.way_id, sub.c.rels, w.c.nodes]
    # Tags are only fetched when a tag-transformation hook is defined.
    if hasattr(self, 'transform_tags'):
        cols.append(w.c.tags)
    sql = sa.select(cols).where(w.c.id == sub.c.way_id)
    changeset = {}
    inserts = []
    for obj in engine.execute(sql):
        # _construct_row may reject a row by returning None.
        cols = self._construct_row(obj, engine)
        if cols is not None:
            changeset[obj['way_id']] = 'A'
            inserts.append(cols)
    if len(inserts):
        engine.execute(self.data.insert().values(inserts))
    return changeset
def venues():
    """Render the venues page with venues grouped by (city, state).

    NOTE(review): ``Venue.query.get(vid)`` inside the loop issues one query
    per venue id (N+1); consider aggregating names in the grouped query.
    """
    # TODO: replace with real venues data.
    # num_shows should be aggregated based on number of upcoming shows per venue.
    areas = []
    venues_agg = array_agg(Venue.id).label('venues')
    # One row per distinct (city, state) with the array of venue ids there.
    areas_data = db.session.query(
        Venue.city,
        Venue.state,
        venues_agg
    ).group_by(Venue.city, Venue.state).all()
    for (city, state, venue_ids) in areas_data:
        area_venues = []
        for vid in venue_ids:
            v = {}
            v['name'] = Venue.query.get(vid).name
            v['id'] = vid
            area_venues.append(v)
        area = {}
        area['city'] = city
        area['state'] = state
        area['venues'] = area_venues
        areas.append(area)
    return render_template('pages/venues.html', areas=areas)
def get(self, build: Build):
    """
    Return a list of test cases for a given build.
    """
    # Subquery of all job ids belonging to this build, used in the IN filter.
    job_ids = db.session.query(Job.id).filter(Job.build_id == build.id).subquery()
    # One row per (hash, name); all runs across jobs are collected into
    # the "runs" array column.
    query = db.session.query(
        TestCase.hash,
        TestCase.name,
        array_agg_row(
            TestCase.id, TestCase.job_id, TestCase.duration, TestCase.result
        ).label(
            "runs"
        ),
    ).filter(
        TestCase.job_id.in_(job_ids)
    ).group_by(
        TestCase.hash, TestCase.name
    )
    # Optional ?result=<name> filter; an unknown Result name is rejected.
    result = request.args.get("result")
    if result:
        try:
            query = query.filter(TestCase.result == getattr(Result, result))
        except AttributeError:
            raise NotImplementedError
    # Sort groups containing a failed run first, then by name.
    query = query.order_by(
        (
            array_agg(TestCase.result).label("results").contains([Result.failed])
        ).desc(),
        TestCase.name.asc(),
    )
    return self.paginate_with_schema(testcases_schema, query)
def _group_lines_stops(list_stops):
    """ Groups lines and stops such that each distinct line and direction
        has a group of stops associated with it.

        :param list_stops: stop objects; only their ``atco_code`` is used.
        :returns: list of dicts with keys code, direction, line,
            destination and stops.
    """
    stops = [s.atco_code for s in list_stops]
    # SQL literal ' / ' used as the string_agg delimiter.
    separator = db.literal_column("' / '")
    # Distinct destinations joined into one string, ordered alphabetically.
    destinations = db.func.string_agg(
        db.distinct(models.JourneyPattern.destination),
        pg.aggregate_order_by(separator, models.JourneyPattern.destination)
    )
    # All distinct stop refs for the group, as a PostgreSQL array.
    array_stops = pg.array_agg(db.distinct(models.JourneyLink.stop_point_ref))
    groups = (
        db.session.query(
            models.Service.code.label("code"),
            models.JourneyPattern.direction.label("direction"),
            models.Service.line.label("line"),
            destinations.label("destination"),
            array_stops.label("stops")
        )
        .select_from(models.Service)
        .join(models.Service.patterns)
        .join(models.JourneyPattern.links)
        .filter(models.JourneyLink.stop_point_ref.in_(stops))
        .group_by(models.Service.code, models.Service.line,
                  models.JourneyPattern.direction)
        .order_by(models.Service.line, models.JourneyPattern.direction)
        .all()
    )
    return [g._asdict() for g in groups]
def post(self, request):
    """Handle the table-search form POST and render matching tables.

    :param request: Django request; checkbox names of the form
        ``select_<tag_id>`` mark the tags to filter by.
    :returns: rendered ``dataedit/search.html`` response.
    """
    results = []
    engine = actions._get_engine()
    metadata = sqla.MetaData(bind=engine)
    Session = sessionmaker()
    session = Session(bind=engine)
    try:
        search_view = sqla.Table("meta_search", metadata, autoload=True)
        # Selected tag ids come from checkbox names 'select_<id>'.
        filter_tags = [
            int(key[len('select_'):]) for key in request.POST
            if key.startswith('select_')
        ]
        tag_agg = array_agg(Table_tags.tag)
        # BUG FIX: the join condition previously combined two identical
        # expressions with the Python `and` operator, which does not build
        # a SQL AND; the single comparison is the intended condition.
        query = session.query(
            search_view.c.schema.label('schema'),
            search_view.c.table.label('table'),
            tag_agg).outerjoin(
                Table_tags,
                search_view.c.table == Table_tags.table_name)
        if filter_tags:
            # Keep only tables whose aggregated tags contain ALL selected ids.
            query = query.having(tag_agg.contains(filter_tags))
        query = query.group_by(search_view.c.schema, search_view.c.table)
        results = session.execute(query)
        session.commit()
        ret = [{'schema': r.schema, 'table': r.table} for r in results]
    finally:
        # Release the session even if the query fails.
        session.close()
    print(ret)
    return render(request, 'dataedit/search.html', {
        'results': ret,
        'tags': get_all_tags(),
        'selected': filter_tags
    })
def check_tags(offers, tags_ids):
    """Restrict *offers* to those tagged with ALL ids in *tags_ids*.

    :param offers: an Offer query.
    :param tags_ids: iterable of tag ids, or None/empty for no filtering.
    :returns: the (possibly filtered) query.
    """
    # The original had three branches (`is None`, empty, `is not None`);
    # the first two both passed offers through, so a single truthiness
    # check is equivalent and clearer.
    if not tags_ids:
        return offers
    return offers\
        .join(OffersTags)\
        .filter(OffersTags.tag_id.in_(tags_ids))\
        .group_by(Offer.id)\
        .having(array_agg(OffersTags.tag_id).contains(tags_ids))
def matching_groups(cls, query, admin_areas=None):
    """ Finds all admin areas and table names covering matching results for
        a query. The areas and groups are sorted into two sets, to be used
        for filtering.
        :param query: web search query as string.
        :param admin_areas: Filter by admin areas to get matching groups
        :returns: A tuple with a dict of groups and a dict with
        administrative area references and names
    """
    # Bail out early on queries that cannot be turned into a tsquery.
    if not _test_query(query):
        return None
    # Distinct table names of all matching rows, aggregated into an array.
    array_t = pg.array_agg(db.distinct(cls.table_name))
    if admin_areas is not None:
        # Filter array of tables by whether aggregated rows' admin areas
        # match those already selected
        array_t = array_t.filter(cls.admin_areas.overlap(admin_areas))
    # (code, name) pairs, aggregated into an array of 2-element arrays.
    array_areas = pg.array((AdminArea.code, AdminArea.name))
    array_a = pg.array_agg(db.distinct(array_areas))
    result = (db.session.query(
        array_t.label("tables"),
        array_a.label("areas")).select_from(
            db.func.unnest(cls.admin_areas).alias("unnest_areas")).join(
                AdminArea,
                db.column("unnest_areas") == AdminArea.code).filter(
                    cls.match(query)).one())
    # All data should have been aggregated into one row
    tables = set(result.tables) if result.tables is not None else set()
    groups = {
        g: n
        for g, n in cls.GROUP_NAMES.items() if tables & cls.GROUPS[g]
    }
    areas = dict(result.areas) if result.areas is not None else {}
    return groups, areas
def index():
    """Render the landing page with the current user's photos grouped by
    the month they were uploaded, newest month first."""
    page_title = current_user.first_name + "'s"
    # One row per month: (month_uploaded, array of image paths).
    monthly_images = (
        db.session.query(Image.month_uploaded,
                         postgresql.array_agg(Image.path))
        .join(User)
        .filter(Image.user_id == current_user.id)
        .group_by(Image.month_uploaded)
        .order_by(desc(Image.month_uploaded))
        .all()
    )
    image_sections = ImageStorageService.get_user_images(monthly_images)
    return render_template('index.html', name=page_title,
                           image_sections=image_sections)
def _update_handle_changed_rels(self, engine):
    """Recompute the relation-membership array for ways whose relations
    changed; update or delete rows accordingly.

    Returns a changeset dict: way id -> 'M' (modified) or 'D' (deleted).
    """
    w = self.data                          # our way data table
    r = self.relation_src.data             # source relations
    rs = self.relway_view.alias('relsrc')  # relation <-> way membership
    # Recreate the relation set for all ways in changed relations.
    sub = sa.select([array_agg(r.c.id)])\
        .where(r.c.members.contains(
            sa.func.jsonb_build_array(
                sa.func.jsonb_build_object('type', 'W', 'id', w.c.id))))
    # Candidate ways: members of an added/modified relation, or ways whose
    # stored rels overlap the relation source ids.
    # NOTE(review): `self.relation_src.cc.id` — looks like it should be
    # `.c.id`; confirm the `cc` attribute exists.
    # NOTE(review): `.op('&& ARRAY')` is an unusual operator string —
    # confirm the generated "rels && ARRAY(SELECT ...)" SQL is intended.
    sql = sa.select([w.c.id, w.c.rels, sub.label('new_rels')])\
        .where(sa.or_(
            w.c.id.in_(
                sa.select([rs.c.way_id]).
                where(rs.c.relation_id.in_(
                    self.relation_src.select_add_modify()))),
            w.c.rels.op('&& ARRAY')(sa.select([self.relation_src.cc.id]))
        ))
    inserts = []
    deletes = []
    changeset = {}
    for obj in engine.execute(sql):
        oid = obj['id']
        # If the new set is empty, the way has been removed from the set.
        if obj['new_rels'] is None:
            deletes.append({'oid': oid})
            changeset[oid] = 'D'
        # If the relation set differs, there was a relevant change.
        # (Only update the way set here. geometry and tag changes have
        # already been done during the first pass.)
        else:
            rels = sorted(obj['new_rels'])
            if rels != obj['rels']:
                inserts.append({'oid': oid, 'rels': rels})
                changeset[oid] = 'M'
    # NOTE(review): `self.c.id` below — elsewhere this class uses
    # `self.data.c.id`; confirm `self.c` is defined on this object.
    if len(inserts):
        engine.execute(
            self.data.update().where(
                self.c.id == sa.bindparam('oid')).values(
                    rels=sa.bindparam('rels')), inserts)
    if len(deletes):
        engine.execute(
            self.data.delete().where(self.c.id == sa.bindparam('oid')),
            deletes)
    return changeset
def get_services(self): """ Queries and returns two datasets for services and operators at this stoplist including the origin and destination of these services, grouped by service ID and direction. Services are also checked for whether they terminate at this stop or not. Operators are returned as a dict of local operator codes and operator names. """ # Checks if associated link is not last in sequence link = db.aliased(JourneyLink) next_link = ( db.session.query(link.id) .filter(link.pattern_ref == JourneyLink.pattern_ref, link.sequence == JourneyLink.sequence + 1) .as_scalar() ) # Give service instance name in keyed tuple object service = db.aliased(Service, name="service") operator = pg.array(( LocalOperator.code, db.func.coalesce(Operator.name, LocalOperator.name) )) query_services = ( db.session.query( service, JourneyPattern.direction, db.func.string_agg(JourneyPattern.origin.distinct(), ' / ') .label("origin"), db.func.string_agg(JourneyPattern.destination.distinct(), ' / ') .label("destination"), (db.func.count(next_link) == 0).label("terminates"), pg.array_agg(db.distinct(operator)).label("operators") ) .join(service.patterns) .join(JourneyPattern.links) .join(JourneyPattern.local_operator) .outerjoin(LocalOperator.operator) .filter(JourneyLink.stop_point_ref == self.atco_code) .group_by(service.id, JourneyPattern.direction) .order_by(service.line, service.description, JourneyPattern.direction) ) services = query_services.all() operators = {} for sv in services: operators.update(sv.operators) return services, operators
def describe_skill(skill_name):
    """Return JSON describing a skill: per-city counts of users who have
    it, plus coordinate pairs for friendships between such users that
    span two cities.

    :param skill_name: name of the skill; 404s when unknown.
    """
    skill = Skill.query.filter_by(name=skill_name).first_or_404()
    # Users with this skill counted per city, grouped by coordinates.
    # City names are aggregated; names[0] is used as display name below.
    city_counts = db.session.query(
        City.coords, db.func.count(User.id), array_agg(City.name)) \
        .join(User, City.name == User.city) \
        .join(users_skills_table) \
        .join(Skill) \
        .filter(Skill.id == skill.id) \
        .group_by(City.coords)
    # Aliases for the two endpoints of a friendship edge.
    c1 = db.alias(City, 'c1')
    c2 = db.alias(City, 'c2')
    u1 = db.alias(User, 'u1')
    u2 = db.alias(User, 'u2')
    us1 = db.alias(users_skills_table, 'us1')
    us2 = db.alias(users_skills_table, 'us2')
    # Distinct coordinate pairs where a user in c1 is friends with a user
    # in c2 and both have the skill.
    city_connections = db.session.query(
        c1.c.coords, c2.c.coords) \
        .distinct(c1.c.coords, c2.c.coords) \
        .filter(
            c1.c.name == u1.c.city,
            c2.c.name == u2.c.city,
            user_friends_table.c.user_id == u1.c.user_id,
            user_friends_table.c.friend_id == u2.c.user_id,
            u1.c.user_id == us1.c.user_id,
            u2.c.user_id == us2.c.user_id,
            us1.c.skill_id == skill.id,
            us2.c.skill_id == skill.id)
    city_connections_json = [[
        _wkb_to_json(coords1.data),
        _wkb_to_json(coords2.data)
    ] for coords1, coords2 in city_connections]
    return jsonify({
        'name': skill.name,
        'city_counts': [{
            'name': names[0],
            'count': ccount,
            'coords': _wkb_to_json(coords.data)
        } for coords, ccount, names in city_counts],
        'city_connections': city_connections_json,
    })
def get(self, revision: Revision):
    """
    Return a list of test cases for a given revision.
    """
    build = fetch_build_for_revision(revision)
    if not build:
        return self.respond(status=404)
    # A revision may map to several original builds; gather all of them.
    build_ids = [original.id for original in build.original]
    job_query = db.session.query(Job.id).filter(
        Job.build_id.in_(build_ids))
    # ?allowed_failures=false excludes jobs whose failure is tolerated.
    result = request.args.get("allowed_failures")
    if result == "false":
        job_query = job_query.filter(Job.allow_failure == False)  # NOQA
    job_ids = job_query.subquery()
    # One row per (hash, name); runs across jobs go into the "runs" array.
    query = (db.session.query(
        TestCase.hash,
        TestCase.name,
        array_agg_row(TestCase.id, TestCase.job_id, TestCase.duration,
                      TestCase.result).label("runs"),
    ).filter(TestCase.job_id.in_(job_ids)).group_by(
        TestCase.hash, TestCase.name))
    # Optional ?result=<name> filter; unknown Result names are rejected.
    result = request.args.get("result")
    if result:
        try:
            query = query.filter(
                TestCase.result == getattr(Result, result))
        except AttributeError:
            raise NotImplementedError
    # Sort groups containing a failed run first, then by name.
    query = query.order_by(
        (array_agg(TestCase.result).label("results").contains(
            [Result.failed])).desc(),
        TestCase.name.asc(),
    )
    schema = AggregateTestCaseSummarySchema(many=True, strict=True,
                                            exclude=("build", ))
    return self.paginate_with_schema(schema, query)
def by_role(cls, role, since=None):
    """Return notifications relevant to *role*, newest first.

    Matches events on any channel the role subscribes to, excluding
    events the role caused itself, optionally limited to those created
    after *since* and after the role's last notification time.
    """
    # CTE with a single row: the array of channels the role subscribes to.
    channel_array = array_agg(Subscription.channel).label('channels')
    subscribed = db.session.query(channel_array)
    subscribed = subscribed.filter(Subscription.deleted_at == None)  # noqa
    subscribed = subscribed.filter(Subscription.role_id == role.id)
    subscribed = subscribed.cte('sq')
    q = cls.all()
    # Skip events the role itself triggered (but keep anonymous ones).
    q = q.filter(or_(
        cls.actor_id != role.id,
        cls.actor_id == None  # noqa
    ))
    q = q.filter(cls.channels.overlap(subscribed.c.channels))
    q = q.filter(cls._event.in_(Events.names()))
    if since is not None:
        q = q.filter(cls.created_at >= since)
    if role.notified_at is not None:
        q = q.filter(cls.created_at >= role.notified_at)
    # Successive order_by calls append, so this sorts by created_at then id.
    q = q.order_by(cls.created_at.desc())
    q = q.order_by(cls.id.desc())
    return q
def get(self, build: Build):
    """
    Return a list of test cases for a given build.
    """
    job_query = db.session.query(Job.id).filter(Job.build_id == build.id)
    # ?allowed_failures=false excludes jobs whose failure is tolerated.
    result = request.args.get("allowed_failures")
    if result == "false":
        job_query = job_query.filter(Job.allow_failure == False)  # NOQA
    job_ids = job_query.subquery()
    # One row per (hash, name); runs across jobs go into the "runs" array.
    query = (
        db.session.query(
            TestCase.hash,
            TestCase.name,
            array_agg_row(
                TestCase.id, TestCase.job_id, TestCase.duration, TestCase.result
            ).label("runs"),
        )
        .filter(TestCase.job_id.in_(job_ids))
        .group_by(TestCase.hash, TestCase.name)
    )
    # Optional ?result=<name> filter; unknown Result names are rejected.
    result = request.args.get("result")
    if result:
        try:
            query = query.filter(TestCase.result == getattr(Result, result))
        except AttributeError:
            raise NotImplementedError
    # Sort groups containing a failed run first, then by name.
    query = query.order_by(
        (
            array_agg(TestCase.result).label("results").contains([Result.failed])
        ).desc(),
        TestCase.name.asc(),
    )
    schema = AggregateTestCaseSummarySchema(
        many=True, strict=True, context={"build": build}
    )
    return self.paginate_with_schema(schema, query)
def build_movement_list_lookup(self):
    """List the unreconciled movements that could be part of bundles.

    Returns a dict keyed by (bundled transfer id, issuer id), mapping to
    a (summed delta, list of movement ids) tuple.
    """
    dbsession = self.dbsession
    owner = self.owner
    period = self.period
    # NOTE(review): `null` below is presumably a module-level
    # sqlalchemy null() construct (not the function itself) — confirm.
    # One row per (issuer, transfer): summed delta plus all movement ids.
    movement_rows = (dbsession.query(
        FileMovement.issuer_id,
        TransferRecord.transfer_id,
        func.sum(file_movement_delta).label('delta'),
        array_agg(FileMovement.movement_id).label('movement_ids'),
    ).select_from(FileMovement).join(
        TransferRecord,
        TransferRecord.id == FileMovement.transfer_record_id).join(
            Period, Period.id == FileMovement.period_id).filter(
                FileMovement.owner_id == owner.id,
                FileMovement.file_id == period.file_id,
                FileMovement.reco_id == null,
                file_movement_delta != 0,
                TransferRecord.bundle_transfer_id != null,
                ~Period.closed,
            ).group_by(
                FileMovement.issuer_id,
                TransferRecord.transfer_id,
            ).all())
    log.info(
        "BundleFinder: %s unreconciled bundled movement(s) for period %s",
        len(movement_rows), period.id)
    # movement_list_lookup: {
    #     (bundled_transfer_id, issuer_id): (delta, movement_ids)
    # }
    movement_list_lookup = {}
    for row in movement_rows:
        key = (row.transfer_id, row.issuer_id)
        movement_list_lookup[key] = (row.delta, row.movement_ids)
    return movement_list_lookup
def post(self, request):
    """Handle the table-search form POST and render matching tables.

    :param request: Django request; checkbox names of the form
        ``select_<tag_id>`` mark the tags to filter by.
    :returns: rendered ``dataedit/search.html`` response.
    """
    results = []
    engine = actions._get_engine()
    metadata = sqla.MetaData(bind=engine)
    Session = sessionmaker()
    session = Session(bind=engine)
    try:
        search_view = sqla.Table("meta_search", metadata, autoload=True)
        # Selected tag ids come from checkbox names 'select_<id>'.
        filter_tags = [int(key[len('select_'):]) for key in request.POST
                       if key.startswith('select_')]
        tag_agg = array_agg(Table_tags.tag)
        # BUG FIX: the join condition previously combined two identical
        # expressions with the Python `and` operator, which does not build
        # a SQL AND; the single comparison is the intended condition.
        query = session.query(search_view.c.schema.label('schema'),
                              search_view.c.table.label('table'),
                              tag_agg).outerjoin(
                                  Table_tags,
                                  search_view.c.table == Table_tags.table_name)
        if filter_tags:
            # Keep only tables whose aggregated tags contain ALL selected ids.
            query = query.having(tag_agg.contains(filter_tags))
        query = query.group_by(search_view.c.schema, search_view.c.table)
        results = session.execute(query)
        session.commit()
        ret = [{'schema': r.schema, 'table': r.table} for r in results]
    finally:
        # Release the session even if the query fails.
        session.close()
    print(ret)
    return render(request, 'dataedit/search.html',
                  {'results': ret,
                   'tags': get_all_tags(),
                   'selected': filter_tags})
def get(self, repo: Repository, test_hash: str):
    """Return per-build aggregated runs of a single test (by hash) in a
    repository, newest build first."""
    # NOTE(review): the joins are expressed as filter conditions rather
    # than the commented-out .join() calls — result is equivalent here.
    query = (
        db.session.query(
            Build.id.label("build_id"),
            Build.number,
            TestCase.hash,
            TestCase.name,
            array_agg_row(TestCase.id, TestCase.job_id, TestCase.duration,
                          TestCase.result).label("runs"),
        )
        # .join(Job, Job.id == TestCase.job_id)
        # .join(Build, Build.id == Job.build_id)
        .filter(
            TestCase.hash == test_hash,
            TestCase.repository_id == repo.id,
            Build.id == Job.build_id,
            Job.id == TestCase.job_id,
        ).group_by(Build.id, Build.number, TestCase.hash, TestCase.name))
    # Optional ?result=<name> filter; unknown Result names are rejected.
    result = request.args.get("result")
    if result:
        try:
            query = query.filter(
                TestCase.result == getattr(Result, result))
        except AttributeError:
            raise NotImplementedError
    # Newest builds first; within a build, failed groups sort first.
    query = query.order_by(
        Build.number.desc(),
        (array_agg(TestCase.result).label("results").contains(
            [Result.failed])).desc(),
        TestCase.name.asc(),
    )
    schema = AggregateTestCaseSummarySchema(many=True, strict=True,
                                            context={"repo": repo})
    return self.paginate_with_schema(schema, query)
def construct(self, engine): self.truncate(engine) # manual indexes relidx = sa.Index(self.data.name + "_rels_idx", self.data.c.rels, postgresql_using='gin') ndsidx = sa.Index(self.data.name + "_nodes_idx", self.data.c.nodes, postgresql_using='gin') # drop indexes if any engine.execute(DropIndexIfExists(relidx)) engine.execute(DropIndexIfExists(ndsidx)) w = self.way_src.data r = self.relway_view sub = sa.select([r.c.way_id, array_agg(r.c.relation_id).label('rels')])\ .group_by(r.c.way_id).alias('aggway') cols = [sub.c.way_id, sub.c.rels, w.c.nodes] if hasattr(self, 'transform_tags'): cols.append(w.c.tags) sql = sa.select(cols).where(w.c.id == sub.c.way_id) res = engine.execution_options(stream_results=True).execute(sql) workers = self.create_worker_queue(engine, self._process_construct_next) for obj in res: workers.add_task(obj) workers.finish() # need reverse lookup indexes for rels and nodes relidx.create(engine) ndsidx.create(engine)
def __init__(self, tables, *, column=None, order=None, label=None, join_on=None):
    """Set up an aggregated relation over one or more joined tables.

    `column` must be given when more than one table is joined; otherwise
    the single table's `id` column is used. `order` defaults to ascending
    on that column with NULLs last.
    """
    self._tables = list(always_iterable(tables))
    self._label = label
    # Fold the tables into a single join chain, aliased when labelled.
    joined = reduce(lambda left, right: left.join(right), self._tables)
    if self._label is not None:
        joined = joined.alias(self._label)
    self._from = joined
    if column is None:
        if len(self._tables) > 1:
            raise ValueError('For multi-table-joined relations `column` should be specified')
        self.column = self._tables[0].c.id
    else:
        self.column = column
    self._order = order if order is not None else self.column.asc().nullslast()
    self._join_on = join_on
    self.aggregated = array_agg(aggregate_order_by(self.column, self._order))
def listtables(request, schema_name):
    """
    :param request: A HTTP-request object sent by the Django framework
    :param schema_name: Name of a schema
    :return: Renders the list of all tables in the specified schema
    """
    searchedQueryString = request.GET.get("query")
    searchedTagIds = list(map(
        int,
        request.GET.getlist("tags"),
    ))
    # Record usage so popular tags can be surfaced elsewhere.
    for tag_id in searchedTagIds:
        increment_usage_count(tag_id)
    labels = get_readable_table_names(schema_name)
    # Build a prefix-matching full-text query from the words entered.
    filter_kwargs = dict(
        search=SearchQuery(" & ".join(
            p + ":*" for p in re.findall("[\w]+", searchedQueryString)),
            search_type="raw")) if searchedQueryString else {}
    engine = actions._get_engine()
    conn = engine.connect()
    Session = sessionmaker()
    session = Session(bind=conn)
    # Per table: parallel arrays of tag ids, names and colours; the zip
    # below relies on all three aggregates keeping the same order.
    tag_query = session.query(TableTags.table_name,
                              array_agg(sqla.distinct(TableTags.tag)),
                              array_agg(sqla.distinct(Tag.name)),
                              array_agg(sqla.distinct(Tag.color))).filter(
                                  TableTags.schema_name == schema_name,
                                  TableTags.tag == Tag.id).group_by(
                                      TableTags.table_name)
    tags = {
        r[0]: [
            dict(id=ident, name=label, color="#" + format(color, "06X"))
            for ident, label, color in zip(r[1], r[2], r[3])
        ]
        for r in tag_query
    }
    tables = [(table.name, labels.get(table.name), tags.get(table.name, []))
              for table in Table.objects.filter(schema__name=schema_name,
                                                **filter_kwargs)]
    # Apply tag filter later on, because I am not smart enough to do it inline.
    tables = [
        tableEntry for tableEntry in tables
        if {tag["id"]
            for tag in tableEntry[2]}.issuperset(searchedTagIds or set())
    ]
    tables = sorted(tables, key=lambda x: x[0])
    return render(
        request,
        "dataedit/dataedit_tablelist.html",
        {
            "schema": schema_name,
            "tables": tables,
            "query": searchedQueryString,
            "tags": searchedTagIds
        },
    )
def casestudies_search():
    """Search case studies by term, domain and seller type; returns an
    Elasticsearch-style ``{'hits': {'total': ..., 'hits': [...]}}`` JSON
    envelope, paginated by the 'from'/'size' query args."""
    search_query = get_json_from_request()
    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])
    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')
    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(Supplier.status != 'deleted',
                 or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        # Phrase queries for multi-word terms, prefix match otherwise.
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        # Second column: highlighted snippet of approach + role.
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(
                CaseStudy.data['approach'].astext,
                ' ',
                CaseStudy.data['role'].astext),
            tsquery,
            'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... 
" '
        ))
    else:
        # Keep the column count stable when there is no search term.
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)
    if domains:
        # Only suppliers covering ALL requested domains.
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))
    if seller_types:
        # Keys of truthy entries in the supplier's seller_type JSON object.
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()
        q = q.filter(selected_seller_types.contains(array(seller_types)))
    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]
    if search_term:
        # Relevance first, then the title ordering as a tiebreaker.
        ob = [desc(func.ts_rank_cd(func.to_tsvector(
            func.concat(Supplier.name,
                        CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)), tsquery))] + ob
        condition = func.to_tsvector(func.concat(Supplier.name,
                                                 CaseStudy.data['title'].astext,
                                                 CaseStudy.data['approach'].astext)).op('@@')(tsquery)
        q = q.filter(condition)
    q = q.order_by(*ob)
    raw_results = list(q)
    results = []
    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)
    total_results = len(results)
    # Pagination happens in Python, after the full result set is built.
    sliced_results = results[offset:(offset + result_count)]
    result = {
        'hits': {
            'total': total_results,
            'hits': [{'_source': r} for r in sliced_results]
        }
    }
    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500
    return response
def fetchMore(self):
    """Lazily load this item's child tasks (or root tasks for a Project)
    from the database and append them as rows, one per task, with extra
    columns for entity type and resource names."""
    logger.debug('TaskItem.fetchMore() is started for item: %s' % self.text())
    if self.canFetchMore():
        from sqlalchemy.orm import aliased
        from sqlalchemy.dialects.postgresql import array_agg
        from stalker import Task, User
        from stalker.models.task import Task_Resources
        from stalker.db.session import DBSession
        # EXISTS subquery: does any task have this task as its parent?
        inner_tasks = aliased(Task)
        subquery = DBSession.query(Task.id) \
            .filter(Task.id == inner_tasks.parent_id)
        query = DBSession.query(
            Task.id,
            Task.name,
            Task.entity_type,
            Task.status_id,
            subquery.exists().label('has_children'),
            # Resource names aggregated per task; [None] when unassigned.
            array_agg(User.name).label('resources')
        ) \
            .outerjoin(Task_Resources, Task.__table__.c.id == Task_Resources.c.task_id) \
            .outerjoin(User, Task_Resources.c.resource_id == User.id) \
            .group_by(
                Task.id,
                Task.name,
                Task.entity_type,
                Task.status_id,
                subquery.exists().label('has_children')
            )
        if self.task.entity_type != 'Project':
            # query child tasks
            query = query.filter(Task.parent_id == self.task.id)
        else:
            # query only root tasks
            query = query.filter(Task.project_id == self.task.id)\
                .filter(Task.parent_id==None)
        tasks = query.order_by(Task.name).all()
        # # model = self.model() # This will cause a SEGFAULT
        # # TODO: update it later on
        # start = time.time()
        from anima import defaults
        task_items = []
        for task in tasks:
            task_item = TaskItem(0, 4, task=task)
            task_item.parent = self
            # color with task status
            task_item.setData(
                QtGui.QColor(
                    *defaults.status_colors_by_id[task.status_id]),
                QtCore.Qt.BackgroundRole)
            # use black text
            task_item.setForeground(QtGui.QBrush(QtGui.QColor(0, 0, 0)))
            task_items.append(task_item)
        if task_items:
            # self.appendRows(task_items)
            for task_item in task_items:
                # TODO: Create a custom QStandardItem for each data type in different columns
                entity_type_item = QtGui.QStandardItem()
                entity_type_item.setData(task_item.task.entity_type,
                                         QtCore.Qt.DisplayRole)
                resources_item = QtGui.QStandardItem()
                # array_agg yields [None] when the task has no resources.
                if task_item.task.resources != [None]:
                    resources_item.setData(
                        ', '.join(map(str, task_item.task.resources)),
                        QtCore.Qt.DisplayRole)
                self.appendRow(
                    [task_item, entity_type_item, resources_item])
        self.fetched_all = True
    logger.debug('TaskItem.fetchMore() is finished for item: %s' % self.text())
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    """Search suppliers from an Elasticsearch-style query body.

    :param search_query: ES-like dict (sort / filtered terms / match_phrase_prefix).
    :param offset: pagination start index.
    :param result_count: page size.
    :param new_domains: default for whether to use the new domain model;
        overridden from the query terms when present.
    :param framework_slug: framework to restrict suppliers to.
    :returns: tuple of (list of supplier dicts for the page, total count).
    """
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'
    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None
    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}
    roles_list = None
    seller_types_list = None
    if terms:
        # Presence of the legacy role key decides which model to query.
        new_domains = 'prices.serviceRole.role' not in terms
        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                # Strip the 7-char role prefix used by the legacy data.
                roles_list = set(_['role'][7:] for _ in roles)
        except KeyError:
            pass
        try:
            seller_types_list = terms['seller_types']
        except:  # noqa
            pass
    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''
    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']
    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(Supplier.status != 'deleted',
                 Supplier.abn != Supplier.DUMMY_ABN,
                 or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        # Special characters need phrase parsing; otherwise prefix match.
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        # Second column: highlighted snippet from summary + JSON fields.
        q = q.add_column(func.ts_headline(
            'english',
            func.concat(Supplier.summary, ' ',
                        Supplier.data['tools'].astext, ' ',
                        Supplier.data['methodologies'].astext, ' ',
                        Supplier.data['technologies'].astext, ''),
            tsquery,
            'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
        ))
    q = q.group_by(Supplier.id)
    try:
        code = search_query['query']['term']['code']
        q = q.filter(Supplier.code == code)
    except KeyError:
        pass
    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                # Only suppliers assessed for ALL requested domains.
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))
    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None
    if seller_types_list is not None:
        # Keys of truthy entries in the supplier's seller_type JSON object.
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type',)]),
            whereclause=cast(column('value'), Boolean)
        ).as_scalar()
        q = q.filter(selected_seller_types.contains(array(seller_types_list)))
    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]
    if search_term:
        # Relevance first, then the name/latest ordering as a tiebreaker.
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob
        q = q.filter(Supplier.text_vector.op('@@')(tsquery))
    q = q.order_by(*ob)
    raw_results = list(q)
    results = []
    for x in range(len(raw_results)):
        # Rows are bare Suppliers without a search term, tuples with one.
        if type(raw_results[x]) is Supplier:
            result = raw_results[x]
        else:
            result = raw_results[x][0]
            if raw_results[x][1] is not None and raw_results[x][1] != '':
                result.summary = raw_results[x][1]
        results.append(result)
    sliced_results = results[offset:(offset + result_count)]
    # Second query: flat rows of supplier fields joined with domain status,
    # regrouped below into one dict per supplier.
    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary,
                         Supplier.is_recruiter, Supplier.data,
                         Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)
    suppliers = [r._asdict() for r in q]
    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        # BUG FIX: was `group.next()` (Python 2 only); `next()` works on
        # both Python 2.6+ and 3.
        supplier = next(group)
        supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type')
        supplier['domains'] = {'assessed': [], 'unassessed': []}
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)
        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)
        sliced_results.append(supplier)
    return sliced_results, len(results)
def casestudies_search():
    """Search case studies by term, domain and seller type; returns an
    Elasticsearch-style ``{'hits': {'total': ..., 'hits': [...]}}`` JSON
    envelope, paginated by the 'from'/'size' query args."""
    search_query = get_json_from_request()
    offset = get_nonnegative_int_or_400(request.args, 'from', 0)
    result_count = get_positive_int_or_400(
        request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE'])
    sort_dir = search_query.get('sort_dir', 'asc')
    sort_by = search_query.get('sort_by', None)
    domains = search_query.get('domains', None)
    seller_types = search_query.get('seller_types', None)
    search_term = search_query.get('search_term', None)
    framework_slug = request.args.get('framework', 'digital-marketplace')
    q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
        .outerjoin(SupplierFramework).outerjoin(Framework)
    q = q.filter(
        Supplier.status != 'deleted',
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))
    tsquery = None
    if search_term:
        # Phrase queries for multi-word terms, prefix match otherwise.
        if ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        # Second column: highlighted snippet of approach + role.
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(CaseStudy.data['approach'].astext, ' ',
                            CaseStudy.data['role'].astext), tsquery,
                'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... 
" '
            ))
    else:
        # Keep the column count stable when there is no search term.
        q = q.add_column("''")
    q = q.add_column(Supplier.name)
    q = q.add_column(postgres.array_agg(Supplier.data))
    q = q.group_by(CaseStudy.id, Supplier.name)
    if domains:
        # Only suppliers covering ALL requested domains.
        d_agg = postgres.array_agg(cast(Domain.name, TEXT))
        q = q.having(d_agg.contains(array(domains)))
    if seller_types:
        # Keys of truthy entries in the supplier's seller_type JSON object.
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()
        q = q.filter(selected_seller_types.contains(array(seller_types)))
    if sort_dir in ('desc', 'z-a'):
        ob = [desc(CaseStudy.data['title'].astext)]
    else:
        ob = [asc(CaseStudy.data['title'].astext)]
    if search_term:
        # Relevance first, then the title ordering as a tiebreaker.
        ob = [
            desc(
                func.ts_rank_cd(
                    func.to_tsvector(
                        func.concat(
                            Supplier.name, CaseStudy.data['title'].astext,
                            CaseStudy.data['approach'].astext)), tsquery))
        ] + ob
        condition = func.to_tsvector(
            func.concat(Supplier.name, CaseStudy.data['title'].astext,
                        CaseStudy.data['approach'].astext)).op('@@')(tsquery)
        q = q.filter(condition)
    q = q.order_by(*ob)
    raw_results = list(q)
    results = []
    for x in range(len(raw_results)):
        result = raw_results[x][0].serialize()
        if raw_results[x][1] is not None and raw_results[x][1] != '':
            result['approach'] = raw_results[x][1]
        if raw_results[x][2] is not None:
            result['supplierName'] = raw_results[x][2]
        if raw_results[x][3] is not None and raw_results[x][3][0] is not None:
            result['seller_type'] = raw_results[x][3][0].get('seller_type')
        results.append(result)
    total_results = len(results)
    # Pagination happens in Python, after the full result set is built.
    sliced_results = results[offset:(offset + result_count)]
    result = {
        'hits': {
            'total': total_results,
            'hits': [{
                '_source': r
            } for r in sliced_results]
        }
    }
    try:
        response = jsonify(result), 200
    except Exception as e:
        response = jsonify(message=str(e)), 500
    return response
def fetchMore(self):
    """Lazily load this item's child tasks into the Qt item model.

    Queries child tasks (or root tasks when this item is a Project) together
    with a ``has_children`` flag and the aggregated resource names, builds one
    TaskItem row per task and appends it to this item. Sets
    ``self.fetched_all`` so subsequent calls become no-ops via canFetchMore().
    """
    logger.debug(
        'TaskItem.fetchMore() is started for item: %s' % self.text()
    )
    if self.canFetchMore():
        # Local imports keep SQLAlchemy/stalker out of module import time.
        from sqlalchemy import alias
        from sqlalchemy.dialects.postgresql import array_agg
        from stalker import Task, User
        from stalker.models.task import Task_Resources
        from stalker.db.session import DBSession

        # EXISTS(child with parent_id == this task) -> 'has_children' column.
        inner_tasks = alias(Task.__table__)
        subquery = DBSession.query(Task.id)\
            .filter(Task.id == inner_tasks.c.parent_id)
        query = DBSession.query(
            Task.id,
            Task.name,
            Task.entity_type,
            Task.status_id,
            subquery.exists().label('has_children'),
            # Aggregated per task; yields [None] when a task has no resources.
            array_agg(User.name).label('resources')
        )\
            .outerjoin(Task_Resources, Task.id == Task_Resources.c.task_id)\
            .outerjoin(User, Task_Resources.c.resource_id == User.id) \
            .group_by(
                Task.id,
                Task.name,
                Task.entity_type,
                Task.status_id,
                subquery.exists().label('has_children')
            )

        if self.task.entity_type != 'Project':
            # query child tasks
            query = query.filter(Task.parent_id == self.task.id)
        else:
            # query only root tasks
            # (== None is intentional: SQLAlchemy renders it as IS NULL)
            query = query.filter(Task.project_id == self.task.id)\
                .filter(Task.parent_id==None)

        tasks = query.order_by(Task.name).all()

        # model = self.model()  # This will cause a SEGFAULT
        # TODO: update it later on
        # start = time.time()
        from anima import defaults

        task_items = []
        for task in tasks:
            task_item = TaskItem(0, 4, entity=task)
            task_item.parent = self
            # color with task status
            task_item.setData(
                QtGui.QColor(
                    *defaults.status_colors_by_id[task.status_id]
                ),
                QtCore.Qt.BackgroundRole
            )
            # use black text
            task_item.setForeground(
                QtGui.QBrush(QtGui.QColor(0, 0, 0))
            )
            task_items.append(task_item)

        if task_items:
            # self.appendRows(task_items)
            for task_item in task_items:
                # TODO: Create a custom QStandardItem for each data type in
                #       different columns
                entity_type_item = QtGui.QStandardItem()
                entity_type_item.setData(task_item.task.entity_type, QtCore.Qt.DisplayRole)

                resources_item = QtGui.QStandardItem()
                # [None] is array_agg's result for "no resources" — skip it.
                if task_item.task.resources != [None]:
                    resources_item.setData(', '.join(map(str, task_item.task.resources)),
                                           QtCore.Qt.DisplayRole)

                self.appendRow([task_item, entity_type_item, resources_item])

        self.fetched_all = True

    logger.debug(
        'TaskItem.fetchMore() is finished for item: %s' % self.text()
    )
def do_search(search_query, offset, result_count, new_domains, framework_slug):
    """Search suppliers against an Elasticsearch-style query dict.

    Supports both the new domain model (``domains.assessed``) and the legacy
    service-role model, optional seller-type filtering, a full-text search
    term, and sorting by name or last update.

    :param search_query: ES-style query dict (sort/filters/match_phrase_prefix)
    :param offset: start index of the page to return
    :param result_count: page size
    :param new_domains: default domain-model flag; overridden by the terms seen
        in the query itself
    :param framework_slug: framework to restrict suppliers to
    :return: tuple of (page of supplier dicts, total result count)
    """
    try:
        sort_dir = list(search_query['sort'][0].values())[0]['order']
    except (KeyError, IndexError):
        sort_dir = 'asc'
    try:
        sort_by = list(search_query['sort'][0].values())[0]['sort_by']
    except (KeyError, IndexError):
        sort_by = None
    try:
        terms = search_query['query']['filtered']['filter']['terms']
    except (KeyError, IndexError):
        terms = {}

    roles_list = None
    seller_types_list = None

    if terms:
        # The presence of the legacy role key decides which data model applies.
        new_domains = 'prices.serviceRole.role' not in terms
        try:
            if new_domains:
                roles_list = terms['domains.assessed']
            else:
                roles = terms['prices.serviceRole.role']
                # Legacy role names carry a 7-character prefix; strip it.
                roles_list = set(role['role'][7:] for role in roles)
        except KeyError:
            pass
        try:
            seller_types_list = terms['seller_types']
        except (KeyError, TypeError):
            # was a bare `except:`; narrowed to what dict access can raise
            pass

    try:
        search_term = search_query['query']['match_phrase_prefix']['name']
    except KeyError:
        search_term = ''

    EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING']

    if new_domains:
        q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \
            .outerjoin(SupplierFramework).outerjoin(Framework)
    else:
        q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \
            .outerjoin(SupplierFramework).outerjoin(Framework)

    q = q.filter(
        Supplier.status != 'deleted',
        Supplier.abn != Supplier.DUMMY_ABN,
        or_(Framework.slug == framework_slug, ~Supplier.frameworks.any()))

    tsquery = None
    if search_term:
        # Punctuated terms -> phrase query; multi-word -> plain query;
        # single word -> prefix match.
        if any(c in search_term for c in ['#', '-', '_', '/', '\\']):
            tsquery = func.phraseto_tsquery(search_term)
        elif ' ' in search_term:
            tsquery = func.plainto_tsquery(search_term)
        else:
            tsquery = func.to_tsquery(search_term + ":*")
        # Second result column: highlighted summary snippet.
        q = q.add_column(
            func.ts_headline(
                'english',
                func.concat(Supplier.summary, ' ',
                            Supplier.data['tools'].astext, ' ',
                            Supplier.data['methodologies'].astext, ' ',
                            Supplier.data['technologies'].astext, ''),
                tsquery,
                'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1'
            ))

    q = q.group_by(Supplier.id)

    # Exact supplier-code lookup, when requested. Keep the try body minimal so
    # only the dict lookup's KeyError is swallowed.
    try:
        code = search_query['query']['term']['code']
    except KeyError:
        pass
    else:
        q = q.filter(Supplier.code == code)

    if roles_list is not None:
        if new_domains:
            if EXCLUDE_LEGACY_ROLES:
                # All requested domains must appear among assessed domains.
                d_agg = postgres.array_agg(cast(Domain.name, TEXT))
                q = q.filter(SupplierDomain.status == 'assessed')
                q = q.having(d_agg.contains(array(roles_list)))
        else:
            sr_agg = postgres.array_agg(
                cast(func.substring(ServiceRole.name, 8), TEXT))
            q = q.having(sr_agg.contains(array(roles_list)))

    # 'recruiter' is a column flag, not a data key — handle it separately.
    if seller_types_list is not None and 'recruiter' in seller_types_list:
        q = q.filter(Supplier.is_recruiter == 'true')
        seller_types_list.remove('recruiter')
        if len(seller_types_list) == 0:
            seller_types_list = None

    if seller_types_list is not None:
        # Truthy keys of supplier.data['seller_type'] must cover the request.
        selected_seller_types = select(
            [postgres.array_agg(column('key'))],
            from_obj=func.json_each_text(Supplier.data[('seller_type', )]),
            whereclause=cast(column('value'), Boolean)).as_scalar()
        q = q.filter(selected_seller_types.contains(array(seller_types_list)))

    if sort_by:
        if sort_by == 'latest':
            ob = [desc(Supplier.last_update_time)]
        else:
            ob = [asc(Supplier.name)]
    else:
        if sort_dir == 'desc':
            ob = [desc(Supplier.name)]
        else:
            ob = [asc(Supplier.name)]
    if search_term:
        # Rank text matches first and restrict rows to actual matches.
        ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob
        q = q.filter(Supplier.text_vector.op('@@')(tsquery))
    q = q.order_by(*ob)

    raw_results = list(q)
    results = []
    # Rows are plain Supplier objects unless a headline column was added.
    for row in raw_results:
        if isinstance(row, Supplier):
            result = row
        else:
            result = row[0]
            if row[1] is not None and row[1] != '':
                result.summary = row[1]
        results.append(result)

    sliced_results = results[offset:(offset + result_count)]

    # Re-query the page with domain rows flattened in, one row per
    # (supplier, domain) pair, ordered so groupby works per supplier.
    q = db.session.query(Supplier.code, Supplier.name, Supplier.summary,
                         Supplier.is_recruiter, Supplier.data,
                         Domain.name.label('domain_name'),
                         SupplierDomain.status.label('domain_status'))\
        .outerjoin(SupplierDomain, Domain)\
        .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\
        .order_by(Supplier.name)
    suppliers = [r._asdict() for r in q]

    sliced_results = []
    for key, group in groupby(suppliers, key=itemgetter('code')):
        # BUGFIX: was `group.next()` (Python 2 only); use the builtin next().
        supplier = next(group)
        supplier['seller_type'] = supplier.get(
            'data') and supplier['data'].get('seller_type')
        supplier['domains'] = {'assessed': [], 'unassessed': []}
        # Include the first row again so its domain is not lost.
        for s in chain([supplier], group):
            domain, status = s['domain_name'], s['domain_status']
            if domain:
                if status == 'assessed':
                    supplier['domains']['assessed'].append(domain)
                else:
                    supplier['domains']['unassessed'].append(domain)
        for e in ['domain_name', 'domain_status', 'data']:
            supplier.pop(e, None)
        sliced_results.append(supplier)

    return sliced_results, len(results)
class AttributesView(Base, ViewMixin):
    """Materialized view flattening per-object attribute values into arrays.

    Each row carries one ``external_object_id`` plus one array column per
    ValueType (titles, dates, genres, durations, names, countries), produced
    by pivoting the Value table with ``crosstab``, and a weighted
    ``search_vector`` built from the first four titles.
    """
    __tablename__ = "vw_attributes"
    __materialized__ = True
    __table_args__ = {
        "selectable": select([
            Column("external_object_id", primary_key=True),
            _titles,
            Column("dates", ARRAY(Integer)),
            Column("genres", ARRAY(Text)),
            Column("durations", ARRAY(Float)),
            Column("names", ARRAY(Text)),
            Column("countries", ARRAY(CHAR(length=2))),
            # Build a search vector from the first four titles, weighted A-D
            # in decreasing importance and concatenated with tsvector `||`.
            func.setweight(func.to_tsvector(func.coalesce(_titles[0], "")), "A").op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[1], "")), "B")).op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[2], "")), "C")).op("||")
            (func.setweight(func.to_tsvector(func.coalesce(_titles[3], "")), "D")).label("search_vector"),
        ]).select_from(
            # Pivot: one source row per (object, value type) with the values
            # aggregated best-score-first; crosstab spreads the types into
            # the columns declared in the `ct` table literal below.
            crosstab(
                select([
                    Value.external_object_id,
                    Value.type,
                    # coalesce so objects missing a type get '{}' not NULL
                    func.coalesce(
                        array_agg(
                            aggregate_order_by(Value.text, ValueScoreView.score.desc())),
                        cast(text("'{}'"), ARRAY(Text)),
                    ),
                ]).select_from(
                    join(Value, ValueScoreView, Value.id == ValueScoreView.value_id).join(
                        ValueSource, Value.id == ValueSource.value_id)).
                where(
                    and_(
                        _attribute_filter,
                        # Only keep values coming from the top-ranked platform
                        # for that (object, type); titles and INFO platforms
                        # are the allowed sources.
                        tuple_(
                            Value.external_object_id,
                            Value.type,
                            ValueSource.platform_id,
                        ).in_(
                            select([
                                PlatformSourceOrderByValueType.val_eo_id,
                                PlatformSourceOrderByValueType.val_type,
                                PlatformSourceOrderByValueType.pl_id,
                            ]).select_from(PlatformSourceOrderByValueType).
                            where(
                                and_(
                                    PlatformSourceOrderByValueType.pl_order == 1,
                                    or_(
                                        PlatformSourceOrderByValueType.pl_type == PlatformType.INFO,
                                        PlatformSourceOrderByValueType.val_type == ValueType.TITLE,
                                    ),
                                ))),
                    )).group_by(Value.external_object_id, Value.type),
                # Output row type of the crosstab call.
                table(
                    "ct",
                    column("external_object_id", Integer),
                    column("titles", ARRAY(Text)),
                    column("dates", ARRAY(Integer)),
                    column("genres", ARRAY(Text)),
                    column("durations", ARRAY(Float)),
                    column("names", ARRAY(Text)),
                    column("countries", ARRAY(CHAR(length=2))),
                ),
                # Fixed category order taken from the ValueType enum.
                categories=select(
                    [func.unnest(array([v.name for v in ValueType]))]),
                auto_order=False,
            )),
        # Views/tables this view must be (re)created after.
        "dependencies": (
            Value,
            ValueScoreView,
            ValueSource,
            PlatformSourceOrderByValueType,
        ),
        "materialized": True,
        "indexes": (Index("pk_vw_attributes", "external_object_id", unique=True), ),
    }
def venues(): data = Venue.query.with_entities(Venue.city, Venue.state, postgresql.array_agg( func.json_build_object('id', Venue.id, 'name', Venue.name)).label('venues')) \ .group_by(Venue.city, Venue.state).all() return render_template('pages/venues.html', areas=data)
def listschemas(request):
    """
    Loads all schemas that are present in the external database specified in
    oeplatform/securitysettings.py. Only schemas that are present in the
    whitelist are processed that do not start with an underscore.

    :param request: A HTTP-request object sent by the Django framework

    :return: Renders the schema list
    """
    searchedQueryString = request.GET.get("query")
    searchedTagIds = [int(t) for t in request.GET.getlist("tags")]

    for tag_id in searchedTagIds:
        increment_usage_count(tag_id)

    # Prefix-match every word of the query string against the search vector.
    # NOTE: regex is a raw string — "[\w]+" emitted an invalid-escape warning.
    filter_kwargs = dict(
        search=SearchQuery(" & ".join(
            p + ":*" for p in re.findall(r"\w+", searchedQueryString)),
            search_type="raw")) if searchedQueryString else {}

    response = Table.objects.filter(
        **filter_kwargs).values("schema__name").annotate(
        tables_count=Count("name"))

    # Map schema name -> set of tag ids, via the external (non-Django) DB.
    engine = actions._get_engine()
    conn = engine.connect()
    Session = sessionmaker()
    session = Session(bind=conn)
    try:
        tags = {
            r[0]: set(r[1])
            for r in session.query(
                TableTags.schema_name,
                array_agg(TableTags.tag)).group_by(TableTags.schema_name)
        }
    finally:
        # BUGFIX: the session and connection were never closed before.
        session.close()
        conn.close()

    description = {
        "boundaries": "Data that depicts boundaries, such as geographic, administrative or political boundaries. Such data comes as polygons.",
        "climate": "Data related to climate and weather. This includes, for example, precipitation, temperature, cloud cover and atmospheric conditions.",
        "economy": "Data related to economic activities. Examples: sectoral value added, sectoral inputs and outputs, GDP, prices of commodities etc.",
        "demand": "Data on demand. Demand can relate to commodities but also to services.",
        "grid": "Energy transmission infrastructure. examples: power lines, substation, pipelines",
        "supply": "Data on supply. Supply can relate to commodities but also to services.",
        "environment": "environmental resources, protection and conservation. "
                       "examples: environmental pollution, waste storage and treatment, environmental impact assessment, monitoring environmental risk, nature reserves, landscape",
        "society": "Demographic data such as population statistics and projections, fertility, mortality etc.",
        "model_draft": "Unfinished data of any kind. Note: there is no version control and data is still volatile.",
        "scenario": "Scenario data in the broadest sense. Includes input and output data from models that project scenarios into the future. Example inputs: assumptions made about future developments of key parameters such as energy prices and GDP. Example outputs: projected electricity transmission, projected greenhouse gas emissions. Note that inputs to one model could be an output of another model and the other way around.",
        "reference": "Contains sources, literature and auxiliary/helper tables that can help you with your work.",
        "emission": "Data on emissions. Examples: total greenhouse gas emissions, CO2-emissions, energy-related CO2-emissions, methane emissions, air pollutants etc.",
        "openstreetmap": "OpenStreetMap is a open project that collects and structures freely usable geodata and keeps them in a database for use by anyone. This data is available under a free license, the Open Database License.",
        "policy": "Data on policies and measures. This could, for example, include a list of renewable energy policies per European Member State. It could also be a list of climate related policies and measures in a specific country."
    }

    # Whitelisted schemas whose tags cover every searched tag, sorted by name.
    schemas = sorted(
        [(row["schema__name"],
          description.get(row["schema__name"], "No description"),
          row["tables_count"],
          tags.get(row["schema__name"], []))
         for row in response
         if row["schema__name"] in schema_whitelist and tags.get(
             row["schema__name"], set()).issuperset(searchedTagIds or set())],
        key=lambda x: x[0],
    )

    return render(request, "dataedit/dataedit_schemalist.html", {
        "schemas": schemas,
        "query": searchedQueryString,
        "tags": searchedTagIds
    })
def make_submission_dataframe(query, form, selected_tags=None, excluded_tags=None):
    """Build a pandas DataFrame of submission field data plus location info.

    :param query: SQLAlchemy query of Submission rows
    :param form: form whose tags define the candidate field columns
    :param selected_tags: if given, restrict columns to these tags
    :param excluded_tags: if given, drop these tags (wins over selected_tags)
    :return: DataFrame with one column per field tag, 'updated',
        'registered_voters', 'location_data', and one column per location
        ancestor level; empty DataFrame when the query matches nothing
    """
    if not db.session.query(query.exists()).scalar():
        return pd.DataFrame()

    # get column headers by getting all ancestor names
    # of the submission location's location type
    sample_submission = query.first()
    location_type = sample_submission.location.location_type
    ancestor_names = [a.name for a in location_type.ancestors()] + \
        [location_type.name]

    # excluded tags have higher priority than selected tags
    fields = set(form.tags)
    if selected_tags:
        fields = fields.intersection(selected_tags)
    if excluded_tags:
        fields = fields.difference(excluded_tags)

    # the 'updated' field is required for results analysis
    columns = [
        Submission.data[tag].label(tag) for tag in fields] + [
        Submission.updated
    ]

    # alias just in case the query is already joined to the tables below
    loc = aliased(Location)
    loc_path = aliased(LocationPath)
    own_loc = aliased(Location)

    # add registered voters and path extraction to the columns;
    # ancestor names are aggregated deepest-first into 'location_data'
    columns.append(own_loc.registered_voters.label('registered_voters'))
    columns.append(
        array_agg(aggregate_order_by(loc.name, loc_path.depth.desc())).label(
            'location_data'))

    query2 = query.join(
        loc_path, Submission.location_id == loc_path.descendant_id
    ).join(loc, loc.id == loc_path.ancestor_id).join(
        own_loc, own_loc.id == Submission.location_id)

    # type coercion is necessary for numeric columns
    # if we allow Pandas to infer the column type for these,
    # there's a risk that it will guess wrong, then it might
    # raise exceptions when we're calculating the mean and
    # standard deviation on those columns
    type_coercions = {
        tag: np.float64
        for tag in form.tags
        if form.get_field_by_tag(tag)['type'] == 'integer'}

    dataframe_query = query2.with_entities(*columns).group_by(
        Submission.id, own_loc.registered_voters)

    df = pd.read_sql(dataframe_query.statement, dataframe_query.session.bind)
    df = df.astype(type_coercions)

    # expand the aggregated location names into one column per ancestor level
    df_locations = df['location_data'].apply(pd.Series).rename(
        columns=dict(enumerate(ancestor_names)))

    # BUGFIX: `join_axes` was removed in pandas 1.0; aligning the expanded
    # frame to df's index before concatenating is the exact equivalent.
    return pd.concat([df, df_locations.reindex(df.index)], axis=1)