def _build_query(self, filter_values):
    having = []
    filter_cols = []
    external_cols = _get_grouping(filter_values)

    for fil in self.filters:
        if fil.column_name not in ['group', 'gender', 'group_leadership',
                                   'disaggregate_by', 'table_card_group_by']:
            if fil.column_name not in external_cols and fil.column_name != 'maxmin':
                filter_cols.append(fil.column_name)
            having.append(fil.build_expression())

    group_having = ''
    having_group_by = []
    if ('disaggregate_by' in filter_values and filter_values['disaggregate_by'] == 'group') or \
            ('table_card_group_by' in filter_values and filter_values['table_card_group_by']):
        group_having = "group_leadership='Y'"
        having_group_by.append('group_leadership')
    elif 'group_leadership' in filter_values and filter_values['group_leadership']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :group_leadership and group_leadership='Y'"
        having_group_by.append('group_leadership')
        filter_cols.append('group_leadership')
    elif 'gender' in filter_values and filter_values['gender']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :gender"

    table_card_group = []
    if 'group_name' in self.group_by:
        table_card_group.append('group_name')

    s1 = alias(select(['doc_id', 'group_id', 'MAX(prop_value) + MIN(prop_value) as maxmin'] +
                      filter_cols + external_cols,
                      from_obj='"fluff_FarmerRecordFluff"',
                      group_by=['doc_id', 'group_id'] + filter_cols + external_cols),
               name='x')
    s2 = alias(select(['group_id',
                       '(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) as gender'] +
                      table_card_group,
                      from_obj='"fluff_FarmerRecordFluff"',
                      group_by=['group_id'] + table_card_group + having_group_by,
                      having=group_having),
               name='y')
    return select(['COUNT(x.doc_id) as %s' % self.key] + self.group_by,
                  group_by=['maxmin'] + filter_cols + self.group_by,
                  having=" and ".join(having),
                  from_obj=join(s1, s2, s1.c.group_id == s2.c.group_id)).params(filter_values)

def __repr__(self):
    """Readable serialisation."""
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    selects = [
        (expression.alias(asserted_type_table, "typetable"), None, ASSERTED_TYPE_PARTITION),
        (expression.alias(quoted_table, "quoted"), None, QUOTED_PARTITION),
        (expression.alias(asserted_table, "asserted"), None, ASSERTED_NON_TYPE_PARTITION),
        (expression.alias(literal_table, "literal"), None, ASSERTED_LITERAL_PARTITION),
    ]
    q = union_select(selects, distinct=False, select_type=COUNT_SELECT)
    if hasattr(self, "engine"):
        with self.engine.connect() as connection:
            res = connection.execute(q)
            rt = res.fetchall()
        typeLen, quotedLen, assertedLen, literalLen = [
            rtTuple[0] for rtTuple in rt]
        try:
            # Note: the original concatenated string fragments with "+" so the
            # "%" only applied to the last fragment, which always raised and
            # fell through to the bare fallback; format the whole string instead.
            return (
                "<Partitioned SQL N3 Store: %s contexts, %s classification assertions, "
                "%s quoted statements, %s property/value assertions, "
                "and %s other assertions>" % (
                    len([ctx for ctx in self.contexts()]),
                    typeLen, quotedLen, literalLen, assertedLen))
        except Exception:
            return "<Partitioned SQL N3 Store>"
    else:
        return "<Partitioned unopened SQL N3 Store>"

def __init__(self, data: List[str], asteroid_num: int, planets: Tuple[str]):
    super(ThreeBodyResonanceFactory, self).__init__(planets)
    self._bodies = dict(
        first_body={
            'name': self._planets[0],
            LONG_COEFF: int(data[0]),
            PERI_COEFF: int(data[3])
        },
        second_body={
            'name': self._planets[1],
            LONG_COEFF: int(data[1]),
            PERI_COEFF: int(data[4])
        },
        small_body={
            'name': 'A%s' % asteroid_num,
            LONG_COEFF: int(data[2]),
            PERI_COEFF: int(data[5]),
            'axis': float(data[6])
        }
    )
    self._body_tables = dict(
        first_body=alias(_planet_table, 'first_body'),
        second_body=alias(_planet_table, 'second_body'),
        small_body=alias(_asteroid_table, 'small_body')
    )
    self._resonance_cls = ThreeBodyResonance
    self._resonance_table = table(self._resonance_cls.__tablename__, *self._columns)

def _build_query(self, table, filter_values):
    having = []
    filter_cols = []
    external_cols = _get_grouping(filter_values)

    for fil in self.filters:
        if isinstance(fil, ANDFilter):
            filter_cols.append(fil.filters[0].column_name)
            having.append(fil)
        elif isinstance(fil, RawFilter):
            having.append(fil)
        elif fil.column_name not in ['group', 'gender', 'group_leadership',
                                     'disaggregate_by', 'table_card_group_by']:
            if fil.column_name not in external_cols and fil.column_name != 'maxmin':
                filter_cols.append(fil.column_name)
            having.append(fil)

    group_having = ''
    having_group_by = []
    if ('disaggregate_by' in filter_values and filter_values['disaggregate_by'] == 'group') or \
            (filter_values.get('table_card_group_by') == 'group_leadership'):
        having_group_by.append('group_leadership')
    elif 'group_leadership' in filter_values and filter_values['group_leadership']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) " \
                       "= :group_leadership and group_leadership='Y'"
        having_group_by.append('group_leadership')
        filter_cols.append('group_leadership')
    elif 'gender' in filter_values and filter_values['gender']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :gender"

    table_card_group = []
    if 'group_name' in self.group_by:
        table_card_group.append('group_name')

    s1 = alias(select([table.c.doc_id, table.c.group_case_id, table.c.group_name,
                       table.c.group_id,
                       (sqlalchemy.func.max(table.c.prop_value) +
                        sqlalchemy.func.min(table.c.prop_value)).label('maxmin')] +
                      filter_cols + external_cols,
                      from_obj=table,
                      group_by=([table.c.doc_id, table.c.group_case_id, table.c.group_name,
                                 table.c.group_id] + filter_cols + external_cols)),
               name='x')
    s2 = alias(
        select(
            [table.c.group_case_id,
             sqlalchemy.cast(
                 cast(func.max(table.c.gender), Integer) +
                 cast(func.min(table.c.gender), Integer),
                 VARCHAR).label('gender')] + table_card_group,
            from_obj=table,
            group_by=[table.c.group_case_id] + table_card_group + having_group_by,
            having=group_having
        ),
        name='y'
    )

    group_by = list(self.group_by)
    if 'group_case_id' in group_by:
        group_by[group_by.index('group_case_id')] = s1.c.group_case_id
        group_by[group_by.index('group_name')] = s1.c.group_name
    return select(
        [sqlalchemy.func.count(s1.c.doc_id).label(self.key)] + group_by,
        group_by=[s1.c.maxmin] + filter_cols + group_by,
        having=AND(having).build_expression(s1),
        from_obj=join(s1, s2, s1.c.group_case_id == s2.c.group_case_id)
    ).params(filter_values)

def ResultQuery(run_id, max_time=None, time=None, include_absent=False, results_from_id=None):
    if max_time is None and time is None:
        time, max_time = ServerCache().Get(('run_times', run_id), lambda: GetRunTimes(run_id))
    run = Run.get_by(id=run_id)
    if run:
        r = session.query()
        team = alias(select([Team.table], Team.table.c.present.op('&')(1 << (run.day - 1))),
                     alias="team")
        sort = alias(select([Sort.table], Sort.table.c.run_id == run_id), alias="sort")
        breed = alias(select([Breed.table]), alias="breed")
        res = alias(select([Result.table], Result.table.c.run_id == run_id), alias="result")
        r = r.add_entity(Team, alias=team)
        if results_from_id:
            res_from = alias(select([Result.table], Result.table.c.run_id == results_from_id),
                             alias="result_from")
            r = r.add_entity(Result, alias=res_from)
            r = r.add_entity(Result, alias=res)
            r = r.outerjoin(res).outerjoin(res_from)
        else:
            r = r.add_entity(Result, alias=res)
            r = r.outerjoin(res)
        r = r.add_entity(Breed, alias=breed)
        r = r.add_entity(Sort, alias=sort)
        r = r.outerjoin(sort).outerjoin(breed)
        r = r.add_columns((team.c.handler_name + ' ' + team.c.handler_surname).label("team_handler"))
        r = r.add_columns((team.c.dog_name + ' ' + team.c.dog_kennel).label("team_dog"))
        r = r.add_columns((res.c.mistakes * 5 + res.c.refusals * 5).label("penalty"))
        r = r.add_columns(((res.c.time - time) * (res.c.time > time)).label("time_penalty"))
        r = r.add_columns(((res.c.time - time) * (res.c.time > time) +
                           res.c.mistakes * 5 + res.c.refusals * 5).label("total_penalty"))
        r = r.add_columns(func.ifnull(run.length / res.c.time, 0).label('speed'))
        disq = (res.c.time > max_time) | (res.c.disqualified) | (res.c.refusals >= 3)
        if include_absent:
            disq = disq | (res.c.time == 0)
        r = r.add_columns(disq.label("disq"))
        if run.variant == 0:
            r = r.filter(team.c.category == run.category)
        else:
            r = r.filter(team.c.category != 3)
        s = ((func.ifnull(sort.c.value, 0) == 0) & (team.c.def_sort == 1)) | \
            ((func.ifnull(sort.c.value, 0) == 3) & (team.c.def_sort == 0))
        r = r.filter((s != 1) & ((res.c.time > 0) | 'disq'))
        r = r.order_by("disq, total_penalty, penalty, result_time")
        return r, {'team': team, 'result': res, 'sort': sort}
    else:
        return None, None

def delete_rows_with_missing_fkey(fkey, delete_missing=True):
    fkey = _as_fkey(fkey)
    if fkey.parent.nullable:
        return True
    session = get_session_maker()()
    source = fkey.parent.table
    target = fkey.column.table
    if source == target:
        target = alias(source)
    source_primary_key = primary_key_col(source)
    q = session.query(source_primary_key).outerjoin(
        target, fkey.parent == fkey.column).filter(
        target.c.id == None)
    count = q.count()
    if count:
        if delete_missing:
            with transaction.manager:
                # session.execute(source.delete(source.c.id.in_(q)))
                for (id,) in q:
                    delete_row(session, source, id)
                mark_changed(session)
        else:
            print "There are %d ids in %s with dangling %s:" % (
                count, source.name, fk_as_str(fkey))
            print q.all()
            return False
    return True

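# A minimal, self-contained sketch of the pattern the function above relies on
# (illustrative only; the "node" table and its columns are hypothetical): a
# table cannot be outer-joined to itself under one name, so alias() supplies a
# second name, and rows whose referenced parent is missing surface as NULLs.
from sqlalchemy import Column, Integer, MetaData, Table, select
from sqlalchemy.sql.expression import alias

metadata = MetaData()
node = Table("node", metadata,
             Column("id", Integer, primary_key=True),
             Column("parent_id", Integer))
parent = alias(node)  # a second name for the same table, e.g. "node_1"
orphans = (select([node.c.id])
           .select_from(node.outerjoin(parent, node.c.parent_id == parent.c.id))
           .where(parent.c.id == None))  # noqa: E711 -- rendered as IS NULL
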
def query_multiple(
        self, data, filter_col="piece.id", table="clefs_ins_piece"):
    if self.validate_table(table):
        _table = self.tables[table]
        _filter_col = getattr(_table.columns, filter_col)
        q = select([_filter_col])
        for elem in data:
            query = _table.select()
            nxtalias = alias(_table)
            for key in elem:
                col = getattr(nxtalias.columns, key)
                expr = self.mk_or_expr(elem[key], col)
                query = query.where(expr)
            alias_filter = getattr(nxtalias.columns, filter_col)
            query = query.where(alias_filter == _filter_col)
            q = q.where(exists(query))
        result_prox = self.execute(q)
        return set([elem[0] for elem in result_prox])
    else:
        raise BadTableException(
            "table {} not in {}".format(
                table, self.tables.keys()))

def query_similar_rows(self, data, match_cols=[], excl_cols=[], table='pieces'):
    '''SELECT i2.ROWID, i2.name
       FROM instruments i1, instruments i2
       WHERE i1.name = ?
         AND i2.diatonic = i1.diatonic
         AND i2.chromatic = i1.chromatic
         AND i2.name != i1.name'''
    if self.validate_table(table):
        _table = self.tables[table]
        query = _table.select()
        tbl_alias = alias(_table)
        for key in data:
            col = getattr(_table.columns, key)
            col2 = getattr(tbl_alias.columns, key)
            expr2 = col != col2
            expr = col2 == data[key]
            query = query.where((expr) & (expr2))
        for col in match_cols:
            column = getattr(_table.columns, col)
            col2 = getattr(tbl_alias.columns, col)
            expr = column == col2
            query = query.where(expr)
        for exc in excl_cols:
            column = getattr(_table.columns, exc)
            col2 = getattr(tbl_alias.columns, exc)
            expr = column != col2
            query = query.where(expr)
        res = list(self.execute(query))
        res = self.to_dict(table, res)
        return res
    else:
        raise BadTableException("table {} not in {}".format(
            table, self.tables.keys()))

def score_per_semanticfield_query(type_):  # pragma: no cover
    aa = alias(score_per_meaning_query(type_), name='aa')
    return select([
        aa.c.semantic_field_pk,
        func.sum(getattr(aa.c, '%s_score' % type_)) / func.count('*'),
        func.count('*'),
    ], group_by=[aa.c.semantic_field_pk])

def __call__(self, rp=100, offset=0, collection=None, no_count=False, show_ids=False):
    """
    IN
      rp         <int>
      offset     <int>
      filters    [<tuple>, ...]
      collection <tuple> (<str>, <str>, <int>,)
      no_count   <bool> => False
      show_ids   <bool> => False
    OUT
      <DataSet>
    """
    sql = self.sql
    if collection:
        child_table_name = collection[0]
        child_attr = collection[1]
        parent_id = collection[2]
        child_attr_alias = '%s$' % child_attr
        if self.tbl.c.has_key(child_attr_alias):
            sql = and_(sql, self.tbl.c[child_attr_alias] == parent_id)
        else:
            child_table = alias(
                Table(child_table_name, self.meta, autoload=True))
            self.from_ = self.from_.\
                join(child_table,
                     and_(child_table.c.id == self.tbl.c.id,
                          child_table.c[child_attr] == parent_id))
    # where
    if isinstance(self.tbl, Select):
        qry = self.tbl.where(sql)
    else:
        qry = select([self.tbl], from_obj=self.from_, whereclause=sql)
    # order by
    if self.order:
        qry = qry.order_by(self.order)
    return DataSet.procesar_resultado(self.session, qry, rp, offset,
                                      no_count=no_count, show_ids=show_ids)

def test_date(session):
    dates = (
        date(2016, 1, 1),
        date(2016, 1, 2),
    )
    selects = tuple(select((MakeADate(d),)) for d in dates)
    data = alias(union(*selects, use_labels=True), 'dates')
    stmt = select((data,))
    result = session.execute(stmt).fetchall()
    assert tuple(chain.from_iterable(result)) == dates

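# A minimal sketch of what the test exercises, under the same 1.x Core API
# (all names here are illustrative): a UNION is not directly selectable, so
# alias() wraps it into a named subquery that can sit in a FROM clause.
from sqlalchemy import literal, select, union
from sqlalchemy.sql.expression import alias

u = union(select([literal(1).label("n")]), select([literal(2).label("n")]))
stmt = select([alias(u, "nums")])  # SELECT nums.n FROM (... UNION ...) AS nums
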
def query(self, PRONAC):
    verificacao_peca = alias(Verificacao, name='verificacao_peca')
    verificacao_veiculo = alias(Verificacao, name='verificacao_veiculo')
    query = (self.raw_query(
        verificacao_peca.c.Descricao.label('peca'),
        verificacao_veiculo.c.Descricao.label('veiculo'))
        .select_from(PlanoDivulgacao)
        .join(Projeto, Projeto.idProjeto == PlanoDivulgacao.idProjeto)
        .join(verificacao_peca,
              verificacao_peca.c.idVerificacao == PlanoDivulgacao.idPeca)
        .join(verificacao_veiculo,
              verificacao_veiculo.c.idVerificacao == PlanoDivulgacao.idVeiculo)
        .filter(and_(Projeto.PRONAC == PRONAC,
                     PlanoDivulgacao.stPlanoDivulgacao == 1)))
    return query

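# The function above aliases Verificacao twice because one row references the
# lookup table in two roles. A self-contained sketch of that pattern with
# hypothetical "trip"/"city" tables (not from this project):
from sqlalchemy import Column, Integer, MetaData, String, Table, select
from sqlalchemy.sql.expression import alias

metadata = MetaData()
city = Table("city", metadata,
             Column("id", Integer, primary_key=True),
             Column("name", String))
trip = Table("trip", metadata,
             Column("id", Integer, primary_key=True),
             Column("origin_id", Integer),
             Column("destination_id", Integer))
origin = alias(city, name="origin")
destination = alias(city, name="destination")
stmt = (select([origin.c.name.label("origin"),
                destination.c.name.label("destination")])
        .select_from(trip
                     .join(origin, trip.c.origin_id == origin.c.id)
                     .join(destination, trip.c.destination_id == destination.c.id)))
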
def __len__(self, context=None):
    """Number of statements in the store."""
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    typetable = expression.alias(asserted_type_table, "typetable")
    quoted = expression.alias(quoted_table, "quoted")
    asserted = expression.alias(asserted_table, "asserted")
    literal = expression.alias(literal_table, "literal")

    quotedContext = self.build_context_clause(context, quoted)
    assertedContext = self.build_context_clause(context, asserted)
    typeContext = self.build_context_clause(context, typetable)
    literalContext = self.build_context_clause(context, literal)

    if context is not None:
        selects = [
            (typetable, typeContext, ASSERTED_TYPE_PARTITION),
            (quoted, quotedContext, QUOTED_PARTITION),
            (asserted, assertedContext, ASSERTED_NON_TYPE_PARTITION),
            (literal, literalContext, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=True, select_type=COUNT_SELECT)
    else:
        selects = [
            (typetable, typeContext, ASSERTED_TYPE_PARTITION),
            (asserted, assertedContext, ASSERTED_NON_TYPE_PARTITION),
            (literal, literalContext, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=False, select_type=COUNT_SELECT)

    with self.engine.connect() as connection:
        res = connection.execute(q)
        rt = res.fetchall()
    return reduce(lambda x, y: x + y, [rtTuple[0] for rtTuple in rt])

def _build_query(self, table, filter_values):
    sum_query = alias(select(self.group_by + ["SUM(%s) AS sum_col" % self.key] + ['month'],
                             group_by=self.group_by + ['month'],
                             whereclause=AND(self.filters).build_expression(table)),
                      name='s')
    return select(self.group_by + ['AVG(s.sum_col) AS %s' % self.key],
                  group_by=self.group_by,
                  from_obj=sum_query).params(filter_values)

def _build_query(self, table, filter_values):
    count_uniq = alias(select(self.group_by +
                              ["COUNT(DISTINCT(%s)) AS count_unique" % self.key],
                              group_by=self.group_by + ['month'],
                              whereclause=AND(self.filters).build_expression(table)),
                       name='cq')
    return select(self.group_by + ['SUM(cq.count_unique) AS %s' % self.key],
                  group_by=self.group_by,
                  from_obj=count_uniq).params(filter_values)

def _build_query(self, filter_values):
    sum_query = alias(select(self.group_by + ["SUM(%s) AS sum_col" % self.key] + ['month'],
                             group_by=self.group_by + ['month'],
                             whereclause=' AND '.join(
                                 [f.build_expression() for f in self.filters]),
                             from_obj='"' + self.table_name + '"'),
                      name='s')
    return select(self.group_by + ['AVG(s.sum_col) AS %s' % self.key],
                  group_by=self.group_by,
                  from_obj=sum_query).params(filter_values)

def _build_query(self, filter_values):
    count_uniq = alias(select(self.group_by +
                              ["COUNT(DISTINCT(%s)) AS count_unique" % self.key],
                              group_by=self.group_by + ['month'],
                              whereclause=' AND '.join(
                                  [f.build_expression() for f in self.filters]),
                              from_obj='"' + self.table_name + '"'),
                       name='cq')
    return select(self.group_by + ['SUM(cq.count_unique) AS %s' % self.key],
                  group_by=self.group_by,
                  from_obj=count_uniq).params(filter_values)

def get_number_of_rows(db_engine: Engine, t: Table, sampling: int = 0):
    sampling = int(sampling)
    if 100 > sampling > 0:
        query_total = select([func.count().label('num')]).select_from(
            t.tablesample(sampling, name='alias', seed=text('{}'.format(SEED))))
    else:
        query_total = select([func.count().label('num')]).select_from(alias(t))
    res_t: ResultProxy = db_engine.execute(query_total)
    total_rows = res_t.first()['num']
    res_t.close()
    return total_rows

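# A sketch of calling get_number_of_rows, with hypothetical names (a reflected
# "users" table on a 1.x engine): sampling=0 counts the whole table through
# alias(t); a value in (0, 100) uses TABLESAMPLE, which needs backend support
# (e.g. PostgreSQL; SQLite lacks it).
from sqlalchemy import MetaData, Table, create_engine

engine = create_engine("postgresql://localhost/example")  # hypothetical DSN
users = Table("users", MetaData(), autoload=True, autoload_with=engine)
print(get_number_of_rows(engine, users))       # exact count over alias(users)
print(get_number_of_rows(engine, users, 10))   # approximate, 10% TABLESAMPLE
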
def clear_data_for_current_system(self):
    p = alias(FileStorage)
    ps = alias(FileStorage)
    pl = alias(RpmDetailPatchStorageLink)
    pls = alias(RpmDetailPatchStorageLink)
    rd = alias(RpmDetail)
    s = alias(System)

    delete_links_sql = delete(pl).where(
        exists(
            select([1]).select_from(
                pls.join(rd, pls.c.rpm_detail_id == rd.c.rpm_detail_id)
                   .join(s, rd.c.system_id == s.c.system_id))
            .where(s.c.system_id == self.system.system_id)
            .where(pl.c.id == pls.c.id)))

    delete_patches_sql = delete(p).where(
        not_(
            exists(
                select([1]).select_from(
                    pl.join(ps, pl.c.file_storage_id == ps.c.id))
                .where(p.c.id == ps.c.id))))

    result_links = self._session.execute(delete_links_sql)
    if result_links.rowcount:
        log.info(f"Removed {result_links.rowcount} previous patch links")
    result_patches = self._session.execute(delete_patches_sql)
    if result_patches.rowcount:
        log.info(f"Removed {result_patches.rowcount} previous patches")

def _build_query(self, table, filter_values):
    having = []
    filter_cols = []
    external_cols = _get_grouping(filter_values)

    for fil in self.filters:
        if isinstance(fil, ANDFilter):
            filter_cols.append(fil.filters[0].column_name)
            having.append(fil)
        elif fil.column_name not in ['group', 'gender', 'group_leadership',
                                     'disaggregate_by', 'table_card_group_by']:
            if fil.column_name not in external_cols and fil.column_name != 'maxmin':
                filter_cols.append(fil.column_name)
            having.append(fil)

    group_having = ''
    having_group_by = []
    if ('disaggregate_by' in filter_values and filter_values['disaggregate_by'] == 'group') or \
            ('table_card_group_by' in filter_values and filter_values['table_card_group_by']):
        group_having = "group_leadership='Y'"
        having_group_by.append('group_leadership')
    elif 'group_leadership' in filter_values and filter_values['group_leadership']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :group_leadership and group_leadership='Y'"
        having_group_by.append('group_leadership')
        filter_cols.append('group_leadership')
    elif 'gender' in filter_values and filter_values['gender']:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :gender"

    table_card_group = []
    if 'group_name' in self.group_by:
        table_card_group.append('group_name')

    s1 = alias(select(['doc_id', 'group_id', 'MAX(prop_value) + MIN(prop_value) as maxmin'] +
                      filter_cols + external_cols,
                      from_obj='"fluff_FarmerRecordFluff"',
                      group_by=['doc_id', 'group_id'] + filter_cols + external_cols),
               name='x')
    s2 = alias(select(['group_id',
                       '(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) as gender'] +
                      table_card_group,
                      from_obj='"fluff_FarmerRecordFluff"',
                      group_by=['group_id'] + table_card_group + having_group_by,
                      having=group_having),
               name='y')
    return select(['COUNT(x.doc_id) as %s' % self.key] + self.group_by,
                  group_by=['maxmin'] + filter_cols + self.group_by,
                  having=AND(having).build_expression(table),
                  from_obj=join(s1, s2, s1.c.group_id == s2.c.group_id)).params(filter_values)

def check_uniqueness(db_engine: Engine, table: Table, comb, total_rows: int = None,
                     sampling: int = 0):
    if len(comb) == 0:
        return False
    fields = [c for c in comb]
    if not total_rows:
        total_rows = get_number_of_rows(db_engine, table, sampling)
    sampling = int(sampling)
    if 100 > sampling > 0:
        sample_t = table.tablesample(sampling, name='alias', seed=text('{}'.format(SEED)))
        sample_fields = [sample_t.columns[fn.name] for fn in comb]
        query_unique = select([func.count().label('num')]).select_from(
            alias(select(sample_fields).distinct()))
        res_u: ResultProxy = db_engine.execute(query_unique)
    else:
        if db_supports_checksum(db_engine):
            checksum_method = get_checksum_function(db_engine)
            query_unique = select(
                [func.count(checksum_method(*fields).distinct()).label('num')])
        else:
            query_unique = select([func.count().label('num')]).select_from(
                alias(select(fields).distinct()))
        res_u: ResultProxy = db_engine.execute(query_unique)
    unique_len = res_u.first()['num']
    res_u.close()
    return total_rows == unique_len, total_rows, unique_len

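# A sketch of the distinct-subquery counting used above (illustrative table
# and column names): COUNT with a multi-column DISTINCT is not portable, so
# the DISTINCT select is wrapped with alias() and counted from the outside.
from sqlalchemy import Column, Integer, MetaData, Table, func, select
from sqlalchemy.sql.expression import alias

metadata = MetaData()
t = Table("t", metadata, Column("a", Integer), Column("b", Integer))
stmt = select([func.count().label("num")]).select_from(
    alias(select([t.c.a, t.c.b]).distinct()))
# SELECT count(*) AS num FROM (SELECT DISTINCT t.a, t.b FROM t) AS anon_1
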
def _get_tops(days):
    time_limit = datetime.datetime.now() \
        - datetime.timedelta(days=days)
    total_alias = alias(func.sum(Score.score_count))
    top_scorers = session.query(
        Person.username, func.sum(Score.score_count)).\
        filter(and_(Person.id == Score.person_id,
                    Score.score_time > time_limit)).\
        group_by(Person.username).\
        order_by(desc(total_alias)).\
        limit(10).all()
    tops = []
    for top in top_scorers:
        top_dict = {
            'username': top[0],
            'total_score': top[1]}
        tops.append(top_dict)
    return tops

def all(self, *args, **kwargs):
    # get all supervised bsfilters
    (_u, _p, _e, _l) = [getattr(self.db, options)._table
                        for options in ('userfeatures', 'phonefunckey',
                                        'extenumbers', 'linefeatures')]
    _l2 = alias(_l)
    conds = [
        _l.c.iduserfeatures == _p.c.iduserfeatures,
        _u.c.id == _l.c.iduserfeatures,
        _p.c.typeextenumbers == 'extenfeatures',
        _p.c.typevalextenumbers == 'bsfilter',
        _p.c.typeextenumbersright == 'user',
        _p.c.supervision == 1,
        cast(_p.c.typeextenumbersright, VARCHAR(255)) == cast(_e.c.type, VARCHAR(255)),  # 'user'
        _p.c.typevalextenumbersright == cast(_l2.c.iduserfeatures, VARCHAR(255)),
        _e.c.typeval == cast(_l2.c.id, VARCHAR(255)),
        coalesce(_l.c.number, '') != ''
    ]
    if 'context' in kwargs:
        conds.append(_l.c.context == kwargs['context'])
    q = select([_e.c.exten, _l.c.number, _u.c.bsfilter], and_(*conds))
    return self.execute(q).fetchall()

def query(self, PRONAC):  # noqa: N803
    pais_origem = alias(Pais, name='pais_origem')
    pais_destino = alias(Pais, name='pais_destino')
    uf_origem = alias(UF, name='uf_origem')
    uf_destino = alias(UF, name='uf_destino')
    municipio_origem = alias(Municipios, name='municipios_origem')
    municipio_destino = alias(Municipios, name='municipios_destino')

    query = self.raw_query(
        Deslocamento.idDeslocamento.label("id_deslocamento"),
        Deslocamento.idProjeto.label("id_projeto"),
        pais_origem.c.Descricao.label("PaisOrigem"),
        pais_destino.c.Descricao.label("PaisDestino"),
        uf_origem.c.Descricao.label("UFOrigem"),
        uf_destino.c.Descricao.label("UFDestino"),
        municipio_origem.c.Descricao.label("MunicipioOrigem"),
        municipio_destino.c.Descricao.label("MunicipioDestino"),
        Deslocamento.Qtde.label("Qtde"),
    )
    query = (query.select_from(Deslocamento)
             .join(Projeto, Projeto.idProjeto == Deslocamento.idProjeto)
             .join(pais_origem, pais_origem.c.idPais == Deslocamento.idPaisOrigem)
             .join(pais_destino, pais_destino.c.idPais == Deslocamento.idPaisDestino)
             .join(uf_origem, uf_origem.c.iduf == Deslocamento.idUFOrigem)
             .join(uf_destino, uf_destino.c.iduf == Deslocamento.idUFDestino)
             .join(municipio_origem,
                   municipio_origem.c.idMunicipioIBGE == Deslocamento.idMunicipioOrigem)
             .join(municipio_destino,
                   municipio_destino.c.idMunicipioIBGE == Deslocamento.idMunicipioDestino)
             .filter(Projeto.PRONAC == PRONAC))
    return self.execute_query(query, {'PRONAC': PRONAC}).fetchall()

def score_per_meaning_query(type_, filter_=None):  # pragma: no cover
    """
    select
        a.id, a.label, a.semantic_category, a.semantic_field_id,
        sum(a.borrowed_score)/sum(a.representation) as borrowed_score,
        count(distinct a.word_id)
    from
    (
        -- tabulate (word_id, meaning_id) pairs against a word's discounted score
        select
            x.word_id, x.meaning_id as id, x.label as label,
            x.semantic_field_id as semantic_field_id,
            x.semantic_category as semantic_category,
            y.borrowed_score, y.representation
        from
        (
            select
                wm.word_id as word_id, m.id as meaning_id, m.label as label,
                m.semantic_field_id as semantic_field_id,
                m.semantic_category as semantic_category
            from
                word_meaning as w+
        ) as x,
        --
        -- tabulate word ids against score discounted by number of meanings
        --
        (
            select
                w.pk as word_pk, w.id as word_id,
                cast(w.borrowed_score as float)/count(*) as borrowed_score,
                cast(1 as float)/count(*) as representation
            from
                word as w, counterpart as wm
            where
                w.pk = wm.word_pk
            group by
                w.id, w.borrowed_score
        ) as y
        -- ---------------------------------------------------------------------------
        where
            x.word_id = y.word_id
    ) as a
    --,
    -- ---------------------------------------------------------------------------
    -- select words we are interested in
    --
    group by a.label, a.id, a.semantic_category, a.semantic_field_id
    order by a.id
    """
    assert type_ in ['borrowed', 'age', 'simplicity']
    attr = '%s_score' % type_

    word, counterpart, parameter, meaning, valueset, value = [
        m.__table__ for m in [Word, Counterpart, Parameter, Meaning, ValueSet, Value]]

    x = alias(
        select(
            [
                counterpart.c.word_pk.label('word_pk'),
                parameter.c.pk.label('meaning_pk'),
                meaning.c.semantic_field_pk.label('semantic_field_pk'),
            ],
            from_obj=value,
            whereclause=and_(
                value.c.valueset_pk == valueset.c.pk,
                valueset.c.parameter_pk == parameter.c.pk,
                parameter.c.pk == meaning.c.pk,
                value.c.pk == counterpart.c.pk)),
        name='x')

    y = alias(
        select(
            [
                word.c.pk.label('word_pk'),
                (cast(getattr(word.c, attr), Float) / func.count('*')).label(attr),
                (cast(1, Float) / func.count('*')).label('representation'),
            ],
            from_obj=counterpart,
            whereclause=word.c.pk == counterpart.c.word_pk,
            group_by=[word.c.pk, getattr(word.c, attr)],
        ),
        name='y')

    a = alias(
        select(
            [x.c.meaning_pk, x.c.semantic_field_pk, getattr(y.c, attr), y.c.representation],
            whereclause=x.c.word_pk == y.c.word_pk),
        name='a')

    query = select(
        [
            a.c.meaning_pk,
            a.c.semantic_field_pk,
            (func.sum(getattr(a.c, attr)) / func.sum(a.c.representation)).label(attr),
            func.count(distinct(a.c.meaning_pk)),
        ],
        group_by=[a.c.meaning_pk, a.c.semantic_field_pk],
        order_by=a.c.meaning_pk)

    if isinstance(filter_, Meaning):
        query = query.where(a.c.meaning_pk == filter_.pk)
    if isinstance(filter_, SemanticField):
        query = query.where(a.c.semantic_field_pk == filter_.pk)
    return query

def generateCountsJs(self, target_fn):
    n_markers_in = (self.session.query(
        Species, func.count(distinct(Sequence.id_ortholog)))
        .join(Sequence)
        .group_by(Species)
        .all())
    n_species = len(n_markers_in)  # how many species are there?

    out_str = "var species_key = [\n"
    out_str += ',\n'.join(["\t['%s', '%s', %d]" % (chr(64 + x[0].id), x[0].name, x[1])
                           for x in n_markers_in])
    out_str += "\n];\n\n"

    out_str += "var marker_sets_input = [ "
    # output single-species marker counts
    out_str += "{sets: ['%s'], size: %d}" % (chr(64 + n_markers_in[0][0].id),
                                             n_markers_in[0][1])
    for rec in n_markers_in[1:]:
        out_str += ",\n\t{sets: ['%s'], size: %d}" % (chr(64 + rec[0].id), rec[1])

    # determine overlaps
    from sqlalchemy.sql.expression import alias, join, select
    seq_tab = Base.metadata.tables['sequences']
    aka = [alias(seq_tab) for n in range(n_species)]
    for n_levels in range(2, n_species + 1):
        cols = [func.count(distinct(aka[0].c.id_ortholog)), aka[0].c.id_species]
        joins = aka[0]
        # build selected columns and joins
        for i in range(1, n_levels):
            cols += [aka[i].c.id_species]
            joins = join(joins, aka[i], aka[0].c.id_ortholog == aka[i].c.id_ortholog)
        # create select statement on columns and joins
        stmt = select(cols).select_from(joins)
        # add filtering clauses
        for i in range(1, n_levels):
            stmt = stmt.where(aka[i - 1].c.id_species < aka[i].c.id_species)
        # add grouping clauses
        for i in range(n_levels):
            stmt = stmt.group_by(aka[i].c.id_species)
        # execute query statement
        result = self.session.execute(stmt).fetchall()
        for rec in result:
            out_str += ",\n\t{sets: [%s], size: %d}" % (
                ','.join(["'%s'" % chr(64 + rec[i + 1]) for i in range(n_levels)]), rec[0])
    out_str += "\n];\n\n"

    # load number of markers found for each species
    n_markers_out = (self.session.query(
        Species.name, func.count(distinct(PrimerSet.id_ortholog)))
        .outerjoin(PrimerSet, Species.primer_sets)
        .group_by(Species)
        .all())
    out_str += "var species_markers_output = [\n"
    out_str += "\t{name: '%s', value: %d}" % (n_markers_out[0][0], n_markers_out[0][1])
    for rec in n_markers_out[1:]:
        out_str += ",\n\t{name: '%s', value: %d}" % (rec[0], rec[1])
    out_str += "\n];\n"

    with open(target_fn, 'wt') as outfile:
        print(outfile.name)
        outfile.write(out_str)

def triples(self, triple, context=None):
    """
    A generator over all the triples matching pattern.

    Pattern can be any objects for comparing against nodes in the store,
    for example, RegExLiteral, Date? DateRange?

    quoted table:                <id>_quoted_statements
    asserted rdf:type table:     <id>_type_statements
    asserted non rdf:type table: <id>_asserted_statements

    triple columns: subject, predicate, object, context,
                    termComb, objLanguage, objDatatype
    class membership columns: member, klass, context, termComb

    FIXME: These union all selects *may* be further optimized by joins
    """
    subject, predicate, obj = triple
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    if predicate == RDF.type:
        # select from asserted rdf:type partition and quoted table
        # (if a context is specified)
        typeTable = expression.alias(
            asserted_type_table, "typetable")
        clause = self.build_clause(typeTable, subject, RDF.type, obj, context, True)
        selects = [
            (typeTable, clause, ASSERTED_TYPE_PARTITION),
        ]
    elif isinstance(predicate, REGEXTerm) \
            and predicate.compiledExpr.match(RDF.type) \
            or not predicate:
        # Select from quoted partition (if context is specified),
        # Literal partition if (obj is Literal or None) and asserted
        # non rdf:type partition (if obj is URIRef or None)
        selects = []
        if not self.STRONGLY_TYPED_TERMS \
                or isinstance(obj, Literal) \
                or not obj \
                or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm)):
            literal = expression.alias(literal_table, "literal")
            clause = self.build_clause(literal, subject, predicate, obj, context)
            selects.append((literal, clause, ASSERTED_LITERAL_PARTITION))
        if not isinstance(obj, Literal) \
                and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                or not obj:
            asserted = expression.alias(asserted_table, "asserted")
            clause = self.build_clause(asserted, subject, predicate, obj, context)
            selects.append((asserted, clause, ASSERTED_NON_TYPE_PARTITION))
        typeTable = expression.alias(asserted_type_table, "typetable")
        clause = self.build_clause(typeTable, subject, RDF.type, obj, context, True)
        selects.append((typeTable, clause, ASSERTED_TYPE_PARTITION))
    elif predicate:
        # select from asserted non rdf:type partition (optionally),
        # quoted partition (if context is specified), and literal
        # partition (optionally)
        selects = []
        if not self.STRONGLY_TYPED_TERMS \
                or isinstance(obj, Literal) \
                or not obj \
                or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm)):
            literal = expression.alias(literal_table, "literal")
            clause = self.build_clause(literal, subject, predicate, obj, context)
            selects.append((literal, clause, ASSERTED_LITERAL_PARTITION))
        if not isinstance(obj, Literal) \
                and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                or not obj:
            asserted = expression.alias(asserted_table, "asserted")
            clause = self.build_clause(asserted, subject, predicate, obj, context)
            selects.append((asserted, clause, ASSERTED_NON_TYPE_PARTITION))

    if context is not None:
        quoted = expression.alias(quoted_table, "quoted")
        clause = self.build_clause(quoted, subject, predicate, obj, context)
        selects.append((quoted, clause, QUOTED_PARTITION))

    q = union_select(selects, select_type=TRIPLE_SELECT_NO_ORDER)
    with self.engine.connect() as connection:
        res = connection.execute(q)
        # TODO: False but it may have limitations on text column. Check
        # NOTE: SQLite does not support ORDER BY terms that aren't
        # integers, so the entire result set must be iterated in order
        # to be able to return a generator of contexts
        result = res.fetchall()
    tripleCoverage = {}
    for rt in result:
        id, s, p, o, (graphKlass, idKlass, graphId) = extract_triple(rt, self, context)
        contexts = tripleCoverage.get((s, p, o), [])
        contexts.append(graphKlass(self, idKlass(graphId)))
        tripleCoverage[(s, p, o)] = contexts

    for (s, p, o), contexts in tripleCoverage.items():
        yield (s, p, o), (c for c in contexts)

def contexts(self, triple=None):
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    typetable = expression.alias(asserted_type_table, "typetable")
    quoted = expression.alias(quoted_table, "quoted")
    asserted = expression.alias(asserted_table, "asserted")
    literal = expression.alias(literal_table, "literal")

    if triple is not None:
        subject, predicate, obj = triple
        if predicate == RDF.type:
            # Select from asserted rdf:type partition and quoted table
            # (if a context is specified)
            clause = self.build_clause(typetable, subject, RDF.type, obj, Any, True)
            selects = [
                (typetable, clause, ASSERTED_TYPE_PARTITION),
            ]
        elif isinstance(predicate, REGEXTerm) \
                and predicate.compiledExpr.match(RDF.type) \
                or not predicate:
            # Select from quoted partition (if context is specified),
            # literal partition if (obj is Literal or None) and
            # asserted non rdf:type partition (if obj is URIRef
            # or None)
            clause = self.build_clause(typetable, subject, RDF.type, obj, Any, True)
            selects = [
                (typetable, clause, ASSERTED_TYPE_PARTITION),
            ]
            if (not self.STRONGLY_TYPED_TERMS
                    or isinstance(obj, Literal)
                    or not obj
                    or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm))):
                clause = self.build_clause(literal, subject, predicate, obj)
                selects.append(
                    (literal, clause, ASSERTED_LITERAL_PARTITION))
            if not isinstance(obj, Literal) \
                    and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                    or not obj:
                clause = self.build_clause(asserted, subject, predicate, obj)
                selects.append(
                    (asserted, clause, ASSERTED_NON_TYPE_PARTITION))
        elif predicate:
            # select from asserted non rdf:type partition (optionally),
            # quoted partition (if context is specified), and literal
            # partition (optionally)
            selects = []
            if (not self.STRONGLY_TYPED_TERMS
                    or isinstance(obj, Literal)
                    or not obj
                    or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm))):
                clause = self.build_clause(literal, subject, predicate, obj)
                selects.append(
                    (literal, clause, ASSERTED_LITERAL_PARTITION))
            if not isinstance(obj, Literal) \
                    and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                    or not obj:
                clause = self.build_clause(asserted, subject, predicate, obj)
                selects.append(
                    (asserted, clause, ASSERTED_NON_TYPE_PARTITION))

        clause = self.build_clause(quoted, subject, predicate, obj)
        selects.append((quoted, clause, QUOTED_PARTITION))
        q = union_select(selects, distinct=True, select_type=CONTEXT_SELECT)
    else:
        selects = [
            (typetable, None, ASSERTED_TYPE_PARTITION),
            (quoted, None, QUOTED_PARTITION),
            (asserted, None, ASSERTED_NON_TYPE_PARTITION),
            (literal, None, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=True, select_type=CONTEXT_SELECT)

    with self.engine.connect() as connection:
        res = connection.execute(q)
        rt = res.fetchall()
    for context in [rtTuple[0] for rtTuple in rt]:
        yield URIRef(context)

def _triples_helper(self, triple, context=None):
    subject, predicate, obj = triple
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    if predicate == RDF.type:
        # select from asserted rdf:type partition and quoted table
        # (if a context is specified)
        typeTable = expression.alias(asserted_type_table, "typetable")
        clause = self.build_clause(typeTable, subject, RDF.type, obj, context, True)
        selects = [
            (typeTable, clause, ASSERTED_TYPE_PARTITION),
        ]
    elif isinstance(predicate, REGEXTerm) \
            and predicate.compiledExpr.match(RDF.type) \
            or not predicate:
        # Select from quoted partition (if context is specified),
        # Literal partition if (obj is Literal or None) and asserted
        # non rdf:type partition (if obj is URIRef or None)
        selects = []
        if (not self.STRONGLY_TYPED_TERMS
                or isinstance(obj, Literal)
                or not obj
                or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm))):
            literal = expression.alias(literal_table, "literal")
            clause = self.build_clause(literal, subject, predicate, obj, context)
            selects.append((literal, clause, ASSERTED_LITERAL_PARTITION))
        if not isinstance(obj, Literal) \
                and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                or not obj:
            asserted = expression.alias(asserted_table, "asserted")
            clause = self.build_clause(asserted, subject, predicate, obj, context)
            selects.append((asserted, clause, ASSERTED_NON_TYPE_PARTITION))
        typeTable = expression.alias(asserted_type_table, "typetable")
        clause = self.build_clause(typeTable, subject, RDF.type, obj, context, True)
        selects.append((typeTable, clause, ASSERTED_TYPE_PARTITION))
    elif predicate:
        # select from asserted non rdf:type partition (optionally),
        # quoted partition (if context is specified), and literal
        # partition (optionally)
        selects = []
        if not self.STRONGLY_TYPED_TERMS \
                or isinstance(obj, Literal) \
                or not obj \
                or (self.STRONGLY_TYPED_TERMS and isinstance(obj, REGEXTerm)):
            literal = expression.alias(literal_table, "literal")
            clause = self.build_clause(literal, subject, predicate, obj, context)
            selects.append((literal, clause, ASSERTED_LITERAL_PARTITION))
        if not isinstance(obj, Literal) \
                and not (isinstance(obj, REGEXTerm) and self.STRONGLY_TYPED_TERMS) \
                or not obj:
            asserted = expression.alias(asserted_table, "asserted")
            clause = self.build_clause(asserted, subject, predicate, obj, context)
            selects.append((asserted, clause, ASSERTED_NON_TYPE_PARTITION))

    if context is not None:
        quoted = expression.alias(quoted_table, "quoted")
        clause = self.build_clause(quoted, subject, predicate, obj, context)
        selects.append((quoted, clause, QUOTED_PARTITION))

    return selects

def _build_query(self, filter_values):
    having = []
    filter_cols = []
    external_cols = _get_grouping(filter_values)

    for fil in self.filters:
        if isinstance(fil, ANDFilter):
            filter_cols.append(fil.filters[0].column_name)
            having.append(fil.build_expression())
        elif fil.column_name not in [
            "group",
            "gender",
            "group_leadership",
            "disaggregate_by",
            "table_card_group_by",
        ]:
            if fil.column_name not in external_cols and fil.column_name != "maxmin":
                filter_cols.append(fil.column_name)
            having.append(fil.build_expression())

    group_having = ""
    having_group_by = []
    if ("disaggregate_by" in filter_values and filter_values["disaggregate_by"] == "group") or (
        "table_card_group_by" in filter_values and filter_values["table_card_group_by"]
    ):
        group_having = "group_leadership='Y'"
        having_group_by.append("group_leadership")
    elif "group_leadership" in filter_values and filter_values["group_leadership"]:
        group_having = (
            "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :group_leadership and group_leadership='Y'"
        )
        having_group_by.append("group_leadership")
        filter_cols.append("group_leadership")
    elif "gender" in filter_values and filter_values["gender"]:
        group_having = "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) = :gender"

    table_card_group = []
    if "group_name" in self.group_by:
        table_card_group.append("group_name")

    s1 = alias(
        select(
            ["doc_id", "group_id", "MAX(prop_value) + MIN(prop_value) as maxmin"]
            + filter_cols
            + external_cols,
            from_obj='"fluff_FarmerRecordFluff"',
            group_by=["doc_id", "group_id"] + filter_cols + external_cols,
        ),
        name="x",
    )
    s2 = alias(
        select(
            ["group_id", "(MAX(CAST(gender as int4)) + MIN(CAST(gender as int4))) as gender"]
            + table_card_group,
            from_obj='"fluff_FarmerRecordFluff"',
            group_by=["group_id"] + table_card_group + having_group_by,
            having=group_having,
        ),
        name="y",
    )
    return select(
        ["COUNT(x.doc_id) as %s" % self.key] + self.group_by,
        group_by=["maxmin"] + filter_cols + self.group_by,
        having=" and ".join(having),
        from_obj=join(s1, s2, s1.c.group_id == s2.c.group_id),
    ).params(filter_values)

def GetSums(runs):
    if runs:
        run_objs = []
        presence_mask = 0
        for run_id in runs:
            run = Run.get_by(id=run_id)
            run_objs.append(run)
            presence_mask = presence_mask | (1 << (run.day - 1))
        team = alias(select([Team.table], Team.table.c.present.op('&')(presence_mask)),
                     alias="team")
        breed = alias(select([Breed.table]), alias="breed")
        r = session.query()
        r = r.add_entity(Team, alias=team)
        r = r.outerjoin(breed)
        r = r.add_entity(Breed, alias=breed)
        pen = []
        time_pen = []
        time = []
        sorts = []
        disq = []
        ran = []
        lengths = 0
        for run in run_objs:
            run_time, run_max_time = ServerCache().Get(('run_times', run.id),
                                                       lambda: GetRunTimes(run.id))
            res = alias(select([Result.table], Result.table.c.run_id == run.id))
            sort = alias(select([Sort.table], Sort.table.c.run_id == run.id))
            pen.append(res.c.mistakes * 5 + res.c.refusals * 5)
            time_pen.append((res.c.time - run_time) * (res.c.time > run_time))
            time.append(res.c.time)
            s = ((func.ifnull(sort.c.value, 0) == 0) & (team.c.def_sort == 1)) | \
                ((func.ifnull(sort.c.value, 0) == 3) & (team.c.def_sort == 0))
            sorts.append(s)
            lengths = lengths + run.length
            dis = ((res.c.time > run_max_time) | (res.c.disqualified) | (res.c.refusals >= 3))
            disq.append(dis)
            ran.append((res.c.time > 0) | dis)
            r = r.outerjoin(res).outerjoin(sort)
        r = r.add_columns(reduce(lambda x, y: x + y, pen).label("penalty"))
        r = r.add_columns(reduce(lambda x, y: x + y, time_pen).label("time_penalty"))
        r = r.add_columns(reduce(lambda x, y: x + y, pen + time_pen).label("total_penalty"))
        r = r.add_columns(reduce(lambda x, y: x * y, time).label("time_fac"))
        r = r.add_columns(reduce(lambda x, y: x + y, ran).label("ran_all"))
        r = r.add_columns(reduce(lambda x, y: x + y, disq).label("disq"))
        r = r.add_columns(reduce(lambda x, y: max(x, y), sorts).label("sort"))
        r = r.add_columns("(team.handler_name || ' ' || team.handler_surname) team_handler")
        r = r.add_columns("(team.dog_name || ' ' || team.dog_kennel) team_dog")
        result_time = reduce(lambda x, y: x + y, time).label("result_time")
        r = r.add_columns(result_time)
        r = r.add_columns(func.ifnull(lengths / result_time, 0).label("speed"))
        r = r.filter("sort == 0 AND ran_all == %d" % len(runs))
        r = r.order_by("disq, total_penalty, penalty, result_time")
        rows = session.execute(r).fetchall()
        num = 0
        sums = []
        for r in rows:
            r = dict(zip(r.keys(), r.values()))
            num += 1
            r['rank'] = num
            sums.append(r)
    else:
        sums = []
    return sums