def soc_crosswalk_join(tbl1, tbl2, col):
    my_joins = []
    cond1 = True
    pums_table = None
    bls_table = None
    if tbl1.get_schema_name() == "bls":
        bls_table = tbl1
    elif tbl2.get_schema_name() == "bls":
        bls_table = tbl2
    if tbl1.get_schema_name().startswith("pums"):
        pums_table = tbl1
    elif tbl2.get_schema_name().startswith("pums"):
        pums_table = tbl2

    if pums_table and bls_table:
        AliasedSocCrosswalk = aliased(SocCrosswalk)
        cond1 = (AliasedSocCrosswalk.pums_soc == pums_table.soc) if tbl1 is pums_table \
            else (AliasedSocCrosswalk.bls_soc == bls_table.soc)
        j1 = [[AliasedSocCrosswalk, cond1], {"full": False, "isouter": False}]
        my_joins.append(j1)
        j2_cond = and_(AliasedSocCrosswalk.pums_soc == pums_table.soc,
                       AliasedSocCrosswalk.bls_soc == bls_table.soc)
        j2 = [[tbl2, j2_cond], {"full": False, "isouter": False}]
        my_joins.append(j2)
    else:
        onet_table = tbl1 if tbl1.get_schema_name() == 'onet' else tbl2
        other_table = pums_table or bls_table
        j2_cond = or_(onet_table.soc == other_table.soc,
                      onet_table.soc == func.left(other_table.soc, 2) + '0000',
                      onet_table.soc == func.left(other_table.soc, 3) + '000',
                      onet_table.soc == func.left(other_table.soc, 3) + '100',
                      onet_table.soc == func.left(other_table.soc, 5) + '0')
        my_joins.append([[tbl2, j2_cond], {}])

    return my_joins
def load_articles(page, mtype):
    if mtype == 1:
        print('new')
        arti = model.db.session.query(model.Article.id, model.Article.title,
                                      func.left(model.Article.content, 110),
                                      model.Article.scan_time, model.Article.agree_time,
                                      model.Article.time, model.Article.uid).\
            order_by(model.Article.time.desc()).slice((page - 1) * 30, page * 30).all()
    elif mtype == 2:
        print('hot')
        arti = model.db.session.query(model.Article.id, model.Article.title,
                                      func.left(model.Article.content, 110),
                                      model.Article.scan_time, model.Article.agree_time,
                                      model.Article.time, model.Article.uid).\
            order_by(model.Article.scan_time.desc()).slice((page - 1) * 30, page * 30).all()

    ret = []
    for item in arti:
        one_arti = {'article': item}
        id = item[0]
        uid = item[6]
        categorys = []
        userinfo = model.db.session.query(model.User.nickname).filter_by(id=uid).first()
        category = model.db.session.query(model.Article_Category_Rela.cid).filter_by(aid=id).all()
        # use a separate loop variable so the outer "item" is not shadowed
        for rela in category:
            categorys.append(
                model.db.session.query(model.Category.category).filter_by(id=rela[0]).first()[0])
        one_arti['category'] = categorys
        one_arti['nickname'] = userinfo[0]
        ret.append(one_arti)
    print(ret)
    return ret
def FindPeopleByPartialName(self, partialName):
    partialName = ' '.join(partialName.split())  # strip out extra spaces
    namelen = len(partialName)
    return db.session.query(Person).filter(
        or_(
            or_(
                func.left(Person.firstName, namelen) == partialName,
                func.left(Person.lastName, namelen) == partialName,
            ),
            func.left(Person.firstName + u" " + Person.lastName, namelen) == partialName
        )
    ).all()
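# A minimal, self-contained sketch of the same LEFT()-based prefix-matching idea,
# not taken from the snippet above: the "contacts" model, engine URL, and sample
# input are hypothetical, and the backend is assumed to provide LEFT()
# (PostgreSQL, MySQL, SQL Server). SQLAlchemy 1.4+ style.
from sqlalchemy import Column, Integer, String, create_engine, func, or_, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Contact(Base):
    __tablename__ = "contacts"
    id = Column(Integer, primary_key=True)
    first_name = Column(String(80))
    last_name = Column(String(80))

def find_by_prefix(session, partial):
    partial = " ".join(partial.split())  # collapse repeated spaces, as above
    n = len(partial)
    # LEFT(col, n) == partial is a portable (if index-unfriendly) prefix test
    stmt = select(Contact).where(
        or_(
            func.left(Contact.first_name, n) == partial,
            func.left(Contact.last_name, n) == partial,
            func.left(Contact.first_name + " " + Contact.last_name, n) == partial,
        )
    )
    return session.scalars(stmt).all()

# usage (hypothetical):
# with Session(create_engine("postgresql+psycopg2://localhost/demo")) as s:
#     print(find_by_prefix(s, "Ann Sm"))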
def handle(self, *args, **options):
    # set up
    config = get_config()
    if config is None:
        raise CommandError('Unable to process configuration file p_to_p.yml')
    connection = get_connection(config)
    pedsnet_session = init_pedsnet(connection)
    init_pcornet(connection)
    init_vocab(connection)
    pedsnet_pcornet_valueset_map = aliased(ValueSetMap)

    # extract the data from the death table
    death_cause = pedsnet_session.query(
        DeathPedsnet.person_id,
        func.left(DeathPedsnet.cause_source_value, 8),
        coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT'),
        bindparam("death_cause_type", "NI"),
        bindparam("death_cause_source", "L"),
        bindparam("death_cause_confidence", None),
        min(DeathPedsnet.site)
    ) \
        .join(Demographic,
              Demographic.patid == cast(DeathPedsnet.person_id, String(256)), ) \
        .join(VocabularyConcept,
              VocabularyConcept.concept_id == DeathPedsnet.cause_concept_id) \
        .outerjoin(pedsnet_pcornet_valueset_map,
                   and_(pedsnet_pcornet_valueset_map.source_concept_class == 'death cause code',
                        cast(VocabularyConcept.vocabulary_id, String(200)) ==
                        pedsnet_pcornet_valueset_map.source_concept_id)) \
        .filter(and_(DeathPedsnet.cause_source_value != None,
                     DeathPedsnet.cause_source_concept_id != 44814650)) \
        .group_by(DeathPedsnet.person_id,
                  func.left(DeathPedsnet.cause_source_value, 8),
                  coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT')) \
        .all()

    # transform data to pcornet names and types
    # load to demographic table
    odo(death_cause, DeathCause.__table__,
        dshape='var * {patid: string, death_cause: string, death_cause_code: string,'
               'death_cause_type: string, death_cause_source:string, '
               'death_cause_confidence: string, site: string}')

    # close session
    pedsnet_session.close()

    # output result
    self.stdout.ending = ''
    print('Death Cause ETL completed successfully', end='', file=self.stdout)
class TestTable(Base):
    __tablename__ = "test"

    id = Column(Integer, primary_key=True, autoincrement=True)
    _some_id = Column("some_id", String)
    some_primary_id = column_property(func.left(_some_id, 6).cast(Integer))
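# A minimal usage sketch for the computed column above (not from the original
# project): because some_primary_id is a column_property, the LEFT()/CAST is
# evaluated by the database and can be filtered and ordered on directly.
# The engine URL and threshold value are assumptions for illustration only.
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

engine = create_engine("postgresql+psycopg2://localhost/demo")  # assumed DSN

with Session(engine) as session:
    rows = (
        session.query(TestTable)
        # compares CAST(LEFT(some_id, 6) AS INTEGER) in SQL, not in Python
        .filter(TestTable.some_primary_id >= 100000)
        .order_by(TestTable.some_primary_id)
        .all()
    )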
def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = normalize(text, dataset)[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)

    # calculate the difference percentage
    l = func.greatest(1.0,
                      func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text)) / l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')

    # apply filters
    filters = [entities.c.dataset_id == dataset.id,
               entities.c.invalid == False]
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id == None)
    if exclude is not None:
        filters.append(entities.c.id != exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score], and_(*filters), [entities],
               group_by=[id_], order_by=[score.desc()])
    return Matches(q)
def export_csv(self):
    request = self.request
    query = SipkdDBSession.query(
        Sp2dAdviceDet.advno,
        (func.right(Sp2d.sp2dno, 5) +
         func.coalesce(Sp2d.infix, Sp2d.infix, '') + '/' +
         Sp2d.sp2dtype + '/' +
         func.left(Sp2d.sp2dno, 4)).label('sp2dno'),
        Sp2d.sp2ddate,
        Sp2d.paymentfor,
        Sp2d.sp2damount,
        Sp2d.ppnamount,
        Sp2d.pphamount,
        (Sp2d.pot1num + Sp2d.pot2num + Sp2d.pot3num +
         Sp2d.pot4num + Sp2d.pot5num).label("potongan"),
        Sp2d.sp2dnetto,
        Sp2d.bknama,
        Sp2d.bankposnm,
        Sp2d.bankaccount,
        Sp2d.npwp).\
        join(Sp2d).\
        filter(Sp2d.sp2dno == Sp2dAdviceDet.sp2dno,
               Sp2d.sp2dno.in_(request.params['data'].split(',')))
    r = query.first()
    header = r.keys()
    query = query.all()
    rows = []
    for item in query:
        rows.append(list(item))

    # override attributes of response
    filename = 'sp2d%s.csv' % datetime.now().strftime('%Y%m%d%H%M%S')
    self.request.response.content_disposition = 'attachment;filename=' + filename
    return {
        'header': header,
        'rows': rows,
    }
def id_string(cls):
    """
    IN SQL:
    SELECT CONCAT(CONCAT(CONCAT(LEFT((SELECT card_types.type_name
                                      FROM card_types
                                      WHERE card_types.id = cards.card_type_id), 1),
                                letter),
                         RIGHT(CONCAT('000000', cards.id), 6)),
                  "C") AS nid
    FROM cards;
    """
    return func.concat(
        func.concat(
            func.concat(
                func.left(
                    db.select([Card_Type.type_name])
                      .where(Card_Type.id == cls.card_type_id)
                      .limit(1).as_scalar(),
                    1),
                cls.letter),
            func.right(func.concat('000000', cls.id), 6)),
        "C")
def used_letters(self):
    """ Returns a list of all the distinct first letters of the
    people's last names. """
    letter = func.left(ExtendedPerson.last_name, 1)
    letter = func.upper(func.unaccent(letter))
    query = self.session.query(letter.distinct().label('letter'))
    query = query.order_by(letter)
    return [r.letter for r in query]
def build_query_to_populate(self, query, full_table, aggregate_table):
    insert_columns = [aggregate_table.c.join_key]
    fk = Column(self.key, Integer)
    geom = Column(self.geometry_column, Geometry())
    bins_table = Table(self.table, full_table.metadata, fk, geom)

    if self.join_custom_data:
        extra_data = Table(
            "extra_data",
            full_table.metadata,
            Column("timestamp", DateTime),
            Column("verified", Boolean),
            Column("bigquery_key", String),
            Column("bigquery_test_id", String),
            Column("connection_type", String),
            Column("advertised_download", Integer),
            Column("actual_download", Float),
            Column("advertised_upload", Integer),
            Column("actual_upload", Float),
            Column("min_rtt", Integer),
            Column("location_type", String),
            Column("cost_of_service", Integer),
            Column("location", Geometry("Point", srid=4326)),
            keep_existing=True,
        )

        joining = join(
            full_table,
            extra_data,
            and_(
                extra_data.c.bigquery_test_id ==
                func.left(full_table.c.test_id,
                          func.length(extra_data.c.bigquery_test_id))
            ),
            isouter=True,
        )
        query = query.select_from(joining)

        location = case(
            [(extra_data.c.verified,
              func.coalesce(extra_data.c.location, full_table.c.location))],
            else_=full_table.c.location,
        )
    else:
        location = full_table.c.location

    select_query = (query.select_from(bins_table)
                    .where(ST_Intersects(location, geom))
                    .column(fk)
                    .group_by(fk))
    return insert_columns, select_query
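# A standalone sketch of the prefix-join trick used above, for illustration only:
# rows of "a" match rows of "b" whose key starts with b.prefix. The table and
# column names are hypothetical, and LEFT()/LENGTH() are assumed to exist on the
# target backend (PostgreSQL, MySQL, SQL Server).
from sqlalchemy import Column, MetaData, String, Table, func, join, select

metadata = MetaData()
a = Table("a", metadata, Column("test_id", String))
b = Table("b", metadata, Column("prefix", String))

# LEFT(a.test_id, LENGTH(b.prefix)) = b.prefix  ->  prefix match as a join condition
prefix_join = join(
    a, b,
    b.c.prefix == func.left(a.c.test_id, func.length(b.c.prefix)),
    isouter=True,
)
stmt = select(a.c.test_id, b.c.prefix).select_from(prefix_join)
# print(stmt)  # inspect the generated SQL without executing it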
def cip_crosswalk_join(tbl1, tbl2, col):
    if tbl1.get_schema_name().startswith('pums'):
        pums_table = tbl1
    elif tbl2.get_schema_name().startswith('pums'):
        pums_table = tbl2
    if tbl1.get_schema_name() == 'ipeds':
        deeper_table = tbl1
    elif tbl2.get_schema_name() == 'ipeds':
        deeper_table = tbl2
    if tbl1.get_schema_name() == 'onet':
        deeper_table = tbl1
    elif tbl2.get_schema_name() == 'onet':
        deeper_table = tbl2
    direct_join = getattr(pums_table, col) == func.left(getattr(deeper_table, col), 2)
    my_joins = [[[tbl2, direct_join], {"full": False, "isouter": False}]]
    return my_joins
def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = (normalize(text) or '')[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)

    # calculate the difference percentage
    min_l = func.greatest(1.0,
                          func.least(len(match_text), func.length(text_field)))
    score = func.greatest(
        0.0,
        ((min_l - func.levenshtein(text_field, match_text)) / min_l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')

    # apply filters
    filters = [
        entities.c.dataset_id == dataset.id,
        entities.c.invalid == False
    ]  # noqa
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id == None)  # noqa
    if exclude is not None:
        filters.append(entities.c.id != exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score], and_(*filters), [entities],
               group_by=[id_], order_by=[score.desc()])
    return Matches(q)
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == "name")
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    for schema in schemata:
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name == schema)

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True)  # noqa
        q = q.filter(p.name == name)
        attr = project.get_attribute("entity", name)
        column = getattr(p, attr.value_column)
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0,
                      func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label("score")

    q = q.add_columns(score)
    q = q.order_by(score.desc())
    q = q.filter(score > 50)
    return Matches(q, account)
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == 'name')
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    if len(schemata):
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name.in_(schemata))

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True)  # noqa
        q = q.filter(p.name == name)
        column = getattr(p, p.type_column(value))
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0,
                      func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label('score')

    q = q.group_by(main.entity_id)
    q = q.add_columns(func.max(score))
    q = q.order_by(func.max(score).desc())
    q = q.filter(score > 50)
    return Matches(q, project, account)
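# A minimal, self-contained sketch of the fuzzy-scoring pattern shared by the
# find_matches variants above. Assumptions: a PostgreSQL backend with the
# fuzzystrmatch extension providing levenshtein(); the "people" model and the
# engine URL are hypothetical and not part of any snippet above.
from sqlalchemy import Column, Integer, Unicode, create_engine, func, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class PersonRow(Base):
    __tablename__ = "people"
    id = Column(Integer, primary_key=True)
    name = Column(Unicode(255))

engine = create_engine("postgresql+psycopg2://localhost/demo")  # assumed DSN

def fuzzy_candidates(session, text, limit=10):
    # compare only the first 254 characters, lower-cased, like the snippets above
    text_field = func.left(func.lower(PersonRow.name), 254)
    match_text = text.lower().strip()[:254]
    # similarity as a percentage: 100 means identical, 0 means entirely different
    length = func.greatest(1.0, func.least(len(match_text), func.length(text_field)))
    score = func.greatest(
        0.0, ((length - func.levenshtein(text_field, match_text)) / length) * 100.0
    ).label("score")
    stmt = select(PersonRow.id, score).order_by(score.desc()).limit(limit)
    return session.execute(stmt).all()

# usage (hypothetical):
# with Session(engine) as session:
#     print(fuzzy_candidates(session, "Jon Smith"))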
def __init__(self, bra_id):
    Location.__init__(self, bra_id)
    self.attrs_query = db.session.query(
        func.count(Bra.id).label('count'),
        func.left(Bra.id, 3).label('state'))\
        .filter(func.length(Bra.id) == 9)\
        .group_by(func.left(Bra.id, 3))
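# A self-contained sketch of the count-by-prefix pattern above, with a made-up
# "regions" model standing in for Bra; the model, engine URL, and code lengths
# are assumptions for illustration, not part of the original code.
from sqlalchemy import Column, String, create_engine, func, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Region(Base):
    __tablename__ = "regions"
    id = Column(String(9), primary_key=True)
    name = Column(String(200))

engine = create_engine("postgresql+psycopg2://localhost/demo")  # assumed DSN

state = func.left(Region.id, 3).label("state")
stmt = (
    select(state, func.count(Region.id).label("count"))
    .where(func.length(Region.id) == 9)  # only the most specific codes
    .group_by(state)
)
# with Session(engine) as session:
#     for row in session.execute(stmt):
#         print(row.state, row.count)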
def ak_report_act(self):
    global mulai, selesai, tingkat
    req = self.request
    params = req.params
    url_dict = req.matchdict
    mulai = 'mulai' in params and params['mulai'] or 0
    selesai = 'selesai' in params and params['selesai'] or 0
    kel = 'kel' in params and params['kel'] or 0
    rekid = 'rekid' in params and params['rekid'] or 0
    sapid = 'sapid' in params and params['sapid'] or 0

    if url_dict['act'] == 'bb':
        if kel == '1':
            query = DBSession.query(
                Jurnal.kode.label('jurnal_kd'), Jurnal.nama.label('jurnal_nm'),
                Jurnal.tanggal, Jurnal.tgl_transaksi, Jurnal.tahun_id,
                Unit.kode.label('unit_kd'), Unit.nama.label('unit_nm'),
                Jurnal.periode, Jurnal.jv_type, Jurnal.source, Jurnal.source_no,
                JurnalItem.amount,
                Sap.kode.label('rek_kd'), Sap.nama.label('rek_nm'),
            ).filter(Jurnal.id == JurnalItem.jurnal_id,
                     Jurnal.unit_id == Unit.id,
                     Jurnal.tahun_id == self.session['tahun'],
                     Jurnal.tanggal.between(mulai, selesai),
                     JurnalItem.sap_id == Sap.id
            ).order_by(Jurnal.tanggal).all()
            generator = r011Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response
        elif kel == '2':
            """subq = DBSession.query(JurnalItem.rekening_id).filter(
                        JurnalItem.sap_id==sapid,
                        Jurnal.id==JurnalItem.jurnal_id,
                        Jurnal.unit_id==self.session['unit_id'],
                        Jurnal.tahun_id==self.session['tahun']
                    ).group_by(JurnalItem.rekening_id).subquery()
            """
            query = DBSession.query(
                Jurnal.kode.label('jurnal_kd'), Jurnal.nama.label('jurnal_nm'),
                Jurnal.tanggal, Jurnal.tgl_transaksi, Jurnal.tahun_id,
                Unit.kode.label('unit_kd'), Unit.nama.label('unit_nm'),
                Jurnal.periode, Jurnal.jv_type, Jurnal.source, Jurnal.source_no,
                JurnalItem.amount,
                Sap.kode.label('rek_kd'), Sap.nama.label('rek_nm'),
            ).filter(Jurnal.id == JurnalItem.jurnal_id,
                     Jurnal.unit_id == Unit.id,
                     Jurnal.unit_id == self.session['unit_id'],
                     Jurnal.tahun_id == self.session['tahun'],
                     Jurnal.tanggal.between(mulai, selesai),
                     JurnalItem.sap_id == Sap.id
            ).order_by(Jurnal.tanggal).all()
            generator = r012Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response

    elif url_dict['act'] == 'lrasap':
        if kel == '1':
            subq = DBSession.query(
                Sap.kode.label('subrek_kd'), Sap.nama.label('rek_nm'),
                JurnalItem.amount, Jurnal.tahun_id
            ).filter(JurnalItem.sap_id == Sap.id,
                     JurnalItem.jurnal_id == Jurnal.id,
                     JurnalItem.amount > 0,
                     Jurnal.tahun_id == self.session['tahun']
            ).subquery()
            query = DBSession.query(
                Sap.kode, Sap.nama,
                subq.c.tahun_id.label('tahun_id'),
                func.sum(subq.c.amount).label('amount')
            ).filter(Sap.kode == func.left(subq.c.subrek_kd, func.length(Sap.kode)),
                     func.substr(Sap.kode, 1, 1).in_(['4', '5', '6'])
            ).group_by(Sap.kode, Sap.nama, subq.c.tahun_id,
            ).order_by(Sap.kode)
            generator = r021Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response
        elif kel == '2':
            subq = DBSession.query(
                Sap.kode.label('subrek_kd'), Sap.nama.label('rek_nm'),
                Unit.kode.label('unit_kd'), Unit.nama.label('unit_nm'),
                JurnalItem.amount, Jurnal.tahun_id
            ).filter(JurnalItem.sap_id == Sap.id,
                     JurnalItem.jurnal_id == Jurnal.id,
                     Jurnal.unit_id == Unit.id,
                     JurnalItem.amount > 0,
                     Jurnal.tahun_id == self.session['tahun'],
                     Jurnal.unit_id == self.session['unit_id']
            ).subquery()
            query = DBSession.query(
                Sap.kode, Sap.nama,
                subq.c.tahun_id.label('tahun_id'),
                subq.c.unit_kd.label('unit_kd'),
                subq.c.unit_nm.label('unit_nm'),
                func.sum(subq.c.amount).label('amount')
            ).filter(Sap.kode == func.left(subq.c.subrek_kd, func.length(Sap.kode)),
                     func.substr(Sap.kode, 1, 1).in_(['4', '5', '6'])
            ).group_by(Sap.kode, Sap.nama, subq.c.tahun_id,
                       subq.c.unit_kd, subq.c.unit_nm
            ).order_by(Sap.kode)
            generator = r022Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response

    elif url_dict['act'] == 'lo':
        if kel == '1':
            subq = DBSession.query(
                Sap.kode.label('subrek_kd'), Sap.nama.label('rek_nm'),
                JurnalItem.amount, Jurnal.tahun_id
            ).filter(JurnalItem.sap_id == Sap.id,
                     JurnalItem.jurnal_id == Jurnal.id,
                     JurnalItem.amount > 0,
                     Jurnal.tahun_id == self.session['tahun']
            ).subquery()
            query = DBSession.query(
                Sap.kode, Sap.nama,
                subq.c.tahun_id.label('tahun_id'),
                func.sum(subq.c.amount).label('amount')
            ).filter(Sap.kode == func.left(subq.c.subrek_kd, func.length(Sap.kode)),
                     func.substr(Sap.kode, 1, 1).in_(['8', '9'])
            ).group_by(Sap.kode, Sap.nama, subq.c.tahun_id,
            ).order_by(Sap.kode)
            generator = r051Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response
        elif kel == '2':
            subq = DBSession.query(
                Sap.kode.label('subrek_kd'), Sap.nama.label('rek_nm'),
                Unit.kode.label('unit_kd'), Unit.nama.label('unit_nm'),
                JurnalItem.amount, Jurnal.tahun_id
            ).filter(JurnalItem.sap_id == Sap.id,
                     JurnalItem.jurnal_id == Jurnal.id,
                     Jurnal.unit_id == Unit.id,
                     JurnalItem.amount > 0,
                     Jurnal.tahun_id == self.session['tahun'],
                     Jurnal.unit_id == self.session['unit_id']
            ).subquery()
            query = DBSession.query(
                Sap.kode, Sap.nama,
                subq.c.tahun_id.label('tahun_id'),
                subq.c.unit_kd.label('unit_kd'),
                subq.c.unit_nm.label('unit_nm'),
                func.sum(subq.c.amount).label('amount')
            ).filter(Sap.kode == func.left(subq.c.subrek_kd, func.length(Sap.kode)),
                     func.substr(Sap.kode, 1, 1).in_(['8', '9'])
            ).group_by(Sap.kode, Sap.nama, subq.c.tahun_id,
                       subq.c.unit_kd, subq.c.unit_nm
            ).order_by(Sap.kode)
            generator = r052Generator()
            pdf = generator.generate(query)
            response = req.response
            response.content_type = "application/pdf"
            response.content_disposition = 'filename=output.pdf'
            response.write(pdf)
            return response
def query_sdss4_dr16_data_model_identifiers_from_database(self, mjd_start, mjd_end):
    """
    Query the SDSS database for SDSS-IV (DR16) ApStar data model identifiers.

    :param context:
        The Airflow DAG execution context.
    """
    release, filetype = ("DR16", "apStar")
    columns = (
        func.left(catalogdb.SDSSDR16ApogeeStar.file, 2).label("prefix"),
        catalogdb.SDSSDR16ApogeeStar.field,
        catalogdb.SDSSDR16ApogeeStar.apstar_version.label("apstar"),
        catalogdb.SDSSDR16ApogeeStar.telescope,
        catalogdb.SDSSDR16ApogeeStar.apogee_id.label("obj"),
        func.right(func.left(catalogdb.SDSSDR16ApogeeStar.file, 10), 3).label("apred"),
    )

    if not self._skip_sources_with_more_recent_observations:
        # The SDSSDR16ApogeeStar table does not have any MJD information.
        mjd = catalogdb.SDSSDR16ApogeeVisit.mjd
        q = session.query(*columns, mjd).distinct(*columns, mjd).join(
            catalogdb.SDSSDR16ApogeeVisit,
            catalogdb.SDSSDR16ApogeeVisit.apogee_id == catalogdb.SDSSDR16ApogeeStar.apogee_id)
    else:
        # Get the max MJD of any observations.
        sq = session.query(
            *columns,
            func.max(catalogdb.SDSSDR16ApogeeVisit.mjd).label('max_mjd')).join(
                catalogdb.SDSSDR16ApogeeVisit,
                catalogdb.SDSSDR16ApogeeVisit.apogee_id == catalogdb.SDSSDR16ApogeeStar.apogee_id
            ).group_by(*columns).subquery()
        mjd = sq.c.max_mjd
        q = session.query(*columns, mjd).join(
            sq, catalogdb.SDSSDR16ApogeeStar.apogee_id == sq.c.obj)

    q = q.filter(mjd < mjd_end)\
         .filter(mjd >= mjd_start)

    if self._query_filter_by_kwargs is not None:
        q = q.filter_by(**self._query_filter_by_kwargs)

    if self._limit is not None:
        q = q.limit(self._limit)

    log.debug(
        f"Found {q.count()} {release} {filetype} files between MJD {mjd_start} and {mjd_end}"
    )

    common = dict(release=release, filetype=filetype)
    keys = [column.name for column in columns]
    # The MJD will not be included because len(keys) < len(values) and zip
    # will only take the shorter of both.
    for values in q.yield_per(1):
        yield {**common, **dict(zip(keys, values))}
def _archive_access_log(engine, session, default_domain_name):
    try:
        Temp = declarative_base()

        class AccessLogToArchive(Temp):
            __tablename__ = 'accessLogToArchive'
            __table_args__ = {'prefixes': ['TEMPORARY']}
            id = Column(Integer, primary_key=True)
            time_since_epoch = Column(Numeric(15, 3))
            ip_client = Column(CHAR(15))
            http_status_code = Column(String(10))
            http_reply_size = Column(Integer)
            http_url = Column(Text)
            http_username = Column(String(100))
            userId = Column(Integer)
            archived = Column(Boolean)

        Temp.metadata.create_all(bind=engine)

        # Fill temporary table with unarchived chunk of data
        access_log_subquery = session.query(
            AccessLog.id,
            AccessLog.time_since_epoch,
            AccessLog.ip_client,
            AccessLog.http_status_code,
            AccessLog.http_reply_size,
            AccessLog.http_url,
            case([(
                sqlalchemy.or_(
                    AccessLog.http_username.contains('@'),
                    AccessLog.http_username.contains('\\'),
                    AccessLog.http_username == '-'),
                AccessLog.http_username)],
                else_=AccessLog.http_username + '@' + default_domain_name).label('http_username'),
            AccessLog.archived).filter(is_(AccessLog.archived, None)).limit(1000000)  # limit to prevent overload on a huge amount of data

        ins = insert(AccessLogToArchive).from_select([
            'id', 'time_since_epoch', 'ip_client', 'http_status_code',
            'http_reply_size', 'http_url', 'http_username', 'archived'],
            access_log_subquery)
        session.execute(ins)

        query_result = session.query(AccessLogToArchive.http_username, User.cn).filter(
            and_(User.authMethod == 0,
                 User.userPrincipalName == AccessLogToArchive.http_username)).all()

        # Set user ID field
        session.query(AccessLogToArchive).filter(
            or_(
                and_(User.authMethod == 0,
                     User.userPrincipalName == AccessLogToArchive.http_username),
                and_(User.authMethod == 1,
                     User.ip == AccessLogToArchive.ip_client))).update(
            {AccessLogToArchive.userId: User.id}, synchronize_session=False)

        session.query(AccessLog).filter(AccessLog.id == AccessLogToArchive.id).update(
            {AccessLog.userId: AccessLogToArchive.userId}, synchronize_session=False)

        # Get host from URL: strip protocol
        session.query(AccessLogToArchive).filter(
            func.locate('://', AccessLogToArchive.http_url) > 0).update(
            {AccessLogToArchive.http_url: func.substring(
                AccessLogToArchive.http_url,
                func.locate('://', AccessLogToArchive.http_url) + 3)},
            synchronize_session=False)

        # Get host from URL: strip port and the rest
        session.query(AccessLogToArchive).filter(
            func.locate(':', AccessLogToArchive.http_url) > 0).update(
            {AccessLogToArchive.http_url: func.left(
                AccessLogToArchive.http_url,
                func.locate(':', AccessLogToArchive.http_url) - 1)},
            synchronize_session=False)

        # Get host from URL: strip everything after the first slash
        session.query(AccessLogToArchive).filter(
            func.locate('/', AccessLogToArchive.http_url) > 0).update(
            {AccessLogToArchive.http_url: func.left(
                AccessLogToArchive.http_url,
                func.locate('/', AccessLogToArchive.http_url) - 1)},
            synchronize_session=False)

        # Make summary traffic table
        subquery = session.query(
            func.date(func.from_unixtime(AccessLogToArchive.time_since_epoch)).label('date'),
            AccessLogToArchive.userId,
            AccessLogToArchive.http_url.label('host'),
            func.sum(AccessLogToArchive.http_reply_size).label('traffic')).\
            filter(AccessLogToArchive.http_status_code.like('2%')).\
            group_by(
                func.date(func.from_unixtime(AccessLogToArchive.time_since_epoch)),
                AccessLogToArchive.userId,
                AccessLogToArchive.http_url).\
            having(func.sum(AccessLogToArchive.http_reply_size) > 0).subquery()

        # Update existing rows
        session.query(AccessLogArchive).filter(
            AccessLogArchive.date == subquery.c.date,
            AccessLogArchive.userId == subquery.c.userId,
            AccessLogArchive.host == subquery.c.host).\
            update({AccessLogArchive.traffic: AccessLogArchive.traffic + subquery.c.traffic},
                   synchronize_session=False)

        # Insert new rows
        access_log_subquery = session.query(subquery).outerjoin(
            AccessLogArchive,
            and_(
                AccessLogArchive.date == subquery.c.date,
                AccessLogArchive.userId == subquery.c.userId,
                AccessLogArchive.host == subquery.c.host)).\
            filter(AccessLogArchive.id.is_(None), subquery.c.userId.isnot(None))
        ins = insert(AccessLogArchive).from_select(
            ['date', 'userId', 'host', 'traffic'], access_log_subquery)
        session.execute(ins)

        # Mark access log chunk as archived
        session.query(AccessLog).filter(
            AccessLog.id == AccessLogToArchive.id).\
            update({AccessLog.archived: 1}, synchronize_session=False)

        session.commit()

        '''
        # Drop temp table
        AccessLogToArchive.__table__.drop(engine)
        '''
    except:
        session.rollback()
        raise
def confirmation_number(cls):
    return func.left(cls.web_confirmation_number,
                     func.charindex(".", cls.web_confirmation_number) - 1)
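# Note: charindex() is a SQL Server function; func.* simply emits whatever name
# it is given. A hedged sketch of the same "text before the first dot" idea for
# a PostgreSQL backend (strpos instead of charindex); the model and column here
# are hypothetical and not from the snippet above.
from sqlalchemy import Column, Integer, String, func, select
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Booking(Base):
    __tablename__ = "bookings"
    id = Column(Integer, primary_key=True)
    web_confirmation_number = Column(String(64))

# LEFT(col, STRPOS(col, '.') - 1): everything before the first '.'
confirmation_prefix = func.left(
    Booking.web_confirmation_number,
    func.strpos(Booking.web_confirmation_number, ".") - 1,
).label("confirmation_number")

stmt = select(Booking.id, confirmation_prefix)
# print(stmt)  # inspect the rendered SQL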
EngineLogging.configure_console_logging(logging.DEBUG)

# replace with a credentials dictionary or provide a credentials file
with open('/Users/ryan/watson-iot/functions/scripts/credentials_as_dev.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

db = Database(credentials=credentials)
db_schema = None  # set if you are not using the default

table = db.get_table('MIKE_ROBOT_JUNE_25')
dim = db.get_table('MIKE_ROBOT_JUNE_25_DIMENSION')

group_by = {
    'plant_abv': func.left(table.c['plant_code'], 3),
    'manufacturer': dim.c['manufacturer']
}
aggs = {'avg_speed': (table.c['speed'], func.avg)}


def prepare_aggregate_query(group_by, aggs):
    # build a sub query.
    sargs = []
    for alias, expression in list(group_by.items()):
        sargs.append(expression.label(alias))
    for alias, (metric, agg) in list(aggs.items()):
        sargs.append(metric.label(alias))
    db.start_session()
    query = db.session.query(*sargs)
def id_string(cls):
    return func.concat(
        func.concat(
            func.concat(
                func.left(
                    db.select([Question_Type.type_name])
                      .where(Question_Type.id == cls.question_type_id)
                      .limit(1).as_scalar(),
                    1),
                cls.letter),
            func.right(func.concat('000000', cls.id), 6)),
        "Q")