def list_expired_dids(worker_number=None, total_workers=None, limit=None, session=None): """ List expired data identifiers. :param limit: limit number. :param session: The database session in use. """ query = session.query(models.DataIdentifier.scope, models.DataIdentifier.name, models.DataIdentifier.did_type).\ filter(models.DataIdentifier.expired_at < datetime.utcnow()).\ order_by(models.DataIdentifier.expired_at).\ with_hint(models.DataIdentifier, "index(DIDS DIDS_EXPIRED_AT_IDX)", 'oracle') if worker_number and total_workers and total_workers - 1 > 0: if session.bind.dialect.name == 'oracle': bindparams = [bindparam('worker_number', worker_number-1), bindparam('total_workers', total_workers-1)] query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams)) elif session.bind.dialect.name == 'mysql': query = query.filter('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1)) elif session.bind.dialect.name == 'postgresql': query = query.filter('mod(abs((\'x\'||md5(name))::bit(32)::int), %s) = %s' % (total_workers-1, worker_number-1)) elif session.bind.dialect.name == 'sqlite': row_count = 0 dids = list() for scope, name, did_type in query.yield_per(10): if int(md5(name).hexdigest(), 16) % total_workers == worker_number-1: dids.append({'scope': scope, 'name': name, 'did_type': did_type}) row_count += 1 if limit and row_count >= limit: return dids return dids if limit: query = query.limit(limit) return [{'scope': scope, 'name': name, 'did_type': did_type} for scope, name, did_type in query]
def visit_idea(self, idea, level, prev_result):
    if idea.short_title:
        self.counter.add_text(self.cleantext(idea.short_title), 2)
    if idea.long_title:
        self.counter.add_text(self.cleantext(idea.long_title))
    if idea.definition:
        self.counter.add_text(self.cleantext(idea.definition))
    if self.count_posts and level == 0:
        from .generic import Content
        related = text(
            Idea._get_related_posts_statement(),
            bindparams=[bindparam('root_idea_id', idea.id),
                        bindparam('discussion_id', idea.discussion_id)]
        ).columns(column('post_id')).alias('related')
        titles = set()
        # TODO maparent: Reoptimize
        for content in idea.db.query(Content).join(
                related, related.c.post_id == Content.id):
            body = content.body.first_original().value
            self.counter.add_text(self.cleantext(body), 0.5)
            title = content.subject.first_original().value
            title = self.cleantext(title)
            if title not in titles:
                self.counter.add_text(title)
                titles.add(title)
def add_father_ids(engine):
    ct = table_scheme.categories_t
    connection = engine.connect()

    # prepared statements
    selection = ct.select().where(ct.c.Topic == bindparam('f_topic'))
    fid_update = ct.update().where(ct.c.catid == bindparam('child_id')).values(fatherid=bindparam('fatherid_'))

    all_categories = connection.execute('SELECT * FROM categories')
    counter = 0
    sys.stdout.write('\n')
    for row in all_categories:
        counter += 1
        topic = row.Topic
        title = row.Title
        catid = row.catid
        if catid < 3:  # ignore "" and "Top"
            continue
        index = len(topic) - (len(title) + 1)
        father_topic = topic[:index]
        father_selection = connection.execute(selection, f_topic=father_topic)
        father = father_selection.first()
        if father is None:
            LOG.debug('Found no father for "{0}", searched for "{1}"'.format(topic, father_topic))
            continue
        father_id = father[ct.c.catid]
        connection.execute(fid_update, child_id=catid, fatherid_=father_id)
        if counter % 10000 == 0:
            sys.stdout.write('.')
        if counter % 200000 == 0:
            sys.stdout.write(' - {0} ids generated\n'.format(counter))
        sys.stdout.flush()
    print()
def handle(self, *args, **options):
    # set up
    config = get_config()
    if config is None:
        raise CommandError('Unable to process configuration file p_to_p.yml')

    connection = get_connection(config)
    pedsnet_session = init_pedsnet(connection)
    init_pcornet(connection)

    observation_period = pedsnet_session.query(
        ObservationPeriod.person_id,
        ObservationPeriod.observation_period_start_date,
        ObservationPeriod.observation_period_end_date,
        ObservationPeriod.site,
        bindparam("chart", 'Y'),
        bindparam("enr_basis", 'E')
    ).filter(
        exists().where(ObservationPeriod.person_id == PersonVisit.person_id)).all()

    odo(observation_period, Enrollment.__table__,
        dshape='var * {patid: string, enr_start_date: date, enr_end_date: date, site: string, chart: String, '
               'enr_basis: String} '
        )

    # close session
    pedsnet_session.close()

    # output result
    self.stdout.ending = ''
    print('Enrollment ETL completed successfully', end='', file=self.stdout)
def run(self): session = self.session() engine = session._WopMarsSession__session.bind conn = engine.connect() # snp2phenotype_path = self.input_file(InsertSNP2Phenotype.__input_file_snp2phenotype) snp_model = self.input_table(InsertSNP2Phenotype.__input_table_snp) phenotype_model = self.input_table(InsertSNP2Phenotype.__input_table_phenotype) snp2phenotype_model = self.output_table(InsertSNP2Phenotype.__output_table_snp2phenotype) snp2phenotype_df = pandas.read_table(snp2phenotype_path, header=None) # # read input file input_file_obj_list = [] for line in csv.reader(open(snp2phenotype_path, 'r', encoding='utf-8'), delimiter="\t"): snp_rsid = int(line[0]) phenotype_name = line[1] input_file_obj_list.append({'snp_rsid' : snp_rsid, 'phenotype_name' : phenotype_name}) # # create insert snp_select = select([snp_model.id]).where(snp_model.rsid==bindparam('snp_rsid')) phenotype_select = select([phenotype_model.id]).where(phenotype_model.name==bindparam('phenotype_name')) output_table_insert = insert(table=snp2phenotype_model.__table__, values={'snp_id': snp_select, 'phenotype_id': phenotype_select}) # if len(input_file_obj_list) > 0: if str(engine.__dict__['url']).split("://")[0]=='sqlite': engine.execute(output_table_insert.prefix_with("OR IGNORE", dialect='sqlite'), input_file_obj_list) elif str(engine.__dict__['url']).split("://")[0]=='mysql': from warnings import filterwarnings # three lines to suppress mysql warnings import MySQLdb as Database filterwarnings('ignore', category = Database.Warning) engine.execute(output_table_insert.prefix_with("IGNORE", dialect='mysql'), input_file_obj_list) elif str(engine.__dict__['url']).split("://")[0]=='postgresql': from sqlalchemy.dialects.postgresql import insert as pg_insert output_table_insert_pg = pg_insert(table=snp2phenotype_model.__table__, values={'snp_id': snp_select, 'phenotype_id': phenotype_select}).on_conflict_do_nothing(index_elements=['snp_id', 'phenotype_id']) engine.execute(output_table_insert_pg, input_file_obj_list)
def update_item_saved_info(item):
    engine = get_onitsuka_db_engine()

    item_owner_id = item['owner_id']
    item_id = item['item_id']

    user_following = Table('user_following', metaData, autoload=True, autoload_with=engine)
    s = select([user_following.c.user_id], (user_following.c.following_id == item_owner_id))
    result = engine.execute(s)

    user_feed_update_list = list()
    for follower in result:
        item_owner_follower_id = follower['user_id']
        print(item_owner_follower_id)
        user_feed_update_item = {}
        user_feed_update_item['user_id'] = item_owner_follower_id
        user_feed_update_item['owner_id'] = item_owner_id
        user_feed_update_item['item_id'] = item_id
        user_feed_update_list.append(user_feed_update_item)
    result.close()

    user_feed_table = Table('user_feed', metaData, autoload=True, autoload_with=engine)
    ins = user_feed_table.insert().values(
        user_id=bindparam('user_id'),
        owner_id=bindparam('owner_id'),
        item_id=bindparam('item_id'))
    engine.execute(ins, user_feed_update_list)
def demographic_etl(config): # set up connection = get_connection(config) pedsnet_session = init_pedsnet(connection) init_pcornet(connection) # multiple aliases for pedsnet_pcornet_valueset_map # to allow the three named joins gender_value_map = aliased(ValueSetMap) ethnicity_value_map = aliased(ValueSetMap) race_value_map = aliased(ValueSetMap) # extract the data from the person table person = pedsnet_session.query(Person.person_id, Person.birth_date, Person.birth_time, coalesce(gender_value_map.target_concept, 'OT'), coalesce(ethnicity_value_map.target_concept, 'OT'), coalesce(race_value_map.target_concept, 'OT'), bindparam("biobank_flag", "N"), Person.gender_source_value, Person.ethnicity_source_value, Person.race_source_value, Person.site, bindparam("gender_identity", None), bindparam("raw_gender_identity", None), bindparam("sexual_orientation", None), bindparam("raw_sexual_orientation", None) ). \ outerjoin(gender_value_map, and_(gender_value_map.source_concept_class == 'Gender', case([(and_(Person.gender_concept_id == None, gender_value_map.source_concept_id == None), True)], else_=cast(Person.gender_concept_id, String(200)) == gender_value_map.source_concept_id))). \ outerjoin(ethnicity_value_map, and_(ethnicity_value_map.source_concept_class == 'Hispanic', case([(and_(Person.ethnicity_concept_id == None, ethnicity_value_map.source_concept_id == None), True)], else_=cast(Person.ethnicity_concept_id, String(200)) == ethnicity_value_map.source_concept_id))). \ outerjoin(race_value_map, and_(race_value_map.source_concept_class == 'Race', case([(and_(Person.race_concept_id == None, race_value_map.source_concept_id == None), True)], else_=cast(Person.race_concept_id, String(200)) == race_value_map.source_concept_id))).all() # transform data to pcornet names and types # load to demographic table odo(person, Demographic.__table__, dshape='var * {patid: string, birth_date: date, birth_time: string, sex: string,' 'hispanic: string, race: string, biobank_flag: string, raw_sex: string,' 'raw_hispanic: string, raw_race:string, site: string, gender_identity: string,' 'raw_gender_identity: string, sexual_orientation: string, raw_sexual_orientation: string}' ) # close session pedsnet_session.close()
def insert_stock_data(country, market_gsi):
    market_id = [ct[0] for ct in markets if ct[1] == market_gsi][0]

    insert_stmt = tc_company_stock_prices.insert().values(
        for_date=bindparam('DailyDate'),
        market_id=bindparam('market_id'),
        company_id=bindparam('CompanyID'),
        open=bindparam('Open'),
        max=bindparam('Max'),
        min=bindparam('Min'),
        close=bindparam('Close'),
        volume=bindparam('Volume'),
        amount=bindparam('Amount'))

    print(insert_stmt)

    result = _read_data(country + '.csv')
    for item in result:
        item['DailyDate'] = datetime.strptime(item['DailyDate'], "%Y-%m-%d").date()
        item['CompanyID'] = int(item['CompanyID'])
        item['Open'] = float(item['Open'])
        item['Close'] = float(item['Close'])
        item['Min'] = float(item['Min'])
        item['Max'] = float(item['Max'])
        item['Volume'] = int(item['Volume'])
        item['Amount'] = int(item['Amount'])
        item['market_id'] = market_id

    # for i, row in enumerate(result):
    #     print(row)
    #     if i == 10:
    #         break

    with engine.connect() as conn:
        conn.execute(insert_stmt, result)
def run_letter(letter, session, doctype='grant'):
    schema = RawLawyer
    if doctype == 'application':
        schema = App_RawLawyer
    letter = letter.upper()
    clause1 = schema.organization.startswith(bindparam('letter', letter))
    clause2 = schema.name_first.startswith(bindparam('letter', letter))
    clauses = or_(clause1, clause2)
    lawyers = (lawyer for lawyer in session.query(schema).filter(clauses))
    block = clean_lawyers(lawyers)
    create_jw_blocks(block)
    create_lawyer_table(session)
def run_letter(letter, session, doctype='grant'):
    schema = RawAssignee
    if doctype == 'application':
        schema = App_RawAssignee
    letter = letter.upper()
    clause1 = schema.organization.startswith(bindparam('letter', letter))
    clause2 = schema.name_first.startswith(bindparam('letter', letter))
    clauses = or_(clause1, clause2)
    assignees = (assignee for assignee in session.query(schema).filter(clauses))
    block = clean_assignees(assignees)
    create_jw_blocks(block)
    create_assignee_table(session)
def get_context_data(self, **kwargs): filter_form = ProgressFilterForm(request.args) conclusion_type = filter_form.conclusion.data dataset = filter_form.dataset status_level = self.model_eu_cls.conclusion_status_level2 label_type = self.TREND_LABEL species = [] if conclusion_type: if conclusion_type == 'bs': status_level = self.model_eu_cls.conclusion_status_level1 conclusion_value = self.model_eu_cls.conclusion_status_label label_type = self.STATUS_LABEL elif conclusion_type == 'stbp': conclusion_value = self.model_eu_cls.br_population_trend elif conclusion_type == 'ltbp': conclusion_value = self.model_eu_cls.br_population_trend_long elif conclusion_type == 'stwp': conclusion_value = self.model_eu_cls.wi_population_trend elif conclusion_type == 'ltwp': conclusion_value = self.model_eu_cls.wi_population_trend_long else: raise ValueError('Unknown conclusion type') eu_species = self.get_species_qs(dataset, conclusion_value, status_level) ignore_species = ( self.model_eu_cls.query .with_entities(self.model_eu_cls.speciescode) ) ms_species = ( LuDataBird.query .filter(~LuDataBird.speciescode.in_(ignore_species)) .filter_by(dataset=dataset) .with_entities(LuDataBird.speciescode.label('code'), LuDataBird.speciesname.label('name'), bindparam('conclution', ''), bindparam('status', ''), bindparam('additional_record', 0)) ) species = sorted(eu_species.union(ms_species), key=lambda x: x.name) return { 'filter_form': filter_form, 'species': species, 'current_selection': filter_form.get_selection(), 'dataset': dataset, 'label_type': label_type, }
def upgrade_severity_levels(session, severity_map): """ Updates the potentially changed severities at the reports. """ LOG.debug("Upgrading severity levels started...") # Create a sql query from the severity map. severity_map_q = union_all(*[ select([cast(bindparam('checker_id' + str(i), str(checker_id)) .label('checker_id'), sqlalchemy.String), cast(bindparam('severity' + str(i), Severity._NAMES_TO_VALUES[ severity_map[checker_id]]) .label('severity'), sqlalchemy.Integer)]) for i, checker_id in enumerate(severity_map)]) \ .alias('new_severities') checker_ids = severity_map.keys() # Get checkers which has been changed. changed_checker_q = select([Report.checker_id, Report.severity]) \ .group_by(Report.checker_id, Report.severity) \ .where(Report.checker_id.in_(checker_ids)) \ .except_(session.query(severity_map_q)).alias('changed_severites') changed_checkers = session.query(changed_checker_q.c.checker_id, changed_checker_q.c.severity) # Update severity levels of checkers. if changed_checkers: updated_checker_ids = set() for checker_id, severity_old in changed_checkers: severity_new = severity_map.get(checker_id, 'UNSPECIFIED') severity_id = Severity._NAMES_TO_VALUES[severity_new] LOG.info("Upgrading severity level of '%s' checker from %s to %s", checker_id, Severity._VALUES_TO_NAMES[severity_old], severity_new) if checker_id in updated_checker_ids: continue session.query(Report) \ .filter(Report.checker_id == checker_id) \ .update({Report.severity: severity_id}) updated_checker_ids.add(checker_id) session.commit() LOG.debug("Upgrading of severity levels finished...")
def db_operates(action, conn, tbl, rows, pk=['id']): if rows is None or len(rows)==0: return 0 cnt = 0 if action in ('del', 'mod'): # generate where clause u_where_params = [] for o in pk: if action=='mod': u_where_params.append(tbl.c[o]==bindparam('_'+o)) else: u_where_params.append(tbl.c[o]==bindparam(o)) u_where_clause = and_(*u_where_params) if action=='add': if len(rows)==-1: respxy = conn.execute(tbl.insert(), rows[0]) for idx in xrange(0, len(pk)): rows[0][pk[idx]]=respxy.inserted_primary_key[idx] else: respxy = conn.execute(tbl.insert(), rows) cnt = respxy.rowcount elif action=='mod': # generate values params u_value_keys = {} def prepare_columns(t_k, row_): for k in row_.keys(): if tbl.columns.has_key(k) and not k in pk: if u_value_keys.has_key(k): t_k[k] = u_value_keys[k] else: t_k[k] = u_value_keys[k] = bindparam(k) # preparation for key=id t_u_value_keys = {} for row in rows: prepare_columns(t_u_value_keys, row) for k in row.keys(): if k in pk: row['_'+k]=row[k] st = tbl.update().where(u_where_clause).values(**t_u_value_keys) respxy = conn.execute(st, [row]) cnt += respxy.rowcount t_u_value_keys.clear() del st # reset for key=id for row in rows: for k in row.keys(): if k in pk: del row['_'+k] elif action=='del': respxy = conn.execute(tbl.delete().where(u_where_clause), rows) cnt = respxy.rowcount return cnt
def get_user_id(email=None, session_id=None):
    """
    Helper function that returns the user_id for a given email address
    """
    if email is not None:
        result = db.session.execute(
            text("SELECT aaa.get_user_id_by_email(:email)",
                 bindparams=[bindparam('email', email)]))
        return result.first()[0]

    if session_id is not None:
        result = db.session.execute(
            text("SELECT aaa.get_user_id_by_session_id(:session)",
                 bindparams=[bindparam('session', session_id)]))
        return result.first()[0]

    return None
def _update(self, type, offset, values):
    """
    :param type: The type prefix to use
    :param offset: The address offset to start at
    :param values: The values to set
    """
    context = self._build_set(type, offset, values, prefix='x_')
    query = self._table.update().values(value='value')
    query = query.where(and_(
        self._table.c.type == bindparam('x_type'),
        self._table.c.index == bindparam('x_index')))
    result = self._connection.execute(query, context)
    return result.rowcount == len(values)
def insert_splits_data():
    stmt_insert_splits = splits_table.insert().values(
        split_id=bindparam('SplitID'),
        split_date=bindparam('SplitDate', type_=Date),
        company_id=bindparam('CompanyID'),
        ratio=bindparam('Ratio;'))

    print(stmt_insert_splits)

    result = _read_data('splits.csv')
    for item in result:
        item['SplitDate'] = datetime.strptime(item['SplitDate'], "%Y-%m-%d").date()
        item['Ratio;'] = float(item['Ratio;'][0:len(item['Ratio;']) - 1])

    # for row in result:
    #     print(row)

    with engine.connect() as conn:
        conn.execute(stmt_insert_splits, result)
def handle(self, *args, **options): # set up config = get_config() if config is None: raise CommandError('Unable to process configuration file p_to_p.yml') connection = get_connection(config) pedsnet_session = init_pedsnet(connection) init_pcornet(connection) init_vocab(connection) pedsnet_pcornet_valueset_map = aliased(ValueSetMap) # extract the data from the death table death_cause = pedsnet_session.query(DeathPedsnet.person_id, func.left(DeathPedsnet.cause_source_value, 8), coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT'), bindparam("death_cause_type", "NI"), bindparam("death_cause_source", "L"), bindparam("death_cause_confidence", None), min(DeathPedsnet.site) ) \ .join(Demographic, Demographic.patid == cast(DeathPedsnet.person_id, String(256)), ) \ .join(VocabularyConcept, VocabularyConcept.concept_id == DeathPedsnet.cause_concept_id) \ .outerjoin(pedsnet_pcornet_valueset_map, and_(pedsnet_pcornet_valueset_map.source_concept_class == 'death cause code', cast(VocabularyConcept.vocabulary_id, String(200)) == pedsnet_pcornet_valueset_map.source_concept_id)) \ .filter(and_(DeathPedsnet.cause_source_value != None, DeathPedsnet.cause_source_concept_id != 44814650)) \ .group_by(DeathPedsnet.person_id, func.left(DeathPedsnet.cause_source_value, 8), coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT')) \ .all() # transform data to pcornet names and types # load to demographic table odo(death_cause, DeathCause.__table__, dshape='var * {patid: string, death_cause: string, death_cause_code: string,' 'death_cause_type: string, death_cause_source:string, ' 'death_cause_confidence: string, site: string}' ) # close session pedsnet_session.close() # output result self.stdout.ending = '' print('Death Cause ETL completed successfully', end='', file=self.stdout)
def list_unlocked_replicas(rse, limit, bytes=None, rse_id=None, worker_number=None, total_workers=None, delay_seconds=0, session=None): """ List RSE File replicas with no locks. :param rse: the rse name. :param bytes: the amount of needed bytes. :param session: The database session in use. :returns: a list of dictionary replica. """ if not rse_id: rse_id = get_rse_id(rse=rse, session=session) # filter(models.RSEFileAssociation.state != ReplicaState.BEING_DELETED).\ none_value = None # Hack to get pep8 happy... query = session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.bytes, models.RSEFileAssociation.tombstone).\ filter(models.RSEFileAssociation.tombstone < datetime.utcnow()).\ filter(models.RSEFileAssociation.lock_cnt == 0).\ filter(case([(models.RSEFileAssociation.tombstone != none_value, models.RSEFileAssociation.rse_id), ]) == rse_id).\ filter(or_(models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE)), and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED, models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds)))).\ order_by(models.RSEFileAssociation.tombstone).\ with_hint(models.RSEFileAssociation, "INDEX(replicas REPLICAS_TOMBSTONE_IDX)", 'oracle') if worker_number and total_workers and total_workers - 1 > 0: if session.bind.dialect.name == 'oracle': bindparams = [bindparam('worker_number', worker_number - 1), bindparam('total_workers', total_workers - 1)] query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams)) elif session.bind.dialect.name == 'mysql': query = query.filter('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1)) elif session.bind.dialect.name == 'postgresql': query = query.filter('mod(abs((\'x\'||md5(name))::bit(32)::int), %s) = %s' % (total_workers - 1, worker_number - 1)) query = query.limit(limit) rows = list() neededSpace = bytes totalbytes = 0 for (scope, name, bytes, tombstone) in query.yield_per(1000): if tombstone != OBSOLETE and neededSpace is not None and totalbytes >= neededSpace: break d = {'scope': scope, 'name': name, 'bytes': bytes} rows.append(d) if tombstone != OBSOLETE: totalbytes += bytes return rows
def _delete_multi_relation(self, cols, item_model, to_delete):
    to_delete = [dict(list(zip(cols, x))) for x in to_delete]
    session = self.dbsession
    d = delete(item_model).where(
        and_(*[getattr(item_model.c, x) == bindparam(x) for x in cols]))
    session.execute(d, to_delete)
def get_team_solved_subquery(team_id):
    """
    Get a query that searches for a submission from a team for a given
    challenge. The challenge is supposed to come from an outer query.

    Example usage:

    .. code-block:: python

        team_solved_subquery = get_team_solved_subquery(team_id)
        challenge_query = (DBSession.query(Challenge, team_solved_subquery))

    In this example we query for a list of all challenges and additionally
    fetch whether the currently logged in team has solved it.
    """
    # This subquery basically searches for whether the current team has
    # solved the corresponding challenge. The correlate statement is
    # a SQLAlchemy statement that tells it to use the **outer** challenge
    # column.
    if team_id:
        team_solved_subquery = (DBSession.query(Submission).
                                filter(Submission.team_id == team_id).
                                filter(Challenge.id == Submission.challenge_id).
                                correlate(Challenge).
                                exists().
                                label("has_solved"))
    else:
        team_solved_subquery = bindparam("has_solved", 0)
    return team_solved_subquery
def logout(): # Is there a destination post-logout? dsturl = None if request.referrer and local_request(request.referrer): dsturl = request.referrer else: dsturl = None # End the session in the database already_logged_out = False if 'li' in session: ses = db.session result = ses.execute( text("SELECT ret, col, msg FROM aaa.logout(:sid) AS (ret BOOL, col TEXT, msg TEXT)", bindparams=[bindparam('sid', session['i'])])) ses.commit() # For now, don't test the result of the logout call. Regardless of # whether or not a user provides us with a valid session ID from the # wrong IP address, terminate the session. Shoot first, ask questions # later (i.e. why was a BadUser in posession of GoodUser's session # ID?!) else: already_logged_out = True # Nuke every key in the session for k in session.keys(): session.pop(k) # Set a flash message after we nuke the keys in session if already_logged_out: flash('Session cleared for logged out user') else: flash('You were logged out') return render_template('aaa/logout.html', dsturl=dsturl)
def prepare_columns(t_k, row_):
    for k in row_.keys():
        if k in tbl.columns and k not in pk:
            if k in u_value_keys:
                t_k[k] = u_value_keys[k]
            else:
                t_k[k] = u_value_keys[k] = bindparam(k)
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    loop through the column mapping rows in the database and populate
    the stored procedure column based on the transformation name

    @param rule_map_list: A list of tuples containing mapping info.
        Tuples should be: (rule_name, proc_name)
    @param engine: sqlalchemy engine object
    @param conn: sqlalchemy connection object
    @param ref_schema: the name of the reference schema
    @param ref_table_name: the name of the reference table containing
        the column mapping info
    '''
    # check that the list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return

    with get_udl_connection() as conn:
        # get column_mapping table object
        col_map_table = conn.get_table(ref_table_name)

        # Generate sql to perform update
        update_stmt = col_map_table.update().where(
            col_map_table.c.transformation_rule == bindparam('rule_name'))
        update_stmt = update_stmt.values(
            stored_proc_name=bindparam('proc_name'),
            stored_proc_created_date=datetime.datetime.now())

        # Create list of dicts that sqlalchemy will recognize
        # to update all rules with corresponding stored procedure.
        for pair in rule_map_list:
            conn.execute(update_stmt, rule_name=pair[0], proc_name=pair[1])
def generate_insert_items(self, header, row):
    row = [r if str(r) != '' else '0' for r in row]
    columns_values = dict(zip(header, row))
    columns = [k for k, v in columns_values.items()]
    values = [":{k}".format(k=k) for k, v in columns_values.items()]
    params = [bindparam(k, v) for k, v in columns_values.items()]
    return (columns, values, params)
def upgrade(): # pylint: disable=too-many-locals """Upgrade events table to new bulk format. The data migration takes 3 steps, composing from two main parts - primary and secondary. Primary makes the "core" data migration (1. step), while the secondary fixes currently corrupted data (GET and BULK). 1. step: Migrate events.action = "IMPORT" to "BULK" 2. step: For a while we didn't have GET in events.action enum so the values that got inserted had action = "". We recognize these as action = "" and resource_type IS NOT NULL. 3. step: Due to missing data migration that migrated IMPORT to BULK without first changing values we convert event.action = "" AND event.resource_type IS NULL to "BULK" """ connection = op.get_bind() update_sql = events_table.update().where( events_table.c.id == bindparam("_id")) # 1st step import_sql = select([events_table]).where(events_table.c.action == "IMPORT") result_import_sql = connection.execute(import_sql).fetchall() import_ids = [{"_id": _id} for _id, _, _ in result_import_sql] update_import_sql = update_sql.values(action="BULK") if import_ids: connection.execute(update_import_sql, import_ids) # 2nd step missing_gets_sql = select([events_table]).where(and_( events_table.c.action == "", events_table.c.resource_type.isnot(None) )) result_missing_gets = connection.execute(missing_gets_sql).fetchall() gets_ids = [{"_id": _id} for _id, _, _ in result_missing_gets] update_missing_gets_sql = update_sql.values(action="GET") if gets_ids: connection.execute(update_missing_gets_sql, gets_ids) # 3rd step missing_bulks_sql = select([events_table]).where(and_( events_table.c.action == "", events_table.c.resource_type.is_(None) )) result_missing_bulks = connection.execute(missing_bulks_sql).fetchall() bulks_ids = [{"_id": _id} for _id, _, _ in result_missing_bulks] update_missing_bulks_sql = update_sql.values(action="BULK") if bulks_ids: connection.execute(update_missing_bulks_sql, bulks_ids) op.alter_column( "events", "action", type_=sa.Enum(u"POST", u"PUT", u"DELETE", u"BULK", u"GET"), existing_type=sa.Enum(u"POST", u"PUT", u"DELETE", u"IMPORT", u"BULK", u"GET"), nullable=False )
def _get_schema_check_query(schema_name):
    """
    returns the sql query to look for schema presence
    """
    query = select(['schema_name'],
                   from_obj=['information_schema.schemata']).where('schema_name = :schema_name')
    params = [bindparam('schema_name', schema_name)]
    return text(str(query), bindparams=params)
def test_unicode(self, engine, connection):
    """Verify that unicode strings make it through SQLAlchemy and the backend"""
    unicode_str = "白人看不懂"
    one_row = Table('one_row', MetaData(bind=engine))
    returned_str = sqlalchemy.select(
        [expression.bindparam("好", unicode_str)],
        from_obj=one_row,
    ).scalar()
    self.assertEqual(returned_str, unicode_str)
def find_prefix(cls, db, token):
    """Start the query for matching token.

    Returns an SQLAlchemy query already filtered by prefix-matches.
    """
    prefix = token[:cls.prefix_length]
    # since we can't filter on hashed values, filter on prefix
    # so we aren't comparing with all tokens
    return db.query(cls).filter(bindparam('prefix', prefix).startswith(cls.prefix))
def column(self, column):
    try:
        table = version_table(column.table)
    except KeyError:
        return column
    if self.parent and table == self.parent.__table__:
        return bindparam(column.key, getattr(self.parent, column.key))
    else:
        return table.c[column.name]
def test_unicode(self, engine, connection):
    """Verify that unicode strings make it through SQLAlchemy and the backend"""
    unicode_str = "中文"
    one_row = Table('one_row', MetaData(bind=engine))
    returned_str = sqlalchemy.select(
        [expression.bindparam("好", unicode_str)],
        from_obj=one_row,
    ).scalar()
    self.assertEqual(to_str(returned_str), unicode_str)
def commit_updates(session, update_key, update_statements, table, commit_frequency=1000): """ Executes bulk updates for a given table. This is typically much faster than going through the SQLAlchemy ORM. In order to be flexible, the update statements must be set up in a specific way. You can only update one column at a time. The dictionaries in the list `update_statements` must have two keys: `pk`, which is the primary_key for the record to be updated, and `update` which is the new value for the column you want to change. The column you want to change is specified as a string by the argument `update_key`. This method will work regardless if you run it over MySQL or SQLite, but with MySQL, it is usually faster to use the bulk_commit_updates method (see lib/tasks.py), because it uses a table join to do the updates instead of executing individual statements. Args: session -- alchemy session object update_key -- the name of the column we want to update update_statements -- list of dictionaries of updates. See above description table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items """ primary_key = list(table.primary_key.columns.values())[0] update_key = table.columns[update_key] u = table.update().where(primary_key == bindparam('pk')).values( {update_key: bindparam('update')}) numgroups = len(update_statements) / commit_frequency for ng in range(numgroups): if numgroups == 0: break chunk = update_statements[ng * commit_frequency:(ng + 1) * commit_frequency] session.connection().execute(u, *chunk) print("committing chunk", ng + 1, "of", numgroups, "with length", len(chunk), "at", datetime.now()) session.commit() last_chunk = update_statements[numgroups * commit_frequency:] if last_chunk: print("committing last", len(last_chunk), "records at", datetime.now()) print(" If it sticks here, use the assignee_patch.py file to fix it!") session.connection().execute(u, *last_chunk) session.commit()
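A minimal usage sketch for commit_updates: the session, table, and values are illustrative, and the update_statements shape simply follows the docstring above (one dict per record, with 'pk' and 'update' keys).

# Hypothetical example: bulk-fix the 'organization' column on two assignee rows.
update_statements = [
    {'pk': 'assignee_1', 'update': 'Cleaned Org Name 1'},
    {'pk': 'assignee_2', 'update': 'Cleaned Org Name 2'},
]
commit_updates(session, 'organization', update_statements,
               RawAssignee.__table__, commit_frequency=1000)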
def select_host_uuids_lacking_sync_dataset_expr(cls): r""" Get the query that generates the UUIDs of the hosts that lack some dataset (provided that it is a sync dataset). The query accepts a single bound parameter C{ds_uuid}. >>> str(DatasetOnHost.select_host_uuids_lacking_sync_dataset_expr()) \ ... # doctest:+NORMALIZE_WHITESPACE 'SELECT DISTINCT other_inhabitant.uuid AS uuid \nFROM dataset JOIN membership AS other_membership ON dataset.group_id = other_membership.group_id JOIN user_at_node AS other_user_at_node ON other_user_at_node.id = other_membership.user_id JOIN host AS other_host ON other_host.user_id = other_user_at_node.id JOIN inhabitant AS other_inhabitant ON other_inhabitant.id = other_host.id LEFT OUTER JOIN dataset_on_host AS ds_on_other_host ON ds_on_other_host.dataset_id = dataset.id AND ds_on_other_host.host_id = other_host.id \nWHERE dataset.uuid = :ds_uuid AND dataset.sync AND ds_on_other_host.id IS NULL AND NOT other_user_at_node.suspended' @rtype: sqlalchemy.sql.expression.Select """ other_membership = memberships.alias('other_membership') other_user_at_node = users_at_node.alias('other_user_at_node') other_host = hosts.alias('other_host') other_inhabitant = inhabitants.alias('other_inhabitant') ds_on_other_host = datasets_on_host.alias('ds_on_other_host') return select( columns=[other_inhabitant.c.uuid.label('uuid')], from_obj=datasets.join( other_membership, datasets.c.group == other_membership.c.group).join( other_user_at_node, other_user_at_node.c.user_at_node_id == other_membership.c.user).join( other_host, other_host.c.user == other_user_at_node.c. user_at_node_id).join(other_inhabitant).join( ds_on_other_host, (ds_on_other_host.c.dataset == datasets.c.id) & (ds_on_other_host.c.host == other_host.c.host_id), isouter=True), whereclause=(datasets.c.uuid == bindparam('ds_uuid')) & datasets.c.sync & # Keep "== None" instead of "is None", # cause it is parsed by SQLAlchemy! (ds_on_other_host.c.id == None) & (~other_user_at_node.c.suspended), distinct=True)
def select_ds_uuids_groups_synced_to_host_expr(cls): r""" Get the query that provides the datasets (in fact, only their UUIDs and group UUIDs) that are present at some host. The query accepts a single bound parameter C{host_uuid}. >>> str(DatasetOnHost.select_ds_uuids_groups_synced_to_host_expr()) \ ... # doctest:+NORMALIZE_WHITESPACE 'SELECT DISTINCT dataset.uuid AS ds_uuid, "group".uuid AS ugroup_uuid \nFROM inhabitant AS my_inhabitant JOIN host AS my_host ON my_inhabitant.id = my_host.id JOIN membership AS i_belong_to ON my_host.user_id = i_belong_to.user_id JOIN "group" ON "group".id = i_belong_to.group_id JOIN dataset ON "group".id = dataset.group_id JOIN dataset_on_host ON dataset_on_host.host_id = my_host.id AND dataset_on_host.dataset_id = dataset.id \nWHERE my_inhabitant.uuid = :host_uuid AND dataset.sync AND dataset.time_completed IS NOT NULL' @rtype: sqlalchemy.sql.expression.Select """ my_host = hosts.alias('my_host') my_inhabitant = inhabitants.alias('my_inhabitant') i_belong_to = memberships.alias('i_belong_to') return select(columns=[datasets.c.uuid.label('ds_uuid'), user_groups.c.uuid.label('ugroup_uuid')], from_obj=my_inhabitant .join(my_host) .join(i_belong_to, my_host.c.user == i_belong_to.c.user) .join(user_groups) .join(datasets) .join(datasets_on_host, (datasets_on_host.c.host == my_host.c.host_id) & (datasets_on_host.c.dataset == datasets.c.id)), whereclause=(my_inhabitant.c.uuid == bindparam('host_uuid')) & datasets.c.sync & # Keep "!= None" instead of "is not None", # cause it is parsed by SQLAlchemy! (datasets.c.time_completed != None), distinct=True)
def generate_bind_ins(table, n):
    stmnt = table.insert()
    items = []
    for i in range(n):
        item = []
        for c in table.c:
            item.append(bindparam('_' + str(i + 1) + c.key))
        items.append(item)
    stmnt = stmnt.values(items)
    return stmnt
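A sketch of how the statement built by generate_bind_ins might be executed; the table, column names, and connection are assumptions, not part of the original example.

# Hypothetical: a table with columns (id, name) and n=2 rows per INSERT.
# generate_bind_ins names its parameters '_<row><column>', so a single flat
# dict passed to execute() supplies the values for every row at once.
stmt = generate_bind_ins(my_table, 2)
conn.execute(stmt, {
    '_1id': 1, '_1name': 'first',
    '_2id': 2, '_2name': 'second',
})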
def update_position_records(position_records):
    time_stamps = np.asarray([r.time_stamp.timestamp() for r in position_records])
    time_deltas_minutes = np.diff(time_stamps) * SECONDS_TO_MINUTES
    lat_and_longs = np.asarray([[r.latitude, r.longitude] for r in position_records], dtype=np.float64)
    pos_change = great_circle_distance_np(lat_and_longs)
    calc_speed = pos_change / time_deltas_minutes * MINUTES_PER_HOUR

    stmt = PositionReading.update().where(
        and_(PositionReading.c.helicopter_id == bindparam('_helicopter_id'),
             PositionReading.c.time_stamp == bindparam('_time_stamp'))).values(
        minutes_since_last_reading=bindparam('time_elapsed'),
        knots_moved_since_last_reading=bindparam('pos_change'),
        calculated_speed=bindparam('calc_speed'))

    dal.engine.execute(stmt, [{'_helicopter_id': r.helicopter_id,
                               '_time_stamp': r.time_stamp,
                               'time_elapsed': delta_t,
                               'pos_change': delta_x,
                               'calc_speed': speed,
                               }
                              for r, delta_t, delta_x, speed
                              in zip(position_records[1:], time_deltas_minutes, pos_change, calc_speed)])
    dal.session.commit()
def _update_failed_items(self, errors):
    """Update items in DB we couldn't sync to Issue Tracker"""
    if not errors:
        return

    issuetracker = all_models.IssuetrackerIssue.__table__
    stmt = issuetracker.update().where(
        sa.and_(
            issuetracker.c.object_type == expr.bindparam("object_type_"),
            issuetracker.c.object_id == expr.bindparam("object_id_"),
        )).values(enabled=False)

    try:
        update_values = self._create_failed_items_list(errors)
        db.session.execute(stmt, update_values)
        db.session.commit()
    except sa.exc.OperationalError as error:
        logger.exception(error)
        raise exceptions.InternalServerError(
            "Failed to turn integration off for IssueTracker issues "
            "that weren't synced in database.")
def load_features(features_fns): """ Load object features from an HDF5 file. """ for features_fn in features_fns: print("Loading {}...".format(features_fn)) with h5py.File( features_fn, "r", libver="latest" ) as f_features: object_ids = f_features["object_id"].asstr()[:] vectors = f_features["features"][:] with database.engine.begin() as conn: stmt = ( models.objects.update() .where(models.objects.c.object_id == bindparam("_object_id")) .values({"vector": bindparam("vector")}) ) # TODO: Use UPDATE ... RETURNING to get the number of affected rows bar = ProgressBar(len(object_ids), max_width=40) obj_iter = iter(zip(object_ids, vectors)) while True: chunk = tuple(itertools.islice(obj_iter, 1000)) if not chunk: break conn.execute( stmt, [ {"_object_id": str(object_id), "vector": vector} for (object_id, vector) in chunk ], ) bar.numerator += len(chunk) print(bar, end="\r") print() # TODO: In the end, print a summary of how many objects have a feature vector now. print("Done.")
def filter_thread_work(session, query, total_threads, thread_id, hash_variable=None): """ Filters a query to partition thread workloads based on the thread id and total number of threads """ if thread_id is not None and total_threads is not None and (total_threads - 1) > 0: if session.bind.dialect.name == 'oracle': bindparams = [ bindparam('thread_id', thread_id), bindparam('total_threads', total_threads - 1) ] if not hash_variable: query = query.filter( text('ORA_HASH(id, :total_threads) = :thread_id', bindparams=bindparams)) else: query = query.filter( text('ORA_HASH(%s, :total_threads) = :thread_id' % (hash_variable), bindparams=bindparams)) elif session.bind.dialect.name == 'mysql': if not hash_variable: query = query.filter( text('mod(md5(id), %s) = %s' % (total_threads, thread_id))) else: query = query.filter( text('mod(md5(%s), %s) = %s' % (hash_variable, total_threads, thread_id))) elif session.bind.dialect.name == 'postgresql': if not hash_variable: query = query.filter( text( 'mod(abs((\'x\'||md5(id::text))::bit(32)::int), %s) = %s' % (total_threads, thread_id))) else: query = query.filter( text( 'mod(abs((\'x\'||md5(%s::text))::bit(32)::int), %s) = %s' % (hash_variable, total_threads, thread_id))) return query
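A hedged example of how filter_thread_work might be applied to spread a query across workers; the model, worker counts, and the process() consumer are illustrative and not taken from the original code.

# Illustrative only: partition a query across 4 workers, this being worker 0.
# The 'id' column is hashed by default; pass hash_variable to hash another column.
query = session.query(models.Request)
query = filter_thread_work(session, query, total_threads=4, thread_id=0)
for request in query.yield_per(100):
    process(request)  # hypothetical per-row consumer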
def signups_requiring_notification(self, session, from_time, to_time, options=None): """ Returns a dict of AttractionSignups that require notification. The keys of the returned dict are the amount of advanced notice, given in seconds. A key of -1 indicates confirmation notices after a signup. The query generated by this method looks horrific, but is surprisingly efficient. """ advance_checkin = max(0, self.advance_checkin) subqueries = [] for advance_notice in sorted(set([-1] + self.advance_notices)): event_filters = [AttractionEvent.attraction_id == self.id] if advance_notice == -1: notice_ident = cast(AttractionSignup.attraction_event_id, UnicodeText) notice_param = bindparam('confirm_notice', advance_notice).label('advance_notice') else: advance_notice = max(0, advance_notice) + advance_checkin notice_delta = timedelta(seconds=advance_notice) event_filters += [ AttractionEvent.start_time >= from_time + notice_delta, AttractionEvent.start_time < to_time + notice_delta] notice_ident = func.concat(AttractionSignup.attraction_event_id, '_{}'.format(advance_notice)) notice_param = bindparam( 'advance_notice_{}'.format(advance_notice), advance_notice).label('advance_notice') subquery = session.query(AttractionSignup, notice_param).filter( AttractionSignup.is_unchecked_in, AttractionSignup.attraction_event_id.in_( session.query(AttractionEvent.id).filter(*event_filters)), not_(exists().where(and_( AttractionNotification.ident == notice_ident, AttractionNotification.attraction_event_id == AttractionSignup.attraction_event_id, AttractionNotification.attendee_id == AttractionSignup.attendee_id)))).with_labels() subqueries.append(subquery) query = subqueries[0].union(*subqueries[1:]) if options: query = query.options(*listify(options)) query.order_by(AttractionSignup.id) return groupify(query, lambda x: x[0], lambda x: x[1])
def sync_collection_new_registration(connection, transaction):
    where = and_(
        BIStatistic.__table__.c.day == bindparam('_on_day'),
        BIStatistic.__table__.c.platform == bindparam('_platform'),
        BIStatistic.__table__.c.game == 'All Game'
    )
    values = {'new_registration': bindparam('sum')}

    try:
        connection.execute(BIStatistic.__table__.update().where(where).values(values), rows)
    except:
        print('process_bi_statistic_for_lifetime new_registration transaction.rollback()')
        transaction.rollback()
        raise
    else:
        print('process_bi_statistic_for_lifetime new_registration transaction.commit()')
        transaction.commit()
    return
async def load_trunk(cls, dialed_number, db_connection):
    res = await db_connection.execute(cls.table.select().where(
        bindparam("dialed_number", dialed_number).startswith(
            cls.table.c.username)).where(cls.table.c.trunk == True))
    if res.rowcount == 0:
        raise DoesNotExist('No trunk for "{}" found'.format(dialed_number))
    elif res.rowcount > 1:
        raise DoesNotExist(
            "Trunk misconfiguration lead to multiple results for {}".format(dialed_number))
    return cls(await res.first())
def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
    """Update many rows in the table at a time.

    This is significantly faster than updating them one by one. Per default
    the rows are processed in chunks of 1000 per commit, unless you specify
    a different ``chunk_size``.

    See :py:meth:`update() <dataset.Table.update>` for details on
    the other parameters.
    """
    # Convert keys to a list if not a list or tuple.
    keys = keys if type(keys) in (list, tuple) else [keys]

    chunk = []
    columns = []
    for index, row in enumerate(rows):
        chunk.append(row)
        for col in row.keys():
            if col not in columns:
                columns.append(col)

        # bindparam requires names to not conflict (cannot be "id" for id)
        for key in keys:
            row['_%s' % key] = row[key]

        # Update when chunk_size is fulfilled or this is the last row
        if len(chunk) == chunk_size or index == len(rows) - 1:
            cl = [self.table.c[k] == bindparam('_%s' % k) for k in keys]
            stmt = self.table.update(
                whereclause=and_(*cl),
                values={col: bindparam(col, required=False) for col in columns},
            )
            self.db.executable.execute(stmt, chunk)
            chunk = []
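A short usage sketch for update_many, following the signature shown above; the table object and row values are assumptions for illustration.

# Hypothetical rows: update several records in one round trip, matching on 'id'.
rows = [
    {'id': 1, 'status': 'done'},
    {'id': 2, 'status': 'failed'},
]
table.update_many(rows, keys=['id'], chunk_size=1000)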
def enrollment_etl(config):
    # set up
    connection = get_connection(config)
    pedsnet_session = init_pedsnet(connection)
    init_pcornet(connection)

    observation_period = pedsnet_session.query(
        ObservationPeriod.person_id,
        ObservationPeriod.observation_period_start_date,
        ObservationPeriod.observation_period_end_date,
        ObservationPeriod.site,
        bindparam("chart", 'Y'),
        bindparam("enr_basis", 'E')
    ).all()
    # endregion

    odo(observation_period, Enrollment.__table__,
        dshape='var * {patid: string, enr_start_date: date, enr_end_date: date, site: string, chart: String, '
               'enr_basis: String} '
        )

    # close session
    pedsnet_session.close()
def sync_collection(connection, transaction):
    where = and_(
        BIUserBill.__table__.c.user_id == bindparam('_user_id'),
        BIUserBill.__table__.c.platform == 'Undetected')
    values = {'platform': bindparam('platform')}

    try:
        connection.execute(BIUserBill.__table__.update().where(where).values(values), rows)
    except:
        print('process_user_mall_bill_fix_platform transaction.rollback()')
        transaction.rollback()
        raise
    else:
        print('process_user_mall_bill_fix_platform transaction.commit()')
        transaction.commit()
    return
def sync_collection(connection, transaction):
    where = and_(
        BIUserCurrency.__table__.c.og_account == bindparam('_og_account'),
        BIUserCurrency.__table__.c.user_id == -1)
    values = {'user_id': bindparam('user_id')}

    try:
        connection.execute(BIUserCurrency.__table__.update().where(where).values(values), rows)
    except:
        print('process_user_currency_fill_user_id transaction.rollback()')
        transaction.rollback()
        raise
    else:
        print('process_user_currency_fill_user_id transaction.commit()')
        transaction.commit()
    return
def query_cube(cube_name, readable=True): cube = get_cube(cube_name) dimensions = get_dimensions(cube_name) fact_table = get_fact_table(cube_name).table.alias('fact') q = fact_table.select() selects, wheres, tables = [], [], [fact_table] if not readable: selects.append(fact_table.columns['fact_id'].label('REGENESIS_ID')) params = {} for dim in dimensions: name = dim.get('dim_name') field = name.upper() title = dim.get('dim_title_de') if readable: unit = dim.get('ref_unit_name') if unit is not None: field = '%s; %s' % (field, unit) field = '%s (%s)' % (title, field) type_ = dim.get('ref_type') if type_ == 'measure': selects.append(fact_table.columns[name].label(field)) if not readable: selects.append( fact_table.columns[name + "_quality"].label(field + '_QUALITY')) selects.append(fact_table.columns[name + "_error"].label(field + '_ERROR')) if type_ == 'time': selects.append(fact_table.columns[name].label(field)) if not readable: selects.append( fact_table.columns[name + '_from'].label(field + '_FROM')) selects.append(fact_table.columns[name + '_until'].label(field + '_UNTIL')) elif type_ == 'axis': vt = value_table.table.alias('value_%s' % name) id_col = field + ' - ID' if readable else field + '_CODE' selects.append(vt.c.name.label(id_col)) selects.append(vt.c.title_de.label(field)) tables.append(vt) params[name] = name wheres.append(vt.c.dimension_name == bindparam(name, value=name)) wheres.append(vt.c.value_id == fact_table.c[name]) q = select(selects, and_(*wheres), tables) return q, params
def find(cls, db, token):
    """Find a token object by value.

    Returns None if not found.
    """
    prefix = token[:cls.prefix_length]
    # since we can't filter on hashed values, filter on prefix
    # so we aren't comparing with all tokens
    prefix_match = db.query(cls).filter(bindparam('prefix', prefix).startswith(cls.prefix))
    for orm_token in prefix_match:
        if orm_token.match(token):
            return orm_token
def handle_param(column, data):
    """Build a single WHERE condition from a filter description."""
    opt = data.get('opt', '$te')
    if 'val' in data:
        value = data['val']
        if opt == '$ne':  # not equal
            return column != value
        if opt == '$te':  # equal
            return column == value
        elif opt == '$lt':  # less than
            return column < value
        elif opt == '$lte':  # less than or equal
            return column <= value
        elif opt == '$gt':  # greater than
            return column > value
        elif opt == '$gte':  # greater than or equal
            return column >= value
        elif opt == '$like':  # like
            return column.like(value)
        elif opt == '$in':
            return column.in_(value)
        elif opt == '$nin':
            return ~column.in_(value)
        elif opt == '$bind':  # placeholder / bound parameter
            if isinstance(value, str):
                return column == bindparam(value)
            else:
                opt = value["opt"]
                if opt == '$bind':
                    return None
                if opt == "$in" or opt == "$nin":
                    value["val"] = bindparam(value["val"], expanding=True)
                else:
                    value["val"] = bindparam(value["val"])
                return handle_param(column, value)
        elif opt == '$raw':
            return value
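A brief assumed usage of handle_param; the users table, its columns, and the surrounding select/and_ construction are hypothetical and only illustrate how each filter dict maps to one condition.

# Hypothetical table: build two conditions and combine them in a SELECT.
cond_age = handle_param(users.c.age, {'opt': '$gte', 'val': 18})
cond_status = handle_param(users.c.status, {'opt': '$in', 'val': ['active', 'new']})
stmt = select([users]).where(and_(cond_age, cond_status))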
def neighboring_ids(): from sqlalchemy.sql.expression import bindparam batch = 1000 n_inds = session.query(Individual).count() print "Computing neighboring ids for {} individuals.".format(n_inds) stmt = Individual.__table__.update().\ where(Individual.id == bindparam('_id')).\ values({ 'neighboring_ids': bindparam('neighboring_ids'), }) updates = [] for batch_i, (lo, hi) in enumerate(yield_batch_limits(n_inds, batch)): print dt.datetime.now().isoformat()[:-7], "Batch from {} to {}".format(lo, hi-1) sys.stdout.flush() t0 = time.time() if batch < n_inds: ind_query = Individual.query.filter(and_(Individual.id >= lo, Individual.id < hi)) ind_query = ind_query.options(joinedload(Individual.sup_families)).\ options(joinedload(Individual.sub_families)) else: ind_query = Individual.query ind_query = ind_query.options(joinedload(Individual.sup_families)).\ options(joinedload(Individual.sub_families)) inds = ind_query.all() print " Querying {} individuals took {:.4f} seconds.".format(len(inds), time.time()-t0) sys.stdout.flush() t0 = time.time() for ii, ind in enumerate(inds): n_ids = [] for fam in ind.sub_families + ind.sup_families: for ind2 in fam.parents + fam.children: n_ids.append([fam.xref, ind2.xref]) updates.append({'neighboring_ids': u(json.dumps(n_ids)), '_id': ind.id}) print " Neighboring ids took {:.4f} seconds.".format(time.time()-t0) if len(updates) > 0: t0 = time.time() engine.execute(stmt, updates) updates = [] print " Executing took {:.4f} seconds.".format(time.time()-t0)
def dao_get_templates_for_cache(cache):
    if not cache or len(cache) == 0:
        return []

    # First create a subquery that is a union select of the cache values
    # Then join templates to the subquery
    cache_queries = [
        db.session.query(
            bindparam("template_id" + str(i), uuid.UUID(template_id.decode())).label('template_id'),
            bindparam("count" + str(i), int(count.decode())).label('count'))
        for i, (template_id, count) in enumerate(cache)]

    cache_subq = cache_queries[0].union(*cache_queries[1:]).subquery()

    query = db.session.query(Template.id.label('template_id'),
                             Template.template_type,
                             Template.name,
                             Template.is_precompiled_letter,
                             cache_subq.c.count.label('count')
                             ).join(cache_subq, Template.id == cache_subq.c.template_id
                                    ).order_by(Template.name)

    return query.all()
def column(self, column):
    try:
        table = version_table(column.table)
    except KeyError:
        reflected_column = column
    else:
        reflected_column = table.c[column.name]
        if (column in self.relationship.local_columns and
                table == self.parent.__table__):
            reflected_column = bindparam(column.key, getattr(self.parent, column.key))
    return reflected_column
def update_multiple(self, table_name, column, data_dict_list):
    """
    Update existing rows in the DB in a single pass, keyed on one column.
    """
    transaction = self.__connection.begin()
    if data_dict_list:
        try:
            table = self.find_table(table_name)
            if column in table.c:
                c = table.c.get(column)
                upd = table.update(). \
                    where(c == bindparam('target_' + column))
                added_data_dict_list = [
                    add_target_item(dic, column) for dic in data_dict_list
                ]
                # print("added_data_dict_list: ")
                # print(added_data_dict_list)
                result = self.__connection.execute(upd, added_data_dict_list)
                transaction.commit()
                return True
            else:
                print("Table {} has no column named {}".format(table_name, column))
                print("Let's RollBack")
                transaction.rollback()
                return False
        except IntegrityError as error:
            error_code, error_msg = error.orig.args
            print("IntegrityError!!")
            print(">>>error_code: {}".format(error_code))
            print(">>>error_msg: {}".format(error_msg))
            print("Let's RollBack")
            transaction.rollback()
            return False
    else:
        print("This is an empty list; 'update_multiple' needs a list of dicts")
        transaction.rollback()
        print("Let's RollBack")
        return False
def list_expired_temporary_dids(rse, limit, worker_number=None, total_workers=None, session=None): """ List expired temporary DIDs. :param rse: the rse name. :param limit: The maximum number of replicas returned. :param worker_number: id of the executing worker. :param total_workers: Number of total workers. :param session: The database session in use. :returns: a list of dictionary replica. """ rse_id = get_rse_id(rse, session=session) is_none = None query = session.query(models.TemporaryDataIdentifier.scope, models.TemporaryDataIdentifier.name, models.TemporaryDataIdentifier.path, models.TemporaryDataIdentifier.bytes).\ with_hint(models.TemporaryDataIdentifier, "INDEX(tmp_dids TMP_DIDS_EXPIRED_AT_IDX)", 'oracle').\ filter(case([(models.TemporaryDataIdentifier.expired_at != is_none, models.TemporaryDataIdentifier.rse_id), ]) == rse_id) if worker_number and total_workers and total_workers - 1 > 0: if session.bind.dialect.name == 'oracle': bindparams = [bindparam('worker_number', worker_number - 1), bindparam('total_workers', total_workers - 1)] query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams)) elif session.bind.dialect.name == 'mysql': query = query.filter(text('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1))) elif session.bind.dialect.name == 'postgresql': query = query.filter(text('mod(abs((\'x\'||md5(path))::bit(32)::int), %s) = %s' % (total_workers - 1, worker_number - 1))) return [{'path': path, 'rse': rse, 'rse_id': rse_id, 'scope': scope, 'name': name, 'bytes': bytes} for scope, name, path, bytes in query.limit(limit)]
async def update_contentgroup_by_localgroupid( localgroupid: int, localcontentids: ContentGroupin, session: Session = Depends(get_session), current_user: User = Depends(get_current_user)): try: oldset = { 'username': current_user.username, 'localgroupid': localgroupid } old = ReportContentGroup.__table__.delete().where( and_( ReportContentGroup.__table__.c.username == bindparam( 'username'), ReportContentGroup.__table__.c.localgroupid == bindparam('localgroupid'))) session.execute(old, oldset) newset = [{ 'username': current_user.username, 'localgroupid': localgroupid, 'contentid': n, 'order': i } for i, n in enumerate(localcontentids.localcontentids)] session.execute(ReportContentGroup.__table__.insert(), newset) try: session.commit() return ContentGroupout( localgroupid=localgroupid, contents=session.query(ReportContentGroup).filter( ReportContentGroup.username == current_user.username, ReportContentGroup.localgroupid == localgroupid).order_by( ReportContentGroup.localgroupid, ReportContentGroup.order).all()) except: session.rollback() raise HTTPException(status_code=400, detail="not commited.") except Exception as e: print(e) raise HTTPException(status_code=400)
def update_df_to_postgres_using_metadata_and_id(self, df, table_name):
    meta = sa.MetaData(bind=self.engine)
    table_name_only = table_name.split(".")[1]
    schema_only = table_name.split(".")[0]
    docs = sa.Table(table_name_only, meta, autoload=True, schema=schema_only)
    sess = orm.sessionmaker(bind=self.engine)()
    conn = self.engine.connect()

    df_2 = df.copy()
    df_2['_id'] = df_2['id']
    # materialize to a list so it can be concatenated and iterated more than once
    value_columns = list(filter(lambda c: c != 'id', df.columns.values))
    df_2 = df_2[['_id'] + value_columns]
    listToWrite = df_2.to_dict(orient='records')

    value_dict = {}
    for vc in value_columns:
        value_dict[vc] = bindparam(vc)

    conn.execute(
        docs.update().where(docs.c.id == bindparam('_id')).values(value_dict),
        listToWrite)
    sess.commit()
    sess.close()
def get_updated_account_counters(total_workers, worker_number, session=None):
    """
    Get updated rse_counters.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param session:       Database session in use.

    :returns: List of rse_ids whose rse_counters need to be updated.
    """
    query = session.query(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id).\
        distinct(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id)

    if total_workers > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number),
                          bindparam('total_workers', total_workers)]
            query = query.filter(text('ORA_HASH(CONCAT(account, rse_id), :total_workers) = :worker_number',
                                      bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter('mod(md5(concat(account, rse_id)), %s) = %s' % (total_workers + 1, worker_number))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter('mod(abs((\'x\'||md5(concat(account, rse_id)))::bit(32)::int), %s) = %s' % (total_workers + 1, worker_number))

    return query.all()
def visit_idea(self, idea, level, prev_result):
    if idea.short_title:
        self.counter.add_text(self.cleantext(idea.short_title), 2)
    if idea.long_title:
        self.counter.add_text(self.cleantext(idea.long_title))
    if idea.definition:
        self.counter.add_text(self.cleantext(idea.definition))
    if self.count_posts and level == 0:
        from .generic import Content
        related = text(
            Idea._get_related_posts_statement(),
            bindparams=[bindparam('root_idea_id', idea.id),
                        bindparam('discussion_id', idea.discussion_id)]
        ).columns(column('post_id')).alias('related')
        titles = set()
        for title, body in idea.db.query(
                Content.subject, Content.body).join(
                related, related.c.post_id == Content.id):
            self.counter.add_text(self.cleantext(body), 0.5)
            title = self.cleantext(title)
            if title not in titles:
                self.counter.add_text(title)
                titles.add(title)