Example 1
def list_expired_dids(worker_number=None, total_workers=None, limit=None, session=None):
    """
    List expired data identifiers.

    :param limit: limit number.
    :param session: The database session in use.
    """
    query = session.query(models.DataIdentifier.scope, models.DataIdentifier.name, models.DataIdentifier.did_type).\
        filter(models.DataIdentifier.expired_at < datetime.utcnow()).\
        order_by(models.DataIdentifier.expired_at).\
        with_hint(models.DataIdentifier, "index(DIDS DIDS_EXPIRED_AT_IDX)", 'oracle')

    if worker_number and total_workers and total_workers - 1 > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number-1), bindparam('total_workers', total_workers-1)]
            query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter('mod(abs((\'x\'||md5(name))::bit(32)::int), %s) = %s' % (total_workers-1, worker_number-1))
        elif session.bind.dialect.name == 'sqlite':
            row_count = 0
            dids = list()
            for scope, name, did_type in query.yield_per(10):
                if int(md5(name).hexdigest(), 16) % total_workers == worker_number-1:
                    dids.append({'scope': scope, 'name': name, 'did_type': did_type})
                    row_count += 1
                if limit and row_count >= limit:
                    return dids
            return dids

    if limit:
        query = query.limit(limit)

    return [{'scope': scope, 'name': name, 'did_type': did_type} for scope, name, did_type in query]
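
A minimal usage sketch of the worker-partitioning pattern above (the four-worker split and the get_session helper are assumptions, not part of the original):

# Hypothetical driver: each of four workers lists only its share of expired DIDs.
session = get_session()  # assumed session factory
for worker in range(1, 5):  # worker_number is treated as 1-based above
    dids = list_expired_dids(worker_number=worker, total_workers=4,
                             limit=100, session=session)
    print('worker %d got %d expired DIDs' % (worker, len(dids)))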
Example 2
 def visit_idea(self, idea, level, prev_result):
     if idea.short_title:
         self.counter.add_text(self.cleantext(idea.short_title), 2)
     if idea.long_title:
         self.counter.add_text(self.cleantext(idea.long_title))
     if idea.definition:
         self.counter.add_text(self.cleantext(idea.definition))
     if self.count_posts and level == 0:
         from .generic import Content
         related = text(
             Idea._get_related_posts_statement(),
             bindparams=[bindparam('root_idea_id', idea.id),
                         bindparam('discussion_id', idea.discussion_id)]
             ).columns(column('post_id')).alias('related')
         titles = set()
         # TODO maparent: Reoptimize
         for content in idea.db.query(
                 Content).join(
                 related, related.c.post_id == Content.id):
             body = content.body.first_original().value
             self.counter.add_text(self.cleantext(body), 0.5)
             title = content.subject.first_original().value
             title = self.cleantext(title)
             if title not in titles:
                 self.counter.add_text(title)
                 titles.add(title)
Example 3
def add_father_ids(engine):
    ct = table_scheme.categories_t
    connection = engine.connect()

    #prepared statements
    selection = ct.select().where(ct.c.Topic==bindparam('f_topic'))
    fid_update = ct.update().where(ct.c.catid==bindparam('child_id')).values(fatherid=bindparam('fatherid_'))
    all_categories = connection.execute('SELECT * FROM categories')

    counter = 0
    sys.stdout.write('\n')
    for row in all_categories:
        counter += 1
        topic = row.Topic
        title = row.Title
        catid = row.catid
        if catid < 3: #ignore "" and "Top"
            continue

        index = len(topic)-(len(title)+1)
        father_topic = topic[:index]

        father_selection = connection.execute(selection, f_topic=father_topic)
        father = father_selection.first()
        if father is None:
            LOG.debug('Found no father for "{0}", searched for "{1}"'.format(topic, father_topic))
            continue
        father_id = father[ct.c.catid]
        connection.execute(fid_update, child_id=catid, fatherid_=father_id)
        if counter % 10000 == 0:
            sys.stdout.write('.')
            if counter % 200000 == 0:
                sys.stdout.write(' - {0} ids generated\n'.format(counter))
            sys.stdout.flush()
    print()
Example 4
    def handle(self, *args, **options):
        # set up
        config = get_config()
        if config is None:
            raise CommandError('Unable to process configuration file p_to_p.yml')

        connection = get_connection(config)
        pedsnet_session = init_pedsnet(connection)
        init_pcornet(connection)

        observation_period = pedsnet_session.query(ObservationPeriod.person_id,
                                                   ObservationPeriod.observation_period_start_date,
                                                   ObservationPeriod.observation_period_end_date,
                                                   ObservationPeriod.site,
                                                   bindparam("chart", 'Y'),
                                                   bindparam("enr_basis", 'E')
                                                   ).filter(
            exists().where(ObservationPeriod.person_id == PersonVisit.person_id)).all()

        odo(observation_period, Enrollment.__table__,
            dshape='var * {patid: string, enr_start_date: date, enr_end_date: date, site: string, chart: string, '
                   'enr_basis: string} '
            )
        # close session
        pedsnet_session.close()

        # output result
        self.stdout.ending = ''
        print('Enrollment ETL completed successfully', end='', file=self.stdout)
Example 5
 def run(self):
     session = self.session()
     engine = session._WopMarsSession__session.bind
     conn = engine.connect()
     #
     snp2phenotype_path = self.input_file(InsertSNP2Phenotype.__input_file_snp2phenotype)
     snp_model = self.input_table(InsertSNP2Phenotype.__input_table_snp)
     phenotype_model = self.input_table(InsertSNP2Phenotype.__input_table_phenotype)
     snp2phenotype_model = self.output_table(InsertSNP2Phenotype.__output_table_snp2phenotype)
     snp2phenotype_df = pandas.read_table(snp2phenotype_path, header=None)
     #
     # read input file
     input_file_obj_list = []
     for line in csv.reader(open(snp2phenotype_path, 'r', encoding='utf-8'), delimiter="\t"):
         snp_rsid = int(line[0])
         phenotype_name = line[1]
         input_file_obj_list.append({'snp_rsid' : snp_rsid, 'phenotype_name' : phenotype_name})
     #
     # create insert
     snp_select = select([snp_model.id]).where(snp_model.rsid==bindparam('snp_rsid'))
     phenotype_select = select([phenotype_model.id]).where(phenotype_model.name==bindparam('phenotype_name'))
     output_table_insert = insert(table=snp2phenotype_model.__table__, values={'snp_id': snp_select, 'phenotype_id': phenotype_select})
     #
     if len(input_file_obj_list) > 0:
         if str(engine.__dict__['url']).split("://")[0]=='sqlite':
             engine.execute(output_table_insert.prefix_with("OR IGNORE", dialect='sqlite'), input_file_obj_list)
         elif str(engine.__dict__['url']).split("://")[0]=='mysql':
             from warnings import filterwarnings  # three lines to suppress mysql warnings
             import MySQLdb as Database
             filterwarnings('ignore', category=Database.Warning)
             engine.execute(output_table_insert.prefix_with("IGNORE", dialect='mysql'), input_file_obj_list)
         elif str(engine.__dict__['url']).split("://")[0]=='postgresql':
             from sqlalchemy.dialects.postgresql import insert as pg_insert
             output_table_insert_pg = pg_insert(table=snp2phenotype_model.__table__, values={'snp_id': snp_select, 'phenotype_id': phenotype_select}).on_conflict_do_nothing(index_elements=['snp_id', 'phenotype_id'])
             engine.execute(output_table_insert_pg, input_file_obj_list)
Example 6
def update_item_saved_info(item):
    engine = get_onitsuka_db_engine()
    
    item_owner_id = item['owner_id']
    item_id = item['item_id']
    
    user_following = Table('user_following', metaData, autoload=True, autoload_with = engine)
    s = select([user_following.c.user_id], (user_following.c.following_id==item_owner_id))
    
    result = engine.execute(s)
    
    user_feed_update_list = list()
    for follower in result:
        
        item_owner_follower_id = follower['user_id']
        print(item_owner_follower_id)
        
        user_feed_update_item = {}
        user_feed_update_item['user_id']  = item_owner_follower_id
        user_feed_update_item['owner_id'] = item_owner_id
        user_feed_update_item['item_id'] = item_id
        user_feed_update_list.append(user_feed_update_item)

    result.close()

    user_feed_table = Table('user_feed', metaData, autoload=True, autoload_with = engine)
    ins = user_feed_table.insert().values(user_id=bindparam('user_id'), owner_id=bindparam('owner_id'), item_id=bindparam('item_id'))
    engine.execute(ins, user_feed_update_list)
Example 7
def demographic_etl(config):
    # set up
    connection = get_connection(config)
    pedsnet_session = init_pedsnet(connection)
    init_pcornet(connection)

    # multiple aliases for pedsnet_pcornet_valueset_map
    # to allow the three named joins
    gender_value_map = aliased(ValueSetMap)
    ethnicity_value_map = aliased(ValueSetMap)
    race_value_map = aliased(ValueSetMap)

    # extract the data from the person table
    person = pedsnet_session.query(Person.person_id,
                                   Person.birth_date,
                                   Person.birth_time,
                                   coalesce(gender_value_map.target_concept, 'OT'),
                                   coalesce(ethnicity_value_map.target_concept, 'OT'),
                                   coalesce(race_value_map.target_concept, 'OT'),
                                   bindparam("biobank_flag", "N"),
                                   Person.gender_source_value,
                                   Person.ethnicity_source_value,
                                   Person.race_source_value,
                                   Person.site,
                                   bindparam("gender_identity", None),
                                   bindparam("raw_gender_identity", None),
                                   bindparam("sexual_orientation", None),
                                   bindparam("raw_sexual_orientation", None)
                                   ). \
        outerjoin(gender_value_map,
                  and_(gender_value_map.source_concept_class == 'Gender',
                       case([(and_(Person.gender_concept_id == None,
                                   gender_value_map.source_concept_id == None), True)],
                            else_=cast(Person.gender_concept_id, String(200)) ==
                                  gender_value_map.source_concept_id))). \
        outerjoin(ethnicity_value_map,
                  and_(ethnicity_value_map.source_concept_class == 'Hispanic',
                       case([(and_(Person.ethnicity_concept_id == None,
                                   ethnicity_value_map.source_concept_id == None), True)],
                            else_=cast(Person.ethnicity_concept_id, String(200)) ==
                                  ethnicity_value_map.source_concept_id))). \
        outerjoin(race_value_map,
                  and_(race_value_map.source_concept_class == 'Race',
                       case([(and_(Person.race_concept_id == None,
                                   race_value_map.source_concept_id == None), True)],
                            else_=cast(Person.race_concept_id, String(200)) ==
                                  race_value_map.source_concept_id))).all()

    # transform data to pcornet names and types
    # load to demographic table
    odo(person, Demographic.__table__,
        dshape='var * {patid: string, birth_date: date, birth_time: string, sex: string,'
               'hispanic: string, race: string, biobank_flag: string, raw_sex: string,'
               'raw_hispanic: string, raw_race:string, site: string, gender_identity: string,'
               'raw_gender_identity: string, sexual_orientation: string, raw_sexual_orientation: string}'
        )
    # close session

    pedsnet_session.close()
Example 8
def insert_stock_data(country, market_gsi):
    market_id = [ct[0] for ct in markets if ct[1] == market_gsi][0]
    insert_stmt = tc_company_stock_prices.insert().values(for_date=bindparam('DailyDate'),
                                                          market_id=bindparam('market_id'),
                                                          company_id=bindparam('CompanyID'),
                                                          open=bindparam('Open'),
                                                          max=bindparam('Max'),
                                                          min=bindparam('Min'),
                                                          close=bindparam('Close'),
                                                          volume=bindparam('Volume'),
                                                          amount=bindparam('Amount'))
    print(insert_stmt)

    result = _read_data(country + '.csv')

    for item in result:
        item['DailyDate'] = datetime.strptime(item['DailyDate'], "%Y-%m-%d").date()
        item['CompanyID'] = int(item['CompanyID'])
        item['Open'] = float(item['Open'])
        item['Close'] = float(item['Close'])
        item['Min'] = float(item['Min'])
        item['Max'] = float(item['Max'])
        item['Volume'] = int(item['Volume'])
        item['Amount'] = int(item['Amount'])
        item['market_id'] = market_id

    # for i, row in enumerate(result):
    #     print(row)
    #     if i == 10:
    #         break

    with engine.connect() as conn:
        conn.execute(insert_stmt, result)
Example 9
def run_letter(letter, session, doctype='grant'):
    schema = RawLawyer
    if doctype == 'application':
        schema = App_RawLawyer
    letter = letter.upper()
    clause1 = schema.organization.startswith(bindparam('letter',letter))
    clause2 = schema.name_first.startswith(bindparam('letter',letter))
    clauses = or_(clause1, clause2)
    lawyers = (lawyer for lawyer in session.query(schema).filter(clauses))
    block = clean_lawyers(lawyers)
    create_jw_blocks(block)
    create_lawyer_table(session)
Example 10
def run_letter(letter, session, doctype='grant'):
    schema = RawAssignee
    if doctype == 'application':
        schema = App_RawAssignee
    letter = letter.upper()
    clause1 = schema.organization.startswith(bindparam('letter',letter))
    clause2 = schema.name_first.startswith(bindparam('letter',letter))
    clauses = or_(clause1, clause2)
    assignees = (assignee for assignee in session.query(schema).filter(clauses))
    block = clean_assignees(assignees)
    create_jw_blocks(block)
    create_assignee_table(session)
Example 11
    def get_context_data(self, **kwargs):
        filter_form = ProgressFilterForm(request.args)

        conclusion_type = filter_form.conclusion.data
        dataset = filter_form.dataset
        status_level = self.model_eu_cls.conclusion_status_level2
        label_type = self.TREND_LABEL
        species = []
        if conclusion_type:
            if conclusion_type == 'bs':
                status_level = self.model_eu_cls.conclusion_status_level1
                conclusion_value = self.model_eu_cls.conclusion_status_label
                label_type = self.STATUS_LABEL
            elif conclusion_type == 'stbp':
                conclusion_value = self.model_eu_cls.br_population_trend
            elif conclusion_type == 'ltbp':
                conclusion_value = self.model_eu_cls.br_population_trend_long
            elif conclusion_type == 'stwp':
                conclusion_value = self.model_eu_cls.wi_population_trend
            elif conclusion_type == 'ltwp':
                conclusion_value = self.model_eu_cls.wi_population_trend_long
            else:
                raise ValueError('Unknown conclusion type')
            eu_species = self.get_species_qs(dataset,
                                             conclusion_value,
                                             status_level)

            ignore_species = (
                self.model_eu_cls.query
                .with_entities(self.model_eu_cls.speciescode)
            )
            ms_species = (
                LuDataBird.query
                .filter(~LuDataBird.speciescode.in_(ignore_species))
                .filter_by(dataset=dataset)
                .with_entities(LuDataBird.speciescode.label('code'),
                               LuDataBird.speciesname.label('name'),
                               bindparam('conclution', ''),
                               bindparam('status', ''),
                               bindparam('additional_record', 0))
            )

            species = sorted(eu_species.union(ms_species),
                             key=lambda x: x.name)

        return {
            'filter_form': filter_form,
            'species': species,
            'current_selection': filter_form.get_selection(),
            'dataset': dataset,
            'label_type': label_type,
        }
Example 12
def upgrade_severity_levels(session, severity_map):
    """
    Updates the potentially changed severities at the reports.
    """
    LOG.debug("Upgrading severity levels started...")

    # Create a sql query from the severity map.
    severity_map_q = union_all(*[
        select([cast(bindparam('checker_id' + str(i), str(checker_id))
                .label('checker_id'), sqlalchemy.String),
                cast(bindparam('severity' + str(i), Severity._NAMES_TO_VALUES[
                    severity_map[checker_id]])
               .label('severity'), sqlalchemy.Integer)])
        for i, checker_id in enumerate(severity_map)]) \
        .alias('new_severities')

    checker_ids = severity_map.keys()

    # Get checkers which have been changed.
    changed_checker_q = select([Report.checker_id, Report.severity]) \
        .group_by(Report.checker_id, Report.severity) \
        .where(Report.checker_id.in_(checker_ids)) \
        .except_(session.query(severity_map_q)).alias('changed_severities')

    changed_checkers = session.query(changed_checker_q.c.checker_id,
                                     changed_checker_q.c.severity)

    # Update severity levels of checkers.
    if changed_checkers:
        updated_checker_ids = set()
        for checker_id, severity_old in changed_checkers:
            severity_new = severity_map.get(checker_id, 'UNSPECIFIED')
            severity_id = Severity._NAMES_TO_VALUES[severity_new]

            LOG.info("Upgrading severity level of '%s' checker from %s to %s",
                     checker_id,
                     Severity._VALUES_TO_NAMES[severity_old],
                     severity_new)

            if checker_id in updated_checker_ids:
                continue

            session.query(Report) \
                .filter(Report.checker_id == checker_id) \
                .update({Report.severity: severity_id})

            updated_checker_ids.add(checker_id)

        session.commit()

    LOG.debug("Upgrading of severity levels finished...")
Example 13
def db_operates(action, conn, tbl, rows, pk=['id']):
	if rows is None or len(rows)==0: return 0
	cnt = 0
	if action in ('del', 'mod'):
		# generate where clause
		u_where_params = []
		for o in pk: 
			if action=='mod': u_where_params.append(tbl.c[o]==bindparam('_'+o))
			else: u_where_params.append(tbl.c[o]==bindparam(o))
		u_where_clause = and_(*u_where_params)

	if action=='add':
		if len(rows)==1:
			respxy = conn.execute(tbl.insert(), rows[0])
			for idx in range(0, len(pk)):
				rows[0][pk[idx]]=respxy.inserted_primary_key[idx]
		else:
			respxy = conn.execute(tbl.insert(), rows)

		cnt = respxy.rowcount
	elif action=='mod':
		# generate values params
		u_value_keys = {}
		def prepare_columns(t_k, row_):
			for k in row_.keys():
				if k in tbl.columns and k not in pk:
					if k in u_value_keys:
						t_k[k] = u_value_keys[k]
					else:
						t_k[k] = u_value_keys[k] = bindparam(k)

		# preparation for key=id
		t_u_value_keys = {}
		for row in rows:
			prepare_columns(t_u_value_keys, row)
			for k in list(row.keys()):
				if k in pk: row['_'+k]=row[k]
			st = tbl.update().where(u_where_clause).values(**t_u_value_keys)
			respxy = conn.execute(st, [row])
			cnt += respxy.rowcount
			t_u_value_keys.clear()
			del st
		# reset for key=id
		for row in rows:
			for k in list(row.keys()):
				if k in pk: del row['_'+k]
	elif action=='del':
		respxy = conn.execute(tbl.delete().where(u_where_clause), rows)
		cnt = respxy.rowcount

	return cnt
Example 14
def get_user_id(email=None, session_id=None):
    """ Helper function that returns the user_id for a given email address """
    if email is not None:
        result = db.session.execute(
            text("SELECT aaa.get_user_id_by_email(:email)",
                 bindparams=[bindparam('email', email)]))
        return result.first()[0]

    if session_id is not None:
        result = db.session.execute(
            text("SELECT aaa.get_user_id_by_session_id(:session)",
                 bindparams=[bindparam('session', session_id)]))
        return result.first()[0]
    return None
Example 15
    def _update(self, type, offset, values):
        """

        :param type: The type prefix to use
        :param offset: The address offset to start at
        :param values: The values to set
        """
        context = self._build_set(type, offset, values, prefix='x_')
        query = self._table.update().values(value=bindparam('x_value'))
        query = query.where(and_(
            self._table.c.type == bindparam('x_type'),
            self._table.c.index == bindparam('x_index')))
        result = self._connection.execute(query, context)
        return result.rowcount == len(values)
Example 16
def insert_splits_data():
    stmt_insert_splits = splits_table.insert().values(split_id=bindparam('SplitID'), split_date=bindparam('SplitDate', type_=Date),
                                                      company_id=bindparam('CompanyID'), ratio=bindparam('Ratio;'))
    print(stmt_insert_splits)

    result = _read_data('splits.csv')
    for item in result:
        item['SplitDate'] = datetime.strptime(item['SplitDate'], "%Y-%m-%d").date()
        item['Ratio;'] = float(item['Ratio;'][0:len(item['Ratio;']) - 1])

    # for row in result:
    #     print(row)

    with engine.connect() as conn:
        conn.execute(stmt_insert_splits, result)
Example 17
    def handle(self, *args, **options):
        # set up
        config = get_config()
        if config is None:
            raise CommandError('Unable to process configuration file p_to_p.yml')

        connection = get_connection(config)
        pedsnet_session = init_pedsnet(connection)
        init_pcornet(connection)
        init_vocab(connection)

        pedsnet_pcornet_valueset_map = aliased(ValueSetMap)

        # extract the data from the death table
        death_cause = pedsnet_session.query(DeathPedsnet.person_id,
                                            func.left(DeathPedsnet.cause_source_value, 8),
                                            coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT'),
                                            bindparam("death_cause_type", "NI"),
                                            bindparam("death_cause_source", "L"),
                                            bindparam("death_cause_confidence", None),
                                            min(DeathPedsnet.site)
                                            ) \
            .join(Demographic, Demographic.patid == cast(DeathPedsnet.person_id, String(256)), ) \
            .join(VocabularyConcept, VocabularyConcept.concept_id == DeathPedsnet.cause_concept_id) \
            .outerjoin(pedsnet_pcornet_valueset_map,
                       and_(pedsnet_pcornet_valueset_map.source_concept_class == 'death cause code',
                            cast(VocabularyConcept.vocabulary_id, String(200)) ==
                            pedsnet_pcornet_valueset_map.source_concept_id)) \
            .filter(and_(DeathPedsnet.cause_source_value != None,
                         DeathPedsnet.cause_source_concept_id != 44814650)) \
            .group_by(DeathPedsnet.person_id, func.left(DeathPedsnet.cause_source_value, 8),
                      coalesce(pedsnet_pcornet_valueset_map.target_concept, 'OT')) \
            .all()

        # transform data to pcornet names and types
        # load to demographic table
        odo(death_cause, DeathCause.__table__,
            dshape='var * {patid: string, death_cause: string, death_cause_code: string,'
                   'death_cause_type: string, death_cause_source:string, '
                   'death_cause_confidence: string, site: string}'
            )

        # close session
        pedsnet_session.close()

        # output result
        self.stdout.ending = ''
        print('Death Cause ETL completed successfully', end='', file=self.stdout)
Example 18
def list_unlocked_replicas(rse, limit, bytes=None, rse_id=None, worker_number=None, total_workers=None, delay_seconds=0, session=None):
    """
    List RSE File replicas with no locks.

    :param rse: the rse name.
    :param bytes: the amount of needed bytes.
    :param session: The database session in use.

    :returns: a list of dictionary replica.
    """
    if not rse_id:
        rse_id = get_rse_id(rse=rse, session=session)

    # filter(models.RSEFileAssociation.state != ReplicaState.BEING_DELETED).\
    none_value = None  # Hack to get pep8 happy...
    query = session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.bytes, models.RSEFileAssociation.tombstone).\
        filter(models.RSEFileAssociation.tombstone < datetime.utcnow()).\
        filter(models.RSEFileAssociation.lock_cnt == 0).\
        filter(case([(models.RSEFileAssociation.tombstone != none_value, models.RSEFileAssociation.rse_id), ]) == rse_id).\
        filter(or_(models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE)),
                   and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED, models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds)))).\
        order_by(models.RSEFileAssociation.tombstone).\
        with_hint(models.RSEFileAssociation, "INDEX(replicas REPLICAS_TOMBSTONE_IDX)", 'oracle')

    if worker_number and total_workers and total_workers - 1 > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number - 1), bindparam('total_workers', total_workers - 1)]
            query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter('mod(abs((\'x\'||md5(name))::bit(32)::int), %s) = %s' % (total_workers - 1, worker_number - 1))

    query = query.limit(limit)

    rows = list()
    neededSpace = bytes
    totalbytes = 0
    for (scope, name, bytes, tombstone) in query.yield_per(1000):

        if tombstone != OBSOLETE and neededSpace is not None and totalbytes >= neededSpace:
            break

        d = {'scope': scope, 'name': name, 'bytes': bytes}
        rows.append(d)
        if tombstone != OBSOLETE:
            totalbytes += bytes
    return rows
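
A hedged usage sketch for list_unlocked_replicas; the RSE name 'MOCK' and the get_session helper are assumptions:

# Hypothetical call: collect deletion candidates until roughly 10 GB would be freed.
replicas = list_unlocked_replicas(rse='MOCK', limit=1000, bytes=10 * 1024**3,
                                  session=get_session())
freed = sum(r['bytes'] for r in replicas)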
Example 19
    def _delete_multi_relation(self, cols, item_model, to_delete):
        to_delete = [dict(list(zip(cols, x))) for x in to_delete]

        session = self.dbsession
        d = delete(item_model).where(and_(*[getattr(item_model.c, x) == bindparam(x) for x in cols]))

        session.execute(d, to_delete)
Example 20
def get_team_solved_subquery(team_id):
    """
    Get a query that searches for a submission from a team for a given
    challenge. The challenge is supposed to come from an outer query.

    Example usage:
        .. code-block:: python

            team_solved_subquery = get_team_solved_subquery(team_id)
            challenge_query = (DBSession.query(Challenge,
                                               team_solved_subquery))

    In this example we query for a list of all challenges and additionally
    fetch whether the currently logged in team has solved it.
    """
    # This subquery basically searches for whether the current team has
    # solved the corresponding challenge. The correlate statement is
    # a SQLAlchemy statement that tells it to use the **outer** challenge
    # column.
    if team_id:
        team_solved_subquery = (DBSession.query(Submission).
                                filter(Submission.team_id == team_id).
                                filter(Challenge.id ==
                                       Submission.challenge_id).
                                correlate(Challenge).
                                exists().
                                label("has_solved"))
    else:
        team_solved_subquery = bindparam("has_solved", 0)
    return team_solved_subquery
Example 21
def logout():
    # Is there a destination post-logout?
    dsturl = None
    if request.referrer and local_request(request.referrer):
        dsturl = request.referrer
    else:
        dsturl = None

    # End the session in the database
    already_logged_out = False
    if 'li' in session:
        ses = db.session
        result = ses.execute(
            text("SELECT ret, col, msg FROM aaa.logout(:sid) AS (ret BOOL, col TEXT, msg TEXT)",
                 bindparams=[bindparam('sid', session['li'])]))
        ses.commit()
        # For now, don't test the result of the logout call. Regardless of
        # whether or not a user provides us with a valid session ID from the
        # wrong IP address, terminate the session. Shoot first, ask questions
        # later (i.e. why was a BadUser in posession of GoodUser's session
        # ID?!)
    else:
        already_logged_out = True

    # Nuke every key in the session
    for k in list(session.keys()):
        session.pop(k)

    # Set a flash message after we nuke the keys in session
    if already_logged_out:
        flash('Session cleared for logged out user')
    else:
        flash('You were logged out')

    return render_template('aaa/logout.html', dsturl=dsturl)
Example 22
		def prepare_columns(t_k, row_):
			for k in row_.keys():
				if k in tbl.columns and k not in pk:
					if k in u_value_keys:
						t_k[k] = u_value_keys[k]
					else:
						t_k[k] = u_value_keys[k] = bindparam(k)
Example 23
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    loop through the column mapping rows in the database and populate the
    stored procedure column based on the transformation name
    @param rule_map_list: A list of tuples containing mapping info. Tuples should be: (rule_name, proc_name)
    @param ref_table_name: the name of the reference table containing the column mapping info
    '''

    # check that list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return
    with get_udl_connection() as conn:
        # get column_mapping table object
        col_map_table = conn.get_table(ref_table_name)

        # Generate sql to perform update
        update_stmt = col_map_table.update().where(col_map_table.c.transformation_rule == bindparam('rule_name'))
        update_stmt = update_stmt.values(stored_proc_name=bindparam('proc_name'), stored_proc_created_date=datetime.datetime.now())

        # Create list of dicts that sqlalchemy will recognize
        # to update all rules with corresponding stored procedure.
        for pair in rule_map_list:
            conn.execute(update_stmt, rule_name=pair[0], proc_name=pair[1])
Example 24
 def generate_insert_items(self, header, row):
     row = [r if str(r) != '' else '0' for r in row]
     columns_values = dict(zip(header, row))
     columns = [k for k, v in columns_values.items()]
     values = [":{k}".format(k=k) for k, v in columns_values.items()]
     params = [bindparam(k, v) for k, v in columns_values.items()]
     return (columns, values, params)
Example 25
def upgrade():  # pylint: disable=too-many-locals
  """Upgrade events table to new bulk format.

  The data migration takes 3 steps, composing from two main parts - primary and
  secondary. Primary makes the "core" data migration (1. step), while the
  secondary fixes currently corrupted data (GET and BULK).

  1. step: Migrate events.action = "IMPORT" to "BULK"

  2. step: For a while we didn't have GET in events.action enum so the values
  that got inserted had action = "". We recognize these as action = "" and
  resource_type IS NOT NULL.

  3. step: Due to missing data migration that migrated IMPORT to BULK without
  first changing values we convert event.action = "" AND
  event.resource_type IS NULL to "BULK"
  """
  connection = op.get_bind()
  update_sql = events_table.update().where(
      events_table.c.id == bindparam("_id"))

  # 1st step
  import_sql = select([events_table]).where(events_table.c.action == "IMPORT")
  result_import_sql = connection.execute(import_sql).fetchall()

  import_ids = [{"_id": _id} for _id, _, _ in result_import_sql]
  update_import_sql = update_sql.values(action="BULK")

  if import_ids:
    connection.execute(update_import_sql, import_ids)

  # 2nd step
  missing_gets_sql = select([events_table]).where(and_(
      events_table.c.action == "",
      events_table.c.resource_type.isnot(None)
  ))
  result_missing_gets = connection.execute(missing_gets_sql).fetchall()
  gets_ids = [{"_id": _id} for _id, _, _ in result_missing_gets]
  update_missing_gets_sql = update_sql.values(action="GET")
  if gets_ids:
    connection.execute(update_missing_gets_sql, gets_ids)

  # 3rd step
  missing_bulks_sql = select([events_table]).where(and_(
      events_table.c.action == "",
      events_table.c.resource_type.is_(None)
  ))
  result_missing_bulks = connection.execute(missing_bulks_sql).fetchall()
  bulks_ids = [{"_id": _id} for _id, _, _ in result_missing_bulks]
  update_missing_bulks_sql = update_sql.values(action="BULK")
  if bulks_ids:
    connection.execute(update_missing_bulks_sql, bulks_ids)

  op.alter_column(
      "events", "action",
      type_=sa.Enum(u"POST", u"PUT", u"DELETE", u"BULK", u"GET"),
      existing_type=sa.Enum(u"POST", u"PUT", u"DELETE", u"IMPORT", u"BULK",
                            u"GET"),
      nullable=False
  )
Example 26
def _get_schema_check_query(schema_name):
    """
    returns the sql query to look for schema presence
    """
    query = select(['schema_name'], from_obj=['information_schema.schemata']).where('schema_name = :schema_name')
    params = [bindparam('schema_name', schema_name)]
    return text(str(query), bindparams=params)
Example 27
 def test_unicode(self, engine, connection):
     """Verify that unicode strings make it through SQLAlchemy and the backend"""
     unicode_str = "白人看不懂"
     one_row = Table('one_row', MetaData(bind=engine))
     returned_str = sqlalchemy.select(
         [expression.bindparam("好", unicode_str)],
         from_obj=one_row,
     ).scalar()
     self.assertEqual(returned_str, unicode_str)
Example 28
 def find_prefix(cls, db, token):
     """Start the query for matching token.
     
     Returns an SQLAlchemy query already filtered by prefix-matches.
     """
     prefix = token[:cls.prefix_length]
     # since we can't filter on hashed values, filter on prefix
     # so we aren't comparing with all tokens
     return db.query(cls).filter(bindparam('prefix', prefix).startswith(cls.prefix))
Example 29
 def column(self, column):
     try:
         table = version_table(column.table)
     except KeyError:
         return column
     if self.parent and table == self.parent.__table__:
         return bindparam(column.key, getattr(self.parent, column.key))
     else:
         return table.c[column.name]
Example 30
    def find_prefix(cls, db, token):
        """Start the query for matching token.

        Returns an SQLAlchemy query already filtered by prefix-matches.
        """
        prefix = token[:cls.prefix_length]
        # since we can't filter on hashed values, filter on prefix
        # so we aren't comparing with all tokens
        return db.query(cls).filter(bindparam('prefix', prefix).startswith(cls.prefix))
Example 31
 def test_unicode(self, engine, connection):
     """Verify that unicode strings make it through SQLAlchemy and the backend"""
     unicode_str = "中文"
     one_row = Table('one_row', MetaData(bind=engine))
     returned_str = sqlalchemy.select(
         [expression.bindparam("好", unicode_str)],
         from_obj=one_row,
     ).scalar()
     self.assertEqual(to_str(returned_str), unicode_str)
Example 32
def commit_updates(session,
                   update_key,
                   update_statements,
                   table,
                   commit_frequency=1000):
    """
    Executes bulk updates for a given table. This is typically much faster than going through
    the SQLAlchemy ORM. In order to be flexible, the update statements must be set up in a specific
    way. You can only update one column at a time. The dictionaries in the list `update_statements`
    must have two keys: `pk`, which is the primary_key for the record to be updated, and `update`
    which is the new value for the column you want to change. The column you want to change
    is specified as a string by the argument `update_key`.

    This method will work regardless if you run it over MySQL or SQLite, but with MySQL, it is
    usually faster to use the bulk_commit_updates method (see lib/tasks.py), because it uses
    a table join to do the updates instead of executing individual statements.

    Args:
    session -- alchemy session object
    update_key -- the name of the column we want to update
    update_statements -- list of dictionaries of updates. See above description
    table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table__
    commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items
    """
    primary_key = list(table.primary_key.columns.values())[0]
    update_key = table.columns[update_key]
    u = table.update().where(primary_key == bindparam('pk')).values(
        {update_key: bindparam('update')})
    numgroups = len(update_statements) // commit_frequency
    for ng in range(numgroups):
        if numgroups == 0:
            break
        chunk = update_statements[ng * commit_frequency:(ng + 1) *
                                  commit_frequency]
        session.connection().execute(u, *chunk)
        print("committing chunk", ng + 1, "of", numgroups, "with length",
              len(chunk), "at", datetime.now())
        session.commit()
    last_chunk = update_statements[numgroups * commit_frequency:]
    if last_chunk:
        print("committing last", len(last_chunk), "records at", datetime.now())
        print(" If it sticks here, use the assignee_patch.py file to fix it!")
        session.connection().execute(u, *last_chunk)
        session.commit()
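
The update_statements contract described in the docstring can be made concrete with a short sketch; the session, table, and values below are hypothetical:

# Each dict carries the primary key ('pk') and the new value ('update'),
# matching the bindparams in the statement built above.
update_statements = [
    {'pk': 1, 'update': 'ACME Corp.'},
    {'pk': 2, 'update': 'Widgets Ltd.'},
]
commit_updates(session, 'organization', update_statements,
               RawAssignee.__table__, commit_frequency=1000)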
Example 33
    def select_host_uuids_lacking_sync_dataset_expr(cls):
        r"""
        Get the query that generates the UUIDs of the hosts that lack
        some dataset (provided that it is a sync dataset).
        The query accepts a single bound parameter C{ds_uuid}.

        >>> str(DatasetOnHost.select_host_uuids_lacking_sync_dataset_expr()) \
        ...     # doctest:+NORMALIZE_WHITESPACE
        'SELECT DISTINCT other_inhabitant.uuid AS uuid
        \nFROM
            dataset
            JOIN membership AS other_membership
                ON dataset.group_id = other_membership.group_id
            JOIN user_at_node AS other_user_at_node
                ON other_user_at_node.id = other_membership.user_id
            JOIN host AS other_host
                ON other_host.user_id = other_user_at_node.id
            JOIN inhabitant AS other_inhabitant
                ON other_inhabitant.id = other_host.id
            LEFT OUTER JOIN dataset_on_host AS ds_on_other_host
                ON ds_on_other_host.dataset_id = dataset.id AND
                   ds_on_other_host.host_id = other_host.id
        \nWHERE
            dataset.uuid = :ds_uuid AND
            dataset.sync AND
            ds_on_other_host.id IS NULL AND
            NOT other_user_at_node.suspended'

        @rtype: sqlalchemy.sql.expression.Select
        """
        other_membership = memberships.alias('other_membership')
        other_user_at_node = users_at_node.alias('other_user_at_node')
        other_host = hosts.alias('other_host')
        other_inhabitant = inhabitants.alias('other_inhabitant')
        ds_on_other_host = datasets_on_host.alias('ds_on_other_host')

        return select(
            columns=[other_inhabitant.c.uuid.label('uuid')],
            from_obj=datasets.join(
                other_membership,
                datasets.c.group == other_membership.c.group).join(
                    other_user_at_node, other_user_at_node.c.user_at_node_id ==
                    other_membership.c.user).join(
                        other_host, other_host.c.user == other_user_at_node.c.
                        user_at_node_id).join(other_inhabitant).join(
                            ds_on_other_host,
                            (ds_on_other_host.c.dataset == datasets.c.id) &
                            (ds_on_other_host.c.host == other_host.c.host_id),
                            isouter=True),
            whereclause=(datasets.c.uuid == bindparam('ds_uuid'))
            & datasets.c.sync &
            # Keep "== None" instead of "is None",
            # cause it is parsed by SQLAlchemy!
            (ds_on_other_host.c.id == None) &
            (~other_user_at_node.c.suspended),
            distinct=True)
Example 34
    def select_ds_uuids_groups_synced_to_host_expr(cls):
        r"""
        Get the query that provides the datasets (in fact, only their UUIDs
        and group UUIDs) that are present at some host.

        The query accepts a single bound parameter C{host_uuid}.

        >>> str(DatasetOnHost.select_ds_uuids_groups_synced_to_host_expr()) \
        ...     # doctest:+NORMALIZE_WHITESPACE
        'SELECT DISTINCT
            dataset.uuid AS ds_uuid,
            "group".uuid AS ugroup_uuid
        \nFROM
            inhabitant AS my_inhabitant
            JOIN host AS my_host
                ON my_inhabitant.id = my_host.id
            JOIN membership AS i_belong_to
                ON my_host.user_id = i_belong_to.user_id
            JOIN "group"
                ON "group".id = i_belong_to.group_id
            JOIN dataset
                ON "group".id = dataset.group_id
            JOIN dataset_on_host
                ON dataset_on_host.host_id = my_host.id AND
                   dataset_on_host.dataset_id = dataset.id
        \nWHERE
            my_inhabitant.uuid = :host_uuid AND
            dataset.sync AND
            dataset.time_completed IS NOT NULL'

        @rtype: sqlalchemy.sql.expression.Select
        """
        my_host = hosts.alias('my_host')
        my_inhabitant = inhabitants.alias('my_inhabitant')
        i_belong_to = memberships.alias('i_belong_to')

        return select(columns=[datasets.c.uuid.label('ds_uuid'),
                               user_groups.c.uuid.label('ugroup_uuid')],
                      from_obj=my_inhabitant
                                   .join(my_host)
                                   .join(i_belong_to,
                                         my_host.c.user == i_belong_to.c.user)
                                   .join(user_groups)
                                   .join(datasets)
                                   .join(datasets_on_host,
                                         (datasets_on_host.c.host ==
                                              my_host.c.host_id) &
                                         (datasets_on_host.c.dataset ==
                                              datasets.c.id)),
                      whereclause=(my_inhabitant.c.uuid ==
                                       bindparam('host_uuid')) &
                                  datasets.c.sync &
                                  # Keep "!= None" instead of "is not None",
                                  # cause it is parsed by SQLAlchemy!
                                  (datasets.c.time_completed != None),
                      distinct=True)
Example 35
def insert_splits_data():
    stmt_insert_splits = splits_table.insert().values(
        split_id=bindparam('SplitID'),
        split_date=bindparam('SplitDate', type_=Date),
        company_id=bindparam('CompanyID'),
        ratio=bindparam('Ratio;'))
    print(stmt_insert_splits)

    result = _read_data('splits.csv')
    for item in result:
        item['SplitDate'] = datetime.strptime(item['SplitDate'],
                                              "%Y-%m-%d").date()
        item['Ratio;'] = float(item['Ratio;'][0:len(item['Ratio;']) - 1])

    # for row in result:
    #     print(row)

    with engine.connect() as conn:
        conn.execute(stmt_insert_splits, result)
Example 36
def generate_bind_ins(table, n):
    stmnt = table.insert()
    items = []
    for i in range(n):
        item = []
        for c in table.c:
            item.append(bindparam('_' + str(i + 1) + c.key))
        items.append(item)
    stmnt = stmnt.values(items)
    return stmnt
Example 37
def update_position_records(position_records):
    time_stamps = np.asarray([r.time_stamp.timestamp() for r in position_records])
    time_deltas_minutes = np.diff(time_stamps) * SECONDS_TO_MINUTES
    lat_and_longs = np.asarray([[r.latitude, r.longitude] for r in position_records], dtype=np.float64)
    pos_change = great_circle_distance_np(lat_and_longs)
    calc_speed = pos_change / time_deltas_minutes * MINUTES_PER_HOUR
    stmt = PositionReading.update().where(and_(PositionReading.c.helicopter_id == bindparam('_helicopter_id'),
                                               PositionReading.c.time_stamp == bindparam('_time_stamp'))).values(
        minutes_since_last_reading=bindparam('time_elapsed'),
        knots_moved_since_last_reading=bindparam('pos_change'),
        calculated_speed=bindparam('calc_speed'))
    dal.engine.execute(stmt, [{'_helicopter_id': r.helicopter_id,
                               '_time_stamp': r.time_stamp,
                               'time_elapsed': delta_t,
                               'pos_change': delta_x,
                               'calc_speed': speed,
                               } for r, delta_t, delta_x, speed in
                              zip(position_records[1:], time_deltas_minutes, pos_change, calc_speed)])
    dal.session.commit()
Example 38
 def _update_failed_items(self, errors):
     """Update items in DB we couldn't sync to Issue Tracker"""
     if not errors:
         return
     issuetracker = all_models.IssuetrackerIssue.__table__
     stmt = issuetracker.update().where(
         sa.and_(
             issuetracker.c.object_type == expr.bindparam("object_type_"),
             issuetracker.c.object_id == expr.bindparam("object_id_"),
         )).values(enabled=False)
     try:
         update_values = self._create_failed_items_list(errors)
         db.session.execute(stmt, update_values)
         db.session.commit()
     except sa.exc.OperationalError as error:
         logger.exception(error)
         raise exceptions.InternalServerError(
             "Failed to turn integration off for IssueTracker issues "
             "that weren't synced in database.")
Example 39
    def load_features(features_fns):
        """
        Load object features from an HDF5 file.
        """
        for features_fn in features_fns:
            print("Loading {}...".format(features_fn))
            with h5py.File(
                features_fn, "r", libver="latest"
            ) as f_features:
                object_ids = f_features["object_id"].asstr()[:]
                vectors = f_features["features"][:]
            
            with database.engine.begin() as conn:
                stmt = (
                    models.objects.update()
                    .where(models.objects.c.object_id == bindparam("_object_id"))
                    .values({"vector": bindparam("vector")})
                )

                # TODO: Use UPDATE ... RETURNING to get the number of affected rows

                bar = ProgressBar(len(object_ids), max_width=40)
                obj_iter = iter(zip(object_ids, vectors))
                while True:
                    chunk = tuple(itertools.islice(obj_iter, 1000))
                    if not chunk:
                        break
                    conn.execute(
                        stmt,
                        [
                            {"_object_id": str(object_id), "vector": vector}
                            for (object_id, vector) in chunk
                        ],
                    )

                    bar.numerator += len(chunk)
                    print(bar, end="\r")
                print()

                # TODO: In the end, print a summary of how many objects have a feature vector now.

                print("Done.")
Example 40
def filter_thread_work(session,
                       query,
                       total_threads,
                       thread_id,
                       hash_variable=None):
    """ Filters a query to partition thread workloads based on the thread id and total number of threads """
    if thread_id is not None and total_threads is not None and (total_threads -
                                                                1) > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [
                bindparam('thread_id', thread_id),
                bindparam('total_threads', total_threads - 1)
            ]
            if not hash_variable:
                query = query.filter(
                    text('ORA_HASH(id, :total_threads) = :thread_id',
                         bindparams=bindparams))
            else:
                query = query.filter(
                    text('ORA_HASH(%s, :total_threads) = :thread_id' %
                         (hash_variable),
                         bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            if not hash_variable:
                query = query.filter(
                    text('mod(md5(id), %s) = %s' % (total_threads, thread_id)))
            else:
                query = query.filter(
                    text('mod(md5(%s), %s) = %s' %
                         (hash_variable, total_threads, thread_id)))
        elif session.bind.dialect.name == 'postgresql':
            if not hash_variable:
                query = query.filter(
                    text(
                        'mod(abs((\'x\'||md5(id::text))::bit(32)::int), %s) = %s'
                        % (total_threads, thread_id)))
            else:
                query = query.filter(
                    text(
                        'mod(abs((\'x\'||md5(%s::text))::bit(32)::int), %s) = %s'
                        % (hash_variable, total_threads, thread_id)))
    return query
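
A short usage sketch for filter_thread_work (the query and model are borrowed from the earlier replica examples and are assumptions here):

# Hypothetical use: restrict a query to this thread's slice of the rows,
# hashing on the 'name' column.
query = session.query(models.RSEFileAssociation)
query = filter_thread_work(session, query, total_threads=4, thread_id=0,
                           hash_variable='name')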
Example 41
    def signups_requiring_notification(self, session, from_time, to_time, options=None):
        """
        Returns a dict of AttractionSignups that require notification.

        The keys of the returned dict are the amount of advanced notice, given
        in seconds. A key of -1 indicates confirmation notices after a signup.

        The query generated by this method looks horrific, but is surprisingly
        efficient.
        """
        advance_checkin = max(0, self.advance_checkin)
        subqueries = []
        for advance_notice in sorted(set([-1] + self.advance_notices)):
            event_filters = [AttractionEvent.attraction_id == self.id]
            if advance_notice == -1:
                notice_ident = cast(AttractionSignup.attraction_event_id, UnicodeText)
                notice_param = bindparam('confirm_notice', advance_notice).label('advance_notice')
            else:
                advance_notice = max(0, advance_notice) + advance_checkin
                notice_delta = timedelta(seconds=advance_notice)
                event_filters += [
                    AttractionEvent.start_time >= from_time + notice_delta,
                    AttractionEvent.start_time < to_time + notice_delta]
                notice_ident = func.concat(AttractionSignup.attraction_event_id, '_{}'.format(advance_notice))
                notice_param = bindparam(
                    'advance_notice_{}'.format(advance_notice), advance_notice).label('advance_notice')

            subquery = session.query(AttractionSignup, notice_param).filter(
                AttractionSignup.is_unchecked_in,
                AttractionSignup.attraction_event_id.in_(
                    session.query(AttractionEvent.id).filter(*event_filters)),
                not_(exists().where(and_(
                    AttractionNotification.ident == notice_ident,
                    AttractionNotification.attraction_event_id == AttractionSignup.attraction_event_id,
                    AttractionNotification.attendee_id == AttractionSignup.attendee_id)))).with_labels()
            subqueries.append(subquery)

        query = subqueries[0].union(*subqueries[1:])
        if options:
            query = query.options(*listify(options))
        query = query.order_by(AttractionSignup.id)
        return groupify(query, lambda x: x[0], lambda x: x[1])
Example 42
        def sync_collection_new_registration(connection, transaction):
            where = and_(
                BIStatistic.__table__.c.day == bindparam('_on_day'),
                BIStatistic.__table__.c.platform == bindparam('_platform'),
                BIStatistic.__table__.c.game == 'All Game'
                )
            values = {
                'new_registration': bindparam('sum')
                }

            try:
                connection.execute(BIStatistic.__table__.update().where(where).values(values), rows)
            except:
                print('process_bi_statistic_for_lifetime new_registration transaction.rollback()')
                transaction.rollback()
                raise
            else:
                print('process_bi_statistic_for_lifetime new_registration transaction.commit()')
                transaction.commit()
            return
Example 43
 async def load_trunk(cls, dialed_number, db_connection):
     res = await db_connection.execute(cls.table.select().where(
         bindparam("dialed_number", dialed_number).startswith(
             cls.table.c.username)).where(cls.table.c.trunk == True))
     if res.rowcount == 0:
         raise DoesNotExist('No trunk for "{}" found'.format(dialed_number))
     elif res.rowcount > 1:
         raise DoesNotExist(
             "Trunk misconfiguration lead to multiple results for {}".
             format(dialed_number))
     return cls(await res.first())
Example 44
    def update_many(self,
                    rows,
                    keys,
                    chunk_size=1000,
                    ensure=None,
                    types=None):
        """Update many rows in the table at a time.

        This is significantly faster than updating them one by one. Per default
        the rows are processed in chunks of 1000 per commit, unless you specify
        a different ``chunk_size``.

        See :py:meth:`update() <dataset.Table.update>` for details on
        the other parameters.
        """
        # Convert keys to a list if not a list or tuple.
        keys = keys if type(keys) in (list, tuple) else [keys]

        chunk = []
        columns = []
        for index, row in enumerate(rows):
            chunk.append(row)
            for col in row.keys():
                if col not in columns:
                    columns.append(col)

            # bindparam requires names to not conflict (cannot be "id" for id)
            for key in keys:
                row['_%s' % key] = row[key]

            # Update when chunk_size is fulfilled or this is the last row
            if len(chunk) == chunk_size or index == len(rows) - 1:
                cl = [self.table.c[k] == bindparam('_%s' % k) for k in keys]
                stmt = self.table.update(whereclause=and_(*cl),
                                         values={
                                             col: bindparam(col,
                                                            required=False)
                                             for col in columns
                                         })
                self.db.executable.execute(stmt, chunk)
                chunk = []
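Assuming the surrounding class is dataset's `Table`, a typical call might look like this sketch:

import dataset

db = dataset.connect('sqlite:///:memory:')
table = db['users']
table.insert_many([{'id': 1, 'name': 'alice'}, {'id': 2, 'name': 'bob'}])
# One chunked executemany UPDATE, keyed on 'id'.
table.update_many([{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'id')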
Example n. 45
def enrollment_etl(config):
    # set up
    connection = get_connection(config)
    pedsnet_session = init_pedsnet(connection)
    init_pcornet(connection)

    observation_period = pedsnet_session.query(ObservationPeriod.person_id,
                                               ObservationPeriod.observation_period_start_date,
                                               ObservationPeriod.observation_period_end_date,
                                               ObservationPeriod.site,
                                               bindparam("chart", 'Y'),
                                               bindparam("enr_basis", 'E')
                                               ).all()

    odo(observation_period, Enrollment.__table__,
        dshape='var * {patid: string, enr_start_date: date, enr_end_date: date, site: string, chart: string, '
               'enr_basis: string}'
        )
    # close session
    pedsnet_session.close()
Example n. 46
        def sync_collection(connection, transaction):
            where = and_(
                BIUserBill.__table__.c.user_id == bindparam('_user_id'),
                BIUserBill.__table__.c.platform == 'Undetected')
            values = {'platform': bindparam('platform')}

            try:
                connection.execute(
                    BIUserBill.__table__.update().where(where).values(values),
                    rows)
            except:
                print(
                    'process_user_mall_bill_fix_platform transaction.rollback()'
                )
                transaction.rollback()
                raise
            else:
                print(
                    'process_user_mall_bill_fix_platform transaction.commit()')
                transaction.commit()
            return
Example n. 47
        def sync_collection(connection, transaction):
            where = and_(
                BIUserCurrency.__table__.c.og_account == bindparam(
                    '_og_account'), BIUserCurrency.__table__.c.user_id == -1)
            values = {'user_id': bindparam('user_id')}

            try:
                connection.execute(
                    BIUserCurrency.__table__.update().where(where).values(
                        values), rows)
            except:
                print(
                    'process_user_currency_fill_user_id transaction.rollback()'
                )
                transaction.rollback()
                raise
            else:
                print(
                    'process_user_currency_fill_user_id transaction.commit()')
                transaction.commit()
            return
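Both `sync_collection` callbacks expect the caller to hand them an open connection plus an explicit transaction they commit or roll back. A hedged sketch of that driver (the `engine` name is an assumption):

# Hypothetical invocation: the caller owns the connection and transaction.
with engine.connect() as connection:
    transaction = connection.begin()
    sync_collection(connection, transaction)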
Example n. 48
def query_cube(cube_name, readable=True):
    cube = get_cube(cube_name)
    dimensions = get_dimensions(cube_name)

    fact_table = get_fact_table(cube_name).table.alias('fact')
    selects, wheres, tables = [], [], [fact_table]

    if not readable:
        selects.append(fact_table.columns['fact_id'].label('REGENESIS_ID'))

    params = {}
    for dim in dimensions:
        name = dim.get('dim_name')
        field = name.upper()
        title = dim.get('dim_title_de')

        if readable:
            unit = dim.get('ref_unit_name')
            if unit is not None:
                field = '%s; %s' % (field, unit)
            field = '%s (%s)' % (title, field)

        type_ = dim.get('ref_type')
        if type_ == 'measure':
            selects.append(fact_table.columns[name].label(field))
            if not readable:
                selects.append(
                    fact_table.columns[name + "_quality"].label(field +
                                                                '_QUALITY'))
                selects.append(fact_table.columns[name +
                                                  "_error"].label(field +
                                                                  '_ERROR'))
        elif type_ == 'time':
            selects.append(fact_table.columns[name].label(field))
            if not readable:
                selects.append(
                    fact_table.columns[name + '_from'].label(field + '_FROM'))
                selects.append(fact_table.columns[name +
                                                  '_until'].label(field +
                                                                  '_UNTIL'))
        elif type_ == 'axis':
            vt = value_table.table.alias('value_%s' % name)
            id_col = field + ' - ID' if readable else field + '_CODE'
            selects.append(vt.c.name.label(id_col))
            selects.append(vt.c.title_de.label(field))
            tables.append(vt)
            params[name] = name
            wheres.append(vt.c.dimension_name == bindparam(name, value=name))
            wheres.append(vt.c.value_id == fact_table.c[name])

    q = select(selects, and_(*wheres), tables)
    return q, params
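The returned `params` dict pairs with the `bindparam(name, value=name)` placeholders created for each axis dimension; a sketch of consuming the query (1.x-style execution and the `engine`/cube name are assumptions):

# Sketch: execute the cube query, re-supplying the axis bindparams.
q, params = query_cube('my_cube')  # 'my_cube' is a hypothetical cube name
rows = engine.execute(q, **params).fetchall()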
Example n. 49
    def select_ds_uuids_groups_synced_to_host_expr(cls):
        r"""
        Get the query that provides the datasets (in fact, only their UUIDs
        and group UUIDs) that are present at some host.

        The query accepts a single bound parameter C{host_uuid}.

        >>> str(DatasetOnHost.select_ds_uuids_groups_synced_to_host_expr()) \
        ...     # doctest:+NORMALIZE_WHITESPACE
        'SELECT DISTINCT
            dataset.uuid AS ds_uuid,
            "group".uuid AS ugroup_uuid
        \nFROM
            inhabitant AS my_inhabitant
            JOIN host AS my_host
                ON my_inhabitant.id = my_host.id
            JOIN membership AS i_belong_to
                ON my_host.user_id = i_belong_to.user_id
            JOIN "group"
                ON "group".id = i_belong_to.group_id
            JOIN dataset
                ON "group".id = dataset.group_id
            JOIN dataset_on_host
                ON dataset_on_host.host_id = my_host.id AND
                   dataset_on_host.dataset_id = dataset.id
        \nWHERE
            my_inhabitant.uuid = :host_uuid AND
            dataset.sync AND
            dataset.time_completed IS NOT NULL'

        @rtype: sqlalchemy.sql.expression.Select
        """
        my_host = hosts.alias('my_host')
        my_inhabitant = inhabitants.alias('my_inhabitant')
        i_belong_to = memberships.alias('i_belong_to')

        return select(
            columns=[
                datasets.c.uuid.label('ds_uuid'),
                user_groups.c.uuid.label('ugroup_uuid')
            ],
            from_obj=my_inhabitant.join(my_host).join(
                i_belong_to, my_host.c.user ==
                i_belong_to.c.user).join(user_groups).join(datasets).join(
                    datasets_on_host,
                    (datasets_on_host.c.host == my_host.c.host_id) &
                    (datasets_on_host.c.dataset == datasets.c.id)),
            whereclause=(my_inhabitant.c.uuid == bindparam('host_uuid'))
            & datasets.c.sync &
            # Keep "!= None" instead of "is not None",
            # cause it is parsed by SQLAlchemy!
            (datasets.c.time_completed != None),
            distinct=True)
Example n. 50
    def find(cls, db, token):
        """Find a token object by value.

        Returns None if not found.
        """
        prefix = token[:cls.prefix_length]
        # since we can't filter on hashed values, filter on prefix
        # so we aren't comparing with all tokens
        prefix_match = db.query(cls).filter(bindparam('prefix', prefix).startswith(cls.prefix))
        for orm_token in prefix_match:
            if orm_token.match(token):
                return orm_token
Example n. 51
def handle_param(column, data):
    """
    处理where条件
    """
    opt = data.get('opt', '$te')
    if 'val' in data:
        value = data['val']
        if opt == '$ne':  # 不等于
            return column != value
        if opt == '$te':  # 等于
            return column == value
        elif opt == '$lt':  # 小于
            return column < value
        elif opt == '$lte':  # 小于等于
            return column <= value
        elif opt == '$gt':  # 大于
            return column > value
        elif opt == '$gte':  # 大于等于
            return column >= value
        elif opt == '$like':  # like
            return column.like(value)
        elif opt == '$in':
            return column.in_(value)
        elif opt == '$nin':
            return ~column.in_(value)
        elif opt == '$bind':
            # 占位符
            if isinstance(value, str):
                return column == bindparam(value)
            else:
                opt = value["opt"]
                if opt == '$bind':
                    return None
                if opt == "$in" or opt == "$nin":
                    value["val"] = bindparam(value["val"], expanding=True)
                else:
                    value["val"] = bindparam(value["val"])
                return handle_param(column, value)
        elif opt == '$raw':
            return value
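A quick usage sketch of `handle_param` against a hypothetical table, showing how each `opt` compiles to a SQL comparison:

from sqlalchemy import Column, Integer, MetaData, Table

users = Table('users', MetaData(), Column('age', Integer))

clause = handle_param(users.c.age, {'opt': '$gte', 'val': 18})  # users.age >= 18
in_clause = handle_param(users.c.age, {'opt': '$in', 'val': [1, 2, 3]})  # users.age IN (1, 2, 3)
deferred = handle_param(users.c.age, {'opt': '$bind', 'val': 'age_param'})  # users.age == :age_param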
Example n. 52
def neighboring_ids():
    from sqlalchemy.sql.expression import bindparam

    batch = 1000
    n_inds = session.query(Individual).count()
    print "Computing neighboring ids for {} individuals.".format(n_inds)
    stmt = Individual.__table__.update().\
        where(Individual.id == bindparam('_id')).\
        values({
            'neighboring_ids': bindparam('neighboring_ids'),
        })
    updates = []
    for batch_i, (lo, hi) in enumerate(yield_batch_limits(n_inds, batch)):
        print(dt.datetime.now().isoformat()[:-7], "Batch from {} to {}".format(lo, hi - 1))
        sys.stdout.flush()
        t0 = time.time()
        if batch < n_inds:
            ind_query = Individual.query.filter(and_(Individual.id >= lo, Individual.id < hi))
        else:
            ind_query = Individual.query
        ind_query = ind_query.options(joinedload(Individual.sup_families)).\
                              options(joinedload(Individual.sub_families))
        inds = ind_query.all()
        print "  Querying {} individuals took {:.4f} seconds.".format(len(inds), time.time()-t0)
        sys.stdout.flush()
        t0 = time.time()
        for ii, ind in enumerate(inds):
            n_ids = []
            for fam in ind.sub_families + ind.sup_families:
                for ind2 in fam.parents + fam.children:
                    n_ids.append([fam.xref, ind2.xref])
            updates.append({'neighboring_ids': u(json.dumps(n_ids)), '_id': ind.id})
        print "  Neighboring ids took {:.4f} seconds.".format(time.time()-t0)
        if len(updates) > 0:
            t0 = time.time()
            engine.execute(stmt, updates)
            updates = []
            print "  Executing took {:.4f} seconds.".format(time.time()-t0)
Example n. 53
def dao_get_templates_for_cache(cache):
    if not cache or len(cache) == 0:
        return []

    # First create a subquery that is a union select of the cache values
    # Then join templates to the subquery
    cache_queries = [
        db.session.query(bindparam("template_id" + str(i),
                                   uuid.UUID(template_id.decode())).label('template_id'),
                         bindparam("count" + str(i), int(count.decode())).label('count'))
        for i, (template_id, count) in enumerate(cache)]
    cache_subq = cache_queries[0].union(*cache_queries[1:]).subquery()
    query = db.session.query(Template.id.label('template_id'),
                             Template.template_type,
                             Template.name,
                             Template.is_precompiled_letter,
                             cache_subq.c.count.label('count')
                             ).join(cache_subq,
                                    Template.id == cache_subq.c.template_id
                                    ).order_by(Template.name)

    return query.all()
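The cache entries are decoded before use, so they are presumably byte strings, e.g. read back from Redis; a hedged sketch of a call:

# Hypothetical cache payload: (template_id, count) byte-string pairs.
cache = [
    (b'0f36e9ba-0f36-4e9b-a0f3-6e9ba0f36e9b', b'3'),
    (b'7d1c2a9e-7d1c-42a9-a7d1-c2a9e7d1c2a9', b'1'),
]
templates = dao_get_templates_for_cache(cache)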
Example n. 54
    def column(self, column):
        try:
            table = version_table(column.table)
        except KeyError:
            reflected_column = column
        else:
            reflected_column = table.c[column.name]
            if (column in self.relationship.local_columns
                    and table == self.parent.__table__):
                reflected_column = bindparam(column.key,
                                             getattr(self.parent, column.key))

        return reflected_column
Example n. 55
    def update_multiple(self, table_name, column, data_dict_list):
        """
        Apply a condition on one column and update the rows that
        already exist in the DB in a single batch.
        """
        transaction = self.__connection.begin()

        if data_dict_list:
            try:
                table = self.find_table(table_name)
                if column in table.c:
                    c = table.c.get(column)

                    upd = table.update(). \
                        where(c == bindparam('target_' + column))

                    # Duplicate each key value under a 'target_'-prefixed
                    # name so the WHERE bindparam cannot clash with the
                    # SET values (see the add_target_item sketch below).
                    added_data_dict_list = [
                        add_target_item(dic, column) for dic in data_dict_list
                    ]
                    self.__connection.execute(upd, added_data_dict_list)
                    transaction.commit()
                    return True

                else:
                    print("Table {} has no column {}".format(
                        table_name, column))
                    print("Let's RollBack")
                    transaction.rollback()
                    return False

            except IntegrityError as error:
                error_code, error_msg = error.orig.args

                print("IntegrityError!!")
                print(">>>error_code: {}".format(error_code))
                print(">>>error_msg: {}".format(error_msg))
                print("Let's RollBack")
                transaction.rollback()

                return False

        else:
            print("'update_multiple' needs a non-empty list of dicts")
            transaction.rollback()
            print("Let's RollBack")
            return False
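`add_target_item` is referenced but not shown; given the `bindparam('target_' + column)` in the WHERE clause, a plausible implementation might be (assumption):

def add_target_item(dic, column):
    # Assumed helper: duplicate the key column's value under a
    # 'target_'-prefixed name so the WHERE bindparam does not clash
    # with the SET value of the same name.
    new_dic = dict(dic)
    new_dic['target_' + column] = new_dic[column]
    return new_dic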
Example n. 56
def list_expired_temporary_dids(rse, limit, worker_number=None, total_workers=None,
                                session=None):
    """
    List expired temporary DIDs.

    :param rse: the rse name.
    :param limit: The maximum number of replicas returned.
    :param worker_number:      id of the executing worker.
    :param total_workers:      Number of total workers.
    :param session: The database session in use.

    :returns: a list of replica dictionaries.
    """
    rse_id = get_rse_id(rse, session=session)
    is_none = None
    query = session.query(models.TemporaryDataIdentifier.scope,
                          models.TemporaryDataIdentifier.name,
                          models.TemporaryDataIdentifier.path,
                          models.TemporaryDataIdentifier.bytes).\
        with_hint(models.TemporaryDataIdentifier, "INDEX(tmp_dids TMP_DIDS_EXPIRED_AT_IDX)", 'oracle').\
        filter(case([(models.TemporaryDataIdentifier.expired_at != is_none, models.TemporaryDataIdentifier.rse_id), ]) == rse_id)

    if worker_number and total_workers and total_workers - 1 > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number - 1), bindparam('total_workers', total_workers - 1)]
            query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter(text('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1)))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter(text('mod(abs((\'x\'||md5(path))::bit(32)::int), %s) = %s' % (total_workers - 1, worker_number - 1)))

    return [{'path': path,
             'rse': rse,
             'rse_id': rse_id,
             'scope': scope,
             'name': name,
             'bytes': bytes}
            for scope, name, path, bytes in query.limit(limit)]
Example n. 57
async def update_contentgroup_by_localgroupid(
    localgroupid: int,
    localcontentids: ContentGroupin,
    session: Session = Depends(get_session),
    current_user: User = Depends(get_current_user)):
    try:
        oldset = {
            'username': current_user.username,
            'localgroupid': localgroupid
        }
        old = ReportContentGroup.__table__.delete().where(
            and_(
                ReportContentGroup.__table__.c.username == bindparam(
                    'username'), ReportContentGroup.__table__.c.localgroupid ==
                bindparam('localgroupid')))
        session.execute(old, oldset)
        newset = [{
            'username': current_user.username,
            'localgroupid': localgroupid,
            'contentid': n,
            'order': i
        } for i, n in enumerate(localcontentids.localcontentids)]
        session.execute(ReportContentGroup.__table__.insert(), newset)
        try:
            session.commit()
            return ContentGroupout(
                localgroupid=localgroupid,
                contents=session.query(ReportContentGroup).filter(
                    ReportContentGroup.username == current_user.username,
                    ReportContentGroup.localgroupid == localgroupid).order_by(
                        ReportContentGroup.localgroupid,
                        ReportContentGroup.order).all())
        except:
            session.rollback()
            raise HTTPException(status_code=400, detail="not commited.")
    except Exception as e:
        print(e)
        raise HTTPException(status_code=400)
Example n. 58
    def update_df_to_postgres_using_metadata_and_id(self, df, table_name):
        meta = sa.MetaData(bind=self.engine)
        table_name_only = table_name.split(".")[1]
        schema_only = table_name.split(".")[0]
        docs = sa.Table(table_name_only,
                        meta,
                        autoload=True,
                        schema=schema_only)
        sess = orm.sessionmaker(bind=self.engine)()
        conn = self.engine.connect()
        df_2 = df.copy()
        df_2['_id'] = df_2['id']
        # Materialize the columns as a list: a lazy filter() object would
        # fail to concatenate with ['_id'] and be exhausted after one use.
        value_columns = [c for c in df.columns.values if c != 'id']
        df_2 = df_2[['_id'] + value_columns]
        listToWrite = df_2.to_dict(orient='records')
        value_dict = {}
        for vc in value_columns:
            value_dict[vc] = bindparam(vc)
        conn.execute(
            docs.update().where(
                docs.c.id == bindparam('_id')).values(value_dict), listToWrite)
        sess.commit()
        sess.close()
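A hedged usage sketch: the target table must already exist with an `id` key plus the frame's value columns (the `helper` instance and table name are hypothetical):

import pandas as pd

df = pd.DataFrame({'id': [1, 2], 'title': ['a', 'b']})
helper.update_df_to_postgres_using_metadata_and_id(df, 'public.docs')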
Example n. 59
def get_updated_account_counters(total_workers, worker_number, session=None):
    """
    Get updated account_counters.

    :param total_workers:      Number of total workers.
    :param worker_number:      id of the executing worker.
    :param session:            Database session in use.
    :returns:                  List of (account, rse_id) pairs whose account_counters need to be updated.
    """
    query = session.query(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id).\
        distinct(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id)

    if total_workers > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number),
                          bindparam('total_workers', total_workers)]
            query = query.filter(text('ORA_HASH(CONCAT(account, rse_id), :total_workers) = :worker_number', bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter('mod(md5(concat(account, rse_id)), %s) = %s' % (total_workers + 1, worker_number))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter('mod(abs((\'x\'||md5(concat(account, rse_id)))::bit(32)::int), %s) = %s' % (total_workers + 1, worker_number))

    return query.all()
Example n. 60
    def visit_idea(self, idea, level, prev_result):
        if idea.short_title:
            self.counter.add_text(self.cleantext(idea.short_title), 2)
        if idea.long_title:
            self.counter.add_text(self.cleantext(idea.long_title))
        if idea.definition:
            self.counter.add_text(self.cleantext(idea.definition))
        if self.count_posts and level == 0:
            from .generic import Content
            related = text(
                Idea._get_related_posts_statement(),
                bindparams=[bindparam('root_idea_id', idea.id),
                            bindparam('discussion_id', idea.discussion_id)]
                ).columns(column('post_id')).alias('related')
            titles = set()
            for title, body in idea.db.query(
                    Content.subject, Content.body).join(
                    related, related.c.post_id == Content.id):
                self.counter.add_text(self.cleantext(body), 0.5)
                title = self.cleantext(title)
                if title not in titles:
                    self.counter.add_text(title)
                    titles.add(title)