Example #1
0
    def eval(self, builder):
        """ return a query object
        """

        dbh = builder._get_dbh()

        tokens = self.value[1:]
        expr_1 = self.value[0].eval( builder )

        while tokens:
            op = tokens[0]
            eval_2 = tokens[1].eval( builder )
            tokens = tokens[2:]

            if op == '|':
                eval_1 = sqla.union(eval_1, eval_2)
            elif op == '&':
                eval_1 = sqla.intersect(eval_1, eval_2)
            elif op == ':':
                eval_1 = sqla.except_(
                    dbh.session().query(dbh.Sample.id).filter(
                        dbh.Sample.id.in_( sqla.union(eval_1, eval_2)) ),
                    dbh.session().query(dbh.Sample.id).filter(
                        dbh.Sample.iid.n_( sqla.intersect(eval_1, eval_2)) )
                    )

        q = dbh.session().query(dbh.Sample.id).filter( dbh.Sample.id.in_( eval_1 ) )
        return q
Example #2
0
    def test_intersect_unions_2(self):
        u = intersect(
            union(select([t1.c.col3, t1.c.col4]), select([t3.c.col3, t3.c.col4])).alias().select(),
            union(select([t2.c.col3, t2.c.col4]), select([t3.c.col3, t3.c.col4])).alias().select(),
        )
        wanted = [("aaa", "ccc"), ("bbb", "aaa"), ("ccc", "bbb")]
        found = self._fetchall_sorted(u.execute())

        eq_(found, wanted)
Example #3
0
    def test_intersect(self):
        i = intersect(select([t2.c.col3, t2.c.col4]), select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3))

        wanted = [("aaa", "bbb"), ("bbb", "ccc"), ("ccc", "aaa")]

        found1 = self._fetchall_sorted(i.execute())
        eq_(found1, wanted)

        found2 = self._fetchall_sorted(i.alias("bar").select().execute())
        eq_(found2, wanted)
Example #4
0
    def test_composite_alias(self):
        ua = intersect(
            select([t2.c.col3, t2.c.col4]),
            union(select([t1.c.col3, t1.c.col4]), select([t2.c.col3, t2.c.col4]), select([t3.c.col3, t3.c.col4]))
            .alias()
            .select(),
        ).alias()

        wanted = [("aaa", "bbb"), ("bbb", "ccc"), ("ccc", "aaa")]
        found = self._fetchall_sorted(ua.select().execute())
        eq_(found, wanted)
Example #5
0
    def test_intersect(self):
        i = intersect(select([t2.c.col3, t2.c.col4]),
                      select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3))

        wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]

        found1 = self._fetchall_sorted(i.execute())
        eq_(found1, wanted)

        found2 = self._fetchall_sorted(i.alias('bar').select().execute())
        eq_(found2, wanted)
Example #6
0
    def test_composite_alias(self):
        ua = intersect(
            select([t2.c.col3, t2.c.col4]),
            union(
                select([t1.c.col3, t1.c.col4]),
                select([t2.c.col3, t2.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select()).alias()

        wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
        found = self._fetchall_sorted(ua.select().execute())
        eq_(found, wanted)
Example #7
0
    def test_composite_alias(self):
        ua = intersect(
            select([t2.c.col3, t2.c.col4]),
            union(
                select([t1.c.col3, t1.c.col4]),
                select([t2.c.col3, t2.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select()
        ).alias()

        wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
        found = self._fetchall_sorted(ua.select().execute())
        eq_(found, wanted)
Example #8
0
    def test_intersect(self):
        i = intersect(
            select([t2.c.col3, t2.c.col4]),
            select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3)
        )

        wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]

        found1 = self._fetchall_sorted(i.execute())
        eq_(found1, wanted)

        found2 = self._fetchall_sorted(i.alias('bar').select().execute())
        eq_(found2, wanted)
Example #9
0
    def test_intersect_unions_2(self):
        u = intersect(
            union(
                select([t1.c.col3, t1.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select(),
            union(
                select([t2.c.col3, t2.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select())
        wanted = [('aaa', 'ccc'), ('bbb', 'aaa'), ('ccc', 'bbb')]
        found = self._fetchall_sorted(u.execute())

        eq_(found, wanted)
Example #10
0
    def test_intersect_unions_2(self):
        u = intersect(
            union(
                select([t1.c.col3, t1.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select(),
            union(
                select([t2.c.col3, t2.c.col4]),
                select([t3.c.col3, t3.c.col4]),
            ).alias().select()
        )
        wanted = [('aaa', 'ccc'), ('bbb', 'aaa'), ('ccc', 'bbb')]
        found = self._fetchall_sorted(u.execute())

        eq_(found, wanted)
Example #11
0
    def variants_in_region(self, connection: Connection,
                           genomic_interval: GenomicInterval,
                           output_region_attrs: List[Vocabulary],
                           meta_attrs: MetadataAttrs,
                           region_attrs: Optional[RegionAttrs]) -> Selectable:
        # init state
        self.connection = connection
        self._set_meta_attributes(meta_attrs)
        self.create_table_of_meta(['item_id'])
        self._set_region_attributes(region_attrs)
        self.create_table_of_regions(['item_id'])

        if self.my_region_t is not None:
            only_from_samples = intersect(select([self.my_meta_t.c.item_id]),
                                          select([self.my_region_t.c.item_id]))
        else:
            only_from_samples = select([self.my_meta_t.c.item_id])
        only_from_samples = only_from_samples.alias('samples')

        select_columns = [
            genomes.c[self.region_col_map[att]].label(att.name)
            for att in output_region_attrs
        ]
        stmt =\
            select(select_columns).distinct() \
            .select_from(genomes.join(only_from_samples, only_from_samples.c.item_id == genomes.c.item_id)) \
            .where((genomes.c.start >= genomic_interval.start) &
                   (genomes.c.start <= genomic_interval.stop) &
                   (genomes.c.chrom == genomic_interval.chrom))

        if self.log_sql_commands:
            utils.show_stmt(
                connection, stmt, self.logger.debug,
                f'KGenomes: VARIANTS IN REGION '
                f'{genomic_interval.chrom}'
                f'-{genomic_interval.start}-{genomic_interval.stop}')
        return stmt
Example #12
0
def build_join(op, lcte, rcte):
    if op == OR:
        return aliased(union(lcte, rcte)).select()
    else:
        return aliased(intersect(lcte, rcte)).select()
Example #13
0
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, union, union_all, except_, intersect
engine = create_engine('sqlite:///college.db', echo=True)

meta = MetaData()
conn = engine.connect()
addresses = Table('addresses', meta, Column('id', Integer, primary_key=True),
                  Column('st_id', Integer), Column('postal_add', String),
                  Column('email_add', String))

u = union(addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
          addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
result.fetchall()
u = union_all(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
result.fetchall()
u = except_(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
result = conn.execute(u)
result.fetchall()
u = intersect(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Delhi')))
result = conn.execute(u)
result.fetchall()
Example #14
0
#stmt = select([students]).where(or_(students.c.name == 'Ravi', students.c.id == 1))
rows = con.execute(stmt)
print(rows.fetchall())

stmt = select([students]).order_by(asc(students.c.id))
stmt = select([students]).order_by(desc(students.c.id))
stmt = select([students]).where(between(students.c.id, 2, 3))
prints(con.execute(stmt))

result = con.execute(select([func.now()]))
print(result.fetchone())

result = con.execute(select([func.count(students.c.id)]))
result = con.execute(select([func.max(students.c.id)]))
result = con.execute(select([func.min(students.c.id)]))
result = con.execute(select([func.avg(students.c.id).label('avg')]))
print(result.fetchone())

from sqlalchemy import union, union_all, except_, intersect
# u = union(addresses.select().where(addresses.c.email_add.like('%@gmail.com addresses.select().where(addresses.c.email_add.like('*****@*****.**'))))
u = union_all(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
u = except_(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
u = intersect(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
prints(con.execute(u))
Example #15
0
    print (res)

# SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES"
# WHERE "ADDRESSES".email_add LIKE ? UNION ALL SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add
# FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ?
u = union_all(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
for res in result:
    print (res)
    
# EXCEPT
# SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES"
# WHERE "ADDRESSES".email_add LIKE ? EXCEPT SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add
# FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ?

u = except_(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
for res in result:
    print (res)
    
# INTERSECT
# SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES"
# WHERE "ADDRESSES".email_add LIKE ? INTERSECT SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add
# FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ?

u = intersect(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
for res in result:
    print (res)

meta.drop_all(engine)
Example #16
0
    def rank_variants_by_frequency(self, connection, meta_attrs: MetadataAttrs,
                                   region_attrs: RegionAttrs, ascending: bool,
                                   freq_threshold: float, limit_result: int,
                                   time_estimate_only: bool) -> FromClause:
        # init state
        self.connection = connection
        self._set_meta_attributes(meta_attrs)
        self.create_table_of_meta(['item_id', 'gender'])
        self._set_region_attributes(region_attrs)
        self.create_table_of_regions(['item_id'])

        # if self.my_region_t is None:
        #     raise Notice("1000Genomes data set is too broad. Please restrict the population size by setting at least one "
        #                  "region constraint imposing the presence of one or more variants in a precise locus or genomic "
        #                  "area.")

        females_and_males_stmt = \
            select([self.my_meta_t.c.gender, func.count(self.my_meta_t.c.item_id)])
        if self.my_region_t is not None:
            females_and_males_stmt = females_and_males_stmt\
                .where(self.my_meta_t.c.item_id.in_(select([self.my_region_t.c.item_id])))
        females_and_males_stmt = females_and_males_stmt.group_by(
            self.my_meta_t.c.gender)

        gender_of_individuals = [
            row.values()
            for row in connection.execute(females_and_males_stmt).fetchall()
        ]
        if len(gender_of_individuals) == 0:
            raise EmptyResult('1000Genomes')
        females = next(
            (el[1] for el in gender_of_individuals if el[0] == 'female'), 0)
        males = next(
            (el[1] for el in gender_of_individuals if el[0] == 'male'), 0)
        population_size = males + females

        if population_size > 150:
            raise Notice(
                f"1000Genomes data set is too broad. Please restrict the population size by setting more constraints. "
                f"This query is allowed for populations smaller than 150. Currently selected is {population_size}"
            )

        if time_estimate_only:
            estimated_time = str(
                9 * population_size
            ) if population_size <= 149 else "2700"  # ~45 min if pop > 149
            self.notify_message(SourceMessage.Type.TIME_TO_FINISH,
                                estimated_time)
            self.notify_message(
                SourceMessage.Type.GENERAL_WARNING,
                f'Genomes to analyze in 1000Genomes: {population_size}')
            locale.setlocale(locale.LC_ALL, '')
            estimated_n_variants = 4144924 * population_size
            self.notify_message(
                SourceMessage.Type.GENERAL_WARNING,
                f'Estimated number variants to rank in 1000Genomes: ~{estimated_n_variants:n}'
            )
            raise EmptyResult('1000Genomes')

        # reduce size of the join with genomes table
        genomes_red = select(
            [genomes.c.item_id, genomes.c.chrom, genomes.c.start, genomes.c.ref, genomes.c.alt, genomes.c.al1,
             genomes.c.al2])\
            .alias('variants_few_columns')

        # custom functions
        func_occurrence = (
            func.sum(genomes_red.c.al1) +
            func.sum(func.coalesce(genomes_red.c.al2, 0))).label(
                Vocabulary.OCCURRENCE.name)
        func_positive_donors = func.count(genomes_red.c.item_id).label(
            Vocabulary.POSITIVE_DONORS.name)
        if meta_attrs.assembly == 'hg19':
            func_frequency_new = func.rr.mut_frequency_new_hg19(
                func_occurrence, males, females, genomes_red.c.chrom,
                genomes_red.c.start)
        else:
            func_frequency_new = func.rr.mut_frequency_new_grch38(
                func_occurrence, males, females, genomes_red.c.chrom,
                genomes_red.c.start)
        func_frequency_new = func_frequency_new.label(
            Vocabulary.FREQUENCY.name)

        # Actually, self.my_region_t already contains only the individuals compatible with meta_attrs, but it can contain
        # duplicated item_id. Since we want to join, it's better to remove them.
        # LIMIT is part of a trick used to speed up the job. See later
        if self.my_region_t is not None:
            sample_set_with_limit = intersect(select([self.my_meta_t.c.item_id]), select([self.my_region_t.c.item_id])) \
                .limit(population_size) \
                .alias('sample_set')
        else:
            sample_set_with_limit = select([self.my_meta_t.c.item_id])\
                .limit(population_size) \
                .alias('sample_set')

        stmt = select([genomes_red.c.chrom.label(Vocabulary.CHROM.name),
                       genomes_red.c.start.label(Vocabulary.START.name),
                       genomes_red.c.ref.label(Vocabulary.REF.name),
                       genomes_red.c.alt.label(Vocabulary.ALT.name),
                       cast(literal(population_size), types.Integer).label(Vocabulary.POPULATION_SIZE.name),
                       func_occurrence,
                       func_positive_donors,
                       func_frequency_new]) \
            .select_from(genomes_red.join(
                sample_set_with_limit,
                genomes_red.c.item_id == sample_set_with_limit.c.item_id)) \
            .group_by(genomes_red.c.chrom, genomes_red.c.start, genomes_red.c.ref, genomes_red.c.alt)
        if ascending:
            if freq_threshold:
                stmt = stmt.having(func_frequency_new >= freq_threshold)
            stmt = stmt.order_by(asc(func_frequency_new), asc(func_occurrence))
        else:
            if freq_threshold:
                stmt = stmt.having(func_frequency_new <= freq_threshold)
            stmt = stmt.order_by(desc(func_frequency_new),
                                 desc(func_occurrence))
        stmt = stmt.limit(limit_result)
        self.logger.debug(
            f'KGenomes: request /rank_variants_by_frequency for a population of {population_size} individuals'
        )

        # this + LIMIT in sample_set_with_limit make the trick to force using the index, but only up to 149 individuals
        if population_size <= 149:  # 333 is the population size at which the execution time w index matches that w/o index
            connection.execute('SET SESSION enable_seqscan=false')

        # create result table
        if self.log_sql_commands:
            self.logger.debug('KGenomes: RANKING VARIANTS IN SAMPLE SET')
        t_name = utils.random_t_name_w_prefix('ranked_variants')
        utils.create_table_as(t_name, stmt, default_schema_to_use_name,
                              connection, self.log_sql_commands,
                              self.logger.debug)
        result = Table(t_name,
                       db_meta,
                       autoload=True,
                       autoload_with=connection,
                       schema=default_schema_to_use_name)
        connection.invalidate(
        )  # instead of setting seqscan=true discard this connection
        return result
Example #17
0
    stmt = simple_table.select(simple_table.c.id=='7')
    print(stmt)
    print(stmt.execute().fetchone())
    stmt = simple_table.select(simple_table.c.col1!='update data')
    print(stmt.execute().fetchall())
    print(stmt.execute().rowcount)

    # bindparam
    print('============================')
    stmt = simple_table.select(whereclause=simple_table.c.id==bindparam('id'))
    print(stmt.execute(id=6).fetchone())
    stmt = simple_table.select(group_by=[simple_table.c.col1])
    print(stmt.execute().fetchall())

    print('============================')
    # join
    from_obj = simple_table.join(second_table)
    #from_obj = simple_table.outerjoin(second_table)
    q = simple_table.select().select_from(from_obj).where(simple_table.c.id == second_table.c.simple_id)
    print(q.column('second.simple_id'))
    print(q.execute().fetchall())

    # set operation
    print('============================')
    q1 = simple_table.select(simple_table.c.id > 1)
    q2 = simple_table.select(simple_table.c.id < 7)
    print(q1.execute().fetchall())
    print(q2.execute().fetchall())
    q = intersect(q1, q2)
    print(q)
Example #18
0
def build_join(op, lcte, rcte):
    if op == OR:
        return union(lcte, rcte).subquery().select()
    else:
        return intersect(lcte, rcte).subquery().select()