コード例 #1
0
 def _table_without_any_of_mutations(self):
     """
     Returns a Table containing the item_id from the table genomes that do not match the given mutations.
     :param select_columns selects only the column names in this collection. If None, selects all the columns from genomes.
     """
     mutations = self.region_attrs.without_variants
     if len(mutations) == 0:
         raise ValueError('function argument *mutations cannot be empty')
     else:
         # create table for the result
         t_name = utils.random_t_name_w_prefix('without_any_of_mut')
         query_mutations = self._stmt_where_region_is_any_of_mutations(
             *mutations,
             from_table=regions,
             select_expression=select([regions.c.item_id]),
             only_item_id_in_table=self.my_meta_t)
         stmt_as = except_(select([self.my_meta_t.c.item_id]),
                           query_mutations)
         stmt_create_table = utils.stmt_create_table_as(
             t_name, stmt_as, default_schema_to_use_name)
         if self.log_sql_commands:
             utils.show_stmt(
                 self.connection, stmt_create_table, self.logger.debug,
                 'CREATE TABLE WITHOUT ANY OF THE {} MUTATIONS'.format(
                     len(mutations)))
         self.connection.execute(stmt_create_table)
         return Table(t_name,
                      db_meta,
                      autoload=True,
                      autoload_with=self.connection,
                      schema=default_schema_to_use_name)
コード例 #2
0
 def _table_with_any_of_mutations(self, select_columns,
                                  only_item_id_in_table: Optional[Table],
                                  *mutations: Mutation):
     """Returns a Table containing all the rows from the table regions containing one of the variants in
     the argument mutations.
     :param select_columns selects only the column names in this collection. If None, selects all the columns from regions.
     :param only_item_id_in_table If None, the variants that are not owned by any of the individuals in this table
     are discarded from the result.
     """
     if len(mutations) == 0:
         raise ValueError('function argument *mutations cannot be empty')
     else:
         # create table for the result
         t_name = utils.random_t_name_w_prefix('with_any_of_mut')
         columns = [regions.c[c_name] for c_name in select_columns
                    ] if select_columns is not None else [regions]
         stmt_as = self._stmt_where_region_is_any_of_mutations(
             *mutations,
             from_table=regions,
             select_expression=select(columns),
             only_item_id_in_table=only_item_id_in_table)
     stmt_create_table = utils.stmt_create_table_as(
         t_name, stmt_as, default_schema_to_use_name)
     if self.log_sql_commands:
         utils.show_stmt(
             self.connection, stmt_create_table, self.logger.debug,
             'CREATE TABLE HAVING ANY OF THE {} MUTATIONS'.format(
                 len(mutations)))
     self.connection.execute(stmt_create_table)
     return Table(t_name,
                  db_meta,
                  autoload=True,
                  autoload_with=self.connection,
                  schema=default_schema_to_use_name)
コード例 #3
0
 def table_with_variants_same_c_copy(self, select_columns: Optional[list]):
     """
      Returns a table of variants of the same type of the ones contained in RegionAttrs.with_variants_same_c_copy and only
      form the individuals that own all of them on the same chromosome copy.
      :param select_columns: the list of column names to select from the result. If None, all the columns are taken.
     """
     if len(self.region_attrs.with_variants_same_c_copy) < 2:
         raise ValueError(
             'You must provide at least two Mutation instances in order to use this method.'
         )
     # selects only the mutations to be on the same chromosome copies (this set will be used two times) from all individuals
     # we will enforce the presence of all the given mutations in all the individuals later...
     interm_select_column_names = None  # means all columns
     if select_columns is not None:  # otherwise pick select_columns + minimum required
         interm_select_column_names = set(select_columns)
         interm_select_column_names.update(['item_id', 'al1', 'al2'])
     intermediate_table = self._table_with_any_of_mutations(
         interm_select_column_names, self.my_meta_t,
         *self.region_attrs.with_variants_same_c_copy)
     # groups mutations by owner in the intermediate table, and take only the owners for which sum(al1) or sum(al2)
     # equals to the number of the given mutations. That condition automatically implies the presence of all the
     # given mutations in the same individual.
     # for those owner, take all the given mutations
     result_columns = [intermediate_table]  # means all columns
     if select_columns is not None:  # otherwise pick the columns from select_columns
         result_columns = [
             intermediate_table.c[col_name] for col_name in select_columns
         ]
     stmt_as = \
         select(result_columns) \
         .where(intermediate_table.c.item_id.in_(
             select([intermediate_table.c.item_id])
             .group_by(intermediate_table.c.item_id)
             .having(
                 (func.sum(intermediate_table.c.al1) == len(
                     self.region_attrs.with_variants_same_c_copy)) |  # the ( ) around each condition are mandatory
                 (func.sum(func.coalesce(intermediate_table.c.al2, 0)) == len(
                     self.region_attrs.with_variants_same_c_copy)))
         ))
     target_t_name = utils.random_t_name_w_prefix('with_var_same_c_copy')
     stmt = utils.stmt_create_table_as(target_t_name, stmt_as,
                                       default_schema_to_use_name)
     if self.log_sql_commands:
         utils.show_stmt(
             self.connection, stmt, self.logger.debug,
             'INDIVIDUALS (+ THE GIVEN MUTATIONS) HAVING ALL THE SPECIFIED MUTATIONS ON THE SAME CHROMOSOME COPY'
         )
     self.connection.execute(stmt)
     if self.log_sql_commands:
         self.logger.debug('DROP TABLE ' + intermediate_table.name)
     intermediate_table.drop(self.connection)
     return Table(target_t_name,
                  db_meta,
                  autoload=True,
                  autoload_with=self.connection,
                  schema=default_schema_to_use_name)
コード例 #4
0
 def table_with_all_of_mutations(self, select_columns: Optional[list]):
     """
      Returns a table of variants of the same type of the ones contained in RegionAttrs.with_variants and only form the
      individuals that own all of them.
      :param select_columns: the list of column names to select from the result. If None, all the columns are taken.
     """
     if not self.region_attrs.with_variants:
         raise ValueError(
             'instance parameter self.with_variants not initialized')
     elif len(self.region_attrs.with_variants) == 1:
         return self._table_with_any_of_mutations(
             select_columns, self.my_meta_t,
             *self.region_attrs.with_variants)
     else:
         union_select_column_names = None  # means all columns
         if select_columns is not None:  # otherwise use select_columns + minimum necessary
             union_select_column_names = set(select_columns)
             union_select_column_names.add('item_id')
         union_table = self._table_with_any_of_mutations(
             union_select_column_names, self.my_meta_t,
             *self.region_attrs.with_variants)
         # extracts only the samples having all the mutations
         result_select_columns = [union_table]  # means all columns
         if select_columns is not None:  # otherwise use selected_columns
             result_select_columns = [
                 union_table.c[col_name] for col_name in select_columns
             ]
         stmt_as = \
             select(result_select_columns) \
             .where(union_table.c.item_id.in_(
                 select([union_table.c.item_id])
                 .group_by(union_table.c.item_id)
                 .having(func.count(union_table.c.item_id) == len(self.region_attrs.with_variants))
             ))
         target_t_name = utils.random_t_name_w_prefix('with')
         stmt_create_table = utils.stmt_create_table_as(
             target_t_name, stmt_as, default_schema_to_use_name)
         if self.log_sql_commands:
             utils.show_stmt(
                 self.connection, stmt_create_table, self.logger.debug,
                 'INDIVIDUALS HAVING "ALL" THE {} MUTATIONS (WITH DUPLICATE ITEM_ID)'
                 .format(len(self.region_attrs.with_variants)))
         self.connection.execute(stmt_create_table)
         if self.log_sql_commands:
             self.logger.debug('DROP TABLE ' + union_table.name)
         union_table.drop(self.connection)
         return Table(target_t_name,
                      db_meta,
                      autoload=True,
                      autoload_with=self.connection,
                      schema=default_schema_to_use_name)
コード例 #5
0
 def take_regions_of_common_individuals(self, tables: list):
     """
     Generates a table containing all the mutations from all the origin tables but only for those individuals that
     appear in all the origin tables.
     Supposing that each origin table reflects a characteristic that the final sample set must have, this method
     basically puts those characteristics in AND relationship by taking only the regions from the individuals that
     have all the characteristics.
     :param tables: The source tables which must have the same columns in the same order.
     """
     if len(tables) == 1:
         return tables[0]
     else:
         # join 1st with 2nd with 3rd ... with nth on item_id
         # TODO consider creating temporary tables selecting only the item_id before joining
         stmt_join = reduce(
             lambda table_1, table_2: table_1.join(
                 table_2, tables[0].c.item_id == table_2.c.item_id), tables)
         # union of tables
         select_all_from_each_table = map(lambda table_: select([table_]),
                                          tables)
         # TODO consider selecting from union table only what is needed by the users of this method (parametric choice)
         stmt_union = union(*select_all_from_each_table).alias()
         # select from the union table only the item_id that exists in the join
         stmt_as = \
             select([stmt_union]) \
             .where(exists(select()
                           .select_from(stmt_join)
                           .where(stmt_union.c.item_id == tables[0].c.item_id)
                           ))
         target_t_name = utils.random_t_name_w_prefix('intersect')
         stmt_create_table = utils.stmt_create_table_as(
             target_t_name, stmt_as, default_schema_to_use_name)
         if self.log_sql_commands:
             utils.show_stmt(
                 self.connection, stmt_create_table, self.logger.debug,
                 'SELECT ALL FROM SOURCE TABLES WHERE item_id IS IN ALL SOURCE TABLES'
             )
         self.connection.execute(stmt_create_table)
         # TODO drop partial tables ?
         return Table(target_t_name,
                      db_meta,
                      autoload=True,
                      autoload_with=self.connection,
                      schema=default_schema_to_use_name)