def _table_without_any_of_mutations(self): """ Returns a Table containing the item_id from the table genomes that do not match the given mutations. :param select_columns selects only the column names in this collection. If None, selects all the columns from genomes. """ mutations = self.region_attrs.without_variants if len(mutations) == 0: raise ValueError('function argument *mutations cannot be empty') else: # create table for the result t_name = utils.random_t_name_w_prefix('without_any_of_mut') query_mutations = self._stmt_where_region_is_any_of_mutations( *mutations, from_table=regions, select_expression=select([regions.c.item_id]), only_item_id_in_table=self.my_meta_t) stmt_as = except_(select([self.my_meta_t.c.item_id]), query_mutations) stmt_create_table = utils.stmt_create_table_as( t_name, stmt_as, default_schema_to_use_name) if self.log_sql_commands: utils.show_stmt( self.connection, stmt_create_table, self.logger.debug, 'CREATE TABLE WITHOUT ANY OF THE {} MUTATIONS'.format( len(mutations))) self.connection.execute(stmt_create_table) return Table(t_name, db_meta, autoload=True, autoload_with=self.connection, schema=default_schema_to_use_name)
def _table_with_any_of_mutations(self, select_columns, only_item_id_in_table: Optional[Table], *mutations: Mutation): """Returns a Table containing all the rows from the table regions containing one of the variants in the argument mutations. :param select_columns selects only the column names in this collection. If None, selects all the columns from regions. :param only_item_id_in_table If None, the variants that are not owned by any of the individuals in this table are discarded from the result. """ if len(mutations) == 0: raise ValueError('function argument *mutations cannot be empty') else: # create table for the result t_name = utils.random_t_name_w_prefix('with_any_of_mut') columns = [regions.c[c_name] for c_name in select_columns ] if select_columns is not None else [regions] stmt_as = self._stmt_where_region_is_any_of_mutations( *mutations, from_table=regions, select_expression=select(columns), only_item_id_in_table=only_item_id_in_table) stmt_create_table = utils.stmt_create_table_as( t_name, stmt_as, default_schema_to_use_name) if self.log_sql_commands: utils.show_stmt( self.connection, stmt_create_table, self.logger.debug, 'CREATE TABLE HAVING ANY OF THE {} MUTATIONS'.format( len(mutations))) self.connection.execute(stmt_create_table) return Table(t_name, db_meta, autoload=True, autoload_with=self.connection, schema=default_schema_to_use_name)
def table_with_variants_same_c_copy(self, select_columns: Optional[list]): """ Returns a table of variants of the same type of the ones contained in RegionAttrs.with_variants_same_c_copy and only form the individuals that own all of them on the same chromosome copy. :param select_columns: the list of column names to select from the result. If None, all the columns are taken. """ if len(self.region_attrs.with_variants_same_c_copy) < 2: raise ValueError( 'You must provide at least two Mutation instances in order to use this method.' ) # selects only the mutations to be on the same chromosome copies (this set will be used two times) from all individuals # we will enforce the presence of all the given mutations in all the individuals later... interm_select_column_names = None # means all columns if select_columns is not None: # otherwise pick select_columns + minimum required interm_select_column_names = set(select_columns) interm_select_column_names.update(['item_id', 'al1', 'al2']) intermediate_table = self._table_with_any_of_mutations( interm_select_column_names, self.my_meta_t, *self.region_attrs.with_variants_same_c_copy) # groups mutations by owner in the intermediate table, and take only the owners for which sum(al1) or sum(al2) # equals to the number of the given mutations. That condition automatically implies the presence of all the # given mutations in the same individual. # for those owner, take all the given mutations result_columns = [intermediate_table] # means all columns if select_columns is not None: # otherwise pick the columns from select_columns result_columns = [ intermediate_table.c[col_name] for col_name in select_columns ] stmt_as = \ select(result_columns) \ .where(intermediate_table.c.item_id.in_( select([intermediate_table.c.item_id]) .group_by(intermediate_table.c.item_id) .having( (func.sum(intermediate_table.c.al1) == len( self.region_attrs.with_variants_same_c_copy)) | # the ( ) around each condition are mandatory (func.sum(func.coalesce(intermediate_table.c.al2, 0)) == len( self.region_attrs.with_variants_same_c_copy))) )) target_t_name = utils.random_t_name_w_prefix('with_var_same_c_copy') stmt = utils.stmt_create_table_as(target_t_name, stmt_as, default_schema_to_use_name) if self.log_sql_commands: utils.show_stmt( self.connection, stmt, self.logger.debug, 'INDIVIDUALS (+ THE GIVEN MUTATIONS) HAVING ALL THE SPECIFIED MUTATIONS ON THE SAME CHROMOSOME COPY' ) self.connection.execute(stmt) if self.log_sql_commands: self.logger.debug('DROP TABLE ' + intermediate_table.name) intermediate_table.drop(self.connection) return Table(target_t_name, db_meta, autoload=True, autoload_with=self.connection, schema=default_schema_to_use_name)
def table_with_all_of_mutations(self, select_columns: Optional[list]): """ Returns a table of variants of the same type of the ones contained in RegionAttrs.with_variants and only form the individuals that own all of them. :param select_columns: the list of column names to select from the result. If None, all the columns are taken. """ if not self.region_attrs.with_variants: raise ValueError( 'instance parameter self.with_variants not initialized') elif len(self.region_attrs.with_variants) == 1: return self._table_with_any_of_mutations( select_columns, self.my_meta_t, *self.region_attrs.with_variants) else: union_select_column_names = None # means all columns if select_columns is not None: # otherwise use select_columns + minimum necessary union_select_column_names = set(select_columns) union_select_column_names.add('item_id') union_table = self._table_with_any_of_mutations( union_select_column_names, self.my_meta_t, *self.region_attrs.with_variants) # extracts only the samples having all the mutations result_select_columns = [union_table] # means all columns if select_columns is not None: # otherwise use selected_columns result_select_columns = [ union_table.c[col_name] for col_name in select_columns ] stmt_as = \ select(result_select_columns) \ .where(union_table.c.item_id.in_( select([union_table.c.item_id]) .group_by(union_table.c.item_id) .having(func.count(union_table.c.item_id) == len(self.region_attrs.with_variants)) )) target_t_name = utils.random_t_name_w_prefix('with') stmt_create_table = utils.stmt_create_table_as( target_t_name, stmt_as, default_schema_to_use_name) if self.log_sql_commands: utils.show_stmt( self.connection, stmt_create_table, self.logger.debug, 'INDIVIDUALS HAVING "ALL" THE {} MUTATIONS (WITH DUPLICATE ITEM_ID)' .format(len(self.region_attrs.with_variants))) self.connection.execute(stmt_create_table) if self.log_sql_commands: self.logger.debug('DROP TABLE ' + union_table.name) union_table.drop(self.connection) return Table(target_t_name, db_meta, autoload=True, autoload_with=self.connection, schema=default_schema_to_use_name)
def take_regions_of_common_individuals(self, tables: list): """ Generates a table containing all the mutations from all the origin tables but only for those individuals that appear in all the origin tables. Supposing that each origin table reflects a characteristic that the final sample set must have, this method basically puts those characteristics in AND relationship by taking only the regions from the individuals that have all the characteristics. :param tables: The source tables which must have the same columns in the same order. """ if len(tables) == 1: return tables[0] else: # join 1st with 2nd with 3rd ... with nth on item_id # TODO consider creating temporary tables selecting only the item_id before joining stmt_join = reduce( lambda table_1, table_2: table_1.join( table_2, tables[0].c.item_id == table_2.c.item_id), tables) # union of tables select_all_from_each_table = map(lambda table_: select([table_]), tables) # TODO consider selecting from union table only what is needed by the users of this method (parametric choice) stmt_union = union(*select_all_from_each_table).alias() # select from the union table only the item_id that exists in the join stmt_as = \ select([stmt_union]) \ .where(exists(select() .select_from(stmt_join) .where(stmt_union.c.item_id == tables[0].c.item_id) )) target_t_name = utils.random_t_name_w_prefix('intersect') stmt_create_table = utils.stmt_create_table_as( target_t_name, stmt_as, default_schema_to_use_name) if self.log_sql_commands: utils.show_stmt( self.connection, stmt_create_table, self.logger.debug, 'SELECT ALL FROM SOURCE TABLES WHERE item_id IS IN ALL SOURCE TABLES' ) self.connection.execute(stmt_create_table) # TODO drop partial tables ? return Table(target_t_name, db_meta, autoload=True, autoload_with=self.connection, schema=default_schema_to_use_name)