def generate_variabile_positions_table(self):
    """Annotate every column profile of every split and store them all.

    When ``self.skip_SNV_profiling`` is set, nothing is stored: the run
    info key ``variable_nts_table`` is recorded as ``False`` and the
    method returns. Otherwise each column profile gains the keys
    ``in_partial_gene_call``, ``in_complete_gene_call``, ``pos_in_codon``
    and ``sample_id`` before it is appended to the variability table,
    which is stored once at the end.
    """
    if self.skip_SNV_profiling:
        # nothing to generate; just flag the table as absent
        self.run.info('variable_nts_table', False, quiet=True)
        return

    table = dbops.TableForVariability(self.profile_db_path,
                                      anvio.__profile__version__,
                                      progress=self.progress)

    self.progress.new('Storing variability information')

    # lazily walk contigs -> splits -> column profiles in their native order
    profiles_with_contig = (
        (contig.name, profile)
        for contig in self.contigs.values()
        for split in contig.splits
        for profile in split.column_profiles.values()
    )

    for contig_name, profile in profiles_with_contig:
        # learn more about this particular variable position
        in_partial, in_complete, codon_pos = self.get_nt_position_info(
            contig_name, profile['pos_in_contig'])

        profile['in_partial_gene_call'] = in_partial
        profile['in_complete_gene_call'] = in_complete
        profile['pos_in_codon'] = codon_pos
        profile['sample_id'] = self.sample_id

        table.append(profile)

    table.store()
    self.progress.end()
    self.run.info('variable_nts_table', True, quiet=True)
def generate_variabile_nts_table(self):
    """Annotate every variable nucleotide position and store the table.

    When ``self.skip_SNV_profiling`` is set, nothing is stored: the run
    info key ``variable_nts_table`` is recorded as ``False`` and the
    method returns.

    Otherwise each column profile is extended with its gene-call context
    (``in_partial_gene_call``, ``in_complete_gene_call``,
    ``base_pos_in_codon``), the ``sample_id``, and, when exactly one
    gene call covers the position, the ``corresponding_gene_call`` and
    ``codon_order_in_gene``. Each such (gene caller id, codon order)
    pair is also added to
    ``self.codons_in_genes_to_profile_AA_frequencies``.
    """
    if self.skip_SNV_profiling:
        # nothing to generate; just flag the table as absent
        self.run.info('variable_nts_table', False, quiet=True)
        return

    self.progress.new('NT Variability')

    table = dbops.TableForVariability(self.profile_db_path,
                                      anvio.__profile__version__,
                                      progress=self.progress)

    # lazily walk contigs -> splits -> column profiles in their native order
    profiles_with_contig = (
        (contig.name, profile)
        for contig in self.contigs.values()
        for split in contig.splits
        for profile in split.column_profiles.values()
    )

    for contig_name, profile in profiles_with_contig:
        position = profile['pos_in_contig']

        # learn more about this particular variable position
        in_partial, in_complete, base_pos = self.get_nt_position_info(
            contig_name, position)

        profile['in_partial_gene_call'] = in_partial
        profile['in_complete_gene_call'] = in_complete
        profile['base_pos_in_codon'] = base_pos
        profile['sample_id'] = self.sample_id

        # -1 stands for "no matching gene call / codon"; it is used
        # because gene caller ids start from 0. Both values may be
        # overwritten just below.
        profile['corresponding_gene_call'] = -1
        profile['codon_order_in_gene'] = -1

        if in_complete:
            # the position falls within a COMPLETE gene call, so look up
            # which gene caller id(s) cover it
            gene_caller_ids = self.get_corresponding_gene_caller_ids_for_base_position(
                contig_name, position)

            # more than one hit usually indicates an assembly error; to
            # stay on the safe side only an unambiguous hit is reported
            if len(gene_caller_ids) == 1:
                gene_caller_id = gene_caller_ids[0]
                profile['corresponding_gene_call'] = gene_caller_id

                codon_order = self.get_corresponding_codon_order_in_gene(
                    gene_caller_id, contig_name, position)
                profile['codon_order_in_gene'] = codon_order

                # keep this pair around for later use
                self.codons_in_genes_to_profile_AA_frequencies.add(
                    (gene_caller_id, codon_order))

        table.append(profile)

    table.store()
    self.progress.end()
    self.run.info('variable_nts_table', True, quiet=True)
def merge_variable_nts_tables(self):
    """Copy the variable-nts rows of every input profile into the merged one.

    Iterates over ``self.profile_dbs_info_dict``, opens each input
    profile database, reads its variable nucleotides table, and appends
    every row to the table of the database at
    ``self.merged_profile_db_path``. The first field of each row is
    replaced by a fresh value from ``next_id`` (presumably the entry id;
    confirm against the table structure). Everything is stored once at
    the end.
    """
    merged_table = dbops.TableForVariability(self.merged_profile_db_path,
                                             progress=self.progress)

    table_name = tables.variable_nts_table_name
    table_structure = tables.variable_nts_table_structure

    for input_profile_db_path in self.profile_dbs_info_dict:
        sample_db = dbops.ProfileDatabase(input_profile_db_path, quiet=True)
        sample_rows = sample_db.db.get_table_as_list_of_tuples(table_name,
                                                               table_structure)
        sample_db.disconnect()

        for row in sample_rows:
            # swap the sample-local first field for one issued by the merged table
            fresh_id = merged_table.next_id(table_name)
            merged_table.db_entries.append((fresh_id,) + tuple(row[1:]))

    merged_table.store()
def merge_variable_nts_tables(self):
    """Copy the variable-nts rows of every input run into this profile.

    First calls ``self.is_all_samples_have_it('variable_nts_table')``
    (presumably a check that every sample has the table; confirm its
    behavior on failure). Then, for each run info dict in
    ``self.input_runinfo_dicts``, opens the database named by its
    ``profile_db`` key, reads the variable nucleotides table, and
    appends every row to the table of the database at
    ``self.profile_db_path``. The first field of each row is replaced by
    a fresh value from ``next_id`` (presumably the entry id; confirm
    against the table structure). Everything is stored once at the end.
    """
    self.is_all_samples_have_it('variable_nts_table')

    merged_table = dbops.TableForVariability(self.profile_db_path,
                                             progress=self.progress)

    table_name = tables.variable_nts_table_name
    table_structure = tables.variable_nts_table_structure

    for runinfo in self.input_runinfo_dicts.values():
        sample_db = dbops.ProfileDatabase(runinfo['profile_db'], quiet=True)
        sample_rows = sample_db.db.get_table_as_list_of_tuples(table_name,
                                                               table_structure)
        sample_db.disconnect()

        for row in sample_rows:
            # swap the sample-local first field for one issued by the merged table
            fresh_id = merged_table.next_id(table_name)
            merged_table.db_entries.append((fresh_id,) + tuple(row[1:]))

    merged_table.store()