def generate_variabile_aas_table(self):
    """Compute amino acid frequencies at codons of interest and store them.

    When SNV profiling is skipped, or AA frequency profiling is not
    requested, the only effect is recording ``AA_frequencies_table = False``
    through ``self.run.info``.

    Otherwise the ``(gene_call_id, codon_order)`` pairs found in
    ``self.codons_in_genes_to_profile_AA_frequencies`` are grouped per gene
    call, each gene is handed to ``bamops.AAFrequencies.process_gene_call``
    together with its contig sequence, and one entry per returned codon is
    appended to a ``dbops.TableForAAFrequencies`` created for
    ``self.profile_db_path``. The table is stored once all genes have been
    processed, and ``AA_frequencies_table = True`` is recorded.
    """
    if self.skip_SNV_profiling or not self.profile_AA_frequencies:
        # there is nothing to generate really..
        self.run.info('AA_frequencies_table', False, quiet=True)
        return

    self.progress.new('Computing AA frequencies at variable positions')

    variable_aas_table = dbops.TableForAAFrequencies(
        self.profile_db_path, progress=self.progress)

    aa_frequencies = bamops.AAFrequencies()

    # Group the codon orders of interest by the gene call they belong to.
    codons_in_genes_to_profile_AA_frequencies_dict = {}
    for gene_call_id, codon_order in self.codons_in_genes_to_profile_AA_frequencies:
        codons_in_genes_to_profile_AA_frequencies_dict.setdefault(
            gene_call_id, set()).add(codon_order)

    num_gene_caller_ids_to_profile = len(
        codons_in_genes_to_profile_AA_frequencies_dict)

    # Iterate the dict itself rather than indexing into `.keys()`: on
    # Python 3 a keys view is not subscriptable, so `keys()[i]` raises
    # TypeError. The iteration order is the same as that of `.keys()`.
    for i, (gene_caller_id, codons_to_profile) in enumerate(
            codons_in_genes_to_profile_AA_frequencies_dict.items()):
        self.progress.update(
            "Working on gene caller id '%d' (%d of %d) w/ %d codons of interest"
            % (gene_caller_id, i + 1, num_gene_caller_ids_to_profile,
               len(codons_to_profile)))

        gene_call = self.genes_in_contigs_dict[gene_caller_id]
        contig_name = gene_call['contig']
        aa_frequencies_dict = aa_frequencies.process_gene_call(
            self.bam, gene_call,
            self.contig_sequences[contig_name]['sequence'],
            codons_to_profile)

        for codon_order in aa_frequencies_dict:
            e = aa_frequencies_dict[codon_order]

            db_entry = {
                'sample_id': self.sample_id,
                'corresponding_gene_call': gene_caller_id,
                'reference': e['reference'],
                'coverage': e['coverage'],
                'departure_from_reference': e['departure_from_reference'],
                'codon_order_in_gene': codon_order,
            }

            # One column per amino acid; values repeat across synonymous
            # codons, so an amino acid may simply be assigned more than once.
            for aa in constants.codon_to_AA.values():
                db_entry[aa] = e['frequencies'][aa]

            variable_aas_table.append(db_entry)

    variable_aas_table.store()
    self.progress.end()
    self.run.info('AA_frequencies_table', True, quiet=True)
def generate_variabile_aas_table(self):
    """Store amino acid frequencies for the codons queued for profiling.

    Returns immediately when SNV profiling is skipped or AA frequency
    profiling is not requested. Otherwise, the ``(gene_call_id, codon_order)``
    pairs in ``self.codons_in_genes_to_profile_AA_frequencies`` are grouped
    by gene call, each gene is processed with
    ``bamops.AAFrequencies.process_gene_call``, and one row per returned
    codon is appended to a ``TableForAAFrequencies`` created for
    ``self.profile_db_path``. After the table is stored, the queue of
    pending codons is emptied.
    """
    profiling_requested = (not self.skip_SNV_profiling
                           and self.profile_AA_frequencies)
    if not profiling_requested:
        return

    table = TableForAAFrequencies(self.profile_db_path, progress=self.progress)
    frequency_profiler = bamops.AAFrequencies()

    # gene call id -> set of codon orders to profile in that gene
    codons_per_gene = {}
    for gene_id, codon in self.codons_in_genes_to_profile_AA_frequencies:
        codons_per_gene.setdefault(gene_id, set()).add(codon)

    for gene_id, codons in codons_per_gene.items():
        gene_call = self.genes_in_contigs_dict[gene_id]
        contig = gene_call['contig']
        per_codon = frequency_profiler.process_gene_call(
            self.bam,
            gene_call,
            self.contig_sequences[contig]['sequence'],
            codons,
        )

        for codon in per_codon:
            stats = per_codon[codon]
            row = {
                'sample_id': self.sample_id,
                'corresponding_gene_call': gene_id,
                'reference': stats['reference'],
                'coverage': stats['coverage'],
                'departure_from_reference': stats['departure_from_reference'],
                'codon_order_in_gene': codon,
            }

            # add one frequency column per amino acid
            for amino_acid in constants.codon_to_AA.values():
                row[amino_acid] = stats['frequencies'][amino_acid]

            table.append(row)

    table.store()

    # everything queued has been written out; empty the queue
    self.codons_in_genes_to_profile_AA_frequencies.clear()