Ejemplo n.º 1
0
    def generate_variabile_aas_table(self):
        """Profile amino acid frequencies at the codons of interest and store them.

        Does nothing except record ``AA_frequencies_table = False`` through
        ``self.run.info`` when SNV profiling is skipped or AA frequency
        profiling was not requested.

        Otherwise, the ``(gene_call_id, codon_order)`` pairs found in
        ``self.codons_in_genes_to_profile_AA_frequencies`` are grouped by gene,
        each gene is handed to ``bamops.AAFrequencies.process_gene_call``, and
        one row per returned codon is appended to a
        ``dbops.TableForAAFrequencies`` instance, which is stored at the end.
        """
        if self.skip_SNV_profiling or not self.profile_AA_frequencies:
            # there is nothing to generate really..
            self.run.info('AA_frequencies_table', False, quiet=True)
            return

        self.progress.new('Computing AA frequencies at variable positions')

        variable_aas_table = dbops.TableForAAFrequencies(
            self.profile_db_path, progress=self.progress)

        aa_frequencies = bamops.AAFrequencies()

        # Group the codon orders of interest by the gene call they belong to.
        codons_in_genes_to_profile_AA_frequencies_dict = {}
        for gene_call_id, codon_order in self.codons_in_genes_to_profile_AA_frequencies:
            codons_in_genes_to_profile_AA_frequencies_dict.setdefault(
                gene_call_id, set()).add(codon_order)

        num_gene_caller_ids_to_profile = len(
            codons_in_genes_to_profile_AA_frequencies_dict)

        # Iterate over the dict itself rather than indexing into `.keys()`:
        # on Python 3 `.keys()` returns a view that does not support `[i]`.
        for gene_number, gene_caller_id in enumerate(
                codons_in_genes_to_profile_AA_frequencies_dict, start=1):
            codons_to_profile = codons_in_genes_to_profile_AA_frequencies_dict[
                gene_caller_id]

            self.progress.update(
                "Working on gene caller id '%d' (%d of %d) w/ %d codons of interest"
                % (gene_caller_id, gene_number, num_gene_caller_ids_to_profile,
                   len(codons_to_profile)))

            gene_call = self.genes_in_contigs_dict[gene_caller_id]
            contig_name = gene_call['contig']
            aa_frequencies_dict = aa_frequencies.process_gene_call(
                self.bam, gene_call,
                self.contig_sequences[contig_name]['sequence'],
                codons_to_profile)

            for codon_order in aa_frequencies_dict:
                e = aa_frequencies_dict[codon_order]

                db_entry = {
                    'sample_id': self.sample_id,
                    'corresponding_gene_call': gene_caller_id
                }
                db_entry['reference'] = e['reference']
                db_entry['coverage'] = e['coverage']
                db_entry['departure_from_reference'] = e[
                    'departure_from_reference']
                db_entry['codon_order_in_gene'] = codon_order
                # One column per amino acid name listed in constants.codon_to_AA.
                for aa in constants.codon_to_AA.values():
                    db_entry[aa] = e['frequencies'][aa]

                variable_aas_table.append(db_entry)

        variable_aas_table.store()
        self.progress.end()
        self.run.info('AA_frequencies_table', True, quiet=True)
Ejemplo n.º 2
0
    def generate_variabile_aas_table(self):
        """Store amino acid frequencies for every codon of interest.

        Returns immediately when SNV profiling is skipped or AA frequency
        profiling is turned off. Otherwise the ``(gene_call_id, codon_order)``
        pairs in ``self.codons_in_genes_to_profile_AA_frequencies`` are grouped
        per gene, each gene is processed with
        ``bamops.AAFrequencies.process_gene_call``, the resulting rows are
        appended to a ``TableForAAFrequencies`` and stored, and finally the
        collection of pending pairs is emptied.
        """
        if self.skip_SNV_profiling or not self.profile_AA_frequencies:
            return

        table = TableForAAFrequencies(self.profile_db_path,
                                      progress=self.progress)
        profiler = bamops.AAFrequencies()

        # gene call id -> set of codon orders to profile in that gene
        codons_per_gene = {}
        for gene_id, codon in self.codons_in_genes_to_profile_AA_frequencies:
            codons_per_gene.setdefault(gene_id, set()).add(codon)

        for gene_caller_id, codons_to_profile in codons_per_gene.items():
            gene_call = self.genes_in_contigs_dict[gene_caller_id]
            contig_sequence = self.contig_sequences[gene_call['contig']]['sequence']

            per_codon = profiler.process_gene_call(
                self.bam, gene_call, contig_sequence, codons_to_profile)

            for codon_order, entry in per_codon.items():
                row = {
                    'sample_id': self.sample_id,
                    'corresponding_gene_call': gene_caller_id,
                    'reference': entry['reference'],
                    'coverage': entry['coverage'],
                    'departure_from_reference': entry['departure_from_reference'],
                    'codon_order_in_gene': codon_order,
                }
                # Add one column per amino acid name listed in constants.codon_to_AA.
                row.update((aa, entry['frequencies'][aa])
                           for aa in constants.codon_to_AA.values())

                table.append(row)

        table.store()

        # Empty the collection of pending (gene, codon) pairs now that they
        # have been written out.
        self.codons_in_genes_to_profile_AA_frequencies.clear()