예제 #1
0
    def generate_variabile_positions_table(self):
        """Store the column profiles of every split in the variability table.

        When `self.skip_SNV_profiling` is set, nothing is stored: the run info
        entry 'variable_nts_table' is recorded as False and the method returns.
        Otherwise every column profile found in the splits of `self.contigs`
        is annotated with the values returned by `self.get_nt_position_info`
        and with `self.sample_id`, appended to a `dbops.TableForVariability`
        instance, and the table is stored before 'variable_nts_table' is
        recorded as True.
        """
        if self.skip_SNV_profiling:
            # nothing to store, so only leave a note in the run info.
            self.run.info('variable_nts_table', False, quiet=True)
            return

        table = dbops.TableForVariability(self.profile_db_path,
                                          anvio.__profile__version__,
                                          progress=self.progress)

        self.progress.new('Storing variability information')

        # lazily walk contigs -> splits -> column profiles, in that order
        profiles = ((contig, profile)
                    for contig in self.contigs.values()
                    for split in contig.splits
                    for profile in split.column_profiles.values())

        for contig, profile in profiles:
            # let's figure out more about this particular variable position
            position = profile['pos_in_contig']
            in_partial, in_complete, codon_pos = self.get_nt_position_info(contig.name, position)

            profile['in_partial_gene_call'] = in_partial
            profile['in_complete_gene_call'] = in_complete
            profile['pos_in_codon'] = codon_pos
            profile['sample_id'] = self.sample_id

            table.append(profile)

        table.store()
        self.progress.end()
        self.run.info('variable_nts_table', True, quiet=True)
예제 #2
0
    def generate_variabile_nts_table(self):
        """Store annotated column profiles in the variable nucleotides table.

        When `self.skip_SNV_profiling` is set, nothing is stored: the run info
        entry 'variable_nts_table' is recorded as False and the method returns.

        Otherwise every column profile found in the splits of `self.contigs`
        receives the values returned by `self.get_nt_position_info`, the
        sample id, and two gene-related fields that default to -1:
        'corresponding_gene_call' and 'codon_order_in_gene'. These two are
        filled in only when the position is in a complete gene call and
        exactly one gene caller id matches it; in that case the
        (gene caller id, codon order) pair is also added to
        `self.codons_in_genes_to_profile_AA_frequencies`.
        """
        if self.skip_SNV_profiling:
            # nothing to store, so only leave a note in the run info.
            self.run.info('variable_nts_table', False, quiet=True)
            return

        self.progress.new('NT Variability')
        table = dbops.TableForVariability(self.profile_db_path,
                                          anvio.__profile__version__,
                                          progress=self.progress)

        # lazily walk contigs -> splits -> column profiles, in that order
        profiles = ((contig, profile)
                    for contig in self.contigs.values()
                    for split in contig.splits
                    for profile in split.column_profiles.values())

        for contig, profile in profiles:
            # let's figure out more about this particular variable position
            position = profile['pos_in_contig']
            in_partial, in_complete, base_pos = self.get_nt_position_info(contig.name, position)

            profile['in_partial_gene_call'] = in_partial
            profile['in_complete_gene_call'] = in_complete
            profile['base_pos_in_codon'] = base_pos
            profile['sample_id'] = self.sample_id

            # -1 stands for 'no matching gene call' (the original author notes
            # that gene caller ids start from 0); both fields may be
            # overwritten below.
            profile['corresponding_gene_call'] = -1
            profile['codon_order_in_gene'] = -1

            # only positions within a COMPLETE gene call are matched to
            # gene caller ids.
            if profile['in_complete_gene_call']:
                matching_ids = self.get_corresponding_gene_caller_ids_for_base_position(contig.name, position)

                # more than one match usually indicates an assembly error, so
                # to be on the safe side nothing is reported unless the match
                # is unique.
                if len(matching_ids) == 1:
                    gene_caller_id = matching_ids[0]
                    profile['corresponding_gene_call'] = gene_caller_id

                    codon_order = self.get_corresponding_codon_order_in_gene(gene_caller_id, contig.name, position)
                    profile['codon_order_in_gene'] = codon_order

                    # save this information for later use
                    self.codons_in_genes_to_profile_AA_frequencies.add((gene_caller_id, codon_order))

            table.append(profile)

        table.store()
        self.progress.end()
        self.run.info('variable_nts_table', True, quiet=True)
예제 #3
0
파일: merger.py 프로젝트: mruehlemann/anvio
    def merge_variable_nts_tables(self):
        """Copy the variable nucleotide entries of every input profile into the merged profile.

        For each profile database path in `self.profile_dbs_info_dict`, the
        rows of its variable nts table are read, the first field of each row
        is replaced with a fresh id obtained from the merged table, and the
        resulting entry is queued in the merged table's `db_entries`. The
        merged table is stored once, after all inputs have been read.
        """
        variable_nts_table = dbops.TableForVariability(
            self.merged_profile_db_path, progress=self.progress)

        for input_profile_db_path in self.profile_dbs_info_dict:
            sample_profile_db = dbops.ProfileDatabase(input_profile_db_path,
                                                      quiet=True)
            try:
                sample_variable_nts_table = sample_profile_db.db.get_table_as_list_of_tuples(
                    tables.variable_nts_table_name,
                    tables.variable_nts_table_structure)
            finally:
                # always release the sample database, even when reading the
                # table fails; otherwise the connection would be left open.
                sample_profile_db.disconnect()

            for tpl in sample_variable_nts_table:
                # drop the sample-local id (first field) and assign a new one
                # that is unique within the merged table.
                new_id = variable_nts_table.next_id(tables.variable_nts_table_name)
                variable_nts_table.db_entries.append((new_id,) + tuple(tpl[1:]))

        variable_nts_table.store()
예제 #4
0
    def merge_variable_nts_tables(self):
        """Copy the variable nucleotide entries of every input run into this profile.

        `self.is_all_samples_have_it('variable_nts_table')` is called first;
        its return value is not used here. Then, for each run info dict in
        `self.input_runinfo_dicts`, the rows of the variable nts table in its
        'profile_db' are read, the first field of each row is replaced with a
        fresh id obtained from the merged table, and the resulting entry is
        queued in the merged table's `db_entries`. The merged table is stored
        once, after all inputs have been read.
        """
        self.is_all_samples_have_it('variable_nts_table')

        variable_nts_table = dbops.TableForVariability(self.profile_db_path,
                                                       progress=self.progress)

        for runinfo in self.input_runinfo_dicts.values():
            sample_profile_db = dbops.ProfileDatabase(runinfo['profile_db'],
                                                      quiet=True)
            try:
                sample_variable_nts_table = sample_profile_db.db.get_table_as_list_of_tuples(
                    tables.variable_nts_table_name,
                    tables.variable_nts_table_structure)
            finally:
                # always release the sample database, even when reading the
                # table fails; otherwise the connection would be left open.
                sample_profile_db.disconnect()

            for tpl in sample_variable_nts_table:
                # drop the sample-local id (first field) and assign a new one
                # that is unique within the merged table.
                new_id = variable_nts_table.next_id(tables.variable_nts_table_name)
                variable_nts_table.db_entries.append((new_id,) + tuple(tpl[1:]))

        variable_nts_table.store()