def perform_alignment_based_estimation(self):
    """Run the alignment-based composition estimation pipeline.

    Prepares the working data, aligns sample and simulated reads,
    derives read classifications from both PAF files, stores the
    resulting summaries as JSON under the project's
    ``runtimefiles/abundance_estimation/`` directory, and finally runs
    the proportion estimation and updates the sample proportions.
    """
    print('\nmultiple identified strains in group')
    print('performing composition estimation')

    # Preparation: setup, database, genomes and fragments.
    self.perform_setup()
    self.create_database()
    self.load_genomes()
    self.create_fragments()

    # Alignments of the real sample reads and of the simulated reads.
    self.align_sample_reads()
    self.align_simulated_reads()

    self.create_symbols()
    self.create_accessions_filenames_mapping()

    # With empirical=True the classifier yields a (pooled, per-organism)
    # pair; without it a single result is returned.
    pooled_summary, organism_summary = self.get_read_classifications(
        self.fragment_paf_file, empirical=True)
    sample_counts = self.get_read_classifications(self.sample_paf_file)

    out_dir = self.context.project_path + '/runtimefiles/abundance_estimation/'
    organism_json = out_dir + 'empirical_organism_summary.json'
    counts_json = out_dir + 'observed_counts.json'

    write_json(pooled_summary, out_dir + 'empirical_pooled_summary.json')
    write_json(organism_summary, organism_json)
    write_json(sample_counts, counts_json)

    # The estimation step is fed the values re-read from disk, not the
    # in-memory originals.
    # NOTE(review): presumably this normalises the data to its
    # JSON-round-tripped form - confirm before removing the reload.
    organism_summary = load_json(organism_json)
    sample_counts = load_json(counts_json)

    print('EM rounds start')

    self.estimate_proportions(organism_summary, sample_counts)
    self.update_sample_proportions()
Beispiel #2
0
 def __init__(self, paf_file, database_path, include_plasmids_mitochondria):
     """Store the inputs and load the two taxonomy lookup files.

     ``database_path`` is concatenated directly with the relative file
     names, so it has to end with a path separator.
     """
     genomes_json = database_path + 'taxonomy/accessions_genomes.json'
     filenames_json = database_path + 'taxonomy/accessions_filenames.json'

     self.paf_file = paf_file
     self.include_plasmids_mitochondria = include_plasmids_mitochondria

     # ag_dict: content of accessions_genomes.json
     self.ag_dict = load_json(genomes_json)
     # af_dict: content of accessions_filenames.json
     self.af_dict = load_json(filenames_json)
    def __init__(self, reads_alignments, context):
        """Keep the alignments and context, then load the taxonomy lookups.

        The lookup files are resolved relative to
        ``context.database_path``, which is concatenated as-is with the
        relative file names.
        """
        self.reads_alignments = reads_alignments
        self.context = context

        db_root = self.context.database_path
        # af_dict: content of accessions_filenames.json
        self.af_dict = load_json(db_root + 'taxonomy/accessions_filenames.json')
        # an_dict: content of accessions_genomes.json
        self.an_dict = load_json(db_root + 'taxonomy/accessions_genomes.json')
Beispiel #4
0
 def __init__(self, paf_file, database_path):
     """Store the PAF file, set filter thresholds, load taxonomy lookups.

     ``database_path`` is concatenated directly with the relative file
     names, so it has to end with a path separator.
     """
     genomes_json = database_path + 'taxonomy/accessions_genomes.json'
     filenames_json = database_path + 'taxonomy/accessions_filenames.json'

     self.paf_file = paf_file

     # Fixed thresholds; how they are applied is not visible in this
     # block. NOTE(review): names suggest minimum percent identity,
     # minimum collinearity and minimum read length - confirm against
     # the methods that read them.
     self.min_pid = 0.1
     self.min_collinearity = 0.8
     self.min_read_length = 700

     # ag_dict: content of accessions_genomes.json
     self.ag_dict = load_json(genomes_json)
     # af_dict: content of accessions_filenames.json
     self.af_dict = load_json(filenames_json)
Beispiel #5
0
    def __init__(self, classifications, collinearities, group_id, group_abundance, context):
        """Store the per-group inputs and load the taxonomy lookups.

        The lookup files are resolved relative to
        ``context.database_path``, which is concatenated as-is with the
        relative file names.
        """
        self.group_id = group_id
        self.group_abundance = group_abundance
        self.classifications = classifications
        self.collinearities = collinearities
        self.context = context
        # Fixed ratio threshold; its use is not visible in this block.
        self.min_cec_ratio = 2

        db_root = context.database_path
        # af_dict: content of accessions_filenames.json
        self.af_dict = load_json(db_root + 'taxonomy/accessions_filenames.json')
        # ag_dict: content of accessions_genomes.json
        self.ag_dict = load_json(db_root + 'taxonomy/accessions_genomes.json')
Beispiel #6
0
    def remove_plasmids_mitochondria(self):
        """Drop plasmid and mitochondrial accessions from the classifications.

        Each entry of ``self.classifications`` is a ``(read_id, accessions)``
        pair. An accession is kept only when its sequence name, looked up
        by upper-cased accession in ``accessions_genomes.json``, contains
        neither 'plasmid' nor 'mitochond' (case-insensitive). Reads with
        no remaining accession are discarded.

        The existing container is drained from its end, so the rebuilt
        list is in reverse order relative to the input.
        """
        taxonomy = load_json(
            self.context.database_path + 'taxonomy/accessions_genomes.json')

        def is_chromosomal(accession):
            # A missing accession raises KeyError, as a plain lookup would.
            name = taxonomy[accession.upper()].lower()
            return not ('plasmid' in name or 'mitochond' in name)

        kept = []
        pending = self.classifications
        while pending:
            read_id, accessions = pending.pop()
            chromosomal = [acc for acc in accessions if is_chromosomal(acc)]
            if chromosomal:
                kept.append([read_id, chromosomal])

        self.classifications = kept
Beispiel #7
0
 def __init__(self, paf_file, database_path):
     """Store the PAF file path and load the two taxonomy lookup files.

     ``database_path`` is concatenated directly with the relative file
     names, so it has to end with a path separator.
     """
     genomes_json = database_path + 'taxonomy/accessions_genomes.json'
     filenames_json = database_path + 'taxonomy/accessions_filenames.json'

     self.paf_file = paf_file
     # ag_dict: content of accessions_genomes.json
     self.ag_dict = load_json(genomes_json)
     # af_dict: content of accessions_filenames.json
     self.af_dict = load_json(filenames_json)