def perform_alignment_based_estimation(self):
    """Run the alignment-based composition estimation from start to finish.

    Calls, in order: ``perform_setup``, ``create_database``,
    ``load_genomes``, ``create_fragments``, ``align_sample_reads``,
    ``align_simulated_reads``, ``create_symbols`` and
    ``create_accessions_filenames_mapping``. It then obtains read
    classifications for the fragment PAF file (``empirical=True``) and for
    the sample PAF file, writes the three resulting summaries as JSON below
    ``<project_path>/runtimefiles/abundance_estimation/``, and finally calls
    ``estimate_proportions`` and ``update_sample_proportions``.
    """
    print('\nmultiple identified strains in group')
    print('performing composition estimation')

    # Preparation and alignment steps.
    self.perform_setup()
    self.create_database()
    self.load_genomes()
    self.create_fragments()
    self.align_sample_reads()
    self.align_simulated_reads()
    self.create_symbols()
    self.create_accessions_filenames_mapping()

    # Classify the fragment alignments and the real sample alignments.
    pooled_summary, organism_summary = self.get_read_classifications(
        self.fragment_paf_file, empirical=True)
    sample_counts = self.get_read_classifications(self.sample_paf_file)

    # Persist all three results next to each other.
    out_dir = self.context.project_path + '/runtimefiles/abundance_estimation/'
    pooled_json = out_dir + 'empirical_pooled_summary.json'
    organism_json = out_dir + 'empirical_organism_summary.json'
    counts_json = out_dir + 'observed_counts.json'
    write_json(pooled_summary, pooled_json)
    write_json(organism_summary, organism_json)
    write_json(sample_counts, counts_json)

    # The estimation is fed the copies read back from disk, not the
    # in-memory objects (presumably so it sees the JSON-serialised form;
    # confirm before removing this round trip).
    organism_summary = load_json(organism_json)
    sample_counts = load_json(counts_json)

    print('EM rounds start')
    self.estimate_proportions(organism_summary, sample_counts)
    self.update_sample_proportions()
def __init__(self, paf_file, database_path, include_plasmids_mitochondria):
    """Store the inputs and load the two taxonomy lookup tables.

    Args:
        paf_file: Kept as ``self.paf_file``.
        database_path: Prefix of the database directory. The relative
            ``taxonomy/...`` paths are appended by plain concatenation, so
            the value must already end with a path separator.
        include_plasmids_mitochondria: Kept unchanged as
            ``self.include_plasmids_mitochondria``.
    """
    self.paf_file = paf_file
    self.include_plasmids_mitochondria = include_plasmids_mitochondria

    genomes_json = database_path + 'taxonomy/accessions_genomes.json'
    filenames_json = database_path + 'taxonomy/accessions_filenames.json'
    self.ag_dict = load_json(genomes_json)
    self.af_dict = load_json(filenames_json)
def __init__(self, reads_alignments, context):
    """Store the alignments and context, then load the taxonomy tables.

    Args:
        reads_alignments: Kept as ``self.reads_alignments``.
        context: Kept as ``self.context``; its ``database_path`` attribute
            is used as the prefix for the taxonomy JSON files and must
            already end with a path separator, because the relative paths
            are appended by plain concatenation.
    """
    self.reads_alignments = reads_alignments
    self.context = context

    database_path = self.context.database_path
    filenames_json = database_path + 'taxonomy/accessions_filenames.json'
    genomes_json = database_path + 'taxonomy/accessions_genomes.json'
    self.af_dict = load_json(filenames_json)
    # The contents of accessions_genomes.json are stored as ``an_dict``.
    self.an_dict = load_json(genomes_json)
def __init__(self, paf_file, database_path, *, min_pid=0.1,
             min_collinearity=0.8, min_read_length=700):
    """Store the inputs and thresholds and load the taxonomy tables.

    The three thresholds were previously hard-coded; they are now optional
    keyword-only arguments whose defaults equal the former constants, so
    existing two-argument calls behave exactly as before.

    Args:
        paf_file: Kept as ``self.paf_file``.
        database_path: Prefix of the database directory. The relative
            ``taxonomy/...`` paths are appended by plain concatenation, so
            the value must already end with a path separator.
        min_pid: Stored as ``self.min_pid`` (presumably a minimum percent
            identity threshold; confirm against the code that reads it).
        min_collinearity: Stored as ``self.min_collinearity``.
        min_read_length: Stored as ``self.min_read_length``.
    """
    self.paf_file = paf_file

    # Thresholds, overridable per instance.
    self.min_pid = min_pid
    self.min_collinearity = min_collinearity
    self.min_read_length = min_read_length

    self.ag_dict = load_json(database_path + 'taxonomy/accessions_genomes.json')
    self.af_dict = load_json(database_path + 'taxonomy/accessions_filenames.json')
def __init__(self, classifications, collinearities, group_id,
             group_abundance, context):
    """Store the group data and context and load the taxonomy tables.

    Args:
        classifications: Kept as ``self.classifications``.
        collinearities: Kept as ``self.collinearities``.
        group_id: Kept as ``self.group_id``.
        group_abundance: Kept as ``self.group_abundance``.
        context: Kept as ``self.context``; its ``database_path`` attribute
            is used as the prefix for the taxonomy JSON files and must
            already end with a path separator, because the relative paths
            are appended by plain concatenation.
    """
    self.group_id = group_id
    self.group_abundance = group_abundance
    self.classifications = classifications
    self.collinearities = collinearities
    self.context = context
    self.min_cec_ratio = 2

    filenames_json = context.database_path + 'taxonomy/accessions_filenames.json'
    genomes_json = context.database_path + 'taxonomy/accessions_genomes.json'
    self.af_dict = load_json(filenames_json)
    self.ag_dict = load_json(genomes_json)
def remove_plasmids_mitochondria(self):
    """Filter plasmid and mitochondrial accessions out of the classifications.

    Each entry of ``self.classifications`` is unpacked as
    ``(read_id, accessions)``. An accession is kept only if the name found
    for its upper-cased form in ``taxonomy/accessions_genomes.json``
    contains neither ``'plasmid'`` nor ``'mitochond'`` (case-insensitive).
    Reads left without any accession are dropped.

    Entries are consumed with ``pop()``, so the previous container is
    emptied in place and, for a list, the new ``self.classifications`` is
    in reverse order relative to the old one.

    Raises:
        KeyError: If an upper-cased accession is missing from the table.
    """
    database_path = self.context.database_path
    sequence_names = load_json(database_path + 'taxonomy/accessions_genomes.json')
    excluded_markers = ('plasmid', 'mitochond')

    kept = []
    while self.classifications:
        read_id, accessions = self.classifications.pop()
        chromosomal = []
        for accession in accessions:
            lowered_name = sequence_names[accession.upper()].lower()
            if any(marker in lowered_name for marker in excluded_markers):
                continue
            chromosomal.append(accession)
        if chromosomal:
            kept.append([read_id, chromosomal])

    self.classifications = kept
def __init__(self, paf_file, database_path):
    """Store the PAF file reference and load the two taxonomy tables.

    Args:
        paf_file: Kept as ``self.paf_file``.
        database_path: Prefix of the database directory. The relative
            ``taxonomy/...`` paths are appended by plain concatenation, so
            the value must already end with a path separator.
    """
    self.paf_file = paf_file

    genomes_json = database_path + 'taxonomy/accessions_genomes.json'
    filenames_json = database_path + 'taxonomy/accessions_filenames.json'
    self.ag_dict = load_json(genomes_json)
    self.af_dict = load_json(filenames_json)