def create_matched_genomes():
    """Build a list of two GenomePair objects.

    Each pair holds two Genome objects that share one id ("Trixie" for the
    first pair, "Alice" for the second). Only the genome stored as
    ``genome1`` is given an annotation status ("draft" and "final",
    respectively); ``genome2`` only has its id set.

    :returns: The two GenomePair objects, the "Trixie" pair first.
    :rtype: list
    """
    specs = (("Trixie", "draft"), ("Alice", "final"))
    pairs = []
    for phage_id, status in specs:
        first = genome.Genome()
        first.id = phage_id
        first.annotation_status = status

        second = genome.Genome()
        second.id = phage_id

        pair = genomepair.GenomePair()
        pair.genome1 = first
        pair.genome2 = second
        pairs.append(pair)
    return pairs
def setUp(self):
    """Create the ticket, features, genomes, pairs, bundle and evaluations.

    ``genome1`` (type "flat_file") receives the first two source, CDS,
    tRNA and tmRNA features; the third feature of each kind is created but
    not attached to a genome here. ``genome2`` (type "mysql") gets no
    features in this method. Both genomes are stored in the bundle under
    their type, and both genome pairs under fixed keys.
    """
    def with_id(feature, feature_id):
        # Tag a freshly built feature and hand the same object back.
        feature.id = feature_id
        return feature

    self.ticket1 = ticket.ImportTicket()

    # Three features of each kind.
    self.src1 = with_id(source.Source(), "L5_SRC_1")
    self.src2 = with_id(source.Source(), "L5_SRC_2")
    self.src3 = with_id(source.Source(), "L5_SRC_3")

    self.cds1 = with_id(cds.Cds(), "L5_CDS_1")
    self.cds2 = with_id(cds.Cds(), "L5_CDS_2")
    self.cds3 = with_id(cds.Cds(), "L5_CDS_3")

    self.trna1 = with_id(trna.Trna(), "L5_TRNA_1")
    self.trna2 = with_id(trna.Trna(), "L5_TRNA_2")
    self.trna3 = with_id(trna.Trna(), "L5_TRNA_3")

    self.tmrna1 = with_id(tmrna.Tmrna(), "L5_TMRNA_1")
    self.tmrna2 = with_id(tmrna.Tmrna(), "L5_TMRNA_2")
    self.tmrna3 = with_id(tmrna.Tmrna(), "L5_TMRNA_3")

    # Genomes: only the flat-file genome carries features.
    flat_file_gnm = genome.Genome()
    flat_file_gnm.type = "flat_file"
    flat_file_gnm.cds_features = [self.cds1, self.cds2]
    flat_file_gnm.source_features = [self.src1, self.src2]
    flat_file_gnm.trna_features = [self.trna1, self.trna2]
    flat_file_gnm.tmrna_features = [self.tmrna1, self.tmrna2]
    self.genome1 = flat_file_gnm

    mysql_gnm = genome.Genome()
    mysql_gnm.type = "mysql"
    self.genome2 = mysql_gnm

    self.genome_pair1 = genomepair.GenomePair()
    self.genome_pair2 = genomepair.GenomePair()

    # Bundle tying the ticket, genomes and genome pairs together.
    bndl = bundle.Bundle()
    bndl.ticket = self.ticket1
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm
    bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
    bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
    self.bndl = bndl

    # Two evaluations per status.
    self.eval_correct1 = evaluation.Evaluation(status="correct")
    self.eval_correct2 = evaluation.Evaluation(status="correct")
    self.eval_error1 = evaluation.Evaluation(status="error")
    self.eval_error2 = evaluation.Evaluation(status="error")
def test_set_genome_pair_2(self):
    """Verify that no genome pair is stored if one key is not present."""
    bndl = self.bndl
    bndl.ticket = self.tkt
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm

    pair = genomepair.GenomePair()
    # "invalid" is passed as the first genome key.
    bndl.set_genome_pair(pair, "invalid", "flat_file")

    pair_count = len(bndl.genome_pair_dict.keys())
    self.assertEqual(pair_count, 0)
def test_set_genome_pair_1(self):
    """Verify that a genome pair is stored if both keys are present.

    The stored pair is expected under the key "mysql_flat_file".
    """
    bndl = self.bndl
    bndl.ticket = self.tkt
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm

    pair = genomepair.GenomePair()
    bndl.set_genome_pair(pair, "mysql", "flat_file")

    pair_keys = list(bndl.genome_pair_dict.keys())
    self.assertEqual(pair_keys[0], "mysql_flat_file")
def setUp(self):
    """Create two genomes, a ticket, a genome pair and three dates.

    The genome pair references ``genome1`` and ``genome2``. ``date_feb1``
    and ``date_feb1_b`` are separate objects parsed from the same date
    string ('2/1/2000'); ``date_jan1`` is parsed from '1/1/2000'.
    """
    date_format = '%m/%d/%Y'

    self.genome1 = genome.Genome()
    self.genome2 = genome.Genome()
    self.tkt = ticket.ImportTicket()

    pair = genomepair.GenomePair()
    pair.genome1 = self.genome1
    pair.genome2 = self.genome2
    self.genome_pair = pair

    self.date_jan1 = datetime.strptime('1/1/2000', date_format)
    self.date_feb1 = datetime.strptime('2/1/2000', date_format)
    self.date_feb1_b = datetime.strptime('2/1/2000', date_format)
def setUp(self):
    """Create the ticket, features, genomes, pairs, bundle and evals.

    ``genome1`` (type "flat_file") has the first two CDS features and the
    first two source features added to its feature lists; the third
    feature of each kind is created but not attached to a genome here.
    ``genome2`` (type "mysql") gets no features in this method. Both
    genomes are stored in the bundle under their type, and both genome
    pairs under fixed keys.
    """
    def with_id(feature, feature_id):
        # Tag a freshly built feature and hand the same object back.
        feature.id = feature_id
        return feature

    self.ticket1 = ticket.ImportTicket()

    self.src1 = with_id(source.Source(), "L5_SRC_1")
    self.src2 = with_id(source.Source(), "L5_SRC_2")
    self.src3 = with_id(source.Source(), "L5_SRC_3")

    self.cds1 = with_id(cds.Cds(), "L5_CDS_1")
    self.cds2 = with_id(cds.Cds(), "L5_CDS_2")
    self.cds3 = with_id(cds.Cds(), "L5_CDS_3")

    # Genomes: only the flat-file genome carries features.
    flat_file_gnm = genome.Genome()
    flat_file_gnm.type = "flat_file"
    flat_file_gnm.cds_features.extend([self.cds1, self.cds2])
    flat_file_gnm.source_features.extend([self.src1, self.src2])
    self.genome1 = flat_file_gnm

    mysql_gnm = genome.Genome()
    mysql_gnm.type = "mysql"
    self.genome2 = mysql_gnm

    self.genome_pair1 = genomepair.GenomePair()
    self.genome_pair2 = genomepair.GenomePair()

    # Bundle tying the ticket, genomes and genome pairs together.
    bndl = bundle.Bundle()
    bndl.ticket = self.ticket1
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm
    bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
    bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
    self.bndl = bndl

    # Two evals per status.
    self.eval_correct1 = eval.Eval(status="correct")
    self.eval_correct2 = eval.Eval(status="correct")
    self.eval_error1 = eval.Eval(status="error")
    self.eval_error2 = eval.Eval(status="error")
def match_genomes(mysqldb_dict, phagesdb_dict):
    """Match MySQL database genome data to PhagesDB genome data.

    Genomes are matched on shared dictionary keys. A summary of the
    matching is printed: the number of matched keys, the number of keys
    found only in each dictionary, and the unmatched MySQL keys whose
    genome has ``annotation_author == 1`` (labelled "Hatfull-authored" in
    the output).

    :param mysqldb_dict:
        Key = PhageID, value = pdm_utils genome object (MySQL data).
    :type mysqldb_dict: dict
    :param phagesdb_dict:
        Key = PhageID, value = pdm_utils genome object (PhagesDB data).
    :type phagesdb_dict: dict
    :returns:
        tuple (matched_genomes, unmatched_phagesdb_ids) where
        matched_genomes is a list of GenomePair objects (genome1 from
        MySQL, genome2 from PhagesDB) and unmatched_phagesdb_ids is the
        set of keys present only in phagesdb_dict.
    :rtype: tuple
    """
    # Dict key views support set operations directly.
    mysqldb_ids = mysqldb_dict.keys()
    phagesdb_ids = phagesdb_dict.keys()
    matched_ids = mysqldb_ids & phagesdb_ids
    unmatched_mysqldb_ids = mysqldb_ids - phagesdb_ids
    unmatched_phagesdb_ids = phagesdb_ids - mysqldb_ids

    matched_genomes = []
    for phage_id in matched_ids:
        gnm_pair = genomepair.GenomePair()
        gnm_pair.genome1 = mysqldb_dict[phage_id]
        gnm_pair.genome2 = phagesdb_dict[phage_id]
        matched_genomes.append(gnm_pair)

    # Only the ids are reported, so there is no need to keep the genomes.
    unmatched_authored_ids = [
        phage_id for phage_id in unmatched_mysqldb_ids
        if mysqldb_dict[phage_id].annotation_author == 1
    ]

    print("\nSummary of genome matching:")
    print(f"{len(matched_ids):>6}: genome(s) matched.")
    print(f"{len(unmatched_mysqldb_ids):>6}: MySQL genome(s) not matched.")
    print(f"{len(unmatched_phagesdb_ids):>6}: PhagesDB genome(s) not matched.")

    count = len(unmatched_authored_ids)
    if count > 0:
        print(f"{count} Hatfull-authored unmatched MySQL genome(s):")
        for phage_id in unmatched_authored_ids:
            print(phage_id)

    return (matched_genomes, unmatched_phagesdb_ids)
def match_genomes(dict1, dict2):
    """Pair up genomes from two dictionaries that share a key.

    Per the original description, dict1 holds MySQL database genome data
    and dict2 holds PhagesDB genome data; in both, key = PhageID and
    value = pdm_utils genome object. A summary of the matching is handed
    to ``print_match_results``.

    :param dict1: First dictionary of genome objects.
    :type dict1: dict
    :param dict2: Second dictionary of genome objects.
    :type dict2: dict
    :returns:
        tuple (matched_genomes, d2_unmatched_keys) where matched_genomes
        is a list of GenomePair objects (genome1 from dict1, genome2 from
        dict2) and d2_unmatched_keys is the set of keys found only in
        dict2.
    :rtype: tuple
    """
    def build_pair(key):
        # One GenomePair per shared key, dict1 genome first.
        pair = genomepair.GenomePair()
        pair.genome1 = dict1[key]
        pair.genome2 = dict2[key]
        return pair

    # Key views behave like sets, so the overlaps come from set algebra.
    keys2 = dict2.keys()
    keys1 = dict1.keys()
    shared = keys1 & keys2
    only_in_1 = keys1 - keys2
    only_in_2 = keys2 - keys1

    matched_genomes = [build_pair(key) for key in shared]

    # Of the dict1-only genomes, keep those with AnnotationAuthor = 1.
    candidates = ((key, dict1[key]) for key in only_in_1)
    authored = {
        key: gnm for key, gnm in candidates
        if gnm.annotation_author == 1
    }

    summary = {
        "match_count": len(shared),
        "d1_unmatch_count": len(only_in_1),
        "d2_unmatch_count": len(only_in_2),
        "d1_unmatch_aa1": authored.keys()
        }
    print_match_results(summary)
    return (matched_genomes, only_in_2)