Example #1
0
def create_matched_genomes():
    """Build two GenomePair objects, each pairing genomes with the same id.

    The first pair holds two genomes with id "Trixie" and the second holds
    two genomes with id "Alice". Only the genome stored as ``genome1`` has
    its ``annotation_status`` set ("draft" for Trixie, "final" for Alice).

    Returns:
        A list with the Trixie pair followed by the Alice pair.
    """

    def _build_pair(phage_id, status):
        # First genome of the pair carries the annotation status.
        first = genome.Genome()
        first.id = phage_id
        first.annotation_status = status

        # Second genome shares the id but has no status assigned here.
        second = genome.Genome()
        second.id = phage_id

        pair = genomepair.GenomePair()
        pair.genome1 = first
        pair.genome2 = second
        return pair

    return [_build_pair("Trixie", "draft"), _build_pair("Alice", "final")]
Example #2
0
    def setUp(self):
        """Prepare a Bundle with a ticket, two genomes and two genome pairs.

        Three source, CDS, tRNA and tmRNA features are created, but only the
        first two of each kind are attached to the "flat_file" genome. The
        "mysql" genome is left without features. Four Evaluation objects
        (two "correct", two "error") are also created for use by the tests.
        """

        def with_id(feature, feature_id):
            # Label the feature and hand it back so that creation and
            # labelling fit in one statement.
            feature.id = feature_id
            return feature

        self.ticket1 = ticket.ImportTicket()

        self.src1 = with_id(source.Source(), "L5_SRC_1")
        self.src2 = with_id(source.Source(), "L5_SRC_2")
        self.src3 = with_id(source.Source(), "L5_SRC_3")

        self.cds1 = with_id(cds.Cds(), "L5_CDS_1")
        self.cds2 = with_id(cds.Cds(), "L5_CDS_2")
        self.cds3 = with_id(cds.Cds(), "L5_CDS_3")

        self.trna1 = with_id(trna.Trna(), "L5_TRNA_1")
        self.trna2 = with_id(trna.Trna(), "L5_TRNA_2")
        self.trna3 = with_id(trna.Trna(), "L5_TRNA_3")

        self.tmrna1 = with_id(tmrna.Tmrna(), "L5_TMRNA_1")
        self.tmrna2 = with_id(tmrna.Tmrna(), "L5_TMRNA_2")
        self.tmrna3 = with_id(tmrna.Tmrna(), "L5_TMRNA_3")

        # Genome populated with the first two features of every kind.
        flat_file_gnm = genome.Genome()
        flat_file_gnm.type = "flat_file"
        flat_file_gnm.cds_features = [self.cds1, self.cds2]
        flat_file_gnm.source_features = [self.src1, self.src2]
        flat_file_gnm.trna_features = [self.trna1, self.trna2]
        flat_file_gnm.tmrna_features = [self.tmrna1, self.tmrna2]
        self.genome1 = flat_file_gnm

        # Genome with only its type set.
        mysql_gnm = genome.Genome()
        mysql_gnm.type = "mysql"
        self.genome2 = mysql_gnm

        self.genome_pair1 = genomepair.GenomePair()
        self.genome_pair2 = genomepair.GenomePair()

        # Genomes are registered under their own type string.
        bndl = bundle.Bundle()
        bndl.ticket = self.ticket1
        for gnm in (self.genome1, self.genome2):
            bndl.genome_dict[gnm.type] = gnm
        bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
        bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
        self.bndl = bndl

        self.eval_correct1, self.eval_correct2 = (
            evaluation.Evaluation(status="correct") for _ in range(2))
        self.eval_error1, self.eval_error2 = (
            evaluation.Evaluation(status="error") for _ in range(2))
Example #3
0
    def test_set_genome_pair_2(self):
        """Verify no genome pair is stored when one of the keys is absent."""

        bndl = self.bndl
        bndl.ticket = self.tkt

        # Register both genomes under their own type string.
        for gnm in (self.genome1, self.genome2):
            bndl.genome_dict[gnm.type] = gnm

        # "invalid" is the key passed that the test expects to be absent.
        new_pair = genomepair.GenomePair()
        bndl.set_genome_pair(new_pair, "invalid", "flat_file")

        pair_count = len(bndl.genome_pair_dict.keys())
        self.assertEqual(pair_count, 0)
Example #4
0
    def test_set_genome_pair_1(self):
        """Verify a genome pair is stored when both keys are present."""

        bndl = self.bndl
        bndl.ticket = self.tkt

        # Register both genomes under their own type string.
        for gnm in (self.genome1, self.genome2):
            bndl.genome_dict[gnm.type] = gnm

        new_pair = genomepair.GenomePair()
        bndl.set_genome_pair(new_pair, "mysql", "flat_file")

        # The first stored key is expected to be the two keys joined by "_".
        pair_keys = list(bndl.genome_pair_dict.keys())
        self.assertEqual(pair_keys[0], "mysql_flat_file")
Example #5
0
    def setUp(self):
        """Create two genomes, a ticket, a pair linking them, and test dates.

        ``date_feb1`` and ``date_feb1_b`` are parsed from the same string, so
        they are separate objects holding the same date.
        """
        date_format = '%m/%d/%Y'

        self.genome1 = genome.Genome()
        self.genome2 = genome.Genome()
        self.tkt = ticket.ImportTicket()

        pair = genomepair.GenomePair()
        pair.genome1 = self.genome1
        pair.genome2 = self.genome2
        self.genome_pair = pair

        self.date_jan1 = datetime.strptime('1/1/2000', date_format)
        self.date_feb1 = datetime.strptime('2/1/2000', date_format)
        self.date_feb1_b = datetime.strptime('2/1/2000', date_format)
Example #6
0
    def setUp(self):
        """Prepare a Bundle with a ticket, two genomes and two genome pairs.

        Three source and three CDS features are created, but only the first
        two of each are appended to the "flat_file" genome. The "mysql"
        genome gets no features. Four Eval objects (two "correct", two
        "error") are also created for use by the tests.
        """

        def with_id(feature, feature_id):
            # Label the feature and hand it back so that creation and
            # labelling fit in one statement.
            feature.id = feature_id
            return feature

        self.ticket1 = ticket.ImportTicket()

        self.src1 = with_id(source.Source(), "L5_SRC_1")
        self.src2 = with_id(source.Source(), "L5_SRC_2")
        self.src3 = with_id(source.Source(), "L5_SRC_3")

        self.cds1 = with_id(cds.Cds(), "L5_CDS_1")
        self.cds2 = with_id(cds.Cds(), "L5_CDS_2")
        self.cds3 = with_id(cds.Cds(), "L5_CDS_3")

        # Features are appended to the genome's existing feature lists.
        flat_file_gnm = genome.Genome()
        flat_file_gnm.type = "flat_file"
        self.genome1 = flat_file_gnm
        for cds_ftr in (self.cds1, self.cds2):
            self.genome1.cds_features.append(cds_ftr)
        for src_ftr in (self.src1, self.src2):
            self.genome1.source_features.append(src_ftr)

        mysql_gnm = genome.Genome()
        mysql_gnm.type = "mysql"
        self.genome2 = mysql_gnm

        self.genome_pair1 = genomepair.GenomePair()
        self.genome_pair2 = genomepair.GenomePair()

        # Genomes are registered under their own type string.
        bndl = bundle.Bundle()
        bndl.ticket = self.ticket1
        for gnm in (self.genome1, self.genome2):
            bndl.genome_dict[gnm.type] = gnm
        bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
        bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
        self.bndl = bndl

        self.eval_correct1, self.eval_correct2 = (
            eval.Eval(status="correct") for _ in range(2))
        self.eval_error1, self.eval_error2 = (
            eval.Eval(status="error") for _ in range(2))
Example #7
0
def match_genomes(mysqldb_dict, phagesdb_dict):
    """Pair MySQL genomes with PhagesDB genomes that share a PhageID.

    Both dictionaries:
    Key = PhageID
    Value = pdm_utils genome object

    A summary of matched and unmatched counts is printed. Unmatched MySQL
    genomes whose ``annotation_author`` equals 1 are additionally listed
    by PhageID.

    Returns:
        A tuple ``(matched_genomes, unmatched_phagesdb_ids)``: a list of
        GenomePair objects (``genome1`` from MySQL, ``genome2`` from
        PhagesDB) and the set of PhageIDs found only in ``phagesdb_dict``.
    """

    # Key views support set algebra directly.
    pdb_keys = phagesdb_dict.keys()
    sql_keys = mysqldb_dict.keys()
    shared_ids = sql_keys & pdb_keys
    sql_only_ids = sql_keys - pdb_keys
    pdb_only_ids = pdb_keys - sql_keys

    matched_genomes = []
    for phage_id in shared_ids:
        pair = genomepair.GenomePair()
        pair.genome1 = mysqldb_dict[phage_id]
        pair.genome2 = phagesdb_dict[phage_id]
        matched_genomes.append(pair)

    # Keep only the unmatched MySQL genomes flagged with author value 1.
    sql_only_genomes = ((pid, mysqldb_dict[pid]) for pid in sql_only_ids)
    authored = {pid: gnm for pid, gnm in sql_only_genomes
                if gnm.annotation_author == 1}

    print("\nSummary of genome matching:")
    print(f"{len(shared_ids):>6}: genome(s) matched.")
    print(f"{len(sql_only_ids):>6}: MySQL genome(s) not matched.")
    print(f"{len(pdb_only_ids):>6}: PhagesDB genome(s) not matched.")

    if authored:
        print(f"{len(authored)} Hatfull-authored unmatched MySQL genome(s):")
        for phage_id in authored:
            print(phage_id)

    return (matched_genomes, pdb_only_ids)
Example #8
0
def match_genomes(dict1, dict2):
    """Pair genomes from two dictionaries that share the same key.

    Both dictionaries:
    Key = PhageID
    Value = pdm_utils genome object

    Match counts, plus the keys of unmatched ``dict1`` genomes whose
    ``annotation_author`` equals 1, are passed to ``print_match_results``.

    Returns:
        A tuple ``(matched_genomes, d2_unmatched_keys)``: a list of
        GenomePair objects (``genome1`` from ``dict1``, ``genome2`` from
        ``dict2``) and the set of keys found only in ``dict2``.
    """

    # Key views support set algebra directly.
    keys_2 = dict2.keys()
    keys_1 = dict1.keys()
    shared_keys = keys_1 & keys_2
    only_in_1 = keys_1 - keys_2
    only_in_2 = keys_2 - keys_1

    matched_genomes = []
    for shared_key in shared_keys:
        pair = genomepair.GenomePair()
        pair.genome1 = dict1[shared_key]
        pair.genome2 = dict2[shared_key]
        matched_genomes.append(pair)

    # Only unmatched with AnnotationAuthor = 1
    d1_only_genomes = ((k, dict1[k]) for k in only_in_1)
    aa1_genomes = {k: gnm for k, gnm in d1_only_genomes
                   if gnm.annotation_author == 1}

    print_match_results({
        "match_count": len(shared_keys),
        "d1_unmatch_count": len(only_in_1),
        "d2_unmatch_count": len(only_in_2),
        "d1_unmatch_aa1": aa1_genomes.keys(),
    })

    return (matched_genomes, only_in_2)