def get_cds_seqrecords(alchemist, values=None, nucleotide=False, verbose=False):
    """Build one sequence record per CDS feature parsed from the database.

    The parent genome of each CDS is looked up in the phage table the
    first time its genome id is seen and then cached, so every genome is
    queried at most once per call.

    :param alchemist: Object exposing ``metadata``, ``graph`` and ``engine``,
        which are used to build and execute the parent-genome queries.
    :param values: Values forwarded to ``parse_feature_data`` to select the
        CDS features. ``None`` (the default) is treated as an empty list.
    :param nucleotide: Not read by this function body; kept so existing
        callers that pass it continue to work.
    :param verbose: When true, progress messages are printed.
    :returns: List of the objects returned by ``cds_to_seqrecord``, in the
        order the CDS features were returned by ``parse_feature_data``.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between invocations (and with parse_feature_data).
    if values is None:
        values = []

    cds_list = parse_feature_data(alchemist, values=values)

    seqrecords = []
    genomes_dict = {}  # genome id -> parsed parent genome
    for cds in cds_list:
        genome_id = cds.genome_id

        if genome_id not in genomes_dict:
            if verbose:
                print(f"...Retrieving parent genome for {cds.id}...")

            phage_id_obj = querying.get_column(alchemist.metadata,
                                               "phage.PhageID")
            phage_obj = phage_id_obj.table

            parent_genome_query = querying.build_select(
                alchemist.graph, phage_obj,
                where=(phage_id_obj == genome_id))

            parent_genome_data = mysqldb_basic.first(alchemist.engine,
                                                     parent_genome_query)
            genomes_dict[genome_id] = mysqldb.parse_phage_table_data(
                parent_genome_data)

        if verbose:
            print(f"Converting {cds.id}...")

        parent_genome = genomes_dict[genome_id]

        # The genome length is set before set_seqfeature() is called,
        # matching the original statement order.
        cds.genome_length = parent_genome.length
        cds.set_seqfeature()

        seqrecords.append(cds_to_seqrecord(cds, parent_genome))

    return seqrecords
def test_parse_phage_table_data_1(self):
    """Check that a complete phage-table row dictionary is parsed into a
    genome object with every expected attribute value."""
    row = {
        "PhageID": "L5",
        "Accession": "ABC123",
        "Name": "L5_Draft",
        "HostGenus": "Mycobacterium",
        "Sequence": "ATCG".encode("utf-8"),
        "Length": 10,
        "DateLastModified": constants.EMPTY_DATE,
        "Notes": "abc".encode("utf-8"),
        "GC": 12.12,
        "Cluster": "A",
        "Subcluster": "A2",
        "Status": "final",
        "RetrieveRecord": 1,
        "AnnotationAuthor": 1,
    }

    self.genome1 = mysqldb.parse_phage_table_data(row, gnm_type="mysql")

    # (attribute name, expected value) pairs checked before the
    # sequence type check.
    leading_checks = (
        ("id", "L5"),
        ("accession", "ABC123"),
        ("name", "L5_Draft"),
        ("host_genus", "Mycobacterium"),
        ("seq", "ATCG"),
    )
    # Pairs checked after the sequence type check.
    trailing_checks = (
        ("length", 10),
        ("date", constants.EMPTY_DATE),
        ("description", "abc"),
        ("gc", 12.12),
        ("cluster", "A"),
        ("subcluster", "A2"),
        ("annotation_status", "final"),
        ("retrieve_record", 1),
        ("annotation_author", 1),
        ("translation_table", 11),
        ("type", "mysql"),
    )

    for attribute, expected in leading_checks:
        with self.subTest():
            self.assertEqual(getattr(self.genome1, attribute), expected)

    with self.subTest():
        self.assertIsInstance(self.genome1.seq, Seq)

    for attribute, expected in trailing_checks:
        with self.subTest():
            self.assertEqual(getattr(self.genome1, attribute), expected)
def test_parse_phage_table_data_2(self):
    """Check that a row dictionary holding only a PhageID is parsed, and
    that the name, translation table and type have the expected values."""
    row = {"PhageID": "L5"}
    self.genome1 = mysqldb.parse_phage_table_data(row)

    # (attribute name, expected value) pairs, asserted in order.
    checks = (
        ("id", "L5"),
        ("name", ""),
        ("translation_table", 11),
        ("type", ""),
    )
    for attribute, expected in checks:
        with self.subTest():
            self.assertEqual(getattr(self.genome1, attribute), expected)