def test_bridge_in_linear_record(self):
    """Conversion must fail for the fixture CDS and gene on a linear record.

    The record's topology annotation is set to "linear", after which
    Record.from_biopython is expected to raise a ValueError matching
    "Features that bridge", first with the fixture CDS appended and then
    with the fixture gene placed at index 0 of the feature list.
    """
    seqrec = self.seqrec
    seqrec.annotations["topology"] = "linear"

    # first attempt: the CDS fixture is present in the record
    seqrec.features.append(self.seqcds)
    self.assertRaisesRegex(
        ValueError,
        "Features that bridge",
        Record.from_biopython,
        seqrec,
        taxon='bacteria',
    )

    # second attempt: the gene fixture takes over the first feature slot
    seqrec.features[0] = self.seqgene
    self.assertRaisesRegex(
        ValueError,
        "Features that bridge",
        Record.from_biopython,
        seqrec,
        taxon='bacteria',
    )
def test_bridge_in_linear_record(self):
    """Conversion must fail for the fixture CDS and gene on a linear record.

    With the topology annotation set to "linear" and a fungal taxon,
    Record.from_biopython is expected to raise SecmetInvalidInputError
    matching "cannot determine correct exon ordering", first with the
    fixture CDS appended and then with the fixture gene placed at index 0
    of the feature list.
    """
    seqrec = self.seqrec
    seqrec.annotations["topology"] = "linear"

    # first attempt: the CDS fixture is present in the record
    seqrec.features.append(self.seqcds)
    self.assertRaisesRegex(
        SecmetInvalidInputError,
        "cannot determine correct exon ordering",
        Record.from_biopython,
        seqrec,
        taxon='fungi',
    )

    # second attempt: the gene fixture takes over the first feature slot
    seqrec.features[0] = self.seqgene
    self.assertRaisesRegex(
        SecmetInvalidInputError,
        "cannot determine correct exon ordering",
        Record.from_biopython,
        seqrec,
        taxon='fungi',
    )
def test_record_conversion_from_biopython(self):
    """Round-trip the nisin GenBank record through Record and back.

    Verifies that the features of the converted record stringify
    identically to the originals, that conversion produced distinct
    objects and left the input untouched, and that the per-type feature
    counts of the input match what the Record accessors report.
    """
    genbank_path = helpers.get_path_to_nisin_genbank()
    parsed_records = list(Bio.SeqIO.parse(genbank_path, "genbank"))
    before = parsed_records[0]

    # note order is normalised up front, otherwise comparing the string
    # forms of features directly is unreliable
    for feature in before.features:
        if "note" in feature.qualifiers:
            feature.qualifiers["note"] = sorted(feature.qualifiers["note"])
    original_strings = sorted(str(feature) for feature in before.features)

    # tally how many features of each type the input holds
    type_counts = defaultdict(int)
    for feature in before.features:
        type_counts[feature.type] += 1

    record = Record.from_biopython(before, taxon="bacteria")
    after = record.to_biopython()

    # the regenerated features must match the originals one for one
    assert len(original_strings) == len(after.features)
    converted_strings = sorted(str(feature) for feature in after.features)
    for expected, actual in zip(original_strings, converted_strings):
        assert expected == actual

    # neither the record, its feature list, nor any feature may be shared
    assert before is not after
    assert before.features is not after.features
    for index, original in enumerate(before.features):
        assert original is not after.features[index]

    # and the input features must still look the way they did beforehand
    current_strings = sorted(str(feature) for feature in before.features)
    for expected, actual in zip(original_strings, current_strings):
        assert expected == actual

    # every tracked feature type must have the same count in the Record
    accessors = {
        "CDS": record.get_cds_features,
        "PFAM_domain": record.get_pfam_domains,
        "cluster": record.get_clusters,
        "aSDomain": record.get_antismash_domains,
    }
    for feature_type, accessor in accessors.items():
        assert type_counts[feature_type] == len(accessor())
def test_cds_with_no_id(self):
    """A fixture CDS without identifiers converts into two named CDS features.

    After conversion the record is expected to hold exactly two CDS
    features, spanning 0-9 and 12-21 and named "bridge_LOWER" and
    "bridge_UPPER" respectively.
    """
    self.seqrec.features.append(self.seqcds)
    record = Record.from_biopython(self.seqrec, taxon="bacteria")
    cdses = record.get_cds_features()
    assert len(cdses) == 2

    # (start, end, name) expected for each CDS, in the order returned
    expectations = (
        (0, 9, "bridge_LOWER"),
        (12, 21, "bridge_UPPER"),
    )
    for index, (start, end, name) in enumerate(expectations):
        cds = cdses[index]
        assert cds.location.start == start
        assert cds.location.end == end
        assert cds.get_name() == name
def test_cds_split(self):
    """The fixture CDS converts into two CDS features with suffixed ids.

    For each of the "locus_tag" and "gene" qualifiers in turn, the
    qualifier is set to "test" on the fixture CDS and the record is
    converted. The result is expected to hold exactly two CDS features,
    spanning 0-9 and 12-21, whose matching attribute and name carry the
    "_LOWER" and "_UPPER" suffixes respectively.
    """
    self.seqrec.features.append(self.seqcds)

    # (start, end, identifier) expected for each CDS, in the order returned
    expectations = (
        (0, 9, "test_LOWER"),
        (12, 21, "test_UPPER"),
    )
    for id_name in ["locus_tag", "gene"]:
        self.seqcds.qualifiers[id_name] = ["test"]
        rec = Record.from_biopython(self.seqrec, taxon="bacteria")
        cdses = rec.get_cds_features()
        assert len(cdses) == 2
        for index, (start, end, identifier) in enumerate(expectations):
            cds = cdses[index]
            assert cds.location.start == start
            assert cds.location.end == end
            assert getattr(cds, id_name) == identifier
            assert cds.get_name() == identifier
        # drop the qualifier again so the next iteration starts clean
        self.seqcds.qualifiers.pop(id_name)
def test_gene_split(self):
    """The fixture gene converts into two gene features with suffixed ids.

    For each of the "locus_tag" and "gene" qualifiers in turn, the
    qualifier is set on the fixture gene, the record is converted and the
    qualifier removed again. The result is expected to hold exactly two
    genes: the first spanning 12-21 with an "_UPPER" suffix, the second
    spanning 0-9 with a "_LOWER" suffix.
    """
    self.seqrec.features.append(self.seqgene)
    for qualifier in ("locus_tag", "gene"):
        expected = qualifier + "_test"
        self.seqgene.qualifiers[qualifier] = [expected]
        rec = Record.from_biopython(self.seqrec, taxon="bacteria")
        self.seqgene.qualifiers.pop(qualifier)

        genes = rec.get_genes()
        assert len(genes) == 2

        # a Gene has no "gene" member; that qualifier is read as gene_name
        attribute = "gene_name" if qualifier == "gene" else qualifier

        # (start, end, suffix) expected for each gene, in the order returned
        expectations = (
            (12, 21, "_UPPER"),
            (0, 9, "_LOWER"),
        )
        for index, (start, end, suffix) in enumerate(expectations):
            gene = genes[index]
            assert gene.location.start == start
            assert gene.location.end == end
            assert getattr(gene, attribute) == expected + suffix
            assert gene.get_name() == expected + suffix