def test_annotation_creation():
    """
    Check that every feature yielded by an annotation carries a key and
    a 'gene' qualifier of one of the features it was created from.
    """
    feature_list = [
        Feature("CDS", [seq.Location(1, 2)], qual={"gene": "test1"}),
        Feature("CDS", [seq.Location(3, 4)], qual={"gene": "test2"}),
    ]
    expected_keys = [ref.key for ref in feature_list]
    expected_genes = [ref.qual["gene"] for ref in feature_list]

    annotation = Annotation(feature_list)
    for feature in annotation:
        assert feature.key in expected_keys
        assert feature.qual["gene"] in expected_genes
def test_annotation_concatenation():
    """
    Check that adding annotations and a single feature yields an
    annotation containing the features of all operands.
    """
    annot1 = Annotation([
        Feature("CDS", [seq.Location(1, 1)], qual={"gene": "test1"}),
        Feature("CDS", [seq.Location(2, 2)], qual={"gene": "test2"}),
    ])
    annot2 = Annotation([
        Feature("CDS", [seq.Location(3, 3)], qual={"gene": "test3"}),
        Feature("CDS", [seq.Location(4, 4)], qual={"gene": "test4"}),
    ])
    single_feature = Feature(
        "CDS", [seq.Location(5, 5)], qual={"gene": "test5"}
    )

    # Annotation + Annotation, followed by Annotation + Feature
    concat = annot1 + annot2 + single_feature

    genes = {feature.qual["gene"] for feature in concat}
    assert genes == {"test1", "test2", "test3", "test4", "test5"}
def test_feature_without_id():
    """
    A feature without 'ID' should raise an error if it has multiple
    locations and consequently multiple entries in the GFF3 file.
    """
    # Two locations and no 'ID' qualifier
    multi_loc_feature = seq.Feature(
        key="CDS",
        locs=[seq.Location(1, 2), seq.Location(4, 5)],
        qual={"some": "qualifiers"},
    )
    annot = seq.Annotation([multi_loc_feature])
    gff_file = gff.GFFFile()
    with pytest.raises(ValueError):
        gff.set_annotation(gff_file, annot)
def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.
    """
    gp_file = gb.GenBankFile.read(
        join(data_dir("sequence"), "bt_lysozyme.gp")
    )
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334

    annotation = gb.get_annotation(gp_file)
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52, 55, 62, 76, 78, 81, 117, 120, 125],
            [53, 55, 62, 76, 78, 81, 117, 120, 126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # The reference feature must be part of the annotation:
    # same key, same locations and each reference qualifier present
    # with the same value (additional qualifiers are tolerated).
    # Note that the tuple must be parenthesized: '(key, val in items)'
    # would create an always-truthy tuple and compare nothing.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation

    assert len(gb.get_sequence(gp_file, format="gp")) == 147
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.
    """
    gb_file = gb.GenBankFile.read(
        join(data_dir("sequence"), "ec_bl21.gb")
    )
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}

    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # The reference feature must be part of the annotation:
    # same key, same locations and each reference qualifier present
    # with the same value (additional qualifiers are tolerated).
    # Note that the tuple must be parenthesized: '(key, val in items)'
    # would create an always-truthy tuple and compare nothing.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation

    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
def _add_sec_str(annotation, first, last, str_type):
    """
    Add a 'SecStr' feature spanning `first` to `last` to `annotation`.

    The code ``"a"`` is stored as ``"helix"`` and ``"b"`` as
    ``"sheet"`` in the ``sec_str_type`` qualifier.
    For any other code (coil) nothing is added.
    """
    if str_type not in ("a", "b"):
        # coil
        return
    sec_str_name = "helix" if str_type == "a" else "sheet"
    annotation.add_feature(
        seq.Feature(
            "SecStr",
            [seq.Location(first, last)],
            {"sec_str_type": sec_str_name},
        )
    )
y, dx, dy, self._tail_width * bbox.height, self._head_width * bbox.height, # Create head with 90 degrees tip # -> head width/length ratio = 1/2 head_ratio=0.5, draw_head=draw_head, color=biotite.colors["orange"], linewidth=0)) # Test our drawing functions with example annotation annotation = seq.Annotation([ seq.Feature("SecStr", [seq.Location(10, 40)], {"sec_str_type": "helix"}), seq.Feature("SecStr", [seq.Location(60, 90)], {"sec_str_type": "sheet"}), ]) fig = plt.figure(figsize=(8.0, 0.8)) ax = fig.add_subplot(111) graphics.plot_feature_map( ax, annotation, multi_line=False, loc_range=(1, 100), # Register our drawing functions feature_plotters=[HelixPlotter(), SheetPlotter()]) fig.tight_layout() ########################################################################
# At this point the df ss_segments also contains 'L' linkers # Create new df to store only those relevent for plotting ss_segments_plot = ss_segments.query('sec_str_type != "L"') #%% annotation = seq.Annotation() for _, start_aa, end_aa, ss_type in ss_segments_plot.itertuples(): if ss_type == "H": ss_type = "helix" elif ss_type == "S": ss_type = "sheet" feature = seq.Feature("SecStr", [seq.Location(start_aa, end_aa)], {"sec_str_type": ss_type}) annotation.add_feature(feature) #%% class HelixPlotter(graphics.FeaturePlotter): def __init__(self): pass # Check whether this class is applicable for drawing a feature def matches(self, feature): if feature.key == "SecStr": if "sec_str_type" in feature.qual: if feature.qual["sec_str_type"] == "helix":
This script shows how :class:`Feature` objects are displayed in a plasmid map by using a custom 'toy' :class:`Annotation`. """ # Code source: Patrick Kunzmann # License: BSD 3 clause import matplotlib.pyplot as plt import numpy as np import biotite.sequence as seq import biotite.sequence.io.genbank as gb import biotite.sequence.graphics as graphics import biotite.database.entrez as entrez annotation = seq.Annotation([ seq.Feature("source", [seq.Location(0, 1500)], {"organism": "Escherichia coli"}), # Ori seq.Feature("rep_origin", [seq.Location(600, 700, seq.Location.Strand.REVERSE)], { "regulatory_class": "promoter", "note": "MyProm" }), # Promoter seq.Feature("regulatory", [seq.Location(1000, 1060)], { "regulatory_class": "promoter", "note": "MyProm" }), seq.Feature("protein_bind", [seq.Location(1025, 1045)], {"note": "repr"}),
def ss_csv_to_annotation(csv_path: str):
    """
    Create a secondary structure annotation from a per-residue CSV file.

    The file is read with ``pandas.read_csv()`` and must contain exactly
    two columns: the residue number followed by the secondary structure
    code of that residue.
    Consecutive rows with the same code are merged into one segment.
    Segments with the code ``"L"`` (linkers) are omitted, the codes
    ``"H"`` and ``"S"`` are stored as ``"helix"`` and ``"sheet"``,
    any other code is stored unchanged.

    Parameters
    ----------
    csv_path : str
        The CSV file to read, passed on to ``pandas.read_csv()``.

    Returns
    -------
    annotation : Annotation
        An annotation containing one ``"SecStr"`` feature per retained
        segment.
        The segment type is stored in the ``sec_str_type`` qualifier.

    Raises
    ------
    IndexError
        If the CSV file contains no data rows.
    """
    exported_ss = pd.read_csv(csv_path)
    # Plain (residue number, code) tuples
    residues = list(exported_ss.itertuples(index=False, name=None))

    # Take info for the first segment without knowing when it ends
    start_aa, current_ss = residues[0]
    # The last residue number marks where the final segment is closed
    seq_end_aa = residues[-1][0]

    # Collect the segments in a list instead of growing a DataFrame
    # row by row: 'DataFrame.append()' does not exist anymore in
    # pandas >= 2.0 and copied the entire frame on each call before
    segments = []
    for aa, ss in residues:
        # Only when a new segment is detected, the previous segment
        # is recorded; it ends one residue before the new one starts
        if ss != current_ss:
            segments.append((start_aa, aa - 1, current_ss))
            current_ss = ss
            start_aa = aa
        if aa == seq_end_aa:
            segments.append((start_aa, aa, current_ss))

    sec_str_names = {"H": "helix", "S": "sheet"}
    annotation = seq.Annotation()
    for first, last, ss_type in segments:
        if ss_type == "L":
            # Linkers are not relevant for plotting
            continue
        feature = seq.Feature(
            "SecStr",
            [seq.Location(first, last)],
            {"sec_str_type": sec_str_names.get(ss_type, ss_type)},
        )
        annotation.add_feature(feature)
    return annotation