def make_feature(product, blast_qresult, fragment, hit, hsp, fragstart, count):
    """Build a ``DOMAIN`` feature for one BLAST HSP.

    Args:
        product: Value stored in the ``product`` qualifier. When it equals
            ``'YR'`` the feature coordinates are shifted by ``fragstart``.
        blast_qresult: Object whose ``program``, ``version`` and ``target``
            attributes are read to fill the ``program`` and ``assembly``
            qualifiers.
        fragment: Object whose ``seq`` the feature is extracted from to
            produce the ``translation`` qualifier.
        hit: Not used by this function; kept so existing callers still work.
        hsp: Object supplying ``hit_start``, ``hit_end``, ``hit_strand`` and
            ``evalue``.
        fragstart: Offset (converted with ``int``) subtracted from the hit
            coordinates when ``product == 'YR'``.
        count: Serial number stored in the ``serial_on_frag`` qualifier.

    Returns:
        A ``(feature, count + 1)`` tuple.
    """
    start = hsp.hit_start
    end = hsp.hit_end
    if product == 'YR':
        # 'YR' features are positioned relative to the fragment start.
        offset = int(fragstart)
        start = hsp.hit_start - offset
        end = hsp.hit_end - offset

    # The strand is set on the location rather than passed as ``strand=`` to
    # SeqFeature: recent Biopython releases no longer accept that constructor
    # argument, while a strand on the location works on old and new releases.
    feature = SeqFeature(
        FeatureLocation(start, end, strand=hsp.hit_strand),
        type="DOMAIN",
    )

    # 1-based start / inclusive end of the unshifted hit coordinates.
    feature.qualifiers['loc_on_contig'] = str(hsp.hit_start + 1) + '..' + str(hsp.hit_end)
    feature.qualifiers['product'] = product
    feature.qualifiers['serial_on_frag'] = count
    count += 1
    feature.qualifiers['program'] = blast_qresult.program + "_" + blast_qresult.version
    feature.qualifiers['evalue'] = hsp.evalue
    # Keep only the last path component of the BLAST target.
    feature.qualifiers['assembly'] = blast_qresult.target.split('/')[-1]
    # NOTE(review): ``contig`` is not a parameter or a local; it is resolved
    # from the enclosing scope. Confirm that it is defined wherever this
    # function is called.
    feature.qualifiers['contig'] = contig
    feature.qualifiers['translation'] = feature.extract(fragment.seq).translate()
    return (feature, count)
for feature in rec.features: if snp in feature: print(feature.type, feature.qualifiers.get("db_xref")) # SeqFeatures doesn't contain a sequence (it has a parent sequence) from Bio.Seq import Seq from Bio.SeqFeature import SeqFeature, FeatureLocation seq = Seq( "ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC") feature = SeqFeature(FeatureLocation(5, 18), type="gene", strand=-1) # method 1 feature_seq = seq[feature.location.start:feature.location.end] feature_seq = feature_seq.reverse_complement() print(feature_seq) # method 2 feature_seq = feature.extract(seq) print(feature_seq) # we can't compare SeqRecords, but we can compare their attributes # References for citation in SeqFeature.References # format the SeqRecord to fiel format e.g. FASTA from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord rec = SeqRecord(Seq( "MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD" "GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK" "NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM" "SSAC"), id="gi|14150838|gb|AAK54648.1|AF376133_1",
print(standard_table.forward_table["ATA"])

## Sect. sequence annotations
# Here ``SeqFeature`` is the Bio.SeqFeature *module*; it is rebound to the
# class of the same name by the later ``from Bio.SeqFeature import ...`` line,
# so the order of these statements matters.
from Bio import SeqFeature

start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
my_location = SeqFeature.FeatureLocation(start_pos, end_pos)
print(my_location)
# int() is applied to each position to print it as a plain integer.
print(int(my_location.start))
print(int(my_location.end))

example_parent = Seq("ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTT")
from Bio.SeqFeature import SeqFeature, FeatureLocation

# The strand is given to the location: recent Biopython releases no longer
# accept ``strand=`` on the SeqFeature constructor.
example_feature = SeqFeature(FeatureLocation(5, 18, strand=-1), type="gene")
feature_seq = example_feature.extract(example_parent)
print(feature_seq)

# Build a SeqRecord by hand and fill in its metadata.
from Bio.Seq import Seq

seq = Seq("ATGAATGATAGCTGAT")
from Bio.SeqRecord import SeqRecord

seq_rec = SeqRecord(seq)
seq_rec.id = "ABC12345"
seq_rec.description = "My own sequence."
seq_rec.annotations["role"] = "unknown"
print(seq_rec)

# Read a single record from a FASTA file.
from Bio import SeqIO

record = SeqIO.read("NC_005816.fna", "fasta")
print(record)
print(len(record.seq))
# Build a location from two position objects. At this point ``SeqFeature`` is
# used as a namespace providing the position and location classes; the import
# further down rebinds the name to the SeqFeature class.
start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
mylocation = SeqFeature.FeatureLocation(start_pos, end_pos)
print(mylocation, mylocation.start, mylocation.end, int(mylocation.end))

for feature in record.features:
    if 4350 in feature:  # if position 4350 is in any feature
        print(feature.type, feature.qualifiers.get('db_xref'))

from Bio.SeqFeature import SeqFeature, FeatureLocation

seqParent = Seq('ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC')
# location [5:18] in reverse_complement; the strand is set on the location
# because recent Biopython releases no longer accept ``strand=`` on SeqFeature.
featu = SeqFeature(FeatureLocation(5, 18, strand=-1), type='gene')
print(featu)
featureSeq = seqParent[featu.location.start:featu.location.end].reverse_complement()
print(featureSeq)
# extract gets the subseq in location featu from seqParent
print(featu.extract(seqParent), len(featu.extract(seqParent)), len(featu), len(featu.location))

# References publications that mention it
# Bio.SeqFeature.Reference
# journal: book, magazine, journal name
# title, authors: of the paper
# medline_id, pubmed_id: IDs in Medline and PubMed
# comment: about the reference
# location: to specify location in the sequence mentioned in the paper

# format: method to output as fasta or genbank formatted seq
# Bio.Alphabet was removed in Biopython 1.78, so the protein nature of the
# sequence is recorded through the ``molecule_type`` annotation instead of an
# alphabet argument to Seq.
record = SeqRecord(
    Seq('MMYQQCFASSAC'),
    id='gi|14150838|gb|AAK54648.1|AF376133_1',
    description='chalcone synthase [Cucumis sativus]',
    annotations={'molecule_type': 'protein'},
)
print(record)  # normal Seq output
print(record.format('fasta'))  # fasta output ready for saving in a file