def make_feature(product, blast_qresult, fragment, hit, hsp, fragstart, count):
    """Build a "DOMAIN" SeqFeature describing one BLAST HSP.

    Args:
        product: Value stored in the 'product' qualifier. When it equals
            'YR' the HSP coordinates are shifted by ``fragstart``.
        blast_qresult: Query result supplying ``program``, ``version`` and
            ``target`` for the 'program' and 'assembly' qualifiers.
        fragment: Record whose ``seq`` the feature is extracted from to
            produce the 'translation' qualifier.
        hit: Not used by this function; kept for interface compatibility.
        hsp: HSP supplying ``hit_start``, ``hit_end``, ``hit_strand`` and
            ``evalue``.
        fragstart: Offset subtracted from the HSP coordinates for 'YR'
            products; converted with ``int()``.
        count: Serial number stored in the 'serial_on_frag' qualifier.

    Returns:
        A ``(feature, count + 1)`` tuple.
    """
    # Only 'YR' products are shifted by fragstart; everything else keeps
    # the hit coordinates unchanged.
    offset = int(fragstart) if product == 'YR' else 0

    # The strand is set on the location: the ``strand`` keyword of
    # SeqFeature() was deprecated and later removed from Biopython, while
    # FeatureLocation accepts it in old and new releases alike.
    location = FeatureLocation(
        hsp.hit_start - offset,
        hsp.hit_end - offset,
        strand=hsp.hit_strand,
    )
    feature = SeqFeature(location, type="DOMAIN")

    # Unshifted coordinates, written with a 1-based start.
    feature.qualifiers['loc_on_contig'] = str(hsp.hit_start + 1) + '..' + str(hsp.hit_end)
    feature.qualifiers['product'] = product
    feature.qualifiers['serial_on_frag'] = count
    feature.qualifiers['program'] = blast_qresult.program + "_" + blast_qresult.version
    feature.qualifiers['evalue'] = hsp.evalue
    # Last path component of the BLAST target.
    feature.qualifiers['assembly'] = blast_qresult.target.split('/')[-1]
    # NOTE(review): `contig` is not a parameter or local of this function;
    # presumably a module-level name defined elsewhere -- confirm.
    feature.qualifiers['contig'] = contig
    feature.qualifiers['translation'] = feature.extract(fragment.seq).translate()
    return (feature, count + 1)
예제 #2
0
# Print the type and db_xref qualifier of every feature that contains snp.
for feature in rec.features:
    if snp not in feature:
        continue
    print(feature.type, feature.qualifiers.get("db_xref"))

# A SeqFeature doesn't contain a sequence itself; it describes a region of a
# parent sequence.
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
seq = Seq(
    "ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC")
# The strand is given on the location: the ``strand`` keyword of SeqFeature()
# was deprecated and later removed from Biopython, while FeatureLocation
# accepts it in old and new releases alike.
feature = SeqFeature(FeatureLocation(5, 18, strand=-1), type="gene")
# method 1: slice the parent by hand, then reverse-complement because the
# feature lies on the minus strand
feature_seq = seq[feature.location.start:feature.location.end]
feature_seq = feature_seq.reverse_complement()
print(feature_seq)
# method 2: let the feature do the slicing and the strand handling
feature_seq = feature.extract(seq)
print(feature_seq)

# we can't compare SeqRecords, but we can compare their attributes

# Literature citations are represented by Bio.SeqFeature.Reference

# format the SeqRecord as a file format, e.g. FASTA
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
rec = SeqRecord(Seq(
    "MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD"
    "GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK"
    "NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM"
    "SSAC"),
                id="gi|14150838|gb|AAK54648.1|AF376133_1",
예제 #3
0
# Print the forward_table entry for the codon "ATA"; standard_table is
# defined outside this snippet.
print(standard_table.forward_table["ATA"])

## Sect. sequence annotations

from Bio import SeqFeature
# Build a location from an AfterPosition start and a BetweenPosition end.
start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
my_location = SeqFeature.FeatureLocation(start_pos, end_pos)
print(my_location)
# Show each boundary converted to a plain integer, start first.
for boundary in (my_location.start, my_location.end):
    print(int(boundary))

example_parent = Seq("ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTT")
from Bio.SeqFeature import SeqFeature, FeatureLocation
# The strand is given on the location: the ``strand`` keyword of SeqFeature()
# was deprecated and later removed from Biopython, while FeatureLocation
# accepts it in old and new releases alike.
example_feature = SeqFeature(FeatureLocation(5, 18, strand=-1), type="gene")
# extract() slices the parent sequence and applies the strand.
feature_seq = example_feature.extract(example_parent)
print(feature_seq)

from Bio.Seq import Seq
seq = Seq("ATGAATGATAGCTGAT")
from Bio.SeqRecord import SeqRecord
# Supply the identifier and description at construction time, then add a
# free-form annotation before printing the record.
seq_rec = SeqRecord(seq, id="ABC12345", description="My own sequence.")
seq_rec.annotations["role"] = "unknown"
print(seq_rec)

from Bio import SeqIO
# Read the FASTA file as one record, then print the record and the length of
# its sequence.
fasta_path = "NC_005816.fna"
record = SeqIO.read(fasta_path, "fasta")
print(record)
print(len(record.seq))
예제 #4
0
# NOTE(review): at this point SeqFeature is used as a module (presumably bound
# by an earlier `from Bio import SeqFeature`); the import further down rebinds
# the name to the class -- confirm against the full script.
start_pos = SeqFeature.AfterPosition(5)
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
mylocation = SeqFeature.FeatureLocation(start_pos, end_pos)
# print() calls replace the Python 2 print statements, which are syntax
# errors on Python 3.
print(mylocation, mylocation.start, mylocation.end, int(mylocation.end))

for feature in record.features:
    if 4350 in feature:    # position 4350 lies inside this feature
        print(feature.type, feature.qualifiers.get('db_xref'))

from Bio.SeqFeature import SeqFeature, FeatureLocation
seqParent = Seq('ACCGAGACGGCAAAGGCTAGCATAGGTATGAGACTTCCTTCCTGCCAGTGCTGAGGAACTGGGAGCCTAC')
# Location [5:18] on the reverse strand. The strand is given on the location:
# the ``strand`` keyword of SeqFeature() was deprecated and later removed from
# Biopython, while FeatureLocation accepts it in old and new releases alike.
featu = SeqFeature(FeatureLocation(5, 18, strand=-1), type='gene')
print(featu)
featureSeq = seqParent[featu.location.start:featu.location.end].reverse_complement()
print(featureSeq)
# extract() gets the subsequence at location featu from seqParent
print(featu.extract(seqParent), len(featu.extract(seqParent)), len(featu), len(featu.location))

# References: publications that mention the sequence
# Bio.SeqFeature.Reference
#    journal: book, magazine, journal name
#    title, authors: of the paper
#    medline_id, pubmed_id: IDs in Medline and PubMed
#    comment: about the reference
#    location: to specify location in the sequence mentioned in the paper

# format: method to output as fasta or genbank formatted seq
# Bio.Alphabet was removed in Biopython 1.78; use the alphabet when the module
# is still available and fall back to a plain Seq otherwise.
try:
    from Bio.Alphabet import generic_protein
except ImportError:
    protein_seq = Seq('MMYQQCFASSAC')
else:
    protein_seq = Seq('MMYQQCFASSAC', generic_protein)
record = SeqRecord(protein_seq, id='gi|14150838|gb|AAK54648.1|AF376133_1', description='chalcone synthase [Cucumis sativus]')
print(record)    # normal SeqRecord output
print(record.format('fasta'))    # fasta output ready for saving in a file