コード例 #1
0
def main():
    (opts, args) = getoptions()

    # Load PWMs
    pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

    if opts.testseq is not None:
        if opts.seqtype == 'RNA':
            seq = Seq(opts.testseq,
                      IUPAC.IUPACUnambiguousRNA()).back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()
        else:
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
        final = scan_all(pssms, seq, opts)
        print final.to_csv(sep="\t", index=False)
    else:
        # Scan in sequence
        print >> sys.stderr, "Scanning sequences ",
        tic = time.time()
        for seqrecord in SeqIO.parse(open(args[0]), "fasta"):

            seq = seqrecord.seq
            if opts.seqtype == "RNA":
                seq = seq.back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()

            final = scan_all(pssms, seq, opts)
            print final.to_csv(sep="\t", index=False)

        toc = time.time()
        print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
コード例 #2
0
ファイル: motif_scan.py プロジェクト: miha-skalic/motif_scan
def main():
	(opts, args) = getoptions()

	# Load PWMs
	pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

	if opts.testseq is not None:
		if opts.seqtype == 'RNA':
			seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe()
			seq.alphabet = IUPAC.IUPACUnambiguousDNA()
		else:
			seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
		final = scan_all(pssms, seq, opts)
		print final.to_csv(sep="\t", index = False)
	else:
		# Scan in sequence
		print >> sys.stderr, "Scanning sequences ",
		tic = time.time()
		for seqrecord in SeqIO.parse(open(args[0]), "fasta"):

			seq = seqrecord.seq
			if opts.seqtype == "RNA":
				seq = seq.back_transcribe()
			seq.alphabet = IUPAC.IUPACUnambiguousDNA()

			final = scan_all(pssms, seq, opts)
			print final.to_csv(sep="\t", index = False)

		toc = time.time()
		print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
コード例 #3
0
def ConcatenatingSeq():
    """Demonstrate concatenation and case handling of ``Seq`` objects.

    Prints, in order: a protein and a DNA sequence joined after both were
    given the generic alphabet, three DNA sequences joined with a loop and
    again with ``sum``, the upper- and lower-cased forms of a mixed-case
    sequence, and two membership tests for ``"GTAC"``.
    """
    protein_seq = Seq("EVRNAK", IUPAC.protein)
    dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)

    # Adding protein_seq + dna_seq directly is marked as an error by the
    # original author; re-tagging both with the generic alphabet first
    # makes the addition possible.
    protein_seq.alphabet = generic_alphabet
    dna_seq.alphabet = generic_alphabet
    print(protein_seq + dna_seq)

    list_of_seqs = [
        Seq("ACGT", generic_dna),
        Seq("AACC", generic_dna),
        Seq("GGTT", generic_dna),
    ]

    # Variant 1: accumulate with an explicit loop.
    concatenated = Seq("", generic_dna)
    for s in list_of_seqs:
        concatenated += s
    print('concatenated=', concatenated)

    # Variant 2: let sum() do the accumulation from an empty start value.
    con = sum(list_of_seqs, Seq("", generic_dna))
    print('con=', con)

    dna_seq = Seq("acgtACGT", generic_dna)
    # Label fixed: the original printed 'unper=' for the upper-cased sequence.
    print('upper=', dna_seq.upper())
    print('lower=', dna_seq.lower())
    # Same membership test against the mixed-case and the upper-cased sequence.
    print("GTAC" in dna_seq)
    print("GTAC" in dna_seq.upper())
コード例 #4
0
 def translate(self):
     """Return the translation of this object's sequence.

     The sequence from ``self.seq()`` is rebuilt starting at ``self.frame``
     with ``self.alphabet``; if that construction raises ``TypeError`` the
     alphabet is assigned onto the existing sequence instead.  When
     translating raises ``AssertionError`` and the sequence's alphabet
     pickles identically to ``DEFAULT_ALPHABET``, the alphabet is replaced
     by ``DEFAULT_ALPHABET`` and translation is retried; otherwise the
     ``AssertionError`` propagates.
     """
     sequence = self.seq()
     try:
         framed_text = sequence.tostring()[self.frame:]
         sequence = Seq(framed_text, self.alphabet)
     except TypeError:
         sequence.alphabet = self.alphabet

     try:
         return sequence.translate()  # default table
     except AssertionError:
         # if the feature was pickled then we have problems
         import cPickle
         current_pickle = cPickle.dumps(sequence.alphabet)
         default_pickle = cPickle.dumps(DEFAULT_ALPHABET)
         if current_pickle != default_pickle:
             raise
         sequence.alphabet = DEFAULT_ALPHABET
         return sequence.translate()
コード例 #5
0
 def translate(self):
     """Return the translation of this object's sequence.

     The sequence from ``self.seq()`` is rebuilt starting at ``self.frame``
     with ``self.alphabet``; if that construction raises ``TypeError`` the
     alphabet is assigned onto the existing sequence instead.  Translation
     is delegated to ``standard_translator``.  When it raises
     ``AssertionError`` and the sequence's alphabet pickles identically to
     ``DEFAULT_ALPHABET``, the alphabet is replaced by ``DEFAULT_ALPHABET``
     and translation is retried; otherwise the ``AssertionError``
     propagates.
     """
     sequence = self.seq()
     try:
         framed_text = sequence.tostring()[self.frame:]
         sequence = Seq(framed_text, self.alphabet)
     except TypeError:
         sequence.alphabet = self.alphabet

     try:
         return standard_translator.translate(sequence)
     except AssertionError:
         # if the feature was pickled then we have problems
         import cPickle
         current_pickle = cPickle.dumps(sequence.alphabet)
         default_pickle = cPickle.dumps(DEFAULT_ALPHABET)
         if current_pickle != default_pickle:
             raise
         sequence.alphabet = DEFAULT_ALPHABET
         return standard_translator.translate(sequence)
コード例 #6
0
ファイル: seq.py プロジェクト: christianbecke/Cre-ACEMBLER
def cre (*args):
	"""Build one combined record per circular permutation of the inputs.

	Every argument is passed through ``decre`` and the resulting records are
	pooled.  Records with identical sequence text share the index of their
	first occurrence, so ``circular_permutations`` sees duplicates as the
	same element.  For each permutation the member sequences are
	concatenated into a new ``SeqRecord`` tagged with ``generic_dna``; its
	``CREACEMBLER_TAG`` annotation is the member names joined by ``"_x_"``
	(members without a name get ``seqNN`` from their 1-based position).

	Returns the list of combined records.
	"""
	in_srecs = []
	for arg in args:
		in_srecs += decre (arg)

	# Map each record to the index of the first record with the same sequence.
	# setdefault replaces the Python-2-only has_key test, and enumerate
	# replaces the xrange(len(...)) index loop.
	seen_seqs = {}
	index_list = []
	for i, srec in enumerate (in_srecs):
		index_list.append (seen_seqs.setdefault (str (srec.seq), i))

	out_srecs = []
	for p in circular_permutations (index_list):
		s = Seq ("")
		names = []
		# Positions are 1-based because they feed the fallback "seqNN" name.
		for i, index in enumerate (p, 1):
			srec = in_srecs[index]
			s += srec.seq
			name = srec.annotations.get (CREACEMBLER_TAG, None)
			if not name:
				name = "seq%02d" % (i,)
			names.append (name)
		s.alphabet = generic_dna
		combined_srec = SeqRecord (s)
		combined_srec.annotations[CREACEMBLER_TAG] = "_x_".join (names)
		out_srecs.append (combined_srec)
	return out_srecs
コード例 #7
0
ファイル: seq.py プロジェクト: christianbecke/Cre-ACEMBLER
def decre (srec):
	"""Split a record at its loxP sites into one record per fragment.

	The upper-cased sequence is searched for ``LOXPFWD`` and ``LOXPREV``.
	It is split at whichever site is present; fragments obtained from
	``LOXPREV`` are reverse-complemented.  The text before the first site is
	appended to the text after the last site before the leading piece is
	dropped (presumably because the molecule is circular -- confirm).  Each
	fragment is returned as a new ``SeqRecord`` whose sequence is
	``LOXPFWD`` followed by the fragment, tagged with ``generic_dna``.

	The ``CREACEMBLER_TAG`` annotation of each output record is the input's
	annotation (default ``"seq"``), suffixed with a two-digit 1-based
	counter when more than one fragment is produced.

	Raises:
		FwdAndRevLoxSite: both site orientations occur in the sequence.
		NoLoxSite: neither site occurs in the sequence.
	"""
	# Upper-case once and reuse; the original recomputed this three times.
	upper_seq = srec.seq.upper ()
	n_fwd = upper_seq.count (LOXPFWD)
	n_rev = upper_seq.count (LOXPREV)

	if n_fwd > 0 and n_rev > 0:
		raise FwdAndRevLoxSite (srec)

	if n_fwd > 0:
		sep = LOXPFWD
		need_revcomp = False
	elif n_rev > 0:
		sep = LOXPREV
		need_revcomp = True
	else:
		raise NoLoxSite (srec)

	original_name = srec.annotations.get (CREACEMBLER_TAG, "seq")
	parts = upper_seq.split (sep)
	# Join the tail of the sequence onto its head, then drop the head piece.
	parts[-1] += parts[0]
	parts = parts[1:]

	out = []
	for count, p in enumerate (parts, 1):
		if need_revcomp:
			p = p.reverse_complement ()
		s = Seq (LOXPFWD) + p
		s.alphabet = generic_dna
		# A separate name keeps the ``srec`` parameter from being rebound.
		part_srec = SeqRecord (s)
		if len (parts) > 1:
			name = "%s%02d" % (original_name, count)
		else:
			name = original_name
		part_srec.annotations[CREACEMBLER_TAG] = name
		out.append (part_srec)
	return out
コード例 #8
0
ファイル: lecture13.py プロジェクト: villegar/BIO792
# Turning a Seq into text and embedding it in a FASTA-style string.
# NOTE(review): my_seq is defined earlier in the script, outside this excerpt.
# The bare str(...) call discards its result when run as a script; it only
# shows a value in an interactive session.
str(my_seq) 
print(my_seq)
fasta_format_string = ">Name\n%s\n" % my_seq 
print(fasta_format_string)


# Concatenating or adding sequences
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
protein_seq = Seq("EVRNAK", IUPAC.protein)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)
# protein_seq + dna_seq
# The addition above is left commented out because the author expects it to
# raise an error for these two alphabets.

# Re-tag both sequences with the generic alphabet, then add them.
# The sum is a bare expression: its value is only visible interactively.
from Bio.Alphabet import generic_alphabet 
protein_seq.alphabet = generic_alphabet 
dna_seq.alphabet = generic_alphabet
protein_seq + dna_seq

# Joining a list of sequences with sum(), starting from an empty Seq.
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
list_of_seqs = [Seq("ACGT", generic_dna), Seq("AACC", generic_dna), Seq("GGTT", generic_dna)] 
sum(list_of_seqs, Seq("", generic_dna))


# Changing case
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna 
dna_seq = Seq("acgtACGT", generic_dna) 
# Bare expressions again: the sequence itself, then its upper-cased form.
dna_seq
dna_seq.upper()
コード例 #9
0
# Print the sequence as text and as a FASTA-style record.
# NOTE(review): my_seq, Seq and IUPAC come from earlier in the script,
# outside this excerpt.
print(str(my_seq))
fasta_format_string = ">Name\n%s\n" % my_seq
print(fasta_format_string)
#print(my_seq.tostring())

# Sequence concatenation
protein_seq = Seq("EVRNAK", IUPAC.protein)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)
try:
    print(protein_seq + dna_seq)
except TypeError:
    # Biopython reports incompatible alphabets with TypeError; catching only
    # that keeps unrelated errors (and KeyboardInterrupt) visible.
    print("字母表不兼容,连接失败!")

# Convert both alphabets to a compatible (generic) type
from Bio.Alphabet import generic_alphabet
protein_seq.alphabet = generic_alphabet
dna_seq.alphabet = generic_alphabet
try:
    print(protein_seq + "--" + dna_seq)
    print("字母表兼容,连接成功!")
except TypeError:
    print("字母表不兼容,连接失败!")

# How the alphabet changes after concatenation
from Bio.Alphabet import generic_nucleotide
nuc_seq = Seq("GATCGATGC", generic_nucleotide)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)

print(nuc_seq.alphabet, "+", dna_seq.alphabet, "=", (nuc_seq + dna_seq).alphabet)

# Changing case
コード例 #10
0
ファイル: MyNucs.py プロジェクト: yvancouver/BioPy
#!/usr/bin/python

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
seq = raw_input("enter your sequence:  ")

my_s = Seq(seq)
print my_s
my_s.alphabet()
s2 = Seq('ACGTACGTACGTACGT', IUPAC.IUPACAmbiguousDNA())
try:
    assert (isinstance (IUPAC.Alphabet._get_base_alphabet(s2.alphabet), IUPAC.ExtendedIUPACDNA))
except AssertionError:
    print "wrong type"

isinstance (IUPAC.Alphabet._get_base_alphabet(s2.alphabet), IUPAC.ExtendedIUPACDNA)
#>>> False

my_seq = Seq(seq, IUPAC.extended_dna)

print "original\t\t", my_seq
print "complement\t\t", my_seq.complement()
print "reverse_complement\t", my_seq.reverse_complement()
コード例 #11
0
import Bio
from Bio.Seq import Seq

# The package version lives in ``Bio.__version__``; ``Bio._Version_`` does
# not exist and raised AttributeError.
print(Bio.__version__)
my_sequence = Seq('AGTATACTATGTGCATAGTCAGTCAGTCGA')
print(my_sequence)
# ``alphabet`` is an attribute, not a method, so it must not be called.
print(my_sequence.alphabet)
my_sequence_complement = my_sequence.complement()
print(my_sequence_complement)
コード例 #12
0
    parser.add_argument("--max_primers_to_return", type=int, default=5)
    args = parser.parse_args()

    slop = args.slop
    alphabet = Alphabet()
    alphabet.letters = ["A", "T", "C", "G", "[", "]"]

    with open(args.flagged, "w") as fh:
        for seq_record in SeqIO.parse(args.fasta, "fasta"):
            if args.primer3:
                sequence = str(seq_record.seq)
                fh.write("SEQUENCE_ID=%s\n" % seq_record.id)
                fh.write("SEQUENCE_TEMPLATE=%s\n" % sequence)
                fh.write("SEQUENCE_TARGET=%i,%i\n" % (slop, len(sequence) - 2 * slop))
                fh.write("PRIMER_OPT_SIZE=%i\n" % args.opt_primer_size)
                fh.write("PRIMER_MIN_SIZE=%i\n" % args.min_primer_size)
                fh.write("PRIMER_MAX_SIZE=%i\n" % args.max_primer_size)
                fh.write("PRIMER_NUM_RETURN=%i\n" % args.max_primers_to_return)
                fh.write("=\n")
            else:
                sequence = Seq(
                    str(seq_record.seq)[:slop]
                    + "["
                    + str(seq_record.seq)[slop:-slop]
                    + "]"
                    + str(seq_record.seq)[-slop:]
                )
                sequence.alphabet = alphabet
                flagged_seq_record = SeqRecord(sequence, id=seq_record.id, description="")
                SeqIO.write([flagged_seq_record], fh, "fasta")
コード例 #13
0
# Counting, slicing and converting a Seq.
# NOTE(review): myseq, Seq, IUPAC and GC come from earlier in the script,
# outside this excerpt.
print myseq.count('aa')    # author's note: case must match; matches are counted without overlap
print GC(myseq)    # GC content

print myseq[2:10]    # author's note: the slice is a new Seq with the same alphabet
myseq2 = myseq[::3]
print myseq2, myseq2.alphabet
print myseq[::-1]

print str(myseq), type(str(myseq))    # explicit conversion to a plain string
# Concatenating sequences
protseq = Seq('EVRNAK',IUPAC.protein)
dnaseq = Seq('ACGT',IUPAC.unambiguous_dna)
# print protseq+dnaseq    # author's note: seqs with different alphabets can't be concatenated
print myseq+dnaseq
from Bio.Alphabet import generic_alphabet
protseq.alphabet = generic_alphabet    # re-tag both sequences with generic_alphabet ...
dnaseq.alphabet = generic_alphabet    # ... so that they can be concatenated
print protseq+dnaseq

# Adding a generic nucleotide sequence to an unambiguous DNA sequence.
from Bio.Alphabet import generic_nucleotide
nucseq = Seq('GATCGATGC',generic_nucleotide)
dnaseq = Seq('ACGT',IUPAC.unambiguous_dna)
print nucseq.alphabet, dnaseq.alphabet
print nucseq + dnaseq, (nucseq + dnaseq).alphabet    # author's note: parent + child = parent type

# Joining a list of sequences by accumulating onto an empty Seq.
from Bio.Alphabet import generic_dna
list_seqs = [Seq('ACGT', generic_dna), Seq('CCGG', generic_dna), Seq('TTATT', generic_dna)]
concatenated = Seq('',generic_dna)
for seq in list_seqs:
    concatenated += seq
print concatenated
コード例 #14
0
from Bio.Alphabet import IUPAC, generic_alphabet
from Bio.Seq import Seq

dna = Seq("ATGGTTAGTCTTTAA", IUPAC.unambiguous_dna)
protein = dna.translate()
# Try to concatenate the DNA sequence with its protein translation.
try:
    concetencate = dna + protein
    print("Concatenated successfully")
except TypeError:
    # The direct attempt fails because dna and protein use different
    # alphabets.  Only TypeError is caught so that unrelated failures are
    # not hidden.  Both sequences get the same generic alphabet, then the
    # join is retried.
    dna.alphabet = generic_alphabet
    protein.alphabet = generic_alphabet
    print(dna + protein)