def main():
    """Scan a test sequence, or every record of a FASTA file, against the PWMs.

    Options and positional arguments come from ``getoptions()`` and the
    motifs from ``load_motifs()``.  When ``opts.testseq`` is set, that single
    sequence is scanned; otherwise ``args[0]`` is parsed as a FASTA file and
    each record is scanned in turn.  Each ``scan_all()`` result is printed to
    stdout through its ``to_csv(sep="\\t", index=False)``; progress and
    timing messages go to stderr.
    """
    (opts, args) = getoptions()

    # Load PWMs
    pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

    if opts.testseq is not None:
        if opts.seqtype == 'RNA':
            # RNA input is back-transcribed and then relabelled as DNA.
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()
        else:
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
        final = scan_all(pssms, seq, opts)
        print(final.to_csv(sep="\t", index=False))
    else:
        # Scan in sequence
        sys.stderr.write("Scanning sequences ")
        tic = time.time()
        # The context manager closes the FASTA file even if scanning fails.
        with open(args[0]) as handle:
            for seqrecord in SeqIO.parse(handle, "fasta"):
                seq = seqrecord.seq
                if opts.seqtype == "RNA":
                    seq = seq.back_transcribe()
                # NOTE(review): the original indentation was lost; the DNA
                # alphabet is applied to every record here, which also covers
                # the RNA-only reading -- confirm against the upstream script.
                seq.alphabet = IUPAC.IUPACUnambiguousDNA()
                final = scan_all(pssms, seq, opts)
                print(final.to_csv(sep="\t", index=False))
        toc = time.time()
        sys.stderr.write("done in %0.2f seconds!" % (float(toc - tic)) + "\n")
def main():
    """Scan a test sequence, or every record of a FASTA file, against the PWMs.

    Options and positional arguments come from ``getoptions()`` and the
    motifs from ``load_motifs()``.  When ``opts.testseq`` is set, that single
    sequence is scanned; otherwise ``args[0]`` is parsed as a FASTA file and
    each record is scanned in turn.  Each ``scan_all()`` result is printed to
    stdout through its ``to_csv(sep="\\t", index=False)``; progress and
    timing messages go to stderr.
    """
    (opts, args) = getoptions()

    # Load PWMs
    pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

    if opts.testseq is not None:
        if opts.seqtype == 'RNA':
            # RNA input is back-transcribed and then relabelled as DNA.
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()
        else:
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
        final = scan_all(pssms, seq, opts)
        print(final.to_csv(sep="\t", index=False))
    else:
        # Scan in sequence
        sys.stderr.write("Scanning sequences ")
        tic = time.time()
        # The context manager closes the FASTA file even if scanning fails.
        with open(args[0]) as handle:
            for seqrecord in SeqIO.parse(handle, "fasta"):
                seq = seqrecord.seq
                if opts.seqtype == "RNA":
                    seq = seq.back_transcribe()
                # NOTE(review): the original indentation was lost; the DNA
                # alphabet is applied to every record here, which also covers
                # the RNA-only reading -- confirm against the upstream script.
                seq.alphabet = IUPAC.IUPACUnambiguousDNA()
                final = scan_all(pssms, seq, opts)
                print(final.to_csv(sep="\t", index=False))
        toc = time.time()
        sys.stderr.write("done in %0.2f seconds!" % (float(toc - tic)) + "\n")
def ConcatenatingSeq():
    """Demonstrate concatenating Seq objects and changing their case.

    Every result is printed; nothing is returned.
    """
    peptide = Seq("EVRNAK", IUPAC.protein)
    nucleotide = Seq("ACGT", IUPAC.unambiguous_dna)
    # Adding the two as they stand is an error, so both are first switched
    # to the generic alphabet.
    for each in (peptide, nucleotide):
        each.alphabet = generic_alphabet
    print(peptide + nucleotide)

    fragments = [Seq(letters, generic_dna) for letters in ("ACGT", "AACC", "GGTT")]

    # Accumulate with an explicit loop ...
    joined = Seq("", generic_dna)
    for fragment in fragments:
        joined += fragment
    print('concatenated=', joined)

    # ... and again with sum(), using an empty Seq as the start value.
    print('con=', sum(fragments, Seq("", generic_dna)))

    mixed_case = Seq("acgtACGT", generic_dna)
    print('unper=', mixed_case.upper())
    print('lower=', mixed_case.lower())
    # Membership is tested against the sequence as stored, then upper-cased.
    print("GTAC" in mixed_case)
    print("GTAC" in mixed_case.upper())
def translate(self):
    """Translate this object's sequence, starting at ``self.frame``.

    The sequence from ``self.seq()`` is rebuilt as a ``Seq`` of its string
    form from ``self.frame`` onwards, using ``self.alphabet``.  If that
    raises ``TypeError``, the sequence is kept whole and only its alphabet
    is replaced.  The result of ``translate()`` (default table) is returned.

    Raises:
        AssertionError: re-raised when translation fails and the sequence's
            alphabet does not pickle identically to ``DEFAULT_ALPHABET``.
    """
    sequence = self.seq()
    try:
        sequence = Seq(sequence.tostring()[self.frame:], self.alphabet)
    except TypeError:
        sequence.alphabet = self.alphabet

    try:
        return sequence.translate()  # default table
    except AssertionError:
        # If the feature was pickled, its alphabet is an equal but distinct
        # object; compare pickled forms and swap in the canonical instance.
        import cPickle
        alphabet_matches = (
            cPickle.dumps(sequence.alphabet) == cPickle.dumps(DEFAULT_ALPHABET)
        )
        if not alphabet_matches:
            raise
        sequence.alphabet = DEFAULT_ALPHABET
        return sequence.translate()
def translate(self):
    """Translate this object's sequence with ``standard_translator``.

    The sequence from ``self.seq()`` is rebuilt as a ``Seq`` of its string
    form from ``self.frame`` onwards, using ``self.alphabet``.  If that
    raises ``TypeError``, the sequence is kept whole and only its alphabet
    is replaced.  The result of ``standard_translator.translate()`` is
    returned.

    Raises:
        AssertionError: re-raised when translation fails and the sequence's
            alphabet does not pickle identically to ``DEFAULT_ALPHABET``.
    """
    sequence = self.seq()
    try:
        sequence = Seq(sequence.tostring()[self.frame:], self.alphabet)
    except TypeError:
        sequence.alphabet = self.alphabet

    try:
        return standard_translator.translate(sequence)
    except AssertionError:
        # If the feature was pickled, its alphabet is an equal but distinct
        # object; compare pickled forms and swap in the canonical instance.
        import cPickle
        alphabet_matches = (
            cPickle.dumps(sequence.alphabet) == cPickle.dumps(DEFAULT_ALPHABET)
        )
        if not alphabet_matches:
            raise
        sequence.alphabet = DEFAULT_ALPHABET
        return standard_translator.translate(sequence)
def cre(*args):
    """Build one combined SeqRecord per circular permutation of the inputs.

    Every argument is passed through ``decre()`` and the resulting records
    are pooled.  Records with identical sequence text are represented by the
    index of the first such record.  For each ordering yielded by
    ``circular_permutations()`` over those indices, the sequences are
    concatenated in order into a new ``SeqRecord`` with the ``generic_dna``
    alphabet.

    The combined record's ``CREACEMBLER_TAG`` annotation is the parts' own
    tag values joined with ``"_x_"``; a part without a tag is named
    ``"seqNN"`` after its 1-based position in the ordering.

    Returns:
        list of the combined ``SeqRecord`` objects.
    """
    in_srecs = []
    for arg in args:
        in_srecs.extend(decre(arg))

    # setdefault() stores the index only the first time a sequence is seen
    # and always returns the stored (first) index.
    first_seen = {}
    index_list = []
    for i, srec in enumerate(in_srecs):
        index_list.append(first_seen.setdefault(str(srec.seq), i))

    out_srecs = []
    for p in circular_permutations(index_list):
        s = Seq("")
        names = []
        for position, index in enumerate(p, 1):
            srec = in_srecs[index]
            s += srec.seq
            name = srec.annotations.get(CREACEMBLER_TAG, None)
            if not name:
                name = "seq%02d" % (position,)
            names.append(name)
        s.alphabet = generic_dna
        combined_srec = SeqRecord(s)
        combined_srec.annotations[CREACEMBLER_TAG] = "_x_".join(names)
        out_srecs.append(combined_srec)
    return out_srecs
def decre(srec):
    """Split a record at its lox sites into separate SeqRecords.

    The upper-cased sequence is searched for ``LOXPFWD`` and ``LOXPREV``.
    It is split at whichever site is present, and the piece after the last
    site is joined with the piece before the first one (presumably because
    the input is circular -- confirm with callers).  When the reverse site
    was used, each piece is reverse-complemented.  Every piece is prefixed
    with ``LOXPFWD`` and wrapped in a new ``SeqRecord`` with the
    ``generic_dna`` alphabet.

    Each new record's ``CREACEMBLER_TAG`` annotation is the input's tag
    (default ``"seq"``), with a two-digit counter appended when more than
    one piece is produced.

    Raises:
        FwdAndRevLoxSite: both site orientations occur in the sequence.
        NoLoxSite: neither site occurs in the sequence.
    """
    upper_seq = srec.seq.upper()
    has_fwd = upper_seq.count(LOXPFWD) > 0
    has_rev = upper_seq.count(LOXPREV) > 0

    if has_fwd and has_rev:
        raise FwdAndRevLoxSite(srec)
    if not (has_fwd or has_rev):
        raise NoLoxSite(srec)

    if has_fwd:
        site, flip = LOXPFWD, False
    else:
        site, flip = LOXPREV, True

    base_name = srec.annotations.get(CREACEMBLER_TAG, "seq")

    pieces = upper_seq.split(site)
    # Wrap around: the tail after the last site continues into the head
    # before the first site.
    pieces[-1] += pieces[0]
    pieces = pieces[1:]
    numbered = len(pieces) > 1

    results = []
    for number, piece in enumerate(pieces, 1):
        if flip:
            piece = piece.reverse_complement()
        body = Seq(LOXPFWD) + piece
        body.alphabet = generic_dna
        record = SeqRecord(body)
        if numbered:
            label = "%s%02d" % (base_name, number)
        else:
            label = base_name
        record.annotations[CREACEMBLER_TAG] = label
        results.append(record)
    return results
# String conversion, and a FASTA-style record built with plain formatting.
str(my_seq)
print(my_seq)
fasta_format_string = ">Name\n%s\n" % my_seq
print(fasta_format_string)

# Names used by the remaining steps, each imported once.
from Bio.Alphabet import IUPAC, generic_alphabet, generic_dna
from Bio.Seq import Seq

# Concatenating or adding sequences
protein_seq = Seq("EVRNAK", IUPAC.protein)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)
# protein_seq + dna_seq  ## Error expected

# Give both sequences the generic alphabet before adding them.
protein_seq.alphabet = generic_alphabet
dna_seq.alphabet = generic_alphabet
protein_seq + dna_seq  # result is discarded

list_of_seqs = [
    Seq("ACGT", generic_dna),
    Seq("AACC", generic_dna),
    Seq("GGTT", generic_dna),
]
sum(list_of_seqs, Seq("", generic_dna))  # result is discarded

# Changing case
dna_seq = Seq("acgtACGT", generic_dna)
dna_seq
dna_seq.upper()
print(str(my_seq))
fasta_format_string = ">Name\n%s\n" % my_seq
print(fasta_format_string)
#print(my_seq.tostring())

# Sequence concatenation
protein_seq = Seq("EVRNAK", IUPAC.protein)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)
try:
    print(protein_seq + dna_seq)
except TypeError:
    # Only the alphabet-mismatch error is handled; anything else propagates.
    print("字母表不兼容,连接失败!")

# Convert both alphabets to a compatible type
from Bio.Alphabet import generic_alphabet
protein_seq.alphabet = generic_alphabet
dna_seq.alphabet = generic_alphabet
try:
    print(protein_seq + "--" + dna_seq)
    print("字母表兼容,连接成功!")
except TypeError:
    print("字母表不兼容,连接失败!")

# How the alphabet changes after concatenation
from Bio.Alphabet import generic_nucleotide
nuc_seq = Seq("GATCGATGC", generic_nucleotide)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)
print(nuc_seq.alphabet, "+", dna_seq.alphabet, "=", (nuc_seq + dna_seq).alphabet)

# Changing case
#!/usr/bin/python
# Read a sequence from the user and print it, its alphabet, its complement
# and its reverse complement.
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

seq = read_line("enter your sequence: ")
my_s = Seq(seq)
print(my_s)
# alphabet is an attribute holding an object, not a method: calling it fails.
print(my_s.alphabet)

s2 = Seq('ACGTACGTACGTACGT', IUPAC.IUPACAmbiguousDNA())
# An explicit check instead of assert/except AssertionError, so the message
# is still printed when Python runs with -O (which strips asserts).
# The original script noted that this isinstance() evaluates to False.
is_extended_dna = isinstance(
    IUPAC.Alphabet._get_base_alphabet(s2.alphabet), IUPAC.ExtendedIUPACDNA
)
if not is_extended_dna:
    print("wrong type")

my_seq = Seq(seq, IUPAC.extended_dna)
# Label and value are joined into one argument so the call is valid on both
# Python 2 and Python 3.
print("original\t\t" + str(my_seq))
print("complement\t\t" + str(my_seq.complement()))
print("reverse_complement\t" + str(my_seq.reverse_complement()))
# Print the Biopython version, a sequence, its alphabet and its complement.
import Bio
from Bio.Seq import Seq

# The version string is exposed as the dunder attribute __version__.
print(Bio.__version__)

my_sequence = Seq('AGTATACTATGTGCATAGTCAGTCAGTCGA')
print(my_sequence)
# alphabet is an attribute, not a method.
print(my_sequence.alphabet)

my_sequence_complement = my_sequence.complement()
print(my_sequence_complement)
parser.add_argument("--max_primers_to_return", type=int, default=5)
args = parser.parse_args()
slop = args.slop

# Alphabet that also admits the "[" and "]" markers written around the target.
alphabet = Alphabet()
alphabet.letters = ["A", "T", "C", "G", "[", "]"]

# For every FASTA record write either a primer3 input stanza or a FASTA
# record with the region between the two `slop`-sized flanks in brackets.
with open(args.flagged, "w") as fh:
    for seq_record in SeqIO.parse(args.fasta, "fasta"):
        bases = str(seq_record.seq)
        if args.primer3:
            sequence = bases
            fh.write("SEQUENCE_ID=%s\n" % seq_record.id)
            fh.write("SEQUENCE_TEMPLATE=%s\n" % sequence)
            # Target is written as <start>,<length>.
            fh.write("SEQUENCE_TARGET=%i,%i\n" % (slop, len(sequence) - 2 * slop))
            fh.write("PRIMER_OPT_SIZE=%i\n" % args.opt_primer_size)
            fh.write("PRIMER_MIN_SIZE=%i\n" % args.min_primer_size)
            fh.write("PRIMER_MAX_SIZE=%i\n" % args.max_primer_size)
            fh.write("PRIMER_NUM_RETURN=%i\n" % args.max_primers_to_return)
            fh.write("=\n")
        else:
            # With slop == 0 the slice bound -slop is -0 == 0, which would
            # make the bracketed middle empty and repeat the whole sequence
            # after "]".  Use the sequence length as the bound in that case
            # so the brackets enclose the entire sequence.
            tail_start = len(bases) if slop == 0 else -slop
            sequence = Seq(
                bases[:slop]
                + "["
                + bases[slop:tail_start]
                + "]"
                + bases[tail_start:]
            )
            sequence.alphabet = alphabet
            flagged_seq_record = SeqRecord(sequence, id=seq_record.id, description="")
            SeqIO.write([flagged_seq_record], fh, "fasta")
# Each print below takes a single argument so the statement is valid, and
# prints the same text, on both Python 2 and Python 3.
print(myseq.count('aa'))  # must use the same capitalization; non-overlapping
print(GC(myseq))  # GC content
print(myseq[2:10])  # the slice is a new Seq with same alphabet
myseq2 = myseq[::3]
print("%s %s" % (myseq2, myseq2.alphabet))
print(myseq[::-1])
print("%s %s" % (str(myseq), type(str(myseq))))  # (automatic) convert to string

# Concatenating sequences
protseq = Seq('EVRNAK', IUPAC.protein)
dnaseq = Seq('ACGT', IUPAC.unambiguous_dna)
# print(protseq + dnaseq)  # can't concatenate seqs with different alphabets
print(myseq + dnaseq)

from Bio.Alphabet import generic_alphabet
protseq.alphabet = generic_alphabet  # in order to concatenate them
dnaseq.alphabet = generic_alphabet  # must use generic_alphabet
print(protseq + dnaseq)

from Bio.Alphabet import generic_nucleotide
nucseq = Seq('GATCGATGC', generic_nucleotide)
dnaseq = Seq('ACGT', IUPAC.unambiguous_dna)
print("%s %s" % (nucseq.alphabet, dnaseq.alphabet))
# parent + child = parent type
print("%s %s" % (nucseq + dnaseq, (nucseq + dnaseq).alphabet))

from Bio.Alphabet import generic_dna
list_seqs = [
    Seq('ACGT', generic_dna),
    Seq('CCGG', generic_dna),
    Seq('TTATT', generic_dna),
]
concatenated = Seq('', generic_dna)
for seq in list_seqs:
    concatenated += seq
print(concatenated)
# Show that a DNA Seq and its protein translation can only be concatenated
# once both carry the same (generic) alphabet.
from Bio.Alphabet import IUPAC, generic_alphabet
from Bio.Seq import Seq

dna = Seq("ATGGTTAGTCTTTAA", IUPAC.unambiguous_dna)
protein = dna.translate()

# Try to concatenate the DNA and protein sequences.
try:
    concetencate = dna + protein
    print("Contenated successfully")
except TypeError:
    # The attempt fails because dna and protein use different alphabets;
    # both need the same type to be concatenated.  Only that error is
    # handled here -- anything else propagates.
    dna.alphabet = generic_alphabet
    protein.alphabet = generic_alphabet
    print(dna + protein)