def setUp(self):
    """Open the input fixtures, record output paths and build a seed motif.

    Sets the following attributes on the test instance:

    * ``ACin``, ``MEMEin``, ``PFMin``, ``SITESin`` -- open file handles on
      the fixture files under ``Motif/`` (paths are relative to the current
      working directory).
    * ``TFout``, ``FAout``, ``PFMout`` -- output paths, kept as strings.
    * ``m`` -- a ``Motif.Motif`` holding the single instance ``ATATA``.
    """
    from Bio.Seq import Seq

    # Input fixtures; opened in the same order as they are listed here.
    self.ACin = open("Motif/alignace.out")
    self.MEMEin = open("Motif/meme.out")
    self.PFMin = open("Motif/SRF.pfm")
    self.SITESin = open("Motif/Arnt.sites")

    # Output locations (plain path strings, nothing is opened here).
    # NOTE(review): PFMout uses the same path as FAout -- confirm intended.
    self.TFout = "Motif/tf.out"
    self.FAout = "Motif/fa.out"
    self.PFMout = "Motif/fa.out"

    # Seed motif: one instance built with the motif's own alphabet.
    self.m = Motif.Motif()
    seed_instance = Seq("ATATA", self.m.alphabet)
    self.m.add_instance(seed_instance)
def build_motif(seqs):
    """Create motif from sequences.

    Args:
        seqs: iterable of sequence strings. Each one is wrapped in a ``Seq``
            using the motif's alphabet and added as one motif instance.

    Returns:
        The ``Motif.Motif`` object after ``make_counts_from_instances()`` has
        been called on it, or ``None`` if adding any instance failed (a
        diagnostic line is printed in that case).
    """
    m = Motif.Motif(alphabet=IUPAC.unambiguous_dna)
    for seq in seqs:
        try:
            m.add_instance(Seq(seq, m.alphabet))
        except Exception:
            # Best-effort: report and give up on the whole motif.  Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            print("Diff motif size length?")
            return None
    m.make_counts_from_instances()
    return m
def get_pwm_from_clustalw(clustalw_fname):
    """
    Get PWM from CLUSTALW alignments file.

    Args:
        clustalw_fname: path to an alignment file in CLUSTAL format.

    Returns:
        A ``(pwm, motif_obj)`` tuple: the result of ``motif_obj.pwm()`` and
        the ``Motif.Motif`` object built from the aligned sequences.

    Raises:
        Exception: if ``clustalw_fname`` is not an existing file.
    """
    # Validate the path first so a missing file is reported before the
    # Biopython imports below are attempted.
    if not os.path.isfile(clustalw_fname):
        raise Exception("CLUSTALW file %s does not exist" % (clustalw_fname))

    from Bio import Motif
    from Bio.Alphabet import IUPAC
    import Bio.Seq as bio_seq
    import Bio.AlignIO as align_io

    # Load CLUSTALW file
    clustalw_input = align_io.read(clustalw_fname, "clustal")
    motif_obj = Motif.Motif(alphabet=IUPAC.unambiguous_dna)

    # Add sequences from CLUSTALW alignment to motif object
    # NOTE(review): get_all_seqs() is marked deprecated in later Biopython
    # releases -- confirm it exists in the installed version.
    for clustalw_seq in clustalw_input.get_all_seqs():
        curr_seq = bio_seq.Seq(str(clustalw_seq.seq), IUPAC.unambiguous_dna)
        motif_obj.add_instance(curr_seq)

    # Compute PWM
    pwm = motif_obj.pwm()
    return pwm, motif_obj
#!/usr/bin/env python
# counts a motif (arg1) with overlaps in a fasta file (arg2)

import sys
from Bio.Seq import Seq
from Bio import SeqIO
from Bio import Motif
from Bio.Alphabet import IUPAC

# Fail with a usage line rather than an IndexError when arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s MOTIF FASTA_FILE" % sys.argv[0])

theMotif = sys.argv[1]
fastafile = sys.argv[2]

# Single-instance motif built from the command-line motif string.
momo = Motif.Motif(alphabet=IUPAC.unambiguous_dna)
momo.add_instance(Seq(theMotif, momo.alphabet))


def countMotif(myseqrecord, mymotif):
    """Return how many items ``mymotif.search_instances`` yields for the record.

    Args:
        myseqrecord: a record whose ``.seq`` attribute is searched.
        mymotif: the motif object providing ``search_instances``.
    """
    return sum(1 for _hit in mymotif.search_instances(myseqrecord.seq))


momoc = 0
# The with-block closes the handle even if parsing or searching raises.
with open(fastafile) as handle:
    for seq_record in SeqIO.parse(handle, "fasta"):
        momoc = momoc + countMotif(seq_record, momo)

# Single formatted string: same text on Python 2 and Python 3.
print("motif %s found %s times in the %s file" % (theMotif, momoc, fastafile))
def test_sites_parsing(self):
    """Check that a Motif can be populated from a JASPAR sites file.

    Reads from the ``self.SITESin`` handle and expects the resulting
    motif to have length 6.
    """
    expected_length = 6
    parsed = Motif.Motif()
    parsed.from_jaspar_sites(self.SITESin)
    assert parsed.length == expected_length
def test_pfm_parsing(self):
    """Check that a Motif can be populated from a JASPAR pfm file.

    Reads from the ``self.PFMin`` handle and expects the resulting
    motif to have length 12.
    """
    expected_length = 12
    parsed = Motif.Motif()
    parsed.from_jaspar_pfm(self.PFMin)
    assert parsed.length == expected_length
#!/usr/bin/env python # counts all dinucleotides in a DNA fasta file import sys from Bio.Seq import Seq from Bio import SeqIO from Bio import Motif from Bio.Alphabet import IUPAC fastafile = sys.argv[1] AA=Motif.Motif(alphabet=IUPAC.unambiguous_dna) AA.add_instance(Seq("AA",AA.alphabet)) CA=Motif.Motif(alphabet=IUPAC.unambiguous_dna) CA.add_instance(Seq("CA",CA.alphabet)) GA=Motif.Motif(alphabet=IUPAC.unambiguous_dna) GA.add_instance(Seq("GA",GA.alphabet)) TA=Motif.Motif(alphabet=IUPAC.unambiguous_dna) TA.add_instance(Seq("TA",TA.alphabet)) AC=Motif.Motif(alphabet=IUPAC.unambiguous_dna) AC.add_instance(Seq("AC",AC.alphabet)) CC=Motif.Motif(alphabet=IUPAC.unambiguous_dna) CC.add_instance(Seq("CC",CC.alphabet)) GC=Motif.Motif(alphabet=IUPAC.unambiguous_dna) GC.add_instance(Seq("GC",GC.alphabet)) TC=Motif.Motif(alphabet=IUPAC.unambiguous_dna) TC.add_instance(Seq("TC",TC.alphabet)) AG=Motif.Motif(alphabet=IUPAC.unambiguous_dna) AG.add_instance(Seq("AG",AG.alphabet)) CG=Motif.Motif(alphabet=IUPAC.unambiguous_dna) CG.add_instance(Seq("CG",CG.alphabet)) GG=Motif.Motif(alphabet=IUPAC.unambiguous_dna) GG.add_instance(Seq("GG",GG.alphabet))