Exemple #1
0
 def setUp(self):
     """Open the motif fixture files and build a one-instance motif.

     Sets four open input handles (``ACin``, ``MEMEin``, ``PFMin``,
     ``SITESin``), three output paths (``TFout``, ``FAout``, ``PFMout``)
     and ``self.m``, a motif holding the single instance "ATATA".

     If any step fails, every handle opened so far is closed before the
     error propagates, so a half-finished setUp does not leak open files.
     """
     fixtures = (
         ("ACin", "Motif/alignace.out"),
         ("MEMEin", "Motif/meme.out"),
         ("PFMin", "Motif/SRF.pfm"),
         ("SITESin", "Motif/Arnt.sites"),
     )
     opened = []
     try:
         for attr_name, path in fixtures:
             handle = open(path)
             opened.append(handle)
             setattr(self, attr_name, handle)
         self.TFout = "Motif/tf.out"
         self.FAout = "Motif/fa.out"
         # NOTE(review): PFMout is the same path as FAout; this looks like
         # a copy-paste slip -- confirm before changing, since other code
         # may depend on the current value.
         self.PFMout = "Motif/fa.out"
         from Bio.Seq import Seq
         self.m = Motif.Motif()
         self.m.add_instance(Seq("ATATA", self.m.alphabet))
     except Exception:
         # Release whatever was opened, then let the original error surface.
         for handle in opened:
             handle.close()
         raise
Exemple #2
0
def build_motif(seqs):
    """Create motif from sequences.

    Each item of ``seqs`` is wrapped in a Seq on the motif's alphabet
    (unambiguous DNA) and added as an instance; the motif's counts are
    then built from those instances.

    Returns the motif, or None (after printing a warning) as soon as one
    sequence cannot be added.
    """
    m = Motif.Motif(alphabet=IUPAC.unambiguous_dna)
    for seq in seqs:
        try:
            m.add_instance(Seq(seq, m.alphabet))
        except Exception:
            # Still best-effort, but KeyboardInterrupt/SystemExit are no
            # longer swallowed the way a bare "except:" would.
            print("Diff motif size length?")
            return None
    m.make_counts_from_instances()
    return m
Exemple #3
0
def get_pwm_from_clustalw(clustalw_fname):
    """
    Get PWM from CLUSTALW alignments file.

    Every record of the alignment is added, as an unambiguous-DNA
    sequence, to a new motif object from which the PWM is computed.

    Return PWM and motif object.

    Raises Exception if clustalw_fname is not an existing file.
    """
    import os
    # Fail fast on a missing file, before the Biopython imports.
    if not os.path.isfile(clustalw_fname):
        raise Exception(
            "CLUSTALW file %s does not exist" % (clustalw_fname,))
    from Bio import Motif
    from Bio.Alphabet import IUPAC
    import Bio.Seq as bio_seq
    import Bio.AlignIO as align_io
    # Load CLUSTALW file
    alignment = align_io.read(clustalw_fname, "clustal")
    motif_obj = Motif.Motif(alphabet=IUPAC.unambiguous_dna)
    # Add sequences from CLUSTALW alignment to motif object; iterating the
    # alignment yields its records directly (replaces get_all_seqs()).
    for record in alignment:
        curr_seq = bio_seq.Seq(str(record.seq), IUPAC.unambiguous_dna)
        motif_obj.add_instance(curr_seq)
    # Compute PWM
    pwm = motif_obj.pwm()
    return pwm, motif_obj
Exemple #4
0
#!/usr/bin/env python
# counts a motif (arg1) with overlaps in a fasta file (arg2)
import sys
from Bio.Seq import Seq
from Bio import SeqIO
from Bio import Motif
from Bio.Alphabet import IUPAC
# Both command-line arguments are required; exit with a usage line rather
# than an IndexError traceback when one is missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s MOTIF FASTAFILE" % sys.argv[0])
theMotif = sys.argv[1]
fastafile = sys.argv[2]

# Motif holding the single instance given on the command line.
momo = Motif.Motif(alphabet=IUPAC.unambiguous_dna)
momo.add_instance(Seq(theMotif, momo.alphabet))


def countMotif(myseqrecord, mymotif):
    """Return the number of hits mymotif.search_instances() yields on
    the sequence of myseqrecord."""
    return sum(1 for _ in mymotif.search_instances(myseqrecord.seq))


# Total hits over every record in the FASTA file; the with-block closes
# the handle even if parsing raises.
momoc = 0
with open(fastafile) as handle:
    for seq_record in SeqIO.parse(handle, "fasta"):
        momoc += countMotif(seq_record, momo)

print("motif %s found %s times in the %s file" % (theMotif, momoc, fastafile))


Exemple #5
0
 def test_sites_parsing(self):
     """Test to be sure that Motif can parse sites files.

     Parses the handle stored in self.SITESin and checks that the
     resulting motif has length 6.
     """
     motif = Motif.Motif()
     motif.from_jaspar_sites(self.SITESin)
     # Explicit raise: a bare assert is stripped under "python -O" and
     # would not report the length that was actually observed.
     if motif.length != 6:
         raise AssertionError(
             "expected motif length 6, got %r" % (motif.length,))
Exemple #6
0
 def test_pfm_parsing(self):
     """Test to be sure that Motif can parse pfm files.

     Parses the handle stored in self.PFMin and checks that the
     resulting motif has length 12.
     """
     motif = Motif.Motif()
     motif.from_jaspar_pfm(self.PFMin)
     # Explicit raise: a bare assert is stripped under "python -O" and
     # would not report the length that was actually observed.
     if motif.length != 12:
         raise AssertionError(
             "expected motif length 12, got %r" % (motif.length,))
Exemple #7
0
#!/usr/bin/env python
# counts all dinucleotides in a DNA fasta file
import sys
from Bio.Seq import Seq
from Bio import SeqIO
from Bio import Motif
from Bio.Alphabet import IUPAC
fastafile = sys.argv[1]

# One Motif per dinucleotide: each is created on the unambiguous DNA
# alphabet and given a single two-letter instance; the variable name is
# the dinucleotide it holds.
# NOTE(review): these repeated pairs could be generated in a loop, provided
# no other code relies on the individual module-level names -- confirm.
AA=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
AA.add_instance(Seq("AA",AA.alphabet))
CA=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
CA.add_instance(Seq("CA",CA.alphabet))
GA=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
GA.add_instance(Seq("GA",GA.alphabet))
TA=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
TA.add_instance(Seq("TA",TA.alphabet))
AC=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
AC.add_instance(Seq("AC",AC.alphabet))
CC=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
CC.add_instance(Seq("CC",CC.alphabet))
GC=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
GC.add_instance(Seq("GC",GC.alphabet))
TC=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
TC.add_instance(Seq("TC",TC.alphabet))
AG=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
AG.add_instance(Seq("AG",AG.alphabet))
CG=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
CG.add_instance(Seq("CG",CG.alphabet))
GG=Motif.Motif(alphabet=IUPAC.unambiguous_dna)
GG.add_instance(Seq("GG",GG.alphabet))