def test_2():
    """Call ``find_iupac`` with sequence "R" and motif "N", tolerating errors.

    ``failure`` starts at 1 and is reset to 0 only when the call returns
    without raising.
    """
    failure = 1
    try:
        motility.find_iupac("R", "N")
        failure = 0
    except Exception:
        # Deliberately swallowed: an exception simply leaves ``failure`` at 1.
        pass
    # NOTE(review): ``failure`` is never inspected in the visible code, so
    # this test cannot fail as written.  Presumably an assertion on the flag
    # was intended -- confirm against the upstream test suite before adding.
Example #2
0
def test_2():
    """Exercise ``find_iupac("R", "N")`` and record whether it raised.

    The local flag ``failure`` stays 1 if the call raises and becomes 0 if
    it returns normally.
    """
    failure = 1
    try:
        motility.find_iupac("R", "N")
        failure = 0
    except Exception:
        # Best-effort on purpose: any exception keeps ``failure`` set to 1.
        pass
    # NOTE(review): nothing visible here reads ``failure`` afterwards, so the
    # outcome is discarded; an assertion on the flag looks like it is
    # missing -- verify the intended expectation before adding one.
def _read_motif_table(path):
	"""Parse a two-column motif file into a ``{motif_id: iupac}`` dict.

	Blank lines and lines starting with ``#`` are skipped.  Every other line
	must split on whitespace into exactly two fields; otherwise the tuple
	unpacking raises ``ValueError``.  A repeated motif ID overwrites the
	earlier entry.
	"""
	motifs = {}
	with open(path, 'r') as handle:
		for raw in handle:
			line = raw.strip()
			# After strip() a blank line is '', which the old prefix test
			# (' ' / '\n') could never catch, so it crashed the unpacking.
			if not line or line.startswith('#'):
				continue
			motif_id, iupac = line.split()
			motifs[motif_id] = iupac
	return motifs


def main():
	"""Count IUPAC motif hits in DNA sequences and write two reports.

	Options ``-s`` (sequence file), ``-m`` (motif file) and ``-o`` (output
	prefix) are all required; if any is missing the help text is printed and
	the process exits with status 0.  ``-n`` sets the number of mismatches
	passed to ``mt.find_iupac`` (default 0).

	Files written:
	  <prefix>.seq2motif.xls  For each sequence line: the line itself, then
	                          one ``ID;IUPAC;hit_count`` field per motif that
	                          is not longer than the line, tab separated.
	                          A ``>`` header line is written (without the
	                          ``>``) followed by a tab and no newline, so it
	                          shares a row with the next sequence line.
	  <prefix>.motif2seq.xls  For each motif: ID, IUPAC string and the number
	                          of sequence lines with at least one hit.

	Progress messages go to stderr.  Blank lines and ``#`` lines are ignored
	in both input files.
	"""
	usage = "%prog [options]"
	parser = OptionParser(usage, version="%prog " + __version__)
	parser.add_option("-s", "--sequence", action="store", type="string", dest="dna_seq", help="DNA sequence in fasta format")
	parser.add_option("-m", "--motif", action="store", type="string", dest="motif_iupac", help="Tab (space) separated two-column file: Motif_ID <space> Motif_IUPAC. Motif_ID should be unique. For example:Motif.7.4\tTGTWCHH\nMotif.6.3\tRGWACA\nMotif.6.2\tTGTWCW")
	parser.add_option("-o", "--out-prefix", action="store", type="string", dest="output", help="Output file")
	parser.add_option("-n", "--mismatch", action="store", type="int", dest="mismatch_num", default=0, help="Number of mismatches. default=%default")

	(options, _unused_args) = parser.parse_args()

	if not (options.dna_seq and options.output and options.motif_iupac):
		parser.print_help()
		sys.exit(0)

	sys.stderr.write("Reading motif file " + options.motif_iupac + "\n")
	motifs = _read_motif_table(options.motif_iupac)
	motif_ids = sorted(motifs)

	# Per motif: number of sequence lines containing at least one hit.
	# Collected during the single pass below instead of re-reading and
	# re-searching the whole sequence file once per motif.
	seqs_with_hit = dict.fromkeys(motif_ids, 0)

	sys.stderr.write("Search motifs for each sequence \n")
	with open(options.output + '.seq2motif.xls', 'w') as seq2motif:
		with open(options.dna_seq, 'r') as seq_file:
			for raw in seq_file:
				line = raw.strip()
				if not line or line.startswith('#'):
					continue
				if line.startswith('>'):
					# Header: no newline, the sequence continues this row.
					seq2motif.write(line[1:] + '\t')
					continue
				seq2motif.write(line + '\t')
				for motif_id in motif_ids:
					pattern = motifs[motif_id]
					# A motif longer than the sequence is not searched.
					if len(pattern) > len(line):
						continue
					found = mt.find_iupac(line, pattern, options.mismatch_num)
					seq2motif.write(motif_id + ";" + pattern + ';' + str(len(found)) + '\t')
					if found:
						seqs_with_hit[motif_id] += 1
				seq2motif.write('\n')

	sys.stderr.write("Search sequences for each motif\n")
	with open(options.output + '.motif2seq.xls', 'w') as motif2seq:
		for motif_id in motif_ids:
			motif2seq.write(motif_id + '\t' + motifs[motif_id] + '\t' + str(seqs_with_hit[motif_id]) + '\n')
Example #4
0
def test_4():
    """
    Compare PWM, IUPAC and exact search results for one motif.

    The motif is searched against itself and against 'CCGT' (its reverse
    complement); the result objects of the different search functions are
    compared for equality.
    """
    site = 'ACGG'
    matrix = motility.make_pwm([site])

    # Motif against itself: all three searches must return equal results.
    by_pwm = matrix.find(site, 4)
    by_iupac = motility.find_iupac(site, site)
    by_exact = motility.find_exact(site, site)
    assert by_pwm == by_iupac
    assert by_pwm == by_exact

    # Motif against its reverse complement.
    revcomp = 'CCGT'
    by_pwm = matrix.find(revcomp, 4)
    by_iupac = motility.find_iupac(revcomp, site)
    # The exact search is still run, but its result is not compared here.
    by_exact = motility.find_exact(revcomp, site)
    assert by_pwm == by_iupac
def test_4():
    """
    Check that the PWM, IUPAC and exact searches agree with each other.

    Runs all three searches for 'ACGG' against itself and against 'CCGT'
    and compares the returned results.
    """
    motif = 'ACGG'
    pwm = motility.make_pwm([motif])

    def search_all(target):
        # Same call order as a straight-line version: PWM, IUPAC, exact.
        from_pwm = pwm.find(target, 4)
        from_iupac = motility.find_iupac(target, motif)
        from_exact = motility.find_exact(target, motif)
        return from_pwm, from_iupac, from_exact

    from_pwm, from_iupac, from_exact = search_all(motif)
    assert from_pwm == from_iupac
    assert from_pwm == from_exact

    # For 'CCGT' only the PWM and IUPAC results are compared; the exact
    # search is executed but its result is unused.
    from_pwm, from_iupac, from_exact = search_all('CCGT')
    assert from_pwm == from_iupac
Example #6
0
    def test_g(self):
        """Check search hit counts and ``calc_score`` for the motif 'G'."""
        motif = 'G'
        motif_obj = IUPAC(motif)

        # (one-base sequence, expected number of search hits, expected score)
        expectations = (
            ('A', 0, 0),
            ('C', 1, 0),  # searches hit, but calc_score DOES NOT do RC
            ('G', 1, 1),
            ('T', 0, 0),
            ('N', 0, 0),
        )
        for base, n_hits, score in expectations:
            assert len(find_iupac(base, motif)) == n_hits
            assert len(motif_obj.find(base)) == n_hits
            assert motif_obj.calc_score(base) == score
Example #7
0
    def test_n(self):
        """Check that motif 'N' gives one hit and score 1 for each of A, C, G, T, N."""
        motif = 'N'
        motif_obj = IUPAC(motif)

        # Every one-base sequence is expected to match exactly once, both
        # via the module-level search and via the motif object.
        for base in 'ACGTN':
            assert len(find_iupac(base, motif)) == 1
            assert len(motif_obj.find(base)) == 1
            assert motif_obj.calc_score(base) == 1
Example #8
0
    def test_g(self):
        """Verify hit counts and scores of the 'G' motif on one-base sequences."""
        motif = 'G'
        motif_obj = IUPAC(motif)

        expected_hits = {'A': 0, 'C': 1, 'G': 1, 'T': 0, 'N': 0}
        # 'C' is found by both searches yet scores 0: calc_score DOES NOT do RC.
        expected_score = {'A': 0, 'C': 0, 'G': 1, 'T': 0, 'N': 0}

        for base in ('A', 'C', 'G', 'T', 'N'):
            hits = expected_hits[base]
            assert len(find_iupac(base, motif)) == hits
            assert len(motif_obj.find(base)) == hits
            assert motif_obj.calc_score(base) == expected_score[base]
Example #9
0
    def test_n(self):
        """Verify that the 'N' motif matches and scores on every one-base sequence tried."""
        motif = 'N'
        motif_obj = IUPAC(motif)

        expected = 1  # one hit, and a score of 1, for each base below
        for base in ('A', 'C', 'G', 'T', 'N'):
            module_hits = find_iupac(base, motif)
            assert len(module_hits) == expected
            object_hits = motif_obj.find(base)
            assert len(object_hits) == expected
            assert motif_obj.calc_score(base) == expected
Example #10
0
#! /usr/bin/env python
import motility

# Print a separator banner.  Note: this script uses the Python 2 ``print``
# statement throughout.
print '\n---------------------------------\n'
##############################################################################

#
# example 1: use an IUPAC motif with and without mismatches.
#

motif = "AR"  # R = A or G
seq = "ATCT"  # sequence to search

### 0 mismatches

matches = motility.find_iupac(seq, motif)  # search with zero mismatches

# Summary line: number of matches, then the motif and the sequence searched.
print "found %d match(es) to '%s' in '%s' with 0 mismatches:" \
      % (len(matches), motif, seq)

# Each match unpacks into four fields: start, end, orientation (formatted as
# an integer) and the matched text.
for (start, end, o, match) in matches:
    print '\t%d to %d in %d orientation; match is %s' % (start, end, o, match)

### 1 mismatch allowed

# Same search, with a third positional argument of 1 (mismatches allowed).
matches = motility.find_iupac(seq, motif, 1)  # search with 1 mismatch

print "\nfound %d match(es) to '%s' in '%s' with 1 mismatch allowed:" \
      % (len(matches), motif, seq)

# NOTE(review): the body of this loop is not visible in this excerpt.
for (start, end, o, match) in matches:
Example #11
0
#! /usr/bin/env python
import sys
sys.path.append('/u/t/dev/motility/python/build/lib.linux-i686-2.3/')
import motility

sys.path.append('/u/t/dev/slippy/lib')
import fasta

# Command-line driver: search one FASTA file for one IUPAC motif and print
# every match to stdout.
#
# Usage: <script> <motif> <FASTA file to search>

if len(sys.argv) != 3:
    sys.stderr.write('Usage:\n\t%s <motif> <FASTA file to search>\n' % (sys.argv[0],))
    # NOTE(review): a usage error exits with status 0, so callers cannot
    # detect misuse from the exit code.  Left unchanged here; confirm whether
    # a non-zero status is acceptable to existing callers.
    sys.exit(0)

motif = sys.argv[1]
filename = sys.argv[2]

sys.stderr.write('searching file %s with motif %s\n' % (filename, motif,))

seq = fasta.load_single(filename)
results = motility.find_iupac(seq, motif)

# Each result unpacks into start, end, orientation and the matched text.
# print(...) with a single pre-formatted string emits the same output under
# both the Python 2 print statement and the Python 3 print function.
for (start, end, orientation, match) in results:
    print('MATCH: %d --> %d, orientation %d; match is %s' % (start, end,
                                                             orientation,
                                                             match,))

sys.exit(0)
Example #12
0
#! /usr/bin/env python
import motility

# Every print below passes exactly one pre-formatted string inside
# parentheses, which produces identical output under the Python 2 print
# statement and the Python 3 print function.
print("\n---------------------------------\n")
##############################################################################

#
# example 1: use an IUPAC motif with and without mismatches.
#

motif = "AR"  # R = A or G
seq = "ATCT"  # sequence to search

### 0 mismatches

matches = motility.find_iupac(seq, motif)  # search with zero mismatches

print("found %d match(es) to '%s' in '%s' with 0 mismatches:" % (len(matches), motif, seq))

# Each match unpacks into start, end, orientation (formatted as an integer)
# and the matched text.
for (start, end, o, match) in matches:
    print("\t%d to %d in %d orientation; match is %s" % (start, end, o, match))

### 1 mismatch allowed

# The third positional argument is the number of mismatches allowed.
matches = motility.find_iupac(seq, motif, 1)  # search with 1 mismatch

print("\nfound %d match(es) to '%s' in '%s' with 1 mismatch allowed:" % (len(matches), motif, seq))

for (start, end, o, match) in matches:
    print("\t%d to %d in %d orientation; match is %s" % (start, end, o, match))
Example #13
0
def test_1():
    """Motif "N" must produce six matches in the six-character sequence "ACTGNX"."""
    hits = motility.find_iupac("ACTGNX", "N")
    assert len(hits) == 6
Example #14
0
def test_1():
    """Searching "ACTGNX" for the motif "N" is expected to yield 6 matches."""
    sequence, motif = "ACTGNX", "N"
    expected_matches = 6
    assert len(motility.find_iupac(sequence, motif)) == expected_matches