def test_2():
    """Record whether find_iupac raises when searching 'R' with motif 'N'.

    ``failure`` starts at 1 and is reset to 0 only if the call returns
    normally; any exception leaves it at 1.
    """
    failure = 1
    try:
        motility.find_iupac("R", "N")
        failure = 0
    except Exception:
        # Deliberately swallowed: an exception is one of the two outcomes
        # this flag is tracking.  (The old ``except Exception, e`` form
        # only parses on Python 2 and bound a name nobody read.)
        pass
    # NOTE(review): nothing visible here checks ``failure`` -- confirm
    # whether an assertion on it is expected to follow.
def test_2():
    """Record whether find_iupac raises when searching 'R' with motif 'N'.

    ``failure`` starts at 1 and is reset to 0 only if the call returns
    normally; any exception leaves it at 1.
    """
    failure = 1
    try:
        motility.find_iupac("R", "N")
        failure = 0
    except Exception:
        # Deliberately swallowed: an exception is one of the two outcomes
        # this flag is tracking.  (The old ``except Exception, e`` form
        # only parses on Python 2 and bound a name nobody read.)
        pass
    # NOTE(review): nothing visible here checks ``failure`` -- confirm
    # whether an assertion on it is expected to follow.
def _read_motifs(path):
    """Load the two-column motif file at *path* into a ``{motif_id: iupac}`` dict.

    Blank lines and lines starting with '#' are skipped.  A repeated ID
    overwrites the earlier entry.  Raises ValueError when a data line does
    not split into exactly two whitespace-separated fields.
    """
    motifs = {}
    with open(path, 'r') as handle:
        for line in handle:
            line = line.strip()
            # After strip() a blank line is '', which the old prefix test
            # never caught, so the unpacking below used to crash on it.
            if not line or line.startswith('#'):
                continue
            motif_id, iupac = line.split()
            motifs[motif_id] = iupac
    return motifs


def main():
    """Search every sequence line for every motif and write two reports.

    ``<prefix>.seq2motif.xls`` gets one row per sequence line (preceded by
    the text of a '>' header line when one comes right before it), followed
    by a ``motif_id;iupac;hit_count`` field for each motif that is not
    longer than the sequence line.

    ``<prefix>.motif2seq.xls`` gets one ``motif_id<TAB>iupac<TAB>count`` row
    per motif, where count is the number of sequence lines with at least
    one hit.

    Prints the option help and exits with status 0 when any of the
    sequence, motif or output-prefix options is missing.
    """
    usage = "%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)
    parser.add_option("-s", "--sequence", action="store", type="string",
                      dest="dna_seq", help="DNA sequence in fasta format")
    parser.add_option("-m", "--motif", action="store", type="string",
                      dest="motif_iupac",
                      help="Tab (space) separated two-column file: Motif_ID <space> Motif_IUPAC. Motif_ID should be unique. For example:Motif.7.4 TGTWCHH\nMotif.6.3 RGWACA\nMotif.6.2 TGTWCW")
    parser.add_option("-o", "--out-prefix", action="store", type="string",
                      dest="output", help="Output file")
    parser.add_option("-n", "--mismatch", action="store", type="int",
                      dest="mismatch_num", default=0,
                      help="Number of mismatch. default=%default")
    (options, args) = parser.parse_args()

    if not (options.dna_seq and options.output and options.motif_iupac):
        parser.print_help()
        sys.exit(0)

    # The motif file is read before any output file is created, so a bad
    # motif file no longer leaves empty report files behind.
    sys.stderr.write("Reading motif file " + options.motif_iupac + "\n")
    motifs = _read_motifs(options.motif_iupac)
    motif_ids = sorted(motifs)

    # Sequence lines with at least one hit, per motif.  Collected during
    # the single pass below instead of re-reading the sequence file and
    # repeating every search once per motif.
    seqs_with_hit = dict.fromkeys(motif_ids, 0)

    sys.stderr.write("Search motifs for each sequence \n")
    with open(options.output + '.seq2motif.xls', 'w') as seq_report:
        with open(options.dna_seq, 'r') as seq_file:
            for line in seq_file:
                line = line.strip()
                # Blank lines used to produce a junk row holding only a tab.
                if not line or line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # Header text opens the row; the sequence line that
                    # follows completes it.
                    seq_report.write(line[1:] + '\t')
                    continue
                seq_report.write(line + '\t')
                for motif_id in motif_ids:
                    iupac = motifs[motif_id]
                    if len(iupac) > len(line):
                        continue
                    found = mt.find_iupac(line, iupac, options.mismatch_num)
                    seq_report.write(
                        motif_id + ";" + iupac + ';' + str(len(found)) + '\t')
                    if len(found) > 0:
                        seqs_with_hit[motif_id] += 1
                seq_report.write('\n')

    sys.stderr.write("Search sequences for each motif\n")
    with open(options.output + '.motif2seq.xls', 'w') as motif_report:
        for motif_id in motif_ids:
            motif_report.write(
                motif_id + '\t' + motifs[motif_id] + '\t'
                + str(seqs_with_hit[motif_id]) + '\n')
def test_4():
    """
    Compare PWM, IUPAC and exact search results for the motif 'ACGG',
    searched first in itself and then in its reverse complement 'CCGT'.
    """
    site = 'ACGG'
    matrix = motility.make_pwm([site])

    # Case 1: the motif searched within its own sequence.
    via_pwm = matrix.find(site, 4)
    via_iupac = motility.find_iupac(site, site)
    via_exact = motility.find_exact(site, site)

    assert via_pwm == via_iupac
    assert via_pwm == via_exact

    # Case 2: the same motif searched within the reverse-complement string.
    rc_site = 'CCGT'
    via_pwm = matrix.find(rc_site, 4)
    via_iupac = motility.find_iupac(rc_site, site)
    via_exact = motility.find_exact(rc_site, site)

    assert via_pwm == via_iupac
    # NOTE(review): via_exact is computed for this case but not compared
    # in the visible code -- confirm whether an assertion is missing.
def test_g(self):
    """Single-base motif 'G' checked against each of A, C, G, T and N."""
    motif = 'G'
    motif_obj = IUPAC(motif)

    # (sequence, expected number of matches, expected calc_score)
    expectations = (
        ('A', 0, 0),
        # Both find calls report one match for 'C', but calc_score
        # DOES NOT do RC, so the score stays 0.
        ('C', 1, 0),
        ('G', 1, 1),
        ('T', 0, 0),
        ('N', 0, 0),
    )

    for base, n_matches, score in expectations:
        assert len(find_iupac(base, motif)) == n_matches
        assert len(motif_obj.find(base)) == n_matches
        assert motif_obj.calc_score(base) == score
def test_n(self):
    """Motif 'N' must give one match and a score of 1 for A, C, G, T and N."""
    motif = 'N'
    motif_obj = IUPAC(motif)

    # Same three checks for every single-character sequence, in order.
    for base in ('A', 'C', 'G', 'T', 'N'):
        assert len(find_iupac(base, motif)) == 1
        assert len(motif_obj.find(base)) == 1
        assert motif_obj.calc_score(base) == 1
#! /usr/bin/env python import motility print '\n---------------------------------\n' ############################################################################## # # example 1: use an IUPAC motif with and without mismatches. # motif = "AR" # R = A or G seq = "ATCT" # sequence to search ### 0 mismatches matches = motility.find_iupac(seq, motif) # search with zero mismatches print "found %d match(es) to '%s' in '%s' with 0 mismatches:" \ % (len(matches), motif, seq) for (start, end, o, match) in matches: print '\t%d to %d in %d orientation; match is %s' % (start, end, o, match) ### 1 mismatch allowed matches = motility.find_iupac(seq, motif, 1) # search with 1 mismatch print "\nfound %d match(es) to '%s' in '%s' with 1 mismatch allowed:" \ % (len(matches), motif, seq) for (start, end, o, match) in matches:
#! /usr/bin/env python
# Command-line helper: load one sequence from a FASTA file, search it for
# an IUPAC motif with motility, and print every match.
import sys

# Hard-coded absolute library locations; each is appended to sys.path
# before the import that follows it.
sys.path.append('/u/t/dev/motility/python/build/lib.linux-i686-2.3/')
import motility

sys.path.append('/u/t/dev/slippy/lib')
import fasta

# Exactly two arguments are required: the motif and the FASTA file name.
if len(sys.argv) != 3:
    sys.stderr.write('Usage:\n\t%s <motif> <FASTA file to search>\n' % (sys.argv[0],))
    # NOTE(review): a usage error exits with status 0 here.
    sys.exit(0)

motif, filename = sys.argv[1:]

sys.stderr.write('searching file %s with motif %s\n' % (filename, motif,))

seq = fasta.load_single(filename)
results = motility.find_iupac(seq, motif)

for (start, end, orientation, match) in results:
    report = 'MATCH: %d --> %d, orientation %d; match is %s' % (start, end, orientation, match,)
    print(report)

sys.exit(0)
#! /usr/bin/env python
import motility

print("\n---------------------------------\n")

##############################################################################
#
# example 1: search with an IUPAC motif, first without and then with one
# mismatch allowed.
#

motif = "AR"  # R = A or G
seq = "ATCT"  # sequence to search

# Layout shared by both loops for reporting a single match tuple.
match_line = "\t%d to %d in %d orientation; match is %s"

### 0 mismatches

matches = motility.find_iupac(seq, motif)  # search with zero mismatches
summary = "found %d match(es) to '%s' in '%s' with 0 mismatches:"
print(summary % (len(matches), motif, seq))

for (start, end, o, match) in matches:
    print(match_line % (start, end, o, match))

### 1 mismatch allowed

matches = motility.find_iupac(seq, motif, 1)  # search with 1 mismatch
summary = "\nfound %d match(es) to '%s' in '%s' with 1 mismatch allowed:"
print(summary % (len(matches), motif, seq))

for (start, end, o, match) in matches:
    print(match_line % (start, end, o, match))
def test_1():
    """Motif 'N' searched in the six-character sequence 'ACTGNX' must give six matches."""
    hits = motility.find_iupac("ACTGNX", "N")
    assert len(hits) == 6