# Command-line driver: normalise the trim bounds, build the TamoWrap job,
# optionally trim every probe sequence, then run the job.

# The start bound is always converted to an integer; the end bound may be the
# literal string 'None', which is mapped to None so the slice below runs to
# the end of the sequence.
# NOTE(review): opts.trim is indexed here unconditionally although it is only
# truth-tested further down -- presumably a guard or default outside this
# view makes that safe; confirm.
opts.trim[0] = int(opts.trim[0])
if opts.trim[1] == 'None':
    opts.trim[1] = None
else:
    opts.trim[1] = int(opts.trim[1])

# need to change the use of this to be how mortals think of numbers
optionsObj = {'kmerSize': opts.ksize, 'kmerRange': opts.krange}
posArgs = [opts.fasta, opts.regs]
outFile = opts.out

tWrap = TamoWrap(optionsObj, posArgs)

# Count of sequences that the requested slice did NOT shorten.
shorties = 0
if opts.trim:
    probes = tWrap.allSeqs.probes
    trimStart, trimEnd = opts.trim[0], opts.trim[1]

    # Only values are reassigned (no keys added/removed), so mutating the
    # mapping while iterating over its keys is safe.
    for i in probes:
        oLen = len(probes[i])
        probes[i] = probes[i][trimStart:trimEnd]
        nLen = len(probes[i])
        if not nLen < oLen:
            shorties += 1

    # Guard against an empty probe set: the ratio is undefined there and the
    # unguarded division used to raise ZeroDivisionError.
    total = len(probes)
    if total and shorties / float(total) > 0.25:
        warn("WARNING: more than 1/4 of the total sequences were shorter or equal "
             "to the length of the requested substring.")

    # Rebuild the linked-sequence collection from the probes after trimming.
    tWrap.linkedSeqs_seqs = tWrap.allSeqs.seqs_from_ids(tWrap.linkedSeqs_ids)

tWrap.go()
# TamoWrap -> def __init__(self, optionsObj, posArgs):

# Batch driver: run TamoWrap once per '*.genes.txt' file found in dirPath and
# write each run's output lines next to its input file.

# get list of file paths
dirPath = '/Users/biggus/Documents/James/Data/ReClustering/kmedsPear33Clus50x_2/Clus2_247genes.6EucClus/'
fastaPath = '/Users/biggus/Documents/James/Data/2KB/2kb_Sequence/2kb_Anopheles/2KBupTSS_goodAffyAGAPsFastasOUT.masked.nr.fas'
files = glob.glob('%s*.genes.txt' % (dirPath))

for genesFile in files:
    # need to change the use of this to be how mortals think of numbers
    # A fresh dict is built per run in case TamoWrap mutates its options.
    optionsObj = {'kmerSize': 7, 'kmerRange': '6,9'}
    posArgs = [fastaPath, genesFile]

    # Swap only the trailing '.txt' extension (the glob pattern guarantees the
    # path ends with it); str.replace would also rewrite any '.txt' that
    # happened to occur earlier in the path.
    outPath = genesFile[:-len('.txt')] + '.6-9mers.txt'

    tWrap = TamoWrap(optionsObj, posArgs)
    tWrap.go()

    # Single-argument parenthesised form prints identically on Python 2.
    print('num of keepers = %s' % (len(tWrap.output)))

    # The context manager closes the file even if writelines raises.
    with open(outPath, 'w') as outHandle:
        outHandle.writelines(tWrap.toFile)

print('Done.')