def raw_squiggle_search3(squiggle,hashthang):
    """Subsequence-DTW search of a raw squiggle against windowed references.

    For every reference in `hashthang`, aligns the scaled query against both
    the forward ('Fprimewin') and reverse ('Rprimewin') windowed signals and
    collects one candidate hit per strand into `result` as:
        (dist, ref, strand, ref_start, ref_end, query_start, query_end, mean_cost)
    Reverse-strand reference coordinates are flipped (len - index) so they are
    reported with respect to the forward strand.

    NOTE(review): the visible code builds `result` but never returns it — the
    function body appears truncated here; presumably a sort/return follows,
    as in the sibling raw_squiggle_search2 variants. Confirm against the full file.
    """
    result=[]
    for ref in hashthang:
        try:
            # `scale` is a project helper (not shown here) — presumably
            # z-score normalisation of the raw signal; verify against its definition.
            queryarray=scale(squiggle)
            # Forward strand: path[1] indexes the reference, path[0] the query.
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprimewin'])
            result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1],cost.mean()))
            # Reverse strand: flip coordinates so start/end are on the forward strand.
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprimewin'])
            result.append((dist,ref,"R",(len(hashthang[ref]['Rprimewin'])-path[1][-1]),(len(hashthang[ref]['Rprimewin'])-path[1][0]),path[0][0],path[0][-1],cost.mean()))
        # Broad catch: a failed alignment for one reference must not abort the scan.
        except Exception,err:
            print "Warp Fail"
def squiggle_search2(squiggle,kmerhash,seqlen):
    """Match a squiggle against every reference in `kmerhash` with subsequence DTW.

    Parameters:
        squiggle: raw event-current values for the read (sequence of numbers).
        kmerhash: dict mapping reference name -> {'Fprime': ..., 'Rprime': ...}
                  expected-signal arrays for the forward and reverse strands.
        seqlen:   unused here; kept for call-compatibility with the other
                  squiggle_search variants in this file.

    Returns (name, distance, strand, ref_start, name, ref_end) for the single
    best-scoring hit over all references and both strands.

    Raises IndexError if `kmerhash` is empty (no candidate hits), matching the
    original behaviour.
    """
    result=[]
    # The scaled query does not depend on the reference, so compute it once
    # rather than once per reference (the original recomputed it in the loop).
    queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True)
    for ref in kmerhash:
        # Forward strand: path[1] holds reference indices of the alignment.
        dist, cost, path = mlpy.dtw_subsequence(queryarray,kmerhash[ref]['Fprime'])
        result.append((dist,ref,"F",path[1][0],ref,path[1][-1]))
        # Reverse strand.
        dist, cost, path = mlpy.dtw_subsequence(queryarray,kmerhash[ref]['Rprime'])
        result.append((dist,ref,"R",path[1][0],ref,path[1][-1]))
    # Sort once and reuse — the original sorted the list six times over.
    best = sorted(result,key=lambda hit: hit[0])[0]
    return best[1],best[0],best[2],best[3],best[4],best[5]
def raw_squiggle_search2(squiggle,hashthang): result=[] #print args.speedmode for ref in hashthang: try: queryarray = scale(squiggle) mx = np.max(queryarray) scalingFactor = 1 # iqr # 3 # 1.2 # MS queryarray *= scalingFactor dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprime']) result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1])) dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprime']) result.append((dist,ref,"R",(len(hashthang[ref]['Rprime'])-path[1][-1]),(len(hashthang[ref]['Rprime'])-path[1][0]),path[0][0],path[0][-1])) except Exception,err: print "Warp Fail"
result.append((distance,id,"F",location,ref)) subjectfile = id+"_"+str(ref)+"_"+"R"+"_subject.txt" subjectfile = re.sub('\|','_',subjectfile) #seqlen2 = str(seqlen[id]) commands = subjectfile+' '+queryfile+' 200'+' 0.1' #print commands #current = str(multiprocessing.current_process()) #currentnum=int(re.search(r'\d+', current).group()) gpucode=str() #if (currentnum % 2 == 0): #print "Even" gpucode='./UCR_DTW ' #else: # #print "Odd" # gpucode='./GPU-DTW ' runcommand = gpucode+commands
def raw_squiggle_search2(squiggle,hashthang): result=[] for ref in hashthang: try: memory_usage_psutil() queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True) dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprime']) memory_usage_psutil() result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1],cost,path)) dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprime']) result.append((dist,ref,"R",(len(hashthang[ref]['Rprime'])-path[1][-1]),(len(hashthang[ref]['Rprime'])-path[1][0]),path[0][0],path[0][-1],cost,path)) memory_usage_psutil() except Exception,err: print "Warp Fail" return sorted(result,key=lambda result: result[0])[0][1],sorted(result,key=lambda result: result[0])[0][0],sorted(result,key=lambda result: result[0])[0][2],sorted(result,key=lambda result: result[0])[0][3],sorted(result,key=lambda result: result[0])[0][4],sorted(result,key=lambda result: result[0])[0][5],sorted(result,key=lambda result: result[0])[0][6],sorted(result,key=lambda result: result[0])[0][7],sorted(result,key=lambda result: result[0])[0][8]
def raw_squiggle_search2(squiggle,hashthang):
    """Subsequence-DTW search of a raw squiggle, with optional memory logging.

    Same scan as the other raw_squiggle_search2 variants: aligns the scaled
    query against each reference's forward ('Fprime') and reverse ('Rprime')
    signal and appends one candidate per strand to `result` as:
        (dist, ref, strand, ref_start, ref_end, query_start, query_end,
         cost_matrix, path)
    Reverse-strand coordinates are flipped (len - index) onto the forward strand.
    When args.verbose is set, memory usage is logged around each alignment.

    NOTE(review): the visible code builds `result` but never returns it — this
    body appears truncated; presumably a sort/return follows. Confirm against
    the full file.
    """
    result=[]
    for ref in hashthang:
        try:
            # Instrumentation: log memory before aligning (verbose mode only).
            if (args.verbose is True):
                memory_usage_psutil()
            # Z-score normalise the raw query signal.
            queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True)
            # Forward strand: path[1] indexes the reference, path[0] the query.
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprime'])
            if (args.verbose is True):
                memory_usage_psutil()
            result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1],cost,path))
            # Reverse strand: flip coordinates onto the forward strand.
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprime'])
            result.append((dist,ref,"R",(len(hashthang[ref]['Rprime'])-path[1][-1]),(len(hashthang[ref]['Rprime'])-path[1][0]),path[0][0],path[0][-1],cost,path))
            if (args.verbose is True):
                memory_usage_psutil()
        # Broad catch: a failed alignment for one reference must not abort the scan.
        except Exception,err:
            print "Warp Fail"
def getDistance(self, songPath, songName, dbPath):
    """Score one song against every .csv reference in a database directory.

    Segments the song at `songPath` into a note sequence, then computes the
    subsequence-DTW distance from that sequence to each reference file in
    `dbPath` ending in '.csv'.

    Returns a list of [int(distance), reference_name] pairs, where the
    reference name is the file name with its last 10 characters stripped.
    """
    manager = midimanager.MidiManager()
    record = manager.segmentNote(songPath, songName + '_result.csv')
    distances = []
    for entry in os.listdir(dbPath):
        # Only .csv files in the database directory are references.
        if not entry.endswith('.csv'):
            continue
        reference = manager.readMidi(dbPath, entry)
        dist, _, _ = mlpy.dtw_subsequence(record, reference)
        distances.append([int(dist), entry[:-10]])
    return distances
def squiggle_search2_old(squiggle, kmerhash, seqlen):
    """Match a squiggle against every reference in `kmerhash` with subsequence DTW.

    Parameters:
        squiggle: raw event-current values for the read (sequence of numbers).
        kmerhash: dict mapping reference name -> {"Fprime": ..., "Rprime": ...}
                  expected-signal arrays for the forward and reverse strands.
        seqlen:   unused here; kept for call-compatibility with the newer
                  squiggle_search2 variants.

    Returns (name, distance, strand, ref_start, name, ref_end) for the single
    best-scoring hit over all references and both strands, e.g.
    ('J02459', 41.017, 'F', 10003, 'J02459', 10198).

    Raises IndexError if `kmerhash` is empty (no candidate hits), matching the
    original behaviour.
    """
    result = []
    # The scaled query does not depend on the reference, so compute it once
    # rather than once per reference (the original recomputed it in the loop).
    queryarray = sklearn.preprocessing.scale(np.array(squiggle), axis=0, with_mean=True, with_std=True, copy=True)
    for ref in kmerhash:
        # Forward strand: path[1] holds reference indices of the alignment.
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]["Fprime"])
        result.append((dist, ref, "F", path[1][0], ref, path[1][-1]))
        # Reverse strand.
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]["Rprime"])
        result.append((dist, ref, "R", path[1][0], ref, path[1][-1]))
    # Sort once and reuse — the original sorted the list six times over.
    best = sorted(result, key=lambda hit: hit[0])[0]
    return (best[1], best[0], best[2], best[3], best[4], best[5])
def squiggle_search2(squiggle,channel_id,read_id,args,seqids,threedarray,seqlen): ''' This function matches an incoming squiggle to a reference. Coordinates are returned with respect to the forward strand only. This is important to recall for subsequent downstream processing. Thus a read which is reported as mapping to the reverse strand will report its coodinates on the forward strand. ''' result=[] blocksize=200000 overlap=blocksize-500 for ref in seqids: refid=seqids.index(ref) Rprime,Fprime=threedarray[refid] #queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True) queryarray = sklearn.preprocessing.scale(np.array(squiggle,dtype=float),axis=0,with_mean=True,with_std=True,copy=True) refsubset = Fprime indexes = np.array(xrange(len(refsubset))) subrefs = [refsubset[i:i+blocksize]for i in indexes[::overlap]] for blockid,ref_ in enumerate(subrefs): #current = multiprocessing.current_process() tic = time.time() dist, cost, path = mlpy.dtw_subsequence(queryarray,ref_) #result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1])) result.append((dist,ref,"F",path[1][0]+(blockid*overlap),path[1][-1]+(blockid*overlap),path[0][0],path[0][-1])) #print "Blockid", blockid, time.time()-tic refsubset = Rprime subrefs = [refsubset[i:i+blocksize]for i in indexes[::overlap]] for blockid,ref_ in enumerate(subrefs): #print "Blockid", blockid, time.time() dist, cost, path = mlpy.dtw_subsequence(queryarray,ref_) #result.append((dist,ref,"R",path[1][0]+(blockid*overlap),ref)) #result.append((dist,ref,"R",path[1][0]+(blockid*overlap),path[1][-1]+(blockid*overlap),path[0][0],path[0][-1])) #result.append((dist,ref,"R",(len(Rprime)-(path[1][-1]+(blockid*overlap))),(len(Rprime)-(path[1][0]+(blockid*overlap))),path[0][0],path[0][-1])) #Corrected for the fact that this is a reverse complement 
result.append((dist,ref,"R",(len(Rprime)-(path[1][0]+(blockid*overlap))),(len(Rprime)-(path[1][-1]+(blockid*overlap))),path[0][0],path[0][-1])) # Note first two elements flipped for return deliberately. distanceR,seqmatchnameR,frR,rsR,reR,qsR,qeR=sorted(result,key=lambda result: result[0])[0] return seqmatchnameR,distanceR,frR,rsR,reR,qsR,qeR
testarray = kmerhash2[id][ref]['Rprime'] filename = id+"_"+str(ref)+"_R_subject.bin" filename = re.sub('\|','_',filename) with open(filename, "wb") as f: f.write(ar.array("f", testarray)) filename = id+"_"+str(ref)+"_R_subject.txt" filename = re.sub('\|','_',filename) np.savetxt(filename, testarray, delimiter=',') #ecit db = MySQLdb.connect(host=dbhost, user=dbusername, passwd=dbpass, port=dbport) cursor = db.cursor() #sql = "use minion_PLSP57501_2014_10_10_DSmin1_run2_LambdaSK002_5041" #sql = "use minion_PLSP57501_20140909_JA_defA_4434" sql = "use minion_LomanLabz_013731_11rx_v2_3135" print sql cursor.execute(sql) numbers = range(0,10) for number in numbers: #print number sql = "SELECT basename_id ,pos,flag, channel, read_id,tracking_id.basename,file_path FROM caller_basecalled_template_%s inner join align_sam_basecalled_2d using (basename_id) inner join config_general using (basename_id) inner join tracking_id using (basename_id) group by basename_id limit 1" %(number) #sql = "SELECT basename_id , channel, read_id FROM caller_basecalled_template_%s inner join config_general using (basename_id) where basename_id not in (select basename_id from align_sam_basecalled_template) group by basename_id" %(number) print sql
import sys, os, re
if(retcode is not None): break ####################################################################### def squiggle_search2(squiggle,kmerhash2,channel_id,read_id,seqlen):
#!/usr/bin/env python
#!C:\anaconda python
with open("bench_log.txt", "a") as text_file: text_file.write("\n" + str(amp + 1) + "," + str(window + 1) + ",mlpy," + str(mlpystddist) + "," + str(timet.microseconds) + ',' + str(mlpystdpath[1][0] + count) + ',' + str(mlpystdpath[1][-1] + count)) path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_query_mlpy.txt', mlpystdpath[0], delimiter=',') path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_ref_mlpy.txt', mlpystdpath[1], delimiter=',') timeb = datetime.now() mlpysubdist, mlpysubcost, mlpysubpath = mlpy.dtw_subsequence(x, y) timet = datetime.now() - timeb print("mlpy sub complete on amp " + str(amp + 1)) with open("bench_log.txt", "a") as text_file: text_file.write("\n" + str(amp + 1) + "," + str(window + 1) + ",mlpy_sub," + str(mlpysubdist) + "," + str(timet.microseconds) + ',' + str(mlpysubpath[1][0] + count) + ',' + str(mlpysubpath[1][-1] + count)) path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_query_mlpysub.txt', mlpysubpath[0], delimiter=',') path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_ref_mlpysub.txt', mlpysubpath[1],
def dtwSubsequence(x, y):
    """Align query `x` against reference `y` with subsequence DTW.

    Thin wrapper over mlpy.dtw_subsequence; returns its (dist, cost, path)
    result unchanged.
    """
    alignment = mlpy.dtw_subsequence(x, y)
    return alignment
def dtwCalculate(x, y):
    """Align query `x` against reference `y` with subsequence DTW.

    Returns a 2-tuple:
        (dist, end_index) where `end_index` is the last reference index of the
        warping path, i.e. where the matched subsequence ends in `y`.
    """
    dist, cost, path = mlpy.dtw_subsequence(x, y)
    # path[1] is the reference-side index sequence of the alignment.
    # Use negative indexing instead of the original path[len(path)-1],
    # and avoid rebinding `path` to shadow itself.
    ref_path = path[1]
    return dist, ref_path[-1]
] refLen = len(ref_Ft) #frm_ = refLen - frm #to_ = refLen - to #------------------------------------------------------------------------------ # DTW ... import mlpy def dtw(qry, (ref_F, ref_R)): if len(qry) is 0 : return '-1', -1 dist_F, _, path_F = mlpy.dtw_subsequence(qry, ref_F) dist_R, _, path_R = mlpy.dtw_subsequence(qry, ref_R) if dist_F < dist_R: return 'F', path_F[1][0] - offset else: return 'R', refLen - path_R[1][0] - ampSz + offset #------------------------------------------------------------------------------ # Generate Synthectic Read... def generateRead(): sz = ampSz trg = np.random.randint(0, refLen-sz, 1)[0] # Using t_model ... i,j = trg, trg+sz # NB ascending numerically #print trg, i, j
def map(self, ref_event_levels, query_events):
    """Align the scaled query currents against the scaled reference currents.

    Stores the raw `ref_event_levels` and `query_events` on the instance, then
    runs subsequence DTW over `self.scaled_query_event_currents` and
    `self.scaled_reference_event_currents` (which are presumably derived from
    these inputs elsewhere — confirm against the rest of the class).

    Returns the alignment wrapped in a MapResults object.
    """
    self.ref_event_levels = ref_event_levels
    self.query_events = query_events
    alignment = mlpy.dtw_subsequence(
        self.scaled_query_event_currents,
        self.scaled_reference_event_currents,
    )
    return MapResults(alignment)