def process_hdf5(arg):
    """Match the reads of one HDF5 file against the reference squiggle.

    Args:
        arg: A single 6-tuple ``(filename, seqids, threedarray, procampres,
            seqlen, args)``. ``procampres`` is unpacked but not used here.

    Returns:
        A tuple ``(result, filename, squiggleres)`` where ``squiggleres`` is
        the value returned by ``squiggle_search2`` for the last read in the
        file and ``result`` is the most recent value successfully returned by
        ``go_or_no``. Either is ``None`` when it was never produced (the file
        contains no reads, or ``go_or_no`` failed for every read).
    """
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # Pre-bind the return values so that an empty file, or a go_or_no failure
    # on the first read, cannot trigger an UnboundLocalError at ``return``.
    result = None
    squiggleres = None

    # The context manager closes the file even when the search raises.
    with h5py.File(filename, 'r') as hdf:
        reads = hdf['Analyses']['EventDetection_000']['Reads']
        for read in reads:
            events = reads[read]['Events'][()]
            # Only the first field of each event record is used.
            event_collection = [float(event[0]) for event in events]

            # We ignore the first 50 events (Protein driver) and process the
            # following 250 events.
            squiggle = event_collection[50:300]

            # Search squiggle in reference squiggle.
            squiggleres = squiggle_search2(squiggle, 0, 0, args, seqids,
                                           threedarray, seqlen)
            try:
                result = go_or_no(squiggleres[0], squiggleres[2],
                                  squiggleres[3], seqlen, args)
            except Exception as err:
                # Best effort: log and carry on with the next read. A result
                # from an earlier read, if any, is retained.
                logger.error("%s", err)

    return (result, filename, squiggleres)
def process_hdf5(arg):
    """Match the reads of one HDF5 file against the reference squiggle.

    For every read, the search result fields 0, 2 and 3 are printed and then
    passed to ``go_or_no``; a failure there is reported on stdout and the loop
    continues with the next read.

    Args:
        arg: A single 6-tuple ``(filename, seqids, threedarray, procampres,
            seqlen, args)``. ``procampres`` is unpacked but not used here.

    Returns:
        None. The outcome is only printed.
    """
    # Explicit unpacking replaces the Python-2-only tuple parameter; callers
    # still pass exactly one positional tuple.
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # The context manager guarantees the file handle is released, including
    # when squiggle_search2 raises.
    with h5py.File(filename, 'r') as hdf:
        reads = hdf['Analyses']['EventDetection_000']['Reads']
        for read in reads:
            events = reads[read]['Events'][()]
            # Only the first field of each event record is used.
            event_collection = [float(event[0]) for event in events]
            squiggle = event_collection[50:300]

            # Positional order: squiggle, channel_id, read_id, args, seqids,
            # threedarray, seqlen (channel_id and read_id are fixed to 0).
            squiggleres = squiggle_search2(squiggle, 0, 0, args, seqids,
                                           threedarray, seqlen)

            # Single-argument print() produces identical output on Python 2
            # and Python 3.
            print("%s %s %s" % (squiggleres[0], squiggleres[2],
                                squiggleres[3]))
            try:
                go_or_no(squiggleres[0], squiggleres[2], squiggleres[3],
                         seqlen, args)
            except Exception as err:
                print("ERROR")
                print(err)
def mp_worker(task):
    """Decide whether a read should be sequenced, skipped, or has timed out.

    Args:
        task: A single 6-tuple ``(channel_id, data, kmerhash, seqlen,
            readstarttime, kmerhash_subset)``. ``data`` must expose
            ``read_id`` and ``events``.

    Returns:
        * ``('timeout', channel_id, read_id, start)`` when more than
          ``args.time`` seconds have passed since ``readstarttime``;
        * ``(result, channel_id, read_id, start, squiggleres)`` with
          ``result`` being ``"Sequence"`` or ``"Skip"`` otherwise;
        * ``None`` when any exception is raised while matching (the error is
          written to stderr).
    """
    # Explicit unpacking replaces the Python-2-only tuple parameter; callers
    # still pass exactly one positional tuple.
    channel_id, data, kmerhash, seqlen, readstarttime, kmerhash_subset = task

    # NOTE: ``args`` is read from module scope, it is not part of ``task``.
    if (time.time() - readstarttime) > args.time:
        print("We have a timeout")
        return 'timeout', channel_id, data.read_id, data.events[0].start

    try:
        print("Read start time %s" % (readstarttime,))
        squiggle = extractsquig(data.events)

        # Search against the full reference; this result is what is returned.
        squiggleres = squiggle_search2(squiggle, channel_id, data.read_id,
                                       kmerhash, seqlen)
        print("Full Length: %s" % (squiggleres,))
        print("Full Length Match Length: %s"
              % (squiggleres[5] - squiggleres[3],))

        # Search the whole squiggle and two sub-windows against the subset.
        squiggleres2 = squiggle_search2(squiggle, channel_id, data.read_id,
                                        kmerhash_subset, seqlen)
        squiggleres3 = squiggle_search2(squiggle[100:200], channel_id,
                                        data.read_id, kmerhash_subset, seqlen)
        squiggleres4 = squiggle_search2(squiggle[50:100], channel_id,
                                        data.read_id, kmerhash_subset, seqlen)

        # Fields 3 and 5 are presumably match start/end positions (their
        # difference is reported as "Match Length") - TODO confirm. The read
        # is accepted when field 3 of both window matches lies strictly
        # inside the subset match and the [50:100] window precedes the
        # [100:200] window.
        if (squiggleres2[5] > squiggleres3[3] > squiggleres2[3]
                and squiggleres3[3] > squiggleres4[3] > squiggleres2[3]):
            print("!!!!!!!!!!!!!!!! We got a good one! !!!!!!!!!!!!!!!!")
            print("Subset: %s" % (squiggleres2,))
            print("Subset Match Length: %s"
                  % (squiggleres2[5] - squiggleres2[3],))
            print("SecondHalf: %s" % (squiggleres3,))
            print("SecondHalf: %s" % (squiggleres3[5] - squiggleres3[3],))
            print("FirstHalf: %s %s"
                  % (squiggleres4, squiggleres4[5] - squiggleres4[3]))
            result = "Sequence"
        else:
            result = "Skip"

        return (result, channel_id, data.read_id, data.events[0].start,
                squiggleres)
    except Exception as err:
        # Best effort: report and fall through, implicitly returning None.
        sys.stderr.write("Time Warping Stuff : %s\n" % (err,))
def mp_worker(task):
    """Classify one read as ``"Sequence"``, ``"Skip"`` or ``"timeout"``.

    Args:
        task: A single 6-tuple ``(channel_id, data, kmerhash, seqlen,
            readstarttime, kmerhash_subset)``. ``data`` must expose
            ``read_id`` and ``events``.

    Returns:
        * ``("timeout", channel_id, read_id, start)`` when more than
          ``args.time`` seconds have passed since ``readstarttime``;
        * ``(result, channel_id, read_id, start, squiggleres)`` with
          ``result`` being ``"Sequence"`` or ``"Skip"`` otherwise;
        * ``None`` when any exception is raised while matching (the error is
          written to stderr).
    """
    # The Python-2-only tuple parameter is replaced by explicit unpacking;
    # the function is still called with exactly one positional tuple.
    channel_id, data, kmerhash, seqlen, readstarttime, kmerhash_subset = task

    # NOTE: ``args`` comes from module scope, not from ``task``.
    if (time.time() - readstarttime) > args.time:
        print("We have a timeout")
        return "timeout", channel_id, data.read_id, data.events[0].start

    try:
        print("Read start time %s" % (readstarttime,))
        squiggle = extractsquig(data.events)

        # Match against the full reference; this result is the one returned.
        squiggleres = squiggle_search2(
            squiggle, channel_id, data.read_id, kmerhash, seqlen
        )
        print("Full Length: %s" % (squiggleres,))
        print("Full Length Match Length: %s" % (squiggleres[5] - squiggleres[3],))

        # Match the whole squiggle and two sub-windows against the subset.
        squiggleres2 = squiggle_search2(
            squiggle, channel_id, data.read_id, kmerhash_subset, seqlen
        )
        squiggleres3 = squiggle_search2(
            squiggle[100:200], channel_id, data.read_id, kmerhash_subset, seqlen
        )
        squiggleres4 = squiggle_search2(
            squiggle[50:100], channel_id, data.read_id, kmerhash_subset, seqlen
        )

        # Fields 3 and 5 are presumably match start/end positions (their
        # difference is printed as "Match Length") - TODO confirm. Accept the
        # read when field 3 of both window matches falls strictly inside the
        # subset match, with the [50:100] window before the [100:200] one.
        if (
            squiggleres2[5] > squiggleres3[3] > squiggleres2[3]
            and squiggleres3[3] > squiggleres4[3] > squiggleres2[3]
        ):
            print("!!!!!!!!!!!!!!!! We got a good one! !!!!!!!!!!!!!!!!")
            print("Subset: %s" % (squiggleres2,))
            print("Subset Match Length: %s" % (squiggleres2[5] - squiggleres2[3],))
            print("SecondHalf: %s" % (squiggleres3,))
            print("SecondHalf: %s" % (squiggleres3[5] - squiggleres3[3],))
            print(
                "FirstHalf: %s %s"
                % (squiggleres4, squiggleres4[5] - squiggleres4[3])
            )
            result = "Sequence"
        else:
            result = "Skip"

        return result, channel_id, data.read_id, data.events[0].start, squiggleres
    except Exception as err:
        # Best effort: report the failure and implicitly return None.
        sys.stderr.write("Time Warping Stuff : %s\n" % (err,))
def process_hdf5(arg):
    """Match the reads of one HDF5 file against the reference squiggle.

    For every read, the search result fields 0, 2 and 3 are printed and then
    passed to ``go_or_no``; a failure there is reported on stdout and the loop
    continues with the next read.

    Args:
        arg: A single 6-tuple ``(filename, seqids, threedarray, procampres,
            seqlen, args)``. ``procampres`` is unpacked but not used here.

    Returns:
        None. The outcome is only printed.
    """
    # The Python-2-only tuple parameter is replaced by explicit unpacking;
    # the function is still called with exactly one positional tuple.
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # Using the file as a context manager releases the handle on every exit
    # path, including an exception from squiggle_search2.
    with h5py.File(filename, "r") as hdf:
        reads = hdf["Analyses"]["EventDetection_000"]["Reads"]
        for read in reads:
            events = reads[read]["Events"][()]
            # Only the first field of each event record is used.
            event_collection = [float(event[0]) for event in events]
            squiggle = event_collection[50:300]

            # Positional order: squiggle, channel_id, read_id, args, seqids,
            # threedarray, seqlen (channel_id and read_id are fixed to 0).
            squiggleres = squiggle_search2(
                squiggle, 0, 0, args, seqids, threedarray, seqlen
            )

            # Single-argument print() gives identical output on Python 2
            # and Python 3.
            print("%s %s %s" % (squiggleres[0], squiggleres[2], squiggleres[3]))
            try:
                go_or_no(squiggleres[0], squiggleres[2], squiggleres[3], seqlen, args)
            except Exception as err:
                print("ERROR")
                print(err)