Exemple #1
0
def process_hdf5(arg):
    """Search every read of one HDF5 file against the reference squiggles.

    Args:
        arg: A single 6-item sequence
            ``(filename, seqids, threedarray, procampres, seqlen, args)``.
            ``procampres`` is unpacked but not used by this function.

    Returns:
        tuple: ``(result, filename, squiggleres)`` describing the *last* read
        visited in the file.  ``squiggleres`` is whatever
        ``squiggle_search2`` returned for that read and ``result`` is the
        matching ``go_or_no`` return value.  ``result`` is ``None`` when
        ``go_or_no`` raised for that read, and both are ``None`` when the
        file contains no reads.
    """
    # Extract info from arguments
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # Defaults so the return statement is well-defined even when the file
    # holds no reads or the decision step fails.
    result = None
    squiggleres = None

    # The context manager closes the file even if the search below raises.
    with h5py.File(filename, 'r') as hdf:
        reads = hdf['Analyses']['EventDetection_000']['Reads']

        # Process each read in the hdf file
        for read in reads:
            events = reads[read]['Events'][()]

            # We ignore the first 50 events (Protein driver) and process the
            # following 250 events; only the first field of each event is used.
            squiggle = [float(event[0]) for event in events[50:300]]

            # Search squiggle in reference squiggle
            squiggleres = squiggle_search2(squiggle, 0, 0, args, seqids,
                                           threedarray, seqlen)

            try:
                result = go_or_no(squiggleres[0], squiggleres[2],
                                  squiggleres[3], seqlen, args)
            except Exception as err:
                logger.error("%s", err)
                # Do not pair this read's search result with a decision
                # that was computed for an earlier read.
                result = None

    return (result, filename, squiggleres)
Exemple #2
0
def process_hdf5(arg):
    """Run the squiggle search on every read of one HDF5 file and print it.

    The signature and statements are written so that they run unchanged on
    Python 2.6+ and Python 3 (no tuple parameters, no ``print`` statement,
    ``except ... as``).

    Args:
        arg: A single 6-item sequence
            ``(filename, seqids, threedarray, procampres, seqlen, args)``.
            ``procampres`` is unpacked but not used by this function.

    Returns:
        None.  The ``go_or_no`` return value is discarded; only the search
        result fields and any error are printed.
    """
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # The context manager guarantees the file handle is released.
    with h5py.File(filename, 'r') as hdf:
        reads = hdf['Analyses']['EventDetection_000']['Reads']
        for read in reads:
            events = reads[read]['Events'][()]

            # Events 50..299 only; the first field of each event is used.
            squiggle = [float(event[0]) for event in events[50:300]]

            # squiggle,channel_id,read_id,args,seqids,threedarray,seqlen
            squiggleres = squiggle_search2(squiggle, 0, 0, args, seqids,
                                           threedarray, seqlen)

            print("%s %s %s" % (squiggleres[0], squiggleres[2],
                                squiggleres[3]))
            try:
                # seqid,direction,position,seqlen,args
                go_or_no(squiggleres[0], squiggleres[2], squiggleres[3],
                         seqlen, args)
            except Exception as err:
                print("ERROR")
                print(err)
Exemple #3
0
def mp_worker(arg):
    """Decide whether one read should be sequenced or skipped.

    The signature and statements are written so that they run unchanged on
    Python 2.6+ and Python 3 (no tuple parameters, no ``print`` statement,
    ``except ... as``).

    ``args`` is not a parameter: ``args.time`` is read from the enclosing
    (module) scope and is compared against the elapsed seconds since
    ``readstarttime``.

    Args:
        arg: A single 6-item sequence ``(channel_id, data, kmerhash, seqlen,
            readstarttime, kmerhash_subset)``.  ``data`` must expose
            ``read_id`` and ``events`` (``events[0].start`` is read).

    Returns:
        * ``('timeout', channel_id, data.read_id, data.events[0].start)``
          when more than ``args.time`` seconds have elapsed;
        * ``(result, channel_id, data.read_id, data.events[0].start,
          squiggleres)`` otherwise, where ``result`` is ``"Sequence"`` or
          ``"Skip"`` and ``squiggleres`` is the search against ``kmerhash``;
        * ``None`` when anything in the search raised (the error is written
          to ``sys.stderr``).
    """
    channel_id, data, kmerhash, seqlen, readstarttime, kmerhash_subset = arg

    if (time.time() - readstarttime) > args.time:
        print("We have a timeout")
        return 'timeout', channel_id, data.read_id, data.events[0].start

    try:
        print("Read start time %s" % (readstarttime,))
        squiggle = extractsquig(data.events)

        # Whole squiggle against the full reference.
        squiggleres = squiggle_search2(squiggle, channel_id, data.read_id,
                                       kmerhash, seqlen)
        print("Full Length: %s" % (squiggleres,))
        print("Full Length Match Length: %s" %
              (squiggleres[5] - squiggleres[3],))

        # Whole squiggle and two sub-windows against the subset reference.
        squiggleres2 = squiggle_search2(squiggle, channel_id, data.read_id,
                                        kmerhash_subset, seqlen)
        squiggleres3 = squiggle_search2(squiggle[100:200], channel_id,
                                        data.read_id, kmerhash_subset,
                                        seqlen)
        squiggleres4 = squiggle_search2(squiggle[50:100], channel_id,
                                        data.read_id, kmerhash_subset,
                                        seqlen)

        # Accept only when field 3 of both window matches lies strictly
        # inside the full subset match (fields 3..5) and the 50:100 window
        # lands before the 100:200 window.
        # NOTE(review): fields 3 and 5 look like match start/end positions,
        # judging by the "Match Length" output -- confirm in squiggle_search2.
        if (squiggleres2[5] > squiggleres3[3] > squiggleres2[3]
                and squiggleres3[3] > squiggleres4[3] > squiggleres2[3]):
            print("!!!!!!!!!!!!!!!! We got a good one! !!!!!!!!!!!!!!!!")
            print("Subset: %s" % (squiggleres2,))
            print("Subset Match Length: %s" %
                  (squiggleres2[5] - squiggleres2[3],))
            print("SecondHalf: %s" % (squiggleres3,))
            print("SecondHalf: %s" % (squiggleres3[5] - squiggleres3[3],))
            print("FirstHalf: %s %s" %
                  (squiggleres4, squiggleres4[5] - squiggleres4[3]))
            result = "Sequence"
        else:
            result = "Skip"

        return result, channel_id, data.read_id, data.events[
            0].start, squiggleres
    except Exception as err:
        err_string = "Time Warping Stuff : %s" % (err,)
        sys.stderr.write(err_string + "\n")
        # Best-effort worker: report the failure and hand back None.
        return None
def mp_worker(arg):
    """Decide whether one read should be sequenced or skipped.

    The signature and statements are written so that they run unchanged on
    Python 2.6+ and Python 3 (no tuple parameters, no ``print`` statement,
    ``except ... as``).

    ``args`` is not a parameter: ``args.time`` is read from the enclosing
    (module) scope and is compared against the elapsed seconds since
    ``readstarttime``.

    Args:
        arg: A single 6-item sequence ``(channel_id, data, kmerhash, seqlen,
            readstarttime, kmerhash_subset)``.  ``data`` must expose
            ``read_id`` and ``events`` (``events[0].start`` is read).

    Returns:
        * ``("timeout", channel_id, data.read_id, data.events[0].start)``
          when more than ``args.time`` seconds have elapsed;
        * ``(result, channel_id, data.read_id, data.events[0].start,
          squiggleres)`` otherwise, where ``result`` is ``"Sequence"`` or
          ``"Skip"`` and ``squiggleres`` is the search against ``kmerhash``;
        * ``None`` when anything in the search raised (the error is written
          to ``sys.stderr``).
    """
    channel_id, data, kmerhash, seqlen, readstarttime, kmerhash_subset = arg

    if (time.time() - readstarttime) > args.time:
        print("We have a timeout")
        return "timeout", channel_id, data.read_id, data.events[0].start

    try:
        print("Read start time %s" % (readstarttime,))
        squiggle = extractsquig(data.events)

        # Whole squiggle against the full reference.
        squiggleres = squiggle_search2(squiggle, channel_id, data.read_id, kmerhash, seqlen)
        print("Full Length: %s" % (squiggleres,))
        print("Full Length Match Length: %s" % (squiggleres[5] - squiggleres[3],))

        # Whole squiggle and two sub-windows against the subset reference.
        squiggleres2 = squiggle_search2(squiggle, channel_id, data.read_id, kmerhash_subset, seqlen)
        squiggleres3 = squiggle_search2(squiggle[100:200], channel_id, data.read_id, kmerhash_subset, seqlen)
        squiggleres4 = squiggle_search2(squiggle[50:100], channel_id, data.read_id, kmerhash_subset, seqlen)

        # Accept only when field 3 of both window matches lies strictly
        # inside the full subset match (fields 3..5) and the 50:100 window
        # lands before the 100:200 window.
        # NOTE(review): fields 3 and 5 look like match start/end positions,
        # judging by the "Match Length" output -- confirm in squiggle_search2.
        if (
            squiggleres2[5] > squiggleres3[3] > squiggleres2[3]
            and squiggleres3[3] > squiggleres4[3] > squiggleres2[3]
        ):
            print("!!!!!!!!!!!!!!!! We got a good one! !!!!!!!!!!!!!!!!")
            print("Subset: %s" % (squiggleres2,))
            print("Subset Match Length: %s" % (squiggleres2[5] - squiggleres2[3],))
            print("SecondHalf: %s" % (squiggleres3,))
            print("SecondHalf: %s" % (squiggleres3[5] - squiggleres3[3],))
            print("FirstHalf: %s %s" % (squiggleres4, squiggleres4[5] - squiggleres4[3]))
            result = "Sequence"
        else:
            result = "Skip"

        return result, channel_id, data.read_id, data.events[0].start, squiggleres
    except Exception as err:
        err_string = "Time Warping Stuff : %s" % (err,)
        sys.stderr.write(err_string + "\n")
        # Best-effort worker: report the failure and hand back None.
        return None
def process_hdf5(arg):
    """Run the squiggle search on every read of one HDF5 file and print it.

    The signature and statements are written so that they run unchanged on
    Python 2.6+ and Python 3 (no tuple parameters, no ``print`` statement,
    ``except ... as``).

    Args:
        arg: A single 6-item sequence
            ``(filename, seqids, threedarray, procampres, seqlen, args)``.
            ``procampres`` is unpacked but not used by this function.

    Returns:
        None.  The ``go_or_no`` return value is discarded; only the search
        result fields and any error are printed.
    """
    filename, seqids, threedarray, procampres, seqlen, args = arg

    # The context manager guarantees the file handle is released.
    with h5py.File(filename, "r") as hdf:
        reads = hdf["Analyses"]["EventDetection_000"]["Reads"]
        for read in reads:
            events = reads[read]["Events"][()]

            # Events 50..299 only; the first field of each event is used.
            squiggle = [float(event[0]) for event in events[50:300]]

            # squiggle,channel_id,read_id,args,seqids,threedarray,seqlen
            squiggleres = squiggle_search2(squiggle, 0, 0, args, seqids, threedarray, seqlen)

            print("%s %s %s" % (squiggleres[0], squiggleres[2], squiggleres[3]))
            try:
                # seqid,direction,position,seqlen,args
                go_or_no(squiggleres[0], squiggleres[2], squiggleres[3], seqlen, args)
            except Exception as err:
                print("ERROR")
                print(err)