Ejemplo n.º 1
0
def _count_moves(moves):
    """Return the number of events with each move value, as a list indexed 0..5."""
    ## only move values 0..5 are expected; anything else raises KeyError
    counts = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0}
    for move in moves:
        counts[move] += 1
    return [counts[value] for value in range(6)]


def get_more_fragstats(f5, hascomp, check_time = False):
    """Collect extended per-read statistics from the input and basecalled events.

    f5 is the connection to the fast5 file.
    hascomp: when true, complement events are loaded and their moves are
        tallied; otherwise the six complement columns are "-" placeholders.
    check_time: when true, one extra trailing element is appended:
        int(has_time_error_list(<input event start times>)).

    Returns a list, in this order:
        start_time, end_time, slope,
        mean / sd / median / min / max of the input event lengths,
        template move counts for moves 0..5,
        complement move counts for moves 0..5 (or "-" x 6),
        [time error flag]   (only when check_time is true)

    slope is (last start - first start) / (nevents - 1). With fewer than two
    input events it is undefined and reported as "-".
    """
    basecalled = is_basecalled(f5)
    input_events = store_input_events(f5, basecalled)
    starts = input_events['start']
    lengths = input_events['length']

    if check_time:
        time_error = [int(has_time_error_list(starts))]
    else:
        time_error = []

    start_time = starts[0]
    end_time = starts[-1]
    nevents = len(starts)
    ## a single event would make the denominator zero, so use the same
    ## "-" placeholder that marks unavailable complement columns
    if nevents > 1:
        slope = (end_time-start_time)/(nevents-1)
    else:
        slope = "-"

    duration_stats = [np.mean(lengths), np.std(lengths), np.median(lengths), min(lengths), max(lengths)]

    template_events = store_template_events(f5, basecalled, getbasecallinfo=True)
    tmoves = _count_moves(template_events['move'])
    if hascomp:
        comp_events = store_complement_events(f5, basecalled, getbasecallinfo=True)
        cmoves = _count_moves(comp_events['move'])
    else:
        cmoves = ["-"] * 6

    return [start_time, end_time, slope] + duration_stats + tmoves + cmoves + time_error
Ejemplo n.º 2
0
def frag_fxn(fast5, args):
    """Print one tab-separated line of fragment statistics for a fast5 read.

    fast5: read object; it is passed to get_frag_stats and
        get_regex_counts_in_fast5, and its .hdf5file attribute is passed to
        the event-level helpers.
    args: options object. Attributes read here:
        quadruplex - append G4 motif regex counts
        g4motif    - "minG,maxN" string, parsed only when quadruplex is set
        extensive  - append the columns from get_more_fragstats
        checktime  - append the time error flag

    Column order: base stats, regex counts (if quadruplex), then either the
    extended stats (which carry the time error flag themselves when
    checktime is set) or, without extensive, the time error flag alone.
    """
    fragstats = get_frag_stats(fast5)
    if args.quadruplex:
        minG, maxN = (int(e) for e in args.g4motif.split(","))
        regex = re.compile(get_g4_regex(minG, maxN))
        revregex = re.compile(get_g4_revregex(minG, maxN))
    if args.extensive:
        hascomp = has_complement(fast5.hdf5file)
        if args.quadruplex:
            fragstats += get_regex_counts_in_fast5(fast5, regex, revregex)
        fragstats += get_more_fragstats(fast5.hdf5file, hascomp = hascomp, check_time = bool(args.checktime))
    else:
        if args.quadruplex:
            fragstats += get_regex_counts_in_fast5(fast5, regex, revregex)
        if args.checktime:
            ## basecalled was previously used here without being assigned
            ## (NameError); derive it the same way get_more_fragstats does
            basecalled = is_basecalled(fast5.hdf5file)
            input_events = store_input_events(fast5.hdf5file, basecalled)
            fragstats += [int(has_time_error_list(input_events['start']))]
    ## join first, then print: identical output as a Python 2 print statement
    ## and as a Python 3 print() call
    print("\t".join([str(e) for e in fragstats]))