def resegment_reads(fast5_path, params, speedy=False, overwrite=False):
    """Re-segment and create anchor alignment from previously base-called fast5 file
    :param fast5_path: path to fast5 file
    :param params: event detection parameters
    :param speedy: boolean option for speedyStatSplit or minknow
    :param overwrite: overwrite a previous event re-segmented event table
    :param name: name of key where events table will be placed (Analyses/'name'/Events)
    :return True when completed
    """
    assert os.path.isfile(fast5_path), "File does not exist: {}".format(fast5_path)
    name = "ReSegmentBasecall_00{}"
    # create Fast5 object
    f5fh = Fast5(fast5_path, read='r+')
    # gather previous event detection
    old_event_table = f5fh.get_basecall_data()
    # assert check_event_table_time(old_event_table), "Old event is not consistent"
    read_id = bytes.decode(f5fh.raw_attributes['read_id'])
    sampling_freq = f5fh.sample_rate
    start_time = f5fh.raw_attributes['start_time']
    # pick event detection algorithm
    signal = f5fh.get_read(raw=True, scale=True)

    if speedy:
        event_table = create_speedy_event_table(signal, sampling_freq, start_time, **params)
        params = merge_dicts([params, {"event_detection": "speedy_stat_split"}])
    else:
        event_table = create_minknow_event_table(signal, sampling_freq, start_time, **params)
        params = merge_dicts([params, {"event_detection": "minknow_event_detect"}])

    keys = ["nanotensor version", "time_stamp"]
    values = ["0.2.0", TimeStamp().posix_date()]
    attributes = merge_dicts([params, dict(zip(keys, values)), f5fh.raw_attributes])
    if f5fh.is_read_rna():
        old_event_table = index_to_time(old_event_table, sampling_freq=sampling_freq, start_time=start_time)
    # set event table
    new_event_table = create_anchor_kmers(new_events=event_table, old_events=old_event_table)
    f5fh.set_new_event_table(name, new_event_table, attributes, overwrite=overwrite)
    # gather new sequence
    sequence = sequence_from_events(new_event_table)
    if f5fh.is_read_rna():
        sequence = ReverseComplement().reverse(sequence)
        sequence = sequence.replace("T", "U")
    quality_scores = '!'*len(sequence)
    fastq = create_fastq_line(read_id+" :", sequence, quality_scores)
    # set fastq
    f5fh.set_fastq(name, fastq)
    return f5fh
Example #2
0
def resegment_reads(fast5_path,
                    params=None,
                    speedy=False,
                    overwrite=True,
                    analysis_path="ReSegmentBasecall_000"):
    """Re-segment and create anchor alignment from previously base-called fast5 file
    :param fast5_path: path to fast5 file
    :param params: event detection parameters
    :param speedy: boolean option for speedyStatSplit or minknow
    :param overwrite: overwrite a previous event re-segmented event table
    :param analysis_path: name of key where events table will be placed (Analyses/'name'/Events)
    :return True when completed
    """
    assert os.path.isfile(fast5_path), "File does not exist: {}".format(
        fast5_path)
    # create Fast5 object and sanity check
    f5fh = Fast5(fast5_path, read='r+')
    if not f5fh.has_basecall_data():
        f5fh.close()
        return None

    # gather previous event detection
    old_event_table = f5fh.get_basecall_data()

    read_id = bytes.decode(f5fh.raw_attributes['read_id'])
    sampling_freq = f5fh.sample_rate
    start_time = f5fh.raw_attributes['start_time']

    # get params
    if params is None:
        params = get_default_event_detection_params(
            EVENT_DETECT_SPEEDY if speedy else EVENT_DETECT_MINKNOW)

    # pick event detection algorithm
    signal = f5fh.get_read(raw=True, scale=True)
    if speedy:
        event_table = create_speedy_event_table(signal, sampling_freq,
                                                start_time, **params)
        params = merge_dicts(
            [params, {
                "event_detection": "speedy_stat_split"
            }])
    else:
        event_table = create_minknow_event_table(signal, sampling_freq,
                                                 start_time, **params)
        params = merge_dicts(
            [params, {
                "event_detection": "minknow_event_detect"
            }])

    # metadata
    keys = ["nanotensor version", "time_stamp"]
    values = ["0.2.0", TimeStamp().posix_date()]
    attributes = merge_dicts(
        [params, dict(zip(keys, values)), f5fh.raw_attributes])

    # do resegmentation
    if f5fh.is_read_rna():
        old_event_table = index_to_time(old_event_table,
                                        sampling_freq=sampling_freq,
                                        start_time=start_time)
    new_event_table = create_anchor_kmers(new_events=event_table,
                                          old_events=old_event_table)

    # get destination in fast5
    #todo find latest location? ie: save_event_table_and_fastq(..)
    destination = f5fh._join_path(f5fh.__base_analysis__, analysis_path)

    f5fh.set_event_table(destination,
                         new_event_table,
                         attributes,
                         overwrite=overwrite)

    # gather new sequence
    sequence = sequence_from_events(new_event_table)
    if f5fh.is_read_rna():
        sequence = ReverseComplement().reverse(sequence)
        sequence = sequence.replace("T", "U")
    quality_scores = '!' * len(sequence)
    fastq = create_fastq_line(read_id + " :", sequence, quality_scores)

    # set fastq
    f5fh.set_fastq(destination, fastq, overwrite=overwrite)
    return f5fh