Beispiel #1
0
def run(args):
    """Convert per-utterance SAD output into a segments file.

    If ``args.utt2dur`` is given, utterance durations are loaded from it
    first.  Every line of ``args.in_sad`` is then split on whitespace: the
    first field is the utterance id and the remaining fields are handed to
    ``Segmentation.initialize_segments`` together with ``args.frame_shift``.
    The segmentation is padded via ``pad_speech_segments`` (using
    ``args.segment_padding`` and the utterance duration, or ``None`` when no
    utt2dur file was given), written to ``args.out_segments`` and its stats
    are accumulated.  The accumulated stats are logged at the end.

    Args:
        args: Parsed options; reads ``utt2dur``, ``in_sad``,
            ``out_segments``, ``frame_shift`` and ``segment_padding``.

    Raises:
        RuntimeError: If a line of either input file cannot be parsed.
    """
    utt2dur = {}
    if args.utt2dur is not None:
        with common_lib.smart_open(args.utt2dur) as utt2dur_fh:
            for line in utt2dur_fh:
                parts = line.strip().split()
                if len(parts) != 2:
                    raise RuntimeError("Unable to parse line '{0}' in {1}"
                                       "".format(line.strip(), args.utt2dur))
                utt2dur[parts[0]] = float(parts[1])

    global_stats = SegmenterStats()
    with common_lib.smart_open(args.in_sad) as in_sad_fh, \
            common_lib.smart_open(args.out_segments, 'w') as out_segments_fh:
        for line in in_sad_fh:
            parts = line.strip().split()

            # Validate before indexing so that a blank or truncated line is
            # reported as a parse error instead of a bare IndexError, and
            # name the input path (not the file object) in the message.
            if len(parts) < 2:
                raise RuntimeError("Unable to parse line '{0}' in {1}"
                                   "".format(line.strip(),
                                             args.in_sad))
            utt_id = parts[0]

            segmentation = Segmentation()
            segmentation.initialize_segments(
                parts[1:], args.frame_shift)
            segmentation.pad_speech_segments(args.segment_padding,
                                             None if args.utt2dur is None
                                             else utt2dur[utt_id])
            segmentation.write(utt_id, out_segments_fh)
            global_stats.add(segmentation.stats)
    logger.info(global_stats)
Beispiel #2
0
def run(args):
    """Turn SAD output lines into padded segments and log overall stats.

    Durations are read from ``args.utt2dur`` when it is not ``None``.  Each
    line of ``args.in_sad`` must contain an utterance id followed by at
    least one more field; the trailing fields are passed to
    ``Segmentation.initialize_segments`` with ``args.frame_shift``.  The
    segmentation is padded with ``args.segment_padding`` (bounded by the
    utterance duration when available), written to ``args.out_segments``
    and its stats are added to a ``SegmenterStats`` accumulator.

    Args:
        args: Parsed options; reads ``utt2dur``, ``in_sad``,
            ``out_segments``, ``frame_shift`` and ``segment_padding``.

    Raises:
        RuntimeError: If a line of either input file cannot be parsed.
    """
    utt2dur = {}
    if args.utt2dur is not None:
        with common_lib.smart_open(args.utt2dur) as utt2dur_fh:
            for line in utt2dur_fh:
                parts = line.strip().split()
                if len(parts) != 2:
                    raise RuntimeError("Unable to parse line '{0}' in {1}"
                                       "".format(line.strip(), args.utt2dur))
                utt2dur[parts[0]] = float(parts[1])

    global_stats = SegmenterStats()
    with common_lib.smart_open(args.in_sad) as in_sad_fh, \
            common_lib.smart_open(args.out_segments, 'w') as out_segments_fh:
        for line in in_sad_fh:
            parts = line.strip().split()

            # The length check must come before parts[0] is touched,
            # otherwise an empty line raises IndexError.  The message names
            # the input path rather than the open file object.
            if len(parts) < 2:
                raise RuntimeError("Unable to parse line '{0}' in {1}"
                                   "".format(line.strip(), args.in_sad))
            utt_id = parts[0]

            segmentation = Segmentation()
            segmentation.initialize_segments(parts[1:], args.frame_shift)
            segmentation.pad_speech_segments(
                args.segment_padding,
                None if args.utt2dur is None else utt2dur[utt_id])
            segmentation.write(utt_id, out_segments_fh)
            global_stats.add(segmentation.stats)
    logger.info(global_stats)
Beispiel #3
0
def run(args):
    """Merge column-wise pasted target matrices into one matrix per key.

    Every matrix read from ``args.pasted_targets`` must have a column count
    that is a multiple of ``args.dim``.  It is treated as consecutive
    ``args.dim``-wide column groups ("sources") which are summed, each
    scaled by ``args.weights[i]`` when weights are given and by 1.0
    otherwise.  When ``args.remove_mismatch_frames`` is set, rows for which
    ``should_remove_frame`` returns true are left as all zeros.  The result
    is written to ``args.out_targets`` under the same key.

    Raises:
        RuntimeError: If a matrix has an incompatible number of columns, or
            if no matrix was processed at all.
    """

    def scale_for(source_idx):
        # Unit weight unless explicit per-source weights were supplied.
        if args.weights is None:
            return 1.0
        return args.weights[source_idx]

    merged = 0
    with common_lib.smart_open(args.pasted_targets) as targets_reader, \
            common_lib.smart_open(args.out_targets, "w") as targets_writer:
        for key, mat in common_lib.read_mat_ark(targets_reader):
            mat = np.matrix(mat)
            dim = args.dim
            num_rows, num_cols = mat.shape
            if num_cols % dim != 0:
                raise RuntimeError(
                    "For utterance {utt} in {f}, num-columns {nc} "
                    "is not a multiple of dim {dim}"
                    "".format(utt=key, f=args.pasted_targets.name,
                              nc=num_cols, dim=dim))
            num_sources = num_cols // dim

            out_mat = np.matrix(np.zeros([num_rows, dim]))

            if not args.remove_mismatch_frames:
                # Plain interpolation: add up all weighted sources at once.
                for src in range(num_sources):
                    lo = src * dim
                    out_mat += mat[:, lo:lo + dim] * scale_for(src)
            else:
                # Row by row, so that rejected frames can be zeroed out.
                for row in range(num_rows):
                    if should_remove_frame(mat[row, :].getA()[0], dim):
                        out_mat[row, :] = np.zeros([1, dim])
                        continue
                    for src in range(num_sources):
                        lo = src * dim
                        out_mat[row, :] += (mat[row, lo:lo + dim]
                                            * scale_for(src))

            common_lib.write_matrix_ascii(targets_writer, out_mat.tolist(),
                                          key=key)
            merged += 1

    logger.info("Merged {num_done} target matrices"
                "".format(num_done=merged))

    if merged == 0:
        raise RuntimeError
def load_rttm(file):
    """Read an RTTM-style file into a dict keyed by utterance id.

    For every line, field 1 is split on ``'_'`` (first piece is the session
    id, last piece the reference), field 3 is the start time in seconds,
    field 4 the duration and field 7 the speaker.  The key has the form
    ``"<spkr>-<int(start*100), 6 digits>-<int(end*100), 6 digits>"`` and the
    value is the tuple ``(end_time_str, start_time_str, "NA", spkr,
    reference, "NA", sessionid)``; note that the end time comes first.

    Returns:
        dict: utterance id -> 7-tuple as described above.
    """

    def _as_clock(seconds):
        # str(timedelta) yields "H:MM:SS[.ffffff]"; re-render the seconds
        # field with exactly two decimals and glue the pieces back together.
        pieces = str(timedelta(seconds=seconds)).split(':')
        return (pieces[0] + ':' + pieces[1] + ':'
                + "{0:.2f}".format(float(pieces[2])))

    recoid_dict = {}
    with common_lib.smart_open(file, 'r') as f:
        for line in f:
            parts = line.strip().split()
            name_pieces = parts[1].split('_')
            sessionid, reference = name_pieces[0], name_pieces[-1]

            start_time = float(parts[3])
            start_time_str = _as_clock(start_time)

            end_time = start_time + float(parts[4])
            end_time_str = _as_clock(end_time)

            spkr = parts[7]
            # Times are encoded in the key as integer hundredths of a second.
            utt = "{0}-{1:06d}-{2:06d}".format(
                spkr, int(start_time * 100), int(end_time * 100))
            recoid_dict[utt] = (end_time_str, start_time_str, "NA", spkr,
                                reference, "NA", sessionid)
    return recoid_dict
Beispiel #5
0
def main():
    """Parse the arguments and run ``print_states`` on the output file.

    The file named by ``args.output_graph`` is opened for writing and the
    handle is passed to ``print_states``.  Exceptions propagate unchanged.
    """
    args = get_args()
    with common_lib.smart_open(args.output_graph, 'w') as graph_fh:
        print_states(args, graph_fh)
Beispiel #6
0
def main():
    """Entry point: open ``args.output_graph`` and call ``print_states``.

    Any exception raised while parsing arguments or writing the output is
    passed on to the caller as is.
    """
    options = get_args()
    with common_lib.smart_open(options.output_graph, 'w') as out_fh:
        print_states(options, out_fh)
Beispiel #7
0
def main():
    """Write one RTTM ``SPEAKER`` line per entry of a segments file.

    Reads ``args.utt2spk`` (utterance -> speaker) and, when given,
    ``args.reco2file_and_channel`` (recording -> (file id, channel)).  Each
    line of ``args.segments`` supplies utterance, recording, start time and
    end time in its first four fields; the duration is end minus start.
    Without a reco2file_and_channel mapping the recording id is used as the
    file id and the channel is 1.  Output goes to ``args.rttm_file``.

    Raises:
        RuntimeError: If a recording is missing from the
            reco2file_and_channel mapping.
    """
    args = get_args()

    file_and_channel = None
    if args.reco2file_and_channel is not None:
        file_and_channel = {}
        with common_lib.smart_open(args.reco2file_and_channel) as fh:
            for row in fh:
                fields = row.strip().split()
                file_and_channel[fields[0]] = (fields[1], fields[2])

    utt2spk = {}
    with common_lib.smart_open(args.utt2spk) as fh:
        for row in fh:
            fields = row.strip().split()
            utt2spk[fields[0]] = fields[1]

    with common_lib.smart_open(args.segments) as segments_reader, \
            common_lib.smart_open(args.rttm_file, "w") as rttm_writer:
        for row in segments_reader:
            fields = row.strip().split()

            spkr = utt2spk[fields[0]]
            reco = fields[1]

            # Defaults used when no recording -> (file, channel) map exists.
            file_id, channel = reco, 1
            if file_and_channel is not None:
                try:
                    file_id, channel = file_and_channel[reco]
                except KeyError:
                    raise RuntimeError(
                        "Could not find recording {0} in {1}".format(
                            reco, args.reco2file_and_channel))

            start_time = float(fields[2])
            duration = float(fields[3]) - start_time

            record = ("SPEAKER {0} {1} {2:7.2f} {3:7.2f} "
                      "<NA> <NA> {4} <NA>".format(file_id, channel,
                                                  start_time, duration,
                                                  spkr))
            print(record, file=rttm_writer)
Beispiel #8
0
def run(args):
    """Sum the pasted sources of every target matrix into one matrix.

    A matrix from ``args.pasted_targets`` is interpreted as a sequence of
    ``args.dim``-column blocks.  The blocks are added together, block ``i``
    being multiplied by ``args.weights[i]`` if weights were supplied and by
    1.0 otherwise.  If ``args.remove_mismatch_frames`` is true, a row is
    kept at zero whenever ``should_remove_frame`` flags it.  Results are
    written to ``args.out_targets`` keyed like the input.

    Raises:
        RuntimeError: If the column count of a matrix is not a multiple of
            ``args.dim``, or if no matrix was read.
    """

    def block_weight(block_idx):
        # Fall back to a weight of one when no weights were configured.
        return 1.0 if args.weights is None else args.weights[block_idx]

    count = 0
    with common_lib.smart_open(args.pasted_targets) as targets_reader, \
            common_lib.smart_open(args.out_targets, 'w') as targets_writer:
        for key, mat in common_lib.read_mat_ark(targets_reader):
            mat = np.matrix(mat)
            width = args.dim
            n_frames, n_cols = mat.shape
            if n_cols % width != 0:
                raise RuntimeError(
                    "For utterance {utt} in {f}, num-columns {nc} "
                    "is not a multiple of dim {dim}"
                    "".format(utt=key, f=args.pasted_targets.name,
                              nc=n_cols, dim=width))
            num_sources = n_cols // width

            out_mat = np.matrix(np.zeros([n_frames, width]))

            if args.remove_mismatch_frames:
                for frame in range(n_frames):
                    if should_remove_frame(mat[frame, :].getA()[0], width):
                        out_mat[frame, :] = np.zeros([1, width])
                        continue
                    for blk in range(num_sources):
                        first = blk * width
                        last = first + width
                        out_mat[frame, :] += (mat[frame, first:last]
                                              * block_weight(blk))
            else:
                # No frame filtering requested: interpolate whole columns.
                for blk in range(num_sources):
                    first = blk * width
                    last = first + width
                    out_mat += mat[:, first:last] * block_weight(blk)

            common_lib.write_matrix_ascii(targets_writer, out_mat.tolist(),
                                          key=key)
            count += 1

    logger.info("Merged {num_done} target matrices"
                "".format(num_done=count))

    if count == 0:
        raise RuntimeError
def main():
    """Convert a segments file and utt2spk mapping into RTTM SPEAKER lines.

    Reads ``args.utt2spk`` (utterance -> speaker) and, if given,
    ``args.reco2file_and_channel`` (recording -> (file id, channel)).  The
    first four fields of each line in ``args.segments`` are utterance,
    recording, start time and end time; the written duration is end minus
    start.  When no reco2file_and_channel mapping is given, the recording
    id serves as file id and the channel is 1.  One record per segment is
    written to ``args.rttm_file``.

    Raises:
        RuntimeError: If a recording is missing from the
            reco2file_and_channel mapping.
    """
    args = get_args()

    if args.reco2file_and_channel is not None:
        reco2file_and_channel = {}
        with common_lib.smart_open(args.reco2file_and_channel) as fh:
            for line in fh:
                parts = line.strip().split()
                reco2file_and_channel[parts[0]] = (parts[1], parts[2])

    utt2spk = {}
    with common_lib.smart_open(args.utt2spk) as fh:
        for line in fh:
            parts = line.strip().split()
            utt2spk[parts[0]] = parts[1]

    with common_lib.smart_open(args.segments) as segments_reader, \
            common_lib.smart_open(args.rttm_file, 'w') as rttm_writer:
        for line in segments_reader:
            parts = line.strip().split()

            utt = parts[0]
            spkr = utt2spk[utt]

            reco = parts[1]
            # Defaults used when no recording -> (file, channel) map exists.
            file_id = reco
            channel = 1

            if args.reco2file_and_channel is not None:
                try:
                    file_id, channel = reco2file_and_channel[reco]
                except KeyError:
                    raise RuntimeError(
                        "Could not find recording {0} in {1}".format(
                            reco, args.reco2file_and_channel))

            start_time = float(parts[2])
            duration = float(parts[3]) - start_time

            # print() already terminates the record with a newline; an
            # extra "\n" in the string would leave a blank line after every
            # SPEAKER entry.
            print("SPEAKER {0} {1} {2:7.2f} {3:7.2f} "
                  "<NA> <NA> {4} <NA>".format(
                      file_id, channel, start_time,
                      duration, spkr), file=rttm_writer)
Beispiel #10
0
def read_reco2utt_file(reco2utt_file):
    """Load a reco2utt file into a dict.

    Each line holds a recording id followed by one or more utterance ids.

    Returns:
        dict: recording id -> list of its utterance ids, in file order.

    Raises:
        ValueError: If a line has fewer than two whitespace-separated fields.
    """
    mapping = {}
    with common_lib.smart_open(reco2utt_file) as fh:
        for line in fh:
            fields = line.strip().split()
            if len(fields) >= 2:
                mapping[fields[0]] = fields[1:]
                continue
            raise ValueError("Could not parse line {0} in reco2utt "
                             "file {1}".format(line, reco2utt_file))
    return mapping
def read_reco2utt_file(reco2utt_file):
    """Parse a reco2utt file.

    The first field of every line is the recording id; all remaining
    fields are stored as that recording's utterance list.

    Returns:
        dict: recording id -> list of utterance ids.

    Raises:
        ValueError: If a line does not contain at least two fields.
    """
    utts_by_reco = {}
    with common_lib.smart_open(reco2utt_file) as fh:
        for line in fh:
            tokens = line.strip().split()
            if not len(tokens) >= 2:
                raise ValueError("Could not parse line {0} in reco2utt "
                                 "file {1}".format(line, reco2utt_file))
            reco, utts = tokens[0], tokens[1:]
            utts_by_reco[reco] = utts
    return utts_by_reco
def read_reco2num_frames_file(reco2num_frames_file):
    """Load a reco2num_frames file into a dict.

    Each line must consist of exactly a recording id and an integer.

    Returns:
        dict: recording id -> number of frames (int).

    Raises:
        ValueError: If a line does not have exactly two fields, or the
            second field is not an integer.
    """
    frame_counts = {}
    with common_lib.smart_open(reco2num_frames_file) as fh:
        for line in fh:
            fields = line.strip().split()
            if len(fields) == 2:
                frame_counts[fields[0]] = int(fields[1])
                continue
            raise ValueError("Could not parse line {0} in "
                             "reco2num-frames file {1}".format(
                                 line, reco2num_frames_file))
    return frame_counts
Beispiel #13
0
def read_reco2num_frames_file(reco2num_frames_file):
    """Parse a reco2num_frames file.

    Every line is expected to be ``<recording-id> <num-frames>``.

    Returns:
        dict: recording id -> frame count as ``int``.

    Raises:
        ValueError: If a line does not split into exactly two fields, or
            the frame count cannot be converted to ``int``.
    """
    num_frames_by_reco = {}
    with common_lib.smart_open(reco2num_frames_file) as fh:
        for line in fh:
            tokens = line.strip().split()
            if len(tokens) != 2:
                message = ("Could not parse line {0} in "
                           "reco2num-frames file {1}".format(
                               line, reco2num_frames_file))
                raise ValueError(message)
            reco, count = tokens
            num_frames_by_reco[reco] = int(count)
    return num_frames_by_reco
Beispiel #14
0
def load_rttm(rttmf):
    """Load the turns of an RTTM file.

    Lines starting with ``SPKR-INFO`` are skipped; every other line is
    parsed with ``_parse_rttm_line``.

    Returns:
        tuple: ``(turns, speaker_ids, file_ids)`` where ``turns`` is the
        list of parsed turns in file order and the other two are the sets
        of ``speaker_id`` / ``file_id`` values seen on those turns.
    """
    turns = []
    speaker_ids, file_ids = set(), set()
    with common_lib.smart_open(rttmf, 'r') as f:
        for line in f:
            if not line.startswith('SPKR-INFO'):
                turn = _parse_rttm_line(line)
                turns.append(turn)
                speaker_ids.add(turn.speaker_id)
                file_ids.add(turn.file_id)
    return turns, speaker_ids, file_ids
Beispiel #15
0
def read_targets_scp(targets_scp, segments):
    """Read a targets SCP file, keeping only utterances found in `segments`.

    Args:
        targets_scp: File to read; each line must be ``<utt> <value>``.
        segments: Container of known utterance ids; entries whose
            utterance is not in it are skipped.

    Returns:
        dict: utterance id -> second field of its SCP line.

    Raises:
        ValueError: If a line does not have exactly two fields.
    """
    targets = {}
    with common_lib.smart_open(targets_scp) as fh:
        for line in fh:
            parts = line.strip().split()
            if len(parts) != 2:
                # Include the file name: it was passed to format() but the
                # message had no placeholder for it.
                raise ValueError("Could not parse line {0} in "
                                 "targets scp file {1}".format(
                                     line, targets_scp))
            utt = parts[0]
            if utt not in segments:
                continue
            targets[utt] = parts[1]
    return targets
def read_targets_scp(targets_scp, segments):
    """Load the targets SCP entries for the utterances listed in `segments`.

    Args:
        targets_scp: File whose lines are ``<utt> <value>`` pairs.
        segments: Container of utterance ids to keep; other utterances
            are ignored.

    Returns:
        dict: utterance id -> second field of the matching line.

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    targets = {}
    with common_lib.smart_open(targets_scp) as fh:
        for line in fh:
            parts = line.strip().split()
            if len(parts) != 2:
                # The file name was passed to format() without a matching
                # placeholder, so it never reached the message.
                raise ValueError("Could not parse line {0} in "
                                 "targets scp file {1}".format(
                                     line, targets_scp))
            utt = parts[0]
            if utt in segments:
                targets[utt] = parts[1]
    return targets
Beispiel #17
0
def write_rttm(rttmf, turns, n_digits=3):
    """Write `turns` to `rttmf` as ``SPEAKER`` lines ordered by onset.

    Args:
        rttmf: Output file, opened for writing via ``smart_open``.
        turns: Iterable of objects exposing ``file_id``, ``onset``, ``dur``
            and ``speaker_id``.
        n_digits: Passed to ``format_float`` for onset and duration.
    """
    ordered_turns = sorted(turns, key=lambda t: t.onset)
    with common_lib.smart_open(rttmf, 'w') as f:
        for turn in ordered_turns:
            onset = format_float(turn.onset, n_digits)
            dur = format_float(turn.dur, n_digits)
            # Channel is fixed to '1'; unused columns are '<NA>'.
            record = ('SPEAKER', turn.file_id, '1', onset, dur,
                      '<NA>', '<NA>', turn.speaker_id, '<NA>', '<NA>')
            f.write(' '.join(record) + '\n')
def read_segments_file(segments_file, reco2utt):
    """Read a segments file, keeping only recordings present in `reco2utt`.

    Each line must have 4 or 5 fields, the first four being utterance id,
    recording id, start time and end time.

    Args:
        segments_file: File to read.
        reco2utt: Container of recording ids to keep.

    Returns:
        dict: utterance id -> ``[reco, start_time, end_time]`` with the
        times converted to ``float``.

    Raises:
        ValueError: If a line has an unexpected number of fields.
    """
    segments = {}
    with common_lib.smart_open(segments_file) as fh:
        for line in fh:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                # Report the file name, not the partially built dict.
                raise ValueError("Could not parse line {0} in "
                                 "segments file {1}".format(
                                     line, segments_file))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]
    return segments
Beispiel #19
0
def read_segments_file(segments_file, reco2utt):
    """Load the segments that belong to recordings listed in `reco2utt`.

    A valid line has 4 or 5 whitespace-separated fields: utterance id,
    recording id, start time, end time and an optional fifth field that is
    ignored here.

    Args:
        segments_file: File to read.
        reco2utt: Container of recording ids; segments of other recordings
            are skipped.

    Returns:
        dict: utterance id -> ``[reco, start_time, end_time]`` (times as
        ``float``).

    Raises:
        ValueError: If a line does not have 4 or 5 fields.
    """
    segments = {}
    with common_lib.smart_open(segments_file) as fh:
        for line in fh:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                # The message must name the input file; formatting the
                # `segments` dict here dumped the data parsed so far.
                raise ValueError("Could not parse line {0} in "
                                 "segments file {1}".format(
                                     line, segments_file))
            utt = parts[0]
            reco = parts[1]
            if reco in reco2utt:
                segments[utt] = [reco, float(parts[2]), float(parts[3])]
    return segments
        for line in f:
<<<<<<< HEAD
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2num_frames[parts[0]] = int(parts[1])
=======
            fields = line.strip().split()
            if len(fields) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2num_frames[fields[0]] = int(fields[1])
>>>>>>> upstream/master

    # We read all segments and store as a list of objects
    segments = []
    with common_lib.smart_open(args.rttm) as f:
        for line in f.readlines():
<<<<<<< HEAD
            fields = line.strip().split()
            segments.append(Segment(fields[1], float(fields[3]), dur=float(fields[4]), label=fields[7]))

    # We group the segment list into a dictionary indexed by reco_id
    reco2segs = defaultdict(list,
        {reco_id : list(g) for reco_id, g in groupby(segments, lambda x: x.reco_id)})

    # Now, for each reco, create a matrix of shape num_frames x 3 and fill in using
    # the segments information for that reco
    reco2targets = {}
    for reco_id in reco2num_frames:
        segs = sorted(reco2segs[reco_id], key=lambda x: x.start_time)
=======
def run(args):
    """Write default out-of-segment targets for every recording.

    Reads ``args.reco2utt``, ``args.reco2num_frames`` and ``args.segments``
    (the latter two restricted to recordings present in reco2utt).  For
    each recording a matrix with ``reco2num_frames[reco]`` rows is created
    in which every row equals the 1x3 default target vector (read from
    ``args.default_targets`` or ``[1, 0, 0]``).  Rows covered by one of the
    recording's segments are then set to zero, and the matrix is written to
    ``args.out_targets_ark`` under the recording id.

    Raises:
        ValueError: If a line of an input file cannot be parsed.
        RuntimeError: If no utterance or recording was processed, or more
            than half as many utterances failed as succeeded.
    """
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[parts[0]] = parts[1:]

    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if parts[0] not in reco2utt:
                continue
            reco2num_frames[parts[0]] = int(parts[1])

    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # dict.items() works on both Python 2 and 3 (iteritems() is
        # Python 2 only).
        for reco, utts in reco2utt.items():
            reco_mat = np.repeat(default_targets,
                                 reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segment sort last
            # instead of raising KeyError, so that they reach the
            # "not in segments" check below and are counted as errors.
            utts.sort(key=lambda x: (segments[x][1] if x in segments
                                     else float("inf")))
            for i, utt in enumerate(utts):
                if utt not in segments:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                end_frame = int(segment[2] / args.frame_shift)
                num_frames = end_frame - start_frame

                if end_frame > reco2num_frames[reco]:
                    # Clip segments that run past the end of the recording.
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                if num_frames < 0:
                    # The segment starts after its (possibly clipped) end;
                    # np.zeros() would reject the negative row count.
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is beyond end-frame {end}"
                                   "".format(utt=utt, start=start_frame,
                                             end=end_frame))
                    num_utt_err += 1
                    continue

                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(), key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco,
                          num_utt=num_utt,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError
Beispiel #22
0
def run(args):
    """Convert an arc-info file into per-utterance 3-column target matrices.

    ``args.silence_phones`` and ``args.garbage_phones`` each list one phone
    per line (first field); a phone must not appear in both.  Each line of
    ``args.arc_info`` supplies, in order: utterance id, start frame, number
    of frames, posterior and phone.  Consecutive lines with the same
    utterance id are accumulated into a list of ``[col0, col1, col2]`` rows,
    one per frame, where the posterior is added to

    * column 0 if the phone is a silence phone,
    * column 2 if the arc is longer than ``args.max_phone_length`` frames
      or the phone is a garbage phone,
    * column 1 otherwise.

    When the utterance id changes (and at end of input) the accumulated
    matrix is written to ``args.targets_file``; utterances with no rows are
    counted as errors.

    Raises:
        RuntimeError: If a phone list is empty, a phone is in both lists,
            no utterance was written, or the number of failed utterances
            is at least half the number written.
    """
    silence_phones = {}
    with common_lib.smart_open(args.silence_phones) as silence_phones_fh:
        for line in silence_phones_fh:
            silence_phones[line.strip().split()[0]] = 1

    if len(silence_phones) == 0:
        raise RuntimeError("Could not find any phones in {silence}"
                           "".format(silence=args.silence_phones))

    garbage_phones = {}
    with common_lib.smart_open(args.garbage_phones) as garbage_phones_fh:
        for line in garbage_phones_fh:
            word = line.strip().split()[0]
            if word in silence_phones:
                raise RuntimeError("Word '{word}' is in both {silence} "
                                   "and {garbage}".format(
                                       word=word,
                                       silence=args.silence_phones,
                                       garbage=args.garbage_phones))
            garbage_phones[word] = 1

    if len(garbage_phones) == 0:
        raise RuntimeError("Could not find any phones in {garbage}"
                           "".format(garbage=args.garbage_phones))

    num_utts = 0
    num_err = 0
    targets = []
    prev_utt = ""

    with common_lib.smart_open(args.arc_info) as arc_info_reader, \
            common_lib.smart_open(args.targets_file, 'w') as targets_writer:
        for line in arc_info_reader:
            try:
                parts = line.strip().split()
                utt = parts[0]

                if utt != prev_utt:
                    # A new utterance starts: flush the previous one.
                    if prev_utt != "":
                        if len(targets) > 0:
                            num_utts += 1
                            common_lib.write_matrix_ascii(
                                targets_writer, targets, key=prev_utt)
                        else:
                            num_err += 1
                    prev_utt = utt
                    targets = []

                start_frame = int(parts[1])
                num_frames = int(parts[2])
                post = float(parts[3])
                phone = parts[4]

                # Grow the matrix so that it covers the frames of this arc.
                if start_frame + num_frames > len(targets):
                    for t in range(len(targets), start_frame + num_frames):
                        targets.append([0, 0, 0])
                    assert start_frame + num_frames == len(targets)

                for t in range(start_frame, start_frame + num_frames):
                    if phone in silence_phones:
                        targets[t][0] += post
                    elif num_frames > args.max_phone_length:
                        targets[t][2] += post
                    elif phone in garbage_phones:
                        targets[t][2] += post
                    else:
                        targets[t][1] += post
            except Exception:
                logger.error("Failed to process line {line} in {f}"
                             "".format(line=line.strip(), f=args.arc_info))
                logger.error("len(targets) = {l}".format(l=len(targets)))
                raise

        # Flush the final utterance while the writer is still open, and to
        # the same handle as all earlier utterances.  Writing it after the
        # `with` block, addressed by args.targets_file, bypassed the
        # (already closed) writer.
        if prev_utt != "":
            if len(targets) > 0:
                num_utts += 1
                common_lib.write_matrix_ascii(targets_writer, targets,
                                              key=prev_utt)
            else:
                num_err += 1

    logger.info("Wrote {num_utts} targets; failed with {num_err}"
                "".format(num_utts=num_utts, num_err=num_err))
    if num_utts == 0 or num_err >= num_utts / 2:
        raise RuntimeError
def run(args):
    """Build per-frame silence/speech target matrices from an RTTM file.

    ``args.reco2num_frames`` gives the number of rows for every recording.
    Each line of ``args.rttm`` contributes one ``Segment`` (recording from
    field 1, start from field 3, duration from field 4, speaker from
    field 7).  For every recording a ``num_frames x 2`` matrix is created
    whose rows default to the silence vector
    ``[1 - label_smoothing, label_smoothing / 2]``; rows covered by a
    segment are overwritten with the speech vector
    ``[label_smoothing / 2, 1 - label_smoothing]``.  The matrices are
    written to ``args.out_targets_ark`` in sorted recording-id order.

    Raises:
        ValueError: If a line of the reco2num_frames file cannot be parsed.
    """
    # Get all reco to num_frames, which will be used to decide the number of
    # rows of matrix
    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2num_frames[fields[0]] = int(fields[1])

    # We read all segments and store as a list of objects
    segments = []
    with common_lib.smart_open(args.rttm) as f:
        for line in f:
            segment_fields = line.strip().split()
            start = float(segment_fields[3])
            duration = float(segment_fields[4])
            end = start + duration
            segments.append(
                Segment(reco=segment_fields[1],
                        spk=segment_fields[7],
                        start=start,
                        dur=duration,
                        end=end))

    # groupby() only merges adjacent items, hence the sort on the same key.
    keyfunc = lambda x: x.reco
    segments_iterable = sorted(segments, key=keyfunc)
    reco2segs = defaultdict(list, {
        reco: list(g)
        for reco, g in itertools.groupby(segments_iterable, keyfunc)
    })

    # The two target rows do not depend on the recording, so build them
    # once.  The builtin float is used as dtype because the np.float alias
    # no longer exists in current NumPy releases.
    target_val = 1 - args.label_smoothing
    other_val = args.label_smoothing / 2
    silence_vec = np.array([target_val, other_val], dtype=float)
    speech_vec = np.array([other_val, target_val], dtype=float)

    # Now, for each reco, create a matrix of shape num_frames x 2 and fill in
    # using the segments information for that reco
    reco2targets = {}
    for reco_id in reco2num_frames:
        segs = sorted(reco2segs[reco_id], key=lambda x: x.start)

        # The default target (if not speech) is silence
        targets_mat = np.tile(silence_vec, (reco2num_frames[reco_id], 1))
        # Now iterate over all segments of the recording and assign targets
        for seg in segs:
            start_frame = int(seg.start / args.frame_shift)
            # Clip segments that extend past the end of the recording.
            end_frame = min(int(seg.end / args.frame_shift),
                            reco2num_frames[reco_id])
            num_frames = end_frame - start_frame
            if num_frames <= 0:
                continue

            targets_mat[start_frame:end_frame] = np.tile(
                speech_vec, (num_frames, 1))

        reco2targets[reco_id] = targets_mat

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        for reco_id in sorted(reco2targets.keys()):
            common_lib.write_matrix_ascii(f,
                                          reco2targets[reco_id].tolist(),
                                          key=reco_id)
def run(args):
    """Merge per-utterance target matrices into one matrix per recording.

    Inputs are read with ``read_reco2utt_file``,
    ``read_reco2num_frames_file``, ``read_segments_file`` and
    ``read_targets_scp``.  For each recording a matrix with
    ``reco2num_frames[reco]`` rows is initialised from the 1x3 default
    target vector (``args.default_targets`` or all zeros).  The targets of
    each utterance are read through ``copy-feats`` and copied into the rows
    given by the utterance's segment times; where a segment overlaps the
    previous one, the overlapping rows are linearly interpolated between
    the existing and the new targets.  Utterances without segment or
    targets, with a length mismatch above ``args.length_tolerance`` or
    starting outside the recording are counted as errors.  The merged
    matrices are written to ``args.out_targets_ark``.

    Raises:
        ValueError: If a segment spans no frames.
        RuntimeError: If ``copy-feats`` exits with a non-zero status, if no
            utterance or recording was merged, or if the number of failed
            utterances exceeds half the number merged.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, 'w') as fh:
        # dict.items() works on both Python 2 and 3 (iteritems() is
        # Python 2 only).
        for reco, utts in reco2utt.items():
            # Read a recording and the list of its utterances from the
            # reco2utt dictionary
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segment sort last
            # instead of raising KeyError, so that they reach the check
            # below and are counted as errors; this also guarantees that
            # the predecessor of any utterance with a segment has one too.
            utts.sort(key=lambda x: (segments[x][1] if x in segments
                                     else float("inf")))

            for i, utt in enumerate(utts):
                if utt not in segments or utt not in targets:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                # Read the targets corresponding to the segments
                # NOTE(review): the command is run through the shell with
                # the specifier taken verbatim from the targets scp file;
                # that file must come from a trusted source.
                cmd = ("copy-feats --binary=false {mat_fn} -"
                       "".format(mat_fn=targets[utt]))
                p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype='float32')
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child and surface a failing exit code.
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            'stderr = {stderr}'.format(cmd=cmd, status=-p.returncode,
                                                       stderr=stderr))

                # Convert segment times to frame indices, rounding to nearest.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt, s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                if end_frame > reco2num_frames[reco]:
                    # Clip segments that run past the end of the recording.
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                if num_frames < 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                prev_utt_end_frame = (
                    int(segments[utts[i-1]][2] / args.frame_shift + 0.5)
                    if i > 0 else 0)
                if start_frame < prev_utt_end_frame:
                    # Segment overlaps with the previous utterance
                    # Combine targets using a weighted interpolation using a
                    # triangular window with a weight of 1 at the start/end of
                    # overlap and 0 at the end/start of the segment
                    for n in range(0, prev_utt_end_frame - start_frame):
                        w = float(n) / float(prev_utt_end_frame - start_frame)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w)
                            + mat[n, :] * w)

                    num_frames = min(num_frames, mat.shape[0])
                    end_frame = start_frame + num_frames
                    reco_mat[prev_utt_end_frame:end_frame, :] = (
                        mat[(prev_utt_end_frame-start_frame):
                            (end_frame-start_frame), :])
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    num_frames = min(num_frames, mat.shape[0])
                    reco_mat[start_frame:(start_frame + num_frames), :] = (
                        mat[0:num_frames, :])
                logger.debug("reco_mat shape = %s, mat shape = %s, "
                             "start_frame = %d, end_frame = %d", reco_mat.shape,
                             mat.shape, start_frame, end_frame)
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat,
                                              key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt, num_reco=num_reco,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt / 2 or num_reco == 0:
        raise RuntimeError
Beispiel #25
0
def run(args):
    """Merge per-utterance target matrices into one matrix per recording.

    For every recording listed in the reco2utt input, a matrix with
    ``reco2num_frames[reco]`` rows is created, pre-filled with the default
    targets row.  Each utterance's targets matrix (read through a
    ``copy-feats`` subprocess) is then pasted in at the frame offset derived
    from its segment start time.  Where a segment overlaps the region already
    filled by earlier segments, the old and new targets are cross-faded
    linearly over the overlap.  One matrix per recording is written to
    ``args.out_targets_ark``.

    Args:
        args: parsed command-line options.  This function reads
            ``reco2utt``, ``reco2num_frames``, ``segments``, ``targets_scp``,
            ``default_targets``, ``out_targets_ark``, ``frame_shift`` and
            ``length_tolerance`` from it.

    Raises:
        ValueError: if a segment does not span at least one frame.
        RuntimeError: if ``copy-feats`` exits with a non-zero status, if no
            utterance or recording was merged, or if the number of failed
            utterances exceeds ``num_utt // 2``.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert np.shape(default_targets)[0] == 1 and np.shape(
        default_targets)[1] == 3

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, "w") as fh:
        for reco, utts in reco2utt.items():
            # Start from a recording-length matrix filled with the default
            # targets; utterance targets are pasted over it below.
            reco_mat = np.repeat(default_targets,
                                 reco2num_frames[reco],
                                 axis=0)

            # Utterances without a segments entry have no start time to sort
            # on.  Count them as failures up front instead of letting the
            # sort key raise KeyError before they can be skipped.
            known_utts = [utt for utt in utts if utt in segments]
            num_utt_err += len(utts) - len(known_utts)
            known_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            # End frame (exclusive) of the last segment merged so far.
            end_frame_accounted = 0

            for utt in known_utts:
                if utt not in targets:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                # Read the targets corresponding to the segments
                cmd = "copy-feats --binary=false {mat_fn} -" "".format(
                    mat_fn=targets[utt])
                p = subprocess.Popen(cmd,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype="float32")
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child.  A non-zero exit status is
                    # reported even if parsing its output appeared to work
                    # (and takes precedence over a parsing exception).
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            "stderr = {stderr}".format(cmd=cmd,
                                                       status=p.returncode,
                                                       stderr=stderr))

                # Convert segment times to frame indices, rounding to the
                # nearest frame.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt,
                                       s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                # Fix end_frame and num_frames if the segment goes beyond
                # the length of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                # Fix "num_frames" and "end_frame" if "num_frames" is lower
                # than the size of the targets matrix "mat"
                num_frames = min(num_frames, mat.shape[0])
                end_frame = start_frame + num_frames

                if num_frames <= 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                if end_frame < end_frame_accounted:
                    logger.warning("For utterance {utt}, end-frame {end} "
                                   "is before the end of a previous segment. "
                                   "i.e. this segment is completely within "
                                   "another segment. Ignoring this segment."
                                   "".format(utt=utt, end=end_frame))
                    num_utt_err += 1
                    continue

                if start_frame < end_frame_accounted:
                    # Segment overlaps with a previous utterance.
                    # Cross-fade over the overlap: the weight "w" of the new
                    # targets grows linearly from 0 at the start of the
                    # overlap, while the already-merged targets get 1 - w.
                    for n in range(0, end_frame_accounted - start_frame):
                        w = float(n) / float(end_frame_accounted - start_frame)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w) +
                            mat[n, :] * w)

                    # Past the overlap, copy the new targets unchanged.
                    if end_frame > end_frame_accounted:
                        reco_mat[end_frame_accounted:end_frame, :] = mat[(
                            end_frame_accounted -
                            start_frame):(end_frame - start_frame), :, ]
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    reco_mat[start_frame:end_frame, :] = mat[0:num_frames, :]
                logger.debug(
                    "reco_mat shape = %s, mat shape = %s, "
                    "start_frame = %d, end_frame = %d",
                    reco_mat.shape,
                    mat.shape,
                    start_frame,
                    end_frame,
                )

                end_frame_accounted = end_frame
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat, key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt,
                          num_reco=num_reco,
                          num_utt_err=num_utt_err))

    # Treat the run as failed when nothing was merged or when failures
    # outnumber half of the successfully merged utterances.
    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError
def run(args):
    """Write default out-of-segment targets for every recording.

    For each recording in ``args.reco2utt`` a matrix with
    ``reco2num_frames[reco]`` rows is created in which every row equals the
    default targets row.  The rows covered by any of the recording's segments
    are then set to zero, and the matrix is written to
    ``args.out_targets_ark`` keyed by the recording id.

    Args:
        args: parsed command-line options.  This function reads
            ``reco2utt``, ``reco2num_frames``, ``segments``,
            ``default_targets``, ``out_targets_ark`` and ``frame_shift``.

    Raises:
        ValueError: if a line of one of the input files has an unexpected
            number of fields.
        RuntimeError: if no utterance or recording was processed, or if the
            number of failed utterances exceeds half of the processed ones.
    """
    # recording-id -> list of utterance-ids
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[parts[0]] = parts[1:]

    # recording-id -> number of frames (only for recordings in reco2utt)
    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if parts[0] not in reco2utt:
                continue
            reco2num_frames[parts[0]] = int(parts[1])

    # utterance-id -> [recording-id, start-time, end-time]
    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # dict.items() works on both Python 2 and 3; iteritems() does not
        # exist on Python 3.
        for reco, utts in reco2utt.items():
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)

            # Utterances without a segments entry have no start time to sort
            # on.  Count them as failures here instead of letting the sort
            # key raise KeyError.
            known_utts = [utt for utt in utts if utt in segments]
            num_utt_err += len(utts) - len(known_utts)
            known_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            for utt in known_utts:
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                # Clamp the segment end to the length of the recording.
                end_frame = min(int(segment[2] / args.frame_shift),
                                reco2num_frames[reco])
                num_frames = end_frame - start_frame

                if num_frames < 0:
                    # The segment starts after its (clamped) end, e.g. it
                    # lies entirely beyond the recording.  np.zeros() would
                    # raise on a negative dimension, so skip it.
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is after end-frame {end}; ignoring it"
                                   "".format(utt=utt, start=start_frame,
                                             end=end_frame))
                    num_utt_err += 1
                    continue

                # In-segment frames get all-zero targets.
                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(),
                                              key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco, num_utt=num_utt,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt / 2 or num_reco == 0:
        raise RuntimeError
Beispiel #27
0
def run(args):
    """Convert lattice arc information into per-frame 3-column targets.

    Each line of ``args.arc_info`` is parsed as
    ``<utt> <start-frame> <num-frames> <posterior> <phone> ...``.  For every
    frame the arc spans, its posterior is added to one of three columns:

    * column 0 if the phone is listed in ``args.silence_phones``;
    * column 2 if the arc is longer than ``args.max_phone_length`` frames or
      the phone is listed in ``args.garbage_phones``;
    * column 1 otherwise.

    Lines of the same utterance are expected to be contiguous: a matrix is
    written to ``args.targets_file`` each time the utterance id changes, and
    once more for the last utterance.

    Raises:
        RuntimeError: if either phone list is empty, if a phone appears in
            both lists, if no targets were written, or if at least half as
            many utterances failed as succeeded.
        Exception: any error raised while parsing a line of ``arc_info`` is
            logged and re-raised.
    """
    silence_phones = set()
    with common_lib.smart_open(args.silence_phones) as silence_phones_fh:
        for line in silence_phones_fh:
            fields = line.split()
            if fields:  # tolerate blank lines
                silence_phones.add(fields[0])

    if not silence_phones:
        raise RuntimeError("Could not find any phones in {silence}"
                           "".format(silence=args.silence_phones))

    garbage_phones = set()
    with common_lib.smart_open(args.garbage_phones) as garbage_phones_fh:
        for line in garbage_phones_fh:
            fields = line.split()
            if not fields:  # tolerate blank lines
                continue
            word = fields[0]
            if word in silence_phones:
                raise RuntimeError("Word '{word}' is in both {silence} "
                                   "and {garbage}".format(
                    word=word,
                    silence=args.silence_phones,
                    garbage=args.garbage_phones))
            garbage_phones.add(word)

    if not garbage_phones:
        raise RuntimeError("Could not find any phones in {garbage}"
                           "".format(garbage=args.garbage_phones))

    num_utts = 0
    num_err = 0
    targets = []     # rows of [col0, col1, col2] for the current utterance
    prev_utt = ""

    with common_lib.smart_open(args.arc_info) as arc_info_reader, \
            common_lib.smart_open(args.targets_file, 'w') as targets_writer:
        for line in arc_info_reader:
            try:
                parts = line.strip().split()
                utt = parts[0]

                if utt != prev_utt:
                    # A new utterance starts: flush the previous one.
                    if prev_utt != "":
                        if len(targets) > 0:
                            num_utts += 1
                            common_lib.write_matrix_ascii(
                                targets_writer, targets, key=prev_utt)
                        else:
                            num_err += 1
                    prev_utt = utt
                    targets = []

                start_frame = int(parts[1])
                num_frames = int(parts[2])
                post = float(parts[3])
                phone = parts[4]
                end_frame = start_frame + num_frames

                # Grow the matrix with zero rows so it covers this arc.
                if end_frame > len(targets):
                    targets.extend(
                        [0, 0, 0] for _ in range(len(targets), end_frame))

                # The destination column depends only on the arc, not on the
                # frame, so pick it once.
                if phone in silence_phones:
                    column = 0
                elif num_frames > args.max_phone_length:
                    column = 2
                elif phone in garbage_phones:
                    column = 2
                else:
                    column = 1

                for t in range(start_frame, end_frame):
                    targets[t][column] += post
            except Exception:
                logger.error("Failed to process line {line} in {f}"
                             "".format(line=line.strip(), f=args.arc_info))
                logger.error("len(targets) = {l}".format(l=len(targets)))
                raise

        # Flush the last utterance while the writer is still open, and write
        # to the same handle as every other utterance (not to the path
        # string after the handle has been closed).
        if prev_utt != "":
            if len(targets) > 0:
                num_utts += 1
                common_lib.write_matrix_ascii(
                    targets_writer, targets, key=prev_utt)
            else:
                num_err += 1

    logger.info("Wrote {num_utts} targets; failed with {num_err}"
                "".format(num_utts=num_utts, num_err=num_err))
    # Compare without floor division: "num_err >= num_utts // 2" is always
    # true for num_utts == 1, which made a fully successful single-utterance
    # run fail.
    if num_utts == 0 or 2 * num_err >= num_utts:
        raise RuntimeError
def run(args):
    """Build per-recording silence/single/overlap targets from an RTTM file.

    For every recording in ``args.reco2num_frames`` a ``num_frames x 3``
    matrix is created.  Every frame defaults to the "silence" row; frames
    covered by a segment labelled ``"overlap"`` get the "overlap" row and
    frames covered by any other segment get the "single" row.  Segments are
    applied in order of start time, so a later segment overwrites an earlier
    one where they intersect.  The matrices are written to
    ``args.out_targets_ark`` in sorted recording-id order.

    With label smoothing ``s = args.label_smoothing``, the selected column of
    each row holds ``1 - s`` and the other two columns hold ``s / 2``.

    Args:
        args: parsed command-line options.  This function reads
            ``reco2num_frames``, ``overlap_rttm``, ``label_smoothing``,
            ``frame_shift`` and ``out_targets_ark``.

    Raises:
        ValueError: if a line of ``reco2num_frames`` does not have exactly
            two fields.
    """
    # Get all reco to num_frames, which will be used to decide the number of
    # rows of matrix
    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2num_frames[parts[0]] = int(parts[1])

    # Read all segments and group them by recording id.  Appending to a
    # defaultdict (rather than itertools.groupby on the unsorted list) keeps
    # every segment even when a recording's lines are not contiguous in the
    # RTTM file.
    reco2segs = defaultdict(list)
    with common_lib.smart_open(args.overlap_rttm) as f:
        for line in f:
            parts = line.strip().split()
            if not parts:  # tolerate blank lines
                continue
            seg = Segment(parts[1],
                          float(parts[3]),
                          dur=float(parts[4]),
                          label=parts[7])
            reco2segs[seg.reco_id].append(seg)

    # The three possible target rows are the same for every recording.
    # The builtin float is used as dtype because the np.float alias no
    # longer exists in current NumPy releases.
    target_val = 1 - args.label_smoothing
    other_val = args.label_smoothing / 2
    silence_vec = np.array([target_val, other_val, other_val], dtype=float)
    single_vec = np.array([other_val, target_val, other_val], dtype=float)
    overlap_vec = np.array([other_val, other_val, target_val], dtype=float)

    # Now, for each reco, create a matrix of shape num_frames x 3 and fill in
    # using the segments information for that reco
    reco2targets = {}
    for reco_id, total_frames in reco2num_frames.items():
        segs = sorted(reco2segs.get(reco_id, []), key=lambda x: x.start_time)

        # The default target (if not single or overlap) is silence
        targets_mat = np.tile(silence_vec, (total_frames, 1))

        # Now iterate over all segments of the recording and assign targets
        for seg in segs:
            start_frame = int(seg.start_time / args.frame_shift)
            # Clamp the segment end to the length of the recording.
            end_frame = min(int(seg.end_time / args.frame_shift),
                            total_frames)
            if end_frame - start_frame <= 0:
                continue
            # The row vector is broadcast over all frames of the slice.
            if seg.label == "overlap":
                targets_mat[start_frame:end_frame] = overlap_vec
            else:
                targets_mat[start_frame:end_frame] = single_vec

        reco2targets[reco_id] = targets_mat

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        for reco_id in sorted(reco2targets.keys()):
            common_lib.write_matrix_ascii(f,
                                          reco2targets[reco_id].tolist(),
                                          key=reco_id)
def WriteDistMatrices(D, wark):
    """Write every matrix stored in ``D`` to ``wark``, in sorted key order.

    Each value of ``D`` is converted with its ``tolist()`` method and written
    through ``common_lib.write_matrix_ascii`` under its key.
    """
    with common_lib.smart_open(wark, 'w') as out_fh:
        ordered_keys = sorted(D.keys())
        for mat_key in ordered_keys:
            rows = D[mat_key].tolist()
            common_lib.write_matrix_ascii(out_fh, rows, key=mat_key)
def run(args):
    # Get all reco to num_frames, which will be used to decide the number of
    # rows of matrix
    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f: