def run(args):
    """Write a 2x3 transform matrix built from class priors to stdout.

    The priors are read from ``args.priors`` when it is not None; otherwise
    uniform priors ``[1.0, 1.0, 1.0]`` are used.  Only the first row of the
    priors matrix is used; its three entries are treated as the silence,
    speech and garbage priors and are normalized to sum to one.

    Row 0 of the output holds ``args.sil_scale``,
    ``args.speech_in_sil_weight`` and ``args.garbage_in_sil_weight``, each
    divided by the matching normalized prior.  Row 1 holds
    ``args.sil_in_speech_weight``, ``1.0`` and
    ``args.garbage_in_speech_weight``, divided the same way.

    Raises:
        RuntimeError: if the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # An empty matrix has to be rejected here too: the previous check
        # (`len(priors) != 0 and ...`) let it through and the code then
        # failed with an uninformative IndexError on priors[0].
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    # Normalize the priors so that they sum to one.
    # NOTE(review): old_div is presumably past.utils.old_div; it is kept
    # as-is because the rest of the file relies on it -- confirm.
    priors_sum = sum(priors[0])
    sil_prior = old_div(priors[0][0], priors_sum)
    speech_prior = old_div(priors[0][1], priors_sum)
    garbage_prior = old_div(priors[0][2], priors_sum)

    transform_mat = [
        [
            old_div(args.sil_scale, sil_prior),
            old_div(args.speech_in_sil_weight, speech_prior),
            old_div(args.garbage_in_sil_weight, garbage_prior),
        ],
        [
            old_div(args.sil_in_speech_weight, sil_prior),
            old_div(1.0, speech_prior),
            old_div(args.garbage_in_speech_weight, garbage_prior),
        ],
    ]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
def run(args):
    """Write a 2x3 transform matrix built from class priors to stdout.

    The priors are read from ``args.priors`` when it is not None; otherwise
    uniform priors ``[1.0, 1.0, 1.0]`` are used.  Only the first row of the
    priors matrix is used; its three entries are treated as the silence,
    speech and garbage priors and are normalized to sum to one.

    Row 0 of the output holds ``args.sil_scale``,
    ``args.speech_in_sil_weight`` and ``args.garbage_in_sil_weight``, each
    divided by the matching normalized prior.  Row 1 holds
    ``args.sil_in_speech_weight``, ``1.0`` and
    ``args.garbage_in_speech_weight``, divided the same way.

    Raises:
        RuntimeError: if the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # An empty matrix has to be rejected here too: the previous check
        # (`len(priors) != 0 and ...`) let it through and the code then
        # failed with an uninformative IndexError on priors[0].
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    # Normalize the priors so that they sum to one.
    priors_sum = sum(priors[0])
    sil_prior = priors[0][0] / priors_sum
    speech_prior = priors[0][1] / priors_sum
    garbage_prior = priors[0][2] / priors_sum

    transform_mat = [
        [
            args.sil_scale / sil_prior,
            args.speech_in_sil_weight / speech_prior,
            args.garbage_in_sil_weight / garbage_prior,
        ],
        [
            args.sil_in_speech_weight / sil_prior,
            1.0 / speech_prior,
            args.garbage_in_speech_weight / garbage_prior,
        ],
    ]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
def run(args):
    """Write a 3x3 diagonal posterior-to-likelihood matrix to stdout.

    The priors are read from ``args.priors`` as a single row of three
    values.  They are divided by the combined speech/non-speech mass, and
    the output matrix has the reciprocals of those normalized priors on its
    diagonal.  The garbage entry is then zeroed and the speech entry is
    scaled by ``args.speech_likelihood_weight``.

    Raises:
        RuntimeError: if the priors matrix is not exactly one row of three
            values.
    """
    # Load priors.
    #   - priors[0] -- prior probability of non-speech
    #   - priors[1] -- prior probability of speech
    #   - priors[2] -- prior probability of garbage; ignored
    priors = common_lib.read_matrix_ascii(args.priors)
    # Require exactly one row.  An empty matrix used to slip past the check
    # and fail with an IndexError, and a multi-row matrix survived
    # np.squeeze as a 2-D array, which made np.diag return a vector and the
    # [2, 2] assignment below crash.
    if len(priors) != 1 or len(priors[0]) != 3:
        raise RuntimeError(f'Invalid dimension for priors {priors}')
    priors = np.array(priors[0], dtype=np.float64)

    # Create matrix that converts posteriors to likelihoods by dividing by
    # normalized priors.
    pmass = priors[0] + priors[1]  # Total mass devoted to speech/non-speech.
    priors /= pmass
    transform_mat = np.diag(1 / priors)
    transform_mat[2, 2] = 0.0  # Ignore garbage entirely
    transform_mat[1, 1] *= args.speech_likelihood_weight

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
def run(args):
    """Write a 2x3 transform matrix built from class priors to stdout.

    The priors are read from ``args.priors`` when it is not None; otherwise
    uniform priors ``[1.0, 1.0, 1.0]`` are used.  Only the first row of the
    priors matrix is used; its three entries are treated as the silence,
    speech and garbage priors and are normalized to sum to one.

    Row 0 of the output holds ``args.sil_scale``,
    ``args.speech_in_sil_weight`` and ``args.garbage_in_sil_weight``, each
    divided by the matching normalized prior.  Row 1 holds
    ``args.sil_in_speech_weight``, ``1.0`` and
    ``args.garbage_in_speech_weight``, divided the same way.

    Raises:
        RuntimeError: if the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # Reject an empty matrix as well; the old `!= 0 and` test accepted
        # it and the function then died with an IndexError on priors[0].
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    first_row = priors[0]
    priors_sum = sum(first_row)
    sil_prior = first_row[0] / priors_sum
    speech_prior = first_row[1] / priors_sum
    garbage_prior = first_row[2] / priors_sum

    sil_row = [args.sil_scale / sil_prior,
               args.speech_in_sil_weight / speech_prior,
               args.garbage_in_sil_weight / garbage_prior]
    speech_row = [args.sil_in_speech_weight / sil_prior,
                  1.0 / speech_prior,
                  args.garbage_in_speech_weight / garbage_prior]
    transform_mat = [sil_row, speech_row]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
def run(args):
    """Write per-recording default targets with in-segment frames zeroed.

    Inputs, all opened through ``common_lib.smart_open``:
      * ``args.reco2utt``: lines of ``<reco> <utt1> [<utt2> ...]``.
      * ``args.reco2num_frames``: lines of ``<reco> <num-frames>``; entries
        for recordings that are not in reco2utt are ignored.
      * ``args.segments``: lines of ``<utt> <reco> <start> <end>`` with an
        optional fifth field; entries for recordings that are not in
        reco2utt are ignored.
      * ``args.default_targets``: optional 1x3 matrix; ``[[1, 0, 0]]`` is
        used when it is None.

    For every recording a ``num_frames x 3`` matrix is created by repeating
    the default targets, the rows covered by each segment are set to zero,
    and the matrix is written to ``args.out_targets_ark`` keyed by the
    recording id.  Segment times are converted to frames by dividing by
    ``args.frame_shift`` and truncating.

    Raises:
        ValueError: on a line that cannot be parsed.
        RuntimeError: if no utterance or recording was processed, or if the
            number of failed utterances exceeds half of the processed ones.
    """
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[parts[0]] = parts[1:]

    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if parts[0] not in reco2utt:
                continue
            reco2num_frames[parts[0]] = int(parts[1])

    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only.
        for reco, utts in reco2utt.items():
            reco_num_frames = reco2num_frames[reco]
            reco_mat = np.repeat(default_targets, reco_num_frames, axis=0)

            # Drop utterances without a segments entry *before* sorting:
            # the sort key looks the utterance up in `segments`, so sorting
            # the raw list raised KeyError before they could be counted.
            known_utts = [utt for utt in utts if utt in segments]
            num_utt_err += len(utts) - len(known_utts)
            known_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            for utt in known_utts:
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                end_frame = int(segment[2] / args.frame_shift)

                # Clip segments that run past the end of the recording.
                if end_frame > reco_num_frames:
                    end_frame = reco_num_frames
                num_frames = end_frame - start_frame

                if num_frames < 0:
                    # np.zeros() rejects negative dimensions, so a segment
                    # that starts after its (clipped) end used to abort the
                    # whole run.
                    logger.warning(
                        "For utterance {utt}, segment has negative length "
                        "after clipping to the recording; start-frame "
                        "{start}, end-frame {end}".format(
                            utt=utt, start=start_frame, end=end_frame))
                    num_utt_err += 1
                    continue

                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(),
                                              key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco, num_utt=num_utt,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError
def run(args):
    """Merge per-utterance target matrices into per-recording matrices.

    The reco2utt, reco2num_frames, segments and targets-scp inputs are
    loaded with the file's ``read_*`` helpers.  For every recording a
    ``num_frames x 3`` matrix is initialised with the default targets
    (``args.default_targets`` if given, zeros otherwise).  The utterances
    are visited in order of segment start time; each one's targets are read
    through ``copy-feats`` and copied into the rows covered by its segment.
    Where a segment overlaps frames already filled by an earlier utterance,
    the two are blended with a linear ramp.  The result is written to
    ``args.out_targets_ark`` keyed by the recording id.

    Raises:
        ValueError: if a segment spans zero or fewer frames.
        RuntimeError: if ``copy-feats`` exits with a non-zero status, if no
            utterance or recording was merged, or if the number of failed
            utterances exceeds half of the merged ones.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, "w") as fh:
        for reco, utts in reco2utt.items():
            # Read a recording and the list of its utterances from the
            # reco2utt dictionary
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)

            # Filter out utterances with no segment or no targets *before*
            # sorting: the sort key indexes `segments`, so sorting the raw
            # list raised KeyError before such utterances could be counted
            # as errors.
            usable_utts = [utt for utt in utts
                           if utt in segments and utt in targets]
            num_utt_err += len(utts) - len(usable_utts)
            usable_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            # First frame of this recording not yet covered by a merged
            # utterance.
            end_frame_accounted = 0

            for utt in usable_utts:
                segment = segments[utt]

                # Read the targets corresponding to the segments
                # NOTE(review): the scp entry is interpolated into a shell
                # command; presumably it may be any Kaldi rxfilename, which
                # is why shell=True is kept -- confirm the scp is trusted.
                cmd = ("copy-feats --binary=false {mat_fn} -"
                       "".format(mat_fn=targets[utt]))
                p = subprocess.Popen(cmd, shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype="float32")
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child and surface a non-zero exit.
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            "stderr = {stderr}".format(
                                cmd=cmd, status=-p.returncode,
                                stderr=stderr))

                # Times are rounded to the nearest frame.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt, s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                # Fix end_frame and num_frames if the segment goes beyond
                # the length of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                # Fix "num_frames" and "end_frame" if "num_frames" is lower
                # than the size of the targets matrix "mat"
                num_frames = min(num_frames, mat.shape[0])
                end_frame = start_frame + num_frames

                if num_frames <= 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                if end_frame < end_frame_accounted:
                    logger.warning("For utterance {utt}, end-frame {end} "
                                   "is before the end of a previous segment. "
                                   "i.e. this segment is completely within "
                                   "another segment. Ignoring this segment."
                                   "".format(utt=utt, end=end_frame))
                    num_utt_err += 1
                    continue

                if start_frame < end_frame_accounted:
                    # Segment overlaps with a previous utterance
                    # Combine targets using a weighted interpolation using a
                    # triangular window with a weight of 1 at the start/end of
                    # overlap and 0 at the end/start of the segment
                    for n in range(0, end_frame_accounted - start_frame):
                        w = float(n) / float(end_frame_accounted - start_frame)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w)
                            + mat[n, :] * w)

                    if end_frame > end_frame_accounted:
                        reco_mat[end_frame_accounted:end_frame, :] = mat[
                            (end_frame_accounted - start_frame):
                            (end_frame - start_frame), :]
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    reco_mat[start_frame:end_frame, :] = mat[0:num_frames, :]

                logger.debug(
                    "reco_mat shape = %s, mat shape = %s, "
                    "start_frame = %d, end_frame = %d",
                    reco_mat.shape, mat.shape, start_frame, end_frame)
                end_frame_accounted = end_frame
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat, key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt, num_reco=num_reco,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError
def run(args):
    """Merge per-utterance target matrices into per-recording matrices.

    The reco2utt, reco2num_frames, segments and targets-scp inputs are
    loaded with the file's ``read_*`` helpers.  For every recording a
    ``num_frames x 3`` matrix is initialised with the default targets
    (``args.default_targets`` if given, zeros otherwise).  The utterances
    are visited in order of segment start time; each one's targets are read
    through ``copy-feats`` and copied into the rows covered by its segment.
    Where a segment overlaps frames already filled by an earlier utterance,
    the two are blended with a linear ramp.  The result is written to
    ``args.out_targets_ark`` keyed by the recording id.

    Raises:
        ValueError: if a segment spans zero or fewer frames.
        RuntimeError: if ``copy-feats`` exits with a non-zero status, if no
            utterance or recording was merged, or if the number of failed
            utterances exceeds half of the merged ones.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, 'w') as fh:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only.
        for reco, utts in reco2utt.items():
            # Read a recording and the list of its utterances from the
            # reco2utt dictionary
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)

            # Filter out utterances with no segment or no targets *before*
            # sorting: the sort key indexes `segments`, so sorting the raw
            # list raised KeyError before such utterances could be counted
            # as errors.
            usable_utts = [utt for utt in utts
                           if utt in segments and utt in targets]
            num_utt_err += len(utts) - len(usable_utts)
            usable_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            # First frame not yet covered by a successfully merged
            # utterance.  This replaces the nominal end of the previous
            # list entry, which was wrong whenever that entry had been
            # skipped or clipped.
            end_frame_accounted = 0

            for utt in usable_utts:
                segment = segments[utt]

                # Read the targets corresponding to the segments
                cmd = ("copy-feats --binary=false {mat_fn} -"
                       "".format(mat_fn=targets[utt]))
                p = subprocess.Popen(cmd, shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype='float32')
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child and surface a non-zero exit.
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            'stderr = {stderr}'.format(
                                cmd=cmd, status=-p.returncode,
                                stderr=stderr))

                # Times are rounded to the nearest frame.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt, s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                # Clip the segment to the length of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                # Never read more rows than the targets matrix provides.
                # This is done up front so both branches below see the same
                # num_frames / end_frame.
                num_frames = min(num_frames, mat.shape[0])
                end_frame = start_frame + num_frames

                if num_frames <= 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                if end_frame < end_frame_accounted:
                    # The ramp below would index past the end of `mat` for
                    # a segment nested inside an earlier one, so skip it.
                    logger.warning("For utterance {utt}, end-frame {end} "
                                   "is before the end of a previous segment. "
                                   "i.e. this segment is completely within "
                                   "another segment. Ignoring this segment."
                                   "".format(utt=utt, end=end_frame))
                    num_utt_err += 1
                    continue

                if start_frame < end_frame_accounted:
                    # Segment overlaps with a previous utterance
                    # Combine targets using a weighted interpolation using a
                    # triangular window with a weight of 1 at the start/end of
                    # overlap and 0 at the end/start of the segment
                    overlap = end_frame_accounted - start_frame
                    for n in range(0, overlap):
                        w = float(n) / float(overlap)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w)
                            + mat[n, :] * w)

                    if end_frame > end_frame_accounted:
                        reco_mat[end_frame_accounted:end_frame, :] = (
                            mat[overlap:(end_frame - start_frame), :])
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    reco_mat[start_frame:end_frame, :] = mat[0:num_frames, :]

                logger.debug("reco_mat shape = %s, mat shape = %s, "
                             "start_frame = %d, end_frame = %d",
                             reco_mat.shape, mat.shape,
                             start_frame, end_frame)
                end_frame_accounted = end_frame
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat, key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt, num_reco=num_reco,
                          num_utt_err=num_utt_err))

    # Both counters are integers, so `> num_utt // 2` selects the same
    # cases as the true-division form on Python 3.
    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError
def run(args):
    """Write per-recording default targets with in-segment frames zeroed.

    Inputs, all opened through ``common_lib.smart_open``:
      * ``args.reco2utt``: lines of ``<reco> <utt1> [<utt2> ...]``.
      * ``args.reco2num_frames``: lines of ``<reco> <num-frames>``; entries
        for recordings that are not in reco2utt are ignored.
      * ``args.segments``: lines of ``<utt> <reco> <start> <end>`` with an
        optional fifth field; entries for recordings that are not in
        reco2utt are ignored.
      * ``args.default_targets``: optional 1x3 matrix; ``[[1, 0, 0]]`` is
        used when it is None.

    For every recording a ``num_frames x 3`` matrix is created by repeating
    the default targets, the rows covered by each segment are set to zero,
    and the matrix is written to ``args.out_targets_ark`` keyed by the
    recording id.  Segment times are converted to frames by dividing by
    ``args.frame_shift`` and truncating.

    Raises:
        ValueError: on a line that cannot be parsed.
        RuntimeError: if no utterance or recording was processed, or if the
            number of failed utterances exceeds half of the processed ones.
    """
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[fields[0]] = fields[1:]

    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if fields[0] in reco2utt:
                reco2num_frames[fields[0]] = int(fields[1])

    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt, reco = fields[0], fields[1]
            if reco not in reco2utt:
                continue
            segments[utt] = [reco, float(fields[2]), float(fields[3])]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only.
        for reco, utts in reco2utt.items():
            reco_num_frames = reco2num_frames[reco]
            reco_mat = np.repeat(default_targets, reco_num_frames, axis=0)

            # Utterances without a segments entry must be removed before
            # sorting, because the sort key looks them up in `segments`;
            # sorting the raw list raised KeyError instead of counting them.
            known_utts = [utt for utt in utts if utt in segments]
            num_utt_err += len(utts) - len(known_utts)
            known_utts.sort(key=lambda x: segments[x][1])  # sort on start time

            for utt in known_utts:
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                end_frame = int(segment[2] / args.frame_shift)

                # Clip segments that run past the end of the recording.
                if end_frame > reco_num_frames:
                    end_frame = reco_num_frames
                num_frames = end_frame - start_frame

                if num_frames < 0:
                    # np.zeros() rejects negative dimensions, so a segment
                    # that starts after its (clipped) end used to abort the
                    # whole run.
                    logger.warning(
                        "For utterance {utt}, segment has negative length "
                        "after clipping to the recording; start-frame "
                        "{start}, end-frame {end}".format(
                            utt=utt, start=start_frame, end=end_frame))
                    num_utt_err += 1
                    continue

                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(),
                                              key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco, num_utt=num_utt,
                          num_utt_err=num_utt_err))

    # Both counters are integers, so `> num_utt // 2` selects the same
    # cases as the true-division form on Python 3.
    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError