Beispiel #1
0
def run(args):
    """Merge pasted target matrices into single ``args.dim``-column matrices.

    Each matrix read from ``args.pasted_targets`` is treated as
    ``num_sources`` blocks of ``args.dim`` columns placed side by side.  The
    blocks are summed, each scaled by the matching entry of ``args.weights``
    (or by 1.0 when ``args.weights`` is None), and the result is written to
    ``args.out_targets`` under the same key.

    When ``args.remove_mismatch_frames`` is set, rows for which
    ``should_remove_frame`` returns true are left as all zeros.

    Raises:
        RuntimeError: if a matrix's column count is not a multiple of
            ``args.dim``, or if no matrix was merged at all.
    """
    num_done = 0

    with common_lib.smart_open(
            args.pasted_targets) as targets_reader, common_lib.smart_open(
                args.out_targets, "w") as targets_writer:
        for key, mat in common_lib.read_mat_ark(targets_reader):
            mat = np.matrix(mat)
            num_rows, num_cols = mat.shape
            if num_cols % args.dim != 0:
                raise RuntimeError(
                    "For utterance {utt} in {f}, num-columns {nc} "
                    "is not a multiple of dim {dim}"
                    "".format(
                        utt=key,
                        # pasted_targets is handed to smart_open above, so it
                        # may be a plain path string with no ``.name``; fall
                        # back to the value itself instead of masking this
                        # error with an AttributeError.
                        f=getattr(args.pasted_targets, "name",
                                  args.pasted_targets),
                        nc=num_cols,
                        dim=args.dim,
                    ))
            num_sources = num_cols // args.dim

            # Resolve the per-source weights once per matrix instead of
            # re-testing ``args.weights is None`` in the innermost loop.
            if args.weights is None:
                weights = [1.0] * num_sources
            else:
                weights = args.weights

            out_mat = np.matrix(np.zeros([num_rows, args.dim]))

            if args.remove_mismatch_frames:
                for n in range(num_rows):
                    if should_remove_frame(mat[n, :].getA()[0], args.dim):
                        # Removed frames keep their initial all-zero row.
                        continue
                    for i in range(num_sources):
                        out_mat[n, :] += (
                            mat[n, (i * args.dim):((i + 1) * args.dim)]
                            * weights[i])
            else:
                # Just interpolate the targets
                for i in range(num_sources):
                    out_mat += (
                        mat[:, (i * args.dim):((i + 1) * args.dim)]
                        * weights[i])

            common_lib.write_matrix_ascii(targets_writer,
                                          out_mat.tolist(),
                                          key=key)
            num_done += 1

    logger.info("Merged {num_done} target matrices"
                "".format(num_done=num_done))

    if num_done == 0:
        raise RuntimeError("No target matrices were merged")
Beispiel #2
0
def run(args):
    """Combine side-by-side target blocks of each matrix into one block.

    Every matrix read from ``args.pasted_targets`` must have a column count
    that is a multiple of ``args.dim``; it is split into
    ``num_sources = columns // args.dim`` blocks which are added together,
    block ``i`` being scaled by ``args.weights[i]`` (1.0 if ``args.weights``
    is None).  The merged matrix is written to ``args.out_targets`` with the
    original key.

    With ``args.remove_mismatch_frames`` enabled, any row for which
    ``should_remove_frame`` returns true stays all zeros in the output.

    Raises:
        RuntimeError: if a matrix has a column count that is not a multiple
            of ``args.dim``, or if nothing was merged.
    """
    num_done = 0
    dim = args.dim

    with common_lib.smart_open(args.pasted_targets) as targets_reader, \
            common_lib.smart_open(args.out_targets, 'w') as targets_writer:
        for key, mat in common_lib.read_mat_ark(targets_reader):
            mat = np.matrix(mat)
            if mat.shape[1] % dim != 0:
                # pasted_targets is also passed to smart_open, so it may be
                # a path string without a ``.name`` attribute; use the value
                # itself in that case so this RuntimeError is what surfaces.
                source_name = getattr(args.pasted_targets, "name",
                                      args.pasted_targets)
                raise RuntimeError(
                    "For utterance {utt} in {f}, num-columns {nc} "
                    "is not a multiple of dim {dim}"
                    "".format(utt=key, f=source_name,
                              nc=mat.shape[1], dim=dim))
            num_sources = mat.shape[1] // dim

            # Per-source scale factors, resolved once per matrix.
            weights = ([1.0] * num_sources if args.weights is None
                       else args.weights)

            out_mat = np.matrix(np.zeros([mat.shape[0], dim]))

            if args.remove_mismatch_frames:
                for n in range(mat.shape[0]):
                    if should_remove_frame(mat[n, :].getA()[0], dim):
                        # Row n was initialised to zeros; leave it that way.
                        continue
                    for i in range(num_sources):
                        out_mat[n, :] += (
                            mat[n, (i * dim):((i + 1) * dim)] * weights[i])
            else:
                # Just interpolate the targets
                for i in range(num_sources):
                    out_mat += (
                        mat[:, (i * dim):((i + 1) * dim)] * weights[i])

            common_lib.write_matrix_ascii(targets_writer, out_mat.tolist(),
                                          key=key)
            num_done += 1

    logger.info("Merged {num_done} target matrices"
                "".format(num_done=num_done))

    if num_done == 0:
        raise RuntimeError("No target matrices were merged")
Beispiel #3
0
def run(args):
    """Sub-sample each target matrix by averaging windows of frames.

    For every ``(key, matrix)`` read from ``args.targets_in_ark`` an output
    matrix with ``ceil(num_frames / subsampling_factor)`` rows is built.
    Output row ``i`` is the mean of the input rows in a window centred on
    frame ``k = subsampling_factor // 2 + i * subsampling_factor`` and
    clipped to the matrix bounds.  Output rows the loop does not reach stay
    zero.  Results are written to ``args.targets_out_ark``.

    Both ark handles are closed before returning, also when an error occurs.

    Raises:
        ValueError: if ``args.subsampling_factor`` is not positive.
    """
    factor = args.subsampling_factor
    num_utts = 0
    try:
        # The original only assigned num_indexes when the factor was
        # positive, leaving it unbound (or stale) otherwise; fail clearly.
        if factor <= 0:
            raise ValueError(
                "subsampling_factor must be positive; got {0}".format(factor))

        half_window = factor / 2.0
        for key, mat in common_lib.read_mat_ark(args.targets_in_ark):
            mat = np.matrix(mat)
            num_frames = mat.shape[0]
            num_indexes = (num_frames + factor - 1) // factor

            out_mat = np.zeros([num_indexes, mat.shape[1]])
            # NOTE(review): with factor == 1 the first window is empty
            # (st == end == 0), so that row is computed as 0/0 -- confirm
            # whether a factor of 1 is meant to be supported.
            for i, k in enumerate(range(factor // 2, num_frames, factor)):
                st = max(int(k - half_window), 0)
                end = min(int(k + half_window), num_frames)

                try:
                    out_mat[i, :] = (np.sum(mat[st:end, :], axis=0)
                                     / float(end - st))
                except IndexError:
                    logger.error("mat.shape = {0}, st = {1}, end = {2}"
                                 "".format(mat.shape, st, end))
                    raise
                assert i == k // factor

            common_lib.write_matrix_ascii(args.targets_out_ark, out_mat,
                                          key=key)
            num_utts += 1
    finally:
        args.targets_in_ark.close()
        args.targets_out_ark.close()

    logger.info("Sub-sampled {num_utts} target matrices"
                "".format(num_utts=num_utts))
Beispiel #4
0
def run(args):
    """Sub-sample each target matrix by averaging windows of frames.

    Reads ``(key, matrix)`` pairs from ``args.targets_in_ark``.  For each one
    an output matrix with ``ceil(num_frames / subsampling_factor)`` rows is
    allocated, and output row ``i`` is set to the mean of the input rows in
    a window centred on frame
    ``k = subsampling_factor // 2 + i * subsampling_factor``, clipped to the
    matrix bounds.  Output rows the loop does not reach stay zero.  The
    result is written to ``args.targets_out_ark`` under the same key.

    Both ark handles are closed before returning, also when an error occurs.

    Raises:
        ValueError: if ``args.subsampling_factor`` is not positive.
    """
    factor = args.subsampling_factor
    num_utts = 0
    try:
        # num_indexes used to be assigned only for a positive factor, which
        # left it unbound (or stale) otherwise; reject bad values up front.
        if factor <= 0:
            raise ValueError(
                "subsampling_factor must be positive; got {0}".format(factor))

        half_window = float(factor) / 2.0
        for key, mat in common_lib.read_mat_ark(args.targets_in_ark):
            mat = np.matrix(mat)
            num_frames = mat.shape[0]
            # Floor division keeps the row count an int on Python 3, where
            # ``/`` would produce a float that np.zeros rejects.
            num_indexes = (num_frames + factor - 1) // factor

            out_mat = np.zeros([num_indexes, mat.shape[1]])
            # NOTE(review): with factor == 1 the first window is empty
            # (st == end == 0), so that row is computed as 0/0 -- confirm
            # whether a factor of 1 is meant to be supported.
            for i, k in enumerate(range(factor // 2, num_frames, factor)):
                st = max(int(k - half_window), 0)
                end = min(int(k + half_window), num_frames)

                try:
                    out_mat[i, :] = (np.sum(mat[st:end, :], axis=0)
                                     / float(end - st))
                except IndexError:
                    logger.error("mat.shape = {0}, st = {1}, end = {2}"
                                 "".format(mat.shape, st, end))
                    raise
                # Integer comparison: true division here would give a
                # fractional value and trip the assert on Python 3.
                assert i == k // factor

            common_lib.write_matrix_ascii(args.targets_out_ark, out_mat,
                                          key=key)
            num_utts += 1
    finally:
        args.targets_in_ark.close()
        args.targets_out_ark.close()

    logger.info("Sub-sampled {num_utts} target matrices"
                "".format(num_utts=num_utts))