예제 #1
0
            for i in range(num_chunks):
                start = i * (args.chunk_size / 2)
                this_chunk_size = args.chunk_size if feature.shape[
                    0] - start > args.chunk_size else feature.shape[0] - start
                feature_length.append(this_chunk_size)
                feature_array.append(feature[start:start + this_chunk_size])

            feature_length = np.expand_dims(np.array(feature_length), axis=1)
            # Except for the last feature, the length of other features should be the same (=chunk_size)
            embeddings = trainer.predict(
                np.array(feature_array[:-1], dtype=np.float32))
            embedding_last = trainer.predict(feature_array[-1])
            embeddings = np.concatenate(
                [embeddings,
                 np.expand_dims(embedding_last, axis=0)], axis=0)
            if args.normalize:
                embeddings /= np.sqrt(
                    np.sum(np.square(embeddings), axis=1, keepdims=True))
            embedding = np.sum(embeddings * feature_length,
                               axis=0) / np.sum(feature_length)
        else:
            tf.logging.info("[INFO] Key %s length %d." %
                            (key, feature.shape[0]))
            embedding = trainer.predict(feature)

        if args.normalize:
            embedding /= np.sqrt(np.sum(np.square(embedding)))
        write_vec_flt(fp_out, embedding, key=key)
    fp_out.close()
    trainer.close()
예제 #2
0
# Concatenate, key by key, the vectors found in several input files.
#
# Command line: <input-1> [<input-2> ...] <output>
# Every argument except the last is an input read with read_vec_flt_scp;
# the last argument is the output written with write_vec_flt.
if len(sys.argv) < 3:
    sys.exit("Usage: %s <vector-in-1> [<vector-in-2> ...] <vector-out>"
             % sys.argv[0])

vector_in = sys.argv[1:-1]
vector_out = sys.argv[-1]

vector_tot = []
vector_names = []

# The first input defines the keys and the order in which they are written.
vector_single = {}
for key, vec in read_vec_flt_scp(vector_in[0]):
    vector_names.append(key)
    vector_single[key] = vec
vector_tot.append(vector_single)

# Every remaining input must list exactly the same keys in the same order.
# All inputs are validated before the output is opened, so a mismatch never
# leaves a partially written output behind.
for vector_file in vector_in[1:]:
    vector_single = {}
    index = 0
    for key, vec in read_vec_flt_scp(vector_file):
        # Explicit checks instead of `assert`, which is removed under -O.
        if index >= len(vector_names):
            raise ValueError(
                "%s has more entries than %s (unexpected key %s)"
                % (vector_file, vector_in[0], key))
        if key != vector_names[index]:
            raise ValueError(
                "%s: key %s at position %d does not match %s from %s"
                % (vector_file, key, index, vector_names[index],
                   vector_in[0]))
        index += 1
        vector_single[key] = vec
    if index != len(vector_names):
        raise ValueError(
            "%s has %d entries but %s has %d"
            % (vector_file, index, vector_in[0], len(vector_names)))
    vector_tot.append(vector_single)

with open_or_fd(vector_out, 'wb') as f:
    for name in vector_names:
        # Join the per-file vectors for this key end to end along axis 0,
        # in the order the input files were given on the command line.
        vector = np.concatenate(
            [per_file[name] for per_file in vector_tot], axis=0)
        write_vec_flt(f, vector, key=name)