# Reconstructed branch header: chunking is only applied when the utterance
# is longer than a single chunk (the exact condition, plus the definitions
# of `key`, `feature`, `feature_array`, `feature_length` and `num_chunks`,
# come from the surrounding script and are assumed here).
if feature.shape[0] > args.chunk_size:
    for i in range(num_chunks):
        # Chunks advance by half a chunk size, i.e. 50% overlap. Integer
        # division (//) keeps the slice index an int under Python 3.
        start = i * (args.chunk_size // 2)
        this_chunk_size = (args.chunk_size
                           if feature.shape[0] - start > args.chunk_size
                           else feature.shape[0] - start)
        feature_length.append(this_chunk_size)
        feature_array.append(feature[start:start + this_chunk_size])
    feature_length = np.expand_dims(np.array(feature_length), axis=1)

    # Except for the last chunk, all chunks have the same length
    # (= chunk_size), so they can be batched into one array; the possibly
    # shorter last chunk is predicted separately.
    embeddings = trainer.predict(np.array(feature_array[:-1], dtype=np.float32))
    embedding_last = trainer.predict(feature_array[-1])
    embeddings = np.concatenate(
        [embeddings, np.expand_dims(embedding_last, axis=0)], axis=0)
    if args.normalize:
        # L2-normalize each chunk embedding before averaging.
        embeddings /= np.sqrt(
            np.sum(np.square(embeddings), axis=1, keepdims=True))
    # Length-weighted average of the chunk embeddings.
    embedding = (np.sum(embeddings * feature_length, axis=0)
                 / np.sum(feature_length))
else:
    # The utterance fits in a single chunk; embed it in one pass.
    tf.logging.info("[INFO] Key %s length %d." % (key, feature.shape[0]))
    embedding = trainer.predict(feature)
    if args.normalize:
        embedding /= np.sqrt(np.sum(np.square(embedding)))

write_vec_flt(fp_out, embedding, key=key)

# Once every utterance has been processed:
fp_out.close()
trainer.close()
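For reference, the chunk-and-average policy above can be written as one standalone function. This is a minimal NumPy sketch under the same assumptions (50% chunk overlap, length-weighted mean, optional L2 normalization); the names `chunked_embedding` and `predict_fn` are illustrative, with `predict_fn` standing in for `trainer.predict`, and the `num_chunks` formula is reconstructed from the overlap scheme rather than copied from the source:

```python
import numpy as np

def chunked_embedding(feature, predict_fn, chunk_size, normalize=False):
    """Embed a (num_frames x feat_dim) matrix longer than chunk_size by
    averaging the embeddings of 50%-overlapping chunks."""
    hop = chunk_size // 2
    # Chunks start every `hop` frames; the last one is clipped to the
    # utterance end, so it may be shorter than chunk_size.
    num_chunks = int(np.ceil(float(feature.shape[0] - chunk_size) / hop)) + 1
    chunks, lengths = [], []
    for i in range(num_chunks):
        start = i * hop
        end = min(start + chunk_size, feature.shape[0])
        chunks.append(feature[start:end])
        lengths.append(end - start)
    embeddings = np.stack([predict_fn(c) for c in chunks])
    if normalize:
        # L2-normalize each chunk embedding before averaging.
        embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Longer chunks contribute proportionally more to the average.
    weights = np.asarray(lengths, dtype=np.float32)[:, None]
    return np.sum(embeddings * weights, axis=0) / np.sum(weights)
```

With a dummy `predict_fn` such as `lambda c: c.mean(axis=0)`, the function can be smoke-tested without a trained network.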
#!/usr/bin/env python
# Concatenate per-utterance vectors from several Kaldi scp files into one
# output archive. The helpers read_vec_flt_scp, write_vec_flt and
# open_or_fd are assumed to come from kaldi_io (kaldi-io-for-python).
import sys

import numpy as np
from kaldi_io import open_or_fd, read_vec_flt_scp, write_vec_flt

vector_in = sys.argv[1:-1]
vector_out = sys.argv[-1]

vector_tot = []
vector_names = []

# Read the first scp; it fixes the utterance order for all the others.
vector_single = {}
for key, vec in read_vec_flt_scp(vector_in[0]):
    vector_names.append(key)
    vector_single[key] = vec
vector_tot.append(vector_single)

# The remaining scp files must list the same utterances in the same order.
for vector_file in vector_in[1:]:
    vector_single = {}
    index = 0
    for key, vec in read_vec_flt_scp(vector_file):
        assert key == vector_names[index]
        index += 1
        vector_single[key] = vec
    vector_tot.append(vector_single)

# For each utterance, concatenate its vectors from all inputs and write
# the result to the output archive.
with open_or_fd(vector_out, 'wb') as f:
    for name in vector_names:
        vector = np.concatenate(
            [vector_single[name] for vector_single in vector_tot], axis=0)
        write_vec_flt(f, vector, key=name)
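As a usage sketch, the script takes N input scp files followed by one output path; the script and file names below are illustrative, not from the source:

```python
# Hypothetical invocation, concatenating two embedding sets per utterance:
#   python concatenate_vectors.py xvec_a.scp xvec_b.scp xvec_concat.ark
#
# Reading the output back; each vector's dim should equal the sum of the
# input dims (e.g. 256 + 256 = 512).
from kaldi_io import read_vec_flt_ark

for key, vec in read_vec_flt_ark('xvec_concat.ark'):
    print(key, vec.shape)
```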