def _main():
    """Smoke-test the MFM model on randomly generated data.

    Creates a ``Config``, wraps random features and labels in ``DataManage``
    objects, then calls ``run`` with the train/validation sets and ``restore``
    with the enroll/test sets. The enroll set aliases the train set and the
    test set aliases the validation set.
    """
    import sys

    # The extra search path is only useful if it is in place before the
    # pyasv imports below are resolved, so it has to come first.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='MFM', n_speaker=100, batch_size=64, n_gpu=2,
                 max_step=5, is_big_dataset=False, learning_rate=0.001,
                 save_path='./save')

    x = np.random.random([6400, 50, 40, 1])
    y = np.random.randint(0, 100, [6400, 1])
    train = DataManage(x, y, con)
    enroll = train

    x = np.random.random([640, 50, 40, 1])
    y = np.random.randint(0, 100, [640, 1])
    validation = DataManage(x, y, con)
    test = validation

    run(con, train, validation)
    restore(con, enroll, test)
def _main():
    """Smoke-test the ctdnn model on randomly generated data.

    Creates a ``Config``, wraps random features and labels in ``DataManage``
    objects, then calls ``run`` with the train/validation sets and ``restore``
    with the enroll/test sets. The train set aliases the enroll set and the
    validation set aliases the test set.
    """
    import sys

    # The extra search path is only useful if it is in place before the
    # pyasv imports below are resolved, so it has to come first.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='ctdnn', n_speaker=100, batch_size=64 * 2, n_gpu=2,
                 max_step=5, is_big_dataset=False, learning_rate=0.001,
                 save_path='./save')

    x = np.random.random([6500, 9, 40, 1])
    y = np.random.randint(0, 99, [6500, 1])
    enroll = DataManage(x, y, con)
    train = enroll

    x = np.random.random([1500, 9, 40, 1])
    y = np.random.randint(0, 99, [1500, 1])
    test = DataManage(x, y, con)
    validation = test

    run(con, train, validation, False)
    restore(con, enroll, test)
def _main():
    """Interactively smoke-test the deep speaker model on random data.

    Reads the number of GPUs from standard input, builds a ``Config`` whose
    batch size scales with it, wraps random features and labels in
    ``DataManage`` objects and calls ``restore`` on them.
    """
    import ast
    import sys

    # The extra search path is only useful if it is in place before the
    # pyasv imports below are resolved, so it has to come first.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    print("Model test")
    print("input n_gpu", end="")
    # SECURITY: this used to be eval(input()), which executes whatever the
    # user types. literal_eval still accepts plain numeric literals
    # ("2", "2.0", "0x2") but cannot run arbitrary code.
    a = int(ast.literal_eval(input().strip()))

    con = Config(name='deepspeaker', n_speaker=100,
                 batch_size=32 * max(1, a), n_gpu=a, max_step=5,
                 is_big_dataset=False, learning_rate=0.001,
                 save_path='./save', conv_weight_decay=0.01,
                 fc_weight_decay=0.01, bn_epsilon=1e-3,
                 deep_speaker_out_channel=[32, 64])

    x = np.random.random([320, 100, 64, 1])
    y = np.random.randint(0, 99, [320, 1])
    train = DataManage(x, y, con)

    x = np.random.random([64, 100, 64, 1])
    y = np.random.randint(0, 99, [64, 1])
    validation = DataManage(x, y, con)

    # Training is currently switched off here; only restore is exercised.
    # run(con, train, validation)
    restore(con, train, validation)
def _test():
    """Run PLDA construction and scoring once on random data.

    Three independent random datasets (train, enroll, test) of 600 vectors
    with 200 dimensions each are generated; the model is built from the
    train set and ``score`` is called with the enroll and test sets.
    """
    config = pyasv.Config(plda_rankf=150, plda_rankg=150, batch_size=3000,
                          n_speaker=100, max_step=5)

    def random_dataset():
        # Features are drawn first, then labels, matching the order in which
        # the random generator is consumed for every dataset.
        features = numpy.random.random([600, 200])
        labels = numpy.random.randint(0, 99, [600, 1])
        return DataManage(features, labels, config)

    train = random_dataset()
    model = PLDA(config=config, data=train)
    enroll = random_dataset()
    test = random_dataset()
    model.score(enroll, test)
def _main():
    """Smoke-test the deep speaker model on randomly generated data.

    Creates a ``Config``, wraps random features and labels in ``DataManage``
    objects and calls ``run`` with the train and validation sets.
    """
    import sys

    # The extra search path is only useful if it is in place before the
    # pyasv imports below are resolved, so it has to come first.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='deepspeaker', n_speaker=100, batch_size=64, n_gpu=4,
                 max_step=20, is_big_dataset=False, learning_rate=0.001,
                 save_path='./save', conv_weight_decay=0.01,
                 fc_weight_decay=0.01, bn_epsilon=1e-3)

    x = np.random.random([6400, 100, 64, 1])
    y = np.random.randint(0, 100, [6400, 1])
    train = DataManage(x, y, con)

    x = np.random.random([640, 100, 64, 1])
    y = np.random.randint(0, 100, [640, 1])
    validation = DataManage(x, y, con)

    run(con, train, validation)
def run(self,
        train_frames,
        train_labels,
        enroll_frames=None,
        enroll_labels=None,
        test_frames=None,
        test_labels=None,
        need_prediction_now=False):
    """Train the MFM model and checkpoint it under ``save_path``.

    A checkpoint named ``model`` is written every 10 steps and after the
    final step. No TensorBoard summaries are written by this method.

    Parameters
    ----------
    train_frames : ``list`` or ``np.ndarray``
        The feature array of train dataset.
    train_labels : ``list`` or ``np.ndarray``
        The label array of train dataset.
    enroll_frames : ``list`` or ``np.ndarray``
        The feature array of enroll dataset.
    enroll_labels : ``list`` or ``np.ndarray``
        The label array of enroll dataset.
    test_frames : ``list`` or ``np.ndarray``
        The feature array of test dataset.
    test_labels : ``list`` or ``np.ndarray``
        The label array of test dataset.
    need_prediction_now : ``bool``
        if *True*, ``run_predict`` is called with the enroll/test data once
        training has finished. if *False* the method returns after training.
    """
    with tf.Graph().as_default():
        session_config = tf.ConfigProto(
            allow_soft_placement=False,
            log_device_placement=False,
        )
        with tf.Session(config=session_config) as sess:
            if self._is_big_dataset:
                train_data = DataManage4BigData(self._config)
                if not train_data.file_is_exist:
                    train_data.write_file(train_frames, train_labels)
                del train_frames, train_labels
                self._build_train_graph()
            else:
                self._build_train_graph()
                # NOTE(review): the batch size handed to DataManage is one
                # less than the configured one -- confirm this is intended.
                train_data = DataManage(train_frames, train_labels,
                                        self._batch_size - 1)

            # Build the train op before creating the initializer so that any
            # variables it adds to the graph are initialized as well.
            train_op = self._train_step()
            sess.run(tf.global_variables_initializer())

            checkpoint_path = os.path.join(self._save_path, 'model')
            last_time = time.time()
            for i in range(self._max_step):
                # One batch per GPU, concatenated into a single feed.
                input_frames = []
                input_labels = []
                for _ in range(self._n_gpu):
                    frames, labels = train_data.next_batch
                    input_frames.append(frames)
                    input_labels.append(labels)
                # NOTE(review): the 9x40x1 frame shape is hard-coded here.
                input_frames = np.array(input_frames).reshape(
                    [-1, 9, 40, 1])
                input_labels = np.array(input_labels).reshape(
                    [-1, self._n_speaker])

                sess.run(train_op,
                         feed_dict={
                             'x:0': input_frames,
                             'y_:0': input_labels
                         })

                current_time = time.time()
                print("No.%d step use %f sec" % (i, current_time - last_time))
                last_time = current_time

                if i % 10 == 0 or i + 1 == self._max_step:
                    self._saver.save(sess, checkpoint_path)

            if need_prediction_now:
                self.run_predict(enroll_frames, enroll_labels, test_frames,
                                 test_labels)
def run_predict(self, enroll_frames, enroll_labels, test_frames, test_labels):
    """Run prediction and write the accuracy to ``save_path/result.txt``.

    The latest checkpoint in ``save_path`` is restored into a freshly built
    prediction graph. Enrollment vectors are averaged per speaker, every
    test vector is assigned to the enrolled speaker with the highest
    ``cosine`` score, and the fraction of correct assignments is written
    out as ``Acc = <value>``.

    Parameters
    ----------
    enroll_frames : ``list`` or ``np.ndarray``
        The feature array of enroll dataset.
    enroll_labels : ``list`` or ``np.ndarray``
        The label of enroll dataset.
    test_frames : ``list`` or ``np.ndarray``
        The feature array of test dataset.
    test_labels : ``list`` or ``np.ndarray``
        The label of test dataset.
    """
    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            self._build_pred_graph()
            new_saver = tf.train.Saver()

            # needn't batch and gpu in prediction
            enroll_data = DataManage(enroll_frames, enroll_labels,
                                     self._batch_size)
            test_data = DataManage(test_frames, test_labels,
                                   self._batch_size)
            new_saver.restore(sess,
                              tf.train.latest_checkpoint(self._save_path))

            # Fetch the op's output tensor: running the Operation itself
            # would make sess.run return None instead of the vectors.
            feature = graph.get_operation_by_name(
                'feature_layer_output').outputs[0]

            # speaker index -> enrolled vector
            vector_dict = dict()
            while not enroll_data.is_eof:
                frames, labels = enroll_data.next_batch
                frames = np.array(frames).reshape([-1, 9, 40, 1])
                labels = np.array(labels).reshape([-1, self._n_speaker])
                vectors = sess.run(feature, feed_dict={'pred_x:0': frames})
                # Pair each vector with the label of the same batch row.
                for vector, one_hot in zip(vectors, labels):
                    speaker = int(np.argmax(one_hot))
                    if speaker in vector_dict:
                        # Running pairwise average, as before: the newest
                        # vector always carries half of the weight.
                        vector_dict[speaker] = (
                            vector_dict[speaker] + vector) / 2
                    else:
                        vector_dict[speaker] = np.array(vector)

            support = 0
            total = 0
            while not test_data.is_eof:
                frames, labels = test_data.next_batch
                frames = np.array(frames).reshape([-1, 9, 40, 1])
                labels = np.array(labels).reshape([-1, self._n_speaker])
                vectors = sess.run(feature, feed_dict={'pred_x:0': frames})
                for vector, one_hot in zip(vectors, labels):
                    # NOTE(review): the highest ``cosine`` value wins, which
                    # assumes ``cosine`` is a similarity, not a distance.
                    best_score = float('-inf')
                    best_label = -1
                    for speaker, enrolled in vector_dict.items():
                        score = cosine(vector, enrolled)
                        if score > best_score:
                            best_score = score
                            best_label = speaker
                    if best_label == int(np.argmax(one_hot)):
                        support += 1
                    total += 1

            # Guard against an empty test set.
            accuracy = float(support) / total if total else 0.0
            with open(os.path.join(self._save_path, 'result.txt'), 'w') as f:
                f.write("Acc = %f" % accuracy)
def run(self,
        train_frames,
        train_labels,
        enroll_frames=None,
        enroll_labels=None,
        test_frames=None,
        test_labels=None,
        need_prediction_now=False,
        use_tfdbg=False):
    """Train the ctdnn model.

    Checkpoints are written to ``save_path/model`` and TensorBoard data to
    ``save_path/graph/`` every 10 steps and after the final step.

    Parameters
    ----------
    train_frames : ``list`` or ``np.ndarray``
        The feature array of train dataset.
    train_labels : ``list`` or ``np.ndarray``
        The label array of train dataset, either one-hot rows of width
        ``n_speaker`` or speaker indices.
    enroll_frames : ``list`` or ``np.ndarray``
        The feature array of enroll dataset.
    enroll_labels : ``list`` or ``np.ndarray``
        The label array of enroll dataset.
    test_frames : ``list`` or ``np.ndarray``
        The feature array of test dataset.
    test_labels : ``list`` or ``np.ndarray``
        The label array of test dataset.
    need_prediction_now : ``bool``
        if *True*, ``run_predict`` is called with the enroll/test data once
        training has finished. if *False* the method returns after training.
    use_tfdbg : ``bool``
        if *True* the training steps run through the interactive
        ``tfdbg`` command-line wrapper. Off by default so that training can
        run unattended.
    """

    def to_one_hot(labels):
        """Return *labels* as an ``[n, n_speaker]`` one-hot array."""
        labels = np.array(labels)
        if labels.ndim >= 2 and labels.shape[-1] == self._n_speaker:
            return labels
        if labels.ndim <= 1 or labels.shape[-1] == 1:
            # Plain speaker indices: argmax over a single column would map
            # every sample to class 0, so use the value itself.
            indices = labels.reshape(-1).astype(int)
        else:
            indices = np.argmax(labels.reshape(labels.shape[0], -1), axis=1)
        one_hot = np.zeros((indices.shape[0], self._n_speaker))
        one_hot[np.arange(indices.shape[0]), indices] = 1
        return one_hot

    with tf.Graph().as_default():
        session_config = tf.ConfigProto(
            allow_soft_placement=False,
            log_device_placement=False,
        )
        with tf.Session(config=session_config) as sess:
            # convert all data to np.ndarray
            train_frames = np.array(train_frames)
            train_targets = to_one_hot(train_labels)
            if enroll_frames is not None:
                enroll_frames = np.array(enroll_frames)
            if enroll_labels is not None:
                enroll_labels = to_one_hot(enroll_labels)
            if test_frames is not None:
                test_frames = np.array(test_frames)
            if test_labels is not None:
                test_labels = to_one_hot(test_labels)

            # initial tensorboard
            writer = tf.summary.FileWriter(
                os.path.join(self._save_path, 'graph'), sess.graph)
            try:
                # prepare data
                if self._is_big_dataset:
                    train_data = DataManage4BigData(self._config)
                    if not train_data.file_is_exist:
                        train_data.write_file(train_frames, train_targets)
                    del train_frames, train_targets
                    self._build_train_graph()
                else:
                    self._build_train_graph()
                    train_data = DataManage(train_frames, train_targets,
                                            self._config)

                # Build the train op before creating the initializer so that
                # any variables it adds to the graph are initialized too.
                train_op, loss = self._train_step()
                sess.run(tf.global_variables_initializer())

                if use_tfdbg:
                    run_session = tfdbg.LocalCLIDebugWrapperSession(sess=sess)
                else:
                    run_session = sess

                accuracy = None
                if enroll_frames is not None:
                    accuracy = self._validation_acc(sess, enroll_frames,
                                                    enroll_labels,
                                                    test_frames, test_labels)
                    tf.summary.scalar('accuracy', accuracy)

                # NOTE(review): no trace options are passed to the session,
                # so this metadata object stays empty when it is recorded.
                run_metadata = tf.RunMetadata()

                # define tensorboard steps
                tf.summary.scalar('loss_summary', loss)
                merged_summary = tf.summary.merge_all()

                checkpoint_path = os.path.join(self._save_path, 'model')
                last_time = time.time()

                # train loop
                for i in range(self._max_step):
                    # One batch per GPU; labels come from the batch itself.
                    input_frames = []
                    input_labels = []
                    for _ in range(self._n_gpu):
                        frames, labels = train_data.next_batch
                        input_frames.append(frames)
                        input_labels.append(to_one_hot(labels))
                    input_frames = np.array(input_frames).reshape(
                        [self._n_gpu, -1, 9, 40, 1])
                    input_labels = np.array(input_labels).reshape(
                        [self._n_gpu, -1, self._n_speaker])

                    _, summary_str = run_session.run(
                        [train_op, merged_summary],
                        feed_dict={
                            'x:0': input_frames,
                            "y_:0": input_labels
                        })
                    current_time = time.time()

                    # print log
                    print("------------------------")
                    print("No.%d step use %f sec" %
                          (i, current_time - last_time))
                    if accuracy is not None:
                        # Best effort: a failing evaluation must not abort
                        # training, but it is no longer hidden silently.
                        try:
                            print("Acc = %f" % accuracy.eval(session=sess))
                        except Exception as err:
                            print("Acc unavailable: %s" % err)
                    print("------------------------")
                    last_time = time.time()

                    # record
                    if i % 10 == 0 or i + 1 == self._max_step:
                        self._saver.save(sess, checkpoint_path)
                        writer.add_run_metadata(run_metadata, 'step%d' % i)
                        writer.add_summary(summary_str, i)

                if need_prediction_now:
                    self.run_predict(enroll_frames, enroll_labels,
                                     test_frames, test_labels)
            finally:
                # Flush and close the event file even if training fails.
                writer.close()
def run(self,
        train_frames,
        train_labels,
        enroll_frames=None,
        enroll_labels=None,
        test_frames=None,
        test_labels=None):
    """Train the deep speaker model and, if data is given, evaluate it.

    Checkpoints are written to ``save_path/model-<step>`` every 25 steps and
    after the final step; the graph is written to ``save_path/graph/``.
    When enroll and test data are both supplied, speaker vectors are
    enrolled, every test embedding is assigned to the best scoring enrolled
    speaker and the accuracy is written to ``save_path/result``.

    Parameters
    ----------
    train_frames : ``list`` or ``np.ndarray``
        The feature array of train dataset.
    train_labels : ``list`` or ``np.ndarray``
        The label array of train dataset.
    enroll_frames : ``list`` or ``np.ndarray``
        The feature array of enroll dataset.
    enroll_labels : ``list`` or ``np.ndarray``
        The label array of enroll dataset.
    test_frames : ``list`` or ``np.ndarray``
        The feature array of test dataset.
    test_labels : ``list`` or ``np.ndarray``
        The label array of test dataset.
    """

    def as_array(data):
        # np.array(None) is a real (0-d) array, which would defeat every
        # later "is not None" check, so None is passed through untouched.
        return None if data is None else np.array(data)

    def speaker_id(label):
        # A single value is taken as the speaker index itself; anything
        # wider is treated as a one-hot / score row.
        label = np.asarray(label)
        if label.size == 1:
            return int(label.reshape(-1)[0])
        return int(np.argmax(label))

    with tf.Graph().as_default():
        session_config = tf.ConfigProto(
            allow_soft_placement=False,
            log_device_placement=False,
        )
        with tf.Session(config=session_config) as sess:
            self._build_train_graph()

            # Make sure the format of data is np.ndarray
            train_frames = np.array(train_frames)
            train_targets = np.array(train_labels)
            enroll_frames = as_array(enroll_frames)
            enroll_labels = as_array(enroll_labels)
            test_frames = as_array(test_frames)
            test_labels = as_array(test_labels)

            train_data = DataManage(train_frames, train_targets,
                                    self._batch_size)

            # The saver is created before the train op, as before, so the
            # set of checkpointed variables is unchanged. The initializer is
            # created last so that it covers every variable in the graph.
            saver = tf.train.Saver()
            train_op = self._train_step()
            sess.run(tf.global_variables_initializer())

            # initial tensorboard
            writer = tf.summary.FileWriter(
                os.path.join(self._save_path, 'graph'), sess.graph)
            try:
                if enroll_frames is not None:
                    accuracy = self._validation_acc(sess, enroll_frames,
                                                    enroll_labels,
                                                    test_frames, test_labels)
                    tf.summary.scalar('accuracy', accuracy)

                checkpoint_path = os.path.join(self._save_path, 'model')
                for step in range(self._max_step):
                    inp_frames = []
                    inp_labels = []
                    # The per-GPU loop must not reuse the step counter, or
                    # the checkpoint condition below sees the wrong value.
                    for _ in range(self._n_gpu):
                        frames, labels = train_data.next_batch
                        inp_frames.append(frames)
                        inp_labels.append(labels)
                    inp_frames = np.array(inp_frames)
                    inp_labels = np.array(inp_labels)
                    sess.run(train_op,
                             feed_dict={'x:0': inp_frames,
                                        'y_:0': inp_labels})
                    if step % 25 == 0 or step + 1 == self._max_step:
                        saver.save(sess, checkpoint_path, global_step=step)

                have_eval_data = not any(
                    data is None
                    for data in (enroll_frames, enroll_labels,
                                 test_frames, test_labels))
                if not have_eval_data:
                    return

                # A huge batch size makes next_batch hand out a whole
                # dataset in a single call.
                INF = 0x3f3f3f3f
                self._n_gpu = 1
                enroll_data = DataManage(enroll_frames, enroll_labels, INF)
                test_data = DataManage(test_frames, test_labels, INF)
                get_vector = self.feature

                frames, labels = enroll_data.next_batch
                embeddings = sess.run(get_vector,
                                      feed_dict={'x:0': frames,
                                                 'y_:0': labels})
                # NOTE(review): assumes self._vectors is a dict keyed by
                # speaker index -- confirm against the class initializer.
                for embedding, label in zip(embeddings, labels):
                    speaker = speaker_id(label)
                    if speaker in self._vectors:
                        # Running pairwise average: the newest embedding
                        # always carries half of the weight.
                        self._vectors[speaker] = (
                            self._vectors[speaker] + embedding) / 2
                    else:
                        self._vectors[speaker] = np.array(embedding)

                frames, labels = test_data.next_batch
                embeddings = sess.run(get_vector,
                                      feed_dict={'x:0': frames,
                                                 'y_:0': labels})
                support = 0
                for embedding, label in zip(embeddings, labels):
                    best_score = float('-inf')
                    best_label = -1
                    for key, enrolled in self._vectors.items():
                        new_score = self._cosine(enrolled, embedding)
                        if new_score > best_score:
                            # Remember the score too, otherwise the last
                            # positive-scoring speaker always wins.
                            best_score = new_score
                            best_label = key
                    if best_label == speaker_id(label):
                        support += 1

                total = len(embeddings)
                accuracy_value = float(support) / total if total else 0.0
                with open(os.path.join(self._save_path, 'result'), 'w') as f:
                    f.write("Acc is %f" % accuracy_value)
            finally:
                # Flush and close the event file on every exit path.
                writer.close()