Exemple #1
0
def _main():
    """
    Smoke-test the MFM model on random data.

    Builds a ``Config`` named ``'MFM'``, wraps random feature/label arrays in
    ``DataManage`` objects and then calls ``run`` followed by ``restore``.
    The training set is reused as the enroll set and the validation set is
    reused as the test set.
    """
    import sys

    # Extend the search path *before* importing pyasv; appending afterwards
    # cannot help the imports below. Presumably "../.." is the source
    # checkout root when this is run from the model directory -- confirm.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='MFM',
                 n_speaker=100,
                 batch_size=64,
                 n_gpu=2,
                 max_step=5,
                 is_big_dataset=False,
                 learning_rate=0.001,
                 save_path='./save')

    # 6400 random samples of shape (50, 40, 1) with integer labels in [0, 100).
    train_frames = np.random.random([6400, 50, 40, 1])
    train_labels = np.random.randint(0, 100, [6400, 1])
    train = DataManage(train_frames, train_labels, con)
    enroll = train

    # 640 random samples used for both validation and test.
    valid_frames = np.random.random([640, 50, 40, 1])
    valid_labels = np.random.randint(0, 100, [640, 1])
    validation = DataManage(valid_frames, valid_labels, con)
    test = validation

    run(con, train, validation)
    restore(con, enroll, test)
Exemple #2
0
def _main():
    """
    Smoke-test the ctdnn model on random data.

    Builds a ``Config`` named ``'ctdnn'``, wraps random feature/label arrays
    in ``DataManage`` objects and then calls ``run`` (with ``False`` as its
    fourth argument) followed by ``restore``.  The enroll set is reused as the
    training set and the test set is reused as the validation set.
    """
    import sys

    # Extend the search path *before* importing pyasv; appending afterwards
    # cannot help the imports below. Presumably "../.." is the source
    # checkout root when this is run from the model directory -- confirm.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='ctdnn',
                 n_speaker=100,
                 batch_size=64 * 2,
                 n_gpu=2,
                 max_step=5,
                 is_big_dataset=False,
                 learning_rate=0.001,
                 save_path='./save')
    #con.save('ctdnn')

    # NOTE: randint's upper bound is exclusive, so labels span 0..98 and
    # speaker id 99 is never generated even though n_speaker is 100.
    enroll_frames = np.random.random([6500, 9, 40, 1])
    enroll_labels = np.random.randint(0, 99, [6500, 1])
    enroll = DataManage(enroll_frames, enroll_labels, con)
    train = enroll

    test_frames = np.random.random([1500, 9, 40, 1])
    test_labels = np.random.randint(0, 99, [1500, 1])
    test = DataManage(test_frames, test_labels, con)
    validation = test

    run(con, train, validation, False)
    restore(con, enroll, test)
def _main():
    """
    Interactively smoke-test the deepspeaker model on random data.

    Reads the number of GPUs from standard input, builds a ``Config`` named
    ``'deepspeaker'`` whose batch size is ``32 * max(1, n_gpu)``, wraps random
    feature/label arrays in ``DataManage`` objects and calls ``restore``.
    Training via ``run`` is currently disabled (see the commented call).

    Raises
    ------
    ValueError or SyntaxError
        If the console input is not a Python numeric literal.
    """
    import ast
    import sys

    # Extend the search path *before* importing pyasv; appending afterwards
    # cannot help the imports below.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    print("Model test")
    print("input n_gpu", end="")
    # SECURITY: the previous ``eval(input())`` executed arbitrary expressions
    # typed at the console. ``ast.literal_eval`` still accepts numeric
    # literals such as "2" or "2.0" but refuses anything that is not a literal.
    a = int(ast.literal_eval(input().strip()))
    con = Config(name='deepspeaker',
                 n_speaker=100,
                 batch_size=32 * max(1, a),
                 n_gpu=a,
                 max_step=5,
                 is_big_dataset=False,
                 learning_rate=0.001,
                 save_path='./save',
                 conv_weight_decay=0.01,
                 fc_weight_decay=0.01,
                 bn_epsilon=1e-3,
                 deep_speaker_out_channel=[32, 64])

    # NOTE: randint's upper bound is exclusive, so labels span 0..98.
    train_frames = np.random.random([320, 100, 64, 1])
    train_labels = np.random.randint(0, 99, [320, 1])
    train = DataManage(train_frames, train_labels, con)

    valid_frames = np.random.random([64, 100, 64, 1])
    valid_labels = np.random.randint(0, 99, [64, 1])
    validation = DataManage(valid_frames, valid_labels, con)

    # run(con, train, validation)
    restore(con, train, validation)
def _test():
    """Exercise the PLDA model on random data.

    Creates a ``pyasv.Config``, builds a random training set, constructs a
    ``PLDA`` model from it, then builds random enroll and test sets and passes
    them to ``model.score``.
    """

    def _random_dataset():
        # 600 random 200-dimensional vectors with integer labels drawn from
        # [0, 99) (randint's upper bound is exclusive).
        features = numpy.random.random([600, 200])
        labels = numpy.random.randint(0, 99, [600, 1])
        return DataManage(features, labels, config)

    config = pyasv.Config(
        plda_rankf=150,
        plda_rankg=150,
        batch_size=3000,
        n_speaker=100,
        max_step=5,
    )

    # The model is constructed from the training set before the enroll and
    # test sets are generated, matching the original statement order.
    training_set = _random_dataset()
    model = PLDA(config=config, data=training_set)
    enroll_set = _random_dataset()
    test_set = _random_dataset()
    model.score(enroll_set, test_set)
def _main():
    """
    Smoke-test the deepspeaker model on random data.

    Builds a ``Config`` named ``'deepspeaker'`` for 4 GPUs and 20 steps, wraps
    random feature/label arrays in ``DataManage`` objects and calls ``run``
    with the training and validation sets.
    """
    import sys

    # Extend the search path *before* importing pyasv; appending afterwards
    # cannot help the imports below.
    sys.path.append("../..")

    from pyasv.data_manage import DataManage
    from pyasv import Config

    con = Config(name='deepspeaker',
                 n_speaker=100,
                 batch_size=64,
                 n_gpu=4,
                 max_step=20,
                 is_big_dataset=False,
                 learning_rate=0.001,
                 save_path='./save',
                 conv_weight_decay=0.01,
                 fc_weight_decay=0.01,
                 bn_epsilon=1e-3)

    # 6400 random samples of shape (100, 64, 1) with integer labels in [0, 100).
    train_frames = np.random.random([6400, 100, 64, 1])
    train_labels = np.random.randint(0, 100, [6400, 1])
    train = DataManage(train_frames, train_labels, con)

    valid_frames = np.random.random([640, 100, 64, 1])
    valid_labels = np.random.randint(0, 100, [640, 1])
    validation = DataManage(valid_frames, valid_labels, con)

    run(con, train, validation)
    def run(self,
            train_frames,
            train_labels,
            enroll_frames=None,
            enroll_labels=None,
            test_frames=None,
            test_labels=None,
            need_prediction_now=False):
        """Train the MFM model and checkpoint it to ``save_path/model``.

        Parameters
        ----------
        train_frames : ``list`` or ``np.ndarray``
            Training features.
        train_labels : ``list`` or ``np.ndarray``
            Training labels.
        enroll_frames : ``list`` or ``np.ndarray``
            Enroll features, forwarded to ``run_predict``.
        enroll_labels : ``list`` or ``np.ndarray``
            Enroll labels, forwarded to ``run_predict``.
        test_frames : ``list`` or ``np.ndarray``
            Test features, forwarded to ``run_predict``.
        test_labels : ``list`` or ``np.ndarray``
            Test labels, forwarded to ``run_predict``.
        need_prediction_now : ``bool``
            When *True*, ``run_predict`` is called once training has finished;
            when *False* the method returns right after training.
        """

        with tf.Graph().as_default():
            session_config = tf.ConfigProto(
                allow_soft_placement=False,
                log_device_placement=False,
            )
            with tf.Session(config=session_config) as sess:
                if not self._is_big_dataset:
                    self._build_train_graph()
                    train_data = DataManage(train_frames, train_labels,
                                            self._batch_size - 1)
                else:
                    train_data = DataManage4BigData(self._config)
                    if not train_data.file_is_exist:
                        train_data.write_file(train_frames, train_labels)
                    # Drop the local references to the raw arrays.
                    del train_frames, train_labels
                    self._build_train_graph()

                sess.run(tf.global_variables_initializer())
                train_op = self._train_step()

                step_started = time.time()
                for step in range(self._max_step):
                    # Pull one batch per GPU and stack them into a single feed.
                    batch_frames, batch_labels = [], []
                    for gpu_index in range(self._n_gpu):
                        print(gpu_index)
                        gpu_frames, gpu_labels = train_data.next_batch
                        batch_frames.append(gpu_frames)
                        batch_labels.append(gpu_labels)

                    feed = {
                        'x:0': np.array(batch_frames).reshape([-1, 9, 40, 1]),
                        'y_:0': np.array(batch_labels).reshape(
                            [-1, self._n_speaker]),
                    }
                    sess.run(train_op, feed_dict=feed)

                    step_finished = time.time()
                    print("No.%d step use %f sec" %
                          (step, step_finished - step_started))
                    step_started = time.time()

                    # Checkpoint every tenth step and on the final step.
                    is_final_step = step + 1 == self._max_step
                    if step % 10 == 0 or is_final_step:
                        self._saver.save(
                            sess, os.path.join(self._save_path, 'model'))

        if need_prediction_now:
            self.run_predict(enroll_frames, enroll_labels, test_frames,
                             test_labels)
    def run_predict(self, enroll_frames, enroll_labels, test_frames,
                    test_labels):
        """Run prediction and write the accuracy to ``save_path/result.txt``.

        The latest checkpoint under ``save_path`` is restored into a freshly
        built prediction graph.  Each enroll sample's feature vector is folded
        into a per-speaker reference vector; each test sample is then assigned
        the speaker whose reference vector scores highest under ``cosine``.

        Parameters
        ----------
        enroll_frames : ``list`` or ``np.ndarray``
            The feature array of enroll dataset.
        enroll_labels : ``list`` or ``np.ndarray``
            The label of enroll dataset.
        test_frames : ``list`` or ``np.ndarray``
            The feature array of test dataset.
        test_labels : ``list`` or ``np.ndarray``
            The label of test dataset.
        """
        with tf.Graph().as_default() as graph:
            with tf.Session() as sess:
                self._build_pred_graph()
                new_saver = tf.train.Saver()

                enroll_data = DataManage(enroll_frames, enroll_labels,
                                         self._batch_size)
                test_data = DataManage(test_frames, test_labels,
                                       self._batch_size)
                new_saver.restore(sess,
                                  tf.train.latest_checkpoint(self._save_path))

                # Fetch the op's output tensor: running the Operation object
                # itself would make ``sess.run`` return None.
                feature_op = graph.get_operation_by_name(
                    'feature_layer_output').outputs[0]

                # speaker id -> reference vector
                vector_dict = dict()
                while not enroll_data.is_eof:
                    frames, labels = enroll_data.next_batch
                    frames = np.array(frames).reshape([-1, 9, 40, 1])
                    labels = np.array(labels).reshape([-1, self._n_speaker])
                    vectors = sess.run(feature_op,
                                       feed_dict={'pred_x:0': frames})
                    # Pair every vector with the label from the *same batch*
                    # rather than indexing the full enroll label array.
                    for vector, label in zip(vectors, labels):
                        speaker = int(np.argmax(label))
                        if speaker in vector_dict:
                            # Running average, same update as before
                            # (add the new vector, then halve).
                            vector_dict[speaker] = (
                                vector_dict[speaker] + vector) / 2
                        else:
                            vector_dict[speaker] = vector

                # Accumulate hits over *all* test batches.
                support = 0
                while not test_data.is_eof:
                    frames, labels = test_data.next_batch
                    frames = np.array(frames).reshape([-1, 9, 40, 1])
                    labels = np.array(labels).reshape([-1, self._n_speaker])
                    vectors = sess.run(feature_op,
                                       feed_dict={'pred_x:0': frames})
                    for vector, true_label in zip(vectors, labels):
                        # NOTE(review): assumes ``cosine`` is a similarity
                        # (higher is better) -- confirm against its definition.
                        best_score = 0
                        predicted = -1
                        for speaker, reference in vector_dict.items():
                            current = cosine(vector, reference)
                            if current > best_score:
                                best_score = current
                                predicted = speaker
                        if predicted == int(np.argmax(true_label)):
                            support += 1

                with open(os.path.join(self._save_path, 'result.txt'),
                          'w') as f:
                    s = "Acc = %f" % (support / test_data.raw_frames.shape[0])
                    f.write(s)
    def run(self,
            train_frames,
            train_labels,
            enroll_frames=None,
            enroll_labels=None,
            test_frames=None,
            test_labels=None,
            need_prediction_now=False):
        """Run the ctdnn model. Will save model to save_path/ and save tensorboard to save_path/graph/.

        Labels whose last dimension is not ``n_speaker`` wide are converted to
        one-hot rows before use.

        Parameters
        ----------
        train_frames : ``list`` or ``np.ndarray``
            The feature array of train dataset.
        train_labels : ``list`` or ``np.ndarray``
            The label array of train dataset.
        enroll_frames : ``list`` or ``np.ndarray``
            The feature array of enroll dataset.
        enroll_labels : ``list`` or ``np.ndarray``
            The label array of enroll dataset.
        test_frames : ``list`` or ``np.ndarray``
            The feature array of test dataset.
        test_labels : ``list`` or ``np.ndarray``
            The label array of test dataset.
        need_prediction_now : ``bool``
            if *True* we will create predict graph and run predict now.
            if *False* we will exit after training.
        """

        def _one_hot(labels):
            """Return *labels* as an ``(n, n_speaker)`` one-hot array."""
            labels = np.array(labels)
            if labels.ndim > 1 and labels.shape[-1] == self._n_speaker:
                return labels
            encoded = np.zeros((labels.shape[0], self._n_speaker))
            for row, label in enumerate(labels):
                label = np.asarray(label)
                if label.size == 1:
                    # A single value is the class id itself; argmax over one
                    # element would always yield 0.
                    index = int(label.reshape(-1)[0])
                else:
                    index = int(np.argmax(label))
                encoded[row, index] = 1
            return encoded

        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=False,
                    log_device_placement=False,
            )) as sess:
                # convert all data to np.ndarray
                train_frames = np.array(train_frames)
                train_targets = _one_hot(train_labels)

                if enroll_frames is not None:
                    enroll_frames = np.array(enroll_frames)
                if enroll_labels is not None:
                    # Previously the already-one-hot branch overwrote the
                    # enroll labels with the test labels.
                    enroll_labels = _one_hot(enroll_labels)

                if test_frames is not None:
                    test_frames = np.array(test_frames)
                if test_labels is not None:
                    test_labels = _one_hot(test_labels)

                # initial tensorboard
                writer = tf.summary.FileWriter(
                    os.path.join(self._save_path, 'graph'), sess.graph)
                try:
                    # prepare data
                    if self._is_big_dataset:
                        train_data = DataManage4BigData(self._config)
                        if not train_data.file_is_exist:
                            train_data.write_file(train_frames, train_targets)
                        del train_frames, train_targets
                        self._build_train_graph()
                    else:
                        self._build_train_graph()
                        train_data = DataManage(train_frames, train_targets,
                                                self._config)

                    # initial step
                    initial = tf.global_variables_initializer()
                    sess.run(initial)
                    # NOTE(review): every training step goes through the
                    # interactive tfdbg CLI wrapper; kept as in the original.
                    debug_sess = tfdbg.LocalCLIDebugWrapperSession(sess=sess)

                    train_op, loss = self._train_step()

                    # ``accuracy`` stays None when no enroll data is given so
                    # the log code below can test for it explicitly.
                    accuracy = None
                    if enroll_frames is not None:
                        accuracy = self._validation_acc(
                            sess, enroll_frames, enroll_labels, test_frames,
                            test_labels)
                        tf.summary.scalar('accuracy', accuracy)

                    # NOTE(review): this metadata object is written to the
                    # event file each step but is never filled, because no
                    # RunOptions/run_metadata are passed to ``run`` below.
                    run_metadata = tf.RunMetadata()

                    # define tensorboard steps
                    tf.summary.scalar('loss_summary', loss)
                    merged_summary = tf.summary.merge_all()

                    last_time = time.time()

                    # train loop
                    for i in range(self._max_step):
                        # get data: one batch per GPU
                        input_frames = []
                        input_labels = []
                        for _ in range(self._n_gpu):
                            frames, labels = train_data.next_batch
                            batch_targets = []
                            for m in range(labels.shape[0]):
                                ids = np.zeros(self._n_speaker)
                                # Encode from the batch's own label row (the
                                # original indexed ``enroll_labels`` here).
                                ids[np.argmax(labels[m])] = 1
                                batch_targets.append(ids)
                            input_frames.append(frames)
                            input_labels.append(batch_targets)
                        input_frames = np.array(input_frames).reshape(
                            [self._n_gpu, -1, 9, 40, 1])
                        input_labels = np.array(input_labels).reshape(
                            [self._n_gpu, -1, self._n_speaker])

                        _, summary_str = debug_sess.run(
                            [train_op, merged_summary],
                            feed_dict={
                                'x:0': input_frames,
                                "y_:0": input_labels
                            })
                        current_time = time.time()

                        # print log
                        print("------------------------")
                        print("No.%d step use %f sec" %
                              (i, current_time - last_time))
                        if accuracy is not None:
                            # Best effort: report the failure instead of
                            # silently swallowing it, but keep training.
                            try:
                                print("Acc = %f" % accuracy.eval())
                            except Exception as err:
                                print("Acc unavailable: %s" % err)
                        print("------------------------")
                        last_time = time.time()

                        # record
                        if i % 10 == 0 or i + 1 == self._max_step:
                            self._saver.save(
                                sess, os.path.join(self._save_path, 'model'))

                        writer.add_run_metadata(run_metadata, 'step%d' % i)
                        writer.add_summary(summary_str, i)
                finally:
                    # Flush and release the event file even if training fails.
                    writer.close()

        if need_prediction_now:
            self.run_predict(enroll_frames, enroll_labels, test_frames,
                             test_labels)
Exemple #9
0
    def run(self,
            train_frames,
            train_labels,
            enroll_frames=None,
            enroll_labels=None,
            test_frames=None,
            test_labels=None):
        """Run the deep speaker model. Will save model to save_path/ and save tensorboard to save_path/graph/.

        After training, if all enroll and test arrays were supplied, the
        enroll set is embedded into per-speaker reference vectors stored in
        ``self._vectors``, the test set is scored against them with
        ``self._cosine`` and the accuracy is written to ``save_path/result``.
        Without enroll/test data the method returns after training.

        Parameters
        ----------
        train_frames : ``list`` or ``np.ndarray``
            The feature array of train dataset.
        train_labels : ``list`` or ``np.ndarray``
            The label array of train dataset.
        enroll_frames : ``list`` or ``np.ndarray``
            The feature array of enroll dataset.
        enroll_labels : ``list`` or ``np.ndarray``
            The label array of enroll dataset.
        test_frames : ``list`` or ``np.ndarray``
            The feature array of test dataset.
        test_labels : ``list`` or ``np.ndarray``
            The label array of test dataset.
        """

        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=False,
                    log_device_placement=False,
            )) as sess:
                self._build_train_graph()
                # Make sure the format of data is np.ndarray. Optional inputs
                # are converted only when present: ``np.array(None)`` is not
                # None and would defeat the checks below.
                train_frames = np.array(train_frames)
                train_targets = np.array(train_labels)
                if enroll_frames is not None:
                    enroll_frames = np.array(enroll_frames)
                if enroll_labels is not None:
                    enroll_labels = np.array(enroll_labels)
                if test_frames is not None:
                    test_frames = np.array(test_frames)
                if test_labels is not None:
                    test_labels = np.array(test_labels)
                has_eval_data = all(
                    data is not None
                    for data in (enroll_frames, enroll_labels,
                                 test_frames, test_labels))

                train_data = DataManage(train_frames, train_targets,
                                        self._batch_size)
                initial = tf.global_variables_initializer()
                sess.run(initial)
                saver = tf.train.Saver()
                train_op = self._train_step()

                # initial tensorboard
                writer = tf.summary.FileWriter(
                    os.path.join(self._save_path, 'graph'), sess.graph)
                try:
                    if enroll_frames is not None:
                        accuracy = self._validation_acc(
                            sess, enroll_frames, enroll_labels, test_frames,
                            test_labels)
                        tf.summary.scalar('accuracy', accuracy)

                    for step in range(self._max_step):
                        inp_frames = []
                        inp_labels = []
                        # One batch per GPU. The loop variable must not reuse
                        # the step counter, or the checkpoint test below sees
                        # the GPU index instead of the step.
                        for _ in range(self._n_gpu):
                            frames, labels = train_data.next_batch
                            inp_frames.append(frames)
                            inp_labels.append(labels)
                        inp_frames = np.array(inp_frames)
                        inp_labels = np.array(inp_labels)
                        sess.run(train_op,
                                 feed_dict={'x:0': inp_frames,
                                            'y_:0': inp_labels})
                        if step % 25 == 0 or step + 1 == self._max_step:
                            saver.save(sess,
                                       os.path.join(self._save_path, 'model'),
                                       global_step=step)
                finally:
                    # Flush and release the event file even if training fails.
                    writer.close()

                if not has_eval_data:
                    # Nothing to enroll or score against.
                    return

                # A batch size this large makes ``next_batch`` hand back the
                # whole set at once -- presumably; confirm against DataManage.
                INF = 0x3f3f3f3f
                self._n_gpu = 1
                enroll_data = DataManage(enroll_frames, enroll_labels, INF)
                test_data = DataManage(test_frames, test_labels, INF)

                get_vector = self.feature
                frames, labels = enroll_data.next_batch
                embeddings = sess.run(get_vector,
                                      feed_dict={'x:0': frames, 'y_:0': labels})

                # Fold each enroll embedding into its speaker's reference
                # vector: first sighting stores it, later ones average in.
                for embedding, label in zip(embeddings, labels):
                    speaker = int(np.argmax(label))
                    if speaker in self._vectors:
                        self._vectors[speaker] = (
                            self._vectors[speaker] + embedding) / 2
                    else:
                        self._vectors[speaker] = embedding

                frames, labels = test_data.next_batch
                embeddings = sess.run(get_vector,
                                      feed_dict={'x:0': frames, 'y_:0': labels})

                support = 0
                keys = list(self._vectors.keys())
                for embedding, true_label in zip(embeddings, labels):
                    score = 0
                    label = -1
                    for key in keys:
                        new_score = self._cosine(self._vectors[key], embedding)
                        if new_score > score:
                            # Track the best score so far so the highest
                            # scoring speaker wins, not the last positive one.
                            score = new_score
                            label = key
                    if label == int(np.argmax(true_label)):
                        support += 1

                total = len(embeddings)
                with open(os.path.join(self._save_path, 'result'), 'w') as f:
                    # Guard against an empty test set.
                    s = "Acc is %f" % (support / total if total else 0.0)
                    f.write(s)