Example #1
0
def vad_test(m_eval, sess_eval, batch_size_eval, eval_file_dir, norm_dir,
             data_len, eval_type):
    """Evaluate a model on one directory and return its raw outputs.

    Inputs are read from ``eval_file_dir`` and labels from
    ``eval_file_dir + '/Labels'``.  ``eval_type == 2`` selects the reader
    from ``dnn_dr``; any other value uses the reader from ``dr``.  Both
    readers are built with the fixed window settings ``w=19, u=9``.

    Args:
        m_eval: model object forwarded to ``evaluation``.
        sess_eval: session forwarded to ``evaluation``.
        batch_size_eval: batch size forwarded to ``evaluation``.
        eval_file_dir: directory with the evaluation inputs.
        norm_dir: forwarded unchanged to the data reader.
        data_len: kept for interface compatibility only; it is not used
            here, so ``None`` is accepted.
        eval_type: reader selector, also forwarded to ``evaluation``.

    Returns:
        The ``(final_softout, final_label)`` pair returned by ``evaluation``.
    """
    # The previous version computed an unused padding size from ``data_len``,
    # which raised TypeError for ``data_len=None`` before any work was done.
    reader_module = dnn_dr if eval_type == 2 else dr

    eval_data_set = reader_module.DataReader(eval_file_dir,
                                             eval_file_dir + '/Labels',
                                             norm_dir,
                                             w=19,
                                             u=9,
                                             name="eval")

    final_softout, final_label = evaluation(m_eval, eval_data_set, sess_eval,
                                            batch_size_eval, eval_type)

    return final_softout, final_label
def _import_model_config(config_name):
    """Put ./configure/<config_name> first on sys.path and import ``config``."""
    config_dir = os.path.abspath('./configure/' + config_name)
    print(config_dir)
    sys.path.insert(0, config_dir)

    # NOTE(review): Python caches imported modules, so once a ``config``
    # module has been imported in this process a later call for a different
    # model gets the cached one -- confirm this is acceptable for callers.
    import config as cg
    return cg


def _window_settings(cg, is_default):
    """Return ``(w, u, batch_size)``: built-in defaults or the ``cg`` values."""
    if is_default:
        return 19, 9, 4096
    return cg.w, cg.u, cg.batch_size


def _one_hot_labels(labels):
    """Flatten ``labels`` to one column and one-hot encode them (2 classes)."""
    return utils.dense_to_one_hot(labels.reshape((-1, 1)), num_classes=2)


def _evaluate_graph(graph, data_set, batch_size, fetch_names,
                    encode_labels=None, decode_outputs=None,
                    session_conf=None):
    """Feed ``data_set`` through ``graph`` batch by batch and stack the results.

    Args:
        graph: graph containing the ``prefix/model_1/...`` tensors.
        data_set: reader exposing ``next_batch``, ``eof_checker`` and
            ``reader_initialize``.
        batch_size: size passed to ``data_set.next_batch``.
        fetch_names: names of the two tensors to fetch per batch.
        encode_labels: optional callable applied to the labels before feeding.
        decode_outputs: optional callable mapping the two fetched arrays to
            ``(soft_prediction, raw_labels)``; identity when omitted.
        session_conf: optional session configuration.

    Returns:
        ``(final_softout, final_label)``, each reshaped to a single column.
    """
    node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
    node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
    # 'keep_probabilty' (sic) is the tensor name this code looks up.
    node_keep_probability = graph.get_tensor_by_name(
        'prefix/model_1/keep_probabilty:0')
    fetches = [graph.get_tensor_by_name(name) for name in fetch_names]

    softout_batches = []
    label_batches = []

    # One session for the whole pass instead of a new one per batch.  It is
    # deliberately not entered as a context manager so the default
    # graph/session seen by the reader and the helper callables is unchanged.
    sess = tf.Session(graph=graph, config=session_conf)
    try:
        while True:
            batch_inputs, batch_labels = data_set.next_batch(batch_size)
            if encode_labels is not None:
                batch_labels = encode_labels(batch_labels)

            feed_dict = {
                node_inputs: batch_inputs,
                node_labels: batch_labels,
                node_keep_probability: 1
            }

            # The batch fetched when the reader reports EOF is not evaluated;
            # the reader is reset so it can be reused.
            if data_set.eof_checker():
                final_softout = np.reshape(np.asarray(softout_batches),
                                           [-1, 1])
                final_label = np.reshape(np.asarray(label_batches), [-1, 1])
                data_set.reader_initialize()
                return final_softout, final_label

            soft_pred, raw_labels = sess.run(fetches, feed_dict=feed_dict)
            if decode_outputs is not None:
                soft_pred, raw_labels = decode_outputs(soft_pred, raw_labels)

            softout_batches.append(soft_pred)
            label_batches.append(raw_labels.reshape((-1, 1)))
    finally:
        sess.close()


def do_test(fname_model, test_file_dir, norm_dir, data_len, is_default,
            model_type):
    """Run the graph loaded from ``fname_model`` over a test directory.

    Inputs are read from ``test_file_dir`` and labels from
    ``test_file_dir + '/Labels'``.

    Args:
        fname_model: path handed to ``load_graph``.
        test_file_dir: directory with the test inputs.
        norm_dir: forwarded unchanged to the data reader.
        data_len: number of leading rows to keep from the stacked outputs.
        is_default: when true, use the built-in window/batch settings instead
            of the values from the model's ``config`` module.
        model_type: 0 = acam, 1 = bdnn, 2 = dnn, 3 = lstm.

    Returns:
        ``(final_softout[0:data_len, :], final_label[0:data_len, :])``, or
        ``None`` when ``model_type`` is not one of the values above.
    """
    eval_input_dir = test_file_dir
    eval_output_dir = test_file_dir + '/Labels'

    graph = load_graph(fname_model)

    if model_type == 0 or model_type == 1:  # acam / bdnn
        import data_reader_bDNN_v2 as dr

        cg = _import_model_config('ACAM' if model_type == 0 else 'bDNN')
        w, u, valid_batch_size = _window_settings(cg, is_default)

        valid_data_set = dr.DataReader(eval_input_dir,
                                       eval_output_dir,
                                       norm_dir,
                                       w=w,
                                       u=u,
                                       name="eval")

        def soft_prediction(logits):
            return bdnn_prediction(valid_batch_size,
                                   logits,
                                   threshold=0.6,
                                   w=w,
                                   u=u)[1]

        if model_type == 0:
            fetch_names = ('prefix/model_1/logits:0',
                           'prefix/model_1/raw_labels:0')

            def decode(logits, raw_labels):
                return soft_prediction(logits), raw_labels
        else:
            # bdnn fetches the fed labels back and keeps their middle column.
            fetch_names = ('prefix/model_1/logits:0',
                           'prefix/model_1/labels:0')

            def decode(logits, labels):
                soft_pred = soft_prediction(logits)
                raw_indx = int(np.floor(labels.shape[1] / 2))
                return soft_pred, labels[:, raw_indx]

        final_softout, final_label = _evaluate_graph(graph,
                                                     valid_data_set,
                                                     valid_batch_size,
                                                     fetch_names,
                                                     decode_outputs=decode)

    elif model_type == 2:  # dnn
        import data_reader_DNN_v2 as dnn_dr

        cg = _import_model_config('DNN')
        w, u, valid_batch_size = _window_settings(cg, is_default)

        valid_data_set = dnn_dr.DataReader(eval_input_dir,
                                           eval_output_dir,
                                           norm_dir,
                                           w=w,
                                           u=u,
                                           name="eval")

        # The dnn model is run with GPUs disabled in the session config.
        session_conf = tf.ConfigProto(device_count={
            'CPU': 1,
            'GPU': 0
        },
                                      allow_soft_placement=True,
                                      log_device_placement=False)

        final_softout, final_label = _evaluate_graph(
            graph,
            valid_data_set,
            valid_batch_size,
            ('prefix/model_1/soft_pred:0', 'prefix/model_1/raw_labels:0'),
            encode_labels=_one_hot_labels,
            session_conf=session_conf)

    elif model_type == 3:  # lstm
        import data_reader_RNN as rnn_dr

        cg = _import_model_config('LSTM')

        if is_default:
            target_delay = 5
            seq_size = 20
            batch_num = 200
        else:
            target_delay = cg.target_delay
            seq_size = cg.seq_len
            batch_num = cg.num_batches
        valid_batch_size = seq_size * batch_num

        valid_data_set = rnn_dr.DataReader(eval_input_dir,
                                           eval_output_dir,
                                           norm_dir,
                                           target_delay=target_delay,
                                           name="eval")

        final_softout, final_label = _evaluate_graph(
            graph,
            valid_data_set,
            valid_batch_size,
            ('prefix/model_1/soft_pred:0', 'prefix/model_1/raw_labels:0'),
            encode_labels=_one_hot_labels)

    else:
        # Unknown model types have always fallen through without a result.
        return None

    return final_softout[0:data_len, :], final_label[0:data_len, :]
def main(prj_dir=None, model=None, mode=None):
    """Build the train/valid model graphs, then train or test according to ``mode``.

    In ``'train'`` mode the paths come from ``path_setting.PathSetting`` and
    the hyper-parameters from ``<prj_dir>/configure/ACAM/config.py``; the
    hyper-parameters are published as module globals.  The training loop
    logs every 10 steps and saves a checkpoint and validates every 50 steps.

    In ``'test'`` mode ``utils.vad_test`` is run on the module-level
    ``test_file_dir``.

    NOTE(review): ``logs_dir``, ``initial_logs_dir``, ``norm_dir`` and the
    other path names are assigned only inside the train branch, which makes
    them function locals.  With any other ``mode`` the first read
    (``initial_logs_dir`` at the checkpoint lookup) appears to raise
    UnboundLocalError -- confirm how test mode is meant to get these values.

    Args:
        prj_dir: project root used for path and config lookup.
        model: model identifier forwarded to ``PathSetting``.
        mode: ``'train'`` or ``'test'``.

    Returns:
        ``None`` in train mode.  In test mode ``(final_softout, final_label)``,
        trimmed to ``data_len`` rows when ``data_len`` is not ``None``.
    """
    # Compare by value: ``mode is 'train'`` relied on string interning.
    training = mode == 'train'

    #                               Configuration Part                       #
    if training:

        import path_setting as ps

        set_path = ps.PathSetting(prj_dir, model)
        logs_dir = initial_logs_dir = set_path.logs_dir
        input_dir = set_path.input_dir
        output_dir = set_path.output_dir
        norm_dir = set_path.norm_dir
        valid_file_dir = set_path.valid_file_dir

        sys.path.insert(0, prj_dir + '/configure/ACAM')
        import config as cg

        global initLr, dropout_rate, max_epoch, batch_size, valid_batch_size
        initLr = cg.lr
        dropout_rate = cg.dropout_rate
        max_epoch = cg.max_epoch
        batch_size = valid_batch_size = cg.batch_size

        global w, u
        w = cg.w
        u = cg.u

        global bdnn_winlen, bdnn_inputsize, bdnn_outputsize
        bdnn_winlen = (((w - 1) / u) * 2) + 3
        bdnn_inputsize = int(bdnn_winlen * num_features)
        bdnn_outputsize = int(bdnn_winlen)

        global glimpse_hidden, bp_hidden, glimpse_out, bp_out, nGlimpses,\
            lstm_cell_size, action_hidden_1, action_hidden_2

        glimpse_hidden = cg.glimpse_hidden
        bp_hidden = cg.bp_hidden
        glimpse_out = bp_out = cg.glimpse_out
        nGlimpses = cg.nGlimpse  # 7
        lstm_cell_size = cg.lstm_cell_size
        action_hidden_1 = cg.action_hidden_1  # default : 256
        action_hidden_2 = cg.action_hidden_2  # default : 256

    #                               Graph Part                                 #

    print('Mode : ' + mode)
    print("Graph initialization...")
    with tf.device(device):
        with tf.variable_scope("model", reuse=None):
            m_train = Model(batch_size=batch_size,
                            reuse=None,
                            is_training=True)
    with tf.device(device):
        # Same variable scope with reuse=True for the validation model.
        with tf.variable_scope("model", reuse=True):
            m_valid = Model(batch_size=valid_batch_size,
                            reuse=True,
                            is_training=False)

    print("Done")

    #                               Summary Part                               #

    print("Setting up summary op...")
    # A single placeholder feeds both scalar summaries.
    summary_ph = tf.placeholder(dtype=tf.float32)

    with tf.variable_scope("Training_procedure"):

        cost_summary_op = tf.summary.scalar("cost", summary_ph)
        accuracy_summary_op = tf.summary.scalar("accuracy", summary_ph)

    print("Done")

    #                               Model Save Part                            #

    print("Setting up Saver...")
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(initial_logs_dir)
    print("Done")

    #                               Session Part                               #

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if training:
        train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/',
                                                     sess.graph,
                                                     max_queue=2)
        valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/',
                                                     max_queue=2)

    if ckpt and ckpt.model_checkpoint_path:  # model restore
        print("Model restored...")
        print(initial_logs_dir + ckpt_name)
        if training:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            saver.restore(sess, initial_logs_dir + ckpt_name)
            saver.save(sess, initial_logs_dir + "/model_ACAM.ckpt",
                       0)  # model save

        print("Done")

    else:
        # if the checkpoint doesn't exist, do initialization
        sess.run(tf.global_variables_initializer())

    if training:
        # training data reader initialization
        train_data_set = dr.DataReader(input_dir,
                                       output_dir,
                                       norm_dir,
                                       w=w,
                                       u=u,
                                       name="train")

        for itr in range(max_epoch):

            start_time = time.time()

            train_inputs, train_labels = train_data_set.next_batch(batch_size)

            feed_dict = {
                m_train.inputs: train_inputs,
                m_train.labels: train_labels,
                m_train.keep_probability: dropout_rate
            }

            sess.run(m_train.train_op, feed_dict=feed_dict)

            if itr % 10 == 0:

                train_cost, train_reward, train_avg_b, train_rminusb, train_p_bps, train_lr \
                    = sess.run([m_train.cost, m_train.reward, m_train.avg_b, m_train.rminusb, m_train.p_bps,
                                m_train.print_lr]
                               , feed_dict=feed_dict)

                duration = time.time() - start_time
                print(
                    "Step: %d, cost: %.4f, accuracy: %4.4f, b: %4.4f, R-b: %4.4f, p_bps: %4.4f, lr: %7.6f (%.3f sec)"
                    % (itr, train_cost, train_reward, train_avg_b,
                       train_rminusb, train_p_bps, train_lr, duration))

                train_cost_summary_str = sess.run(
                    cost_summary_op, feed_dict={summary_ph: train_cost})
                train_accuracy_summary_str = sess.run(
                    accuracy_summary_op, feed_dict={summary_ph: train_reward})
                # write the train phase summary to event files
                train_summary_writer.add_summary(train_cost_summary_str, itr)
                train_summary_writer.add_summary(train_accuracy_summary_str,
                                                 itr)

            if itr % 50 == 0 and itr > 0:
                saver.save(sess, logs_dir + "/model.ckpt", itr)  # model save
                print('validation start!')
                valid_accuracy, valid_cost = \
                    utils.do_validation(m_valid, sess, valid_file_dir, norm_dir,
                                        type='ACAM')

                print("valid_cost: %.4f, valid_accuracy=%4.4f" %
                      (valid_cost, valid_accuracy * 100))
                valid_cost_summary_str = sess.run(
                    cost_summary_op, feed_dict={summary_ph: valid_cost})
                valid_accuracy_summary_str = sess.run(
                    accuracy_summary_op,
                    feed_dict={summary_ph: valid_accuracy})
                # write the validation phase summary to event files
                valid_summary_writer.add_summary(valid_cost_summary_str, itr)
                valid_summary_writer.add_summary(valid_accuracy_summary_str,
                                                 itr)

    elif mode == 'test':

        final_softout, final_label = utils.vad_test(m_valid, sess,
                                                    valid_batch_size,
                                                    test_file_dir, norm_dir,
                                                    data_len, eval_type)

        if data_len is None:
            return final_softout, final_label
        else:
            return final_softout[0:data_len, :], final_label[0:data_len, :]
def full_evaluation(m_eval, sess_eval, batch_size_eval, eval_file_dir,
                    summary_writer, summary_dic, itr):
    """Evaluate every sub-directory of ``eval_file_dir`` and log the results.

    Each entry of ``eval_file_dir`` (visited in sorted order) is evaluated
    with ``evaluation``; its cost, per-SNR accuracies and AUCs are printed
    and one summary per name in the module-level ``summary_list`` is written.
    Afterwards the mean cost, mean accuracy and accuracy variance over all
    entries are written as summaries and printed together with the mean AUC.

    Args:
        m_eval: model object forwarded to ``evaluation``.
        sess_eval: session used for ``evaluation`` and for the summary ops.
        batch_size_eval: batch size forwarded to ``evaluation``.
        eval_file_dir: directory whose entries are evaluated one by one.
        summary_writer: writer receiving every summary.
        summary_dic: mapping holding ``"summary_ph"`` and the summary ops.
        itr: step value attached to every summary.

    Returns:
        ``(mean_auc, var_accuracy)`` over all evaluated entries.
    """
    snr_report = 'SNR_-5 : %.4f, SNR_0 : %.4f, SNR_5 : %.4f, SNR_10 : %.4f'

    costs = []
    accuracies = []
    aucs = []

    print("-------- Performance for each of noise types --------")

    noise_types = sorted(os.listdir(eval_file_dir))
    summary_ph = summary_dic["summary_ph"]

    for noise_type in noise_types:

        noise_dir = eval_file_dir + '/' + noise_type
        reader = dr.DataReader(noise_dir,
                               noise_dir + '/Labels',
                               norm_dir,
                               w=w,
                               u=u,
                               name="eval")
        cost, accuracy, snr_accuracies, auc, snr_aucs = evaluation(
            m_eval, reader, sess_eval, batch_size_eval)

        print("--noise type : " + noise_type)
        print("cost: %.4f, accuracy across all SNRs: %.4f" %
              (cost, accuracy * 100))

        print('accuracy wrt SNR:')
        print(snr_report %
              (snr_accuracies[0] * 100, snr_accuracies[1] * 100,
               snr_accuracies[2] * 100, snr_accuracies[3] * 100))
        print('AUC wrt SNR:')
        print(snr_report % (snr_aucs[0] * 100, snr_aucs[1] * 100,
                            snr_aucs[2] * 100, snr_aucs[3] * 100))
        print('')

        # Values line up positionally with the names in ``summary_list``.
        summary_values = [cost] + snr_accuracies + [accuracy]

        for position, summary_name in enumerate(summary_list):
            summary_op = summary_dic[noise_type + "_" + summary_name]
            summary_writer.add_summary(
                sess_eval.run(
                    summary_op,
                    feed_dict={summary_ph: summary_values[position]}), itr)

        costs.append(cost)
        accuracies.append(accuracy)
        aucs.append(auc)

    mean_cost = np.mean(np.asarray(costs))
    accuracy_array = np.asarray(accuracies)
    var_accuracy = np.var(accuracy_array)
    mean_accuracy = np.mean(accuracy_array)
    mean_auc = np.mean(np.asarray(aucs))

    overall_summaries = (
        ("cost_across_all_noise_types", mean_cost),
        ("accuracy_across_all_noise_types", mean_accuracy),
        ("variance_across_all_noise_types", var_accuracy),
    )
    for summary_key, summary_value in overall_summaries:
        summary_writer.add_summary(
            sess_eval.run(summary_dic[summary_key],
                          feed_dict={summary_ph: summary_value}), itr)

    print("-------- Performance across all of noise types --------")
    print("cost : %.4f" % mean_cost)
    print("******* averaged accuracy across all noise_types : %.4f *******" %
          (mean_accuracy * 100))
    print("******* averaged auc across all noise_types : %.7f *******" %
          (mean_auc * 100))
    print(
        "******* variance of accuracies across all noise_types : %6.6f *******"
        % var_accuracy)

    return mean_auc, var_accuracy
Example #5
0
def main(save_dir, prj_dir=None, model=None, mode=None, dev="/gpu:2"):
    """Build the train/valid model graphs on ``dev``, then train or test.

    In ``'train'`` mode the paths come from ``path_setting.PathSetting`` and
    the hyper-parameters from ``<prj_dir>/configure/ACAM/config.py``; the
    hyper-parameters are published as module globals.  Training runs for
    ``max_epoch * file_len`` steps, logs every 100 steps and, every
    ``file_len`` steps, saves a checkpoint, validates and freezes the graph.

    In ``'test'`` mode ``utils.vad_test`` is run on the module-level
    ``test_file_dir``.

    NOTE(review): ``logs_dir``, ``initial_logs_dir``, ``norm_dir`` and the
    other path names are assigned only inside the train branch, which makes
    them function locals.  With any other ``mode`` the first read
    (``initial_logs_dir`` at the checkpoint lookup) appears to raise
    UnboundLocalError -- confirm how test mode is meant to get these values.

    Args:
        save_dir: forwarded to ``PathSetting``.
        prj_dir: project root used for path and config lookup.
        model: model identifier forwarded to ``PathSetting``.
        mode: ``'train'`` or ``'test'``.
        dev: device string for ``tf.device``; its trailing digits are exported
            as ``CUDA_VISIBLE_DEVICES``.

    Returns:
        ``None`` in train mode.  In test mode ``(final_softout, final_label)``,
        trimmed to ``data_len`` rows when ``data_len`` is not ``None``.
    """
    #                               Configuration Part                       #
    device = dev
    # Use the whole trailing digit run so "/gpu:10" exports "10" rather than
    # "0"; fall back to the last character as before when there are no digits.
    os.environ["CUDA_VISIBLE_DEVICES"] = (
        device[len(device.rstrip('0123456789')):] or device[-1])

    # Compare by value: ``mode is 'train'`` relied on string interning.
    training = mode == 'train'

    if training:

        import path_setting as ps

        set_path = ps.PathSetting(prj_dir, model, save_dir)
        logs_dir = initial_logs_dir = set_path.logs_dir
        input_dir = set_path.input_dir
        output_dir = set_path.output_dir
        norm_dir = set_path.norm_dir
        valid_file_dir = set_path.valid_file_dir

        sys.path.insert(0, prj_dir + '/configure/ACAM')
        import config as cg

        global initLr, dropout_rate, max_epoch, batch_size, valid_batch_size
        initLr = cg.lr
        dropout_rate = cg.dropout_rate
        max_epoch = cg.max_epoch
        batch_size = valid_batch_size = cg.batch_size

        global w, u
        w = cg.w
        u = cg.u

        global bdnn_winlen, bdnn_inputsize, bdnn_outputsize
        bdnn_winlen = (((w - 1) / u) * 2) + 3
        bdnn_inputsize = int(bdnn_winlen * num_features)
        bdnn_outputsize = int(bdnn_winlen)

        global glimpse_hidden, bp_hidden, glimpse_out, bp_out, nGlimpses,\
            lstm_cell_size, action_hidden_1, action_hidden_2

        glimpse_hidden = cg.glimpse_hidden
        bp_hidden = cg.bp_hidden
        glimpse_out = bp_out = cg.glimpse_out
        nGlimpses = cg.nGlimpse  # 7
        lstm_cell_size = cg.lstm_cell_size
        action_hidden_1 = cg.action_hidden_1  # default : 256
        action_hidden_2 = cg.action_hidden_2  # default : 256

    #                               Graph Part                                 #

    print('Mode : ' + mode)
    print("Graph initialization...")
    with tf.device(device):
        with tf.variable_scope("model", reuse=None):
            m_train = Model(batch_size=batch_size,
                            reuse=None,
                            is_training=True)
    with tf.device(device):
        # Same variable scope with reuse=True for the validation model.
        with tf.variable_scope("model", reuse=True):
            m_valid = Model(batch_size=valid_batch_size,
                            reuse=True,
                            is_training=False)

    print("Done")

    #                               Summary Part                               #

    print("Setting up summary op...")
    # A single placeholder feeds both scalar summaries.
    summary_ph = tf.placeholder(dtype=tf.float32)

    with tf.variable_scope("Training_procedure"):

        cost_summary_op = tf.summary.scalar("cost", summary_ph)
        accuracy_summary_op = tf.summary.scalar("accuracy", summary_ph)

    print("Done")

    #                               Model Save Part                            #

    print("Setting up Saver...")
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(initial_logs_dir)
    print("Done")

    #                               Session Part                               #

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if training:
        train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/',
                                                     sess.graph,
                                                     max_queue=2)
        valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/',
                                                     max_queue=2)

    if ckpt and ckpt.model_checkpoint_path:  # model restore
        print("Model restored...")
        print(initial_logs_dir + ckpt_name)
        if training:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            saver.restore(sess, initial_logs_dir + ckpt_name)
            saver.save(sess, initial_logs_dir + "/model_ACAM.ckpt",
                       0)  # model save

        print("Done")

    else:
        # if the checkpoint doesn't exist, do initialization
        sess.run(tf.global_variables_initializer())

    if training:
        # training data reader initialization
        train_data_set = dr.DataReader(input_dir,
                                       output_dir,
                                       norm_dir,
                                       w=w,
                                       u=u,
                                       name="train")

        file_len = train_data_set.get_file_len()
        MAX_STEP = max_epoch * file_len
        print(get_num_params())

        for itr in range(MAX_STEP):

            start_time = time.time()

            train_inputs, train_labels = train_data_set.next_batch(batch_size)

            feed_dict = {
                m_train.inputs: train_inputs,
                m_train.labels: train_labels,
                m_train.keep_probability: dropout_rate
            }

            sess.run(m_train.train_op, feed_dict=feed_dict)

            if itr % 100 == 0:
                # train_res is fetched but not used further in this function.
                train_cost, train_reward, train_avg_b, train_rminusb, train_p_bps, train_lr, train_res \
                    = sess.run([m_train.cost, m_train.reward, m_train.avg_b, m_train.rminusb, m_train.p_bps,
                                m_train.print_lr,m_train.result]
                               , feed_dict=feed_dict)

                duration = time.time() - start_time
                print(
                    "Step: %d, cost: %.4f, accuracy: %4.4f, b: %4.4f, R-b: %4.4f, p_bps: %4.4f, lr: %7.6f (%.3f sec)"
                    % (itr, train_cost, train_reward, train_avg_b,
                       train_rminusb, train_p_bps, train_lr, duration))

                train_cost_summary_str = sess.run(
                    cost_summary_op, feed_dict={summary_ph: train_cost})
                train_accuracy_summary_str = sess.run(
                    accuracy_summary_op, feed_dict={summary_ph: train_reward})
                train_summary_writer.add_summary(train_cost_summary_str, itr)
                train_summary_writer.add_summary(train_accuracy_summary_str,
                                                 itr)

            if itr % file_len == 0 and itr > 0:
                saver.save(sess, logs_dir + "/model.ckpt", itr)  # model save
                print('validation start!')
                valid_accuracy, valid_cost = \
                    utils.do_validation(m_valid, sess, valid_file_dir, norm_dir,
                                        type='ACAM')

                print("valid_cost: %.4f, valid_accuracy=%4.4f" %
                      (valid_cost, valid_accuracy * 100))
                valid_cost_summary_str = sess.run(
                    cost_summary_op, feed_dict={summary_ph: valid_cost})
                valid_accuracy_summary_str = sess.run(
                    accuracy_summary_op,
                    feed_dict={summary_ph: valid_accuracy})
                # write the validation phase summary to event files
                valid_summary_writer.add_summary(valid_cost_summary_str, itr)
                valid_summary_writer.add_summary(valid_accuracy_summary_str,
                                                 itr)
                gs.freeze_graph(prj_dir + '/logs/ACAM',
                                prj_dir + '/saved_model/graph/ACAM',
                                'model_1/logits,model_1/raw_labels')

    elif mode == 'test':

        final_softout, final_label = utils.vad_test(m_valid, sess,
                                                    valid_batch_size,
                                                    test_file_dir, norm_dir,
                                                    data_len, eval_type)
        if data_len is None:
            return final_softout, final_label
        else:
            return final_softout[0:data_len, :], final_label[0:data_len, :]
Example #6
0
def do_test(fname_model, test_file_dir, norm_dir, prj_dir, is_default,
            model_type, data_len=None):
    """Evaluate a frozen VAD graph on a dataset and collect its predictions.

    The graph stored at ``fname_model`` is loaded with ``load_graph`` and fed
    batch by batch from a ``DataReader`` until the reader reports end of
    data. The batch fetched when end of data is reported is not evaluated.

    Args:
        fname_model: Path handed to ``load_graph``.
        test_file_dir: Root directory of the evaluation data.
        norm_dir: Normalization directory forwarded to the data reader.
        prj_dir: Project directory used to build the ``sys.path`` entry for
            the model's ``config`` module.
        is_default: When true, the bDNN/DNN/LSTM branches use built-in
            hyper-parameters instead of the values in ``config``. The ACAM
            branch ignores it.
        model_type: 0 = ACAM, 1 = bDNN, 2 = DNN, 3 = LSTM.
        data_len: Optional number of leading rows to keep in the bDNN/DNN
            results; ``None`` keeps every row.

    Returns:
        ``(final_softout, final_label)``. For ACAM, bDNN and DNN these are
        arrays reshaped to a single column; for LSTM they are plain lists
        with one entry per evaluated batch. ``None`` is returned for any
        other ``model_type``.
    """
    eval_input_dir = test_file_dir
    # Label directory for the bDNN/DNN readers. Previously this name was used
    # without being defined; '/Labels' mirrors the layout used by the
    # validation readers -- TODO confirm for the test set.
    eval_output_dir = test_file_dir + '/Labels'

    graph = load_graph(fname_model)

    w = 19
    u = 9

    final_softout = []
    final_label = []

    if model_type == 0:  # acam
        import data_reader_bDNN_v2 as dr

        print(prj_dir + '/configure/ACAM')
        # NOTE(review): the printed path and the inserted path differ
        # ('/configure/ACAM' vs 'ACAM'); confirm which one is intended.
        sys.path.insert(0, os.path.abspath(prj_dir + 'ACAM'))

        # The config import is disabled for ACAM, so w/u keep the defaults
        # set above.
        valid_data_set = dr.DataReader(eval_input_dir + "/train/feature_mrcg",
                                       eval_input_dir + "/train/label",
                                       norm_dir,
                                       w=w,
                                       u=u,
                                       name="eval")
        node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
        node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
        node_keep_probability = graph.get_tensor_by_name(
            'prefix/model_1/keep_probabilty:0')

        node_logits = graph.get_tensor_by_name('prefix/model_1/logits:0')
        node_raw_labels = graph.get_tensor_by_name(
            'prefix/model_1/raw_labels:0')

        # Running totals over all evaluated batches.
        acc_sum = 0
        file_num = 0
        tp_sum = 0
        tn_sum = 0
        fp_sum = 0
        fn_sum = 0
        frame_num = 0

        # One session for the whole run instead of a new one per batch.
        with tf.Session(graph=graph) as sess:
            while True:
                valid_inputs, valid_labels = valid_data_set.next_batch(7)

                if valid_data_set.eof_checker():
                    final_softout = np.reshape(np.asarray(final_softout),
                                               [-1, 1])
                    final_label = np.reshape(np.asarray(final_label), [-1, 1])
                    # Reset eof flag, file counter and start index.
                    valid_data_set.reader_initialize()
                    break

                feed_dict = {
                    node_inputs: valid_inputs,
                    node_labels: valid_labels,
                    node_keep_probability: 1
                }
                logits, raw_labels = sess.run([node_logits, node_raw_labels],
                                              feed_dict=feed_dict)

                raw_labels = raw_labels.reshape((-1, 1))
                soft_pred = bdnn_prediction(len(raw_labels),
                                            logits,
                                            threshold=0.6,
                                            w=w,
                                            u=u)[0]
                eql = (raw_labels == soft_pred)

                acc = np.mean(eql)
                acc_sum += acc
                file_num += 1

                frame_s = len(raw_labels)
                frame_num += frame_s

                # Per-batch confusion counts.
                tn = 0
                tp = 0
                fp = 0
                fn = 0
                for i in range(len(soft_pred)):
                    if soft_pred[i] == 0 and raw_labels[i] == 0:
                        tn += 1
                    elif soft_pred[i] == 0 and raw_labels[i] == 1:
                        fn += 1
                    elif soft_pred[i] == 1 and raw_labels[i] == 0:
                        fp += 1
                    elif soft_pred[i] == 1 and raw_labels[i] == 1:
                        tp += 1

                final_softout.append(soft_pred)
                final_label.append(raw_labels)
                tn_sum += tn
                tp_sum += tp
                fn_sum += fn
                fp_sum += fp

                print(" train_accuracy=%4.4f" % (acc * 100))
                print(
                    "true_positive: %f, false positive: %f, true negative: %f, false negative: %f"
                    % (tp / frame_s, fp / frame_s, tn / frame_s,
                       fn / frame_s))

        # Skip the summary when nothing was evaluated; the averages would
        # otherwise divide by zero.
        if file_num > 0 and frame_num > 0:
            print("total accuracy: " + str(acc_sum / file_num))
            print(
                "total: true_positive: %f, false positive: %f, true negative: %f, false negative: %f"
                % (tp_sum / frame_num, fp_sum / frame_num,
                   tn_sum / frame_num, fn_sum / frame_num))

        return final_softout[:, :], final_label[:, :]

    if model_type == 1:  # bdnn
        import data_reader_bDNN_v2 as dr
        print(prj_dir + '/configure/bDNN')
        # NOTE(review): printed and inserted paths differ; confirm.
        sys.path.insert(0, os.path.abspath(prj_dir + 'bDNN'))

        import config as cg

        if is_default:
            w = 19
            u = 9
            valid_batch_size = 4096
        else:
            w = cg.w
            u = cg.u
            valid_batch_size = cg.batch_size

        valid_data_set = dr.DataReader(
            eval_input_dir, eval_output_dir, norm_dir, w=w, u=u,
            name="eval")
        node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
        node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
        node_keep_probability = graph.get_tensor_by_name(
            'prefix/model_1/keep_probabilty:0')

        node_logits = graph.get_tensor_by_name('prefix/model_1/logits:0')

        with tf.Session(graph=graph) as sess:
            while True:
                valid_inputs, valid_labels = valid_data_set.next_batch(
                    valid_batch_size)

                if valid_data_set.eof_checker():
                    final_softout = np.reshape(np.asarray(final_softout),
                                               [-1, 1])
                    final_label = np.reshape(np.asarray(final_label), [-1, 1])
                    valid_data_set.reader_initialize()
                    break

                feed_dict = {
                    node_inputs: valid_inputs,
                    node_labels: valid_labels,
                    node_keep_probability: 1
                }
                logits, labels = sess.run([node_logits, node_labels],
                                          feed_dict=feed_dict)

                soft_pred = bdnn_prediction(valid_batch_size,
                                            logits,
                                            threshold=0.6,
                                            w=w,
                                            u=u)[1]

                # The label of the frame itself is the middle column of the
                # label window.
                raw_indx = int(np.floor(labels.shape[1] / 2))
                raw_labels = labels[:, raw_indx]
                raw_labels = raw_labels.reshape((-1, 1))

                final_softout.append(soft_pred)
                final_label.append(raw_labels)

        # data_len=None keeps every row.
        return final_softout[0:data_len, :], final_label[0:data_len, :]

    if model_type == 2:  # dnn
        import data_reader_DNN_v2 as dnn_dr
        print(prj_dir + '/configure/DNN')
        # NOTE(review): printed and inserted paths differ; confirm.
        sys.path.insert(0, os.path.abspath(prj_dir + 'DNN'))

        import config as cg

        if is_default:
            w = 19
            u = 9
            valid_batch_size = 4096
        else:
            w = cg.w
            u = cg.u
            valid_batch_size = cg.batch_size

        valid_data_set = dnn_dr.DataReader(eval_input_dir,
                                           eval_output_dir,
                                           norm_dir,
                                           w=w,
                                           u=u,
                                           name="eval")
        node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
        node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
        node_keep_probability = graph.get_tensor_by_name(
            'prefix/model_1/keep_probabilty:0')

        node_softpred = graph.get_tensor_by_name('prefix/model_1/soft_pred:0')
        node_raw_labels = graph.get_tensor_by_name(
            'prefix/model_1/raw_labels:0')

        with tf.Session(graph=graph) as sess:
            while True:
                valid_inputs, valid_labels = valid_data_set.next_batch(
                    valid_batch_size)

                one_hot_labels = valid_labels.reshape((-1, 1))
                one_hot_labels = utils.dense_to_one_hot(one_hot_labels,
                                                        num_classes=2)

                if valid_data_set.eof_checker():
                    final_softout = np.reshape(np.asarray(final_softout),
                                               [-1, 1])
                    final_label = np.reshape(np.asarray(final_label), [-1, 1])
                    valid_data_set.reader_initialize()
                    break

                feed_dict = {
                    node_inputs: valid_inputs,
                    node_labels: one_hot_labels,
                    node_keep_probability: 1
                }
                soft_pred, raw_labels = sess.run(
                    [node_softpred, node_raw_labels], feed_dict=feed_dict)
                raw_labels = raw_labels.reshape((-1, 1))

                final_softout.append(soft_pred)
                final_label.append(raw_labels)

        # data_len=None keeps every row.
        return final_softout[0:data_len, :], final_label[0:data_len, :]

    if model_type == 3:  # lstm

        import data_reader_RNN as rnn_dr

        print(prj_dir + '/configure/LSTM')
        # NOTE(review): printed and inserted paths differ; confirm.
        sys.path.insert(0, os.path.abspath(prj_dir + 'LSTM'))

        import config as cg

        if is_default:
            target_delay = 5
            seq_size = 20
            batch_num = 200
            valid_batch_size = seq_size * batch_num
        else:
            target_delay = cg.target_delay
            seq_size = cg.seq_len

            valid_batch_size = seq_size

        valid_data_set = rnn_dr.DataReader(eval_input_dir +
                                           "/train/feature_mrcg",
                                           eval_input_dir + '/train/label',
                                           norm_dir,
                                           target_delay=target_delay,
                                           name="eval")
        node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
        node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
        node_keep_probability = graph.get_tensor_by_name(
            'prefix/model_1/keep_probabilty:0')

        node_softpred = graph.get_tensor_by_name('prefix/model_1/soft_pred:0')
        node_raw_labels = graph.get_tensor_by_name(
            'prefix/model_1/raw_labels:0')

        with tf.Session(graph=graph) as sess:
            while True:
                valid_inputs, valid_labels = valid_data_set.next_batch(
                    valid_batch_size)

                one_hot_labels = valid_labels.reshape((-1, 1))
                one_hot_labels = utils.dense_to_one_hot(one_hot_labels,
                                                        num_classes=2)

                if valid_data_set.eof_checker():
                    # Unlike the other branches the per-batch results are
                    # left as lists here (no reshape into one column).
                    valid_data_set.reader_initialize()
                    break

                feed_dict = {
                    node_inputs: valid_inputs,
                    node_labels: one_hot_labels,
                    node_keep_probability: 1
                }
                soft_pred, raw_labels = sess.run(
                    [node_softpred, node_raw_labels], feed_dict=feed_dict)
                raw_labels = raw_labels.reshape((-1, 1))

                final_softout.append(soft_pred)
                final_label.append(raw_labels)

        return final_softout, final_label
Example #7
0
def _validate_per_file(valid_data_set, valid_batch_size, evaluate_batch):
    """Average per-batch (cost, accuracy) within each file, then over files.

    Args:
        valid_data_set: Data reader exposing ``next_batch``,
            ``file_change_checker``, ``file_change_initialize``,
            ``eof_checker``, ``reader_initialize`` and ``_file_len``.
        valid_batch_size: Batch size requested from the reader.
        evaluate_batch: Callable ``(inputs, labels) -> (cost, accuracy)``.

    Returns:
        ``(mean_accuracy, mean_cost)`` as plain Python scalars.
    """
    accuracy_list = [0 for _ in range(valid_data_set._file_len)]
    cost_list = [0 for _ in range(valid_data_set._file_len)]

    avg_valid_accuracy = 0.
    avg_valid_cost = 0.
    itr_sum = 0.
    itr_file = 0

    while True:
        valid_inputs, valid_labels = valid_data_set.next_batch(
            valid_batch_size)

        if valid_data_set.file_change_checker():
            # The reader moved on to another file: store the averages of the
            # file just finished and restart the running sums.
            accuracy_list[itr_file] = avg_valid_accuracy / itr_sum
            cost_list[itr_file] = avg_valid_cost / itr_sum
            avg_valid_cost = 0.
            avg_valid_accuracy = 0.
            itr_sum = 0
            itr_file += 1
            valid_data_set.file_change_initialize()

        if valid_data_set.eof_checker():
            # Reset eof flag, file counter and start index; the batch fetched
            # in this iteration is not evaluated.
            valid_data_set.reader_initialize()
            print('Valid data reader was initialized!')
            break

        valid_cost, valid_accuracy = evaluate_batch(valid_inputs,
                                                    valid_labels)

        avg_valid_accuracy += valid_accuracy
        avg_valid_cost += valid_cost
        itr_sum += 1

    # ``.item()`` replaces the deprecated ``np.asscalar``.
    total_avg_valid_accuracy = np.mean(np.asarray(accuracy_list)).item()
    total_avg_valid_cost = np.mean(np.asarray(cost_list)).item()
    return total_avg_valid_accuracy, total_avg_valid_cost


def do_validation(m_valid, sess, valid_file_dir, norm_dir, type='DNN'):
    """Run the validation set through ``m_valid`` and return mean metrics.

    Args:
        m_valid: Validation model exposing ``inputs``, ``labels``,
            ``keep_probability``, ``cost`` and, depending on ``type``,
            ``accuracy``, ``logits`` or ``reward``.
        sess: Session used to evaluate the model tensors.
        valid_file_dir: Validation data directory; labels are read from
            ``valid_file_dir + '/Labels'``.
        norm_dir: Normalization directory forwarded to the data reader.
        type: One of ``'DNN'``, ``'bDNN'``, ``'ACAM'`` or ``'LSTM'``.

    Returns:
        ``(accuracy, cost)``: per-file averages, averaged over all files.

    Raises:
        ValueError: If ``type`` is not one of the supported model names.
    """
    # Compare by value: identity checks on string literals are unreliable.
    if type not in ('DNN', 'bDNN', 'ACAM', 'LSTM'):
        raise ValueError("unsupported validation type: %r" % (type,))

    sys.path.insert(0, os.path.abspath('../../configure/' + type))
    import config as cg

    # dataset reader setting #
    if type == 'LSTM':
        valid_batch_size = cg.seq_len * cg.num_batches
        valid_data_set = rnn_dr.DataReader(valid_file_dir,
                                           valid_file_dir + '/Labels',
                                           norm_dir,
                                           target_delay=cg.target_delay,
                                           name="eval")
    else:
        valid_batch_size = cg.batch_size
        reader_module = dnn_dr if type == 'DNN' else dr
        valid_data_set = reader_module.DataReader(valid_file_dir,
                                                  valid_file_dir + '/Labels',
                                                  norm_dir,
                                                  w=cg.w,
                                                  u=cg.u,
                                                  name="eval")

    def _feed(valid_inputs, labels):
        """Build the feed dict with dropout disabled."""
        return {
            m_valid.inputs: valid_inputs,
            m_valid.labels: labels,
            m_valid.keep_probability: 1
        }

    if type in ('DNN', 'LSTM'):

        def evaluate_batch(valid_inputs, valid_labels):
            """Feed one-hot labels and fetch the model's cost and accuracy."""
            one_hot_labels = valid_labels.reshape((-1, 1))
            one_hot_labels = dense_to_one_hot(one_hot_labels, num_classes=2)
            return sess.run([m_valid.cost, m_valid.accuracy],
                            feed_dict=_feed(valid_inputs, one_hot_labels))

    elif type == 'ACAM':

        def evaluate_batch(valid_inputs, valid_labels):
            """Fetch the model's cost and reward (used as accuracy)."""
            return sess.run([m_valid.cost, m_valid.reward],
                            feed_dict=_feed(valid_inputs, valid_labels))

    else:  # bDNN

        def evaluate_batch(valid_inputs, valid_labels):
            """Fetch cost/logits and score the boosted prediction."""
            valid_cost, valid_logits = sess.run(
                [m_valid.cost, m_valid.logits],
                feed_dict=_feed(valid_inputs, valid_labels))
            valid_pred, _ = bdnn_prediction(valid_batch_size,
                                            valid_logits,
                                            threshold=0.6)

            # The label of the frame itself is the middle column of the
            # label window.
            raw_indx = int(np.floor(valid_labels.shape[1] / 2))
            raw_labels = valid_labels[:, raw_indx]
            raw_labels = raw_labels.reshape((-1, 1))

            valid_accuracy = np.equal(valid_pred, raw_labels)
            valid_accuracy = valid_accuracy.astype(int)
            valid_accuracy = np.sum(valid_accuracy) / valid_batch_size
            return valid_cost, valid_accuracy

    return _validate_per_file(valid_data_set, valid_batch_size,
                              evaluate_batch)
Example #8
0
def main(prj_dir=None, model=None, mode=None):
    """Build the bDNN graph, then train it or run it on the test set.

    Args:
        prj_dir: Project root; ``prj_dir + '/configure/bDNN'`` is put on
            ``sys.path`` in train mode to locate ``config``.
        model: Model name forwarded to ``path_setting.PathSetting`` in train
            mode.
        mode: ``'train'`` or ``'test'``. Any other value only builds the
            graph and restores or initializes the variables.

    Returns:
        In test mode, ``(final_softout, final_label)`` from
        ``utils.vad_test``, cut to the first ``data_len`` rows when
        ``data_len`` is not ``None``. Otherwise ``None``.

    Several names used here (``device``, ``Model``, ``num_features``, ``th``,
    ``ckpt_name``, ``test_file_dir``, ``data_len``, ``eval_type``) are not
    defined in this function and are presumably module-level -- TODO confirm.
    """

    #                               Configuration Part                       #
    # Modes are compared by value; identity checks on string literals are
    # unreliable.
    if mode == 'train':

        import path_setting as ps

        set_path = ps.PathSetting(prj_dir, model)
        logs_dir = initial_logs_dir = set_path.logs_dir
        input_dir = set_path.input_dir
        output_dir = set_path.output_dir
        norm_dir = set_path.norm_dir
        valid_file_dir = set_path.valid_file_dir

        sys.path.insert(0, prj_dir+'/configure/bDNN')
        import config as cg

        # Hyper-parameters are published as module globals.
        global initLr, dropout_rate, max_epoch, batch_size, valid_batch_size
        initLr = cg.lr
        dropout_rate = cg.dropout_rate
        max_epoch = cg.max_epoch
        batch_size = valid_batch_size = cg.batch_size

        global w, u
        w = cg.w
        u = cg.u

        global bdnn_winlen, bdnn_inputsize, bdnn_outputsize
        bdnn_winlen = (((w-1) / u) * 2) + 3
        bdnn_inputsize = int(bdnn_winlen * num_features)
        bdnn_outputsize = int(bdnn_winlen)

        global num_hidden_1, num_hidden_2
        num_hidden_1 = cg.num_hidden_1
        num_hidden_2 = cg.num_hidden_2

    #                               Graph Part                                 #

    print("Graph initialization...")
    with tf.device(device):
        with tf.variable_scope("model", reuse=None):
            m_train = Model(is_training=True)
        # Same variable scope with reuse=True for the validation model.
        with tf.variable_scope("model", reuse=True):
            m_valid = Model(is_training=False)

    print("Done")

    #                               Summary Part                               #

    print("Setting up summary op...")
    summary_ph = tf.placeholder(dtype=tf.float32)

    with tf.variable_scope("Training_procedure"):

        cost_summary_op = tf.summary.scalar("cost", summary_ph)
        accuracy_summary_op = tf.summary.scalar("accuracy", summary_ph)

    print("Done")

    #                               Model Save Part                            #

    print("Setting up Saver...")
    saver = tf.train.Saver()
    # NOTE(review): ``initial_logs_dir`` is a local that is only assigned in
    # train mode, so any other mode fails here with UnboundLocalError.
    # Confirm where the test-mode value should come from.
    ckpt = tf.train.get_checkpoint_state(initial_logs_dir)
    print("Done")

    #                               Session Part                               #

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if mode == 'train':
        train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/', sess.graph, max_queue=2)
        valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/', max_queue=2)

    if ckpt and ckpt.model_checkpoint_path:  # model restore
        print("Model restored...")

        if mode == 'train':
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            saver.restore(sess, initial_logs_dir+ckpt_name)

        print("Done")
    else:
        # No checkpoint available: start from freshly initialized variables.
        sess.run(tf.global_variables_initializer())

    if mode == 'train':
        # training data reader initialization
        train_data_set = dr.DataReader(input_dir, output_dir, norm_dir, w=w, u=u, name="train")

        for itr in range(max_epoch):

            train_inputs, train_labels = train_data_set.next_batch(batch_size)

            feed_dict = {m_train.inputs: train_inputs, m_train.labels: train_labels,
                         m_train.keep_probability: dropout_rate}

            sess.run(m_train.train_op, feed_dict=feed_dict)

            # Report training cost/accuracy every 10 iterations.
            if itr % 10 == 0:

                train_cost, logits = sess.run([m_train.cost, m_train.logits], feed_dict=feed_dict)

                result = bdnn_prediction(batch_size, logits, threshold=th)
                # The label of the frame itself is the middle column of the
                # label window.
                raw_indx = int(np.floor(train_labels.shape[1] / 2))
                raw_labels = train_labels[:, raw_indx]
                raw_labels = raw_labels.reshape((-1, 1))
                train_accuracy = np.equal(result, raw_labels)
                train_accuracy = train_accuracy.astype(int)
                train_accuracy = np.sum(train_accuracy) / batch_size

                print("Step: %d, train_cost: %.4f, train_accuracy=%4.4f" % (itr, train_cost, train_accuracy*100))

                # write the train phase summary to event files
                train_cost_summary_str = sess.run(cost_summary_op, feed_dict={summary_ph: train_cost})
                train_accuracy_summary_str = sess.run(accuracy_summary_op, feed_dict={summary_ph: train_accuracy})
                train_summary_writer.add_summary(train_cost_summary_str, itr)
                train_summary_writer.add_summary(train_accuracy_summary_str, itr)

            # Checkpoint and validate every 50 iterations (not at step 0).
            if itr % 50 == 0 and itr > 0:

                saver.save(sess, logs_dir + "/model.ckpt", itr)  # model save
                print('validation start!')

                valid_accuracy, valid_cost = \
                    utils.do_validation(m_valid, sess, valid_file_dir, norm_dir, type='bDNN')

                print("valid_cost: %.4f, valid_accuracy=%4.4f" % (valid_cost, valid_accuracy * 100))
                # write the validation phase summary to event files
                valid_cost_summary_str = sess.run(cost_summary_op, feed_dict={summary_ph: valid_cost})
                valid_accuracy_summary_str = sess.run(accuracy_summary_op, feed_dict={summary_ph: valid_accuracy})
                valid_summary_writer.add_summary(valid_cost_summary_str, itr)
                valid_summary_writer.add_summary(valid_accuracy_summary_str, itr)

    elif mode == 'test':

        final_softout, final_label = utils.vad_test(m_valid, sess, valid_batch_size, test_file_dir, norm_dir, data_len,
                                                    eval_type)

        if data_len is None:
            return final_softout, final_label
        else:
            return final_softout[0:data_len, :], final_label[0:data_len, :]