Example #1
from sklearn.metrics import accuracy_score, f1_score


def eval_predictions(y_true, y_pred, print_results=False):
    # Plot confusion matrix
    plot_confusion_matrix(y_true=y_true,
                          y_pred=y_pred,
                          normalize=True,
                          classes=['agree', 'disagree', 'discuss'])
    # TODO: Precision, recall, return results in a dict
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    f1_score_micro = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    f1_score_macro = f1_score(y_true=y_true, y_pred=y_pred, average='macro')
    f1_score_weighted = f1_score(y_true=y_true,
                                 y_pred=y_pred,
                                 average='weighted')
    if print_results:
        log("Prediction Evaluation", header=True)
        log(f"Accuracy: {accuracy}")
        log(f"F1 Score (Macro): {f1_score_macro}")
        log(f"F1 Score (Micro): {f1_score_micro}")
        log(f"F1 Score (Weighted): {f1_score_weighted}")
Example #2
x_train, y_train = preprocess(x_train, y_train)

train_labels, test_labels = update_labels(y_train, y_test)
"""TFiDF"""
tfidf = TfidfVectorizer(min_df=2, max_df=0.5, ngram_range=(1, 1))
vectorizer = tfidf.fit(x_train["text"])

# Note: .todense() materializes the full TF-IDF matrix in memory; SVC also
# accepts sparse input. Newer scikit-learn versions rename get_feature_names()
# to get_feature_names_out().
features_train = pd.DataFrame(vectorizer.transform(x_train["text"]).todense(),
                              columns=tfidf.get_feature_names())
features_test = pd.DataFrame(vectorizer.transform(x_test["text"]).todense(),
                             columns=tfidf.get_feature_names())
"""Support Vector Machine Classifier"""
clf = SVC(kernel='linear').fit(features_train.values, train_labels)
predicted = clf.predict(features_test.values)
"""Metrics"""
print(
    metrics.classification_report(test_labels,
                                  predicted,
                                  target_names=["negative", "positive"]))
print(metrics.confusion_matrix(test_labels, predicted))

plot_confusion_matrix(test_labels,
                      predicted, [0, 1], ["Negative", "Positive"],
                      save_path_prefix + "svc_prep_cf_10k_2k",
                      normalize=True)

filename_clf = save_path_prefix + "svc_prep_data_10k_train_2k_test.clf"
filename_vect = save_path_prefix + "svc_prep_data_10k_train_2k_test.vect"

save_classifier(clf, vectorizer, filename_clf, filename_vect)
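
save_classifier is a project helper whose storage format isn't shown. Independently of it, the fitted pair can score new text; a minimal sketch using only the scikit-learn objects from the example above (predict_sentiment is a hypothetical name):

def predict_sentiment(texts, vectorizer, clf):
    """Sketch: score raw strings with the fitted TF-IDF + SVC pipeline."""
    # The SVC above was fit on dense arrays, so densify here as well;
    # scikit-learn rejects sparse input for a dense-trained SVC.
    features = vectorizer.transform(texts).toarray()
    return clf.predict(features)


print(predict_sentiment(["great product", "awful service"], vectorizer, clf))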
Example #3
model = Sequential()
model.add(Embedding(20000, 128))
model.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(64, dropout=0.35, recurrent_dropout=0.35))
model.add(Dense(2, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=train_epochs,
                    verbose=2,
                    validation_data=(x_test, y_test))

plot_training_results(history)

y_pred = model.predict_classes(x_test)
plot_confusion_matrix(y_test,
                      y_pred, [0, 1], ['Negative', 'Positive'],
                      "/home/sabir/Documents/",
                      normalize=True)

model_json = model.to_json()
save_model("./resources/lstm/lstm.json", ".../resources/lstm/lstm.h5",
           model_json, model)
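
Note that Sequential.predict_classes, used above, was deprecated and then removed in TensorFlow 2.6. On current versions the equivalent is an argmax over the predicted class scores:

import numpy as np

# Equivalent of predict_classes on TF >= 2.6.
y_pred = np.argmax(model.predict(x_test), axis=-1)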
Example #4
def eval(args):
    if args.checkpoint:
        checkpoint_path = args.checkpoint
    else:
        checkpoint_path = tf.train.latest_checkpoint(hp.logdir)
    log('Loading checkpoint: %s' % checkpoint_path)
    log(hparams_debug_string())

    # Set up placeholders:
    audio = tf.placeholder(tf.float32, [None, None, hp.num_mels], 'audio')
    sentence = tf.placeholder(tf.int32, [None, None], 'sentence')
    targets = tf.placeholder(tf.int32, [None, None], 'targets')
    audio_length = tf.placeholder(tf.int32, [None], 'audio_length')
    sentence_length = tf.placeholder(tf.int32, [None], 'sentence_length')

    # Set up model:
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hp)
        model.initialize(audio, sentence, audio_length, sentence_length,
                         targets)
        model.add_loss()
        model.add_acc()

    # Bookkeeping:
    time_window = ValueWindow(100)
    acc_window = ValueWindow(100)
    correct_window = ValueWindow(100)

    # Eval!
    step = 0
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    output_list = []
    target_list = []
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)
        log('Loading evaluate data from: %s' % hp.test_data_path)
        feature_files, batches = get_test_batches(hp.test_data_path)
        for idx, batch in enumerate(batches):
            batch = prepare_batch(batch)
            feed_dict = {
                model.audio: batch[0],
                model.sentence: batch[1],
                model.targets: batch[2],
                model.audio_length: batch[3],
                model.sentence_length: batch[4]
            }
            step += 1
            start_time = time.time()

            output, target, istarget, origin_acc = sess.run(
                [model.preds, model.targets, model.istarget, model.acc],
                feed_dict=feed_dict)
            # Time the forward pass itself, not an empty interval.
            time_window.append(time.time() - start_time)
            # mapping to 39
            output = map_to_39_2d(output)
            target = map_to_39_2d(target)
            origin_acc_39 = calculate_acc(istarget, output, target)
            output, target, preds, labels = obtain_list(
                output, target, istarget)
            acc, correct = batch_lcs(output, target)
            print(origin_acc_39, acc, correct)
            acc_window.append(acc)
            correct_window.append(correct)

            output_list.extend(preds)
            target_list.extend(labels)

            message = 'Step %-7d [%.03f sec/step, avg=%.05f, correct=%.05f]' % (
                step, time_window.average, acc_window.average,
                correct_window.average)
            log(message)

        # checkpoint_path is set in both branches above; args.checkpoint may
        # be None when the latest checkpoint is used.
        plot.plot_confusion_matrix(target_list, output_list, idx2phn,
                                   checkpoint_path + ".png")
        log('Confusion matrix saved!')
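
ValueWindow is a project helper that isn't shown here. A typical implementation, assumed from how it is used above (append plus an average property over the last N values), looks like this:

class ValueWindow:
    """Sketch: rolling average over the most recent window_size values."""

    def __init__(self, window_size=100):
        self._window_size = window_size
        self._values = []

    def append(self, x):
        # Drop the oldest value once the window is full.
        self._values = self._values[-(self._window_size - 1):] + [x]

    @property
    def average(self):
        return sum(self._values) / max(len(self._values), 1)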
Example #5
    def train(self):

        self.ckpt.restore(self.ckpt_manager.latest_checkpoint)
        if self.ckpt_manager.latest_checkpoint:
            print("Restored from {}".format(
                self.ckpt_manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        dg = DataGenerator(task_version=self.task_version, action='train')
        validation_batch_num = self.batch_num // 10

        overall_performance = []

        print('Start to train')
        print('#' * 20)

        for epoch_i in range(self.epoch_num):
            print('Epoch ' + str(epoch_i))

            # Train
            #
            train_loss_all = 0
            for batch_i in range(self.batch_num):
                decs, masks, inputs, outputs = next(dg)
                with tf.GradientTape() as tape:
                    logits, _ = self.ei_rnn(inputs, [self.init_state],
                                            training=True)
                    logits = tf.transpose(logits, perm=[0, 2, 1])
                    train_loss = self.loss_fun(outputs, logits, masks)
                    train_loss += sum(self.ei_rnn.losses)
                    train_loss_all += train_loss.numpy()

                all_weights = self.ei_rnn.trainable_weights + [self.init_state]
                grads = tape.gradient(train_loss, all_weights)
                grads, _ = tf.clip_by_global_norm(grads, self.grad_clip)
                # Pair each gradient with the variable it was computed for,
                # so init_state is updated along with the RNN weights.
                self.optimizer.apply_gradients(zip(grads, all_weights))

            train_loss_all = train_loss_all / self.batch_num
            print('train loss:', train_loss_all)

            with self.train_summary_writer.as_default():
                tf.summary.scalar('loss', train_loss_all, step=epoch_i)

            # Validation
            #
            validation_loss_all = 0
            validation_acc_all = 0

            for v_batch_i in range(validation_batch_num):
                _, v_masks, v_inputs, v_outputs = \
                    dg.get_valid_test_datasets()
                v_logits, _ = self.ei_rnn(v_inputs, [self.init_state])

                acc_v_logits = v_logits.numpy()
                acc_v_outputs = tf.transpose(v_outputs, perm=[0, 2, 1]).numpy()
                validation_acc = self.get_accuracy(acc_v_logits, acc_v_outputs,
                                                   v_masks)
                validation_acc_all += validation_acc

                v_logits = tf.transpose(v_logits, perm=[0, 2, 1])
                validation_loss = self.loss_fun(v_outputs, v_logits, v_masks)
                validation_loss += sum(self.ei_rnn.losses)
                validation_loss_all += validation_loss.numpy()

            validation_loss_all = validation_loss_all / validation_batch_num
            validation_acc_all = validation_acc_all / validation_batch_num
            overall_performance.append(validation_acc_all)

            print('validation loss:', validation_loss_all)
            print('validation acc:', validation_acc_all)

            with self.validation_summary_writer.as_default():
                tf.summary.scalar('loss', validation_loss_all, step=epoch_i)
                tf.summary.scalar('acc', validation_acc_all, step=epoch_i)

                cm_image = plot.plot_confusion_matrix(self.get_w_rec_m())
                tf.summary.image('M_rec', cm_image, step=epoch_i)

                # First int(UNITS_SIZE * EI_RATIO) columns, presumably the
                # excitatory units.
                n_exc = int(UNITS_SIZE * EI_RATIO)
                win_image = plot.plot_confusion_matrix(
                    funs.rectify(self.rnn_cell.W_in.numpy()[:, :n_exc]), False)
                tf.summary.image('M_in', win_image, step=epoch_i)

                wout_image = plot.plot_confusion_matrix(
                    self.get_w_out_m()[:, :n_exc], False)
                tf.summary.image('M_out', wout_image, step=epoch_i)

                print('spr: ', funs.spectral_radius(self.get_w_rec_m().T))

            if epoch_i > PERFORMANCE_CHECK_REGION and \
                    np.mean(overall_performance[-PERFORMANCE_CHECK_REGION:]) > PERFORMANCE_LEVEL:
                print('Overall performance level is satisfied; '
                      'training is terminated\n')
                break

            # Save Model
            self.ckpt.step.assign_add(1)
            self.ckpt_manager.save()

            # self.reset_all_weights() # todo: may uncomment
            # print('Remove all weights below ' + str(SGD_p['mini_w_threshold']))
            # print('\n')

        print('Training is done')
        print('#' * 20)
        print('\n')
        # Test
        #
        self.test()
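
loss_fun is also project code; the example only shows that it takes (outputs, logits, masks). One common shape for such a masked loss, sketched here as an assumption rather than the repo's actual implementation, zeroes out the padded time steps before averaging:

import tensorflow as tf


def masked_mse(outputs, logits, masks):
    """Sketch: mean squared error over time steps where mask == 1.

    Assumes masks broadcasts against the per-step error.
    """
    err = tf.square(tf.cast(outputs, tf.float32) - logits)
    masks = tf.cast(masks, tf.float32)
    # Normalize by the number of unmasked entries, guarding against zero.
    return tf.reduce_sum(err * masks) / (tf.reduce_sum(masks) + 1e-8)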
Example #6
    def test(self, test_batch_num=50):
        print('Start to test')
        print('#' * 20)
        self.ckpt.restore(self.ckpt_manager.latest_checkpoint)
        if self.ckpt_manager.latest_checkpoint:
            print("Restored from {}".format(
                self.ckpt_manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        dg = DataGenerator(task_version=self.task_version, action='test')
        psycollection = {'coh': [], 'perc': []}

        for batch_index in range(test_batch_num):
            descs, test_masks, test_inputs, test_outputs = \
                dg.get_valid_test_datasets()

            test_logits, _ = self.ei_rnn(test_inputs, [self.init_state],
                                         training=False)

            acc_test_logits = test_logits.numpy()
            acc_test_outputs = tf.transpose(test_outputs,
                                            perm=[0, 2, 1]).numpy()
            test_acc = self.get_accuracy(acc_test_logits, acc_test_outputs,
                                         test_masks)

            test_logits = tf.transpose(test_logits, perm=[0, 2, 1])
            test_loss = self.loss_fun(test_outputs, test_logits, test_masks)
            test_loss += sum(self.ei_rnn.losses)

            print('test loss:', test_loss.numpy())
            print('test acc:', test_acc)

            tmp_data = self.get_psychometric_data(descs, test_logits.numpy())

            psycollection['coh'] += tmp_data['coh']
            psycollection['perc'] += tmp_data['perc']

            with self.test_summary_writer.as_default():
                tf.summary.scalar('loss', test_loss, step=batch_index)
                tf.summary.scalar('acc', test_acc, step=batch_index)

                curve_image = plot.plot_dots(psycollection['coh'],
                                             psycollection['perc'])
                tf.summary.image('psycollection',
                                 curve_image,
                                 step=batch_index)

                cm_image = plot.plot_confusion_matrix(self.get_w_rec_m())
                tf.summary.image('M_rec', cm_image, step=batch_index)

                # First int(UNITS_SIZE * EI_RATIO) columns, presumably the
                # excitatory units.
                n_exc = int(UNITS_SIZE * EI_RATIO)
                win_image = plot.plot_confusion_matrix(
                    funs.rectify(self.rnn_cell.W_in.numpy()[:, :n_exc]), False)
                tf.summary.image('M_in', win_image, step=batch_index)

                wout_image = plot.plot_confusion_matrix(
                    self.get_w_out_m()[:, :n_exc], False)
                tf.summary.image('M_out', wout_image, step=batch_index)
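
tf.summary.image expects a 4-D [batch, height, width, channels] tensor, so the plot helpers above presumably return one. The standard recipe for converting a matplotlib figure into such a tensor (adapted from TensorFlow's image-summary tutorial; figure_to_image is an illustrative name) is:

import io

import matplotlib.pyplot as plt
import tensorflow as tf


def figure_to_image(figure):
    """Render a matplotlib figure to a [1, H, W, 4] tensor for tf.summary.image."""
    buf = io.BytesIO()
    figure.savefig(buf, format='png')
    plt.close(figure)
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    return tf.expand_dims(image, 0)  # add the batch dimension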