def trainAndSaveModel(x_train, y_train):
    if (args.continous):
        activation = 'linear'  # Use linear activation
        # activation = 'sigmoid' # Alternative: sigmoid activation
        loss = 'mse'           # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                embedding_matrix, DROPOUT, len(APPRAISALS), activation)
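    # Text CNN over the padded token sequences (using embedding_matrix); one output unit per appraisal dimension.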
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

    # appraisals_shaped = np.expand_dims(appraisals, axis=2)
    print('\nINFO: Training on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS):
            appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=VERBOSITY)

    if (not args.savemodel.endswith('.h5')):
        print('\nINFO: Your model filename does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel += '.h5'
    appraisal_predictor.save(args.savemodel)
    print('\nSUCCESS: Saved model to %s' % args.savemodel)
def evalTrainAndTestSet(x_train, x_test, y_train, y_test):
    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
        activation = 'linear'  # Use linear activation
        # activation = 'sigmoid' # Alternative: sigmoid activation
        loss = 'mse'           # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                embedding_matrix, DROPOUT, len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

    # appraisals_shaped = np.expand_dims(appraisals, axis=2)
    print('\nINFO: Training on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS):
            appraisal_predictor.fit(x_train,  y_train, batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=VERBOSITY)


    print('\nINFO: Testing on %d instances...' % len(x_test))
    if (args.continous==False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45] # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(x_test)
        results = []
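        # Binarize each appraisal dimension: predicted probability >= per-dimension threshold in 'weights' -> 1, else 0.
        # A vectorized equivalent would be (sketch, assuming preds is a 2-D numpy array):
        #   appraisal_predictions = (preds >= np.array(weights)).astype(int)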
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
        _metrics.evaluateFold(appraisal_predictions, y_test)
    else:
        preds = appraisal_predictor.predict(x_test)
        # for i in range(len(preds)):
            # print('\n Predicted:', preds[i])
            # print('  Correct: ', appraisal_test[i])
        appraisal_predictions = np.array(preds)
        _reg_metrics.evaluateFold(appraisal_predictions, y_test)

    if (args.savemodel):
        if (not args.savemodel.endswith('.h5')):
            args.savemodel += '.h5'
        appraisal_predictor.save(args.savemodel)
        print('\nINFO: Saved model to %s' % args.savemodel)
def trainAndSaveModel(x_train, y_appraisal, y_emotion):
    if (args.continous):
        # activation = 'linear' # Alternative: linear activation
        activation = 'sigmoid'  # Use sigmoid activation
        loss = 'mse'           # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH,
                            vocab_size, EMBEDDING_DIMS,
                            FILTER_SIZE, CONV_FILTERS,
                            embedding_matrix, DROPOUT,
                            len(APPRAISALS), activation)
    model.compile(OPTIMIZER, loss, metrics=metric)


    # appraisals_shaped = np.expand_dims(appraisals, axis=2)
    print('\nINFO: Learning to predict appraisals on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS_A):
            model.fit(x_train, y_appraisal, batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        model.fit(x_train, y_appraisal, batch_size=BATCH_SIZE, epochs=EPOCHS_A, verbose=VERBOSITY)

    if (not args.savemodel[0].endswith('.h5')):
        print('\nINFO: Your appraisal model filename does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel[0] += '.h5'
    model.save(args.savemodel[0])
    print('\nINFO: Saved appraisal prediction model to %s' % args.savemodel[0])

    emotion_predictor = shallowNN_emotions_from_dimensions(
            len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')

    emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
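    # One-hot encode the emotion labels (get_dummies) and drop the original 'Prior_Emotion' string column.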
    emotions_train = pd.concat([y_emotion, pd.get_dummies(y_emotion)],axis=1).drop(['Prior_Emotion'],axis=1)

    print('\nINFO: Learning to predict emotions on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS_E):
            emotion_predictor.fit(y_appraisal, emotions_train,
                            batch_size=BATCH_SIZE, epochs=1,
                            verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        emotion_predictor.fit(y_appraisal, emotions_train,
                        batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                        verbose=VERBOSITY)

    if (not args.savemodel[1].endswith('.h5')):
        print('\nINFO: Your emotion model filename does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel[1] += '.h5'
    emotion_predictor.save(args.savemodel[1])
    print('\nINFO: Saved emotion prediction model to %s' % args.savemodel[1])
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0
    TP_Baseline = 0

    for seed in range(ROUNDS):
        np.random.seed(seed)

        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
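        # Repeated cross-validation: ROUNDS independent shuffles, each evaluated with KFOLDS folds.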
        for train, test in kfold.split(x_data, y_data):
            K.clear_session()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Learn to predict emotions based on text (on enISEAR)
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, 3116, EMBEDDING_DIMS, FILTER_SIZE,  # 3116: hard-coded vocabulary size
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(data_padded[train],
                                      classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)

            # text_to_emotion_model.save('saved_models/baseline_' + str(percentage_done))
            # # del text_to_emotion_model
            # # K.clear_session()
            # text_to_emotion_model = load_model('saved_models/baseline_' + str(percentage_done))

            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            metrics_fold1 = metrics.metrics(y_data[test],
                                            predicted_classes_text_to_emotion,
                                            LABELS, 2)
            metrics_fold1.showResults()

            # # Learn to predict emotions from dimensions
            model1 = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            model1.compile(OPTIMIZER,
                           'categorical_crossentropy',
                           metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            model1.fit(vectors[train],
                       classes_train,
                       batch_size=BATCH_SIZE,
                       epochs=EPOCHS_p1,
                       verbose=VERBOSITY)
            # model1.save('saved_models/dim_to_emotion_' + str(percentage_done))
            # model1 = load_model('saved_models/dim_to_emotion_' + str(percentage_done))

            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(DIMENSIONS), 'sigmoid')

            model.compile(OPTIMIZER,
                          'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE, epochs=EPOCHS_p2, verbose=VERBOSITY, class_weight=class_weight)
            model.fit(data_padded[train],
                      vectors[train],
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2,
                      verbose=VERBOSITY)
            # model.save('saved_models/text_to_dim_' + str(percentage_done))
            # model = load_model('saved_models/text_to_dim_' + str(percentage_done))

            # predict dimensions on enISEAR
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0

                    res.append(value)
                results.append(res)

            results = np.array(results)

            predicted_classes = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            preds = model.predict(data_padded)
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0

                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions from predicted appraisals on enISEAR
            predicted_classes_train = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_train.append(LABELS[index])

            print('enISEAR: T->A->E')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Predict emotions from text on enISEAR
            predicted_classes_text_to_emotion_train = []
            predictions = text_to_emotion_model.predict(data_padded)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion_train.append(LABELS[index])

            sentences = data_enISEAR
            classes = classes_enISEAR

            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0

            train_sentences = []
            pred_combined = []
            labels_selector = []
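            # Selector targets over the full dataset: [appraisal pipeline (T->A->E) correct, text-to-emotion baseline correct]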
            for i in range(len(data_padded)):
                label_gold = classes_enISEAR.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion_train[i]
                        and label_gold == predicted_classes_train[i]):
                    # Both models correct
                    labels_selector.append([1, 1])
                elif (label_gold == predicted_classes_train[i]):
                    # Only appraisal model correct
                    labels_selector.append([1, 0])
                elif (label_gold == predicted_classes_text_to_emotion_train[i]
                      ):
                    # Only text-to-emotion model correct
                    labels_selector.append([0, 1])
                else:
                    # Both models predicted the wrong emotion
                    labels_selector.append([0, 0])

            # print(len(labels_selector))

            labels_selector = np.array(labels_selector)
            selector_model = text_cnn_model_selector(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'sigmoid')
            selector_model.compile(loss='binary_crossentropy',
                                   metrics=['accuracy'],
                                   optimizer=OPTIMIZER)
            selector_model.fit(data_padded,
                               labels_selector,
                               batch_size=32,
                               epochs=EPOCHS_SELECTION_MODEL,
                               verbose=VERBOSITY,
                               class_weight=class_weight_selector_model)
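            # class_weight_selector_model is expected to be defined globally (class weights balancing the selector targets).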

            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0

            selection_ = []
            selections = selector_model.predict(x_data[test])
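            # argmax over the selector output: index 1 -> take the text-to-emotion baseline, index 0 -> take the appraisal pipeline.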
            for i in range(len(selections)):
                selection_.append(np.argmax(selections[i]))
                # selection_.append(selection[i])
                # print(selection_[i])
                # print(selection[i])

            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]

            pred_combined = []
            labels_selector = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (selection_[i] == 1):
                    TP_text_to_emotion += 1
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (selection_[i] == 0):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1  #
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (' + str(percentage_done) + "%)")
            print('Selected from Baseline : ' + str(TP_text_to_emotion))
            print('Selected from Pipeline : ' +
                  str(TP_text_to_appraisal_to_emotion))

            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS,
                                           2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            # TP_total += TP
            # size_total += size

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0

    for seed in range(ROUNDS):
        np.random.seed(seed)

        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()
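            # tf.reset_default_graph() is TensorFlow 1.x API; under TensorFlow 2.x the equivalent is tf.compat.v1.reset_default_graph().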

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            ####################################################################
            # The problem with this oracle setup is that the models somehow
            # influence each other if they are running in the same process
            # on the GPU. A workaround is to train the baseline model on the
            # folds and save all model weights locally. Then do the same for
            # the pipeline. Afterwards load the saved weights and use them to
            # predict the emotions.
            # This means 10*10 models will be saved (10 folds times 10 runs)
            ####################################################################

            ####################################################################
            # Uncomment this to create and save the baseline model weights
            ####################################################################
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train],
                                      classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)

            text_to_emotion_model.save('baseline_' + str(percentage_done))

            ####################################################################
            # This will load the baseline model weights
            ####################################################################
            text_to_emotion_model = load_model('baseline_' +
                                               str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            metrics_fold = metrics.metrics(y_data[test],
                                           predicted_classes_text_to_emotion,
                                           LABELS, 2)
            metrics_fold.showResults()

            ####################################################################
            # Uncomment this to create and save the pipeline model weights
            ####################################################################
            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER,
                                                'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train],
                                            classes_train,
                                            batch_size=BATCH_SIZE,
                                            epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('dim_to_emotion_' +
                                             str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(DIMENSIONS), 'sigmoid')

            model.compile(OPTIMIZER,
                          'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE, epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train],
                      vectors[train],
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2,
                      verbose=VERBOSITY,
                      class_weight=class_weight)
            model.save('text_to_dim_' + str(percentage_done))

            # Load models
            appraisal_emotion_predictor = load_model('dim_to_emotion_' +
                                                     str(percentage_done))
            model = load_model('text_to_dim_' + str(percentage_done))

            # predict dimensions from ISEAR
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0

                    res.append(value)
                results.append(res)

            results = np.array(results)

            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]

            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0

            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text based system is correct
                    TP_text_to_emotion += 1
                    # green('\n')
                    # green(i)
                    # green('Gold-Emotion    : ' + label_gold)
                    # green('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('\n')
                    # yellow(i)
                    # yellow('Gold-Emotion    : ' + label_gold)
                    # yellow('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1  #
                    # print('\nGold-Emotion      : ' + label_gold)
                    # print('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)

            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
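            # Oracle accuracy: an instance counts as correct if either the baseline or the pipeline predicted the gold emotion.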
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP         : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP : %2d' %
                  TP_text_to_appraisal_to_emotion)

            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS,
                                           2)
            # metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\n\nFinal Result:')
    accuracy = ((TP_total) / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print('Total TP   : %d' % TP_total)
    print('Total size : %d' % size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
def annotatePredictedAppraisals(text_instances_padded,
                                text_instances_padded_annotate,
                                appraisals):
    print('INFO: Annotating Dataset')

    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
        activation = 'linear'  # Use linear activation
        # activation = 'sigmoid' # Alternative: sigmoid activation
        loss = 'mse'           # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    if (args.loadmodel):
        if (not args.loadmodel.endswith('.h5')):
            args.loadmodel += '.h5'
        try:
            appraisal_predictor = load_model(args.loadmodel)
            print('INFO: Loaded pre-trained model: %s' % args.loadmodel)
        except Exception:
            print('\nUnexpected error:', sys.exc_info()[1])
            sys.exit(1)

    else:
        appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                    EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                    embedding_matrix, DROPOUT, len(APPRAISALS), activation)
        appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
        appraisal_predictor.fit(text_instances_padded, appraisals,
                                batch_size=BATCH_SIZE, epochs=EPOCHS,
                                verbose=VERBOSITY)

    if (args.continous==False):
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(text_instances_padded_annotate)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = list(results)
    else:
        preds = appraisal_predictor.predict(text_instances_padded_annotate)
        appraisal_predictions = list(preds)

    if (args.annotate.endswith('.tsv')):
        extension = '.tsv'
    elif(args.annotate.endswith('.csv')):
        extension = '.csv'
    out_file_name = args.annotate[:len(args.annotate)-4] + '_appraisals' + extension
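    # NOTE: 'sep' (used below) is assumed to be defined globally as the column separator ('\t' for .tsv, ',' for .csv).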
    first_line = True
    with open(out_file_name, 'w') as out_file:
        with open(args.annotate, 'r') as in_file:
            for i, line in enumerate(in_file):
                if (first_line):
                    annotation = ''
                    for dimension in APPRAISALS:
                        annotation += sep + dimension
                    out_file.write(line.rstrip('\n') + str(annotation) + '\n')
                    first_line = False
                else:
                    annotation = ''
                    for p in range(len(APPRAISALS)):
                        annotation += sep + str(appraisal_predictions[i-1][p])
                    out_file.write(line.rstrip('\n') + annotation + '\n')
    print('INFO: Created dataset with appraisal annotation: %s' % out_file_name)
def performCrossValidation(x_data, y_data):
    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)

    for seed in range(ROUNDS):
        np.random.seed(seed)

        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            K.clear_session()

            if (args.continous):
                activation = 'linear'  # Use linear activation
                # activation = 'sigmoid' # Alternative: sigmoid activation
                loss = 'mse'           # Use mean squared error loss
                metric = ['mse', 'mae']
            else:
                activation = 'sigmoid'
                loss = 'binary_crossentropy'
                metric = ['accuracy']

            appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                        EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                        embedding_matrix, DROPOUT, len(APPRAISALS), activation)
            appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

            if (args.quiet):
                for _ in range(EPOCHS):
                    appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                    batch_size=BATCH_SIZE, epochs=1,
                                    verbose=VERBOSITY, class_weight=class_weight)
                    print('.', end='', flush=True)
            else:
                appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                batch_size=BATCH_SIZE, epochs=EPOCHS,
                                verbose=VERBOSITY, class_weight=class_weight)

            if (args.continous==False):
                # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45] # Some experimental settings
                weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
                test_instances = text_instances_padded[test]
                preds = appraisal_predictor.predict(test_instances)
                results = []
                for row in range(len(preds)):
                    res = []
                    for dim in range(len(APPRAISALS)):
                        value = preds[row][dim]
                        if (value >= weights[dim]):
                            value = 1
                        else:
                            value = 0
                        res.append(value)
                    results.append(res)
                appraisal_predictions = np.array(results)
                _metrics.evaluateFold(appraisal_predictions, appraisals[test])
            else:
                test_instances = text_instances_padded[test]
                appraisal_test = appraisals[test]
                preds = appraisal_predictor.predict(test_instances)
                # for i in range(len(preds)):
                    # print('\n Predicted:', preds[i])
                    # print('  Correct: ', appraisal_test[i])
                appraisal_predictions = np.array(preds)
                _reg_metrics.evaluateFold(appraisal_predictions, appraisals[test])

    if (args.continous == False):
        if (args.format):
            _metrics.showFinalResults(args.format)
        else: _metrics.showFinalResults(format='text')
    else:
        _reg_metrics.showResults()
def evalTrainAndTestSet(text_instances_padded, text_instances_padded_test,
            appraisals_train, appraisals_test, class_labels_train, class_labels_test):
    emotions_train = pd.concat([class_labels_train, pd.get_dummies(class_labels_train)],axis=1).drop(['Prior_Emotion'],axis=1)
    emotions_test = pd.concat([class_labels_test, pd.get_dummies(class_labels_test)],axis=1).drop(['Prior_Emotion'],axis=1)

    print('\nINFO: Learning to predict emotions on %d instances...' % len(text_instances_padded))
    emotion_predictor = shallowNN_emotions_from_dimensions(
        len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')

    emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])

    # vectors_shaped = np.expand_dims(vectors[train], axis=2)
    if (args.quiet):
        for _ in range(EPOCHS_E):
            emotion_predictor.fit(appraisals_train, emotions_train,
                            batch_size=BATCH_SIZE, epochs=1,
                            verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        emotion_predictor.fit(appraisals_train, emotions_train,
                        batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                        verbose=VERBOSITY)

    if (args.continous):
        # activation = 'linear' # Alternative: linear activation
        activation = 'sigmoid'  # Use sigmoid activation
        loss = 'mse'           # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH,
                            vocab_size, EMBEDDING_DIMS,
                            FILTER_SIZE, CONV_FILTERS,
                            embedding_matrix, DROPOUT,
                            len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

    # appraisals_shaped = np.expand_dims(appraisals, axis=2)
    print('\nINFO: Learning to predict appraisals on %d instances...' % len(text_instances_padded))
    if (args.quiet):
        for _ in range(EPOCHS_A):
            appraisal_predictor.fit(text_instances_padded, appraisals_train, batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(text_instances_padded,appraisals_train, batch_size=BATCH_SIZE, epochs=EPOCHS_A, verbose=VERBOSITY)

    print('\nINFO: Testing on %d instances...' % len(class_labels_test))

    if (args.continous == False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45] # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(text_instances_padded_test)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
    else:
        preds = appraisal_predictor.predict(text_instances_padded_test)
        appraisal_predictions = np.array(preds)

    # Predict emotions based on appraisal predictions
    predicted_emotions = []
    # results = np.expand_dims(results, axis=2)
    emotions_predictions = emotion_predictor.predict(appraisal_predictions)
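    # Pick the emotion label with the highest softmax probability for each test instance.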
    for i in range(len(emotions_predictions)):
        index = np.argmax(emotions_predictions[i])
        predicted_emotions.append(LABELS[index])

    _metrics = metrics.metrics(class_labels_test, predicted_emotions, LABELS, 2)
    _metrics.showResults()
def performCrossValidation(x_data, y_data_appraisal, y_data_emotion):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    percentage_done = 1
    for seed in range(ROUNDS):
        np.random.seed(seed)

        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data_emotion):
            K.clear_session()

            emotions_train = pd.concat([y_data_emotion[train], pd.get_dummies(y_data_emotion[train])],axis=1).drop(['Prior_Emotion'],axis=1)
            emotions_test = pd.concat([y_data_emotion[test], pd.get_dummies(y_data_emotion[test])],axis=1).drop(['Prior_Emotion'],axis=1)

            ####################################################################
            # Task 1 : Learn to predict appraisals from text
            ####################################################################
            if (args.continous):
                activation = 'linear'  # Use linear activation
                # activation = 'sigmoid' # Alternative: sigmoid activation
                loss = 'mse'           # Use mean squared error loss
                metric = ['mse', 'mae']
            else:
                activation = 'sigmoid'
                loss = 'binary_crossentropy'
                metric = ['accuracy']


            print('\nINFO: Learning to predict appraisals from text...')
            appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                        EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                        embedding_matrix, DROPOUT, len(APPRAISALS), activation)
            appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

            if (args.quiet):
                for _ in range(EPOCHS_A):
                    appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                    batch_size=BATCH_SIZE, epochs=1,
                                    verbose=VERBOSITY, class_weight=class_weight)
                    print('.', end='', flush=True)
            else:
                appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                batch_size=BATCH_SIZE, epochs=EPOCHS_A,
                                verbose=VERBOSITY, class_weight=class_weight)

            if (args.continous == False):
                # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45] # Some experimental settings
                weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
                test_instances = text_instances_padded[test]
                preds = appraisal_predictor.predict(test_instances)
                results = []
                for row in range(len(preds)):
                    res = []
                    for dim in range(len(APPRAISALS)):
                        value = preds[row][dim]
                        if (value >= weights[dim]):
                            value = 1
                        else:
                            value = 0
                        res.append(value)
                    results.append(res)
                appraisal_predictions = np.array(results)
            else:
                test_instances = text_instances_padded[test]
                appraisal_test = appraisals[test]
                preds = appraisal_predictor.predict(test_instances)
                # for i in range(len(preds)):
                    # print('\n Predicted:', preds[i])
                    # print('  Correct: ', appraisal_test[i])
                appraisal_predictions = np.array(preds)

            ####################################################################
            # Task 2 : Learn to predict emotions from appraisals
            ####################################################################
            print('\nINFO: Learning to predict emotions from appraisals...')
            emotion_predictor = shallowNN_emotions_from_dimensions(
                    len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')

            emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            if (args.quiet):
                for _ in range(EPOCHS_E):
                    emotion_predictor.fit(appraisals[train], emotions_train,
                                    batch_size=BATCH_SIZE, epochs=1,
                                    verbose=VERBOSITY)
                    print('.', end='', flush=True)
            else:
                emotion_predictor.fit(appraisals[train], emotions_train,
                                batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                                verbose=VERBOSITY)

            # Predict emotions based on appraisal predictions
            predicted_emotions = []
            # results = np.expand_dims(results, axis=2)
            emotions_predictions = emotion_predictor.predict(appraisal_predictions)
            for i in range(len(emotions_predictions)):
                index = np.argmax(emotions_predictions[i])
                predicted_emotions.append(LABELS[index])

            # Show results
            print('\n\nINFO: Evaluating CV-fold...')
            metrics_fold = metrics.metrics(y_data_emotion[test], predicted_emotions, LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data_emotion[test], predicted_emotions)
    metrics_final.showResults()
    metrics_final.showConfusionMatrix(False)
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0

    percentage_done = 0
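    # Three passes over the same CV splits: (1) train and save the baseline (text-to-emotion) models,
    # (2) train and save the pipeline models (text-to-appraisal and appraisal-to-emotion),
    # (3) reload the saved models and evaluate the oracle combination.
    # Saving and reloading avoids the models influencing each other within one GPU process
    # (see the note in the previous cross-validation function).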
    for seed in range(ROUNDS):
        np.random.seed(seed)
        # Create baseline model weights
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Baseline model %i/%i' %
                  (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train],
                                      classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            text_to_emotion_model.save('saved_models/baseline_' +
                                       str(percentage_done))
            percentage_done += 1

    percentage_done = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        # Create pipeline model weights
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Pipeline model %i/%i' %
                  (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER,
                                                'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train],
                                            classes_train,
                                            batch_size=BATCH_SIZE,
                                            epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('saved_models/dim_to_emotion_' +
                                             str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS,
                                              embedding_matrix, DROPOUT,
                                              len(DIMENSIONS), 'sigmoid')

            model.compile(OPTIMIZER,
                          'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE, epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train],
                      vectors[train],
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2,
                      verbose=VERBOSITY,
                      class_weight=class_weight)
            model.save('saved_models/text_to_dim_' + str(percentage_done))
            percentage_done += 1

    percentage_done = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        # Evaluate models
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print(
                '\n\n############################################################'
            )
            print('Evaluating fold %i/%i in run %i/%i' %
                  ((percentage_done % KFOLDS) + 1, KFOLDS, seed + 1, ROUNDS))
            print(
                '############################################################')
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Load the baseline model weights
            text_to_emotion_model = load_model('saved_models/baseline_' +
                                               str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            # Show results of baseline
            print(
                '\n############################################################'
            )
            print('Baseline result:')
            print(
                '############################################################')
            metrics_fold = metrics.metrics(y_data[test],
                                           predicted_classes_text_to_emotion,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Load appraisal models
            appraisal_emotion_predictor = load_model(
                'saved_models/dim_to_emotion_' + str(percentage_done))
            model = load_model('saved_models/text_to_dim_' +
                               str(percentage_done))

            # Predict Appraisals from ISEAR
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0

                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict Emotions based on predicted Appraisals
            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Show results of pipeline
            print(
                '\n############################################################'
            )
            print('Pipeline result:')
            print(
                '############################################################')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Evaluate models and create oracle 'prediction'
            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0

            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text based system is correct
                    TP_text_to_emotion += 1
                    # green('\n')
                    # green(i)
                    # green('Gold-Emotion    : ' + label_gold)
                    # green('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('\n')
                    # yellow(i)
                    # yellow('Gold-Emotion    : ' + label_gold)
                    # yellow('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1  #
                    # print('\nGold-Emotion      : ' + label_gold)
                    # print('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)

            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP         : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP : %2d' %
                  TP_text_to_appraisal_to_emotion)

            # Evaluate oracle 'prediction'
            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS,
                                           2)
            print(
                '\n############################################################'
            )
            print('Oracle result:')
            print(
                '############################################################')
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\nFinal Result:')
    accuracy = ((TP_total) / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print('Total TP   : %d' % TP_total)
    print('Total size : %d' % size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    metrics_final.createMarkdownResults()
    return