Example #1
0
         # Train an LSTM (word2vec embeddings) for the current CV fold,
         # evaluate it on the held-out split, and record the fold metrics.
         # NOTE(review): data_train/labels_train/data_val/labels_val,
         # test_tweet, y_test, num_fold, n_sampling and the list_*
         # accumulators are all defined above this fragment — presumably
         # inside an enclosing cross-validation loop; confirm against the
         # full file.
         model = build_LSTM(args=args_lstm)
         # Stop training once validation loss has not improved for 10 epochs.
         early_stopping = EarlyStopping(monitor='val_loss', patience=10)
         # NOTE(review): `hist` (the Keras History object) is never read
         # in this fragment.
         hist = model.fit(data_train,
                          labels_train,
                          validation_data=(data_val, labels_val),
                          epochs=args_lstm.epochs,
                          batch_size=args_lstm.batch_size,
                          shuffle=True,
                          callbacks=[early_stopping])
         # predict
         print('Testing')
         preds = model.predict(test_tweet, batch_size=32, verbose=1)
         # Flatten the (n, 1) prediction scores into a 1-D vector for scoring.
         y_pred = preds.ravel()
         acc, pre, rec, f1, auc = evaluate_prediction(
             y_test=y_test,
             y_pred=y_pred,
             k_th=num_fold,
             model_name='LSTM-word2vec',
             dataset_name=args_lstm.dataset)
         # Accumulate per-fold metrics so averages can be reported afterwards.
         list_acc.append(acc)
         list_pre.append(pre)
         list_rec.append(rec)
         list_f1.append(f1)
         list_auc.append(auc)
         # One table row per fold for the final report.
         result.append(['LSTM-word2vec', acc, pre, rec, f1, auc])
     n_sampling -= 1
 # Append the cross-fold averages as a summary row.
 # NOTE(review): this statement continues past the visible fragment — the
 # closing of the list and call (np.mean(list_auc)]) is not shown here.
 result.append([
     'average',
     np.mean(list_acc),
     np.mean(list_pre),
     np.mean(list_rec),
     np.mean(list_f1),
Example #2
0
            # NOTE(review): DataFrame/Series.as_matrix() was deprecated in
            # pandas 0.23 and removed in 1.0 — .to_numpy() is the modern
            # equivalent; confirm the pinned pandas version before changing.
            y = df_all['y'].as_matrix()

        # 10-fold cross validation
        num_fold = 10
        # Fixed random_state keeps the fold assignment reproducible.
        kf = KFold(n_splits=num_fold, shuffle=True, random_state=0)
        for train_index, test_index in kf.split(X):
            # num_fold counts DOWN from 9 to 0 here, so k_th labels folds in
            # reverse iteration order (the RNN fragment below counts up —
            # presumably intentional, but worth confirming).
            num_fold -= 1
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            # Logistic Regression
            clf = LogisticRegression(penalty='l2', tol=1e-6)
            clf.fit(X_train, y_train)
            # Score with the positive-class probability (column 1 of
            # predict_proba), e.g. for AUC computation downstream.
            y_pred = clf.predict_proba(X_test)[:, 1]
            acc, pre, rec, f1, auc = evaluate_prediction(
                y_test,
                y_pred,
                k_th=num_fold,
                model_name='Logistic Regression',
                dataset_name=args.dataset)
            # Accumulate per-fold logistic-regression metrics.
            lr_acc.append(acc)
            lr_pre.append(pre)
            lr_rec.append(rec)
            lr_f1.append(f1)
            lr_auc.append(auc)
            # Random Forest
            clf = RandomForestClassifier(n_estimators=20,
                                         max_depth=8,
                                         random_state=0)
            clf.fit(X_train, y_train)
            y_pred = clf.predict_proba(X_test)[:, 1]
            # NOTE(review): this call continues past the visible fragment —
            # its remaining arguments are not shown here.
            acc, pre, rec, f1, auc = evaluate_prediction(
                y_test,
Example #3
0
            # Fit one tokenizer on both text fields (title + user text) so the
            # two model inputs share a single vocabulary.
            tokenizer.fit_on_texts(list(title_train)+list(usertext_train))
            tokenized_title_train = tokenizer.texts_to_sequences(title_train)
            tokenized_title_test = tokenizer.texts_to_sequences(title_test)
            tokenized_usertext_train = tokenizer.texts_to_sequences(usertext_train)
            tokenized_usertext_test = tokenizer.texts_to_sequences(usertext_test)
            # Pad/truncate every sequence to the same fixed length so the
            # batches are rectangular.
            # NOTE(review): dtype='float64' for integer token ids is unusual —
            # presumably required by the downstream embedding setup; confirm.
            X1_train = sequence.pad_sequences(tokenized_title_train, maxlen=args_rnn.max_seq_len, dtype='float64')
            X1_test = sequence.pad_sequences(tokenized_title_test, maxlen=args_rnn.max_seq_len, dtype='float64')
            X2_train = sequence.pad_sequences(tokenized_usertext_train, maxlen=args_rnn.max_seq_len, dtype='float64')
            X2_test = sequence.pad_sequences(tokenized_usertext_test, maxlen=args_rnn.max_seq_len, dtype='float64')

            # train model and predict
            model = build_RNN(args=args_rnn)
            # Stop training once validation loss stops improving for 20 epochs.
            early = EarlyStopping(monitor="val_loss", mode="min", patience=20)
            # The model takes two inputs (title, user text); 10% of the
            # training data is held out for validation.
            model.fit([X1_train, X2_train], y_train, batch_size=args_rnn.batch_size, epochs=args_rnn.epochs,
                      validation_split=0.1, callbacks=[early])

            # Reshape (n, 1) predictions to match y_test for the evaluator.
            y_pred = model.predict([X1_test, X2_test]).reshape(y_test.shape)
            acc, pre, rec, f1, auc = evaluate_prediction(y_test, y_pred, k_th=num_fold, model_name='RNN',
                                                         dataset_name=args_rnn.dataset)

            # Accumulate per-fold metrics for the summary row below.
            list_acc.append(acc)
            list_pre.append(pre)
            list_rec.append(rec)
            list_f1.append(f1)
            list_auc.append(auc)
            result.append(['RNN', acc, pre, rec, f1, auc])
            # NOTE(review): num_fold counts UP here, unlike the KFold
            # fragment above where it counts down — confirm which convention
            # k_th is expected to follow.
            num_fold += 1
        n_sampling -= 1
        # Append the cross-fold averages and print the full results table.
        result.append(['average', np.mean(list_acc), np.mean(list_pre),
                       np.mean(list_rec), np.mean(list_f1), np.mean(list_auc)])
        print(tabulate(result, headers=h))