def main():
    """Evaluate the stored LSTM classifier on the testing CSV.

    Populates the module-level ``config`` via ``load_config``, builds an
    LSTM classifier from it, loads the weight file found under the models
    path, predicts on the ``question_text`` column of ``TESTING_DATA`` and
    prints the accuracy before plotting a confusion matrix against the
    ``target`` column.
    """
    global config

    model_name = 'lstm'
    models_dir = Config.getPath('models')
    weights_path = models_dir + '/' + model_name + '-weights.h5'

    # load_config() publishes its result through the global `config`.
    load_config(model_name)
    lstm = ClassifierFactory.getLSTM(config=config)
    lstm.load_weights(weights_path)

    test_frame = pd.read_csv(Config.getPath('data') + '/' + TESTING_DATA)
    questions = test_frame['question_text']
    labels = test_frame['target']

    print('extract configuration from input texts ...')
    print('testing size: ', len(questions))

    print('start predicting ...')
    predictions = lstm.predict(questions)
    print(predictions)

    accuracy = metrics.accuracy_score(labels, predictions)
    print("accuracy: %0.3f" % accuracy)

    matrix = metrics.confusion_matrix(labels, predictions, labels=[0, 1])
    plot_confusion_matrix(matrix, classes=[0, 1])
def load_config(model_name):
    """Load the saved configuration for ``model_name`` into the global ``config``.

    The configuration is read from ``<models path>/<model_name>-config.joblib``
    through ``ClassifierFactory.getConfig`` and then marked as having two
    target tokens.

    Args:
        model_name: Prefix of the ``-config.joblib`` file to load.
    """
    global config

    print('loading csv file ...')
    models_dir = Config.getPath('models')
    joblib_path = models_dir + '/' + model_name + '-config.joblib'
    config = ClassifierFactory.getConfig(joblib_file=joblib_path)

    # Binary target; the original note labels the classes Fake=0, Reliable=1.
    config.set('num_target_tokens', 2)
def main():
    """Prepare the configuration and pass a 64-unit LSTM to ``train_vanilla``.

    The configuration is requested with GloVe embeddings, an input sequence
    length of 100, a vocabulary cap of 8000 and 20 epochs.
    """
    load_config(
        embedding='glove',
        max_input_seq_length=100,
        max_vocab_size=8000,
        epochs=20,
    )

    # Alternative setup tried previously: lstm_units=64, dropout=0.2, epochs=15.
    lstm = ClassifierFactory.getLSTM(lstm_units=64)
    train_vanilla(lstm)
def load_config(**kwargs):
    """Build the global ``config`` from a random sample of the training CSV.

    Reads ``TRAINING_DATA`` from the data path, draws 50000 rows, and hands
    the ``question_text`` / ``target`` columns to
    ``ClassifierFactory.getConfig`` together with ``kwargs``.

    Args:
        **kwargs: Forwarded unchanged to ``ClassifierFactory.getConfig``.
    """
    global config

    print('loading csv file ...')
    training_csv = Config.getPath('data') + '/' + TRAINING_DATA
    sample = pd.read_csv(training_csv).sample(50000)
    texts = sample['question_text']
    targets = sample['target']

    print('preparing configuration...')
    config = ClassifierFactory.getConfig(texts, targets, json_file=None, **kwargs)

    # Binary target; the original note labels the classes Fake=0, Reliable=1.
    config.set('num_target_tokens', 2)
def predict_svm():
    """Fit an SVM on a training sample and score it on a testing sample.

    Steps:
      1. Load the configuration via ``load_config('svm')``.
      2. Sample 50000 rows of ``TRAINING_DATA`` and split them 80/20; both
         parts are passed to ``classifier.fit``.
      3. Sample 100000 rows of ``TESTING_DATA``, predict on them, and print
         accuracy and F1.
      4. Plot the confusion matrix for that same testing sample.

    The confusion matrix is computed from the labels of the rows that were
    actually predicted. The earlier version compared the predictions with
    ``Ytest`` (the hold-out part of the *training* sample), which belongs to
    a different dataset and has a different length.
    """
    global config
    load_config('svm')

    print('loading data...')
    train_df = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)
    train_sample = train_df.sample(50000)
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        train_sample['question_text'],
        train_sample['target'],
        test_size=0.2,
        random_state=42,
    )

    # Binary target; the original note labels the classes Fake=0, Reliable=1.
    config.set('num_target_tokens', 2)

    classifier = ClassifierFactory.getSVM()

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')
    classifier.fit(Xtrain, Ytrain, Xtest, Ytest)

    # Evaluation uses a separate sample drawn from the testing CSV.
    eval_df = pd.read_csv(Config.getPath('data') + '/' + TESTING_DATA)
    eval_df = eval_df.sample(100000)
    eval_texts = eval_df['question_text']
    eval_labels = eval_df['target']

    pred = classifier.predict(eval_texts)
    score = metrics.accuracy_score(eval_labels, pred)
    f1score = metrics.f1_score(eval_labels, pred)
    print("accuracy: %0.3f" % score)
    print("f1 score: %0.3f" % f1score)

    # Must pair `pred` with the labels of the rows it was computed from.
    cm = metrics.confusion_matrix(eval_labels, pred, labels=[0, 1])
    plot_confusion_matrix(cm, classes=[0, 1])
def train_experiment(classifier):
    """Run a fixed grid of LSTM experiments and save a history plot for each.

    A 20000-row sample of ``TRAINING_DATA`` is split 80/20. For every row of
    the experiment table a new ``Config`` and LSTM model are created, the
    model is fitted for 10 epochs, and its history is plotted to the reports
    path. The global ``config`` is rebound on every iteration.

    Args:
        classifier: Not referenced by the body; each experiment builds its
            own model through ``ClassifierFactory.getLSTM``.
    """
    global config

    print('loading csv file ...')
    frame = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)
    frame = frame.sample(20000)
    X = frame['question_text']
    Y = frame['target']

    print('splitting data...')
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')

    # Columns: max_sequence, vocab_size, lstm_units, dropout
    grid = (
        (20, 5000, 64, 0.2),    # 0
        (35, 5000, 64, 0.2),    # 1
        (50, 5000, 64, 0.2),    # 2
        (100, 5000, 64, 0.2),   # 3
        (50, 5000, 128, 0.2),   # 4
        (50, 5000, 256, 0.2),   # 5
        (50, 5000, 512, 0.2),   # 6
        (50, 2000, 64, 0.2),    # 7
        (50, 3000, 64, 0.2),    # 8
        (50, 4000, 64, 0.2),    # 9
        (50, 5000, 64, 0.2),    # 10
        (50, 6000, 64, 0.2),    # 11
        (50, 7000, 64, 0.2),    # 12
        (50, 8000, 64, 0.2),    # 13
        (50, 9000, 64, 0.2),    # 14
        (50, 5000, 64, 0.1),    # 15
        (50, 5000, 64, 0.2),    # 16
        (50, 5000, 64, 0.3),    # 17
        (50, 5000, 64, 0.4),    # 18
    )

    for run, (seq_len, vocab_size, units, dropout) in enumerate(grid):
        config = Config(X, Y, seq_len, vocab_size, 2, 'glove')
        print("%s starting experiment ... %d" % (datetime.datetime.now(), run))

        model = ClassifierFactory.getLSTM(
            config=config, lstm_units=units, dropout=dropout)
        history = model.fit(
            Xtrain, Ytrain, Xtest, Ytest,
            epochs=10,
            file_prefix='experiment-%i' % run,
        )

        reports_dir = Config.getPath('reports')
        plot_name = model.model_name + ('_experiment_%d' % run) + '-history.png'
        plot_and_save_history(
            history, model.model_name, reports_dir + '/' + plot_name)
def main_svm():
    """Prepare the default configuration and pass an SVM to ``train_vanilla``."""
    load_config()
    svm = ClassifierFactory.getSVM()
    train_vanilla(svm)