ans2_pros.append(result[i + 1][0]) i += 2 if sorted(ans1_pros, reverse=True)[0] > sorted(ans2_pros, reverse=True)[0]: predicty[key][0] = 1 predicty[key + 1][0] = 0 else: predicty[key][0] = 0 predicty[key + 1][0] = 1 print("update") return predicty # train model = svm.SVC(gamma=10, probability=True) svm_train(data.trainset, model, 1470) y, predicty = svm_test(data.testset, model) eval1 = Evaluation() eval1.accuracy(y, predicty, data) with open('result_svm.txt', 'w') as f: for index, maxd in enumerate(eval1.wrong): f.write("Case #{}: {} ".format(index + 1, maxd) + '\n') # predicty=[[0.1], [0.2], [0.1], [0.2], [0.1], [0.2]] predicty = use_csk(data.testset, predicty, model) # Evaluation eval = Evaluation() eval.accuracy(y, predicty, data) final = time.time() print("time:", final - begin)
if __name__ == '__main__':
    # Entry point: train a probability-enabled SVM on the first 1470
    # training texts, evaluate on the test set, and dump the
    # misclassified cases to result_svm.txt.
    begin = time.time()

    data = Dataset()
    model = svm.SVC(gamma=10, probability=True)
    train(data.trainset, model, 1470)
    y, predicty = test(data.testset, model)

    # Named 'evaluation' rather than 'eval' to avoid shadowing the builtin.
    evaluation = Evaluation()
    evaluation.accuracy(y, predicty, data)

    # Record each wrongly-classified case, one per line.
    with open('result_svm.txt', 'w') as f:
        for index, maxd in enumerate(evaluation.wrong):
            f.write("Case #{}: {} ".format(index + 1, maxd) + '\n')

    # Report wall-clock time; previously commented out, which left
    # `begin` as a dead variable.
    final = time.time()
    print("time", final - begin)
def run(self):
    """Fit against the validation split, then evaluate the outcome."""
    outcome = self.fit_to_validation()
    Evaluation(outcome, verbose=False).run()
def _clip_and_reshape(texts, max_len):
    """Truncate each text to its first ``max_len`` whitespace tokens and
    return an (n, 1) object ndarray — the shape expected by
    convert_text_to_examples."""
    clipped = [' '.join(t.split()[:max_len]) for t in texts]
    return np.array(clipped, dtype=object)[:, np.newaxis]


def main():
    """Train and evaluate a binary BERT classifier end to end: load the
    CSV splits, tokenize, build the model, fit with best-checkpoint
    saving, then print the test metrics."""
    # Directory Setting
    train_dir = "../data/binary_train.csv"
    test_dir = "../data/binary_test.csv"
    model_dir = "./model_save"

    # HyperParameter
    max_len = 50
    epoch = 2
    batch = 512
    # NOTE(review): hidden_units is never read in this function — kept for
    # interface stability; confirm whether it can be removed.
    hidden_units = 256

    # Flow
    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir)

    print("2. pre processing")
    # Identical truncate-and-reshape step for all three splits (was
    # copy-pasted three times).
    train_x = _clip_and_reshape(train_x.tolist(), max_len)
    val_x = _clip_and_reshape(val_x.tolist(), max_len)
    test_x = _clip_and_reshape(test_x.tolist(), max_len)

    tokenizer = create_tokenizer_from_hub_module()

    train_examples = convert_text_to_examples(train_x, train_y)
    val_examples = convert_text_to_examples(val_x, val_y)
    test_examples = convert_text_to_examples(test_x, test_y)

    train_input_ids, train_input_masks, train_segment_ids, train_labels = convert_examples_to_features(
        tokenizer, train_examples, max_len)
    val_input_ids, val_input_masks, val_segment_ids, val_labels = convert_examples_to_features(
        tokenizer, val_examples, max_len)
    test_input_ids, test_input_masks, test_segment_ids, test_labels = convert_examples_to_features(
        tokenizer, test_examples, max_len)

    print("3. build model")
    model = BERT(max_len, data_type="binary")
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    # NOTE(review): `sess` is not defined in this function — presumably a
    # module-level TF session created alongside set_env(); confirm.
    initialize_vars(sess)

    # Keep only the checkpoint with the best validation accuracy.
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=model_dir + "/model-weights.{epoch:02d}-{val_acc:.6f}.hdf5",
        monitor='val_acc',
        save_best_only=True,
        verbose=1)

    model.fit(
        [train_input_ids, train_input_masks, train_segment_ids],
        train_labels,
        validation_data=([val_input_ids, val_input_masks, val_segment_ids],
                         val_labels),
        epochs=epoch,
        batch_size=batch,
        callbacks=[cp_callback])

    print("4. evaluation")
    evaluation = Evaluation(
        model, [test_input_ids, test_input_masks, test_segment_ids], test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification_bert(
        data_type="binary")
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)