# --- Experiment setup: output dirs, hyperparameters, tokenizer, data splits ---
# NOTE(review): relies on module-level names defined elsewhere in this file
# (args, trainFile, devFile, testFile, allxFile, graphFile, data_process,
# checkdir, getGraph); this chunk cannot run in isolation.

modeldir = args.modeldir + "_feat"
# checkdir presumably creates the directory if missing and returns its path
# -- TODO confirm against its definition.
modeldirpath = checkdir("models/" + modeldir + "/")

# Name the results file after the training file's basename (sans extension).
base = os.path.basename(trainFile)
tr_file_name = os.path.splitext(base)[0]
evaluation = checkdir("results_baseline/")
# NOTE(review): file handle is closed elsewhere (or leaks) -- consider a
# context manager if its lifetime is confined to this script.
resultsFile = open(evaluation + tr_file_name + "_result.txt", 'w')

# Tokenizer / model hyperparameters.
MAX_SEQUENCE_LENGTH = 20
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 300
batch_size = 128

# All inputs are tab-separated. The original re-assigned this same value
# three times between the loads below; once is enough.
delim = "\t"

# Fit the tokenizer on the full corpus so every split shares one vocabulary.
data, _, _ = data_process.getData(allxFile, delim)
word_index, tokenizer = data_process.getTokenizer(data, MAX_NB_WORDS,
                                                  MAX_SEQUENCE_LENGTH)

train_x, train_y, train_le, train_labels, _ = data_process.getDevData2(
    trainFile, tokenizer, MAX_SEQUENCE_LENGTH, delim)
dev_x, dev_y, dev_le, dev_labels, _ = data_process.getDevData2(
    devFile, tokenizer, MAX_SEQUENCE_LENGTH, delim)
test_x, test_y, test_le, test_labels, _ = data_process.getDevData2(
    testFile, tokenizer, MAX_SEQUENCE_LENGTH, delim)
allx, _, _, _, _ = data_process.getDevData2(
    allxFile, tokenizer, MAX_SEQUENCE_LENGTH, delim)

graph = getGraph(graphFile)
# NOTE(review): this chunk begins MID-STATEMENT -- the expression below is
# the tail of a write(...) call (a tab-separated metrics row: auc, wauc,
# precision, recall, f1) whose opening parenthesis lies outside this view.
str("{0:.2f}".format(auc)) + "\t" + str("{0:.2f}".format(wauc)) + "\t" +
    str("{0:.2f}".format(precision)) + "\t" + str("{0:.2f}".format(recall)) +
    "\t" + str("{0:.2f}".format(f1_score)) + "\n")
print(report)

# Score the unlabelled pool: per-class probabilities plus hard class ids.
# predict_proba / predict_classes suggest a Keras Sequential model -- TODO
# confirm; both are deprecated in modern Keras.
# NOTE(review): "ublabelled_X" / "unlabaled_data" are typo'd names defined
# elsewhere in the file; they must stay as-is here.
probability = model.predict_proba(ublabelled_X, batch_size=batch_size, verbose=1)
ul_pred_val = model.predict_classes([ublabelled_X], batch_size=batch_size, verbose=1)
# Map integer class ids back to the original string labels.
ul_pred = le.inverse_transform(ul_pred_val)

delim = "\t"
train_xdata, lab_tr_x, ids = data_process.getData(train_file, delim)
uData, uLab, uIds = data_process.getData(unlabaled_data, delim)

# Self-training selection: keep only unlabelled examples whose predicted
# class has probability >= 0.75 (confidence threshold).
ul_pred_data = []
ul_pred_labels = []
ul_pred_ids = []
for pred, prob, val, udata, id in zip(ul_pred, probability, ul_pred_val, uData, uIds):
    # prob is assumed to be a per-class probability vector indexed by the
    # predicted class id `val` -- TODO confirm output shape of the model.
    class_prob = prob[val]
    if (class_prob >= 0.75):
        ul_pred_data.append(udata)
        ul_pred_labels.append(pred)
        ul_pred_ids.append(id)

# Decompose the training-file path for constructing derived file names.
base = os.path.basename(train_file)
basename = os.path.splitext(base)[0]
dirname = os.path.dirname(train_file)