def main(_):
    """Run the ontology reasoner and, optionally, the enabled LCR-Rot-hop models.

    The boolean switches at the top select which steps execute. The models
    receive the ontology's out-of-sample accuracy and remainder size (or the
    hard-coded defaults when the ontology step is disabled).
    """
    loadData = False
    useOntology = True  # Ontology
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False  # Olaf model
    runSVM = False
    runLCRROTALT_v4 = False  # Maria Model

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadDataAndEmbeddings(FLAGS, loadData))
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        # in sample accuracy; its remainder count is kept in a separate name
        # so that remaining_size keeps describing the test data that is
        # handed to the models below.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remainingInSample_size = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        test = (FLAGS.remaining_svm_test_path if runSVM
                else FLAGS.remaining_test_path)
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # Output of the last model that ran; stays None when no model is enabled
    # so the final print cannot fail on undefined names.
    results = None

    if runLCRROTALT_v4:
        acc, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v4.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        results = [acc, pred2, fw2, bw2, tl2, tr2]
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        tf.reset_default_graph()
        acc, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        results = [acc, pred2, fw2, bw2, tl2, tr2]
        tf.reset_default_graph()

    if results is not None:
        print(results)

    print('Finished program succesfully')
def main(_):
    """Run the (optional) ontology reasoner followed by the LCR-Rot-hop model.

    When ``useOntology`` and ``runLCRROTALT`` are both enabled the two-step
    method is used: the model is evaluated on the remaining test file instead
    of the full test file.
    """
    # loadData is only for non-contextualised word embeddings.
    # Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    loadData = True
    # When run together with runLCRROTALT, the two-step method is used
    useOntology = False
    runLCRROTALT = True
    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadDataAndEmbeddings(FLAGS, loadData))
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        # in sample accuracy; kept in separate names so the test-set values
        # passed to the model below are not overwritten by the train run.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remainingInSample_size = Ontology.run(
            backup, FLAGS.train_path_ont, runSVM)
        test = (FLAGS.remaining_svm_test_path if runSVM
                else FLAGS.remaining_test_path)
        print(test[0])
        # Report the in-sample accuracy in the 'train acc' slot (previously
        # the test accuracy was printed twice).
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()
def main(_):
    """Run the ontology reasoner over every cross-validation fold.

    Per-fold accuracies and remainder sizes are collected, summarised with
    mean / standard deviation, printed, and written to the ONTOLOGY results
    file inside the cross-validation directory.
    """
    loadData = False
    useOntology = True
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    year = str(FLAGS.year)
    # NOTE(review): machine-specific absolute path; consider moving to FLAGS.
    cv_dir = ("C:/Users/Maria/Desktop/data/programGeneratedData/crossValidation"
              + year)
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'
    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadCrossValidation(FLAGS, split_size, loadData))
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)

        scores = np.asarray(acc)
        mean_acc, std_acc = np.mean(scores), np.std(scores)
        # The original path expression had mismatched quotes (a syntax
        # error); every path piece is now a separate, properly closed literal.
        results_path = (cv_dir + '/cross_results_' + year
                        + '/ONTOLOGY_' + year + '.txt')
        with open(results_path, 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(mean_acc, std_acc))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(mean_acc, std_acc))
        test = REMAIN_svm_val if runSVM else REMAIN_val
def main(_):
    """Run the (optional) ontology reasoner and the LCR-Rot-hop model with
    optional data augmentation.

    ``augment_data`` and the ``ct`` value returned by the data loader are
    forwarded to ``lcrModelAlt.main`` together with the augmentation file path.
    """
    # loadData is only for non-contextualised word embeddings.
    # Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    loadData = True
    augment_data = True  # Load data must be true to augment
    # When run together with runLCRROTALT, the two-step method is used
    useOntology = False
    runLCRROTALT = True
    # The ontology reasoner takes an SVM switch; this driver never runs the
    # SVM, and the name was previously undefined here (NameError as soon as
    # useOntology was enabled).
    runSVM = False

    # determine if backupmethod is used
    backup = bool(runLCRROTALT)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector, ct = (
        loadDataAndEmbeddings(FLAGS, loadData, augment_data))
    remaining_size = 250
    accuracyOnt = 0.87 if FLAGS.year == 2016 else 0.8277

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy (evaluated on the test file)
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        # NOTE(review): the in-sample run on FLAGS.train_path_ont is disabled,
        # so the 'train acc' slot below repeats the test accuracy.
        test = FLAGS.remaining_test_path
        print(test[0])
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyOnt, accuracyOnt, remaining_size))
    else:
        # NOTE(review): the remaining-test file is used even without the
        # ontology step -- confirm this is intended.
        test = FLAGS.remaining_test_path

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size,
            augment_data, FLAGS.augmentation_file_path, ct)
        tf.reset_default_graph()
def main(_):
    """Run k-fold cross validation for every model enabled by the switches.

    For each enabled model the per-fold accuracies are collected, the mean and
    standard deviation are printed, and both are written to a results file.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runLCRModelAlt_hierarchical_v4 = True
    runAdversarial = True

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    year = str(FLAGS.year)
    cv_dir = (FLAGS.hardcoded_path
              + "/data/programGeneratedData/crossValidation" + year)
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'
    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'
    # Result locations: some models write below the cross-validation data
    # directory, the others relative to the working directory.
    abs_results = cv_dir + '/cross_results_' + year + '/'
    rel_results = "cross_results_" + year + "/"

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadCrossValidation(FLAGS, split_size, loadData))
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)
        scores = np.asarray(acc)
        mean_acc, std_acc = np.mean(scores), np.std(scores)
        # The path used to contain the literal text '"+str(FLAGS.year)+"'
        # because of misplaced quotes.
        with open(abs_results + 'ONTOLOGY_' + year + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(mean_acc, std_acc))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(mean_acc, std_acc))
        test = REMAIN_svm_val if runSVM else REMAIN_val
    else:
        test = BASE_svm_val if runSVM else BASE_val

    def cross_validate(model_main, train_base, test_base, folds,
                       announce=True, **model_kwargs):
        """Run model_main on each fold and return the list of fold accuracies."""
        fold_acc = []
        for i in folds:
            outcome = model_main(train_base + str(i) + '.txt',
                                 test_base + str(i) + '.txt',
                                 accuracyOnt, test_size[i], remaining_size,
                                 **model_kwargs)
            # Models return either the accuracy alone or a tuple whose first
            # element is the accuracy; only the accuracy is averaged.
            if isinstance(outcome, (tuple, list)):
                outcome = outcome[0]
            fold_acc.append(outcome)
            tf.reset_default_graph()
            if announce:
                print('iteration: ' + str(i))
        return fold_acc

    def report(fold_acc, path, mean_fmt='{}', std_fmt='{}'):
        """Print the cross-validation summary and write it to path."""
        scores = np.asarray(fold_acc)
        summary = ('Accuracy: ' + mean_fmt + ', St Dev:' + std_fmt).format(
            np.mean(scores), np.std(scores))
        with open(path, 'w') as result:
            # Each record ends with a real newline (the summary line used to
            # end with the two characters '/n').
            result.write(str(fold_acc) + '\n')
            result.write(summary + ' \n')
        print(str(split_size) + '-fold cross validation results')
        print(summary)

    if runLCRROT:
        # NOTE(review): only fold 8 is evaluated here although the summary is
        # labelled as a full k-fold result -- confirm this is intended.
        acc = cross_validate(lcrModel.main, BASE_train, test, [8])
        report(acc, rel_results + "LCRROT_" + year + '.txt')

    if runLCRROTINVERSE:
        acc = cross_validate(lcrModelInverse.main, BASE_train, test,
                             range(split_size))
        report(acc, rel_results + "LCRROT_INVERSE_" + year + '.txt')

    if runLCRROTALT:
        acc = cross_validate(lcrModelAlt_hierarchical_v3.main, BASE_train,
                             test, range(split_size))
        report(acc, abs_results + 'LCRROT_ALT_' + year + '.txt')

    if runCABASC:
        # CABASC is always evaluated on the remainder files.
        acc = cross_validate(cabascModel.main, BASE_train, REMAIN_val,
                             range(split_size))
        report(acc, rel_results + "CABASC_" + year + '.txt')

    if runSVM:
        acc = cross_validate(svmModel.main, BASE_svm_train, test,
                             range(split_size), announce=False)
        report(acc, rel_results + "SVM_" + year + '.txt',
               mean_fmt='{:.5f}', std_fmt='{:.4f}')

    if runLCRModelAlt_hierarchical_v4:
        print('Running CrossVal V4, year = ' + str(FLAGS.year))
        acc = cross_validate(lcrModelAlt_hierarchical_v4.main, BASE_train,
                             test, range(split_size), announce=False)
        report(acc, abs_results + "v4_" + year + '.txt',
               mean_fmt='{:.5f}', std_fmt='{:.4f}')

    if runAdversarial:
        print('Running CrossVal adversarial, year = ' + str(FLAGS.year))
        # Hyper-parameters differ per dataset year.
        if FLAGS.year == 2015:
            adv_kwargs = dict(learning_rate_dis=0.02, learning_rate_gen=0.002,
                              keep_prob=0.3, momentum_dis=0.9,
                              momentum_gen=0.36, l2=0.00001, k=3,
                              WriteFile=False)
        else:
            adv_kwargs = dict(learning_rate_dis=0.03, learning_rate_gen=0.0045,
                              keep_prob=0.3, momentum_dis=0.7,
                              momentum_gen=0.42, l2=0.00001, k=3,
                              WriteFile=False)
        acc = cross_validate(adversarial.main, BASE_train, test,
                             range(split_size), announce=False, **adv_kwargs)
        report(acc, abs_results + "Adv_" + year + '.txt',
               mean_fmt='{:.5f}', std_fmt='{:.4f}')

    print('Finished program succesfully')
def main(_):
    """Cross-validate the ontology reasoner and/or the LCR-Rot-hop model.

    Fold accuracies are averaged, printed and written to result files in the
    ``cross_results_<year>`` directory.
    """
    loadData = True  # only for non-contextualised word embeddings.
    augment_data = False  # Load data must be true to augment
    useOntology = False
    runLCRROTALT = True
    # The ontology reasoner takes an SVM switch; it was previously undefined
    # in this function (NameError as soon as useOntology was enabled).
    runSVM = False

    # determine if backupmethod is used
    backup = bool(runLCRROTALT)

    year = str(FLAGS.year)
    cv_dir = "data/programGeneratedData/crossValidation" + year
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    REMAIN_val = cv_dir + '/cross_val_remainder_'
    results_dir = "cross_results_" + year + "/"

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadCrossValidation(FLAGS, split_size, augment_data, loadData))
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)
        scores = np.asarray(acc)
        mean_acc, std_acc = np.mean(scores), np.std(scores)
        with open(results_dir + "ONTOLOGY_" + year + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(mean_acc, std_acc))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(mean_acc, std_acc))
        test = REMAIN_val
    else:
        test = BASE_val

    if runLCRROTALT:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size, augment_data,
                FLAGS.augmentation_file_path, ct={})
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        scores = np.asarray(acc)
        mean_acc, std_acc = np.mean(scores), np.std(scores)
        with open(results_dir + "LCRROT_ALT_" + year + '.txt', 'w') as result:
            result.write(str(acc))
            # Terminate the line with a real newline (was the literal '/n').
            result.write('Accuracy: {}, St Dev:{} \n'.format(mean_acc, std_acc))
        print(str(split_size) + '-fold cross validation results')
        print('Accuracy: {}, St Dev:{}'.format(mean_acc, std_acc))

    print('Finished program succesfully')
def main(_):
    """Run the ontology reasoner and the enabled sentiment models once.

    When ``weightanalysis`` is on and LCR-Rot, LCR-Rot-hop and CABASC have all
    been run, a per-sentence comparison of predictions and the models'
    per-word outputs is written to ``sentence_analysis.txt``.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False
    weightanalysis = False

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadDataAndEmbeddings(FLAGS, loadData))
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        # in sample accuracy; kept in separate names so the test-set values
        # passed to the models below are not overwritten by the train run.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remainingInSample_size = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        test = (FLAGS.remaining_svm_test_path if runSVM
                else FLAGS.remaining_test_path)
        # Report the in-sample accuracy in the 'train acc' slot (previously
        # the test accuracy was printed twice).
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # LCR-Rot model
    if runLCRROT:
        _, pred1, fw1, bw1, tl1, tr1, sent, target, true = lcrModel.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # CABASC model
    if runCABASC:
        _, pred3, weights = cabascModel.main(FLAGS.train_path, test,
                                             accuracyOnt, test_size,
                                             remaining_size)

        # The analysis reads the outputs of all three models, so it is only
        # attempted here, where the CABASC results are guaranteed to exist.
        if weightanalysis and runLCRROT and runLCRROTALT:

            def joined(values):
                """Render a sequence of values as one ';'-separated field."""
                return ";".join(str(x) for x in values)

            with open('sentence_analysis.txt', "w") as outF:
                for i, value in enumerate(pred3):
                    if not (value == 1 and pred2[i] == 0):
                        continue
                    # Split the sentence into the words before and after the
                    # '$t$' target placeholder.
                    sentleft, sentright = [], []
                    before_target = True
                    for word in sent[i]:
                        if word == '$t$':
                            before_target = False
                            continue
                        if before_target:
                            sentleft.append(word)
                        else:
                            sentright.append(word)
                    print(i)
                    record = [
                        str(i),
                        'lcr pred: {}; CABASC pred: {}; lcralt pred: {}; true: {}'
                        .format(pred1[i], pred3[i], pred2[i], true[i]),
                        # LCR-Rot outputs
                        ";".join(sentleft), joined(fw1[i][0]),
                        ";".join(sentright), joined(bw1[i][0]),
                        ";".join(target[i]), joined(tl1[i][0]),
                        joined(tr1[i][0]),
                        # LCR-Rot-hop outputs
                        ";".join(sentleft), joined(fw2[i][0]),
                        ";".join(sentright), joined(bw2[i][0]),
                        ";".join(target[i]), joined(tl2[i][0]),
                        joined(tr2[i][0]),
                        # CABASC outputs
                        ";".join(sent[i]), joined(weights[i][0]),
                    ]
                    outF.write("\n".join(record) + "\n")

    # BoW model
    if runSVM:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program succesfully')
def main(_):
    """Run the selected LCR-Rot-hop variant, optionally with curriculum learning.

    With ``runOne_Pass`` or ``runBaby_Steps`` enabled, a difficulty ordering of
    the training instances (``sort_ind``) is loaded from
    ``FLAGS.sorted_indices`` or, if that file cannot be read, computed with the
    auxiliary model and saved there.
    """
    # loadData is only for non-contextualised word embeddings.
    # Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    loadData = False
    # When run together with runLCRROTALT, the two-step method is used
    useOntology = False
    runLCRROTALT = False
    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT_v1 = False
    runLCRROTALT_v2 = False
    runLCRROTALT_v3 = False
    runLCRROTALT_v4 = True

    # Choose either one_pass or baby_steps for curriculum learning.
    runOne_Pass = False
    runBaby_Steps = True
    # Curriculum learning (and thus the sorted indices) is needed exactly when
    # one of the two strategies is active. Always binding the name avoids a
    # NameError when both strategies are switched off.
    curriculum_learning = runOne_Pass or runBaby_Steps

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM or runLCRROTALT_v1 or runLCRROTALT_v2
              or runLCRROTALT_v3 or runLCRROTALT_v4)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadDataAndEmbeddings(FLAGS, loadData))
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    tf.reset_default_graph()

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy (evaluated on the test file)
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        # NOTE(review): the in-sample run on FLAGS.train_path_ont is disabled,
        # so the 'train acc' slot below repeats the test accuracy.
        test = (FLAGS.remaining_svm_test_path if runSVM
                else FLAGS.remaining_test_path)
        print(test[0])
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # Get curriculum learning scores, either the ones already saved, or new
    # ones. Make sure that the instances in FLAGS.train_path_ont and
    # FLAGS.train_path (and the two test sets) have the same order of their
    # instances!
    if curriculum_learning:
        try:
            # NOTE(review): pickle executes arbitrary code on load; only use
            # a sorted-indices file produced by this program.
            with open(FLAGS.sorted_indices, "rb") as handle:
                sort_ind = pickle.load(handle)
        except (OSError, EOFError, pickle.UnpicklingError):
            # No usable cache: score the instances with the auxiliary model.
            tr_features, tr_sent = sentiWordNet.main(
                FLAGS.train_path_ont, FLAGS.train_aspect_categories)
            te_features, te_sent = sentiWordNet.main(
                FLAGS.test_path_ont, FLAGS.test_aspect_categories)
            tr_sent = np.asarray(utils.change_y_to_onehot(tr_sent))
            te_sent = np.asarray(utils.change_y_to_onehot(te_sent))
            print(tr_features.shape)
            print(tr_sent.shape)
            print(te_features.shape)
            print(te_sent.shape)
            curr_scores = auxModel.main(tr_features, te_features, tr_sent,
                                        te_sent)
            tf.reset_default_graph()
            # Instance indices ordered by ascending curriculum score.
            inds1 = np.arange(0, len(curr_scores))
            sort_ind = [x for _, x in sorted(zip(curr_scores, inds1))]
            with open(FLAGS.sorted_indices, "wb") as handle:
                pickle.dump(sort_ind, handle)

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v1:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v1.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v2:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v3:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v3.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v4:
        if runOne_Pass:
            acc = lcrModelAlt_hierarchical_v4_one_pass.main(
                FLAGS.train_path, test, accuracyOnt, test_size,
                remaining_size, sort_ind, FLAGS.num_buckets)
            tf.reset_default_graph()
        elif runBaby_Steps:
            tf.reset_default_graph()
            acc = lcrModelAlt_hierarchical_v4_baby_steps.main(
                FLAGS.train_path, test, accuracyOnt, test_size,
                remaining_size, sort_ind, FLAGS.num_buckets)
            tf.reset_default_graph()
        else:
            # No curriculum strategy selected: plain train/eval/test run.
            acc = lcrModelAlt_hierarchical_v4_trainevaltest.main(
                FLAGS.hyper_train_path, FLAGS.hyper_eval_path, test,
                FLAGS.train_path, accuracyOnt, test_size, remaining_size)
            tf.reset_default_graph()
def main(_):
    """Run the ontology reasoner and the enabled sentiment models once.

    The boolean switches at the top select which models are trained and
    evaluated; each model receives the ontology accuracy and remainder size
    (or the hard-coded defaults when the ontology step is disabled).
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runlcrDoubleRAA = True
    runINVMULTIHOP1 = False
    runlcrDoubleRAAtype2 = False

    # determine if backupmethod is used
    # NOTE(review): runlcrDoubleRAAtype2 is not part of this condition,
    # unlike the other model switches -- confirm whether that is intended.
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM or runlcrDoubleRAA or runINVMULTIHOP1)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadDataAndEmbeddings(FLAGS, loadData))
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        # in sample accuracy; kept in separate names so the test-set values
        # passed to the models below are not overwritten by the train run.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remainingInSample_size = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        test = (FLAGS.remaining_svm_test_path if runSVM
                else FLAGS.remaining_test_path)
        # Report the in-sample accuracy in the 'train acc' slot (previously
        # the test accuracy was printed twice).
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # LCR-Rot model
    if runLCRROT:
        _, pred1, fw1, bw1, tl1, tr1, sent, target, true = lcrModel.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAA:
        _, pred2, fw2, bw2, tl2, tr2 = lcrDoubleRAA.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runINVMULTIHOP1:
        _, pred2, fw2, bw2, tl2, tr2 = lcrinvmodel2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAAtype2:
        _, pred2, fw2, bw2, tl2, tr2 = lcrDoubleRAAtype2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # BoW model
    if runSVM:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program succesfully')
def main(_):
    """Run k-fold cross validation for every model enabled by the switches.

    For each enabled model the per-fold accuracies are collected, the mean and
    standard deviation are printed, and both are written to a file in the
    ``cross_results_<year>`` directory.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    year = str(FLAGS.year)
    cv_dir = "data/programGeneratedData/crossValidation" + year
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'
    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'
    results_dir = "cross_results_" + year + "/"

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = (
        loadCrossValidation(FLAGS, split_size, loadData))
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)
        scores = np.asarray(acc)
        mean_acc, std_acc = np.mean(scores), np.std(scores)
        with open(results_dir + "ONTOLOGY_" + year + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(mean_acc, std_acc))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(mean_acc, std_acc))
        test = REMAIN_svm_val if runSVM else REMAIN_val
    else:
        test = BASE_svm_val if runSVM else BASE_val

    def cross_validate(model_main, train_base, test_base, folds,
                       announce=True):
        """Run model_main on each fold and return the list of fold accuracies."""
        fold_acc = []
        for i in folds:
            outcome = model_main(train_base + str(i) + '.txt',
                                 test_base + str(i) + '.txt',
                                 accuracyOnt, test_size[i], remaining_size)
            # Models return either the accuracy alone or a tuple whose first
            # element is the accuracy; only the accuracy is averaged.
            if isinstance(outcome, (tuple, list)):
                outcome = outcome[0]
            fold_acc.append(outcome)
            tf.reset_default_graph()
            if announce:
                print('iteration: ' + str(i))
        return fold_acc

    def report(fold_acc, path, mean_fmt='{}', std_fmt='{}'):
        """Print the cross-validation summary and write it to path."""
        scores = np.asarray(fold_acc)
        summary = ('Accuracy: ' + mean_fmt + ', St Dev:' + std_fmt).format(
            np.mean(scores), np.std(scores))
        with open(path, 'w') as result:
            # Each record ends with a real newline (the summary line used to
            # end with the two characters '/n').
            result.write(str(fold_acc) + '\n')
            result.write(summary + ' \n')
        print(str(split_size) + '-fold cross validation results')
        print(summary)

    if runLCRROT:
        # NOTE(review): only fold 8 is evaluated here although the summary is
        # labelled as a full k-fold result -- confirm this is intended.
        acc = cross_validate(lcrModel.main, BASE_train, test, [8])
        report(acc, results_dir + "LCRROT_" + year + '.txt')

    if runLCRROTINVERSE:
        acc = cross_validate(lcrModelInverse.main, BASE_train, test,
                             range(split_size))
        report(acc, results_dir + "LCRROT_INVERSE_" + year + '.txt')

    if runLCRROTALT:
        acc = cross_validate(lcrModelAlt.main, BASE_train, test,
                             range(split_size))
        report(acc, results_dir + "LCRROT_ALT_" + year + '.txt')

    if runCABASC:
        # CABASC is always evaluated on the remainder files.
        acc = cross_validate(cabascModel.main, BASE_train, REMAIN_val,
                             range(split_size))
        report(acc, results_dir + "CABASC_" + year + '.txt')

    if runSVM:
        acc = cross_validate(svmModel.main, BASE_svm_train, test,
                             range(split_size), announce=False)
        report(acc, results_dir + "SVM_" + year + '.txt',
               mean_fmt='{:.5f}', std_fmt='{:.4f}')

    print('Finished program succesfully')
def main(_):
    """Run the selected sentiment model(s), optionally after the ontology reasoner.

    The run is configured through the boolean switches at the top of the
    function body. When ``useOntology`` is set, the ontology reasoner is run
    first on ``FLAGS.test_path_ont`` and the selected models are then given
    the "remaining" test file; otherwise the models are given the full test
    file. Every enabled model is trained/evaluated through its module's
    ``main`` and the default TensorFlow graph is reset afterwards.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the application runner -- confirm against the caller).
    """
    # ------------------------------------------------------------------
    # Run configuration
    # ------------------------------------------------------------------
    loadData = False  # only for non-contextualised word embeddings.
    # Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    useOntology = False  # When run together with runLCRROTALT, the two-step method is used
    runLCRROTALT = False
    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    weightanalysis = False
    runLCRROTALT_v1 = False
    runLCRROTALT_v2 = False
    runLCRROTALT_v3 = False
    runLCRROTALT_v4 = True
    runAdversarial = False

    # Save and Restore if desired
    # NOTE(review): weightanalysis, Save, RestoreSave and restore_path are not
    # read anywhere in this function; they are kept as configuration stubs.
    Save = False
    RestoreSave = False
    restore_path = '/Users/ronhochstenbach/Desktop/Ectrie Thesis/Venv_Thesis/Saved_Models/2020-05-29 23:08:39.394204_BERT_2016/Iter_0-470'  # do not add .meta!

    # The backup method is used as soon as any model is enabled.
    backup = any((
        runCABASC,
        runLCRROT,
        runLCRROTALT,
        runLCRROTINVERSE,
        runSVM,
        runLCRROTALT_v1,
        runLCRROTALT_v2,
        runLCRROTALT_v3,
        runLCRROTALT_v4,
        runAdversarial,
    ))

    # Retrieve data and word embeddings.
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(train_size)
    print(test_size)

    # Defaults handed to the models when the ontology reasoner is not run;
    # both are overwritten by the reasoner's results when it is.
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # Out-of-sample accuracy: the reasoner is evaluated on the test file.
        ontology_reasoner = OntReasoner()
        accuracyOnt, remaining_size = ontology_reasoner.run(
            backup, FLAGS.test_path_ont, runSVM)
        # The in-sample run (a fresh OntReasoner on FLAGS.train_path_ont) is
        # disabled, so no train accuracy is available to report here.
        test = FLAGS.remaining_svm_test_path if runSVM else FLAGS.remaining_test_path
        # NOTE(review): prints only the first element of `test` (the first
        # character if it is a path string) -- looks like leftover debug
        # output; confirm before removing.
        print(test[0])
        # Only the test accuracy is computed above; the previous message also
        # printed it under the label "train acc", which was misleading.
        print('test acc={:.4f}, remaining size={}'.format(
            accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # Each model is referenced only inside its own branch so that modules of
    # disabled models are never looked up. The graph is reset after every run
    # so that consecutive models do not share TensorFlow state.

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # Hierarchical variants v1-v4 of the LCR-Rot-hop model
    if runLCRROTALT_v1:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v1.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v2:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v3:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v3.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v4:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v4.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # Adversarial training with fixed hyperparameters
    if runAdversarial:
        print('Running Adversarial')
        _, pred2, fw2, bw2, tl2, tr2 = adversarial.main(
            FLAGS.train_path,
            test,
            accuracyOnt,
            test_size,
            remaining_size,
            learning_rate_dis=0.02,
            learning_rate_gen=0.002,
            keep_prob=0.3,
            momentum_dis=0.9,
            momentum_gen=0.36,
            l2=0.00001,
            k=3,
            WriteFile=True)
        tf.reset_default_graph()