def main():
    """Run all models over the full grid of seeds and split sizes.

    After applying ``base_config``, every combination of seed, train amount,
    unlabeled amount and test amount selects its index set through
    ``update_config``, loads the data once, and calls ``test_all_models``
    ``repeats_per_seed`` times. The value returned by ``test_all_models``
    replaces ``results`` after each call.
    """
    update_config(base_config)
    results = {"test_name": [], "acc": []}

    for seed in seeds:
        for train_amount in train_amounts:
            for unlab_amount in unlab_amounts:
                for test_amount in test_amounts:
                    # Index sets are addressed by a name that encodes the
                    # seed and the three split sizes.
                    indices_name = "{}_{}train{}unlab{}test".format(
                        seed, train_amount, unlab_amount, test_amount)
                    update_config({"indices": indices_name})
                    docs, labels, tvt_idx = load_data()

                    # Metadata handed to test_all_models with every run.
                    info_to_save = dict(
                        seed=seed,
                        train_amount=train_amount,
                        unlab_amount=unlab_amount,
                        test_amount=test_amount,
                    )

                    for _ in range(repeats_per_seed):
                        results = test_all_models(
                            results, docs, labels, tvt_idx, info_to_save)
def main():
    """Run all models for every seed on every language's Reuters dataset.

    ``train_amount``, ``unlab_amount`` and ``test_amount`` are not assigned
    in this function; they are read from the enclosing scope. For each
    (seed, language) pair the dataset name and index-set name are pushed
    through ``update_config``, the data is loaded once, and
    ``test_all_models`` is called ``repeats_per_seed`` times.
    """
    update_config(base_config)
    results = {"test_name": [], "acc": []}

    for seed in seeds:
        for language in languages:
            # Metadata handed to test_all_models with every run.
            info_to_save = dict(
                seed=seed,
                train_amount=train_amount,
                unlab_amount=unlab_amount,
                test_amount=test_amount,
                language=language,
            )

            data_name = "reuters_{}_FULL_min5".format(language)
            indices_name = "{}_{}train{}unlab{}test".format(
                seed, train_amount, unlab_amount, test_amount)
            update_config({"dataset": data_name, "indices": indices_name})
            docs, labels, tvt_idx = load_data()

            for _ in range(repeats_per_seed):
                results = test_all_models(
                    results, docs, labels, tvt_idx, info_to_save)
def main():
    """Run all models with and without unique document embeddings.

    After applying ``transductive_settings``, every combination of seed,
    unlabeled amount and labeled amount selects its index set and loads the
    data once. Each of the ``repeats_per_seed`` repetitions then calls
    ``test_all_models`` twice: first with ``unique_document_embeddings`` set
    to True, then with it set to False.
    """
    results = {
        "test_name": [],
        "acc": [],
        "ductive": [],
        "unique_embeddings": [],
    }
    update_config(transductive_settings)

    for seed in seeds:
        for unlabeled_amount in unlabeled_amounts:
            for labeled_amount in labeled_amounts:
                # Metadata handed to test_all_models with every run.
                info_to_save = dict(
                    seed=seed,
                    unlabeled_amount=unlabeled_amount,
                    labeled_amount=labeled_amount,
                )

                indices_name = "{}_{}lab{}unlab".format(
                    seed, labeled_amount, unlabeled_amount)
                update_config({"indices": indices_name})
                docs, labels, tvt_idx = load_data()

                for _ in range(repeats_per_seed):
                    # Order matters for reproducing the original run
                    # sequence: unique embeddings first, shared second.
                    for unique in (True, False):
                        update_config({"unique_document_embeddings": unique})
                        results = test_all_models(
                            results, docs, labels, tvt_idx, info_to_save)
def main():
    """Evaluate every config on every (seed, unlabeled, labeled) data split.

    For each split the index set is selected and the data loaded once. Each
    entry of ``configs`` is then applied with ``update_config`` and evaluated
    ``repeats_per_seed`` times through ``evaluate_current_config``. After
    every successful evaluation the seed, split sizes, returned accuracy and
    the current value of each entry in ``variables_to_follow`` are appended
    to ``results``, and ``save_dic(results)`` is called.

    A failing evaluation is reported on stdout and skipped so the sweep
    continues. A ``KeyboardInterrupt`` during evaluation ends the program.

    Raises:
        SystemExit: when interrupted with Ctrl-C during an evaluation.
    """
    results = {
        "seed": [],
        "unlabeled_amount": [],
        "labeled_amount": [],
        "acc": [],
    }
    for var in variables_to_follow:
        results[var] = []

    for seed in seeds:
        for unlabeled_amount in unlabeled_amounts:
            for labeled_amount in labeled_amounts:
                # Load data
                indices_name = (str(seed) + "_" + str(labeled_amount) + "lab"
                                + str(unlabeled_amount) + "unlab")
                update_config({"indices": indices_name})
                docs, labels, tvt_idx = load_data()

                # Test configs
                for cfg in configs:
                    update_config(cfg)
                    for _ in range(repeats_per_seed):
                        # "name:value," for every followed variable. The
                        # trailing comma is kept because the error message
                        # below prints the string in exactly this form.
                        summary_string = "".join(
                            var + ":" + str(config[var]) + ","
                            for var in variables_to_follow)
                        print("Working on {" + summary_string[:-1] + "}")

                        try:
                            result = evaluate_current_config(
                                docs, labels, tvt_idx, verbose=True)

                            results["seed"].append(seed)
                            results["unlabeled_amount"].append(
                                unlabeled_amount)
                            results["labeled_amount"].append(labeled_amount)
                            results["acc"].append(result)
                            for var in variables_to_follow:
                                results[var].append(config[var])
                            save_dic(results)
                        except KeyboardInterrupt:
                            # Raise SystemExit directly instead of calling
                            # the interactive exit() helper, which only
                            # exists when the site module is loaded.
                            raise SystemExit
                        except Exception as e:
                            # Deliberate best-effort: report and move on to
                            # the next repetition.
                            print(summary_string, "gone wrong! Error:", e)
def main():
    """Compare test accuracy with and without training docs in the graph.

    The test indices are cut in half. The first half ("in training test") is
    part of the dataset the model is trained on; the second half ("out of
    training test") is not. After every training epoch the trainer is tested
    on four datasets: each half combined with the train/val documents, and
    each half on its own. The three evaluation-only datasets are built with
    ``force_vocab`` set to the training dataset's vocabulary.

    Training stops early when ``config["terminate_early"]`` is set and the
    validation loss has not improved for ``config["terminate_patience"]``
    consecutive epochs.
    """
    docs, labels, (t_idx, v_idx, test_idx) = load_data()

    half = len(test_idx) // 2
    in_training_test = test_idx[:half]
    out_training_test = test_idx[half:]

    # Number of leading train/val entries kept in the "only" datasets.
    a = 0
    b = 0
    n_train_val = len(t_idx) + len(v_idx)

    # NOTE(review): t_idx and v_idx are reused unchanged as positions inside
    # the re-indexed subsets below; that presumably relies on them being the
    # leading, in-order indices of the data -- confirm against load_data.

    # train on (train, val, in_training_test)
    fit_selection = t_idx + v_idx + in_training_test
    train_docs = docs[fit_selection]
    train_labels = labels[fit_selection]
    train_and_intest_indices = (
        t_idx, v_idx, range(n_train_val, len(train_labels)))

    # NOTE(review): the train/val slots of the two "only" index tuples wrap
    # the slice in an extra list ([t_idx[:a]]). If t_idx is a plain list and
    # a == 0 this gives [[]] rather than [] -- confirm this is what
    # DocumentGraphDataset expects.

    # test on (in_training_test)
    in_only_selection = t_idx[:a] + v_idx[:b] + in_training_test
    only_in_training_docs = docs[in_only_selection]
    only_in_training_labels = labels[in_only_selection]
    only_in_training_indices = (
        [t_idx[:a]], [v_idx[:b]],
        range(a + b, len(only_in_training_labels)))

    # test on (out_training_test)
    out_only_selection = t_idx[:a] + v_idx[:b] + out_training_test
    only_out_training_docs = docs[out_only_selection]
    only_out_training_labels = labels[out_only_selection]
    only_out_training_indices = (
        [t_idx[:a]], [v_idx[:b]],
        range(a + b, len(only_out_training_labels)))

    # test on (train, val, out_training_test)
    out_full_selection = t_idx + v_idx + out_training_test
    train_and_outtest_docs = docs[out_full_selection]
    train_and_outtest_labels = labels[out_full_selection]
    train_and_outtest_indices = (
        t_idx, v_idx, range(n_train_val, len(train_and_outtest_labels)))

    train_and_intest_dataset = DocumentGraphDataset(
        train_docs, train_labels, train_and_intest_indices)
    shared_vocab = train_and_intest_dataset.vocab
    only_in_training_dataset = DocumentGraphDataset(
        only_in_training_docs, only_in_training_labels,
        only_in_training_indices, force_vocab=shared_vocab)
    only_out_training_dataset = DocumentGraphDataset(
        only_out_training_docs, only_out_training_labels,
        only_out_training_indices, force_vocab=shared_vocab)
    train_and_outtest_dataset = DocumentGraphDataset(
        train_and_outtest_docs, train_and_outtest_labels,
        train_and_outtest_indices, force_vocab=shared_vocab)

    # Evaluated after each epoch, in this order.
    extra_eval_datasets = (
        only_in_training_dataset,
        only_out_training_dataset,
        train_and_outtest_dataset,
    )

    trainer = None
    best_val_loss = float('inf')
    time_since_best = 0

    for epoch in range(config["epochs"]):
        # Model and trainer are created lazily on the first epoch; later
        # epochs only point the trainer back at the training dataset.
        if trainer is None:
            model = create_model(train_and_intest_dataset)
            trainer = Trainer(train_and_intest_dataset, model)
        else:
            trainer.update_data(train_and_intest_dataset)

        train_loss, val_loss = trainer.train_epoch()
        test_acc_train_and_intest = trainer.test()

        extra_accs = []
        for eval_dataset in extra_eval_datasets:
            trainer.update_data(eval_dataset)
            extra_accs.append(trainer.test())
        (test_acc_only_in_training,
         test_acc_only_out_training,
         test_acc_train_and_outtest) = extra_accs

        print("[epoch %02d] Train loss %.4f, Val loss %.4f"
              % (epoch, train_loss, val_loss))
        print(" acc on in training test, with training docs in graph: %.4f"
              % test_acc_train_and_intest)
        print(" acc on in training test, WITHOUT training docs in graph: %.4f"
              % test_acc_only_in_training)
        print(
            " acc on out of training test, WITHOUT training docs in graph: %.4f"
            % test_acc_only_out_training)
        print(" acc on out of training test, with training docs in graph: %.4f"
              % test_acc_train_and_outtest)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            time_since_best = 0
        else:
            time_since_best += 1

        if (config["terminate_early"]
                and time_since_best >= config["terminate_patience"]):
            print("\n[RESULT!] Final test score: see above")
            break
def main():
    """Train one model under ``quick_config`` and print per-epoch metrics.

    When both ``config["sampled_training"]`` and
    ``config["unsupervised_loss"]`` are set, ``trainer.train_epoch()`` is
    unpacked into eight values and ``trainer.test()`` into four, and a
    detailed loss breakdown is printed; the validation loss used for early
    stopping is then the summed supervised + unsupervised validation loss.
    Otherwise the two-value / single-value forms are used and a running
    high score of the test accuracy is printed.

    The test accuracy of the epoch with the lowest validation loss is
    printed as the final score.
    """
    update_config(quick_config)
    docs, labels, tvt_idx = load_data()
    dataset = DocumentGraphDataset(docs, labels, tvt_idx)
    model = create_model(dataset)
    trainer = Trainer(dataset, model)

    best_val_loss = float('inf')
    best_val_loss_acc = 0
    time_since_best = 0
    high_score = 0

    for epoch in range(config["epochs"]):
        # split for special debug printing
        if config["sampled_training"] and config["unsupervised_loss"]:
            (train_loss, val_loss,
             unsup_train_loss_pos, unsup_train_loss_neg,
             unsup_val_loss_pos, unsup_val_loss_neg,
             unsup_test_pos, unsup_test_neg) = trainer.train_epoch()
            (test_acc, test_loss,
             unsup_test_loss_pos, unsup_test_loss_neg) = trainer.test()

            total_train = (train_loss + unsup_train_loss_pos
                           + unsup_train_loss_neg)
            total_val = val_loss + unsup_val_loss_pos + unsup_val_loss_neg
            # Not printed at the moment; kept so the computation the
            # original performed still runs.
            total_test = test_loss + unsup_test_loss_pos + unsup_test_loss_neg

            print("[epoch %02d] Test Acc %.4f (Trained %s-supervised)"
                  % (epoch, test_acc, config['sup_mode']))

            sup_share_train = train_loss / total_train * 100
            print("\t Train Loss: %.4f (%.4f / %.4f / %.4f) (%.0f%% sup)"
                  % (total_train, train_loss, unsup_train_loss_pos,
                     unsup_train_loss_neg, sup_share_train))

            sup_share_val = val_loss / total_val * 100
            print("\t Val Loss: %.4f (%.4f / %.4f / %.4f) (%.0f%% sup)"
                  % (total_val, val_loss, unsup_val_loss_pos,
                     unsup_val_loss_neg, sup_share_val))

            unsup_on_test = unsup_test_pos + unsup_test_neg
            test_share = unsup_on_test / (unsup_on_test + total_train) * 100
            print("\t Training on test Losses: %.4f, %.4f, %.1f%% of total"
                  % (unsup_test_pos, unsup_test_neg, test_share))

            # Early stopping tracks the combined validation loss here.
            val_loss = total_val
        else:
            train_loss, val_loss = trainer.train_epoch()
            test_acc = trainer.test()
            high_score = max(test_acc, high_score)
            print(
                "[epoch %02d] Train loss %.4f, Val loss %.4f, Test Acc %.4f, Highscore: %.4f"
                % (epoch, train_loss, val_loss, test_acc, high_score))

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_loss_acc = test_acc
            time_since_best = 0
        else:
            time_since_best += 1

        if (config["terminate_early"]
                and time_since_best >= config["terminate_patience"]):
            # NOTE(review): the results of these final test() calls are not
            # used afterwards; the calls are kept in case test() has side
            # effects. In the unsupervised mode the final score is printed
            # here and again after the loop -- confirm that is intended.
            if config["sampled_training"] and config["unsupervised_loss"]:
                test_acc, _, _, _ = trainer.test()
                print("\n[RESULT!] Final test score: ", best_val_loss_acc)
            else:
                test_acc = trainer.test()
            break

    print("\n[RESULT!] Final test score: ", best_val_loss_acc)
def main():
    """Train on random sub-splits each epoch and log full-dataset accuracy.

    The model and trainer are created once on the complete dataset. Every
    epoch draws a random number of splits between 1 and 40, shuffles the
    train/val/test index lists in place, and trains one ``train_epoch`` on
    each equally sized slice of those lists. The trainer is then pointed at
    the complete dataset again and tested. Per-epoch averages of the split
    losses and accuracies, the split count and the full-dataset accuracy are
    collected in ``results`` and written to
    ``./results/<experiment_name>.csv``.
    """
    # load data
    docs, labels, tvt_idx = load_data()
    train_idx, val_idx, test_idx = tvt_idx

    results = {
        "epochs": [],
        "average_train_loss": [],
        "average_val_loss": [],
        "average_test_acc": [],
        "split_amount": [],
        "acc": [],
    }

    # create model with vocab for entire dataset
    dataset = DocumentGraphDataset(docs, labels, tvt_idx)
    model = create_model(dataset)
    trainer = Trainer(dataset, model)

    for i in range(config["epochs"]):
        number_of_splits = random.randint(1, 40)

        # split in x random divisions
        # NOTE(review): when an index list has fewer entries than
        # number_of_splits the per-split amount is 0 and that split is
        # empty -- confirm the trainer copes with empty index sets.
        split_train_amount = len(train_idx) // number_of_splits
        split_val_amount = len(val_idx) // number_of_splits
        split_test_amount = len(test_idx) // number_of_splits
        print("Splitting %i segments, %i %i %i"
              % (number_of_splits, split_train_amount, split_val_amount,
                 split_test_amount))

        # train on different random splits
        random.shuffle(train_idx)
        random.shuffle(val_idx)
        random.shuffle(test_idx)

        train_losses = []
        val_losses = []
        test_accs = []
        for split_i in range(number_of_splits):
            split_train_idx = train_idx[split_i * split_train_amount:
                                        (split_i + 1) * split_train_amount]
            split_val_idx = val_idx[split_i * split_val_amount:
                                    (split_i + 1) * split_val_amount]
            split_test_idx = test_idx[split_i * split_test_amount:
                                      (split_i + 1) * split_test_amount]

            # NOTE(review): the original author marked this per-split
            # dataset construction as suspect ("this ain't right").
            dataset = DocumentGraphDataset(
                docs, labels,
                (split_train_idx, split_val_idx, split_test_idx))
            # The model already exists at this point, so the trainer is
            # only ever re-pointed at the new split.
            trainer.update_data(dataset)

            train_loss, val_loss = trainer.train_epoch()
            test_acc = trainer.test()
            print("split %02d: (%.4f, %.4f, ! %.4f !), "
                  % (split_i, train_loss, val_loss, test_acc), end="")

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            test_accs.append(test_acc)

        # test on entire graph
        dataset = DocumentGraphDataset(docs, labels, tvt_idx)
        trainer.update_data(dataset)
        test_acc = trainer.test()
        print("\n\n[epoch %02d] Test Acc on entire dataset %.4f\n\n"
              % (i, test_acc))

        # summary
        results["epochs"].append(i)
        results["average_train_loss"].append(
            float(sum(train_losses) / len(train_losses)))
        results["average_val_loss"].append(
            float(sum(val_losses) / len(val_losses)))
        results["average_test_acc"].append(
            float(sum(test_accs) / len(test_accs)))
        results["split_amount"].append(number_of_splits)
        results["acc"].append(test_acc)

        # NOTE(review): the CSV is rewritten after every epoch here; confirm
        # whether a single write after the loop was intended instead.
        df = pd.DataFrame(results)
        df.to_csv('./results/' + config['experiment_name'] + '.csv')