def main(): """ Parsing command line parameters, reading data, fitting and scoring a SimGNN model. """ args = parameter_parser() tab_printer(args) trainer = SimGNNTrainer(args) if args.measure_time: trainer.measure_time() else: if args.load: trainer.load() else: trainer.fit() trainer.score() if args.save: trainer.save() if args.notify: import os import sys if sys.platform == "linux": os.system('notify-send SimGNN "Program is finished."') elif sys.platform == "posix": os.system(""" osascript -e 'display notification "SimGNN" with title "Program is finished."' """) else: raise NotImplementedError("No notification support for this OS.")
def main(): """ Parsing command line parameters. Reading data, embedding base graph, creating persona graph and learning a splitter. Saving the persona mapping and the embedding. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) """ 1. read graph and load as torch dataset """ graph, graph_ingr_only = graph_reader(args.input_nodes, args.input_edges) """ 2. Metapath2vec with MetaPathWalker - Ingredient-Ingredient / Ingredient-Food-like Compound / Ingredient-Drug-like Compound """ if args.idx_embed == 'Node2vec': node2vec = Node2Vec(args, graph) node2vec.train() else: metapath2vec = Metapath2Vec(args, graph) metapath2vec.train() """ 3. Plot your embedding if you like """ plot_embedding(args, graph) """ 4. Evaluate Node Classification & Node Clustering """ evaluate(args, graph)
def main(): """ Parsing command lines, creating target matrix, fitting SINE and saving the embedding. """ args = parameter_parser() tab_printer(args) model = SINETrainer(args) model.fit() model.save_embedding()
def main(): """ Parsing command lines, creating target matrix, fitting an Attention Walker and saving the embedding. """ args = parameter_parser() tab_printer(args) model = AttentionWalkTrainer(args) model.fit() model.save_model()
def main(): """ Parsing command line parameters, reading data, fitting and scoring a SimGNN model. """ args = parameter_parser() tab_printer(args) trainer = SimGNNTrainer(args) trainer.fit() trainer.score()
def main(): """ Parsing command line parameters, processing graphs, fitting a CapsGNN. """ args = parameter_parser() tab_printer(args) model = CapsGNNTrainer(args) model.fit() model.score() model.save_predictions()
def main(): """ Parsing command line parameters, reading data, fitting an EdMot clustering and scoring the model. """ args = parameter_parser() tab_printer(args) graph = graph_reader(args.edge_path) model = EdMot(graph, args.components, args.cutoff) memberships = model.fit() membership_saver(args.membership_path, memberships)
def main(): """ Parsing command line parameters, processing graphs, fitting a GAM. """ args = parameter_parser() tab_printer(args) model = GAMTrainer(args) model.fit() model.score() model.save_predictions_and_logs()
def main(): """ Parsing command lines, creating target matrix, fitting BANE and saving the embedding. """ args = parameter_parser() tab_printer(args) P = read_graph(args) X = read_features(args) model = BANE(args, P, X) model.fit() model.save_embedding()
def main(): """ Parsing command lines, creating target matrix, fitting DANMF and saving the embedding. """ args = parameter_parser() tab_printer(args) graph = read_graph(args) model = DANMF(graph, args) model.pre_training() model.training() if args.calculate_loss: loss_printer(model.loss)
def main(): """ Parsing command line parameters, reading data, fitting an NGCN and scoring the model. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) trainer = Trainer(args, graph, features, target) trainer.fit()
def main(): """ Parsing command line parameters, creating EgoNets, creating a partition of the persona graph. Saving the memberships. """ args = parameter_parser() tab_printer(args) graph = graph_reader(args.edge_path) splitter = EgoNetSplitter(graph, args.resolution) splitter.create_egonets() splitter.map_personalities() splitter.create_persona_graph() splitter.create_partitions() membership_saver(args.output_path, splitter.overlapping_partitions)
def execute_factorization():
    """
    Reading the target matrix, running the optimization and saving the factors to disk.
    """
    args = parameter_parser()
    tab_printer(args)
    X = read_features(args.input_path)
    print("\nTraining started.\n")
    model = ADMM_NMF(X, args)
    model.optimize()
    model.save_user_factors()
    model.save_item_factors()
    print("\nFactors saved.\n")
def main(): """ Parsing command lines, creating target matrix, fitting an SGCN, predicting edge signs, and saving the embedding. """ args = parameter_parser() tab_printer(args) edges = read_graph(args) trainer = SignedGCNTrainer(args, edges) trainer.setup_dataset() trainer.create_and_train_model() if args.test_size > 0: trainer.save_model() score_printer(trainer.logs) save_logs(args, trainer.logs)
def __init__(self):
    self.args = parameter_parser()
    print("\nEnumerating unique labels.\n")
    self.training_graphs = glob.glob(self.args.training_graphs + "*.json")
    self.testing_graphs = glob.glob(self.args.testing_graphs + "*.json")
    self.graph_pairs = self.training_graphs + self.testing_graphs
    self.global_labels = set()
    # Collect every node label occurring in any training or testing pair.
    for graph_pair in tqdm(self.graph_pairs):
        data = process(graph_pair)
        self.global_labels = self.global_labels.union(set(data["labels_1"]))
        self.global_labels = self.global_labels.union(set(data["labels_2"]))
    self.global_labels = list(self.global_labels)
    self.global_labels = {val: index for index, val in enumerate(self.global_labels)}
    self.number_of_labels = len(self.global_labels)
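# A minimal sketch, following the usual SimGNN convention, of how the
# label-to-index map built above is used downstream. The helper name
# `one_hot_features` is hypothetical, for illustration only.
import numpy as np

def one_hot_features(labels, global_labels):
    """Encode a list of node labels as a (num_nodes, num_labels) one-hot matrix."""
    features = np.zeros((len(labels), len(global_labels)))
    for node, label in enumerate(labels):
        features[node, global_labels[label]] = 1.0
    return features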
def main(): """ Parsing command line parameters. Reading data, embedding base graph, creating persona graph and learning a splitter. Saving the persona mapping and the embedding. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) trainer = SplitterTrainer(graph, args) trainer.fit() trainer.save_embedding() trainer.save_persona_graph_mapping()
def main(): """ Parsing command line parameters, reading data, graph decomposition, fitting a ClusterGCN and scoring the model. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) clustering_machine = ClusteringMachine(args, graph, features, target) clustering_machine.decompose() gcn_trainer = ClusterGCNTrainer(args, clustering_machine) gcn_trainer.train() gcn_trainer.test()
def main(): """ Parsing command line parameters, reading data, doing sparsification, fitting a GWNN and saving the logs. """ args = parameter_parser() tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) sparsifier = WaveletSparsifier(graph, args.scale, args.approximation_order, args.tolerance) sparsifier.calculate_all_wavelets() trainer = GWNNTrainer(args, sparsifier, features, target) trainer.fit() trainer.score() save_logs(args, trainer.logs)
def main(): """ Parsing command line parameters, reading data, fitting an NGCN and scoring the model. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) trainer = Trainer(args, graph, features, target, True) trainer.fit() if args.model == "mixhop": trainer.evaluate_architecture() args = trainer.reset_architecture() trainer = Trainer(args, graph, features, target, False) trainer.fit()
def tab_printer(args):
    """
    Function to print the logs in a nice tabular format.
    :param args: Parameters used for the model.
    """
    args = vars(args)
    keys = sorted(args.keys())
    tab = Texttable()
    tab.add_rows([["Parameter", "Value"]] +
                 [[k.replace("_", " ").capitalize(), args[k]] for k in keys])
    print(tab.draw())

def read_graph(settings):
    """
    Reading the edge list from the path and returning the networkx graph object.
    :param settings: Arguments object holding the input path.
    :return graph: Graph from edge list.
    """
    if settings.edgelist_input:
        graph = nx.read_edgelist(settings.input)
    else:
        edge_list = pd.read_csv(settings.input).values.tolist()
        graph = nx.from_edgelist(edge_list)
        # Graph.selfloop_edges() was removed in NetworkX 2.4; use the module-level helper.
        graph.remove_edges_from(nx.selfloop_edges(graph))
    return graph

if __name__ == "__main__":
    settings = parameter_parser()
    tab_printer(settings)
    G = read_graph(settings)
    machine = WaveletMachine(G, settings)
    machine.create_embedding()
    machine.transform_and_save_embedding()
""" Main SimGNN model """ from tensorflow import keras import numpy as np from tqdm import tqdm, trange from parser import parameter_parser from utilities import data2, convert_to_keras, process, find_loss from simgnn import simgnn parser = parameter_parser() def train(model, x): batches = x.create_batches() global_labels = x.getlabels() print(global_labels) """ Training the Network Take every graph pair and train it as a batch. """ for epoch in range(0, parser.epochs): for index, batch in tqdm(enumerate(batches), total=len(batches), desc="Batches"): for graph_pair in batch: data = process(graph_pair) data = convert_to_keras(data, global_labels) x = np.array([data["features_1"]]) y = np.array([data["features_2"]]) a = np.array([data["edge_index_1"]]) b = np.array([data["edge_index_2"]])
    out.to_csv(output_path, index=None)

def main(args):
    """
    Main function to read the graph list, extract features, learn the embedding and save it.
    :param args: Object with the arguments.
    """
    graphs = glob.glob(args.input_path + "*.json")
    print("\nFeature extraction started.\n")
    document_collections = Parallel(n_jobs=args.workers)(
        delayed(feature_extractor)(g, args.wl_iterations) for g in tqdm(graphs))
    print("\nOptimization started.\n")
    model = Doc2Vec(document_collections,
                    size=args.dimensions,
                    window=0,
                    min_count=args.min_count,
                    dm=0,
                    sample=args.down_sampling,
                    workers=args.workers,
                    iter=args.epochs,
                    alpha=args.learning_rate)
    save_embedding(args.output_path, model, graphs, args.dimensions)

if __name__ == "__main__":
    args = parameter_parser()
    main(args)
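# Note: the Doc2Vec keyword arguments above follow the gensim < 4.0 API. Under
# gensim >= 4.0 the renamed parameters would be used instead, everything else
# unchanged (a sketch):
#
#     model = Doc2Vec(document_collections,
#                     vector_size=args.dimensions,  # formerly `size`
#                     epochs=args.epochs,           # formerly `iter`
#                     window=0, min_count=args.min_count, dm=0,
#                     sample=args.down_sampling, workers=args.workers,
#                     alpha=args.learning_rate)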
def main(): print("Program start, environment initializing ...") torch.autograd.set_detect_anomaly(True) args = parameter_parser() utils.print2file(str(args), args.logDir, True) if args.device >= 0: os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device) torch.manual_seed(args.seed) np.random.seed(args.seed) pic = {} # check if pickles, otherwise load data # pickle_name = args.data_prefix+args.dataset+"-"+str(args.bsize)+"-"+str(args.num_clusters)+"_main"+".pickle" # if os.path.isfile(pickle_name): # print("Loading Pickle.") # load_time = time.time() # pic = pickle.load(open(pickle_name, "rb")) # print("Loading Done. " + str(time.time()-load_time) + " seconds.") # else: if True: print("Data Pre-processing") # Load data (pic["train_adj"], full_adj, pic["train_feats"], pic["test_feats"], pic["y_train"], y_val, y_test, pic["train_mask"], pic["val_mask"], test_mask, _, pic["val_data"], pic["test_data"], num_data, visible_data) = utils.load_data(args.data_prefix, args.dataset, args.precalc, amazon=True) print("Partition graph and do preprocessing") if args.bsize > 1: _, pic["parts"] = partition_utils.partition_graph( pic["train_adj"], visible_data, args.num_clusters) pic["parts"] = [np.array(pt) for pt in pic["parts"]] (pic["features_batches"], pic["support_batches"], pic["y_train_batches"], pic["train_mask_batches"]) = utils.preprocess_multicluster_v2( pic["train_adj"], pic["parts"], pic["train_feats"], pic["y_train"], pic["train_mask"], args.num_clusters, args.bsize, args.diag_lambda) else: (pic["parts"], pic["features_batches"], pic["support_batches"], pic["y_train_batches"], pic["train_mask_batches"]) = utils.preprocess( pic["train_adj"], pic["train_feats"], pic["y_train"], pic["train_mask"], visible_data, args.num_clusters, args.diag_lambda) (_, pic["val_features_batches"], pic["val_support_batches"], pic["y_val_batches"], pic["val_mask_batches"]) = utils.preprocess( full_adj, pic["test_feats"], y_val, pic["val_mask"], np.arange(num_data), args.num_clusters_val, args.diag_lambda) (_, pic["test_features_batches"], pic["test_support_batches"], pic["y_test_batches"], pic["test_mask_batches"]) = utils.preprocess( full_adj, pic["test_feats"], y_test, test_mask, np.arange(num_data), args.num_clusters_test, args.diag_lambda) # pickle.dump(pic, open(pickle_name, "wb")) idx_parts = list(range(len(pic["parts"]))) print("Preparing model ...") model = StackedGCN(args, pic["test_feats"].shape[1], pic["y_train"].shape[1], precalc=args.precalc, num_layers=args.num_layers, norm=args.layernorm) w_server = model.cpu().state_dict() print("Start training ...") model_saved = "./model/" + args.dataset + "-" + args.logDir[6:-4] + ".pt" try: for epoch in range(args.epochs): # Training process w_locals, loss_locals, epoch_acc = [], [], [] all_time = [] best_val_acc = 0 for pid in range(len(pic["features_batches"])): # for pid in range(10): # Use preprocessed batch data package = { "features": pic["features_batches"][pid], "support": pic["support_batches"][pid], "y_train": pic["y_train_batches"][pid], "train_mask": pic["train_mask_batches"][pid] } model.load_state_dict(w_server) out_dict = slave_run_train(model, args, package, pid) w_locals.append(copy.deepcopy(out_dict['params'])) loss_locals.append(copy.deepcopy(out_dict['loss'])) all_time.append(out_dict["time"]) epoch_acc.append(out_dict["acc"]) # update global weights a_start_time = time.time() if args.agg == 'avg': w_server = average_agg(w_locals, args.dp) elif args.agg == 'att': w_server = weighted_agg(w_locals, w_server, args.epsilon, args.ord, dp=args.dp) 
            else:
                exit('Unrecognized aggregation')
            model.load_state_dict(w_server)
            # agg_time = time.time() - a_start_time
            # print(str(sum(all_time) / len(all_time) + agg_time))

            print2file('Epoch: ' + str(epoch) + ' Average Train acc: ' +
                       str(sum(epoch_acc) / len(epoch_acc)), args.logDir, True)

            if epoch % args.val_freq == 0:
                val_cost, val_acc, val_micro, val_macro = evaluate(
                    model, args, pic["val_features_batches"],
                    pic["val_support_batches"], pic["y_val_batches"],
                    pic["val_mask_batches"], pic["val_data"], pid="validation")
                log_str = 'Validation set results: ' + 'cost= {:.5f} '.format(
                    val_cost) + 'accuracy= {:.5f} '.format(
                        val_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(
                            val_micro, val_macro)
                print2file(log_str, args.logDir, True)
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    torch.save(model.state_dict(), model_saved)
                    print2file("Best val_acc: " + str(best_val_acc) +
                               " with epoch: " + str(epoch), args.logDir, True)

        torch.save(model.state_dict(),
                   "./model/" + args.dataset + "-" + args.logDir[6:-4] + "Done.pt")
        print2file("Training Done. Model Saved.", args.logDir, True)

        # Test the model twice: once with the last model, once with the best
        # val_acc checkpoint.
        # 1) Last model.
        test_cost, test_acc, micro, macro = evaluate(
            model, args, pic["test_features_batches"],
            pic["test_support_batches"], pic["y_test_batches"],
            pic["test_mask_batches"], pic["test_data"], pid="Final test")
        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

        # 2) Best checkpoint.
        test_model = StackedGCN(args, pic["test_feats"].shape[1],
                                pic["y_train"].shape[1], precalc=args.precalc,
                                num_layers=args.num_layers, norm=args.layernorm)
        test_model.load_state_dict(torch.load(model_saved))
        test_model.eval()
        test_cost, test_acc, micro, macro = evaluate(
            test_model, args, pic["test_features_batches"],
            pic["test_support_batches"], pic["y_test_batches"],
            pic["test_mask_batches"], pic["test_data"], pid="Best test")
        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)
    except KeyboardInterrupt:
        print("==" * 20)
        print("Exiting from training earlier than planned.")
        print("End... so far so good.")
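# A minimal sketch of the FedAvg-style `average_agg` helper referenced above.
# Only its name and call signature appear in this file, so the body below is
# an assumption (the `dp` differential-privacy option is ignored here).
import copy

def average_agg(w_locals, dp=0.0):
    """Average a list of model state_dicts parameter-wise (FedAvg aggregation)."""
    w_avg = copy.deepcopy(w_locals[0])
    for key in w_avg.keys():
        for w in w_locals[1:]:
            w_avg[key] = w_avg[key] + w[key]
        w_avg[key] = w_avg[key] / len(w_locals)
    return w_avg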