def read_data(ctrl, args):
    prefix = "./dataset/" + args.data
    if args.format == "metis":
        input_graph_path = prefix + ".metis"
        graph, mapping = read_graph(ctrl, input_graph_path, metis=True)
    else:
        input_graph_path = prefix + ".edgelist"
        graph, mapping = read_graph(ctrl, input_graph_path, edgelist=True)
    return input_graph_path, graph, mapping
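# A minimal sketch of what the read_graph helper assumed above might look like for the
# edgelist branch, built on networkx. The ctrl argument and the metis branch are omitted,
# and the node-id mapping format is an assumption, not the original implementation.
import networkx as nx

def read_graph_sketch(path, edgelist=True):
    """Read an edge list into an undirected graph and relabel nodes to 0..n-1."""
    graph = nx.read_edgelist(path, nodetype=int)
    mapping = {node: idx for idx, node in enumerate(sorted(graph.nodes()))}
    return nx.relabel_nodes(graph, mapping), mapping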
def main(trial=None, args=None):
    """
    Parsing command line parameters. Creating target matrix. Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """
    # fix seeds for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)
    tab_printer(args)
    # read data
    edges = read_graph(args)
    trainer = SHIGTrainer(args, edges)
    trainer.setup_dataset()
    # training
    trainer.create_and_train_model(trial)
    if trial is not None and trial.should_prune():
        raise optuna.exceptions.TrialPruned()
    if args.metric_to_optimize == 'AUC':
        return trainer.logs["performance"][-1][1]
    elif args.metric_to_optimize == 'F1':
        return trainer.logs["performance"][-1][2]
def main():
    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)
    init_paths = utils.initial_guess(init_states, graph)
    print('Original')
    for path in init_paths:
        print(path)
    print(utils.collective_cost(graph, init_paths))

    state, cost, hist = tabu_search(graph, init_paths)
    print('Tabu search')
    for path in state:
        print(path)
    print(cost)

    utils.plot_graph_paths_max(graph, paths=init_paths, title='Individual Planning')
    utils.plot_graph_paths_max(graph, paths=state, title='Tabu Search Cooperative Planning')
    plt.show()

    x = range(len(hist))
    plt.plot(x, hist)
    plt.xlabel("Step")
    plt.ylabel("Cost function")
    plt.show()
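# A minimal sketch of the collective_cost helper assumed above, under the assumption that
# the graph is a weighted networkx graph and each path is a list of node ids; the real
# utils implementation may aggregate costs differently.
def collective_cost_sketch(graph, paths):
    """Sum the edge weights along every agent's path."""
    total = 0.0
    for path in paths:
        for u, v in zip(path, path[1:]):
            total += graph[u][v].get('weight', 1.0)
    return total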
def main():
    p = argparse.ArgumentParser(
        description='This script runs an experiment solving TSP with tabu search and simulated annealing')
    p.add_argument('-g', '--graph', type=str, help='path to graph csv file', required=True)
    option_args = p.parse_known_args()[0]
    path = option_args.graph
    if not os.path.exists(path):
        print("File not found")
        sys.exit(1)
    graph = read_graph(path)
    tabu_table = experiment(tabu_search, graph)
    sa_table = experiment(simulated_anealing, graph)
    print("## Tabu Search Result")
    print(tabu_table)
    print()
    print("## Simulated Annealing Result")
    print(sa_table)
    print()
def __init__(self, args):
    """
    Initializing the training object.
    :param args: Arguments object.
    """
    self.args = args
    self.graph = read_graph(self.args.edge_path)
    self.initialize_model_and_features()
def learn_model(args):
    """
    Method to create adjacency matrix powers, read features, and learn embedding.
    :param args: Arguments object.
    """
    A = read_graph(args.edge_path)
    model = GraRep(A, args)
    model.optimize()
    model.save_embedding()
def run_benchmark():
    config = create_tf_config()
    graph_def = read_graph(FLAGS.input_graph)
    tf.import_graph_def(graph_def, name='')
    input_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('inputs:0')
    output_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('output_boxes:0')
    dummy_data_shape = list(input_tensor.shape)
    dummy_data_shape[0] = FLAGS.batch_size
    dummy_data = np.random.random(dummy_data_shape).astype(np.float32)
    if not FLAGS.profiling:
        num_warmup = 200
        total_iter = 1000
    else:
        num_warmup = 20
        total_iter = 100
    total_time = 0.0
    with tf.compat.v1.Session(config=config) as sess:
        print("Running warm-up")
        for i in range(num_warmup):
            sess.run(output_tensor, {input_tensor: dummy_data})
        print("Warm-up complete")
        for i in range(1, total_iter + 1):
            start_time = time.time()
            sess.run(output_tensor, {input_tensor: dummy_data})
            end_time = time.time()
            # compute the step duration before it is reported
            duration = end_time - start_time
            total_time += duration
            if i % 10 == 0:
                print("Steps = {0}, {1:10.6f} samples/sec".format(i, FLAGS.batch_size / duration))
        if FLAGS.profiling:
            options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
            run_metadata = tf.compat.v1.RunMetadata()
            sess.run(output_tensor, {input_tensor: dummy_data},
                     options=options, run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open("timeline_%s.json" % (time.time()), 'w') as f:
                f.write(chrome_trace)
    print("Average Throughput: %f samples/sec" % (total_iter * FLAGS.batch_size / total_time))
def main():
    """
    Parsing command lines, creating target matrix, fitting BANE and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    P = read_graph(args)
    X = read_features(args)
    model = BANE(args, P, X)
    model.fit()
    model.save_embedding()
def __init__(self, args):
    """
    Initializing the training object.
    :param args: Arguments parsed from command line.
    """
    self.args = args
    self.graph = read_graph(self.args.edge_path)
    self.features = read_features(self.args.feature_path)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.initialize_model()
    self.simulate_walks()
def main():
    """
    Parsing command lines, creating target matrix, fitting DANMF and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = read_graph(args)
    model = DANMF(graph, args)
    model.pre_training()
    model.training()
    if args.calculate_loss:
        loss_printer(model.loss)
def read(self, key, cls):
    if cls in self.__registry:
        id = self.__registry[cls] % key
        if id in self.__last_read:
            logging.debug("Preventing reading of %s" % id)
            return self.__last_read[id]
        else:
            path = self.__build_path(key, cls)
            data = read_graph(path)
            # FIXME: the cache currently holds only one item; what is the right size?
            self.__last_read = {}
            self.__last_read[id] = data
            return data
    else:
        return ""
def __init__(self):
    t = time.time()
    print("reading graph...")
    self.n_node, self.n_relation, self.graph = utils.read_graph(config.graph_filename)
    self.node_list = list(self.graph.keys())  # range(0, self.n_node)
    print('[%.2f] reading graph finished. #node = %d #relation = %d'
          % (time.time() - t, self.n_node, self.n_relation))

    t = time.time()
    print("read initial embeddings...")
    self.node_embed_init_d = utils.read_embeddings(filename=config.pretrain_node_emb_filename_d,
                                                   n_node=self.n_node,
                                                   n_embed=config.n_emb)
    self.node_embed_init_g = utils.read_embeddings(filename=config.pretrain_node_emb_filename_g,
                                                   n_node=self.n_node,
                                                   n_embed=config.n_emb)
    # self.rel_embed_init_d = utils.read_embeddings(filename=config.pretrain_rel_emb_filename_d,
    #                                               n_node=self.n_node,
    #                                               n_embed=config.n_emb)
    # self.rel_embed_init_g = utils.read_embeddings(filename=config.pretrain_rel_emb_filename_g,
    #                                               n_node=self.n_node,
    #                                               n_embed=config.n_emb)
    print("[%.2f] read initial embeddings finished." % (time.time() - t))

    print("build GAN model...")
    self.discriminator = None
    self.generator = None
    self.build_generator()
    self.build_discriminator()

    self.latest_checkpoint = tf.train.latest_checkpoint(config.model_log)
    self.saver = tf.train.Saver()

    self.dblp_evaluation = DBLP_evaluation()
    self.yelp_evaluation = Yelp_evaluation()
    self.aminer_evaluation = Aminer_evaluation()

    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    self.sess = tf.Session(config=self.config)
    self.sess.run(self.init_op)

    self.show_config()
def main():
    """
    Parsing command lines, creating target matrix, fitting an SGCN,
    predicting edge signs, and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    edges = read_graph(args)
    trainer = SignedGCNTrainer(args, edges)
    trainer.setup_dataset()
    trainer.create_and_train_model()
    if args.test_size > 0:
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)
def load_graph(dataset, labels_is_onehot=True):
    features = read_feature("./data/" + dataset + ".feature", is_normalize=False)
    if os.path.exists("./data/" + dataset + ".label"):
        labels = read_label("./data/" + dataset + ".label", is_onehot=labels_is_onehot)
    else:
        labels = None
    G = read_graph("./data/" + dataset + '.edgelist')
    graph = Graph(features, G, labels)
    return graph
def main():
    p = argparse.ArgumentParser(
        description='This script solves TSP with simulated annealing')
    p.add_argument('-g', '--graph', type=str, help='path to graph csv file', required=True)
    option_args = p.parse_known_args()[0]
    path = option_args.graph
    if not os.path.exists(path):
        print("File not found")
        sys.exit(1)
    graph = read_graph(path)
    s = simulated_anealing(graph)
    print('Answer')
    print("Path:", s, ", Cost:", get_cost(s, graph))
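# A minimal sketch of the get_cost helper assumed above, under the assumption that the CSV
# graph is loaded as an adjacency (distance) matrix and a solution is a list of node indices
# forming a closed tour; the original helper may differ.
def get_cost_sketch(path, graph):
    """Sum edge weights along the tour, returning to the start node."""
    cost = 0.0
    for i in range(len(path)):
        cost += graph[path[i]][path[(i + 1) % len(path)]]
    return cost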
def main():
    """
    Parsing command line parameters. Creating target matrix. Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """
    args = parameter_parser()
    avg_auc = []
    avg_f1 = []
    avg_precision = []
    avg_recall = []
    avg_acc = []
    for x in range(int(args.num_runs)):
        print("Iteration: ", x)
        tab_printer(args)
        edges = read_graph(args)
        trainer = SignedGCNTrainer(args, edges)
        trainer.setup_dataset()
        trainer.create_and_train_model()
        if args.test_size > 0:
            trainer.save_model()
            score_printer(trainer.logs)
            save_logs(args, trainer.logs)
            # compute the per-run scores once instead of re-calling score_printer for every metric
            run_scores = score_printer(trainer.logs, avg='auc')
            avg_auc.append(run_scores[0])
            print("This run's AUC: ", "%.3f" % run_scores[0])
            print('-----')
            avg_f1.append(run_scores[1])
            avg_precision.append(run_scores[2])
            avg_recall.append(run_scores[3])
            avg_acc.append(run_scores[4])
    print('AUC averaged over {} runs: '.format(args.num_runs), "%.3f" % np.mean(avg_auc))
    print('F1 averaged over {} runs: '.format(args.num_runs), "%.3f" % np.mean(avg_f1))
    print('Precision averaged over {} runs: '.format(args.num_runs), "%.3f" % np.mean(avg_precision))
    print('Recall averaged over {} runs: '.format(args.num_runs), "%.3f" % np.mean(avg_recall))
    print('Accuracy averaged over {} runs: '.format(args.num_runs), "%.3f" % np.mean(avg_acc))
    print('Max AUC: ', "%.3f" % max(avg_auc), 'Max F1: ', "%.3f" % max(avg_f1),
          'Max Precision: ', "%.3f" % max(avg_precision),
          'Max Recall: ', "%.3f" % max(avg_recall), 'Max Accuracy: ', "%.3f" % max(avg_acc))
def main(fun, trials=8):
    """
    :param trials: number of trials to run
    :param fun: a function which takes two arguments:
                1. graph
                2. init_states (i.e. the paths) [[..], [..], ..]
                It should solve the problem and return a tuple (solution_paths, cost);
                see tabu_search for an example.
    :return:
    """
    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)
    init_paths = utils.initial_guess(init_states, graph)

    times = []
    costs = []
    # the solutions are unused for now, but may be useful later for visualization
    solutions = []

    for _ in range(trials):
        # copy the inputs in case the solver mutates them
        init_graph = dup(graph)
        init_state = dup(init_paths)

        start_time = datetime.now()
        res = fun(init_graph, init_state)
        solution = res[0]
        cost = res[1]
        end_time = datetime.now()

        times.append((end_time - start_time).total_seconds() * 1000)
        costs.append(cost)
        solutions.append(solution)

    print('Avg cost: ' + str(np.mean(costs)))
    print('Var cost: ' + str(np.var(costs)))
    print('Min cost: ' + str(np.min(costs)))
    print('Avg time: ' + str(np.mean(times)))
    print('Var time: ' + str(np.var(times)))
    print('Min time: ' + str(np.min(times)))
def main():
    startTime = datetime.datetime.now()
    initialPopulationSize = 100  # Determines the initial sample size of the search space.
    numberOfParents = 5          # Determines how many parents are selected to create a new generation.
    iterations = 30              # The number of generations that are created before terminating.
    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)
    # Create the initial population and convert the problem into an edge problem.
    populationPaths = create_population(init_states, graph, initialPopulationSize)
    populationPaths = makeEdgeProblem(populationPaths)
    savedOriginalPaths = populationPaths  # Save the original population for comparison later.

    # Iterate the process of creating a new generation, starting from the randomly sampled initial population.
    for i in range(0, iterations):
        print("Starting iteration " + str(i + 1))
        parents = []
        for _ in range(numberOfParents):
            parent = selection(populationPaths, graph, int(round(initialPopulationSize / 2)))
            parents.append(parent[0])
        populationPaths = next_generation(parents, graph, initialPopulationSize)

    output = selection(populationPaths, graph, initialPopulationSize)
    endTime = datetime.datetime.now() - startTime
    for i in range(0, len(output[0])):
        print("Agent " + str(i + 1) + "'s path: " + str(output[0][i]))
    print("Total cost: " + str(output[1]))
    print("Original cost: " + str(selection(savedOriginalPaths, graph, initialPopulationSize)[1]))
    print("Execution time with " + str(iterations) + " iterations: " +
          str(endTime.total_seconds() * 1000) + " ms")
def __init__(self):
    t = time.time()
    print('reading graph...')
    self.graph, self.n_node, self.node_list, self.node_list_s, self.egs = utils.read_graph(config.train_file)
    self.node_emd_shape = [2, self.n_node, config.n_emb]
    print('[%.2f] reading graph finished. #node = %d' % (time.time() - t, self.n_node))

    self.dis_node_embed_init = None
    self.gen_node_embed_init = None
    if config.pretrain_dis_node_emb:
        t = time.time()
        print('reading initial embeddings...')
        self.dis_node_embed_init = np.array([utils.read_embeddings(filename=x, n_node=self.n_node, n_embed=config.n_emb)
                                             for x in [config.pretrain_dis_node_emb]])
        self.gen_node_embed_init = np.array([utils.read_embeddings(filename=x, n_node=self.n_node, n_embed=config.n_emb)
                                             for x in [config.pretrain_gen_node_emb]])
        print('[%.2f] read initial embeddings finished.' % (time.time() - t))

    print('building DGGAN model...')
    self.discriminator = None
    self.generator = None
    self.build_generator()
    self.build_discriminator()

    if config.experiment == 'link_prediction':
        self.link_prediction = evaluation.LinkPrediction(config)

    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=self.config)
    self.saver = tf.train.Saver(max_to_keep=0)
    if config.pretrain_ckpt:
        print('restore...')
        pretrain_ckpt = tf.train.latest_checkpoint(config.pretrain_ckpt)
        self.saver.restore(self.sess, pretrain_ckpt)
    else:
        print('initial...')
        self.init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        self.sess.run(self.init_op)
def main():
    """
    Parsing command line parameters. Creating target matrix. Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    args.edge_path = '../input/bitcoin_otc.csv'
    args.embedding_path = '../output/embedding/bitcoin_otc_sgcn.csv'
    args.features_path = './input/bitcoin_otc.csv'
    args.regression_weights_path = '../output/weights/bitcoin_otc_sgcn.csv'
    args.epochs = 1
    edges = read_graph(args)  # load the training data
    trainer = SignedGCNTrainer(args, edges)
    trainer.setup_dataset()  # compute the features
    trainer.create_and_train_model()
    if args.test_size > 0:
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)
import sys
import time
import utils

if __name__ == "__main__":
    try:
        start1 = time.time()
        filename = sys.argv[1]
        n = int(sys.argv[2])
        strategy = sys.argv[3]
        graph = utils.read_graph(filename)
        start2 = time.time()
        nodes = utils.strategies[strategy](graph, n)
        end2 = time.time()
        print('Computation Time: %0.3f s' % (end2 - start2))
        utils.write_nodes(nodes)
        end1 = time.time()
        print('Total Runtime: %0.3f s' % (end1 - start1))
    except:
        print(" Generates output for the given graph according to the given strategy")
        print(" USAGE: python gen.py [input graph file] [number of seeds] [strategy]")
        print(" EXAMPLE: python gen.py testgraph1.json 10 closeness_centrality")
        print(" AVAILABLE STRATEGIES:")
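# A minimal sketch of one entry in the utils.strategies table assumed above: a strategy takes
# a graph and a seed count n and returns the n highest-ranked nodes. The networkx
# representation and the dictionary registration are assumptions for illustration.
import networkx as nx

def closeness_centrality_strategy(graph, n):
    """Return the n nodes with the highest closeness centrality."""
    scores = nx.closeness_centrality(graph)
    return sorted(scores, key=scores.get, reverse=True)[:n]

strategies = {"closeness_centrality": closeness_centrality_strategy}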
def main(out, *rdfs):
    g = utils.read_graph(*rdfs)
    generate_void(g, out)
    # scores_matrix_one_full = scores_matrix_one.A
    # scores_matrix_two_full = scores_matrix_two.A
    # plot_matrix(matrix = scores_matrix_one_full)
    # plot_matrix(matrix = scores_matrix_two_full)
    scores_matrix_one = sp.csr_matrix(np.triu(scores_matrix_one.A, k=1))  # k=1 excludes the diagonal
    scores_matrix_two = sp.csr_matrix(np.triu(scores_matrix_two.A, k=1))

    # Read the binary training data
    graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                         graph_name=graph_name,
                                         connected_pattern='undirected',
                                         from_zeros_one='0')
    G = read_graph(weighted=0, input=graph_train_path, directed=0)
    train_binary = sp.csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
    train_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))
    # train_binary_full = train_binary.A
    # or: train_binary = sp.csr_matrix(np.array(nx.to_numpy_matrix(G)))

    # Build the binary masks for existing and non-existing links
    exist_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
    nonexist_binary = sp.csr_matrix(np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A)

    # Normalize scores to [0.0, 1.0]
    scores_matrix_one_norm = normalize_matrix(csr_matrix1=scores_matrix_one)
    scores_matrix_two_norm = normalize_matrix(csr_matrix1=scores_matrix_two)
    # plot_matrix(scores_matrix_one_norm.A)
def get_min_edge_cost(source, sink, graph):
    """
    Gets the cost of the edge between the source and the sink.
    Returns inf if there is no direct edge between the two.
    """
    result = [c for s, c in graph[source] if s == sink]
    if any(result):
        return result[0]
    return math.inf


if __name__ == "__main__":
    graph = utils.read_graph("edges.txt", True)

    # initialize the spanning tree with the first node
    X = [1]        # nodes in the spanning tree so far
    V_X = graph    # remaining nodes
    del V_X[1]
    TCost = 0      # total cost of the spanning tree so far

    # initialize the min-cost-of-crossing-the-cut heap
    # (the heap contains (key, value) pairs => key - min cost, value - remaining node)
    minCrossingCostHeap = DynamicKeyHeap(
        V_X.keys(),
        lambda remainingNode: get_min_edge_cost(remainingNode, 1, V_X))

    while any(V_X):
        minCost, poppedNode = minCrossingCostHeap.pop_kvp()
        TCost += minCost
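# The snippet above stops mid-loop and relies on a custom DynamicKeyHeap whose update API is
# not shown. As a hedged illustration of the same Prim's-algorithm idea, here is a
# self-contained sketch using the standard heapq with lazy deletion; the adjacency-list
# format {node: [(neighbor, cost), ...]} is an assumption.
import heapq

def prim_total_cost(graph, start=1):
    """Return the total cost of a minimum spanning tree over a connected graph."""
    visited = set()
    total = 0
    heap = [(0, start)]
    while heap and len(visited) < len(graph):
        cost, node = heapq.heappop(heap)
        if node in visited:
            continue  # stale entry left behind by lazy deletion
        visited.add(node)
        total += cost
        for neighbor, edge_cost in graph[node]:
            if neighbor not in visited:
                heapq.heappush(heap, (edge_cost, neighbor))
    return total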
def auto_PNR(prex=None, graph_name=None, emb_method_name1=None, emb_method_name2=None): print('----------------------------------------------------------') time_start = time.time() # 初始化训练集和测试集的路径 # prex = 'preprocessing_code2//' # 改这里 all_file_dir = 'D:\hybridrec\dataset\split_train_test//' + prex binNum = 50 # 改这里 emb_method_name1 = emb_method_name1.lower() # 改这里 emb_method_name2 = emb_method_name2.lower() # 改这里 print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 + "," + emb_method_name2) conf_method1 = None conf_method2 = None if emb_method_name1 in all_embedding_methods: config_path_method1 = 'conf/' + emb_method_name1 + '.properties' config_method1 = configparser.ConfigParser() config_method1.read(config_path_method1) conf_method1 = dict(config_method1.items("hyperparameters")) if emb_method_name2 in all_embedding_methods: config_path_method2 = 'conf/' + emb_method_name2 + '.properties' config_method2 = configparser.ConfigParser() config_method2.read(config_path_method2) conf_method2 = dict(config_method2.items("hyperparameters")) # 初始化embedding和scores的路径 results_dir = 'D:\hybridrec/results//' + prex graph_results_dir = results_dir + graph_name + '//' # 计算emb method 1 if not ((emb_method_name1 == 'arope') or (emb_method_name1 == 'graph2gauss') or (is_heuristic_method(emb_method_name1) == True)): graph_train_path = get_trainset_path( base_dir=all_file_dir, graph_name=graph_name, connected_pattern=get_connp(emb_method_name1), from_zeros_one=get_from_zeros_one(emb_method_name1)) graph_results_path = graph_results_dir + graph_name + '_' + emb_method_name1 + '.emb' if not os.path.isfile(graph_results_path): run_emb_method(input=graph_train_path, output=graph_results_path, emb_method_name=emb_method_name1) # 计算emb method 2 if not ((emb_method_name2 == 'arope') or (emb_method_name2 == 'graph2gauss') or (is_heuristic_method(emb_method_name2) == True)): graph_train_path = get_trainset_path( base_dir=all_file_dir, graph_name=graph_name, connected_pattern=get_connp(emb_method_name2), from_zeros_one=get_from_zeros_one(emb_method_name2)) graph_results_path = graph_results_dir + graph_name + '_' + emb_method_name2 + '.emb' if not os.path.isfile(graph_results_path): run_emb_method(input=graph_train_path, output=graph_results_path, emb_method_name=emb_method_name2) # 计算scores1 if conf_method1 != None: embedding_size_method1 = int(conf_method1['embedding_size']) if emb_method_name1 == 'splitter': scores_matrix_one = inner_product_scores_splitter( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name1, col_start=0, col_end=embedding_size_method1 + 1, skiprows=1, delimiter=',') elif (emb_method_name1 == 'attentionwalk') or (emb_method_name1 == 'grarep'): scores_matrix_one = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name1, col_start=0, col_end=embedding_size_method1 + 1, skiprows=1, delimiter=',') elif (emb_method_name1 == 'drne') or (emb_method_name1 == 'prune'): scores_matrix_one = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name1, col_start=0, col_end=embedding_size_method1, skiprows=0, delimiter=' ') # embedding_size_method有一些是要+1有一些不需要的 elif (emb_method_name1 == 'arope'): scores_matrix_one = inner_product_scores_arope( all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir) elif (emb_method_name1 == 'graph2gauss'): scores_matrix_one = energy_kl_scores_graph2gauss( 
all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir) elif is_heuristic_method(emb_method_name1): scores_matrix_one = heuristic_scores( all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir, heuristic_method=emb_method_name1) else: scores_matrix_one = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name1, col_start=0, col_end=embedding_size_method1 + 1, skiprows=1, delimiter=' ') # 计算scores2 if conf_method2 != None: embedding_size_method2 = int(conf_method2['embedding_size']) if emb_method_name2 == 'splitter': scores_matrix_two = inner_product_scores_splitter( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name2, col_start=0, col_end=embedding_size_method2 + 1, skiprows=1, delimiter=',') elif (emb_method_name2 == 'attentionwalk') or (emb_method_name2 == 'grarep'): scores_matrix_two = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name2, col_start=0, col_end=embedding_size_method2 + 1, skiprows=1, delimiter=',') elif (emb_method_name2 == 'drne') or (emb_method_name2 == 'prune'): scores_matrix_two = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name2, col_start=0, col_end=embedding_size_method2, skiprows=0, delimiter=' ') elif (emb_method_name2 == 'arope'): scores_matrix_two = inner_product_scores_arope( all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir) elif (emb_method_name2 == 'graph2gauss'): scores_matrix_two = energy_kl_scores_graph2gauss( all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir) elif is_heuristic_method(emb_method_name2): scores_matrix_two = heuristic_scores( all_file_dir=all_file_dir, graph_name=graph_name, graph_results_dir=graph_results_dir, heuristic_method=emb_method_name2) else: scores_matrix_two = inner_product_scores( graph_results_dir=graph_results_dir, dataset_name=graph_name, emb_method_name=emb_method_name2, col_start=0, col_end=embedding_size_method2 + 1, skiprows=1, delimiter=' ') # scores取上三角(注意:1、前面需要保证所有的分数在右上角或占满整个矩阵。2、前面有些是右上角,有些是占满整个矩阵) # scores_matrix_one_full = scores_matrix_one.A # scores_matrix_two_full = scores_matrix_two.A # plot_matrix(matrix = scores_matrix_one_full) # plot_matrix(matrix = scores_matrix_two_full) scores_matrix_one = sp.csr_matrix(np.triu(scores_matrix_one.A, k=1)) # k=1表示不包括对角线 scores_matrix_two = sp.csr_matrix(np.triu(scores_matrix_two.A, k=1)) # 读入train的binary数据 graph_train_path = get_trainset_path(base_dir=all_file_dir, graph_name=graph_name, connected_pattern='undirected', from_zeros_one='0') G = read_graph(weighted=0, input=graph_train_path, directed=0) train_binary = sp.csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G)) train_binary = sp.csr_matrix(np.triu(train_binary.A, k=1)) # train_binary_full = train_binary.A # 或 train_binary = sp.csr_matrix(np.array(nx.to_numpy_matrix(G))) # 构建exist和nonexist的binary exist_binary = sp.csr_matrix(np.triu(train_binary.A, k=1)) # k=1表示不包括对角线 nonexist_binary = sp.csr_matrix( np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A) # 分数归一化到[0.0, 1.0] scores_matrix_one_norm = normalize_matrix(csr_matrix1=scores_matrix_one) scores_matrix_two_norm = normalize_matrix(csr_matrix1=scores_matrix_two) # plot_matrix(scores_matrix_one_norm.A) # plot_matrix(scores_matrix_two_norm.A) del scores_matrix_one, scores_matrix_two 
gc.collect() # 划分bin val_max = 1.0 val_min = 0.0 # bin_array = sorted(divide_bin(val_max = val_max, val_min = val_min, binNum = binNum)) interval = float((val_max - val_min) / binNum) # 获取exist_binary和nonexist_binary的分数 exist_scores_one_list = (np.array(scores_matrix_one_norm[exist_binary > 0], dtype=float))[0] nonexist_scores_one_list = (np.array( scores_matrix_one_norm[nonexist_binary > 0], dtype=float))[0] exist_scores_two_list = (np.array(scores_matrix_two_norm[exist_binary > 0], dtype=float))[0] nonexist_scores_two_list = (np.array( scores_matrix_two_norm[nonexist_binary > 0], dtype=float))[0] # # 变为稀疏矩阵 # exist_scores_one_list_csr = sp.csr_matrix(exist_scores_one_list) # nonexist_scores_one_list_csr = sp.csr_matrix(nonexist_scores_one_list) # exist_scores_two_list_csr = sp.csr_matrix(exist_scores_two_list) # nonexist_scores_two_list_csr = sp.csr_matrix(nonexist_scores_two_list) # temp = scores_matrix_one_norm[exist_binary > 0][0] # 我怕在把分数变为list的时候出问题 # 初始化两个大小为binNum* bnNum的二维栅格 exist_raster_grids = np.zeros((binNum, binNum)) nonexist_raster_grids = np.zeros((binNum, binNum)) # 计算落在exist_raster_grids栅格的existing links的数量 exist_links_num = len(exist_scores_one_list) exist_row_col_zero_num = 0 # 那些两个矩阵的分数都是0的不作统计 for i in range(exist_links_num): # row_index和col_index的范围从0-->binNum-1 if (exist_scores_one_list[i] == 0.0) & (exist_scores_two_list[i] == 0.0): exist_row_col_zero_num = exist_row_col_zero_num + 1 continue row_index = int( get_row_col_index(score=exist_scores_one_list[i], interval=interval, binNum=binNum)) col_index = int( get_row_col_index(score=exist_scores_two_list[i], interval=interval, binNum=binNum)) exist_raster_grids[row_index, col_index] = exist_raster_grids[row_index, col_index] + 1 print("exist_row_col_zero_num:" + str(exist_row_col_zero_num)) print('sum exist_raster_grids:' + str(np.sum(exist_raster_grids))) # 计算落在nonexist_raster_grids栅格的nonexisting links的数量 nonexist_links_num = len(nonexist_scores_one_list) nonexist_row_col_zero_num = 0 # 那些两个矩阵的分数都是0的不作统计 for i in range(nonexist_links_num): # row_index和col_index的范围从0-->binNum-1 if (nonexist_scores_one_list[i] <= 0.0) & (nonexist_scores_two_list[i] <= 0.0): nonexist_row_col_zero_num = nonexist_row_col_zero_num + 1 continue row_index = int( get_row_col_index(score=nonexist_scores_one_list[i], interval=interval, binNum=binNum)) col_index = int( get_row_col_index(score=nonexist_scores_two_list[i], interval=interval, binNum=binNum)) nonexist_raster_grids[row_index, col_index] = nonexist_raster_grids[row_index, col_index] + 1 print("nonexist_row_col_zero_num:" + str(nonexist_row_col_zero_num)) print('sum nonexist_raster_grids:' + str(np.sum(nonexist_raster_grids))) # 计算PNR分数 N = train_binary.shape[0] print("Graph size:" + str(N) + '\n') L_T = np.sum(train_binary.A) O = N * (N - 1) / 2 coefficient = (O - L_T) / L_T PNR1 = coefficient * (exist_raster_grids / (nonexist_raster_grids + 1) ) # 分母加1避免出现inf或nan,不影响evaluation但是可能好看 PNR2 = (exist_raster_grids / nonexist_raster_grids) # inf和nan置为0 PNR2[np.isnan(PNR2)] = 0 PNR2[np.isinf(PNR2)] = 0 PNR2 = coefficient * PNR2 # 画图(注意:图的横纵坐标是从左上角开始的而不是想象中的左上角) # sns.heatmap(PNR1, cmap='Reds') # plt.savefig(graph_results_dir + emb_method_name1 +'_'+ emb_method_name2 + '_' +'bin_' + str(binNum) + "_PNR1.jpg") # plt.show() # sns.heatmap(PNR2, cmap='Reds') # plt.savefig(graph_results_dir + emb_method_name1 +'_'+ emb_method_name2 + '_'+ 'bin_' + str(binNum) + "_PNR2.jpg") # plt.show() # plt.matshow(PNR1) # 好丑 # plt.show() # 保存(exist_raster_grids、nonexist_raster_grids、PNR1、PNR2) 
save_ndarray_to_mat(exist_raster_grids, 'exist_raster_grids', graph_results_dir, graph_name, emb_method_name1, emb_method_name2, binNum) save_ndarray_to_mat(nonexist_raster_grids, 'nonexist_raster_grids', graph_results_dir, graph_name, emb_method_name1, emb_method_name2, binNum) save_ndarray_to_mat(PNR1, 'PNR1', graph_results_dir, graph_name, emb_method_name1, emb_method_name2, binNum) save_ndarray_to_mat(PNR2, 'PNR2', graph_results_dir, graph_name, emb_method_name1, emb_method_name2, binNum) # PNR调整分数(只调整non-existing link的部分) nonexist_scores_PNR_list = transfer_scores_PNR( scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, train_binary=train_binary, PNR=PNR2, interval=interval, binNum=binNum) # weighted hybird方法的分数,0.5均权直接相加 scores_matrix_hybrid_norm = 0.5 * scores_matrix_one_norm + 0.5 * scores_matrix_two_norm nonexist_scores_hybrid_list = (np.array( scores_matrix_hybrid_norm[nonexist_binary > 0], dtype=float))[0] # 评估evaluation graph_test_path = get_testset_path(base_dir=all_file_dir, graph_name=graph_name) test_binary = get_test_matrix_binary(graph_test_path=graph_test_path, N=N) L_full = int(np.sum(test_binary)) L_array = np.array([ int(L_full / 20), int(L_full / 10), int(L_full / 5), int(L_full / 2), L_full ]) del scores_matrix_one_norm, scores_matrix_two_norm, exist_scores_one_list, exist_scores_two_list, scores_matrix_hybrid_norm gc.collect() AP_PNR, AUC_PNR, Precision_PNR, Recall_PNR, F1score_PNR=\ evaluators(train_binary=train_binary, test_binary=test_binary, scores_list=nonexist_scores_PNR_list, L_array=L_array) AP_method1, AUC_method1, Precision_method1, Recall_method1, F1score_method1=\ evaluators(train_binary=train_binary, test_binary=test_binary, scores_list=nonexist_scores_one_list, L_array=L_array) AP_method2, AUC_method2, Precision_method2, Recall_method2, F1score_method2=\ evaluators(train_binary=train_binary, test_binary=test_binary, scores_list=nonexist_scores_two_list, L_array=L_array) AP_weighted, AUC_weighted, Precision_weighted, Recall_weighted, F1score_weighted=\ evaluators(train_binary=train_binary, test_binary=test_binary, scores_list=nonexist_scores_hybrid_list, L_array=L_array) print('AP_PNR: ' + str(AP_PNR)) print('AP_method1: ' + str(AP_method1)) print('AP_method2: ' + str(AP_method2)) print('AP_weighted: ' + str(AP_weighted)) print('\n') print('AUC_PNR: ' + str(AUC_PNR)) print('AUC_method1: ' + str(AUC_method1)) print('AUC_method2: ' + str(AUC_method2)) print('AUC_weighted: ' + str(AUC_weighted)) print('\n') print('Precision_PNR: ' + str(Precision_PNR)) print('Precision_method1: ' + str(Precision_method1)) print('Precision_method2: ' + str(Precision_method2)) print('Precision_weighted: ' + str(Precision_weighted)) print('\n') print('Recall_PNR: ' + str(Recall_PNR)) print('Recall_method1: ' + str(Recall_method1)) print('Recall_method2: ' + str(Recall_method2)) print('Recall_weighted: ' + str(Recall_weighted)) print('\n') print('F1score_PNR: ' + str(F1score_PNR)) print('F1score_method1: ' + str(F1score_method1)) print('F1score_method2: ' + str(F1score_method2)) print('F1score_weighted: ' + str(F1score_weighted)) print('\n') write_to_excel(graph_name, emb_method_name1, emb_method_name2, Precision_PNR, Precision_method1, Precision_method2, Precision_weighted, Recall_PNR, Recall_method1, Recall_method2, Recall_weighted, F1score_PNR, F1score_method1, F1score_method2, F1score_weighted, AP_PNR, AP_method1, AP_method2, AP_weighted, AUC_PNR, AUC_method1, AUC_method2, AUC_weighted) time_end = time.time() print("time span: " + 
          str((time_end - time_start) / 60.00) + " mins")

    # Timing notes:
    # facebook_combined: bin=5, 1.5 minutes
    # facebook_combined: cn & pearson / aa & cn took 3.5 minutes
    # facebook_combined: graphdistance & cn took 11 minutes
    # facebook_combined: the PNE matrix for graphdistance & cn was all zeros
    # facebook_combined: attentionwalk & prone took 7.5 minutes
    # facebook_combined: everything involving rootedpagerank performs very poorly;
    #   arope is slightly better than PNR; SDNE and PRUE are very poor; drne and graph2gauss
    #   are also extremely poor on their own but perform very well after PNR fusion
    # blogcatalog: aa & ja took 3 hours
    # (path-based methods such as katz and graphdistance are very slow; neighbor-based and rank-based methods are fast)
    # google, 15000 nodes: 2.5 hours
    print('--------------------------------------------------------------------------------')
    pass
import utils as ut

# Read the graphs
ii = ut.read_graph("ii")
sgi = ut.read_graph("sgi")
ui = ut.read_graph("ui")

# Compute global measures
ut.graph_global_measures(ii, "ii")
ut.graph_global_measures(sgi, "sgi")
ut.graph_global_measures(ui, "ui")

# Compute LCC global measures
ut.graph_global_measures(ii, "ii", True)
ut.graph_global_measures(sgi, "sgi", True)
ut.graph_global_measures(ui, "ui", True)

# Visualize graph
ut.viz_graph(sgi, 'sgi')
ut.viz_graph(ii, 'ii', cc=True)
ut.viz_graph(ui, 'ui', cc=True)
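# A minimal sketch of the graph_global_measures helper assumed above, built on networkx;
# the exact set of measures and the LCC flag semantics are assumptions for illustration.
import networkx as nx

def graph_global_measures_sketch(g, name, lcc=False):
    """Print a few global measures, optionally restricted to the largest connected component."""
    if lcc:
        g = g.subgraph(max(nx.connected_components(g), key=len)).copy()
    print(name,
          "nodes:", g.number_of_nodes(),
          "edges:", g.number_of_edges(),
          "density:", nx.density(g),
          "avg clustering:", nx.average_clustering(g))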
import utils as ut

### read data
ii = ut.read_graph("ii")
ui = ut.read_graph("ui")

### Hypergeom Test
ii_lou = map(lambda x: ut.hypergeom_test(ii, x), filter(ut.check_length_mod, ut.louvain(ii)))
ii_mcl = map(lambda x: ut.hypergeom_test(ii, x), filter(ut.check_length_mod, ut.mcl(ii)))
ui_lou = map(lambda x: ut.hypergeom_test(ui, x), filter(ut.check_length_mod, ut.louvain(ui)))
ui_mcl = map(lambda x: ut.hypergeom_test(ui, x), filter(ut.check_length_mod, ut.mcl(ui)))

# Create tables
ii_mod = ut.create_table("ii_mod", list(ii_lou), list(ii_mcl))
ui_mod = ut.create_table("ui_mod", list(ui_lou), list(ui_mcl))

# Visualize clusters
ut.louvain(ii, 'ii', viz=True)
ut.louvain(ui, 'ui', viz=True)
ut.mcl(ii, viz=True)
ut.mcl(ui, viz=True)
def publish(input):
    g = utils.read_graph(input)
    describe_dataset(g)
    write_rdf_files(g)
    write_dump(g)
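# The two RDF-oriented snippets above (main(out, *rdfs) and publish(input)) assume a
# utils.read_graph that loads one or more RDF files into a single graph. A minimal sketch
# using rdflib, under the assumption that the inputs are RDF serializations whose format
# rdflib can detect from the file extension:
from rdflib import Graph

def read_rdf_graph(*paths):
    """Parse one or more RDF files into a single rdflib Graph."""
    g = Graph()
    for path in paths:
        g.parse(path)
    return g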
def auto_overlap(prex=None, graph_name=None, emb_method_name1=None, emb_method_name2=None, binNum=None): time_start = time.time() print('----------------------------------------------------------') print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 + "," + emb_method_name2) results_base_dir = 'D:\hybridrec//results//' all_file_dir = 'D:\hybridrec\dataset\split_train_test//' + prex results_dir = 'D:\hybridrec/results//' + prex graph_results_dir = results_dir + graph_name + '//' path_scores_method1 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name1 + "_scores.mat" path_scores_method2 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name2 + "_scores.mat" if not (os.path.exists(path_scores_method1) and os.path.exists(path_scores_method2)): print("dataset: " + graph_name + '----' + "baselines:" + emb_method_name1 + "," + emb_method_name2 + ': 分数未完全计算') if os.path.exists(path_scores_method1) and os.path.exists( path_scores_method2): # 获取归一化分数 scores_matrix_one_dict = (loadmat(path_scores_method1)) scores_matrix_two_dict = (loadmat(path_scores_method2)) scores_matrix_one = scores_matrix_one_dict['scores'] scores_matrix_two = scores_matrix_two_dict['scores'] if emb_method_name1 not in all_embedding_methods: scores_matrix_one = csr_matrix(np.triu(scores_matrix_one.A, k=1)) # k=1表示不包括对角线 if emb_method_name2 not in all_embedding_methods: scores_matrix_two = csr_matrix(np.triu(scores_matrix_two.A, k=1)) scores_matrix_one_norm = normalize_matrix( csr_matrix1=csr_matrix(scores_matrix_one)) # 去掉传参的csr_matrix()则会 scores_matrix_two_norm = normalize_matrix( csr_matrix1=csr_matrix(scores_matrix_two)) # 获取train_binary和test_binary graph_train_path = get_trainset_path(base_dir=all_file_dir, graph_name=graph_name, connected_pattern='undirected', from_zeros_one='0') graph_test_path = get_testset_path(base_dir=all_file_dir, graph_name=graph_name) G = read_graph(weighted=0, input=graph_train_path, directed=0) train_binary = csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G)) train_binary = csr_matrix(np.triu(train_binary.A, k=1)) test_binary = get_test_matrix_binary(graph_test_path=graph_test_path, N=train_binary.shape[0]) # 读取plus的原始分数(未归一化) plus_scores_name = 'plus_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat' plus_scores_path = graph_results_dir + plus_scores_name scores_matrix_plus_dict = (loadmat(plus_scores_path)) scores_matrix_plus = scores_matrix_plus_dict['scores'] # 读取multiply的原始分数(未归一化) multiply_scores_name = 'multiply_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat' multiply_scores_path = graph_results_dir + multiply_scores_name scores_matrix_multiply_dict = (loadmat(multiply_scores_path)) scores_matrix_multiply = scores_matrix_multiply_dict['scores'] # 读取MLP的原始分数(未归一化) mlp_scores_name = 'mlp_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat' mlp_scores_path = graph_results_dir + mlp_scores_name scores_matrix_mlp_dict = (loadmat(mlp_scores_path)) scores_matrix_mlp = scores_matrix_mlp_dict['scores'] # 归一化hybrid分数 scores_matrix_plus_norm = normalize_matrix( csr_matrix1=scores_matrix_plus) scores_matrix_multiply_norm = normalize_matrix( csr_matrix1=scores_matrix_multiply) scores_matrix_mlp_norm = normalize_matrix( csr_matrix1=scores_matrix_mlp) # 计算plus、multiply、mlp、PNR的rasterization grids mlp_path = results_base_dir + prex + graph_name + "//" + "mlp_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + 
"_50_count.mat" mlp_dict = (loadmat(mlp_path)) mlp_raster_grids = mlp_dict["count"] multiply_path = results_base_dir + prex + graph_name + "//" + "multiply_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat" multiply_dict = (loadmat(multiply_path)) multiply_raster_grids = multiply_dict["count"] plus_path = results_base_dir + prex + graph_name + "//" + "plus_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat" plus_dict = (loadmat(plus_path)) plus_raster_grids = plus_dict["count"] # plus_raster_grids = rasterization_grids(binNum=binNum, # train_binary=train_binary, # scores_matrix_DNN=scores_matrix_plus_norm, # scores_matrix_one_norm=scores_matrix_one_norm, # scores_matrix_two_norm=scores_matrix_two_norm) # multiply_raster_grids = rasterization_grids(binNum=binNum, # train_binary=train_binary, # scores_matrix_DNN=scores_matrix_multiply_norm, # scores_matrix_one_norm=scores_matrix_one_norm, # scores_matrix_two_norm=scores_matrix_two_norm) # mlp_raster_grids = rasterization_grids(binNum=binNum, # train_binary=train_binary, # scores_matrix_DNN=scores_matrix_mlp_norm, # scores_matrix_one_norm=scores_matrix_one_norm, # scores_matrix_two_norm=scores_matrix_two_norm) PNR_path = results_base_dir + prex + graph_name + "//" + "PNR2_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat" PNR_dict = (loadmat(PNR_path)) PNR_raster_grids = PNR_dict["count"] exist_binary = csr_matrix(np.triu(train_binary.A, k=1)) # k=1表示不包括对角线 nonexist_binary = csr_matrix( np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A) # 获取plus的nonexist_scores_list nonexist_scores_plus_list = transfer_scores_PNR( scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, train_binary=train_binary, PNR=plus_raster_grids, interval=float((1.0 - 0.0) / binNum), binNum=binNum) # 获取multiply的nonexist_scores_list nonexist_scores_multiply_list = transfer_scores_PNR( scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, train_binary=train_binary, PNR=multiply_raster_grids, interval=float((1.0 - 0.0) / binNum), binNum=binNum) # 获取mlp的nonexist_scores_list nonexist_scores_mlp_list = transfer_scores_PNR( scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, train_binary=train_binary, PNR=mlp_raster_grids, interval=float((1.0 - 0.0) / binNum), binNum=binNum) # 获取PNR的nonexist_scores_list nonexist_scores_PNR_list = transfer_scores_PNR( scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, train_binary=train_binary, PNR=PNR_raster_grids, interval=float((1.0 - 0.0) / binNum), binNum=binNum) # 获取阈值 E_test = np.sum(test_binary.A) thresold_plus = get_list_thresold(nonexist_scores_plus_list, L=E_test) thresold_multiply = get_list_thresold(nonexist_scores_multiply_list, L=E_test) thresold_mlp = get_list_thresold(nonexist_scores_mlp_list, L=E_test) thresold_PNR = get_list_thresold(nonexist_scores_PNR_list, L=E_test) # 这里的trick, L=1/2 |E_test|!!!!!!!!!!! 
# thresold_plus = int(thresold_plus*0.5) # thresold_multiply = int(thresold_multiply * 0.5) # thresold_mlp = int(thresold_mlp * 0.5) # thresold_PNR = int(thresold_PNR * 0.5) # 修改grids plus_raster_grids = plus_raster_grids.A multiply_raster_grids = multiply_raster_grids.A mlp_raster_grids = mlp_raster_grids.A PNR_raster_grids = PNR_raster_grids.A # np.where(plus_raster_grids > thresold_plus, plus_raster_grids, 0) # np.where(multiply_raster_grids > thresold_multiply, multiply_raster_grids, 0) # np.where(mlp_raster_grids > thresold_mlp, mlp_raster_grids, 0) # np.where(PNR_raster_grids > thresold_PNR, PNR_raster_grids, 0) plus_raster_grids[plus_raster_grids <= thresold_plus] = 0.0 multiply_raster_grids[multiply_raster_grids <= thresold_multiply] = 0.0 mlp_raster_grids[mlp_raster_grids <= thresold_mlp] = 0.0 PNR_raster_grids[PNR_raster_grids <= thresold_PNR] = 0.0 plus_raster_grids[plus_raster_grids >= thresold_plus] = 1.0 multiply_raster_grids[multiply_raster_grids >= thresold_multiply] = 1.0 mlp_raster_grids[mlp_raster_grids >= thresold_mlp] = 1.0 PNR_raster_grids[PNR_raster_grids >= thresold_PNR] = 1.0 # 画图 # colors = ['OrangeRed', 'darkseagreen', 'dodgerblue', 'blueviolet'] colors = ['Red', 'green', 'blue', 'purple'] result = np.float32(PNR_raster_grids) result = cv2.GaussianBlur(result, (5, 5), 0) # (5, 5)表示高斯矩阵的长与宽都是5,标准差取0 title = graph_name + '-PNR-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf_overlap(result=result, title=title, color=colors[0]) result = np.float32(plus_raster_grids) result = cv2.GaussianBlur(result, (5, 5), 0) # (5, 5)表示高斯矩阵的长与宽都是5,标准差取0 title = graph_name + '-plus-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf_overlap(result=result, title=title, color=colors[1]) result = np.float32(multiply_raster_grids) result = cv2.GaussianBlur(result, (5, 5), 0) # (5, 5)表示高斯矩阵的长与宽都是5,标准差取0 title = graph_name + '-multiply-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf_overlap(result=result, title=title, color=colors[2]) result = np.float32(mlp_raster_grids) result = cv2.GaussianBlur(result, (5, 5), 0) # (5, 5)表示高斯矩阵的长与宽都是5,标准差取0 title = graph_name + '-mlp-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf_overlap(result=result, title=title, color=colors[3]) # # 计算plus的rasterization grids # plus_raster_grids = rasterization_grids(binNum=plus_binNum, # train_binary=train_binary, # scores_matrix_DNN=scores_matrix_plus_norm, # scores_matrix_one_norm=scores_matrix_one_norm, # scores_matrix_two_norm=scores_matrix_two_norm) # # plus_raster_grids = np.log10(plus_raster_grids) # 出现-inf而报错 # plus_raster_grids = normalize_matrix_full(csr_matrix1=csr_matrix(plus_raster_grids)) # plus_raster_grids = better_show_grids(csr_matrix1=plus_raster_grids) # # source = np.float32(plus_raster_grids.A) # result = cv2.GaussianBlur(source, (5, 5), 0) # title = graph_name + '-' + 'plus' +'-' + emb_method_name1 + '-' + emb_method_name2 # plot_contourf(result=result, title=title, binNum=10) # time_end = time.time() print("It takes : " + str((time_end - time_start) / 60.0) + " mins.") pass
assert len(datasets) == 2, "Please input a datasets pair!"  # the dataset input must be a pair
assert datasets[0] == datasets[1], "Unknown datasets pair!"  # both entries of the pair must name the same dataset
params.node_num = get_node_num(params)

## Get the initial embedding matrix
if params.profile_feature:
    src_emb = load_embeddings(params, True)
    tgt_emb = load_embeddings(params, False)
else:
    src_emb, tgt_emb = initialize_feature(params)

## Read original graph
G_source_original, G_target_original = read_graph(params)

## Assign the original graph to G_source and G_target as the current graph
G_source = G_source_original
G_target = G_target_original
G_source_edge_num = nx.number_of_edges(G_source)
G_target_edge_num = nx.number_of_edges(G_target)
print("=====> number of source graph edges: %d, number of target graph edges: %d"
      % (G_source_edge_num, G_target_edge_num))
A_source = nx.adjacency_matrix(G_source)
A_target = nx.adjacency_matrix(G_target)

## Get the adjacency matrix of the current graph and normalize it to facilitate graph convolution
A_source_norm, A_target_norm = adjacency_matrix_normalize(params, G_source, G_target)

## Build model
model = build_model(params)
        # finding the nearest vertex would suffice
        heapq.heappush(h, (dists[v], curr_node, v))
        # queue is empty, exit while loop
        if not h:
            break
        # promote a vertex from the trial tree to the shortest path tree
        _, _, curr_node = heapq.heappop(h)
    # use the dst only
    return dists[dst]


if __name__ == '__main__':
    g = utils.read_graph()
    # res = run(g, SRC, DST)
    # print('Your answer:', res)
    # print('Model answer:', nx.shortest_path_length(g, SRC, DST, 'weight'))
    n = g.number_of_nodes()
    for _ in range(N_TESTS):
        # make a copy because calling run() will change the 'queued' field of each edge
        h = g.copy()
        src = random.randint(0, n - 1)
        dst = random.randint(0, n - 1)
        try:
            res1 = run(h, src, dst)
            res2 = nx.shortest_path_length(h, src, dst, 'weight')
            assert res1 == res2
def auto_DNN(prex=None, graph_name=None, emb_method_name1=None, emb_method_name2=None, model_name=None, DNN_binNum=None): print('----------------------------------------------------------') print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 + "," + emb_method_name2) results_base_dir = 'D:\hybridrec//results//' all_file_dir = 'D:\hybridrec\dataset\split_train_test//' + prex results_dir = 'D:\hybridrec/results//' + prex graph_results_dir = results_dir + graph_name + '//' # (facebook_combined的规律:ratio越小则正负样本的预测准确率越高,花的时间也越少) ratio = 1 # 负样本的总数是正样 本的ratio倍 # 改这里 path_scores_method1 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name1 + "_scores.mat" path_scores_method2 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name2 + "_scores.mat" # Initialize the model,改这里 # hidden_layer_sizes=(10, 20, 10):三个隐藏层,分别10、20、10个神经元 if model_name == "mlp": model = MLPClassifier(hidden_layer_sizes=(10, 20), activation='relu', solver='adam', max_iter=200, alpha=0.01, batch_size=256, learning_rate='constant', learning_rate_init=0.001, shuffle=False, random_state=2020, early_stopping=True, validation_fraction=0.2, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10) pass if model_name == "svm": model = SVC(C=5, random_state=42) # 出问题了 pass if model_name == "lr": model = LogisticRegression(C=5, penalty='l1', tol=1e-6, random_state=42) # penalty 有l1和l2 pass if model_name == "lgbm": model = LGBMClassifier(num_leaves=31, learning_rate=0.1, n_estimators=64, random_state=42, n_jobs=-1) pass if model_name == "xgb": model = XGBClassifier(max_depth=5, learning_rate=0.1, n_jobs=-1, nthread=-1, gamma=0.06, min_child_weight=5, subsample=1, colsample_bytree=0.9, reg_alpha=0, reg_lambda=0.5, random_state=42) pass if model_name == "ld": model = LinearDiscriminantAnalysis(solver='lsqr') pass if model_name == "rf": model = RandomForestClassifier(n_estimators=50, max_depth=20, min_samples_split=2, min_samples_leaf=5, max_features="log2", random_state=12) pass if not (os.path.exists(path_scores_method1) and os.path.exists(path_scores_method2)): print("dataset: " + graph_name + '----' + "baselines:" + emb_method_name1 + "," + emb_method_name2 + ': 分数未完全计算') if os.path.exists(path_scores_method1) and os.path.exists( path_scores_method2): # 获取归一化分数 scores_matrix_one_dict = (loadmat(path_scores_method1)) scores_matrix_two_dict = (loadmat(path_scores_method2)) scores_matrix_one = scores_matrix_one_dict['scores'] scores_matrix_two = scores_matrix_two_dict['scores'] if emb_method_name1 not in all_embedding_methods: scores_matrix_one = csr_matrix(np.triu(scores_matrix_one.A, k=1)) # k=1表示不包括对角线 if emb_method_name2 not in all_embedding_methods: scores_matrix_two = csr_matrix(np.triu(scores_matrix_two.A, k=1)) scores_matrix_one_norm = normalize_matrix( csr_matrix1=csr_matrix(scores_matrix_one)) scores_matrix_two_norm = normalize_matrix( csr_matrix1=csr_matrix(scores_matrix_two)) # 获取train_binary和test_binary graph_train_path = get_trainset_path(base_dir=all_file_dir, graph_name=graph_name, connected_pattern='undirected', from_zeros_one='0') graph_test_path = get_testset_path(base_dir=all_file_dir, graph_name=graph_name) G = read_graph(weighted=0, input=graph_train_path, directed=0) train_binary = csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G)) train_binary = csr_matrix(np.triu(train_binary.A, k=1)) test_binary = get_test_matrix_binary(graph_test_path=graph_test_path, N=train_binary.shape[0]) del scores_matrix_one, scores_matrix_two gc.collect() 
# 获取正样本的分数 exist_binary = csr_matrix(np.triu(train_binary.A, k=1)) # k=1表示不包括对角线 exist_scores_one_list = (np.array( scores_matrix_one_norm[exist_binary > 0], dtype=float))[0] exist_scores_two_list = (np.array( scores_matrix_two_norm[exist_binary > 0], dtype=float))[0] # 构建测试样本(正样本+负样本) X_train_1 = (np.array([exist_scores_one_list, exist_scores_two_list])).T X_train_0 = negative_samples( train_binary=train_binary, test_binary=test_binary, scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm, ratio=ratio) Y_train_1 = np.random.randint(1, 2, X_train_1.shape[0]) Y_train_0 = np.random.randint(0, 1, X_train_0.shape[0]) X_train = np.vstack((np.array(X_train_1), np.array(X_train_0))) Y_train = (np.hstack((np.array(Y_train_1), np.array(Y_train_0)))).T time_start = time.time() # 模型训练 model.fit(X_train, Y_train) # 模型预测 preds_0 = model.predict(X_train_0) preds_1 = model.predict(X_train_1) print(np.sum(preds_0)) print(np.sum(preds_1)) preds_0_proba = model.predict_proba(X_train_0) preds_1_proba = model.predict_proba(X_train_1) # 模型预测 scores_matrix_DNN = predicted_scores_DNN( model=model, train_binary=train_binary, test_binary=test_binary, scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm) save_DNN_hybrid_scores(scores_matrix_DNN=scores_matrix_DNN, method1=emb_method_name1, method2=emb_method_name2, graph_results_dir=graph_results_dir, dataset_name=graph_name, model_name=model_name) scores_matrix_DNN_norm = normalize_matrix( csr_matrix1=scores_matrix_DNN) # 计算DNN的rasterization grids DNN_raster_grids = rasterization_grids( binNum=DNN_binNum, train_binary=train_binary, scores_matrix_DNN=scores_matrix_DNN_norm, scores_matrix_one_norm=scores_matrix_one_norm, scores_matrix_two_norm=scores_matrix_two_norm) # DNN_raster_grids = np.log10(DNN_raster_grids) # 出现-inf而报错 DNN_raster_grids = normalize_matrix_full( csr_matrix1=csr_matrix(DNN_raster_grids)) DNN_raster_grids = better_show_grids(csr_matrix1=DNN_raster_grids) save_DNN_raster_scores(rastser_grids=DNN_raster_grids, method1=emb_method_name1, method2=emb_method_name2, graph_results_dir=graph_results_dir, dataset_name=graph_name, model_name=model_name, DNN_binNum=DNN_binNum) source = np.float32(DNN_raster_grids.A) result = cv2.GaussianBlur(source, (5, 5), 0) title = graph_name + '-' + model_name + '-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf(result=result, title=title, binNum=10) # 读取PNR grids PNR_path = results_base_dir + prex + graph_name + "//" + "PNR1_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat" if is_excel_file_exist(PNR_path): PNR_dict = (loadmat(PNR_path)) PNR_matrix = PNR_dict["count"] PNR_matrix = better_show_grids(csr_matrix1=PNR_matrix) source = np.float32(PNR_matrix.A) result = cv2.GaussianBlur(source, (5, 5), 0) #(5, 5)表示高斯矩阵的长与宽都是5,标准差取0 title = graph_name + '-PNR-' + emb_method_name1 + '-' + emb_method_name2 plot_contourf(result=result, title=title, binNum=10) # 评估DNN exist_binary = csr_matrix(np.triu(train_binary.A, k=1)) # k=1表示不包括对角线 nonexist_binary = csr_matrix( np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A) nonexist_scores_DNN_list = (np.array( scores_matrix_DNN[nonexist_binary > 0], dtype=float))[0] L_full = int(np.sum(test_binary)) L_array = np.array([ int(L_full / 20), int(L_full / 10), int(L_full / 5), int(L_full / 2), L_full ]) AP_DNN, AUC_DNN, Precision_DNN, Recall_DNN, F1score_DNN = \ evaluators(train_binary=train_binary, test_binary=test_binary, scores_list=nonexist_scores_DNN_list, 
                       L_array=L_array)
        # print('AP_DNN: ' + str(AP_DNN))
        # print('AUC_DNN: ' + str(AUC_DNN))
        # print('Precision_DNN: ' + str(Precision_DNN))
        # print('Recall_DNN: ' + str(Recall_DNN))
        # print('F1score_DNN: ' + str(F1score_DNN))

        # Write precision, recall, F1-score and AP to the Excel file
        DNN_write_to_excel(DL_name=model_name,
                           dataset_name=graph_name,
                           method1=emb_method_name1,
                           method2=emb_method_name2,
                           precision_DL=Precision_DNN,
                           recall_DL=Recall_DNN,
                           F1score_DL=F1score_DNN,
                           AP_DL=AP_DNN)

        time_end = time.time()
        print("It takes : " + str((time_end - time_start) / 60.0) + " mins.")
    pass
# -*- coding:utf-8 -*-
import sys
import os
import networkx as nx
import matplotlib.pyplot as plt

lib = os.path.join(os.path.abspath('.'), 'lib')
sys.path.insert(0, lib)
import utils

# USER: saicologic
user_id = 1502

# G = nx.Graph()
G = utils.read_graph(user_id)  # undirected graph

# Degree centrality
utils.show_ranking(G, 'degree_centrality', nx.degree_centrality(G))

# Closeness centrality
utils.show_ranking(G, 'closeness_centrality', nx.closeness_centrality(G))

# Betweenness centrality
utils.show_ranking(G, 'betweenness_centrality', nx.betweenness_centrality(G))

# Edge betweenness centrality
utils.show_ranking(G, 'edge_betweenness_centrality', nx.edge_betweenness_centrality(G))
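# A minimal sketch of the show_ranking helper assumed above: it is taken here to print the
# top entries of a centrality dict under a label; the real utils implementation may differ.
def show_ranking_sketch(graph, label, centrality, top=10):
    """Print the top-ranked nodes (or edges) for the given centrality dict."""
    print(label)
    ranked = sorted(centrality.items(), key=lambda kv: kv[1], reverse=True)
    for key, value in ranked[:top]:
        print("%s\t%.4f" % (key, value))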
# Write the result to file
output_file(final_partitioning)

# THE RESULT OBTAINED WITH METIS
start_meth = time.time()
edge_cut, metis_partitioning = metis.part_graph(graphs_history[0], k)
end_meth = time.time()
m, s = divmod((end_meth - start_meth), 60)
elapsed_time = "%d minutes and %f seconds" % (m, s)
print('the edge cut obtained with metis is', edge_cut)
print('the time taken by metis was', elapsed_time)


parser = argparse.ArgumentParser(
    description='Partition the vertices of a graph in k roughly equal partitions '
                'such that the number of edges connecting vertices in different partitions '
                'is minimized')
parser.add_argument("k", help="The number of partitions", type=int)
group = parser.add_mutually_exclusive_group()
group.add_argument("-f", "--file",
                   help="The file containing the graph to elaborate; if not specified a random graph is generated",
                   type=str)
group.add_argument("-r", "--random", nargs=2, metavar=('DEGREE', 'N_NODES'),
                   help="Generate a random graph with the specified degree and number of nodes",
                   type=int,
                   default=[30, 10000])  # default degree and number of nodes for the randomly generated graph
args = parser.parse_args()

if args.file is not None:
    g = read_graph(args.file)
else:
    g = random_graph(args.random[0], args.random[1])

k_way_partitioning(args.k, g)
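# A minimal sketch of the random_graph helper assumed above, using networkx; treating the
# first argument as the node degree and the second as the node count is an assumption
# (random_regular_graph additionally requires degree * n_nodes to be even).
import networkx as nx

def random_graph_sketch(degree, n_nodes):
    """Generate a random regular graph with the given degree and number of nodes."""
    return nx.random_regular_graph(degree, n_nodes)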