def main():

    args = parse_args()

    assert not (args.visualise and args.embedding_dim > 2), "Can only visualise two dimensions"
    assert args.embedding_path is not None, "You must specify a path to save the embedding"
    if not args.no_walks:
        assert args.walk_path is not None, "You must specify a path to save the walks"

    # fix random seeds for reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    graph, features, node_labels = load_data(args)
    print("Loaded dataset")

    # plot_degree_dist(graph, "degree distribution")  # optional diagnostic, disabled

    configure_paths(args)
    print("Configured paths")

    # build the model
    num_nodes = len(graph)
    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)

    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)

    model.compile(optimizer=optimizer,
        loss=loss,
        target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss", patience=args.patience, verbose=True),
        Checkpointer(epoch=initial_epoch,
            nodes=sorted(graph.nodes()),
            embedding_directory=args.embedding_path),
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph, features, args)

    del features  # drop the features reference to free memory

    if args.use_generator:
        print("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples, probs, model, args)
        model.fit_generator(training_generator,
            workers=args.workers,
            max_queue_size=10,
            use_multiprocessing=args.workers > 0,
            epochs=args.num_epochs,
            steps_per_epoch=len(training_generator),
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks)
    else:
        print("Training without data generator")
        # concatenate positive and negative samples along the last axis;
        # the all-zero labels assume the positive sample sits at index 0 of each row
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)
        model.fit(train_x, train_y,
            shuffle=True,
            batch_size=args.batch_size,
            epochs=args.num_epochs,
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks)

    print("Training complete")

    if args.visualise:
        embedding = model.get_weights()[0]
        if embedding.shape[1] == 3:  # a 2-d hyperboloid embedding has three coordinates
            print("Projecting to Poincare ball")
            embedding = hyperboloid_to_poincare_ball(embedding)
        draw_graph(graph,
            embedding,
            node_labels,
            path="2d-poincare-disk-visualisation.png")
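
# For reference, a minimal sketch of the hyperboloid-to-Poincare projection
# that hyperboloid_to_poincare_ball above is assumed to compute; the repo's
# actual implementation may differ. With the time-like coordinate stored in
# the last column, a hyperboloid point (x_1, ..., x_d, x_{d+1}) maps to the
# Poincare ball point x_{1:d} / (1 + x_{d+1}).
def _poincare_projection_sketch(X):
    """Project rows of X from the hyperboloid model to the Poincare ball."""
    return X[:, :-1] / (1. + X[:, -1:])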
# Struc2Vec variant of main(): structural random walks are generated up
# front and passed to the sampler in place of node features.
def main():

    args = parse_args()

    # if os.path.exists(args.embedding_path):
    #     os._exit(0)

    # read the edgelist twice: with string node ids for Struc2Vec,
    # and with integer node ids for training
    graph = nx.read_weighted_edgelist(args.edgelist, delimiter=" ",
        nodetype=None, create_using=nx.Graph())
    graph_int = nx.read_weighted_edgelist(args.edgelist, delimiter=" ",
        nodetype=int, create_using=nx.Graph())

    # generate structural random walks
    walker = Struc2Vec(graph.to_directed(),
        walk_length=10, num_walks=8, workers=8, verbose=40)
    walks = walker.return_walk_list()

    # convert walk node ids from strings to integers to match graph_int
    walks = [[int(node) for node in walk] for walk in walks]
    graph = graph_int

    assert not (args.visualise and args.embedding_dim > 2), "Can only visualise two dimensions"
    assert args.embedding_path is not None, "You must specify a path to save the embedding"

    configure_paths(args)
    print("Configured paths")

    # build the model
    num_nodes = len(graph)
    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)

    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)

    model.compile(optimizer=optimizer,
        loss=loss,
        target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss", patience=args.patience, verbose=True),
        Checkpointer(epoch=initial_epoch,
            nodes=sorted(graph.nodes()),
            embedding_directory=args.embedding_path),
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph, walks, args)

    if args.use_generator:
        print("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples, probs, model, args)
        model.fit_generator(training_generator,
            workers=args.workers,
            max_queue_size=10,
            use_multiprocessing=args.workers > 0,
            epochs=args.num_epochs,
            steps_per_epoch=len(training_generator),
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks)
    else:
        print("Training without data generator")
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)
        model.fit(train_x, train_y,
            shuffle=True,
            batch_size=args.batch_size,
            epochs=args.num_epochs,
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks)

    print("Training complete")

    # project the hyperboloid embedding to the Poincare ball and plot it
    embedding = model.get_weights()[0]
    embedding = hyperboloid_to_poincare_ball(embedding)
    print(embedding)

    ax = plot(embedding)
    # draw the unit circle bounding the Poincare disk
    theta = np.linspace(0, 2 * np.pi, 200)
    x = np.cos(theta)
    y = np.sin(theta)
    ax.plot(x, y, color="black", linewidth=2)
    ax.axis("equal")
    ax.figure.savefig("Hyper.pdf", bbox_inches="tight")
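
# For intuition, a minimal sketch of the hyperboloid exponential map that a
# Riemannian optimizer such as ExponentialMappingOptimizer typically applies
# when taking a gradient step; this is an illustrative assumption, and the
# repo's optimizer may differ. For a point x on the hyperboloid and a
# tangent vector v, exp_x(v) = cosh(|v|) x + sinh(|v|) v / |v|, where |v|
# is the Minkowski norm of v (time-like coordinate last, as above).
def _exp_map_sketch(x, v, eps=1e-7):
    # Minkowski norm of the (space-like) tangent vector v
    norm_sq = np.sum(v[:-1] ** 2) - v[-1] ** 2
    norm = np.sqrt(max(norm_sq, 0.)) + eps
    return np.cosh(norm) * x + np.sinh(norm) * v / norm


# Hypothetical entry point, assuming this module is run as a script
if __name__ == "__main__":
    main()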