# Example #1 (score: 0)
def main():
    """Train a hyperbolic embedding from struc2vec random walks and plot it.

    Reads a weighted edge list, generates structural random walks with
    Struc2Vec, trains the embedding model defined by ``build_model``, then
    projects the learned hyperboloid embedding to the Poincare ball and
    saves a 2-d visualisation to ``Hyper.pdf``.
    """
    args = parse_args()

    # Validate arguments up front, before any expensive graph loading
    # or walk generation.
    assert not (args.visualise and args.embedding_dim > 2), "Can only visualise two dimensions"
    assert args.embedding_path is not None, "you must specify a path to save embedding"

    # The edge list is read twice: once with string node ids (what
    # Struc2Vec expects) and once with integer ids (used for training).
    graph = nx.read_weighted_edgelist(args.edgelist, delimiter=" ", nodetype=None,
        create_using=nx.Graph())
    graph_int = nx.read_weighted_edgelist(args.edgelist, delimiter=" ", nodetype=int,
        create_using=nx.Graph())

    # Generate structural random walks; node ids come back as strings,
    # so convert each walk to integers to match graph_int's labels.
    walk_model = Struc2Vec(graph.to_directed(), walk_length=10, num_walks=8,
        workers=8, verbose=40)
    walks = [[int(node) for node in one_walk]
        for one_walk in walk_model.return_walk_list()]
    graph = graph_int  # continue with the integer-labelled graph

    configure_paths(args)
    print ("Configured paths")

    # build model
    num_nodes = len(graph)

    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)
    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)
    model.compile(optimizer=optimizer,
        loss=loss,
        target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss",
            patience=args.patience,
            verbose=True),
        Checkpointer(epoch=initial_epoch,
            nodes=sorted(graph.nodes()),
            embedding_directory=args.embedding_path)
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph, walks, args)

    if args.use_generator:
        print ("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples,
                probs,
                model,
                args)

        model.fit_generator(training_generator,
            workers=args.workers,
            max_queue_size=10,
            use_multiprocessing=args.workers>0,
            epochs=args.num_epochs,
            steps_per_epoch=len(training_generator),
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks
        )

    else:
        print ("Training without data generator")

        # Negative samples are appended along the last axis; the targets
        # are dummy zeros because the loss only uses the sample indices.
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)

        model.fit(train_x, train_y,
            shuffle=True,
            batch_size=args.batch_size,
            epochs=args.num_epochs,
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks
        )

    print ("Training complete")

    # Project the learned hyperboloid embedding to the Poincare ball,
    # plot it together with the unit circle (the disk boundary) and save.
    embedding = model.get_weights()[0]
    embedding = hyperboloid_to_poincare_ball(embedding)
    print(embedding)
    ax = plot(embedding)
    theta = np.linspace(0, 2 * np.pi, 200)
    ax.plot(np.cos(theta), np.sin(theta), color="black", linewidth=2)
    ax.axis("equal")
    ax.figure.savefig("Hyper.pdf", bbox_inches='tight')
# Example #2 (score: 0)
# File: main.py — Project: Jappy0/heat
def main():
    """Train a hyperbolic (HEAT) embedding of a graph and optionally visualise it.

    Loads the dataset selected by the command-line arguments, seeds all
    sources of randomness, trains the embedding model defined by
    ``build_model``, and — when ``--visualise`` is given with a 2-d or 3-d
    embedding — draws the graph on the Poincare disk.
    """
    args = parse_args()

    # Validate arguments up front, before any expensive data loading.
    assert not (args.visualise and args.embedding_dim > 2), "Can only visualise two dimensions"
    assert args.embedding_path is not None, "you must specify a path to save embedding"
    if not args.no_walks:
        assert args.walk_path is not None, "you must specify a path to save walks"

    # Seed every source of randomness for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    graph, features, node_labels = load_data(args)
    print ("Loaded dataset")

    configure_paths(args)

    print ("Configured paths")

    # build model
    num_nodes = len(graph)

    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)
    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)
    model.compile(optimizer=optimizer,
        loss=loss,
        target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss",
            patience=args.patience,
            verbose=True),
        Checkpointer(epoch=initial_epoch,
            nodes=sorted(graph.nodes()),
            embedding_directory=args.embedding_path)
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph,
        features, args)

    del features # remove features reference to free up memory

    if args.use_generator:
        print ("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples,
                probs,
                model,
                args)

        model.fit_generator(training_generator,
            workers=args.workers,
            max_queue_size=10,
            use_multiprocessing=args.workers>0,
            epochs=args.num_epochs,
            steps_per_epoch=len(training_generator),
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks
        )

    else:
        print ("Training without data generator")

        # Negative samples are appended along the last axis; the targets
        # are dummy zeros because the loss only uses the sample indices.
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)

        model.fit(train_x, train_y,
            shuffle=True,
            batch_size=args.batch_size,
            epochs=args.num_epochs,
            initial_epoch=initial_epoch,
            verbose=args.verbose,
            callbacks=callbacks
        )

    print ("Training complete")

    if args.visualise:
        embedding = model.get_weights()[0]
        # A 3-d hyperboloid embedding projects to the 2-d Poincare disk.
        if embedding.shape[1] == 3:
            print ("projecting to poincare ball")
            embedding = hyperboloid_to_poincare_ball(embedding)
        draw_graph(graph,
            embedding,
            node_labels,
            path="2d-poincare-disk-visualisation.png")
# Example #3 (score: 0)
def main():
    """Evaluate a saved embedding on node/label classification benchmarks.

    Loads the embedding named by the command-line arguments, filters out
    under-represented classes, projects hyperbolic embeddings to the Klein
    model, runs node-classification and k-fold label-classification
    evaluations, and appends the scores to a shared CSV under a file lock.
    """
    args = parse_args()

    results_dir = args.test_results_dir
    if not os.path.exists(results_dir):
        os.makedirs(results_dir, exist_ok=True)
    results_filename = os.path.join(results_dir,
        "test_results.csv")

    # Nothing to do if this seed has already been evaluated.
    if check_complete(results_filename, args.seed):
        return

    lock_filename = os.path.join(results_dir,
        "test_results.lock")
    touch(lock_filename)

    _, _, node_labels = load_data(args)
    print ("Loaded dataset")

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    min_count = 10
    if node_labels.shape[1] == 1:
        # Single-label case: drop nodes whose class is under-represented.
        counts = Counter(node_labels.flatten())
        keep = np.array([counts[label] >= min_count
            for label in node_labels.flatten()])
        embedding = embedding[keep]
        node_labels = node_labels[keep]
    else:
        assert node_labels.shape[1] > 1
        # Multi-label case: drop rare label columns, then unlabelled rows.
        frequent_cols = node_labels.sum(0) >= min_count
        node_labels = node_labels[:, frequent_cols]
        labelled_rows = node_labels.any(-1)
        embedding = embedding[labelled_rows]
        node_labels = node_labels[labelled_rows]

    # Both hyperbolic models are evaluated in Klein coordinates.
    if args.dist_fn == "hyperboloid":
        print ("loaded a hyperboloid embedding")
        print ("projecting from hyperboloid to poincare")
        embedding = hyperboloid_to_poincare_ball(embedding)
        print ("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)
    elif args.dist_fn == "poincare":
        print ("loaded a poincare embedding")
        print ("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)

    test_results = {}

    label_percentages, f1_micros, f1_macros = \
        evaluate_node_classification(embedding, node_labels)

    for percentage, micro, macro in zip(label_percentages, f1_micros, f1_macros):
        print ("{:.2f}".format(percentage),
            "micro = {:.2f}".format(micro),
            "macro = {:.2f}".format(macro) )
        test_results["{:.2f}_micro".format(percentage)] = micro
        test_results["{:.2f}_macro".format(percentage)] = macro

    k = 10
    k_fold_roc, k_fold_f1, k_fold_precision, k_fold_recall = \
        evaluate_kfold_label_classification(embedding, node_labels, k=k)

    test_results["{}-fold-roc".format(k)] = k_fold_roc
    test_results["{}-fold-f1".format(k)] = k_fold_f1
    test_results["{}-fold-precision".format(k)] = k_fold_precision
    test_results["{}-fold-recall".format(k)] = k_fold_recall

    print ("saving test results to {}".format(results_filename))
    threadsafe_save_test_results(lock_filename, results_filename, args.seed, data=test_results)