def main(args):
    '''
    Dump the WL-relabelled sentences for a corpus of graphs, then train the
    skipgram model on that corpus.

    :param args: parsed options object; the attributes read here are corpus,
        output_dir, batch_size, epochs, embedding_size, num_negsample,
        learning_rate, wlk_h, label_filed_name and class_labels_file_name
    :return: None
    '''
    # Pull every option off ``args`` first (same order as they are declared
    # below), so a missing attribute fails before any work is started.
    corpus_root, out_dir = args.corpus, args.output_dir
    n_batch, n_epochs = args.batch_size, args.epochs
    emb_dim, n_neg = args.embedding_size, args.num_negsample
    lr = args.learning_rate
    wl_height = args.wlk_h
    node_label_attr = args.label_filed_name
    labels_fname = args.class_labels_file_name

    # Extension tag handed to train_skipgram: 'g2v' followed by the WL height.
    extension = 'g2v' + str(wl_height)

    # Only the corpus location is validated; out_dir is not checked here.
    assert os.path.exists(corpus_root), "File {} does not exist".format(corpus_root)

    # max_files=0 is passed through unchanged
    # (presumably "no limit" -- confirm against get_files).
    gexf_paths = get_files(dirname=corpus_root, extn='.gexf', max_files=0)
    logging.info(
        'Loaded {} graph file names form {}'.format(len(gexf_paths), corpus_root)
    )

    # Stage 1: relabel the graphs and dump the sentences, timing the call.
    started = time()
    wlk_relabel_and_dump_memory_version(
        gexf_paths,
        max_h=wl_height,
        node_label_attr_name=node_label_attr,
    )
    logging.info('dumped sg2vec sentences in {} sec.'.format(time() - started))

    # Stage 2: train the skipgram model. A timestamp is taken before training
    # but no duration is reported in this function; the value returned by
    # train_skipgram is not used here either.
    started = time()
    train_skipgram(
        corpus_root, extension, lr, emb_dim, n_neg,
        n_epochs, n_batch, out_dir, labels_fname,
    )
def main(args):
    '''
    Run the full pipeline on a corpus of graphs: dump the subgraph2vec
    sentences for every graph file, train the skipgram model, then call
    perform_classification with the resulting embedding file name.

    :param args: parsed options object; the attributes read here are corpus,
        output_dir, batch_size, epochs, embedding_size, num_negsample,
        learning_rate, valid_size, n_cpus, wlk_h, label_filed_name and
        class_labels_file_name
    :return: None
    '''
    # Pull every option off ``args`` first (same order as they are declared
    # below), so a missing attribute fails before any work is started.
    corpus_root, out_dir = args.corpus, args.output_dir
    n_batch, n_epochs = args.batch_size, args.epochs
    emb_dim, n_neg = args.embedding_size, args.num_negsample
    lr = args.learning_rate
    n_valid = args.valid_size
    n_workers = args.n_cpus
    wl_height = args.wlk_h
    node_label_attr = args.label_filed_name
    labels_fname = args.class_labels_file_name

    # Extension tag shared by training and classification: 'WL' + WL height.
    extension = 'WL' + str(wl_height)

    assert os.path.exists(corpus_root), "File {} does not exist".format(corpus_root)
    # The output directory is created on demand rather than required to exist.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # max_files=0 is passed through unchanged
    # (presumably "no limit" -- confirm against get_files).
    gexf_paths = get_files(dirname=corpus_root, extn='.gexf', max_files=0)
    logging.info(
        'Loaded {} graph file names form {}'.format(len(gexf_paths), corpus_root)
    )

    # Stage 1: one dump_subgraph2vec_sentences call per graph file, fanned out
    # over n_workers jobs (Parallel/delayed are presumably joblib's -- confirm
    # against the file's imports). The serial equivalent is a plain loop
    # calling dump_subgraph2vec_sentences(path, wl_height, node_label_attr).
    started = time.time()
    runner = Parallel(n_jobs=n_workers)
    dump_tasks = (
        delayed(dump_subgraph2vec_sentences)(path, wl_height, node_label_attr)
        for path in gexf_paths
    )
    runner(dump_tasks)
    logging.info(
        'Dumped subgraph2vec sentences for all {} graphs in {} in {} sec'.format(
            len(gexf_paths), corpus_root, round(time.time() - started)
        )
    )

    # Stage 2: train the skipgram model and keep the value it returns, which
    # is forwarded to the classification step below.
    started = time.time()
    embedding_path = train_skipgram(
        corpus_root, extension, lr, emb_dim, n_neg,
        n_epochs, n_batch, out_dir, n_valid,
    )
    logging.info(
        'Trained the skipgram model in {} sec.'.format(
            round(time.time() - started, 2)
        )
    )

    # Stage 3: classification using the value returned by training.
    perform_classification(corpus_root, extension, embedding_path, labels_fname)
max_h=wlk_h, node_label_attr_name=label_field_name) print("Generated Graph Document Corpus in %s seconds" % (round(time() - t0, 2))) ############################################# #### Neural Language Model Training ############################################# print("SKIPGRAM LEARNING PHASE") # train the skipgram architecture t0 = time() embedding_fname = train_skipgram(corpus_dir, wl_extension, learning_rate, embedding_size, num_negsample, epochs, batch_size, wlk_h, output_dir, min_count=minCount) print("Trained the skipgram model in %s seconds" % (round(time() - t0, 2))) print("SKIPGRAM LEARNING DONE") ############################################# #### Classification Phase on Learned Embeddings ############################################# print("DOING classification") # perform single classification and evaluation # classify_scores = perform_classification(corpus_dir, wl_extension, embedding_fname, class_labels_fname) # acc, prec, recall, f_score = classify_scores