# Training hyper-parameters registered on `parser` (created above this chunk).
# Each row is (flag, help text, value type, default).
_HYPERPARAMETERS = (
    ('--learning_rate', 'learning rate', float, 0.1),  # author's note: 0.1/1.0
    ('--neg_samp', 'negative sampling rate', int, 0),
    ('--embedding_size', 'embedding dimensions', int, 50),
    ('--hidden_size', 'number of hidden units', int, 1000),
    ('--window_size', 'window size in random walk sequences', int, 3),
    ('--path_size', 'length of random walk sequences', int, 10),
    ('--batch_size', 'the size of batch for training instances', int, 200),
    ('--g_batch_size', 'the batch size for graph', int, 200),
    ('--g_sample_size', 'the sample size from label information', int, 100),
    ('--g_learning_rate', 'the learning rate of graphs', float, 1e-2),
)
for _flag, _help, _type, _default in _HYPERPARAMETERS:
    parser.add_argument(_flag, help=_help, type=_type, default=_default)

args = parser.parse_args()

# Paths containing 'pubmed' go through the dedicated pubmed loader; everything
# else uses the generic one. Both are called with the same three options
# (args.link, args.corpus, args.seeds), which are declared above this chunk.
_load_dataset = (
    data.gen_pubmed_dataset if 'pubmed' in args.link else data.gen_dataset
)
x, y, tx, ty, graph = _load_dataset(args.link, args.corpus, args.seeds)

# Build the model from the parsed options and hand it the loaded data.
# NOTE(review): add_data receives (x, tx, y, ty, graph), which is not the
# order the loader returned them in -- presumably that matches its signature;
# confirm before reordering.
m = model.model(args)
m.add_data(x, tx, y, ty, graph)
fout.close()  # NOTE(review): tail of a definition that starts above this chunk; kept unchanged


# Script entry point. Two modes:
#   * `<script> eval` -- score an existing OUTPUT_FILE and exit;
#   * no argument     -- load the dataset selected by DATASET and pass it to
#                        print_data.
if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'eval':
        if DATASET != 'diel':
            pos, neg = 0, 0
            # Use a context manager so the file handle is always released;
            # the bare open() in a for-loop never closed it explicitly.
            with open(OUTPUT_FILE) as results:
                for line in results:
                    # Only lines containing 'true' are scored.
                    if 'true' not in line:
                        continue
                    # The last whitespace-separated field is compared to 1.0;
                    # a match counts as positive, anything else as negative.
                    if float(line.strip().split()[-1]) == 1.0:
                        pos += 1
                    else:
                        neg += 1
            # Fraction of positives among scored lines (0.0 if none scored).
            # Parenthesized single-argument form prints identically under
            # Python 2 and also parses under Python 3.
            print(1.0 * pos / (pos + neg) if pos + neg > 0 else 0.0)
        # sys.exit() rather than quit(): quit is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        sys.exit()

    # Exactly one branch can match, so the independent ifs are chained.
    if DATASET == 'citeseer':
        x, y, tx, ty, graph = data.gen_dataset(
            '../data/citeseer/citeseer.cites',
            '../data/citeseer/citeseer.content',
            0)
        print_data(x, y, tx, ty, graph)
    elif DATASET == 'cora':
        x, y, tx, ty, graph = data.gen_dataset(
            '../data/cora/cora.cites',
            '../data/cora/cora.content',
            0)
        print_data(x, y, tx, ty, graph)
    elif DATASET == 'pubmed':
        x, y, tx, ty, graph = data.gen_pubmed_dataset(
            '../data/pubmed/pubmed.cites',
            '../data/pubmed/pubmed.content',
            0)
        print_data(x, y, tx, ty, graph)
    elif DATASET == 'nell':
        # attention to the parameters in nell_main
        x, y, tx, ty, graph = nell_main.gen_dataset()
        print_data(x, y, tx, ty, graph)
    elif DATASET == 'diel':
        # Nothing is generated for 'diel' in this script.
        pass
def classifier_input_fn(
        filenames,
        shuffle=False,
        batch_size=batch_size,
        repeat_num=10):
    """Return the result of ``gen_dataset`` for the given input files.

    Thin wrapper: all four arguments are forwarded positionally, in
    declaration order, to ``gen_dataset``.

    Args:
        filenames: Passed through as the first argument of ``gen_dataset``.
        shuffle: Passed through unchanged; defaults to ``False``.
        batch_size: Passed through unchanged. The default is the value of the
            module-level ``batch_size`` at the moment this function is
            defined, not at call time.
        repeat_num: Passed through unchanged; defaults to ``10``.

    Returns:
        Whatever ``gen_dataset`` returns.
    """
    dataset = gen_dataset(filenames, shuffle, batch_size, repeat_num)
    return dataset