Exemple #1
0
    parser.add_argument('--learning_rate', help = 'learning rate', type = float, default = 0.1) # 0.1/1.0
    parser.add_argument('--neg_samp', help = 'negative sampling rate', type = int, default = 0)
    parser.add_argument('--embedding_size', help = 'embedding dimensions', type = int, default = 50)
    parser.add_argument('--hidden_size', help = 'number of hidden units', type = int, default = 1000)
    parser.add_argument('--window_size', help = 'window size in random walk sequences', type = int, default = 3)
    parser.add_argument('--path_size', help = 'length of random walk sequences', type = int, default = 10)
    parser.add_argument('--batch_size', help = 'the size of batch for training instances', type = int, default = 200)
    parser.add_argument('--g_batch_size', help = 'the batch size for graph', type = int, default = 200)
    parser.add_argument('--g_sample_size', help = 'the sample size from label information', type = int, default = 100)
    parser.add_argument('--g_learning_rate', help = "the learning rate of graphs", type = float, default = 1e-2)
    args = parser.parse_args()

    if 'pubmed' in args.link:
        x, y, tx, ty, graph = data.gen_pubmed_dataset(args.link, args.corpus, args.seeds)
    else:
        x, y, tx, ty, graph = data.gen_dataset(args.link, args.corpus, args.seeds)

    # OBJS = [x, y, tx, ty, graph]
    # NAMES = ['x', 'y', 'tx', 'ty', 'graph']
    # DATASET = 'pubmed'
    # if 'citeseer' in args.link:
    #     DATASET = 'citeseer'
    # if 'cora' in args.link:
    #     DATASET = 'cora'
    # print 'saving {}'.format(DATASET)
    # for i in range(len(OBJS)):
    #     cPickle.dump(OBJS[i], open('dump/trans.{}.{}'.format(DATASET, NAMES[i]), 'w'), cPickle.HIGHEST_PROTOCOL)
    # quit()

    m = model.model(args)
    m.add_data(x, tx, y, ty, graph)
Exemple #2
0
    fout.close()

if __name__ == '__main__':
    # Script entry point with two modes:
    #   * `<script> eval` -- summarise OUTPUT_FILE, print one number, exit;
    #   * otherwise       -- build the dataset selected by DATASET and pass
    #                        it to print_data().

    if len(sys.argv) > 1 and sys.argv[1] == 'eval':
        # For 'diel' nothing is computed here: the script just exits.
        if DATASET != 'diel':
            pos, neg = 0, 0
            # Context manager so the handle is closed even when a malformed
            # line makes float() raise part-way through the file.
            with open(OUTPUT_FILE) as fin:
                for line in fin:
                    # Only lines containing 'true' are scored.
                    if 'true' not in line:
                        continue
                    # The last whitespace-separated token is parsed as a
                    # number; exactly 1.0 counts as positive.
                    if float(line.strip().split()[-1]) == 1.0:
                        pos += 1
                    else:
                        neg += 1
            # Fraction of scored lines whose last token is 1.0 (0.0 when no
            # line was scored) -- presumably the accuracy; verify against
            # whatever writes OUTPUT_FILE.
            # A parenthesised single expression prints the same value under
            # the Python 2 print statement and the Python 3 print function.
            print(1.0 * pos / (pos + neg) if pos + neg > 0 else 0.0)
        # sys.exit() rather than quit(): quit is injected by the `site`
        # module and is missing under `python -S` or frozen interpreters.
        sys.exit()

    # DATASET holds a single value, so the branches are mutually exclusive;
    # an elif chain makes that explicit and lets print_data be called once.
    loaded = True
    if DATASET == 'citeseer':
        x, y, tx, ty, graph = data.gen_dataset('../data/citeseer/citeseer.cites', '../data/citeseer/citeseer.content', 0)
    elif DATASET == 'cora':
        x, y, tx, ty, graph = data.gen_dataset('../data/cora/cora.cites', '../data/cora/cora.content', 0)
    elif DATASET == 'pubmed':
        x, y, tx, ty, graph = data.gen_pubmed_dataset('../data/pubmed/pubmed.cites', '../data/pubmed/pubmed.content', 0)
    elif DATASET == 'nell': # attention to the parameters in nell_main
        x, y, tx, ty, graph = nell_main.gen_dataset()
    else:
        # 'diel' (and any unrecognised name) loads and prints nothing.
        loaded = False

    if loaded:
        print_data(x, y, tx, ty, graph)

 def classifier_input_fn(filenames, shuffle=False, batch_size=batch_size, repeat_num=10):
     """Return whatever ``gen_dataset`` builds for the given arguments.

     All four arguments are forwarded positionally and unchanged, in the
     order ``filenames, shuffle, batch_size, repeat_num``.

     Note: the ``batch_size`` default is the value the enclosing scope's
     ``batch_size`` name had when this ``def`` statement executed; later
     rebinding of that name does not change the default.
     """
     dataset = gen_dataset(filenames, shuffle, batch_size, repeat_num)
     return dataset