Example #1
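# Runs one experiment end to end: builds sentiment/price features, normalizes
# each feature column, splits into train/test sets, reshapes the inputs, then
# trains the model and reports the test RMSE.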
def run_experiment(look_forward, hidden_size, batch_size, epochs, dropout,
                   dataset):
    x, y = get_sentiments_prices(dataset['twitter_sentiments'],
                                 dataset['reddit_sentiments'],
                                 dataset['coin_price'], look_forward)

    # normalize each feature column independently
    for i in range(x.shape[1]):
        x[:, i] = normalize_array(x[:, i])

    # split into train and test sets
    train_x, test_x = split(x)
    train_y, test_y = split(y)

    # reshape inputs to (samples, look_forward, features)
    train_x = np.reshape(train_x,
                         (train_x.shape[0], look_forward, train_x.shape[1]))
    test_x = np.reshape(test_x,
                        (test_x.shape[0], look_forward, test_x.shape[1]))

    model = create_model(hidden_size=hidden_size,
                         look_forward=look_forward,
                         dropout=dropout)
    model = train(model,
                  train_x,
                  train_y,
                  batch_size=batch_size,
                  epochs=epochs)
    y_pred = test(model, test_x)
    score = evaluate(test_y, y_pred)
    print('Test Score: %.2f RMSE' % score)
    return score
Example #2
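# Loads one of the supported hypergraph datasets (citeseer, cora, dblp, pubmed),
# builds the HyperGCN data dict and train/test index splits (over hyperedges
# when args.predict_edge is set, over nodes otherwise), then trains and
# evaluates the model, returning the test accuracy and training time.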
def train_hypergcn(args):

    if args.dataset_name in ['citeseer', 'cora']:
        if args.do_svd:
            data_path = 'data/citeseer.pt' if args.dataset_name == 'citeseer' else 'data/cora_author_10cls300.pt'
        else:
            data_path = 'data/citeseer6cls3703.pt' if args.dataset_name == 'citeseer' else 'data/cora_author_10cls1000.pt'
        args = hypergraph.gen_data_cora(args,
                                        data_path=data_path,
                                        flip_edge_node=False)
    elif args.dataset_name in ['dblp', 'pubmed']:
        data_path = 'data/pubmed_data.pt' if args.dataset_name == 'pubmed' else 'data/dblp_data.pt'
        args = hypergraph.gen_data_dblp(args, data_path=data_path)
    else:
        raise Exception('dataset {} not supported'.format(args.dataset_name))
    if args.predict_edge:
        hyp_struct = create_hypergcn_struct(args)
        n_edge = len(args.edge_X)
        n_node = len(args.all_labels)
        # concatenate node and edge labels / features
        labels = torch.cat([args.all_labels, args.edge_classes], 0)
        X = torch.cat([args.v, args.edge_X], 0)

        datadict = {
            'hypergraph': hyp_struct,
            'features': args.v,
            'labels': labels,
            'n': len(args.v) + n_edge
        }

        # number of edges to label for training, based on the dataset's label rate
        n_labels = max(
            1, math.ceil(n_edge * utils.get_label_percent(args.dataset_name)))
        # sample training edges at random; the remaining edges form the test set
        train_idx = torch.from_numpy(
            np.random.choice(n_edge, size=(n_labels, ),
                             replace=False)).to(torch.int64)

        all_idx = torch.LongTensor(list(range(n_edge)))
        all_idx[train_idx] = -1
        test_idx = all_idx[all_idx > -1]
        # shift edge indices past the node indices
        train_idx += n_node
        test_idx += n_node
        hg_args = hgcn_config.parse()
    else:
        hyp_struct = create_hypergcn_struct(args)
        datadict = {
            'hypergraph': hyp_struct,
            'features': args.v,
            'labels': args.all_labels,
            'n': len(args.v)
        }
        train_idx = args.label_idx
        all_idx = torch.LongTensor(list(range(len(args.v))))

        all_idx[train_idx] = -1
        test_idx = all_idx[all_idx > -1]
        hg_args = hgcn_config.parse()
    hg_args.n_cls = int(args.all_labels.max() + 1)
    hg_args.fast = args.fast
    save_data = False  #True
    if save_data:
        full_datadict = dict(datadict)
        full_datadict.update({
            'train_idx': train_idx,
            'test_idx': test_idx,
            'n_cls': hg_args.n_cls
        })
        torch.save(full_datadict,
                   '../hypergcn/data/{}_torch.pt'.format(args.dataset_name))

    hg = hgcn_model.initialise(datadict, hg_args)
    # train, then evaluate accuracy on the held-out test indices
    time0 = time.time()
    hg = hgcn_model.train(hg, datadict, train_idx, hg_args)
    dur = time.time() - time0
    hg_acc = hgcn_model.test(hg, datadict, test_idx, hg_args)

    print('hg acc ', hg_acc)
    return hg_acc, dur
Example #3
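# Command-line test script: parses checkpoint/result paths and test settings,
# verifies the checkpoint directory exists, creates the results directory if
# needed, then builds the model in a TensorFlow session and runs model.test.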
import argparse
import os
import tensorflow as tf

from model import model

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_dir', type=str, default='checkpoint')
parser.add_argument('--results_dir', type=str, default='results')
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--c_dim', type=int, default=3)
parser.add_argument('--num_shots', type=int, default=3)
parser.add_argument('--dataset', type=str, default='dataset/tf_record/train.tfrecords')
parser.add_argument('--test_h', type=int, default=800)
parser.add_argument('--test_w', type=int, default=1200)
parser.add_argument('--save_freq', type=int, default=0)

args = parser.parse_args()

assert os.path.exists(args.checkpoint_dir), 'checkpoint_dir does not exist'

if not os.path.exists(args.results_dir):
    os.makedirs(args.results_dir)

# let TensorFlow grow GPU memory usage on demand
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    model = model(sess, args, train=False)
    model.test(args)
Example #4
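# Entry point that dispatches on --action: 'data' prepares one of the gigaword,
# reuters, or cnn datasets; any other action builds the model in a TensorFlow
# session and pretrains, trains, tests, or saves it.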
import argparse

import tensorflow as tf
from model import model
from data import data

def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--action', required=True)
    parser.add_argument('--datatype')
    parser.add_argument('--load', action='store_true')
    args = parser.parse_args()
    return args

args = parse()

if args.action == 'data':
    if args.datatype not in ('gigaword', 'reuters', 'cnn'):
        print('Invalid data type.')
    else:
        data = data(args)
        data.prepare_data(args)
else:
    sess = tf.Session()
    model = model(sess, args)
    if args.action == 'pretrain':
        model.pretrain()
    elif args.action == 'train':
        model.train()
    elif args.action == 'test':
        model.test()
    elif args.action == 'save':
        model.save()
Example #5
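# Evaluates a model on the MNIST test split (normalized inputs, one-hot labels).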
from model import model
from dataset.mnist import load_mnist

import numpy as np

if __name__ == '__main__':
    model = model()
    _, testset = load_mnist(normalize=True, one_hot_label=True)
    model.test(testset)
Example #6
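# Notebook-style script (exported cells): loads the dataset and its train/test
# split, then initialises, trains, and tests HyperGCN, printing the accuracy.
# `args` is assumed to be defined in an earlier cell that is not shown here.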
# In[ ]:

from data import data

dataset, train, test = data.load(args)
print("length of train is", len(train))

# # initialise HyperGCN

# In[ ]:

from model import model

HyperGCN = model.initialise(dataset, args)

# # train and test HyperGCN

# In[ ]:

HyperGCN = model.train(HyperGCN, dataset, train, args)
acc = model.test(HyperGCN, dataset, test, args)
print(float(acc))

# # store result

# In[ ]:

Example #7
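# Excerpt that begins mid-function (likely the tail of output_results): it dumps
# per-name u/v embedding vectors, their element-wise average, and the confusion
# matrix to test.json. The main block prepares the data, trains and tests the
# model, and then calls output_results.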
    output = {}
    for name in names:
        temp = {}
        name_idx = onehot_mapping[name]

        temp["vvec"] = [float(x) for x in v_mat[name_idx, :]]
        temp["uvec"] = [float(x) for x in u_mat[:, name_idx]]
        # element-wise average of the v-row and u-column vectors
        temp["avg"] = [float(x) for x in npy.add(v_mat[name_idx, :], u_mat[:, name_idx]) * 0.5]

        output[name] = temp

    output["confusion_matrix"] = confusion_matrix.tolist()

    with open("test.json", "w") as outfile:
        json.dump(output, outfile)

if __name__ == "__main__":
    fnames = preprocess.get_filenames()
    onehot_mapping, data_mat, label_mat, weight_mat = preprocess.prepare_data(fnames)

    # Test results
    test_data, test_lab = preprocess.prepare_test_data(onehot_mapping)

    model = model.Model(data_mat, label_mat, weight_mat)
    model.train(ModelConfig().numEpochs)

    conf_matrix = model.test(test_data, test_lab, species_clause=True)

    output_results(model, onehot_mapping, conf_matrix)
Example #8
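# Early-stopping training loop: trains for up to 100 epochs, saves the
# checkpoint with the best dev accuracy, stops once the patience limit is
# reached, then restores the best checkpoint and evaluates on the test set.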
model = model(num_classes=data_load.num_classes)
session_config = tf.ConfigProto(log_device_placement=False,
                                inter_op_parallelism_threads=0,
                                intra_op_parallelism_threads=0,
                                allow_soft_placement=True)
session_config.gpu_options.allow_growth = True
session_config.gpu_options.allocator_type = 'BFC'
with tf.Session(config=session_config) as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    best_dev_acc = 0
    biggest_patient = 20
    patient = 0
    for i in range(100):
        model.train(sess, data_load.train_datas, data_load.train_labels, 32)
        dev_acc = model.test(sess, data_load.dev_datas, data_load.dev_labels,
                             32)
        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            saver.save(sess, "./model/best_result.ckpt")
            patient = 0
            print("Epoch %d: best dev acc is updated to %f" %
                  (i, best_dev_acc))
        else:
            patient += 1
            print("Epoch %d: best acc is not updated, patience is %d" %
                  (i, patient))
        if patient == biggest_patient:
            print("Patience limit reached, training finished")
            break
    saver.restore(sess, "./model/best_result.ckpt")
    test_acc = model.test(sess, data_load.test_datas, data_load.test_labels,
                          32)