import numpy as np


def run_experiment(look_forward, hidden_size, batch_size, epochs, dropout, dataset):
    x, y = get_sentiments_prices(dataset['twitter_sentiments'],
                                 dataset['reddit_sentiments'],
                                 dataset['coin_price'],
                                 look_forward)

    # Normalize each feature column independently.
    for i in range(x.shape[1]):
        x[:, i] = normalize_array(x[:, i])

    # Split into train and test sets.
    train_x, test_x = split(x)
    train_y, test_y = split(y)
    train_x = np.reshape(train_x, (train_x.shape[0], look_forward, train_x.shape[1]))
    test_x = np.reshape(test_x, (test_x.shape[0], look_forward, test_x.shape[1]))

    model = create_model(hidden_size=hidden_size, look_forward=look_forward, dropout=dropout)
    model = train(model, train_x, train_y, batch_size=batch_size, epochs=epochs)
    y_pred = test(model, test_x)
    score = evaluate(test_y, y_pred)
    print('Test Score: %.2f RMSE' % score)
    return score
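# Hedged usage sketch, not part of the original file: it only illustrates how
# run_experiment is meant to be driven. The dataset keys mirror the dict lookups
# above; the hyperparameter values are hypothetical, and get_sentiments_prices,
# normalize_array, split, create_model, train, test and evaluate are assumed to
# be supplied elsewhere in the project.
#
#     dataset = {
#         'twitter_sentiments': twitter_sentiment_series,
#         'reddit_sentiments': reddit_sentiment_series,
#         'coin_price': coin_price_series,
#     }
#     rmse = run_experiment(look_forward=1, hidden_size=32, batch_size=16,
#                           epochs=50, dropout=0.2, dataset=dataset)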
import math
import time

import numpy as np
import torch

# hypergraph, utils, hgcn_config, hgcn_model and create_hypergcn_struct are
# assumed to be project-local modules/helpers available in this file's scope.


def train_hypergcn(args):
    # Load the hypergraph data for the requested dataset.
    if args.dataset_name in ['citeseer', 'cora']:
        if args.do_svd:
            data_path = 'data/citeseer.pt' if args.dataset_name == 'citeseer' else 'data/cora_author_10cls300.pt'
        else:
            data_path = 'data/citeseer6cls3703.pt' if args.dataset_name == 'citeseer' else 'data/cora_author_10cls1000.pt'
        args = hypergraph.gen_data_cora(args, data_path=data_path, flip_edge_node=False)
    elif args.dataset_name in ['dblp', 'pubmed']:
        data_path = 'data/pubmed_data.pt' if args.dataset_name == 'pubmed' else 'data/dblp_data.pt'
        args = hypergraph.gen_data_dblp(args, data_path=data_path)
    else:
        raise Exception('dataset {} not supported'.format(args.dataset_name))

    if args.predict_edge:
        # Treat hyperedges as additional nodes and predict their classes.
        hyp_struct = create_hypergcn_struct(args)
        n_edge = len(args.edge_X)
        n_node = len(args.all_labels)
        labels = torch.cat([args.all_labels, args.edge_classes], 0)
        X = torch.cat([args.v, args.edge_X], 0)
        # HyperGCN expects dict_keys(['hypergraph', 'features', 'labels', 'n']).
        datadict = {
            'hypergraph': hyp_struct,
            'features': args.v,
            'labels': labels,
            'n': len(args.v) + n_edge
        }
        n_labels = max(1, math.ceil(n_edge * utils.get_label_percent(args.dataset_name)))
        train_idx = torch.from_numpy(
            np.random.choice(n_edge, size=(n_labels,), replace=False)).to(torch.int64)
        all_idx = torch.LongTensor(list(range(n_edge)))
        all_idx[train_idx] = -1
        test_idx = all_idx[all_idx > -1]
        # Edge indices are appended after the node indices.
        train_idx += n_node
        test_idx += n_node
        hg_args = hgcn_config.parse()
    else:
        hyp_struct = create_hypergcn_struct(args)
        # HyperGCN expects dict_keys(['hypergraph', 'features', 'labels', 'n']).
        datadict = {
            'hypergraph': hyp_struct,
            'features': args.v,
            'labels': args.all_labels,
            'n': len(args.v)
        }
        train_idx = args.label_idx
        all_idx = torch.LongTensor(list(range(len(args.v))))
        all_idx[train_idx] = -1
        test_idx = all_idx[all_idx > -1]
        hg_args = hgcn_config.parse()

    hg_args.n_cls = int(args.all_labels.max() + 1)
    hg_args.fast = args.fast

    # Optionally serialize the processed data for the reference HyperGCN code base.
    save_data = False
    if save_data:
        full_datadict = dict(datadict)
        full_datadict.update({
            'train_idx': train_idx,
            'test_idx': test_idx,
            'n_cls': hg_args.n_cls
        })
        torch.save(full_datadict, '../hypergcn/data/{}_torch.pt'.format(args.dataset_name))

    # Train and evaluate HyperGCN.
    hg = hgcn_model.initialise(datadict, hg_args)
    time0 = time.time()
    hg = hgcn_model.train(hg, datadict, train_idx, hg_args)
    dur = time.time() - time0
    hg_acc = hgcn_model.test(hg, datadict, test_idx, hg_args)
    print('hg acc ', hg_acc)
    return hg_acc, dur
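# Hedged driver sketch, not part of the original code: it only shows how the
# (accuracy, duration) pair returned by train_hypergcn might be aggregated over
# repeated runs. The `args` namespace is assumed to have been populated by the
# surrounding option parser (dataset_name, do_svd, predict_edge, fast, ...).
#
#     accs, durs = [], []
#     for _ in range(5):
#         acc, dur = train_hypergcn(args)
#         accs.append(acc)
#         durs.append(dur)
#     print('mean acc {:.4f} over {} runs, {:.1f}s per run'.format(
#         sum(accs) / len(accs), len(accs), sum(durs) / len(durs)))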
import argparse
import os

import tensorflow as tf

from model import model

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_dir', type=str, default='checkpoint')
parser.add_argument('--results_dir', type=str, default='results')
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--c_dim', type=int, default=3)
parser.add_argument('--num_shots', type=int, default=3)
parser.add_argument('--dataset', type=str, default='dataset/tf_record/train.tfrecords')
parser.add_argument('--test_h', type=int, default=800)
parser.add_argument('--test_w', type=int, default=1200)
parser.add_argument('--save_freq', type=int, default=0)
args = parser.parse_args()

assert os.path.exists(args.checkpoint_dir)
if not os.path.exists(args.results_dir):
    os.makedirs(args.results_dir)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    model = model(sess, args, train=False)
    model.test(args)
import argparse

import tensorflow as tf

from model import model
from data import data


def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--action', required=True)
    parser.add_argument('--datatype')
    parser.add_argument('--load', action='store_true')
    args = parser.parse_args()
    return args


args = parse()
if args.action == 'data':
    if args.datatype not in ('gigaword', 'reuters', 'cnn'):
        print('Invalid data type.')
    else:
        data = data(args)
        data.prepare_data(args)
else:
    sess = tf.Session()
    model = model(sess, args)
    if args.action == 'pretrain':
        model.pretrain()
    elif args.action == 'train':
        model.train()
    elif args.action == 'test':
        model.test()
    elif args.action == 'save':
        model.save()
import numpy as np

from model import model
from dataset.mnist import load_mnist

if __name__ == '__main__':
    model = model()
    _, testset = load_mnist(normalize=True, one_hot_label=True)
    model.test(testset)
# In[ ]:

from data import data

dataset, train, test = data.load(args)
print("length of train is", len(train))


# # initialise HyperGCN

# In[ ]:

from model import model

HyperGCN = model.initialise(dataset, args)


# # train and test HyperGCN

# In[ ]:

HyperGCN = model.train(HyperGCN, dataset, train, args)
acc = model.test(HyperGCN, dataset, test, args)
print(float(acc))


# # store result

# In[ ]:


# # 'r': [run all cells](https://stackoverflow.com/questions/33143753/jupyter-ipython-notebooks-shortcut-for-run-all)

# In[ ]:
output = {}
for name in names:
    temp = {}
    name_idx = onehot_mapping[name]
    temp["vvec"] = [float(x) for x in v_mat[name_idx, :]]
    temp["uvec"] = [float(x) for x in u_mat[:, name_idx]]
    temp["avg"] = [float(x) for x in npy.add(v_mat[name_idx, :], u_mat[:, name_idx]) * 0.5]
    output[name] = temp
output["confusion_matrix"] = confusion_matrix.tolist()
with open("test.json", "w") as outfile:
    json.dump(output, outfile)


if __name__ == "__main__":
    fnames = preprocess.get_filenames()
    onehot_mapping, data_mat, label_mat, weight_mat = preprocess.prepare_data(fnames)

    # Test results
    test_data, test_lab = preprocess.prepare_test_data(onehot_mapping)
    model = model.Model(data_mat, label_mat, weight_mat)
    model.train(ModelConfig().numEpochs)
    conf_matrix = model.test(test_data, test_lab, species_clause=True)
    output_results(model, onehot_mapping, conf_matrix)
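# Hedged illustration, not in the original source: the shape of the JSON that the
# block above writes to test.json. Per-name vectors come from the rows of v_mat
# and the columns of u_mat; "avg" is their element-wise mean. The concrete names,
# values and vector lengths below are placeholders.
#
#     {
#         "some_name": {
#             "vvec": [0.12, -0.03, ...],
#             "uvec": [0.08, 0.21, ...],
#             "avg":  [0.10, 0.09, ...]
#         },
#         "confusion_matrix": [[10, 2], [1, 7]]
#     }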
model = model(num_classes=data_load.num_classes)

session_config = tf.ConfigProto(log_device_placement=False,
                                inter_op_parallelism_threads=0,
                                intra_op_parallelism_threads=0,
                                allow_soft_placement=True)
session_config.gpu_options.allow_growth = True
session_config.gpu_options.allocator_type = 'BFC'

with tf.Session(config=session_config) as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    # Early stopping: keep the checkpoint with the best dev accuracy and stop
    # once it has not improved for `biggest_patient` consecutive epochs.
    best_dev_acc = 0
    biggest_patient = 20
    patient = 0
    for i in range(100):
        model.train(sess, data_load.train_datas, data_load.train_labels, 32)
        dev_acc = model.test(sess, data_load.dev_datas, data_load.dev_labels, 32)
        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            saver.save(sess, "./model/best_result.ckpt")
            patient = 0
            print("Epoch %d: best dev acc is updated to %f" % (i, best_dev_acc))
        else:
            patient += 1
            print("Epoch %d: best dev acc is not updated, patience is %d" % (i, patient))
            if patient == biggest_patient:
                print("Patience limit reached, training finished")
                break

    # Evaluate the best checkpoint on the test set.
    saver.restore(sess, "./model/best_result.ckpt")
    test_acc = model.test(sess, data_load.test_datas, data_load.test_labels, 32)