def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-batsize', default=100, type=int)  ##100 (type=int so CLI values work in the batch arithmetic below)
    parser.add_argument('-nlayers', default=1, type=int, help='num_hid_layers before output')
    parser.add_argument('-hdim', default=200, type=int)  ##200 for freyfaces
    parser.add_argument('-zdim', default=2, type=int)  ##2
    parser.add_argument('-lmbda', default=0., type=float, help='weight decay coeff')  ##0.001
    parser.add_argument('-lr', default=0.01, type=float, help='learning rate')  ##0.01
    parser.add_argument('-epochs', default=100, type=int)  ##1000
    parser.add_argument('-print_every', default=5, type=int)  ##100
    parser.add_argument('-save_every', default=50, type=int)  ##1
    parser.add_argument('-outfile', default='vae_model.pk')
    parser.add_argument('-dset', default='mnist')  ##mnist freyfaces
    parser.add_argument('-COV', default=False, type=bool)
    parser.add_argument('-decM', default='gaussian', help='bernoulli | gaussian')
    args = parser.parse_args()

    batsize = args.batsize
    dset = args.dset
    data = load_dataset(dset)
    valid_fg = 0
    dec_nonlin = T.nnet.relu  ##T.nnet.softplus
    if dset == 'mnist':
        train_x, train_y = data['train']  ##mnist: (N,784)
        valid_x, valid_y = data['valid']
        num_valid_bats = valid_x.shape[0] / batsize
        print "valid data shape: ", valid_x.shape
        valid_fg = 1
    elif dset == 'freyfaces':
        train_x = data
    print "training data shape: ", train_x.shape

    model = VAE(train_x.shape[1], args, dec_nonlin=dec_nonlin)
    num_train_bats = train_x.shape[0] / batsize  ##discard last <batsize
    begin = time.time()
    for i in xrange(args.epochs):
        for k in xrange(num_train_bats):
            x = train_x[k * batsize:(k + 1) * batsize, :]
            eps = np.random.randn(x.shape[0], args.zdim).astype(floatX)
            cost = model.train(x, eps, i)  ##update_times=epochs*num_train_bats
        j = i + 1
        if j % args.print_every == 0:
            end = time.time()
            print('epoch %d, cost %.2f, time %.2fs' % (j, cost, end - begin))
            begin = end
            if valid_fg == 1:
                valid_cost = 0
                for l in xrange(num_valid_bats):
                    x_val = valid_x[l * batsize:(l + 1) * batsize, :]
                    eps_val = np.zeros((x_val.shape[0], args.zdim), dtype=floatX)
                    valid_cost = valid_cost + model.test(x_val, eps_val)
                valid_cost = valid_cost / num_valid_bats
                print('valid cost: %f' % valid_cost)
        if j % args.save_every == 0:
            with open(args.outfile, 'wb') as f:
                pk.dump(model, f, protocol=pk.HIGHEST_PROTOCOL)
            print('model saved')
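# The loop above samples eps ~ N(0, I) during training and feeds eps = 0 at
# validation time, i.e. it evaluates the decoder at the posterior mean. A
# minimal self-contained sketch of the reparameterization trick this relies on
# (standalone numpy with hypothetical names, not the VAE class used above):
import numpy as np

def reparameterize(mu, log_sigma_sq, eps=None):
    """Draw z = mu + sigma * eps; eps=None gives the deterministic posterior mean."""
    if eps is None:
        eps = np.zeros_like(mu)
    return mu + np.exp(0.5 * log_sigma_sq) * eps

mu = np.zeros((4, 2))            # batch of 4, zdim = 2
log_sigma_sq = np.zeros((4, 2))
z_train = reparameterize(mu, log_sigma_sq, np.random.randn(4, 2))  # stochastic, as in training
z_valid = reparameterize(mu, log_sigma_sq)                         # posterior mean, as in eps_val above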
def main():
    args = utils.get_args()
    dataset = utils.load_dataset(os.path.join(args.data_path, DATASET_FILE))
    index2word, word2index = utils.load_dicts(os.path.join(args.data_path, VOCABULARY_FILE))
    print("Use dataset with {} sentences".format(dataset.shape[0]))

    batch_size = args.batch_size
    noise_size = args.noise_size
    with tf.Graph().as_default(), tf.Session() as session:
        lstm_gan = LSTMGAN(
            SENTENCE_SIZE,
            VOCABULARY_SIZE,
            word2index[SENTENCE_START_TOKEN],
            hidden_size_gen=args.hid_gen,
            hidden_size_disc=args.hid_disc,
            input_noise_size=noise_size,
            batch_size=batch_size,
            dropout=args.dropout,
            lr=args.lr,
            grad_cap=args.grad_clip
        )
        session.run(tf.initialize_all_variables())

        if args.save_model or args.load_model:
            saver = tf.train.Saver()
        if args.load_model:
            try:
                saver.restore(session, utils.SAVER_FILE)
            except ValueError:
                print("Can't find model file")
                sys.exit(1)

        while True:
            offset = 0.
            for dataset_part in utils.iterate_over_dataset(dataset, batch_size * args.disc_count):
                print("Start train discriminator with offset {}...".format(offset))
                for ind, batch in enumerate(utils.iterate_over_dataset(dataset_part, batch_size)):
                    noise = np.random.random(size=(batch_size, noise_size))
                    cost = lstm_gan.train_disc_on_batch(session, noise, batch)
                    print("Processed {} sentences with train cost = {}".format((ind + 1) * batch_size, cost))
                print("Start train generator...")
                for ind in range(args.gen_count):
                    noise = np.random.random(size=(batch_size, noise_size))
                    cost = lstm_gan.train_gen_on_batch(session, noise)
                    if args.gen_sent:
                        sent = lstm_gan.generate_sent(session, np.random.random(size=(noise_size,)))
                        print(' '.join(index2word[i] for i in sent))
                    print("Processed {} noise inputs with train cost {}".format((ind + 1) * batch_size, cost))
                offset += batch_size * args.disc_count
                if args.save_model:
                    saver.save(session, utils.SAVER_FILE)  # fixed: was `sess`, which is undefined here
                    print("Model saved")
def trainer(model_params):
    """Train a sketch-rnn model."""
    np.set_printoptions(precision=8, edgeitems=6, linewidth=200, suppress=True)
    print('Loading data files.')
    train_set, model_params = utils.load_dataset(FLAGS.root_dir, FLAGS.dataset, model_params)

    reset_graph()
    model = sketch_rnn_model.Model(model_params)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.load_pretrain:
        load_pretrain(sess, FLAGS.vae_type, FLAGS.enc_type, FLAGS.dataset, FLAGS.basenet, FLAGS.log_root)
    if FLAGS.resume_training:
        resume_train(sess, FLAGS.load_dir, FLAGS.dataset, FLAGS.enc_type, FLAGS.basenet, FLAGS.feat_type, FLAGS.log_root)

    train(sess, model, train_set)
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [EN_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size, n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, optimizer, train_iter, en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f" % (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
def tester(model_params):
    """Test model."""
    np.set_printoptions(precision=8, edgeitems=6, linewidth=200, suppress=True)

    print('Hyperparams:')
    for key, val in model_params.values().iteritems():
        print('%s = %s' % (key, str(val)))
    print('Loading data files.')
    test_set, sample_model_params, gen_model_params = utils.load_dataset(
        FLAGS.root_dir, FLAGS.dataset, model_params, inference_mode=True)

    reset_graph()
    sample_model = sketch_rnn_model.Model(sample_model_params)
    gen_model = sketch_rnn_model.Model(gen_model_params, reuse=True)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.dataset in ['shoesv2f_sup', 'shoesv2f_train']:
        dataset = 'shoesv2'
    else:
        dataset = FLAGS.dataset

    if FLAGS.resume_training:
        if FLAGS.load_dir == '':
            FLAGS.load_dir = FLAGS.log_root.split('runs')[0] + 'model_to_test/%s/' % dataset
        # set dir to load the model for testing
        FLAGS.load_dir = os.path.join(FLAGS.load_dir, FLAGS.basenet)
        load_checkpoint(sess, FLAGS.load_dir)

    # Write config file to json file.
    tf.gfile.MakeDirs(FLAGS.log_root)
    with tf.gfile.Open(os.path.join(FLAGS.log_root, 'model_config.json'), 'w') as f:
        json.dump(model_params.values(), f, indent=True)

    sample_test(sess, sample_model, gen_model, test_set, model_params.max_seq_len)
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

from utils import load_json, load_dataset

asd_data_path = '/home/elliot/PycharmProjects/abide/processed_data_files/asd_raw/debug_raw_img_asd.json'
ctl_data_path = '/home/elliot/PycharmProjects/abide/processed_data_files/control_raw/debug_raw_img_ctl.json'
lr = 0.0001

if __name__ == '__main__':
    x_train, y_train, x_valid, y_valid = load_dataset(asd_data_path, ctl_data_path)
    assert tf.keras.backend.image_data_format() == 'channels_last'
    x_train = np.expand_dims(x_train, axis=-1)
    x_valid = np.expand_dims(x_valid, axis=-1)

    input_shape = (61, 73, 61, 1)
    model = tf.keras.models.Sequential()
    model.add(layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu'))
    model.add(layers.MaxPooling3D(pool_size=(2, 2, 2)))
    # model.add(layers.Dropout(0.25))
    # model.add(layers.Conv3D(64, kernel_size=(3, 3, 3), activation='relu'))
parser = argparse.ArgumentParser(description='MachineTranslation')
parser.add_argument('--model', default="MyTransformer", type=str, help='choose a model: Transformer')
args = parser.parse_args()

if __name__ == '__main__':
    dataset = 'Data'  # dataset directory
    model_name = args.model  # MyTransformer
    x = import_module('models.' + model_name)  # dynamically import the module matching the chosen model
    config = x.Config(dataset)  # hyperparameters are initialized in the model's Config.__init__
    start_time = time.time()
    print("Loading data...")
    train_dataset, valid_dataset, en_tokenizer, zh_tokenizer = load_dataset(
        config.dataset_path, config, config.num_samples)
    config.input_vocab_size = en_tokenizer.vocab_size + 2
    config.target_vocab_size = zh_tokenizer.vocab_size + 2

    model = x.MyModel(config)
    transformer = model.createModel()
    learning_rate = CustomizedSchedule(config.d_model)
    optimizer = keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=transformer)
def main(args):
    """ set up params, log dir, splits, encode the data, and run the training """
    logger.info("software version {}".format(utils.__version__))

    # set up log directory & save the args file to it
    log_dir = log_dir_name(args)
    logger.info("log directory is {}".format(log_dir))
    utils.mkdir(log_dir)
    save_args(vars(args), join(log_dir, "args.txt"))

    # load the dataset
    if args.dataset_name != "":
        dataset_file = constants.DATASETS[args.dataset_name]["ds_fn"]
    else:
        dataset_file = args.dataset_file
    logger.info("loading dataset from {}".format(dataset_file))
    ds = utils.load_dataset(ds_fn=dataset_file)

    # load the dataset split or create one
    if args.split_dir != "":
        if isdir(args.split_dir):
            logger.info("loading split from {}".format(args.split_dir))
            split = sd.load_split_dir(args.split_dir)
            if isinstance(split, list):
                raise ValueError(
                    "this script doesn't support multiple reduced train size replicates in a single run. "
                    "run each one individually by specifying the split dir of the replicate.")
        else:
            raise FileNotFoundError("specified split dir doesn't exist: {}".format(args.split_dir))
    else:
        # create a classic train-tune-test split based on the specified args
        logger.info("creating a train/test split with tr={}, tu={}, and te={}, seed={}".format(
            args.train_size, args.tune_size, args.test_size, args.split_rseed))
        split, _ = sd.train_tune_test(ds, train_size=args.train_size,
                                      tune_size=args.tune_size,
                                      test_size=args.test_size,
                                      rseed=args.split_rseed)

    # error checking for split -- make sure we have a train set
    if "train" not in split:
        raise ValueError("no train set in dataset split. specify a split with a train set to proceed.")
    if "tune" not in split:
        raise ValueError(
            "no tune set in dataset split. specify a split with a tune set to proceed. "
            "the tune set is used for early stopping and logging statistics to tensorboard. "
            "if you don't want a tune set, and instead just prefer to have a train and test set, "
            "just name your test set as the tune set so it is compatible with the script.")

    # save the split indices that are going to be used for this model to the log directory for the model.
    # this isn't as good as explicitly saving a split using split_dataset.py because the directory name will
    # not be informative. todo: if loading a split_dir, it would be good to copy over the directory name
    logger.info("backing up split to log dir {}".format(join(log_dir, "split")))
    sd.save_split(split, join(log_dir, "split"))

    # figure out the wt_aa and wt_offset for encoding data
    if args.dataset_name != "":
        wt_aa = constants.DATASETS[args.dataset_name]["wt_aa"]
        wt_ofs = constants.DATASETS[args.dataset_name]["wt_ofs"]
    else:
        wt_aa = args.wt_aa
        wt_ofs = args.wt_ofs

    # create the dataset dictionary, containing encoded data, scores, etc, based on the splits
    data = collections.defaultdict(dict)
    data["ds"] = ds
    for set_name, idxs in split.items():
        data["idxs"][set_name] = idxs
        data["variants"][set_name] = ds.iloc[idxs]["variant"].tolist()
        # we are using "score" as the default target, but support for multiple scores could be added here
        data["scores"][set_name] = ds.iloc[idxs]["score"].to_numpy()
        # encode the data
        logger.info("encoding {} set variants using {} encoding".format(set_name, args.encoding))
        data["encoded_data"][set_name] = enc.encode(
            encoding=args.encoding, variants=data["variants"][set_name],
            wt_aa=wt_aa, wt_offset=wt_ofs)

    evaluations = run_training(data, log_dir, args)
def train_seq_malGAN():
    """
    main training function: first train subD, then alternately train boxD and malG
    :return: None
    """
    max_seq_len = 1024

    # make workspace directory for current mission and copy code
    timeTag = datetime.now().strftime('%Y-%m-%d')
    # timeTag = '2017-11-19'
    dir_path = '../tensorflow_result/'
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    dir_path = '../tensorflow_result/' + timeTag
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    if os.path.exists(os.path.join(dir_path, 'code')):
        shutil.rmtree(os.path.join(dir_path, 'code'))
    shutil.copytree('.', os.path.join(dir_path, 'code'))
    log_path = dir_path + '/log.txt'
    score_template = 'TPR %(TPR)f\tFPR %(FPR)f\tAccuracy %(Accuracy)f\tAUC %(AUC)f'
    print((str(datetime.now()) + '\tStart training seq_malGAN.'))

    # define substitute D as subD, black box D as boxD and malware Generator as G
    boxD = blackboxDiscriminator(
        cell_type='LSTM', rnn_layers=[128], is_bidirectional=True,
        attention_layers=[128], ff_layers=[128], batch_size=64, num_token=161,
        max_seq_len=max_seq_len * 2, num_class=2, learning_rate=0.001,
        scope='black_box_D', model_path=dir_path + '/black_box_D_model')
    # boxD_params = {'vocab_num': 160, 'embedding_dim': 160, 'hidden_dim': 128, 'is_bidirectional': False,
    #                'max_seq_len': 1024, 'attention_layers': None, 'ff_layers': [512], 'class_num': 2}
    # G_params = {}
    print((str(datetime.now()) + '\tFinish defining subD, boxD and G.'))

    # load data
    X_malware, seqLen_malware, X_benigh, seqLen_benigh = \
        load_dataset('../data/API_rand_trainval_len_2048.txt', max_seq_len, 0)
    X = np.vstack((X_malware, X_benigh))
    seqLen = np.hstack((seqLen_malware, seqLen_benigh))
    Y = np.array([1] * len(X_malware) + [0] * len(X_benigh))
    X_malware_test, seqLen_malware_test, X_benigh_test, seqLen_benigh_test = \
        load_dataset('../data/API_rand_test_len_2048.txt', max_seq_len, 0)
    X_test = np.vstack((X_malware_test, X_benigh_test))
    seqLen_test = np.hstack((seqLen_malware_test, seqLen_benigh_test))
    Y_test = np.array([1] * len(X_malware_test) + [0] * len(X_benigh_test))
    print((str(datetime.now()) + '\tFinish loading data.'))
    print((str(datetime.now()) + '\tlen(X)=%d\tlen(X_malware)=%d\tlen(X_benigh)=%d\t' %
           (len(X), len(X_malware), len(X_benigh))))
    print((str(datetime.now()) + '\tlen(X_test)=%d\tlen(X_malware_test)=%d\tlen(X_benigh_test)=%d' %
           (len(X_test), len(X_malware_test), len(X_benigh_test))))

    # train the black box Discriminator first
    print((str(datetime.now()) + '\tStart training black box Discriminator.'))
    boxD.train(np.hstack((X, np.zeros_like(X))), seqLen, Y, max_epochs=50, max_epochs_val=5)
    print((str(datetime.now()) + '\tFinish training boxD.'))
    print((str(datetime.now()) + '\tTraining set result:'))
    print((score_template % evaluate(boxD, np.hstack((X, np.zeros_like(X))), seqLen, Y)))
    print((str(datetime.now()) + '\tTest set result:'))
    print((score_template % evaluate(boxD, np.hstack((X_test, np.zeros_like(X_test))), seqLen_test, Y_test)))
import os

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from utils import CHART_DIR, DATA_DIR, load_dataset

# had to write a custom function to parse the file since it contains float and string data
features, labels = load_dataset('seeds.tsv')

# initialize a classifier instance
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto',
                           leaf_size=30, p=2, metric='minkowski', metric_params=None)

# compute 10-fold cross-validation
means = []
for k in range(1, 20, 2):
    knn.n_neighbors = k
    # normalize all features to the same scale; rebuild the pipeline each iteration
    # (the original rewrapped the pipeline in itself on every pass, which breaks
    # after the first loop)
    classifier = Pipeline([('norm', StandardScaler()), ('knn', knn)])
    # need to shuffle before creating folds since the labels are stored contiguously
    for training, testing in KFold(features.shape[0], n_folds=10, shuffle=True):
        classifier.fit(features[training], labels[training])
        predictions = classifier.predict(features[testing])
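# The comment above mentions a custom parser for seeds.tsv (numeric features
# plus a string label per row). A minimal sketch of such a loader, assuming
# tab-separated rows with the label in the last column (hypothetical helper,
# not necessarily the utils.load_dataset used above):
import numpy as np

def load_seeds_tsv(path):
    features, labels = [], []
    with open(path) as f:
        for line in f:
            parts = line.strip().split('\t')
            if not parts or parts == ['']:
                continue
            features.append([float(v) for v in parts[:-1]])  # numeric columns
            labels.append(parts[-1])                         # string class label
    return np.array(features), np.array(labels)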
    Q, mask, A = get_batch(begin, end, q_test, a_test, batch_size, max_q, Na)
    a_pred = sess.run(model_outputs['answer_pred'],
                      feed_dict={model_outputs['question']: Q,
                                 model_outputs['mask']: mask,
                                 model_outputs['answer']: A})
    equals = 1 * np.equal(A.argmax(axis=1), a_pred)
    equals = list(equals[:end - begin])
    acc += equals
    acc = tf.reduce_mean(tf.to_float(acc))
    acc_s = tf.scalar_summary("acc_tf", acc, name="acc_tf")
    acc, acc_s = sess.run([acc, acc_s])
    writer.add_summary(acc_s, step)
    return acc

if __name__ == "__main__":
    q_train = load_dataset('datasets/coco/train/questions.idxs')
    q_test = load_dataset('datasets/coco/test/questions.idxs')
    a_train = load_dataset('datasets/coco/train/answers.idxs')
    a_test = load_dataset('datasets/coco/test/answers.idxs')
    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
    a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')
    max_q = len(max(q_train, key=lambda x: len(x))) + 1
    Nq = len(q_i2w)
    Na = len(a_i2w)
    dh = 50  # LSTM hidden state dimension
    dq = 75  # question embedding dimension
    da = 50  # answer embedding dimension
    batch_size = 64
import numpy as np
import matplotlib.pyplot as plt

from compute_cost import compute_cost
from gradient_descent import gradient_descent
from predict import predict
from utils import load_dataset, add_x0, feature_normalize

data = load_dataset("data.txt")
X_Original = data[:, 0:2]
y = data[:, 2:3]
plt.scatter(X_Original[:, 0], X_Original[:, 1], c=y, s=50, cmap=plt.cm.Spectral)
X, mu, sigma = feature_normalize(X_Original)
plt.show()
X = add_x0(X)

m = X.shape[0]
n = X.shape[1]
learning_rate = .3
theta = np.zeros((n, 1))
max_iter = 400
his = np.zeros((max_iter, 1))
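# feature_normalize and add_x0 are imported from a local utils module. A
# minimal sketch of what they presumably do (z-score normalization and
# prepending a bias column) -- an assumption about helpers not shown here:
import numpy as np

def feature_normalize(X):
    """Return (X - mu) / sigma along with mu and sigma for later reuse."""
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    return (X - mu) / sigma, mu, sigma

def add_x0(X):
    """Prepend a column of ones so theta[0] acts as the intercept."""
    return np.hstack([np.ones((X.shape[0], 1)), X])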
                              betas=(opt.beta1, 0.999))

# --------- loss functions ------------------------------------
mse_criterion = nn.MSELoss()
bce_criterion = nn.BCELoss()

# --------- transfer to gpu ------------------------------------
netEP.cuda()
netEC.cuda()
netD.cuda()
netC.cuda()
mse_criterion.cuda()
bce_criterion.cuda()

# --------- load a dataset ------------------------------------
train_data, test_data = utils.load_dataset(opt)
train_loader = DataLoader(train_data,
                          num_workers=opt.data_threads,
                          batch_size=opt.batch_size,
                          shuffle=True,
                          drop_last=True,
                          pin_memory=True)
test_loader = DataLoader(test_data,
                         num_workers=opt.data_threads,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)
def set_dataset(self, data_path, labels_path=''):
    self.dataset = utils.load_dataset(data_path, self._graph, labels_path)
    return
if __name__ == "__main__":
    argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', '--question', required=True,
                        choices=["1.1", "1.2", "2.1", "2.2", "3.1", "4.3"])
    io_args = parser.parse_args()
    question = io_args.question

    if question == "1.1":
        # Q1.1 - This should print the answers to Q 1.1
        # Load the fluTrends dataset
        X, names = utils.load_dataset("fluTrends")

        # part 1: min, max, mean, median and mode
        print "Min = %.3f" % np.amin(X)
        print "Max = %.3f" % np.amax(X)
        print "Mean = %.3f" % np.mean(X)
        print "Median = %.3f" % np.median(X)
        print "Mode = %.3f" % utils.mode(X)

        # part 2: quantiles
        print "10th quantile = %.3f" % np.percentile(X, 10)
        print "25th quantile = %.3f" % np.percentile(X, 25)
        print "50th quantile = %.3f" % np.percentile(X, 50)
        print "75th quantile = %.3f" % np.percentile(X, 75)
        print "90th quantile = %.3f" % np.percentile(X, 90)
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    # visdom for plotting
    vis_g = VisdomWriter("Generator Loss", xlabel='Iteration', ylabel='Loss')
    vis_d = VisdomWriter("Negative Discriminator Loss", xlabel='Iteration', ylabel='Loss')

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("de_vocab_size: %d en_vocab_size: %d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size, n_layers=1, dropout=0.5)
    G = Seq2Seq(encoder, decoder).cuda()
    D = Discriminator(en_size, embed_size, hidden_size).cuda()
    optimizer_D = optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizer_G = optim.Adam(G.parameters(), lr=1e-4, betas=(0.5, 0.9))
    # TTUR paper https://arxiv.org/abs/1706.08500

    # pretrained
    # G.load_state_dict(torch.load("./.tmp/21.pt"))

    curriculum = 1
    dis_loss = []
    gen_loss = []
    for e in range(1, args.epochs + 1):
        # Training
        for b, batch in enumerate(train_iter):
            src, len_src = batch.src
            trg, len_trg = batch.trg
            src, trg = src.cuda(), trg.cuda()

            # (1) Update D network
            enable_gradients(D)
            disable_gradients(G)
            G.eval()
            D.train()
            # clamp parameters to a cube
            for p in D.parameters():
                p.data.clamp_(-0.01, 0.01)
            D.zero_grad()
            loss_d = D_loss(D, G, src, trg, args.lamb, curriculum)
            loss_d.backward()
            optimizer_D.step()
            dis_loss.append(loss_d.data[0])

            # (2) Update G network
            if b % 10 == 0:
                enable_gradients(G)
                disable_gradients(D)
                D.eval()
                G.train()
                G.zero_grad()
                loss_g = G_loss(D, G, src, trg, curriculum)
                loss_g.backward()
                optimizer_G.step()
                gen_loss.append(loss_g.data[0])

            # plot losses
            if b % 10 == 0 and b > 1:
                vis_d.update(-loss_d.data[0])
                vis_g.update(loss_g.data[0])

        if e % 10 == 0 and e > 1:
            ce_loss = evaluate(e, G, val_iter, en_size, DE, EN, curriculum)
            print(ce_loss)
        if e % 100 == 0 and e > 1:
            curriculum += 1
parser.add_argument("--verbose", default=1, type=int) parser.add_argument("--evaluate", default=1, type=int) parser.add_argument("--glovefile", default="data/glove.6B.300d.txt", type=str) args = parser.parse_args() w2v = args.vectorization_method PoS = args.PoS_method NER = args.NER_method regressor = args.regressor if w2v == "glove": _define_global(args.glovefile) if args.evaluate: X, y = load_dataset(args.training_set, args.verbose) distance_estimator = _build_distance_estimator(X, y, w2v, PoS, NER, regressor, verbose=1) pickle.dump(distance_estimator, open("traning_distance_model.pickle", "wb"), protocol=pickle.HIGHEST_PROTOCOL) score = dict() X_test, y_test = load_dataset(args.test_set_headlines, verbose=1) score["headlines_score"] = sts_score(distance_estimator, X_test, y_test) X_test, y_test = load_dataset(args.test_set_images, verbose=1) score["images_score"] = sts_score(distance_estimator, X_test, y_test) X_test, y_test = load_dataset(args.test_set_answers_students, verbose=1) score["answers_students_score"] = sts_score(distance_estimator, X_test, y_test) if args.verbose == 1: print score
def train(data_path, *, base_output_path="models", run_name=None, data_name=None,
          net_name="leap_cnn", clean=False, box_dset="box", confmap_dset="confmaps",
          val_size=0.15, preshuffle=True, filters=64, rotate_angle=15, epochs=100,
          batch_size=32, batches_per_epoch=50, val_batches_per_epoch=10, viz_idx=0,
          reduce_lr_factor=0.1, reduce_lr_patience=3, reduce_lr_min_delta=1e-5,
          reduce_lr_cooldown=0, reduce_lr_min_lr=1e-10):
    """
    Trains the network and saves the intermediate results to an output directory.

    :param data_path: Path to an HDF5 file with box and confmaps datasets
    :param base_output_path: Path to folder in which the run data folder will be saved
    :param run_name: Name of the training run. If not specified, will be formatted according to other parameters.
    :param data_name: Name of the dataset for use in formatting run_name
    :param net_name: Name of the network for use in formatting run_name
    :param clean: If True, deletes the contents of the run output path
    :param box_dset: Name of the box dataset in the HDF5 data file
    :param confmap_dset: Name of the confidence maps dataset in the HDF5 data file
    :param preshuffle: If True, shuffle prior to splitting the dataset, otherwise validation set will be the last frames
    :param val_size: Fraction of dataset to use as validation
    :param filters: Number of filters to use as baseline (see create_model)
    :param rotate_angle: Images will be augmented by rotating by +-rotate_angle
    :param epochs: Number of epochs to train for
    :param batch_size: Number of samples per batch
    :param batches_per_epoch: Number of batches per epoch (validation is evaluated at the end of the epoch)
    :param val_batches_per_epoch: Number of batches for validation
    :param viz_idx: Index of the sample image to use for visualization
    :param reduce_lr_factor: Factor to reduce the learning rate by (see ReduceLROnPlateau)
    :param reduce_lr_patience: How many epochs to wait before reduction (see ReduceLROnPlateau)
    :param reduce_lr_min_delta: Minimum change in error required before reducing LR (see ReduceLROnPlateau)
    :param reduce_lr_cooldown: How many epochs to wait after reduction before LR can be reduced again (see ReduceLROnPlateau)
    :param reduce_lr_min_lr: Minimum that the LR can be reduced down to (see ReduceLROnPlateau)
    """

    # Load
    box, confmap = load_dataset(data_path, X_dset=box_dset, Y_dset=confmap_dset)
    viz_sample = (box[viz_idx], confmap[viz_idx])
    box, confmap, val_box, val_confmap, train_idx, val_idx = train_val_split(
        box, confmap, val_size=val_size, shuffle=preshuffle)

    # Pull out metadata
    img_size = box.shape[1:]
    num_output_channels = confmap.shape[-1]
    print("img_size:", img_size)
    print("num_output_channels:", num_output_channels)

    # Build run name if needed
    if data_name is None:
        data_name = os.path.splitext(os.path.basename(data_path))[0]
    if run_name is None:
        # Ex: "WangMice-DiegoCNN_v1.0_filters=64_rot=15_lrfactor=0.1_lrmindelta=1e-05"
        run_name = "%s-%s_filters=%d_rot=%d_lrfactor=%.1f_lrmindelta=%g" % (
            data_name, net_name, filters, rotate_angle, reduce_lr_factor, reduce_lr_min_delta)
    print("data_name:", data_name)
    print("run_name:", run_name)

    # Create network
    model = create_model(net_name, img_size, num_output_channels, filters=filters, summary=True)
    if model is None:
        print("Could not find model:", net_name)
        return

    # Initialize run directories
    run_path = create_run_folders(run_name, base_path=base_output_path, clean=clean)
    savemat(os.path.join(run_path, "training_info.mat"),
            {"data_path": data_path, "val_idx": val_idx, "train_idx": train_idx,
             "base_output_path": base_output_path, "run_name": run_name,
             "data_name": data_name, "net_name": net_name, "clean": clean,
             "box_dset": box_dset, "confmap_dset": confmap_dset,
             "preshuffle": preshuffle, "val_size": val_size, "filters": filters,
             "rotate_angle": rotate_angle, "epochs": epochs, "batch_size": batch_size,
             "batches_per_epoch": batches_per_epoch,
             "val_batches_per_epoch": val_batches_per_epoch, "viz_idx": viz_idx,
             "reduce_lr_factor": reduce_lr_factor,
             "reduce_lr_patience": reduce_lr_patience,
             "reduce_lr_min_delta": reduce_lr_min_delta,
             "reduce_lr_cooldown": reduce_lr_cooldown,
             "reduce_lr_min_lr": reduce_lr_min_lr})

    # Save initial network
    model.save(os.path.join(run_path, "initial_model.h5"))
    input_layers = [x.name for x in model.input_layers]
    output_layers = [x.name for x in model.output_layers]

    # Data augmentation
    if len(input_layers) > 1 or len(output_layers) > 1:
        train_datagen = MultiInputOutputPairedImageAugmenter(
            input_layers, output_layers, box, confmap, batch_size=batch_size,
            shuffle=True, theta=(-rotate_angle, rotate_angle))
        val_datagen = MultiInputOutputPairedImageAugmenter(
            input_layers, output_layers, val_box, val_confmap, batch_size=batch_size,
            shuffle=True, theta=(-rotate_angle, rotate_angle))
    else:
        train_datagen = PairedImageAugmenter(box, confmap, batch_size=batch_size,
                                             shuffle=True, theta=(-rotate_angle, rotate_angle))
        val_datagen = PairedImageAugmenter(val_box, val_confmap, batch_size=batch_size,
                                           shuffle=True, theta=(-rotate_angle, rotate_angle))

    # Initialize training callbacks
    history_callback = LossHistory(run_path=run_path)
    reduce_lr_callback = ReduceLROnPlateau(monitor="val_loss", factor=reduce_lr_factor,
                                           patience=reduce_lr_patience, verbose=1, mode="auto",
                                           epsilon=reduce_lr_min_delta, cooldown=reduce_lr_cooldown,
                                           min_lr=reduce_lr_min_lr)
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(run_path, "weights/weights.{epoch:03d}-{val_loss:.9f}.h5"),
        verbose=1, save_best_only=False)
    viz_grid_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: show_confmap_grid(
        model, *viz_sample, plot=True,
        save_path=os.path.join(run_path, "viz_confmaps/confmaps_%03d.png" % epoch),
        show_figure=False))
    viz_pred_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: show_pred(
        model, *viz_sample,
        save_path=os.path.join(run_path, "viz_pred/pred_%03d.png" % epoch),
        show_figure=False))

    # Train!
    epoch0 = 0
    t0_train = time()
    training = model.fit_generator(
        train_datagen,
        initial_epoch=epoch0,
        epochs=epochs,
        verbose=1,
        # use_multiprocessing=True,
        # workers=8,
        steps_per_epoch=batches_per_epoch,
        max_queue_size=512,
        shuffle=False,
        validation_data=val_datagen,
        validation_steps=val_batches_per_epoch,
        callbacks=[
            reduce_lr_callback,
            checkpointer,
            history_callback,
            viz_pred_callback,
            viz_grid_callback
        ]
    )

    # Compute total elapsed time for training
    elapsed_train = time() - t0_train
    print("Total runtime: %.1f mins" % (elapsed_train / 60))

    # Save final model
    model.history = history_callback.history
    model.save(os.path.join(run_path, "final_model.h5"))
def f(l):
    def g(w):
        return 1 / 2 * w**2 - 2 * w + 5 / 2 + l * abs(w)**(1 / 2)
    return g

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', '--question', required=True)
    io_args = parser.parse_args()
    question = io_args.question

    if question == "2":
        data = utils.load_dataset("logisticData")
        XBin, yBin = data['X'], data['y']
        XBinValid, yBinValid = data['Xvalid'], data['yvalid']

        model = linear_model.logReg(maxEvals=400)
        model.fit(XBin, yBin)

        print("\nlogReg Training error %.3f" %
              utils.classification_error(model.predict(XBin), yBin))
        print("logReg Validation error %.3f" %
              utils.classification_error(model.predict(XBinValid), yBinValid))
        print("# nonZeros: %d" % (model.w != 0).sum())

    elif question == "2.1":
        data = utils.load_dataset("logisticData")
        XBin, yBin = data['X'], data['y']
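# f above is a closure factory: f(l) returns the 1-D objective
# g(w) = w^2/2 - 2w + 5/2 + l*|w|^(1/2). A quick self-contained sketch of how
# such a closure gets used, e.g. scanning the penalized objective on a grid
# (the grid search is illustrative, not part of the assignment code):
import numpy as np

def f(l):
    def g(w):
        return 1 / 2 * w**2 - 2 * w + 5 / 2 + l * abs(w)**(1 / 2)
    return g

g = f(1.0)                      # objective with penalty weight l = 1
ws = np.linspace(-1, 4, 501)
vals = np.array([g(w) for w in ws])
print("approx minimizer: %.3f, value: %.3f" % (ws[vals.argmin()], vals.min()))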
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=100, type=int)  # XXX using sample size of one
    parser.add_argument('--nlayers', default=1, type=int,
                        help='number of hidden layers in MLP before output layers')
    parser.add_argument('--hdim', default=500, type=int, help='dimension of hidden layer')
    parser.add_argument('--zdim', default=2, type=int, help='dimension of continuous latent variable')
    parser.add_argument('--lmbda', default=0.001, type=float, help='weight decay coefficient')
    parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
    parser.add_argument('--epochs', default=1000, type=int, help='number of passes over dataset')
    parser.add_argument('--print_every', default=100, type=int, help='how often to print cost')
    parser.add_argument('--save_every', default=1, type=int, help='how often to save model (in terms of epochs)')
    parser.add_argument('--outfile', default='vae_model.pk', help='output file to save model to')
    parser.add_argument('--dset', default='mnist', choices=['mnist'], help='dataset to use')
    args = parser.parse_args()
    print(args)

    # run SGVB algorithm

    # N x d
    data = load_dataset(dset=args.dset)
    train_x, train_y = data['train']
    #print(train_x[0, :])  # values in [0, 1]
    #print(train_y[0:10])  # seems to already be shuffled
    valid_x, valid_y = data['valid']

    decs = {'mnist': 'bernoulli'}
    model = VAE(train_x.shape[1], args, dec=decs[args.dset])

    expcost = None
    num_train_batches = train_x.shape[0] / args.batch_size
    num_valid_batches = valid_x.shape[0] / args.batch_size
    valid_freq = num_train_batches

    for b in xrange(args.epochs * num_train_batches):
        k = b % num_train_batches
        x = train_x[k * args.batch_size:(k + 1) * args.batch_size, :]
        eps = np.random.randn(x.shape[0], args.zdim).astype(floatX)
        cost = model.train(x, eps)
        if not expcost:
            expcost = cost
        else:
            expcost = 0.01 * cost + 0.99 * expcost
        if (b + 1) % args.print_every == 0:
            print('iter %d, cost %f, expcost %f' % (b + 1, cost, expcost))
        if (b + 1) % valid_freq == 0:
            valid_cost = 0
            for l in xrange(num_valid_batches):
                x_val = valid_x[l * args.batch_size:(l + 1) * args.batch_size, :]
                eps_val = np.zeros((x_val.shape[0], args.zdim), dtype=floatX)
                valid_cost = valid_cost + model.test(x_val, eps_val)
            valid_cost = valid_cost / num_valid_batches
            print('valid cost: %f' % valid_cost)
        if (b + 1) % (num_train_batches * args.save_every) == 0:
            print('saving model')
            with open(args.outfile, 'wb') as f:
                # XXX just pickling the entire model for now
                pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('saving final model')
    with open(args.outfile, 'wb') as f:
        pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
def main(argv):
    del argv  # unused arg
    tf.enable_v2_behavior()

    dataset_train, ds_info = utils.load_dataset(tfds.Split.TRAIN, with_info=True)
    dataset_test = utils.load_dataset(tfds.Split.TEST)
    dataset_train = dataset_train.batch(FLAGS.batch_size)
    dataset_test = dataset_test.batch(FLAGS.batch_size)

    model = deterministic.resnet_v1(
        input_shape=ds_info.features['image'].shape,
        depth=20,
        num_classes=ds_info.features['label'].num_classes,
        l2=0.)
    logging.info('Model input shape: %s', model.input_shape)
    logging.info('Model output shape: %s', model.output_shape)
    logging.info('Model number of weights: %s', model.count_params())

    # Search for checkpoints from their index file; then remove the index suffix.
    ensemble_filenames = tf.io.gfile.glob(
        os.path.join(FLAGS.output_dir, '**/*.ckpt.index'))
    ensemble_filenames = [filename[:-6] for filename in ensemble_filenames]
    ensemble_size = len(ensemble_filenames)
    logging.info('Ensemble size: %s', ensemble_size)
    logging.info('Ensemble number of weights: %s',
                 ensemble_size * model.count_params())
    logging.info('Ensemble filenames: %s', str(ensemble_filenames))

    # Collect the logits output for each ensemble member and train/test data
    # point. We also collect the labels.
    # TODO(trandustin): Refactor data loader so you can get the full dataset in
    # memory without looping.
    logits_train = []
    logits_test = []
    labels_train = []
    labels_test = []
    for m, ensemble_filename in enumerate(ensemble_filenames):
        model.load_weights(ensemble_filename)
        logits = []
        for features, labels in dataset_train:
            logits.append(model(features, training=False))
            if m == 0:
                labels_train.append(labels)
        logits = tf.concat(logits, axis=0)
        logits_train.append(logits)
        if m == 0:
            labels_train = tf.concat(labels_train, axis=0)

        logits = []
        for features, labels in dataset_test:
            logits.append(model(features, training=False))
            if m == 0:
                labels_test.append(labels)
        logits = tf.concat(logits, axis=0)
        logits_test.append(logits)
        if m == 0:
            labels_test = tf.concat(labels_test, axis=0)
        logging.info('Predictions completed for checkpoint %s', ensemble_filename)

    metrics = {}
    # Compute the ensemble's NLL and Gibbs cross entropy for each data point.
    # Then average over the dataset.
    nll_train = ensemble_negative_log_likelihood(labels_train, logits_train)
    nll_test = ensemble_negative_log_likelihood(labels_test, logits_test)
    gibbs_ce_train = gibbs_cross_entropy(labels_train, logits_train)
    gibbs_ce_test = gibbs_cross_entropy(labels_test, logits_test)
    metrics['train_nll'] = tf.reduce_mean(nll_train)
    metrics['test_nll'] = tf.reduce_mean(nll_test)
    metrics['train_gibbs_cross_entropy'] = tf.reduce_mean(gibbs_ce_train)
    metrics['test_gibbs_cross_entropy'] = tf.reduce_mean(gibbs_ce_test)

    # Given the per-element logits tensor of shape [ensemble_size, dataset_size,
    # num_classes], average over the ensemble members' probabilities. Then
    # compute accuracy and average over the dataset.
    probs_train = tf.reduce_mean(tf.nn.softmax(logits_train), axis=0)
    probs_test = tf.reduce_mean(tf.nn.softmax(logits_test), axis=0)
    accuracy_train = tf.keras.metrics.sparse_categorical_accuracy(labels_train, probs_train)
    accuracy_test = tf.keras.metrics.sparse_categorical_accuracy(labels_test, probs_test)
    metrics['train_accuracy'] = tf.reduce_mean(accuracy_train)
    metrics['test_accuracy'] = tf.reduce_mean(accuracy_test)
    logging.info('Metrics: %s', metrics)
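# ensemble_negative_log_likelihood and gibbs_cross_entropy are defined
# elsewhere in this codebase. For reference, a minimal numpy sketch of the
# standard definitions they presumably follow (ensemble NLL via log-mean-exp
# of member log-likelihoods; Gibbs cross entropy as the mean of member NLLs)
# -- an assumption, not the repository's exact implementation:
import numpy as np
from scipy.special import logsumexp

def ensemble_nll(labels, logits):
    # logits: [ensemble_size, n, num_classes]; labels: [n] integer classes
    log_probs = logits - logsumexp(logits, axis=-1, keepdims=True)
    ll = log_probs[:, np.arange(len(labels)), labels]        # [ensemble_size, n]
    return -(logsumexp(ll, axis=0) - np.log(ll.shape[0]))    # [n]

def gibbs_ce(labels, logits):
    log_probs = logits - logsumexp(logits, axis=-1, keepdims=True)
    ll = log_probs[:, np.arange(len(labels)), labels]
    return -ll.mean(axis=0)                                  # [n]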
import os

if __name__ == "__main__":
    argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', '--question', required=True,
                        choices=["2.1", "2.2", "3.1", "4.1", "4.3"])
    io_args = parser.parse_args()
    question = io_args.question

    if question == "2.1":
        # Load the data in the form of dictionary
        data = utils.load_dataset("basisData")
        X = data['X']
        y = data['y']
        Xtest = data['Xtest']
        ytest = data['ytest']

        # get the number of rows (n) and columns (d)
        n, d = X.shape
        t = Xtest.shape[0]

        # Fit least-squares model
        model = linear_model.LeastSquares()
        model.fit(X, y)

        # Compute training error
        yhat = model.predict(X)
if len(args) == 0:
    print_usage()
    sys.exit(-1)

separator = "|"
length_threshold = 4
payload_max_length = 50
prune = False
count = False

for opt, value in opts:
    if opt == "-t":
        separator = value
    elif opt == "-l":
        length_threshold = int(value)
    elif opt == "-p":
        payload_max_length = int(value)
    elif opt == "-x":
        prune = True
    elif opt == "-c":
        count = True

if prune:
    TermFrequencyUtils.prune_terms(args[0])
elif count:
    TermFrequencyUtils.multiply_by_count(args[0], args[1])
else:
    dataset = utils.load_dataset(args[0])
    tf = TermFrequencyUtils.find_common_term_frequencies(
        dataset, payload_max_length, length_threshold)
    TermFrequencyUtils.serialize_term_frequencies(tf)
def main():
    # Set hyper-parameters.
    batch_size = 32
    epochs = 100
    model_path = 'atmodel.h5'
    enc_arch = 'encoder.json'
    dec_arch = 'decoder.json'
    data_path = '../data/w16to19abeconv.txt'
    num_words = 10000
    num_data = 12755

    # Data loading.
    en_texts, ja_texts = load_dataset(data_path)
    en_texts, ja_texts = en_texts[:num_data], ja_texts[:num_data]

    # Preprocessing.
    #ja_texts = preprocess_ja(ja_texts)
    ja_texts = preprocess_dataset(ja_texts)
    en_texts = preprocess_dataset(en_texts)
    x_train, x_test, y_train, y_test = train_test_split(en_texts, ja_texts,
                                                        test_size=0.2, random_state=42)

    en_vocab = build_vocabulary(x_train, num_words)
    ja_vocab = build_vocabulary(y_train, num_words)
    print(x_train[:3])
    print(y_train[:3])
    x_train, y_train = create_dataset(x_train, y_train, en_vocab, ja_vocab)
    print(en_vocab.word_index)
    print(ja_vocab.word_index)

    # Build a simple model.
    encoder = Encoder(num_words)
    decoder = Decoder(num_words)
    # Build an attention model.
    #encoder = Encoder(num_words, return_sequences=True)
    #decoder = AttentionDecoder(num_words)
    seq2seq = Seq2seq(encoder, decoder)
    model = seq2seq.build()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

    # Train the model.
    callbacks = [
        EarlyStopping(patience=10),
        ModelCheckpoint(model_path, save_best_only=True, save_weights_only=True)
    ]
    """
    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=callbacks,
              validation_split=0.1)"""
    encoder.save_as_json(enc_arch)
    decoder.save_as_json(dec_arch)

    # Inference.
    encoder = Encoder.load(enc_arch, model_path)
    decoder = Decoder.load(dec_arch, model_path)
    api = InferenceAPI(encoder, decoder, en_vocab, ja_vocab)
    #api = InferenceAPIforAttention(encoder, decoder, en_vocab, ja_vocab)

    texts = sorted(set(en_texts[:50]), key=len)
    texts = ["お聞きしたいと思います", "さっき の 答弁 全く 納得 できません", "全く 納得 い き ません",
             "ありがとうございました", "おはようございます", "よろしいでしょうか",
             "是非 よろしくお願いいたします", "もう少し 具体的に 教えて いただける と 助 か る んですけれども",
             "ちょっと 待 って", "質問 主 意 書 では 当然 混 同 は しておりません",
             "正 式 な 要求 でいい んですか", "時間ですので まとめて ください",
             "ちょっと 静粛に お願いします", "よろしいですか", "静粛に お願いします",
             "答弁 を まとめて ください", "時間 ですから", "驚 き の答弁 ですね",
             "それは いつ ごろ でしょうか", "そのとおり です"]
    for text in texts:
        decoded = api.predict(text=text)
        print('Input: {}'.format(text))
        print('Response: {}'.format(decoded))

    y_test = [y.split(' ')[1:-1] for y in y_test]
    bleu_score = evaluate_bleu(x_test, y_test, api)
    print('BLEU: {}'.format(bleu_score))
def main():
    # Get arguments
    args = parse_args()

    # Set random seed
    torch.manual_seed(args.seed)

    # Cuda
    use_cuda = False
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you "
                  "should probably run with --cuda")
        else:
            use_cuda = True
            torch.cuda.manual_seed(args.seed)

    # Load data + text fields
    print('=' * 89)
    train_iter, val_iter, test_iter, SRC, TRG = utils.load_dataset(
        batch_size=args.batch_size,
        use_pretrained_emb=args.pretrained_emb,
        save_dir=SAVE_DIR)
    print('=' * 89)

    # Initialize model
    enc = models.EncoderRNN(
        input_size=len(SRC.vocab),
        emb_size=(SRC.vocab.vectors.size(1)
                  if args.pretrained_emb == 'fastText' else args.emb_size),
        embeddings=(SRC.vocab.vectors
                    if args.pretrained_emb == 'fastText' else None),
        max_norm=args.emb_maxnorm,
        padding_idx=SRC.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=args.bidirectional)
    decoder = models.AttnDecoderRNN if args.attention else models.DecoderRNN
    dec = decoder(
        enc_num_directions=enc.num_directions,
        enc_hidden_size=args.hidden_size,
        use_context=args.use_context,
        input_size=len(TRG.vocab),
        emb_size=(TRG.vocab.vectors.size(1)
                  if args.pretrained_emb else args.emb_size),
        embeddings=(TRG.vocab.vectors if args.pretrained_emb else None),
        max_norm=args.emb_maxnorm,
        padding_idx=TRG.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=False)  # args.bidirectional
    model = models.Seq2Seq(enc, dec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    print(model)

    # Initialize loss
    criterion = torch.nn.CrossEntropyLoss(ignore_index=TRG.vocab.stoi["<pad>"])

    # Create optimizer
    if args.optimizer == 'Adam':
        optim = torch.optim.Adam
    elif args.optimizer == 'Adadelta':
        optim = torch.optim.Adadelta
    elif args.optimizer == 'Adagrad':
        optim = torch.optim.Adagrad
    else:
        optim = torch.optim.SGD
    optimizer = optim(model.parameters(), lr=args.lr)

    # Create scheduler
    lambda_lr = lambda epoch: 0.5 if epoch > 8 else 1
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_lr)

    # Train
    best_val_loss = None
    fname = './{}/{}.pt'.format(SAVE_DIR, args.save)
    print('=' * 89)
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            attns = train(epoch, model, train_iter, criterion, optimizer,
                          use_cuda, args, SRC, TRG)
            val_loss = evaluate(model, val_iter, criterion, use_cuda)

            # Log results
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                      epoch, (time.time() - epoch_start_time), val_loss,
                      math.exp(val_loss)))
            print('-' * 89)

            # Save the model if validation loss is best we've seen so far
            if not best_val_loss or val_loss < best_val_loss:
                if not os.path.isdir(SAVE_DIR):
                    os.makedirs(SAVE_DIR)
                torch.save(model, fname)
                best_val_loss = val_loss

            # Anneal learning rate
            scheduler.step()
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model
    with open(fname, 'rb') as f:
        model = torch.load(f)

    # Run on test data
    test_loss = evaluate(model, test_iter, criterion, use_cuda)

    # Log results
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
parser.add_argument("--glovefile", default='data/glove.6B.300d.txt', type=str) args = parser.parse_args() w2v = args.vectorization_method PoS = args.PoS_method NER = args.NER_method regressor = args.regressor if w2v == 'glove': _load_glove(args.glovefile, verbose=args.verbose) if args.evaluate: if args.training_estimator is None: X, y = load_dataset(args.training_set, args.verbose) distance_estimator = _build_distance_estimator(X, y, w2v, PoS, NER, regressor, verbose=1) pickle.dump(distance_estimator, open("traning_distance_model" + regressor + ".pickle", "wb"), protocol=pickle.HIGHEST_PROTOCOL) else: distance_estimator = pickle.load(
def __init__(self, path):
    self.meta, self.elems = load_dataset(path)
    self.samples = self._create_samples()
def main():
    parser = argparse.ArgumentParser(description='Graphs')
    parser.add_argument('-p', dest='pickle_folder', default='./out_pickles')
    parser.add_argument('-d', dest='dataset', required=True,
                        choices=['adult', 'recidivism', 'lending'],
                        help='dataset to use')
    parser.add_argument('-m', dest='model', required=True,
                        choices=['xgboost', 'logistic', 'nn'],
                        help='model: xgboost, logistic or nn')
    parser.add_argument('-o', dest='output_folder', default='./results')
    args = parser.parse_args()

    dataset = utils.load_dataset(args.dataset, balance=True)
    dataset_name = args.dataset
    algorithm = args.model
    z_anchor = pickle.load(
        open(os.path.join(args.pickle_folder,
                          '%s-anchor-%s' % (dataset_name, algorithm))))
    z_lime = pickle.load(
        open(os.path.join(args.pickle_folder,
                          '%s-lime-%s' % (dataset_name, algorithm))))
    preds_validation = z_anchor['model'].predict(
        z_anchor['encoder'].transform(dataset.data[z_anchor['validation_idx']]))
    preds_test = z_anchor['model'].predict(
        z_anchor['encoder'].transform(dataset.data[z_anchor['test_idx']]))

    ret = {}
    ret['accuracy'] = sklearn.metrics.accuracy_score(
        dataset.labels[z_anchor['test_idx']], preds_test)
    print('accuracy', ret['accuracy'])

    print('Lime weights')
    val_weights, val_vals = utils.compute_lime_weight_vals(
        z_lime['exps'], dataset.data[z_lime['validation_idx']],
        dataset.data[z_lime['validation_idx']])

    print('Submodular anchor')
    picked, precs, recs = submodular_anchor_precrecall(
        z_anchor, dataset, preds_validation, preds_test, 10)
    ret['anchor_submodular'] = (picked, precs, recs)
    anchor_prec = precs[-1]

    print('Submodular lime pred')
    picked, precs, recs, t1, t2 = submodular_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, 10, val_weights,
        val_vals, desired_precision=anchor_prec, to_change='pred', verbose=True)
    ret['lime_pred_submodular'] = (picked, precs, recs)
    ret['lime_pred_submodular_threshold'] = t2

    print('Random anchor')
    (prec, cov, prec_std, cov_std) = random_anchor_precrecall(
        z_anchor, dataset, preds_validation, preds_test, 1, do_all=True)
    ret['anchor_1'] = (prec, cov, prec_std, cov_std)

    print('Random lime')
    (prec, cov, prec_std, cov_std, _, _) = random_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, k=1,
        desired_precision=0.0, to_change='distance', verbose=True, do_all=True)
    ret['lime_naive_1'] = (prec, cov, prec_std, cov_std)

    # print('Distance random lime')
    # (prec, cov, prec_std, cov_std, t1, t2) = random_lime_precrecall(
    #     z_lime, dataset, preds_validation, preds_test, k=1,
    #     desired_precision=0.0, to_change='distance', verbose=True,
    #     do_all=True, threshold=ret['lime_distance_submodular_threshold'])
    # ret['lime_distance_1'] = (prec, cov, prec_std, cov_std)
    # ret['lime_distance_1_threshold'] = t1

    print('Pred random lime')
    (prec, cov, prec_std, cov_std, t1, t2) = random_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, k=1,
        desired_precision=0.0, to_change='pred', verbose=True, do_all=True,
        pred_threshold=ret['lime_pred_submodular_threshold'])
    ret['lime_pred_1'] = (prec, cov, prec_std, cov_std)
    ret['lime_pred_1_threshold'] = t2

    def random_fn_lime(k):
        return random_lime_precrecall(
            z_lime, dataset, preds_validation, preds_test, k=k,
            desired_precision=0.0, to_change='pred', verbose=True,
            do_all=False,
            pred_threshold=ret['lime_pred_submodular_threshold'])[:4]

    def random_fn_anchor(k):
        return random_anchor_precrecall(
            z_anchor, dataset, preds_validation, preds_test, k, do_all=False)

    ret['anchor_random'] = random_until_k(random_fn_anchor, 10)
    ret['lime_pred_random'] = random_until_k(random_fn_lime, 10)

    path = os.path.join(args.output_folder,
                        '%s-%s.pickle' % (dataset_name, algorithm))
    pickle.dump(ret, open(path, 'w'))
import numpy as np
import torch
import matplotlib.pyplot as plt

from utils import load_dataset, im_convert

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load model
path = 'D:/model'
model = torch.load(path)

# classes
classes = ('ant', 'bee')

# load data
data_path = 'ants_and_bees'
_, validation_loader = load_dataset(data_path)

dataiter = iter(validation_loader)
images, labels = dataiter.next()
images = images.to(device)
labels = labels.to(device)
output = model(images)
_, preds = torch.max(output, 1)

fig = plt.figure(figsize=(25, 4))
for idx in np.arange(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    plt.imshow(im_convert(images[idx]))
    ax.set_title("{} ({})".format(str(classes[preds[idx].item()]),
                                  str(classes[labels[idx].item()])),
args = parser.parse_args()

logging.basicConfig(level=logging.INFO)
with open(os.path.join(args.dsave, 'config.json')) as f:
    j = json.load(f)
args_save = Namespace(**j)
print('args_save', type(args_save))
args_save.gpu = args.gpu
args_save.forward_pass_time = args.forward_pass_time
args_save.batch_size = args.batch_size
args_save.old_encoder = args.old_encoder
pprint(args_save)

dataset, ontology, vocab, Eword = load_dataset(args.dataset)
model = load_model(args_save.model, args_save, ontology, vocab)
model.load_best_save(directory=args.dsave)
if args.gpu is not None:
    model.cuda(args.gpu)
print(dataset.keys())
if args.split not in dataset.keys():
    print(args.split + ' file not found')  # fixed: `splits` was undefined
#dataset[args.split].dialogues = dataset[args.split].dialogues[:1117]
logging.info('Making predictions for {} dialogues and {} turns'.format(
    len(dataset[args.split]), len(list(dataset[args.split].iter_turns()))))
start = time.time()
preds, attention_best_pass, most_attentive_arc_weights, all_attention_arcs, padded_confnet_words = model.run_pred(
        '''
        # TODO: Update with actual prediction logic
        N = user_id_N.size
        yhat_N = ag_np.ones(N)
        return yhat_N

    def calc_loss_wrt_parameter_dict(self, param_dict, data_tuple):
        ''' Compute loss at given parameters

        Args
        ----
        param_dict : dict
            Keys are string names of parameters
            Values are *numpy arrays* of parameter values

        Returns
        -------
        loss : float scalar
        '''
        # TODO compute loss
        y_N = data_tuple[2]
        yhat_N = self.predict(data_tuple[0], data_tuple[1], **param_dict)
        loss_total = 0.0
        return loss_total

if __name__ == '__main__':
    train_tuple, valid_tuple, test_tuple, n_users, n_items = load_dataset()
    model = CollabFilterMeanOnly(n_epochs=50)
    model.init_parameter_dict(n_users, n_items, train_tuple)
    model.fit(train_tuple, valid_tuple)
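# The TODOs above leave predict and the loss as stubs. A minimal sketch of how
# a mean-only collaborative filter might fill them in -- predicting a single
# global scalar mu for every (user, item) pair and scoring it with total
# squared error. One plausible completion, not the assignment's reference
# solution (mu is a hypothetical parameter name):
import autograd.numpy as ag_np

def predict_mean_only(user_id_N, item_id_N, mu=None):
    # every rating is predicted as the global mean, whatever the ids are
    return mu * ag_np.ones(user_id_N.size)

def loss_mean_only(param_dict, data_tuple):
    user_id_N, item_id_N, y_N = data_tuple
    yhat_N = predict_mean_only(user_id_N, item_id_N, **param_dict)
    return ag_np.sum(ag_np.square(y_N - yhat_N))  # total squared error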
def main(dataset_name, disease_label, evaluated_dataset):
    """Calculate the performance of the classifier in each iteration of the bootstrap method."""
    # ----------------------------------------------------------------------------
    n_bootstrap = 1000

    participants_path = PROJECT_ROOT / 'data' / evaluated_dataset / 'participants.tsv'
    freesurfer_path = PROJECT_ROOT / 'data' / evaluated_dataset / 'freesurferData.csv'

    outputs_dir = PROJECT_ROOT / 'outputs'
    ids_path = outputs_dir / (evaluated_dataset + '_homogeneous_ids.csv')

    hc_label = 1

    # ----------------------------------------------------------------------------
    # Set random seed
    random_seed = 42
    np.random.seed(random_seed)
    rn.seed(random_seed)

    classifier_dir = PROJECT_ROOT / 'outputs' / 'classifier_analysis'
    classifier_dataset_dir = classifier_dir / dataset_name
    classifier_dataset_analysis_dir = classifier_dataset_dir / '{:02d}_vs_{:02d}'.format(hc_label, disease_label)
    classifier_storage_dir = classifier_dataset_analysis_dir / 'models'

    generalization_dir = classifier_dataset_analysis_dir / 'generalization'
    generalization_dir.mkdir(exist_ok=True)

    evaluated_dataset_df = load_dataset(participants_path, ids_path, freesurfer_path)

    aucs_test = []
    # ----------------------------------------------------------------------------
    for i_bootstrap in tqdm(range(n_bootstrap)):
        rvm = load(classifier_storage_dir / '{:03d}_rvr.joblib'.format(i_bootstrap))
        scaler = load(classifier_storage_dir / '{:03d}_scaler.joblib'.format(i_bootstrap))

        x_data = evaluated_dataset_df[COLUMNS_NAME].values

        tiv = evaluated_dataset_df['EstimatedTotalIntraCranialVol'].values
        tiv = tiv[:, np.newaxis]

        x_data = (np.true_divide(x_data, tiv)).astype('float32')

        x_data = np.concatenate(
            (x_data[evaluated_dataset_df['Diagn'] == hc_label],
             x_data[evaluated_dataset_df['Diagn'] == disease_label]), axis=0)
        y_data = np.concatenate(
            (np.zeros(sum(evaluated_dataset_df['Diagn'] == hc_label)),
             np.ones(sum(evaluated_dataset_df['Diagn'] == disease_label))))

        # Scaling using inter-quartile range
        x_data = scaler.transform(x_data)

        pred = rvm.predict(x_data)
        predictions_proba = rvm.predict_proba(x_data)

        auc = roc_auc_score(y_data, predictions_proba[:, 1])
        aucs_test.append(auc)

    aucs_df = pd.DataFrame(columns=['AUCs'], data=aucs_test)
    aucs_df.to_csv(generalization_dir / '{:}_aucs.csv'.format(evaluated_dataset), index=False)

    results = pd.DataFrame(columns=['Measure', 'Value'])
    results = results.append({'Measure': 'mean',
                              'Value': np.mean(aucs_test)}, ignore_index=True)
    results = results.append({'Measure': 'upper_limit',
                              'Value': np.percentile(aucs_test, 97.5)}, ignore_index=True)
    results = results.append({'Measure': 'lower_limit',
                              'Value': np.percentile(aucs_test, 2.5)}, ignore_index=True)
    results.to_csv(generalization_dir / '{:}_aucs_summary.csv'.format(evaluated_dataset), index=False)
import torch
import numpy as np
import os
import json

from utils import load_dataset, count_parameters
from config import Config
from tqdm import tqdm
from model import Model

config = Config()
dataset, ontology, vocab = load_dataset()
print('Slots: ', ontology.slots)

slot_dict = {s: {'slot_id': idx} for idx, s in enumerate(ontology.slots)}
for s in ontology.slots:
    if s != 'request':
        slot_dict[s]['values'] = {
            value: {
                'value_id': idx,
                'num': [vocab.word2index(w) for w in value.split()]
            }
            for idx, value in enumerate([config.NONE_TOKEN] + ontology.values[s])
        }
    else:
        slot_dict[s]['values'] = {
            value: {
                'value_id': idx,
                'num': [vocab.word2index(w) for w in value.split()]
            }
            for idx, value in enumerate(ontology.values[s]
                                 activation=None,
                                 kernel_regularizer=l2(1e-5))

    def call(self, x):
        out = self.dense1(x)
        out = self.odeblock(out)
        out = self.dense2(out)
        return out

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([input_shape[0], self.output_dim])

if not os.path.isfile('experiments/datasets/single_pendulum_x_train.npy'):
    x_train, y_train, x_val, y_val = create_dataset()
x_train, y_train, x_val, y_val = load_dataset()
if args.synthetic_derivative:
    y_train = np.gradient(x_train)[1] / 0.01

x_train = np.reshape(x_train, (-1, 2))
y_train = np.reshape(y_train, (-1, 2))
x_val = np.reshape(x_val, (-1, 2))
y_val = np.reshape(y_val, (-1, 2))

c = np.arange(len(x_train))
np.random.shuffle(c)
x_train = x_train[c[::int(100 / args.dataset_size)]]
y_train = y_train[c[::int(100 / args.dataset_size)]]

model = ODENet(hidden_dim=8, output_dim=y_train.shape[-1])
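# The synthetic-derivative branch above approximates dx/dt by finite
# differences: np.gradient(x_train) returns one array per axis, [1] picks the
# time axis, and dividing by 0.01 accounts for the sampling step. A tiny
# self-contained check of that pattern (dt = 0.01 assumed from the code):
import numpy as np

dt = 0.01
t = np.arange(0, 1, dt)
x = np.stack([np.sin(t), np.cos(t)], axis=-1)   # (timesteps, 2) trajectory
x_batch = x[None, ...]                          # (1, timesteps, 2) like x_train
dxdt = np.gradient(x_batch)[1] / dt             # central differences along axis 1
print(np.allclose(dxdt[0, 1:-1, 0], np.cos(t)[1:-1], atol=1e-3))  # ~ d(sin)/dt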
parser = ArgumentParser()
parser.add_argument('dsave', help='save location of model')
parser.add_argument('--split', help='split to evaluate on', default='test')
parser.add_argument('--gpu', type=int, help='gpu to use', default=0)
parser.add_argument('--fout', help='optional save file to store the predictions')
args = parser.parse_args()

logging.basicConfig(level=logging.INFO)

with open(os.path.join(args.dsave, 'config.json')) as f:
    args_save = Namespace(**json.load(f))
args_save.gpu = args.gpu
pprint(args_save)

dataset, ontology, vocab, Eword = load_dataset()

model = Tracker(args_save.model, args_save, ontology, vocab)
model.load_best_save(directory=args.dsave)
if args.gpu is not None:
    model.cuda(args.gpu)

logging.info('Making predictions for {} dialogues and {} turns'.format(
    len(dataset[args.split]), len(list(dataset[args.split].iter_turns()))))
preds = model.run_pred(dataset[args.split], args_save)
pprint(dataset[args.split].evaluate_preds(preds))

if args.fout:
    with open(args.fout, 'wt') as f:
        # predictions is a list of sets; convert to a list of lists to make it JSON serializable
        json.dump([list(p) for p in preds], f, indent=2)
if args.adjoint:
    from tfdiffeq import odeint_adjoint as odeint
else:
    from tfdiffeq import odeint

PLOT_DIR = 'plots/mass_spring_damper/learnedode/'
TIME_OF_RUN = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

device = 'gpu:' + str(args.gpu) if len(gpus) else 'cpu:0'

t = tf.linspace(0., 10., args.data_size)
if args.dtype == 'float64':
    t = tf.cast(t, tf.float64)

if not os.path.isfile('experiments/datasets/mass_spring_damper_x_train.npy'):
    x_train, _, x_val, _ = create_dataset()
x_train, _, x_val, _ = load_dataset()
x_train = x_train.astype(args.dtype)
x_val = x_val.astype(args.dtype)
x_val_extrap = tf.convert_to_tensor(x_val[0].reshape(-1, 1, 2))
x_val_interp = tf.convert_to_tensor(x_val[1].reshape(-1, 1, 2))
makedirs(PLOT_DIR)

def get_batch():
    # pick random data series
    n = np.random.choice(np.arange(x_train.shape[0], dtype=np.int64),
                         args.batch_size, replace=True)
    # pick random starting time
def mnist(args):
    data = load_dataset(dset='mnist')
    train_x, train_y = balanced_subset(data, 'train', args.data_frac)
    valid_x, valid_y = balanced_subset(data, 'valid', args.data_frac)
    test_x, test_y = data['test']
    num_train_batches = train_x.shape[0] / args.batch_size
    num_valid_batches = valid_x.shape[0] / args.batch_size
    valid_freq = num_train_batches

    model = SoftMax(train_x.shape[1], NUM_CLASSES, args.optimizer)

    expcost = None
    vcosts = []
    perfs = []
    model_best = None
    prev_perf = 0.0
    perf = 0.0
    for b in xrange(args.epochs * num_train_batches):
        k = b % num_train_batches
        x = train_x[k * args.batch_size:(k + 1) * args.batch_size, :]
        y = train_y[k * args.batch_size:(k + 1) * args.batch_size]
        cost = model.train(x, y, args.lr)

        # exponentially smoothed training cost
        if not expcost:
            expcost = cost
        else:
            expcost = 0.01 * cost + 0.99 * expcost

        if (b + 1) % args.print_every == 0:
            print('iter %d, cost %f, expcost %f' % (b + 1, cost, expcost))

        if (b + 1) % valid_freq == 0:
            perf, _ = measure_perf(valid_x, valid_y, model, args)
            perfs.append(perf)
            print('correct/total: %f' % perf)
            # early stopping on validation performance: stop once the score
            # from 32 validations ago still beats everything seen since
            if len(perfs) > 32:
                old_perf = perfs.pop(0)
                max_perf = max(perfs)
                if old_perf >= max_perf:
                    print('Peak perf: %f (data_frac=%f, lr=%f)'
                          % (old_perf, args.data_frac, args.lr))
                    test_perf, _ = measure_perf(test_x, test_y, model, args)
                    print('Test perf: %f (data_frac=%f, lr=%f)'
                          % (test_perf, args.data_frac, args.lr))
                    return test_perf

            # alternative: early stopping on validation cost
            # perf, vcost = measure_perf(valid_x, valid_y, model, args)
            # vcosts.append(vcost)
            # print('validation perf, cost: %f, %f' % (perf, vcost))
            # if len(vcosts) > 32:
            #     old_vcost = vcosts.pop(0)
            #     low_vcost = min(vcosts)
            #     if old_vcost <= low_vcost:
            #         print('Low vcost: %f (hdim=%d, nlayers=%d, data_frac=%f, lr=%f)'
            #               % (old_vcost, args.hdim, args.nlayers, args.data_frac, args.lr))
            #         test_perf, _ = measure_perf(test_x, test_y, model, args)
            #         print('Test perf: %f (hdim=%d, nlayers=%d, data_frac=%f, lr=%f)'
            #               % (test_perf, args.hdim, args.nlayers, args.data_frac, args.lr))
            #         return test_perf

        if (b + 1) % (num_train_batches * args.save_every) == 0:
            if perf > prev_perf:
                prev_perf = perf
                print('saving model')
                with open(pjoin(args.expdir, 'model.pk'), 'wb') as f:
                    # XXX just pickling the entire model for now
                    pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

    if perf > prev_perf:
        print('saving final model')
        with open(pjoin(args.expdir, 'model.pk'), 'wb') as f:
            pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
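# Hedged usage sketch: mnist() reads these attributes off args; the values
# below are hypothetical placeholders, not the original experiment settings.
from argparse import Namespace

args = Namespace(data_frac=0.1, batch_size=100, epochs=50, lr=0.01,
                 optimizer='sgd', print_every=100, save_every=10,
                 expdir='experiments')
test_perf = mnist(args)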
    def set_dataset(self, batch_size, num_samples, noise, random_state):
        self.train_epoch, self.data, self.test_epoch = utils.load_dataset(
            batch_size, self.load_func, False, num_samples, noise, random_state)
        self.X_train, self.y_train, self.X_test, self.y_test = self.data
        self.save_training_pts()
                        default=10, dest='critic_iters',
                        help='The number of discriminator weight updates per generator update (default: 10)')
    parser.add_argument('--lambda', '-p', type=int, default=10, dest='lamb',
                        help='The gradient penalty lambda hyperparameter (default: 10)')
    return parser.parse_args()


args = parse_args()

lines, charmap, inv_charmap = utils.load_dataset(
    path=args.training_data,
    max_length=args.seq_length
)

# create the output directory tree for checkpoints and generated samples
if not os.path.isdir(args.output_dir):
    os.makedirs(args.output_dir)
if not os.path.isdir(os.path.join(args.output_dir, 'checkpoints')):
    os.makedirs(os.path.join(args.output_dir, 'checkpoints'))
if not os.path.isdir(os.path.join(args.output_dir, 'samples')):
    os.makedirs(os.path.join(args.output_dir, 'samples'))

# pickle to avoid encoding errors with json
with open(os.path.join(args.output_dir, 'charmap.pickle'), 'wb') as f:
    pickle.dump(charmap, f)
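# Round-trip sketch (an addition, not in the original file): the pickled
# charmap can be reloaded the same way before sampling or decoding.
with open(os.path.join(args.output_dir, 'charmap.pickle'), 'rb') as f:
    charmap_loaded = pickle.load(f)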
def question3(dataset_id, train_test, save_plots=False, no_outputs=False):
    x, y = utils.load_dataset(dataset_id, 'train')
    x0 = x[np.where(y[:, 0] == 0)]
    x1 = x[np.where(y[:, 0] == 1)]

    # Normal equation solution w = (X^T X)^{-1} X^T y on the bias-augmented data
    x_bias = np.hstack((np.ones((x.shape[0], 1)), x))
    inner_prod_inv = np.linalg.inv(x_bias.transpose().dot(x_bias))
    w = (inner_prod_inv.dot(x_bias.transpose())).dot(y)

    # decision boundary: points where w0 + w1*f1 + w2*f2 = 0.5
    feat1 = np.linspace(np.min(x[:, 0]), np.max(x[:, 0]), 20)
    feat2 = (-w[1] * feat1 - w[0] + 0.5) / w[2]

    if not no_outputs:
        print('Parameter vector for dataset', dataset_id, train_test, ':', w)
        plt.scatter(x0[:, 0], x0[:, 1], c='r', marker='x', label='Class 0')
        plt.scatter(x1[:, 0], x1[:, 1], c='b', marker='x', label='Class 1')
        plt.plot(feat1, feat2, c='g', label='Decision Boundary')
        plt.legend(loc='lower left', scatterpoints=1)
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        if save_plots:
            name = 'Q3_' + dataset_id + '_' + train_test + '.png'
            plt.savefig(name)
        plt.show()
        plt.clf()

    # --------- Question 4 ----------------
    if train_test == 'test':
        x, y = utils.load_dataset(dataset_id, 'test')
        x_bias = np.hstack((np.ones((x.shape[0], 1)), x))
        correct = 0
        for idx in range(x.shape[0]):
            sample = x_bias[idx, :]
            label = y[idx, 0]
            # threshold the regression output at 0.5 to get a class label
            if w.transpose().dot(sample) >= 0.5:
                y_tilde = 1.0
            else:
                y_tilde = 0.0
            correct += (label == y_tilde)
        misclassif_error = 1 - correct / x.shape[0]
        return misclassif_error
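# Hedged usage sketch (not from the original assignment): the dataset ids
# 'A' and 'B' are hypothetical placeholders for whatever utils.load_dataset expects.
if __name__ == '__main__':
    for ds_id in ['A', 'B']:
        err = question3(ds_id, 'test', save_plots=False, no_outputs=True)
        print('Dataset {}: misclassification error = {:.4f}'.format(ds_id, err))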
import tensorflow as tf
from keras.utils import plot_model
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import utils

n_signals = 12
win_size = 128
experiment_name = "2020-07-31_8-23-46_device1"
win_data_file = f"data/win_data_{experiment_name}.txt"
win_label_file = f"data/win_label_{experiment_name}.txt"

# Load data: X has the form [n_wins, win_size, n_signals]
X, y = utils.load_dataset(win_data_file, win_label_file, win_size, n_signals)

""" ************************* HYPER-PARAMETERS ************************* """
# e.g. model_LSTM, model_stacked_LSTM, model_CNN1D_LSTM_v1 (model zoo from utils)
MODEL_NAME = "model_stacked_LSTM"
n_hiddens = 128  # for LSTM layers
# for TimeDistributed-layer-based models (win_size must be divisible by n_frames)
n_frames = 4
verbose, epochs, batch_size = 2, 100, 128

""" **************************** CHECKPOINT **************************** """
# The callback takes a couple of arguments to configure checkpointing.
checkpoint_path = f"pretrained_models/ckp_{MODEL_NAME}"
bool_save_ckp = True

# Create checkpoint callback
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              monitor='val_loss',
                              save_best_only=False,
                              save_weights_only=False,
def main(argv):
  del argv  # unused arg
  tf.io.gfile.makedirs(FLAGS.output_dir)
  logging.info('Saving checkpoints at %s', FLAGS.output_dir)
  tf.random.set_seed(FLAGS.seed)

  if FLAGS.use_gpu:
    logging.info('Use GPU')
    strategy = tf.distribute.MirroredStrategy()
  else:
    logging.info('Use TPU at %s',
                 FLAGS.tpu if FLAGS.tpu is not None else 'local')
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)

  per_core_batch_size = FLAGS.per_core_batch_size // FLAGS.ensemble_size
  batch_size = per_core_batch_size * FLAGS.num_cores
  check_bool = FLAGS.train_proportion > 0 and FLAGS.train_proportion <= 1
  assert check_bool, 'Proportion of train set has to meet 0 < prop <= 1.'

  drop_remainder_validation = True
  if not FLAGS.use_gpu:
    # This has to be True for TPU training; otherwise the batch size of images
    # in the validation set can't be determined by the TPU compiler.
    assert drop_remainder_validation, 'drop_remainder must be True in TPU mode.'

  train_dataset = utils.load_dataset(split=tfds.Split.TRAIN,
                                     name=FLAGS.dataset,
                                     batch_size=batch_size,
                                     use_bfloat16=FLAGS.use_bfloat16,
                                     repeat=True,
                                     proportion=FLAGS.train_proportion)
  validation_proportion = 1 - FLAGS.train_proportion
  validation_dataset = utils.load_dataset(
      split=tfds.Split.VALIDATION,
      name=FLAGS.dataset,
      batch_size=batch_size,
      use_bfloat16=FLAGS.use_bfloat16,
      repeat=True,
      proportion=validation_proportion,
      drop_remainder=drop_remainder_validation)
  clean_test_dataset = utils.load_dataset(split=tfds.Split.TEST,
                                          name=FLAGS.dataset,
                                          batch_size=batch_size,
                                          use_bfloat16=FLAGS.use_bfloat16)
  train_dataset = strategy.experimental_distribute_dataset(train_dataset)
  validation_dataset = strategy.experimental_distribute_dataset(
      validation_dataset)
  test_datasets = {
      'clean': strategy.experimental_distribute_dataset(clean_test_dataset),
  }
  if FLAGS.corruptions_interval > 0:
    if FLAGS.dataset == 'cifar10':
      load_c_dataset = utils.load_cifar10_c
    else:
      load_c_dataset = functools.partial(utils.load_cifar100_c,
                                         path=FLAGS.cifar100_c_path)
    corruption_types, max_intensity = utils.load_corrupted_test_info(
        FLAGS.dataset)
    for corruption in corruption_types:
      for intensity in range(1, max_intensity + 1):
        dataset = load_c_dataset(corruption_name=corruption,
                                 corruption_intensity=intensity,
                                 batch_size=batch_size,
                                 use_bfloat16=FLAGS.use_bfloat16)
        test_datasets['{0}_{1}'.format(corruption, intensity)] = (
            strategy.experimental_distribute_dataset(dataset))

  ds_info = tfds.builder(FLAGS.dataset).info
  train_sample_size = ds_info.splits[
      'train'].num_examples * FLAGS.train_proportion
  steps_per_epoch = int(train_sample_size / batch_size)
  train_sample_size = int(train_sample_size)
  steps_per_eval = ds_info.splits['test'].num_examples // batch_size
  num_classes = ds_info.features['label'].num_classes

  if FLAGS.use_bfloat16:
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  summary_writer = tf.summary.create_file_writer(
      os.path.join(FLAGS.output_dir, 'summaries'))

  logging.info('Building Keras model.')
  depth = 28
  width = 10

  dict_ranges = {'min': FLAGS.min_l2_range, 'max': FLAGS.max_l2_range}
  ranges = [dict_ranges for _ in range(6)]  # 6 independent l2 parameters
  model_config = {
      'key_to_index': {
          'input_conv_l2_kernel': 0,
          'group_l2_kernel': 1,
          'group_1_l2_kernel': 2,
          'group_2_l2_kernel': 3,
          'dense_l2_kernel': 4,
          'dense_l2_bias': 5,
      },
      'ranges': ranges,
      'test': None
  }
  lambdas_config = LambdaConfig(model_config['ranges'],
                                model_config['key_to_index'])

  if FLAGS.e_body_hidden_units > 0:
    e_body_arch = '({},)'.format(FLAGS.e_body_hidden_units)
  else:
    e_body_arch = '()'
  e_shared_arch = '()'
  e_activation = 'tanh'
  filters_resnet = [16]
  for i in range(0, 3):  # 3 groups of blocks
    filters_resnet.extend([16 * width * 2**i] * 9)  # 9 layers in each block
  # e_head dim for conv2d is just the number of filters (only kernel) and
  # twice num of classes for the last dense layer (kernel + bias)
  e_head_dims = [x for x in filters_resnet] + [2 * num_classes]

  with strategy.scope():
    e_models = e_factory(
        lambdas_config.input_shape,
        e_head_dims=e_head_dims,
        e_body_arch=eval(e_body_arch),  # pylint: disable=eval-used
        e_shared_arch=eval(e_shared_arch),  # pylint: disable=eval-used
        activation=e_activation,
        use_bias=FLAGS.e_model_use_bias,
        e_head_init=FLAGS.init_emodels_stddev)

    model = wide_resnet_hyperbatchensemble(
        input_shape=ds_info.features['image'].shape,
        depth=depth,
        width_multiplier=width,
        num_classes=num_classes,
        ensemble_size=FLAGS.ensemble_size,
        random_sign_init=FLAGS.random_sign_init,
        config=lambdas_config,
        e_models=e_models,
        l2_batchnorm_layer=FLAGS.l2_batchnorm,
        regularize_fast_weights=FLAGS.regularize_fast_weights,
        fast_weights_eq_contraint=FLAGS.fast_weights_eq_contraint,
        version=2)
    logging.info('Model input shape: %s', model.input_shape)
    logging.info('Model output shape: %s', model.output_shape)
    logging.info('Model number of weights: %s', model.count_params())
    # build hyper-batchensemble complete -------------------------

    # Initialize lambda distributions for tuning
    lambdas_mean = tf.reduce_mean(
        log_uniform_mean([lambdas_config.log_min, lambdas_config.log_max]))
    lambdas0 = tf.random.normal((FLAGS.ensemble_size, lambdas_config.dim),
                                lambdas_mean,
                                0.1 * FLAGS.ens_init_delta_bounds)
    lower0 = lambdas0 - tf.constant(FLAGS.ens_init_delta_bounds)
    lower0 = tf.maximum(lower0, 1e-8)
    upper0 = lambdas0 + tf.constant(FLAGS.ens_init_delta_bounds)
    log_lower = tf.Variable(tf.math.log(lower0))
    log_upper = tf.Variable(tf.math.log(upper0))
    lambda_parameters = [log_lower, log_upper]  # these variables are tuned
    clip_lambda_parameters(lambda_parameters, lambdas_config)

    # Optimizer settings to train model weights.
    # Linearly scale learning rate and the decay epochs by vanilla settings.
    # Note: here we don't divide the epochs by 200 as for the other
    # uncertainty baselines.
    base_lr = FLAGS.base_learning_rate * batch_size / 128
    lr_decay_epochs = [int(l) for l in FLAGS.lr_decay_epochs]
    lr_schedule = utils.LearningRateSchedule(
        steps_per_epoch,
        base_lr,
        decay_ratio=FLAGS.lr_decay_ratio,
        decay_epochs=lr_decay_epochs,
        warmup_epochs=FLAGS.lr_warmup_epochs)
    optimizer = tf.keras.optimizers.SGD(lr_schedule,
                                        momentum=0.9,
                                        nesterov=True)
    # tuner used for optimizing lambda_parameters
    tuner = tf.keras.optimizers.Adam(FLAGS.lr_tuning)

    metrics = {
        'train/negative_log_likelihood': tf.keras.metrics.Mean(),
        'train/accuracy': tf.keras.metrics.SparseCategoricalAccuracy(),
        'train/loss': tf.keras.metrics.Mean(),
        'train/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
        'train/disagreement': tf.keras.metrics.Mean(),
        'train/average_kl': tf.keras.metrics.Mean(),
        'train/cosine_similarity': tf.keras.metrics.Mean(),
        'test/negative_log_likelihood': tf.keras.metrics.Mean(),
        'test/accuracy': tf.keras.metrics.SparseCategoricalAccuracy(),
        'test/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
        'test/gibbs_nll': tf.keras.metrics.Mean(),
        'test/gibbs_accuracy': tf.keras.metrics.SparseCategoricalAccuracy(),
        'test/disagreement': tf.keras.metrics.Mean(),
        'test/average_kl': tf.keras.metrics.Mean(),
        'test/cosine_similarity': tf.keras.metrics.Mean(),
        'validation/loss': tf.keras.metrics.Mean(),
        'validation/loss_entropy': tf.keras.metrics.Mean(),
        'validation/loss_ce': tf.keras.metrics.Mean()
    }
    corrupt_metrics = {}
    for i in range(FLAGS.ensemble_size):
      metrics['test/nll_member_{}'.format(i)] = tf.keras.metrics.Mean()
      metrics['test/accuracy_member_{}'.format(i)] = (
          tf.keras.metrics.SparseCategoricalAccuracy())
    if FLAGS.corruptions_interval > 0:
      for intensity in range(1, max_intensity + 1):
        for corruption in corruption_types:
          dataset_name = '{0}_{1}'.format(corruption, intensity)
          corrupt_metrics['test/nll_{}'.format(dataset_name)] = (
              tf.keras.metrics.Mean())
          corrupt_metrics['test/accuracy_{}'.format(dataset_name)] = (
              tf.keras.metrics.SparseCategoricalAccuracy())
          corrupt_metrics['test/ece_{}'.format(dataset_name)] = (
              um.ExpectedCalibrationError(num_bins=FLAGS.num_bins))

    checkpoint = tf.train.Checkpoint(model=model,
                                     lambda_parameters=lambda_parameters,
                                     optimizer=optimizer)
    latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)
    initial_epoch = 0
    if latest_checkpoint and FLAGS.restore_checkpoint:
      # checkpoint.restore must be within a strategy.scope() so that optimizer
      # slot variables are mirrored.
      checkpoint.restore(latest_checkpoint)
      logging.info('Loaded checkpoint %s', latest_checkpoint)
      initial_epoch = optimizer.iterations.numpy() // steps_per_epoch

  @tf.function
  def train_step(iterator):
    """Training StepFn."""

    def step_fn(inputs):
      """Per-Replica StepFn."""
      images, labels = inputs
      images = tf.tile(images, [FLAGS.ensemble_size, 1, 1, 1])

      # generate lambdas
      lambdas = log_uniform_sample(per_core_batch_size, lambda_parameters)
      lambdas = tf.reshape(
          lambdas,
          (FLAGS.ensemble_size * per_core_batch_size, lambdas_config.dim))

      with tf.GradientTape() as tape:
        logits = model([images, lambdas], training=True)
        if FLAGS.use_bfloat16:
          logits = tf.cast(logits, tf.float32)

        if FLAGS.use_gibbs_ce:
          # Average of single-model CEs.
          # Tiling of labels should only be done for the Gibbs CE loss.
          labels = tf.tile(labels, [FLAGS.ensemble_size])
          negative_log_likelihood = tf.reduce_mean(
              tf.keras.losses.sparse_categorical_crossentropy(
                  labels, logits, from_logits=True))
        else:
          # Ensemble CE uses no tiling of the labels.
          negative_log_likelihood = ensemble_crossentropy(
              labels, logits, FLAGS.ensemble_size)
        # Note: divide l2_loss by sample_size (this differs from the
        # uncertainty_baselines implementation).
        l2_loss = sum(model.losses) / train_sample_size
        loss = negative_log_likelihood + l2_loss
        # Scale the loss given the TPUStrategy will reduce-sum all gradients.
        scaled_loss = loss / strategy.num_replicas_in_sync

      grads = tape.gradient(scaled_loss, model.trainable_variables)

      # Separate learning rate for fast weights.
      grads_and_vars = []
      for grad, var in zip(grads, model.trainable_variables):
        if (('alpha' in var.name or 'gamma' in var.name) and
            'batch_norm' not in var.name):
          grads_and_vars.append((grad * FLAGS.fast_weight_lr_multiplier, var))
        else:
          grads_and_vars.append((grad, var))
      optimizer.apply_gradients(grads_and_vars)

      probs = tf.nn.softmax(logits)
      per_probs = tf.split(probs,
                           num_or_size_splits=FLAGS.ensemble_size,
                           axis=0)
      per_probs_stacked = tf.stack(per_probs, axis=0)
      metrics['train/ece'].update_state(labels, probs)
      metrics['train/loss'].update_state(loss)
      metrics['train/negative_log_likelihood'].update_state(
          negative_log_likelihood)
      metrics['train/accuracy'].update_state(labels, logits)
      diversity_results = um.average_pairwise_diversity(
          per_probs_stacked, FLAGS.ensemble_size)
      for k, v in diversity_results.items():
        metrics['train/' + k].update_state(v)

    strategy.run(step_fn, args=(next(iterator),))

  @tf.function
  def tuning_step(iterator):
    """Tuning StepFn."""

    def step_fn(inputs):
      """Per-Replica StepFn."""
      images, labels = inputs
      images = tf.tile(images, [FLAGS.ensemble_size, 1, 1, 1])

      with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(lambda_parameters)

        # sample lambdas
        if FLAGS.sample_and_tune:
          lambdas = log_uniform_sample(per_core_batch_size, lambda_parameters)
        else:
          lambdas = log_uniform_mean(lambda_parameters)
          lambdas = tf.repeat(lambdas, per_core_batch_size, axis=0)
        lambdas = tf.reshape(
            lambdas,
            (FLAGS.ensemble_size * per_core_batch_size, lambdas_config.dim))
        # ensemble CE
        logits = model([images, lambdas], training=False)
        ce = ensemble_crossentropy(labels, logits, FLAGS.ensemble_size)
        # entropy penalty for lambda distribution
        entropy = FLAGS.tau * log_uniform_entropy(lambda_parameters)
        loss = ce - entropy
        scaled_loss = loss / strategy.num_replicas_in_sync

      gradients = tape.gradient(loss, lambda_parameters)
      tuner.apply_gradients(zip(gradients, lambda_parameters))

      metrics['validation/loss_ce'].update_state(
          ce / strategy.num_replicas_in_sync)
      metrics['validation/loss_entropy'].update_state(
          entropy / strategy.num_replicas_in_sync)
      metrics['validation/loss'].update_state(scaled_loss)

    strategy.run(step_fn, args=(next(iterator),))

  @tf.function
  def test_step(iterator, dataset_name, num_eval_samples=0):
    """Evaluation StepFn."""
    n_samples = num_eval_samples if num_eval_samples >= 0 else -num_eval_samples
    if num_eval_samples >= 0:
      # the +1 accounts for the fact that we add the mean of lambdas
      ensemble_size = FLAGS.ensemble_size * (1 + n_samples)
    else:
      ensemble_size = FLAGS.ensemble_size * n_samples

    def step_fn(inputs):
      """Per-Replica StepFn."""
      # Note that we don't use tf.tile for labels here.
      images, labels = inputs
      images = tf.tile(images, [ensemble_size, 1, 1, 1])

      # get lambdas
      samples = log_uniform_sample(n_samples, lambda_parameters)
      if num_eval_samples >= 0:
        lambdas = log_uniform_mean(lambda_parameters)
        lambdas = tf.expand_dims(lambdas, 1)
        lambdas = tf.concat((lambdas, samples), 1)
      else:
        lambdas = samples

      # lambdas with shape (ens size, samples, dim of lambdas)
      rep_lambdas = tf.repeat(lambdas, per_core_batch_size, axis=1)
      rep_lambdas = tf.reshape(rep_lambdas,
                               (ensemble_size * per_core_batch_size, -1))

      # eval on testsets
      logits = model([images, rep_lambdas], training=False)
      if FLAGS.use_bfloat16:
        logits = tf.cast(logits, tf.float32)
      probs = tf.nn.softmax(logits)
      per_probs = tf.split(probs, num_or_size_splits=ensemble_size, axis=0)

      # per-member performance and Gibbs performance (average per-member perf)
      if dataset_name == 'clean':
        for i in range(FLAGS.ensemble_size):
          # we record the first sample of lambdas per batch-ens member
          first_member_index = i * (ensemble_size // FLAGS.ensemble_size)
          member_probs = per_probs[first_member_index]
          member_loss = tf.keras.losses.sparse_categorical_crossentropy(
              labels, member_probs)
          metrics['test/nll_member_{}'.format(i)].update_state(member_loss)
          metrics['test/accuracy_member_{}'.format(i)].update_state(
              labels, member_probs)

        labels_tile = tf.tile(labels, [ensemble_size])
        metrics['test/gibbs_nll'].update_state(
            tf.reduce_mean(
                tf.keras.losses.sparse_categorical_crossentropy(
                    labels_tile, logits, from_logits=True)))
        metrics['test/gibbs_accuracy'].update_state(labels_tile, probs)

      # ensemble performance
      negative_log_likelihood = ensemble_crossentropy(labels, logits,
                                                      ensemble_size)
      probs = tf.reduce_mean(per_probs, axis=0)
      if dataset_name == 'clean':
        metrics['test/negative_log_likelihood'].update_state(
            negative_log_likelihood)
        metrics['test/accuracy'].update_state(labels, probs)
        metrics['test/ece'].update_state(labels, probs)
      else:
        corrupt_metrics['test/nll_{}'.format(dataset_name)].update_state(
            negative_log_likelihood)
        corrupt_metrics['test/accuracy_{}'.format(dataset_name)].update_state(
            labels, probs)
        corrupt_metrics['test/ece_{}'.format(dataset_name)].update_state(
            labels, probs)

      if dataset_name == 'clean':
        per_probs_stacked = tf.stack(per_probs, axis=0)
        diversity_results = um.average_pairwise_diversity(
            per_probs_stacked, ensemble_size)
        for k, v in diversity_results.items():
          metrics['test/' + k].update_state(v)

    strategy.run(step_fn, args=(next(iterator),))

  logging.info('--- Starting training using %d examples. ---',
               train_sample_size)
  train_iterator = iter(train_dataset)
  validation_iterator = iter(validation_dataset)
  start_time = time.time()
  for epoch in range(initial_epoch, FLAGS.train_epochs):
    logging.info('Starting to run epoch: %s', epoch)
    for step in range(steps_per_epoch):
      train_step(train_iterator)

      do_tuning = (epoch >= FLAGS.tuning_warmup_epochs)
      if do_tuning and ((step + 1) % FLAGS.tuning_every_x_step == 0):
        tuning_step(validation_iterator)
        # clip lambda parameters if outside of range
        clip_lambda_parameters(lambda_parameters, lambdas_config)

      current_step = epoch * steps_per_epoch + (step + 1)
      max_steps = steps_per_epoch * FLAGS.train_epochs
      time_elapsed = time.time() - start_time
      steps_per_sec = float(current_step) / time_elapsed
      eta_seconds = (max_steps - current_step) / steps_per_sec
      message = ('{:.1%} completion: epoch {:d}/{:d}. {:.1f} steps/s. '
                 'ETA: {:.0f} min. Time elapsed: {:.0f} min'.format(
                     current_step / max_steps, epoch + 1, FLAGS.train_epochs,
                     steps_per_sec, eta_seconds / 60, time_elapsed / 60))
      if step % 20 == 0:
        logging.info(message)

    # evaluate on test data
    datasets_to_evaluate = {'clean': test_datasets['clean']}
    if (FLAGS.corruptions_interval > 0 and
        (epoch + 1) % FLAGS.corruptions_interval == 0):
      datasets_to_evaluate = test_datasets
    for dataset_name, test_dataset in datasets_to_evaluate.items():
      test_iterator = iter(test_dataset)
      logging.info('Testing on dataset %s', dataset_name)
      for step in range(steps_per_eval):
        if step % 20 == 0:
          logging.info('Starting to run eval step %s of epoch: %s', step,
                       epoch)
        test_step(test_iterator, dataset_name, FLAGS.num_eval_samples)
      logging.info('Done with testing on %s', dataset_name)

    corrupt_results = {}
    if (FLAGS.corruptions_interval > 0 and
        (epoch + 1) % FLAGS.corruptions_interval == 0):
      corrupt_results = utils.aggregate_corrupt_metrics(
          corrupt_metrics, corruption_types, max_intensity)

    logging.info('Train Loss: %.4f, Accuracy: %.2f%%',
                 metrics['train/loss'].result(),
                 metrics['train/accuracy'].result() * 100)
    logging.info('Validation Loss: %.4f, CE: %.4f, Entropy: %.4f',
                 metrics['validation/loss'].result(),
                 metrics['validation/loss_ce'].result(),
                 metrics['validation/loss_entropy'].result())
    logging.info('Test NLL: %.4f, Accuracy: %.2f%%',
                 metrics['test/negative_log_likelihood'].result(),
                 metrics['test/accuracy'].result() * 100)
    for i in range(FLAGS.ensemble_size):
      logging.info('Member %d Test Loss: %.4f, Accuracy: %.2f%%', i,
                   metrics['test/nll_member_{}'.format(i)].result(),
                   metrics['test/accuracy_member_{}'.format(i)].result() * 100)

    total_results = {name: metric.result() for name, metric in metrics.items()}
    total_results.update(
        {name: metric.result() for name, metric in corrupt_metrics.items()})
    total_results.update(corrupt_results)
    with summary_writer.as_default():
      for name, result in total_results.items():
        tf.summary.scalar(name, result, step=epoch + 1)

    for metric in metrics.values():
      metric.reset_states()

    # save checkpoint and lambdas config
    if (FLAGS.checkpoint_interval > 0 and
        (epoch + 1) % FLAGS.checkpoint_interval == 0):
      checkpoint_name = checkpoint.save(
          os.path.join(FLAGS.output_dir, 'checkpoint'))
      lambdas_cf = lambdas_config.get_config()
      filepath = os.path.join(FLAGS.output_dir, 'lambdas_config.p')
      with tf.io.gfile.GFile(filepath, 'wb') as fp:
        pickle.dump(lambdas_cf, fp, protocol=pickle.HIGHEST_PROTOCOL)
      logging.info('Saved checkpoint to %s', checkpoint_name)
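# log_uniform_sample / log_uniform_mean / log_uniform_entropy are helpers
# defined elsewhere in this repo. As a rough, non-authoritative sketch of the
# sampling step used above (assuming lambdas are drawn log-uniformly between
# exp(log_lower) and exp(log_upper)):
def log_uniform_sample_sketch(num_samples, lambda_parameters):
  """Sketch: draw lambdas log-uniformly between exp(log_lower), exp(log_upper)."""
  log_lower, log_upper = lambda_parameters
  ens_size, dim = log_lower.shape
  u = tf.random.uniform((ens_size, num_samples, dim), dtype=log_lower.dtype)
  log_lower_e = tf.expand_dims(log_lower, 1)
  log_upper_e = tf.expand_dims(log_upper, 1)
  return tf.exp(log_lower_e + u * (log_upper_e - log_lower_e))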
parser.add_argument('--content_dim', type=int, default=128,
                    help='size of the content vector')
parser.add_argument('--pose_dim', type=int, default=10,
                    help='size of the pose vector')
parser.add_argument('--image_width', type=int, default=128,
                    help='the height / width of the input image to network')
parser.add_argument('--channels', default=3, type=int)
parser.add_argument('--dataset', default='kth',
                    help='dataset to train with')
parser.add_argument('--max_step', type=int, default=20,
                    help='maximum distance between frames')
parser.add_argument('--sd_weight', type=float, default=0.0001,
                    help='weight on adversarial loss')
parser.add_argument('--sd_nf', type=int, default=100,
                    help='number of layers')
parser.add_argument('--content_model', default='dcgan_unet',
                    help='model type (dcgan | dcgan_unet | vgg_unet)')
parser.add_argument('--pose_model', default='dcgan',
                    help='model type (dcgan | unet | resnet)')
parser.add_argument('--data_threads', type=int, default=24,
                    help='number of parallel data loading threads')
parser.add_argument('--normalize', action='store_true',
                    help='if true, normalize pose vector')
parser.add_argument('--data_type', default='drnet',
                    help='speed up data loading for drnet training')
opt = parser.parse_args()

train_data, test_data = utils.load_dataset(opt)
test_loader = DataLoader(test_data,
                         num_workers=opt.data_threads,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)

def get_testing_batch():
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch

testing_batch_generator = get_testing_batch()
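# Usage sketch (an assumption, not in the original file): draw one normalized
# test batch from the infinite generator defined above.
test_batch = next(testing_batch_generator)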
def getLSTM(params, filename):
    '''
    This function uses the parameters to fetch an LSTM network and then
    trains it if the user wants to. After training, it saves the model in
    the ./models folder under the name str(params) so that the model can
    be easily recognized and reloaded. Otherwise it loads a previously
    trained model and returns the predicting function.

    Parameters
    ----------
    params : dict
        The parameters needed by the model.
    filename : str or None
        Optional; required when we want to load a previously built model.

    Returns
    -------
    predictor : a function that returns the labels for the test data

    References
    ----------
    http://colinraffel.com/talks/hammer2015recurrent.pdf
    '''
    input_var = T.ftensor3('input_var')
    l_out = lstm(input_var, params)
    target_values = T.fmatrix('target_output')
    network_output = lasagne.layers.get_output(l_out)
    # mean squared error between predictions and targets
    cost = T.mean((network_output - target_values)**2)
    all_params = lasagne.layers.get_all_params(l_out)
    updates = lasagne.updates.adagrad(cost, all_params,
                                      params['LEARNING_RATE'])
    pred = theano.function([input_var], network_output,
                           allow_input_downcast=True)

    if filename:
        print "Loading a previously saved " + params['NAME']
        all_param_values = np.load("./models/" + filename + '.npy')
        for i in range(len(all_param_values)):
            all_param_values[i] = all_param_values[i].astype('float32')
        all_params = lasagne.layers.get_all_params(l_out)
        for p, v in zip(all_params, all_param_values):
            p.set_value(v)
    else:
        print('loading data for ' + params['NAME'])
        X_train1, y_train1, X_val1, y_val1 = load_dataset(
            X1, y1, params['NUM_FEATURES'], params['SEQ_LENGTH'])
        X_train2, y_train2, X_val2, y_val2 = load_dataset(
            X2, y2, params['NUM_FEATURES'], params['SEQ_LENGTH'])
        print('compiling the ' + params['NAME'])
        train = theano.function([input_var, target_values], cost,
                                updates=updates, allow_input_downcast=True)
        validate = theano.function([input_var, target_values], cost,
                                   allow_input_downcast=True)
        old_valerr = [10, 10]
        for epoch in range(params['NUM_EPOCHS']):
            print "Training the network..."
            train_err = 0
            train_batches = 0
            old_netout = l_out
            start_time = time.time()
            for batch in iterate_minibatches(X_train1, y_train1,
                                             params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'],
                                             shuffle=False):
                # if train_batches % 50 == 0:
                #     print "batch number " + str(train_batches)
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1
            for batch in iterate_minibatches(X_train2, y_train2,
                                             params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'],
                                             shuffle=False):
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1

            # And a full pass over the first validation set:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val1, y_val1,
                                             params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'],
                                             shuffle=False):
                inputs, targets = batch
                err = validate(inputs, targets)
                val_err += err
                # val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} (Composite addresses) took {:.3f}s".format(
                epoch + 1, params['NUM_EPOCHS'], time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(
                train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            # print("  validation accuracy:\t\t{:.2f} %".format(
            #     val_acc / val_batches * 100))

            # To prevent overfitting, stop as soon as the validation error
            # rises noticeably above the previous epoch's value and roll back
            # to the previous network.
            # (alternative conditions tried: val_err - old_valerr[0] < 0.001,
            #  old_valerr[0] - val_err < 0.001)
            if val_err - old_valerr[0] > 0.03:
                print "overfitting or model reached saturation...\n"
                print old_valerr
                l_out = old_netout
                break
            old_netout = l_out
            old_valerr[0] = val_err

            # And a full pass over the second validation set:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val2, y_val2,
                                             params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'],
                                             shuffle=False):
                inputs, targets = batch
                err = validate(inputs, targets)
                val_err += err
                # val_acc += acc
                val_batches += 1

            print("Epoch {} of {} (OneLine addresses) took {:.3f}s".format(
                epoch + 1, params['NUM_EPOCHS'], time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(
                train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            # print("  validation accuracy:\t\t{:.2f} %\n".format(
            #     val_acc / val_batches * 100))
            old_valerr[1] = val_err

            print "saving the parameters..."
            all_param_values = [p.get_value() for p in all_params]
            np.save("./models/" + str(params), all_param_values)

    return pred
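# Hedged usage sketch: the keys below mirror those accessed inside getLSTM;
# the concrete values are hypothetical, not the original experiment settings.
params = {
    'NAME': 'address_lstm',
    'NUM_FEATURES': 64,
    'SEQ_LENGTH': 20,
    'BATCH_SIZE': 128,
    'NUM_EPOCHS': 25,
    'LEARNING_RATE': 0.01,
}
predictor = getLSTM(params, None)           # train from scratch
# predictor = getLSTM(params, str(params))  # or reload a saved model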