def compare_methods(method_names=('Odeint', 'Verlet')):
    """Compare two integration methods on the solar-system scenario.

    Runs both methods side by side from identical initial conditions,
    accumulates per-tick divergence (sum of Euclidean distances between
    corresponding particle positions) and wall-clock time per method,
    then plots the divergence curve and writes a summary file.

    :param method_names: pair of method identifiers understood by
        ``calculate_system_motion``; index 0 and 1 are used.
        (Tuple default instead of a list: a mutable default would be
        shared across calls.)
    """
    emitter = Emitter()
    file_to_read = 'solar_system.json'
    load_data(file_to_read, emitter)
    particles_1 = emitter.particles
    # Deep copy so the two methods evolve fully independent particle sets.
    particles_2 = deepcopy(particles_1)
    delta_t = 0.1
    tick_count = 100
    metric_list = []
    exec_time = {method_names[0]: .0, method_names[1]: .0}
    for i in range(tick_count):
        # Stop early if either simulation has lost all its particles.
        if not particles_1 or not particles_2:
            break
        start_time = time()
        particles_1 = calculate_system_motion(particles_1, delta_t,
                                              method_names[0])
        exec_time[method_names[0]] += time() - start_time
        start_time = time()
        particles_2 = calculate_system_motion(particles_2, delta_t,
                                              method_names[1])
        exec_time[method_names[1]] += time() - start_time
        # Divergence metric: sum of position distances over paired particles.
        metric = .0
        for p_1, p_2 in zip(particles_1, particles_2):
            dist = np.array(p_1.coordinates) - np.array(p_2.coordinates)
            metric += np.linalg.norm(dist)
        metric_list.append(metric)
    ticks = range(len(metric_list))
    _built_plot(ticks, metric_list, method_names, delta_t)
    test_file = 'test.txt'
    _write_to_file(test_file, exec_time, metric_list, delta_t)
def _on_loading_click(self, event):
    """Load the solar-system scenario and size each particle by mass rank.

    Clears current state, reads 'solar_system.json' into the emitter,
    records the largest absolute coordinate for scaling, and assigns each
    particle a display radius between 50 and 200 according to where its
    mass falls in the sorted order (lightest -> smallest).
    """
    self._clear()
    load_data('solar_system.json', self._emitter)
    self._is_solar_mode = True

    particles = self._emitter.particles
    # Largest absolute coordinate component across all particles.
    self._max_coord = np.max([np.abs(p.coordinates) for p in particles])

    # Rank particles by mass and map rank -> radius on a linear scale.
    masses = dict(enumerate([p.mass for p in particles]))
    by_mass = sorted(masses, key=masses.get)
    radii = np.linspace(50, 200, len(masses))
    for idx in range(len(masses)):
        particles[idx].radius = radii[by_mass.index(idx)]
def compare_methods_accuracy(method_names, max_time, tick_count,
                             particles_count=None):
    """Compare several integration methods against the first one listed.

    Each method simulates the same initial particle set; per-tick accuracy
    is measured as the summed 2-D distance between each particle's position
    under method[0] (the reference) and under method[i]. Results are
    plotted and written to a summary file.

    :param method_names: method identifiers for ``calculate_system_motion``;
        the first entry serves as the reference trajectory.
    :param max_time: total simulated time.
    :param tick_count: number of simulation steps.
    :param particles_count: if None, load the solar-system scenario;
        otherwise generate this many random particles.
    """
    emitter = Emitter()
    if particles_count is None:
        file_to_read = 'solar_system.json'
        load_data(file_to_read, emitter)
        particles = emitter.particles
    else:
        particles = emitter.generate_particles(particles_count)
    ticks = range(tick_count)
    total_metric_list = []
    runtime = []
    results = []
    # Run every method on its own deep copy so runs don't affect each other;
    # wall-clock time per method is recorded alongside the trajectories.
    for name in method_names:
        print(f'{name} is executed')
        start_time = time()
        result = calculate_system_motion(name, deepcopy(particles), max_time,
                                         tick_count)
        runtime.append(time() - start_time)
        results.append(result)
    # results[i][j][k] is presumably (x, y, ...) state of particle k at tick j
    # for method i — only the first two components are compared. TODO confirm.
    for i in range(len(results)):
        local_metric_list = []
        for j in range(len(results[0])):
            metric = .0
            for k in range(len(results[0][0])):
                # Distance to the reference method (index 0) in the x-y plane.
                dist = results[0][j][k][:2] - results[i][j][k][:2]
                metric += np.linalg.norm(dist)
            local_metric_list.append(metric)
        total_metric_list.append(local_metric_list)
    delta_t = max_time / tick_count
    _built_metric_plot(method_names, ticks, total_metric_list, delta_t)
    test_file = 'test.txt'
    _write_to_file(test_file, method_names, len(particles), runtime,
                   total_metric_list, delta_t)
def main(args):
    """Load a trained model and visualize its attention/adjacency matrices.

    Restores model settings and weights from ``MODELS_DIR/args.load``,
    builds a balanced positive/negative validation sample, extracts the
    model's matrices for both sets, and saves the plots under
    ``REPORTS_DIR/matrices/args.load``.
    """
    ''' ADMIN '''
    '''----------------------------------------------------------------------- '''
    img_path = os.path.join(REPORTS_DIR, 'matrices', args.load)
    if not os.path.exists(img_path):
        os.makedirs(img_path)
    ''' DATA '''
    '''----------------------------------------------------------------------- '''
    tf = load_tf(args.data_dir, "{}-train.pickle".format(args.filename))
    X, y = load_data(args.data_dir, "{}-train.pickle".format(args.filename))
    # Apply the fitted transformer to each jet's constituent features in place.
    for ij, jet in enumerate(X):
        jet["content"] = tf.transform(jet["content"])
    # First 1000 examples form the (uncropped) validation pool.
    X_valid_uncropped, y_valid_uncropped = X[:1000], y[:1000]
    X_valid, y_valid, _, _ = crop(X_valid_uncropped, y_valid_uncropped,
                                  return_cropped_indices=True)
    # args.n_viz examples from each class for visualization.
    X_pos, X_neg = find_balanced_samples(X_valid, y_valid, args.n_viz)
    ''' MODEL '''
    '''----------------------------------------------------------------------- '''
    # Initialization: rebuild the model from its pickled settings,
    # then load the saved weights.
    with open(os.path.join(MODELS_DIR, args.load, 'settings.pickle'), "rb") as f:
        # latin-1 handles pickles created under Python 2.
        settings = pickle.load(f, encoding='latin-1')
    Transform = settings["transform"]
    Predict = settings["predict"]
    model_kwargs = settings["model_kwargs"]
    with open(os.path.join(MODELS_DIR, args.load, 'model_state_dict.pt'), 'rb') as f:
        state_dict = torch.load(f)
    model = Predict(Transform, **model_kwargs)
    model.load_state_dict(state_dict)
    if torch.cuda.is_available():
        model.cuda()
    ''' GET MATRICES '''
    '''----------------------------------------------------------------------- '''
    AA_pos = get_matrices(model, X_pos)
    AA_neg = get_matrices(model, X_neg)
    ''' PLOT MATRICES '''
    '''----------------------------------------------------------------------- '''
    viz(AA_pos, os.path.join(img_path, 'positive'))
    viz(AA_neg, os.path.join(img_path, 'negative'))
def main():
    """Evaluate trained models: build, cache, and plot ROC curves.

    Either evaluates every model instance under the configured model
    directories on the test set (caching per-model results as
    ``test-rocs.pickle``), or re-loads previously computed ROC pickles,
    then plots all ROC curves together and reports summary scores.
    Uses the module-level ``args`` namespace.
    """
    eh = EvaluationExperimentHandler(args)
    ''' GET RELATIVE PATHS TO DATA AND MODELS '''
    '''----------------------------------------------------------------------- '''
    if args.model_list_file is None:
        assert args.root_model_dir is not None
        model_paths = [(args.root_model_dir, args.root_model_dir)]
    else:
        # CSV with 'model' (label) and 'filename' (path) columns.
        with open(args.model_list_file, newline='') as f:
            reader = csv.DictReader(f)
            lines = [l for l in reader]
            model_paths = [(l['model'], l['filename']) for l in lines[0:]]
    # NOTE(review): args.filename appears to be a single string here, so
    # "\n".join(args.filename) joins its characters — confirm intent.
    logging.info("DATASET\n{}".format("\n".join(args.filename)))
    data_path = args.filename
    logging.info("MODEL PATHS\n{}".format(
        "\n".join(mp for (_, mp) in model_paths)))

    def evaluate_models(X, yy, w, model_filenames, batch_size=64):
        """Run each model on (X, yy, w); return per-model ROC statistics.

        Results are cached next to each model as 'test-rocs.pickle' and
        reused on subsequent runs.
        """
        rocs = []
        fprs = []
        tprs = []
        inv_fprs = []
        for i, filename in enumerate(model_filenames):
            if 'DS_Store' not in filename:
                logging.info("\t[{}] Loading {}".format(i, filename)),
                model = load_model(filename)
                if torch.cuda.is_available():
                    model.cuda()
                model_test_file = os.path.join(filename, 'test-rocs.pickle')
                # Only (re)compute if no cached result exists.
                work = not os.path.exists(model_test_file)
                if work:
                    model.eval()
                    offset = 0
                    yy_pred = []
                    n_batches, remainder = np.divmod(len(X), batch_size)
                    # NOTE(review): this inner loop variable shadows the
                    # enumerate index `i` above; harmless here since `i` is
                    # only logged before the loop, but worth renaming.
                    for i in range(n_batches):
                        X_batch = X[offset:offset + batch_size]
                        X_var = wrap_X(X_batch)
                        yy_pred.append(unwrap(model(X_var)))
                        unwrap_X(X_var)
                        offset += batch_size
                    # Trailing partial batch.
                    if remainder > 0:
                        X_batch = X[-remainder:]
                        X_var = wrap_X(X_batch)
                        yy_pred.append(unwrap(model(X_var)))
                        unwrap_X(X_var)
                    yy_pred = np.squeeze(np.concatenate(yy_pred, 0), 1)
                    #Store Y_pred and Y_test to disc
                    np.save(args.data_dir + 'Y_pred_60.csv', yy_pred)
                    np.save(args.data_dir + 'Y_test_60.csv', yy)
                    logging.info('Files Saved')
                    logdict = dict(
                        model=filename.split('/')[-1],
                        yy=yy,
                        yy_pred=yy_pred,
                        w_valid=w[:len(yy_pred)],
                    )
                    eh.log(**logdict)
                    # eh.log populates the monitors read back here.
                    roc = eh.monitors['roc_auc'].value
                    fpr = eh.monitors['roc_curve'].value[0]
                    tpr = eh.monitors['roc_curve'].value[1]
                    inv_fpr = eh.monitors['inv_fpr'].value
                    with open(model_test_file, "wb") as fd:
                        pickle.dump((roc, fpr, tpr, inv_fpr), fd)
                else:
                    with open(model_test_file, "rb") as fd:
                        roc, fpr, tpr, inv_fpr = pickle.load(fd)
                stats_dict = {'roc_auc': roc, 'inv_fpr': inv_fpr}
                eh.stats_logger.log(stats_dict)
                rocs.append(roc)
                fprs.append(fpr)
                tprs.append(tpr)
                inv_fprs.append(inv_fpr)
        logging.info("\tMean ROC AUC = {:.4f} Mean 1/FPR = {:.4f}".format(
            np.mean(rocs), np.mean(inv_fprs)))
        return rocs, fprs, tprs, inv_fprs

    def build_rocs(data, model_path, batch_size):
        """Evaluate every model file found directly under model_path."""
        X, y, w = data
        model_filenames = [
            os.path.join(model_path, fn) for fn in os.listdir(model_path)
        ]
        rocs, fprs, tprs, inv_fprs = evaluate_models(X, y, w, model_filenames,
                                                     batch_size)
        return rocs, fprs, tprs, inv_fprs

    ''' BUILD ROCS '''
    '''----------------------------------------------------------------------- '''
    if args.load_rocs is None and args.model_list_file is None:
        logging.info(
            'Building ROCs for models trained on {}'.format(data_path))
        tf = load_tf(args.data_dir, "{}-train.pickle".format(data_path))
        X, y = load_data(args.data_dir,
                         "{}-{}.pickle".format(data_path, args.set))
        # Apply the train-set transformer to every jet's constituents.
        for ij, jet in enumerate(X):
            jet["content"] = tf.transform(jet["content"])
        # Optional random subsample of the test set.
        if args.n_test > 0:
            indices = torch.randperm(len(X)).numpy()[:args.n_test]
            X = [X[i] for i in indices]
            y = y[indices]
        X_test, y_test, cropped_indices, w_test = crop(
            X, y, 60, return_cropped_indices=True, pileup=args.pileup)
        data = (X_test, y_test, w_test)
        for model_path in model_paths:
            model_path = model_path[0]
            logging.info(
                '\tBuilding ROCs for instances of {}'.format(model_path))
            logging.info('\tBuilding ROCs for instances of {}'.format(
                args.finished_models_dir))
            logging.info('\tBuilding ROCs for instances of {}'.format(
                os.path.join(args.finished_models_dir, model_path)))
            r, f, t, inv_fprs = build_rocs(
                data, os.path.join(args.finished_models_dir, model_path),
                args.batch_size)
            #remove_outliers_csv(os.path.join(args.finished_models_dir, model_path))
            # Cache ROC results in the experiment directory for plotting below.
            absolute_roc_path = os.path.join(
                eh.exp_dir, "rocs-{}-{}.pickle".format(
                    "-".join(model_path.split('/')), data_path))
            with open(absolute_roc_path, "wb") as fd:
                pickle.dump((r, f, t, inv_fprs), fd)
    else:
        # Re-use previously computed ROC pickles: copy them into the
        # current experiment directory under the canonical name.
        for _, model_path in model_paths:
            previous_absolute_roc_path = os.path.join(
                args.root_exp_dir, model_path, "rocs-{}-{}.pickle".format(
                    "-".join(model_path.split('/')), data_path))
            with open(previous_absolute_roc_path, "rb") as fd:
                r, f, t, inv_fprs = pickle.load(fd)
            absolute_roc_path = os.path.join(
                eh.exp_dir, "rocs-{}-{}.pickle".format(
                    "-".join(model_path.split('/')), data_path))
            with open(absolute_roc_path, "wb") as fd:
                pickle.dump((r, f, t, inv_fprs), fd)
    ''' PLOT ROCS '''
    '''----------------------------------------------------------------------- '''
    # Color-blind-friendly palette, converted from 0-255 ints to 0-1 floats.
    colors = (('red', (228, 26, 28)), ('blue', (55, 126, 184)),
              ('green', (77, 175, 74)), ('purple', (162, 78, 163)),
              ('orange', (255, 127, 0)))
    colors = [(name, tuple(x / 256 for x in tup)) for name, tup in colors]
    for (label, model_path), (_, color) in zip(model_paths, colors):
        absolute_roc_path = os.path.join(
            eh.exp_dir, "rocs-{}-{}.pickle".format(
                "-".join(model_path.split('/')), data_path))
        with open(absolute_roc_path, "rb") as fd:
            r, f, t, inv_fprs = pickle.load(fd)
        if args.remove_outliers:
            r, f, t, inv_fprs = remove_outliers(r, f, t, inv_fprs)
        report_score(r, inv_fprs, label=label)
        plot_rocs(r, f, t, label=label, color=color)
    figure_filename = os.path.join(eh.exp_dir, 'rocs.png')
    plot_save(figure_filename)
    if args.plot:
        plot_show()
    eh.finished()
def test_dA(learning_rate=0.01, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, output_folder='dA_plots'): """ This demo is tested on MNIST :type learning_rate: float :param learning_rate: learning rate used for training the DeNosing AutoEncoder :type training_epochs: int :param training_epochs: number of epochs used for training :type dataset: string :param dataset: path to the picked dataset """ # load default MNIST image dataset datasets = load_data(dataset) #datasets = load_data('../data/synthetic.csv', 'csv') #datasets = load_data('../data/iris.csv','csv') train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # start-snippet-2 # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images # end-snippet-2 if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) #################################### # BUILDING THE MODEL NO CORRUPTION # #################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible= 28 * 28, n_hidden= 500 ) cost, updates = da.get_cost_updates( corruption_level=0., learning_rate=learning_rate ) train_da = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size] } ) start_time = timeit.default_timer() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) print 'W dimensions ' + da.W.get_value().shape end_time = timeit.default_timer() training_time = (end_time - start_time) #print >> sys.stderr, ('The 
no corruption code for file ' + # os.path.split(__file__)[1] + # ' ran for %.2fm' % ((training_time) / 60.)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') # start-snippet-3 ##################################### # BUILDING THE MODEL CORRUPTION 30% # ##################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible= 28 * 28, n_hidden= 500 ) cost, updates = da.get_cost_updates( corruption_level=0.3, learning_rate=learning_rate ) train_da = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size] } ) start_time = timeit.default_timer() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = timeit.default_timer() training_time = (end_time - start_time) #print >> sys.stderr, ('The 30% corruption code for file ' + # os.path.split(__file__)[1] + # ' ran for %.2fm' % (training_time / 60.)) # end-snippet-3 # start-snippet-4 #image = Image.fromarray(tile_raster_images( # X=da.W.get_value(borrow=True).T, # img_shape=(28, 28), tile_shape=(10, 10), # tile_spacing=(1, 1))) #image.save('filters_corruption_30.png') # end-snippet-4 os.chdir('../') return da
def train(args):
    """Train a jet-classification model end to end.

    Loads and transforms the data, splits into train/validation (folding
    cropped-out validation jets back into training if requested), builds or
    restores the model, then runs SGD with periodic validation, checkpointing
    the model whenever validation 1/FPR @ TPR=0.5 improves.
    """
    model_type = MODEL_TYPES[args.model_type]
    eh = ExperimentHandler(args, os.path.join(MODELS_DIR, model_type))
    signal_handler = eh.signal_handler
    ''' DATA '''
    '''----------------------------------------------------------------------- '''
    logging.warning("Loading data...")
    tf = load_tf(DATA_DIR, "{}-train.pickle".format(args.filename))
    X, y = load_data(DATA_DIR, "{}-train.pickle".format(args.filename))
    # Apply the fitted transformer to each jet's constituents in place.
    for jet in X:
        jet["content"] = tf.transform(jet["content"])
    # Optional random subsample of the training pool.
    if args.n_train > 0:
        indices = torch.randperm(len(X)).numpy()[:args.n_train]
        X = [X[i] for i in indices]
        y = y[indices]
    logging.warning("Splitting into train and validation...")
    X_train, X_valid_uncropped, y_train, y_valid_uncropped = train_test_split(
        X, y, test_size=args.n_valid)
    logging.warning("\traw train size = %d" % len(X_train))
    logging.warning("\traw valid size = %d" % len(X_valid_uncropped))
    X_valid, y_valid, cropped_indices, w_valid = crop(
        X_valid_uncropped, y_valid_uncropped, return_cropped_indices=True)
    # add cropped indices to training data
    if args.add_cropped:
        X_train.extend([
            x for i, x in enumerate(X_valid_uncropped) if i in cropped_indices
        ])
        y_train = [y for y in y_train]
        y_train.extend([
            y for i, y in enumerate(y_valid_uncropped) if i in cropped_indices
        ])
        y_train = np.array(y_train)
    logging.warning("\tfinal train size = %d" % len(X_train))
    logging.warning("\tfinal valid size = %d" % len(X_valid))
    ''' MODEL '''
    '''----------------------------------------------------------------------- '''
    # Initialization: either fresh from CLI args, or restored from args.load.
    Predict = PredictFromParticleEmbedding
    if args.load is None:
        Transform = TRANSFORMS[args.model_type]
        model_kwargs = {
            'n_features': args.n_features,
            'n_hidden': args.n_hidden,
        }
        # Iterative/gated transforms take extra hyperparameters.
        if Transform in [MPNNTransform, GRNNTransformGated]:
            model_kwargs['n_iters'] = args.n_iters
            model_kwargs['leaves'] = args.leaves
        model = Predict(Transform, **model_kwargs)
        settings = {
            "transform": Transform,
            "predict": Predict,
            "model_kwargs": model_kwargs,
            "step_size": args.step_size,
            "args": args,
        }
    else:
        with open(os.path.join(args.load, 'settings.pickle'), "rb") as f:
            # latin-1 handles pickles created under Python 2.
            settings = pickle.load(f, encoding='latin-1')
            Transform = settings["transform"]
            Predict = settings["predict"]
            model_kwargs = settings["model_kwargs"]
        with open(os.path.join(args.load, 'model_state_dict.pt'), 'rb') as f:
            state_dict = torch.load(f)
        model = PredictFromParticleEmbedding(Transform, **model_kwargs)
        model.load_state_dict(state_dict)
        # On restart, continue with the step size the loaded run had reached.
        if args.restart:
            args.step_size = settings["step_size"]
    logging.warning(model)
    out_str = 'Number of parameters: {}'.format(
        sum(np.prod(p.data.numpy().shape) for p in model.parameters()))
    logging.warning(out_str)
    if torch.cuda.is_available():
        model.cuda()
    signal_handler.set_model(model)
    ''' OPTIMIZER AND LOSS '''
    '''----------------------------------------------------------------------- '''
    optimizer = Adam(model.parameters(), lr=args.step_size)
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=args.decay)
    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5)
    n_batches = int(np.ceil(len(X_train) / args.batch_size))
    # Single-element list so the nested callback can mutate it.
    best_score = [-np.inf]  # yuck, but works
    best_model_state_dict = copy.deepcopy(model.state_dict())

    def loss(y_pred, y):
        # Binary log loss averaged over the batch.
        l = log_loss(y, y_pred.squeeze(1)).mean()
        return l

    ''' VALIDATION '''
    '''----------------------------------------------------------------------- '''
    def callback(iteration, model):
        """Every 25 iterations: estimate train/valid loss, compute ROC
        metrics on the validation set, and checkpoint on improvement.
        Returns a status string, or None when skipped."""
        out_str = None

        def save_everything(model):
            with open(os.path.join(eh.exp_dir, 'model_state_dict.pt'),
                      'wb') as f:
                torch.save(model.state_dict(), f)
            with open(os.path.join(eh.exp_dir, 'settings.pickle'), "wb") as f:
                pickle.dump(settings, f)

        if iteration % 25 == 0:
            model.eval()
            offset = 0
            train_loss = []
            valid_loss = []
            yy, yy_pred = [], []
            # Walk the validation set in batches; at each offset also score
            # the same-sized slice of the training set for a rough train loss.
            for i in range(len(X_valid) // args.batch_size):
                idx = slice(offset, offset + args.batch_size)
                Xt, yt = X_train[idx], y_train[idx]
                X_var = wrap_X(Xt)
                y_var = wrap(yt)
                tl = unwrap(loss(model(X_var), y_var))
                train_loss.append(tl)
                X = unwrap_X(X_var)
                y = unwrap(y_var)
                Xv, yv = (X_valid[offset:offset + args.batch_size],
                          y_valid[offset:offset + args.batch_size])
                X_var = wrap_X(Xv)
                y_var = wrap(yv)
                y_pred = model(X_var)
                vl = unwrap(loss(y_pred, y_var))
                valid_loss.append(vl)
                Xv = unwrap_X(X_var)
                yv = unwrap(y_var)
                y_pred = unwrap(y_pred)
                yy.append(yv)
                yy_pred.append(y_pred)
                offset += args.batch_size
            train_loss = np.mean(np.array(train_loss))
            valid_loss = np.mean(np.array(valid_loss))
            yy = np.concatenate(yy, 0)
            yy_pred = np.concatenate(yy_pred, 0)
            roc_auc = roc_auc_score(yy, yy_pred, sample_weight=w_valid)
            # 1/fpr
            fpr, tpr, _ = roc_curve(yy, yy_pred, sample_weight=w_valid)
            inv_fpr = inv_fpr_at_tpr_equals_half(tpr, fpr)
            if np.isnan(inv_fpr):
                logging.warning("NaN in 1/FPR\n")
            # Checkpoint whenever validation 1/FPR improves.
            if inv_fpr > best_score[0]:
                best_score[0] = inv_fpr
                save_everything(model)
            out_str = "{:5d}\t~loss(train)={:.4f}\tloss(valid)={:.4f}\troc_auc(valid)={:.4f}".format(
                iteration,
                train_loss,
                valid_loss,
                roc_auc, )
            out_str += "\t1/FPR @ TPR = 0.5: {:.2f}\tBest 1/FPR @ TPR = 0.5: {:.2f}".format(
                inv_fpr, best_score[0])
            # NOTE(review): ExponentialLR.step normally takes no loss
            # argument; confirm this call does what is intended here.
            scheduler.step(valid_loss)
            model.train()
        return out_str

    ''' TRAINING '''
    '''----------------------------------------------------------------------- '''
    logging.warning("Training...")
    for i in range(args.n_epochs):
        logging.info("epoch = %d" % i)
        logging.info("step_size = %.8f" % settings['step_size'])
        for j in range(n_batches):
            model.train()
            optimizer.zero_grad()
            # Random contiguous slice of the training set as the minibatch.
            start = torch.round(
                torch.rand(1) * (len(X_train) - args.batch_size)
            ).numpy()[0].astype(np.int32)
            idx = slice(start, start + args.batch_size)
            X, y = X_train[idx], y_train[idx]
            X_var = wrap_X(X)
            y_var = wrap(y)
            l = loss(model(X_var), y_var)
            l.backward()
            optimizer.step()
            X = unwrap_X(X_var)
            y = unwrap(y_var)
            out_str = callback(j, model)
            if out_str is not None:
                signal_handler.results_strings.append(out_str)
                logging.info(out_str)
        # Decay the learning rate once per epoch and record the new value.
        scheduler.step()
        settings['step_size'] = args.step_size * (args.decay)**(i + 1)
    logging.info("FINISHED TRAINING")
    signal_handler.job_completed()
from monitors.losses import *
from monitors.monitors import *
from architectures import PredictFromParticleEmbedding
#from architectures import AdversarialParticleEmbedding
from loading import load_data
from loading import load_tf
from loading import crop
from sklearn.utils import shuffle

# Clean (no-pileup) dataset; domain label Z = 0 for every jet.
filename = 'antikt-kt'
data_dir = '/scratch/psn240/capstone/data/w-vs-qcd/pickles/'
tf = load_tf(data_dir, "{}-train.pickle".format(filename))
X, y = load_data(data_dir, "{}-train.pickle".format(filename))
for ij, jet in enumerate(X):
    jet["content"] = tf.transform(jet["content"])
Z = [0] * len(y)
print(len(X))
print(len(y))

# Pileup dataset; domain label Z = 1 for every jet.
filename = 'antikt-kt-pileup25-new'
data_dir = '/scratch/psn240/capstone/data/w-vs-qcd/pickles/'
tf_pileup = load_tf(data_dir, "{}-train.pickle".format(filename))
X_pileup, y_pileup = load_data(data_dir, "{}-train.pickle".format(filename))
for ij, jet in enumerate(X_pileup):
    jet["content"] = tf_pileup.transform(jet["content"])
# FIX: was `[1] * len(y)` — the clean dataset's label count — which gives the
# wrong number of domain labels whenever the two datasets differ in size.
Z_pileup = [1] * len(y_pileup)
def test_SdA(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001,
             training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST. Pretrains each layer greedily, then
    finetunes the whole stack with early stopping on the validation set.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """

    datasets = ld.load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (Python 2 integer division)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = timeit.default_timer()
    ## Pre-train layer-wise
    # One corruption level per stacked layer.
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = timeit.default_timer()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'on iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
import sklearn
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import torch.optim
import loading
from pylab import mpl
from sklearn.metrics import accuracy_score

# Load train/test sets and convert to torch tensors.
# NOTE(review): Variable is a no-op wrapper in modern PyTorch (>=0.4);
# kept as-is for compatibility with the original code.
x, y = loading.load_data('train.csv')
x1, y1 = loading.load_data('test.csv')
x = torch.FloatTensor(x)
y = torch.LongTensor(y)
x = Variable(x)
y = Variable(y)
x1 = torch.FloatTensor(x1)
y1 = torch.LongTensor(y1)
x1 = Variable(x1)
y1 = Variable(y1)


class Net(nn.Module):
    """Single-hidden-layer feed-forward network (linear layers only here;
    any activation is presumably applied in a forward method defined
    elsewhere — confirm)."""

    def __init__(self, n_feature, n_hidden, n_out):
        """
        :param n_feature: number of input features
        :param n_hidden: hidden layer width
        :param n_out: number of output classes
        """
        super(Net, self).__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)
        self.out = nn.Linear(n_hidden, n_out)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST. Trains with minibatch SGD and early
    stopping on the validation set.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

   """
    datasets = ld.load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (Python 2 integer division)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                     n_out=10)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # start-snippet-5
    # compute the gradient of cost with respect to theta (sotred in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def train(args) -> None:
    """Train a jet classifier (particle-embedding transform + prediction head).

    Pipeline: load and transform the jet dataset, optionally subsample,
    split train/validation, crop the validation set (recycling cropped jets
    into training), build or reload the model, then run minibatch training
    with Adam plus a once-per-epoch validation callback that logs losses and
    predictions through the experiment handler.

    :param args: experiment configuration namespace (model_type, data_dir,
        filename, n_train, n_valid, batch_size, epochs, step_size, decay,
        load, restart, pileup, dont_add_cropped, ...). Mutated in place
        (``root_exp_dir`` always; ``step_size`` on restart).
    """
    # Resolve the embedding transform class and set up experiment bookkeeping.
    _, Transform, model_type = TRANSFORMS[args.model_type]
    args.root_exp_dir = os.path.join(MODELS_DIR, model_type, str(args.iters))
    eh = ExperimentHandler(args)

    ''' DATA '''
    '''----------------------------------------------------------------------- '''
    logging.warning("Loading data...")
    # tf: fitted feature transformer, applied in place to each jet's contents.
    tf = load_tf(args.data_dir, "{}-train.pickle".format(args.filename))
    X, y = load_data(args.data_dir, "{}-train.pickle".format(args.filename))
    for ij, jet in enumerate(X):
        jet["content"] = tf.transform(jet["content"])

    # Optionally restrict the pool to a random subset of n_train jets.
    if args.n_train > 0:
        indices = torch.randperm(len(X)).numpy()[:args.n_train]
        X = [X[i] for i in indices]
        y = y[indices]

    logging.warning("Splitting into train and validation...")
    X_train, X_valid_uncropped, y_train, y_valid_uncropped = train_test_split(
        X, y, test_size=args.n_valid, random_state=0)
    logging.warning("\traw train size = %d" % len(X_train))
    logging.warning("\traw valid size = %d" % len(X_valid_uncropped))

    # Crop the validation set; w_valid are per-jet weights used when logging.
    X_valid, y_valid, cropped_indices, w_valid = crop(
        X_valid_uncropped,
        y_valid_uncropped,
        0,
        return_cropped_indices=True,
        pileup=args.pileup)

    # add cropped indices to training data
    if not args.dont_add_cropped:
        X_train.extend([
            x for i, x in enumerate(X_valid_uncropped) if i in cropped_indices
        ])
        y_train = [y for y in y_train]
        y_train.extend([
            y for i, y in enumerate(y_valid_uncropped) if i in cropped_indices
        ])
        y_train = np.array(y_train)
    logging.warning("\tfinal train size = %d" % len(X_train))
    logging.warning("\tfinal valid size = %d" % len(X_valid))

    ''' MODEL '''
    '''----------------------------------------------------------------------- '''
    # Initialization
    logging.info("Initializing model...")
    Predict = PredictFromParticleEmbedding
    if args.load is None:
        # Fresh model from CLI hyper-parameters.
        model_kwargs = {
            'features': args.features,
            'hidden': args.hidden,
            'iters': args.iters,
            'leaves': args.leaves,
        }
        model = Predict(Transform, **model_kwargs)
        settings = {
            "transform": Transform,
            "predict": Predict,
            "model_kwargs": model_kwargs,
            "step_size": args.step_size,
            "args": args,
        }
    else:
        # Resume: restore pickled settings and model weights from a prior run.
        with open(os.path.join(args.load, 'settings.pickle'), "rb") as f:
            settings = pickle.load(f, encoding='latin-1')
            Transform = settings["transform"]
            Predict = settings["predict"]
            model_kwargs = settings["model_kwargs"]
        model = PredictFromParticleEmbedding(Transform, **model_kwargs)
        # Prefer the CPU state dict; fall back to the default one.
        try:
            with open(os.path.join(args.load, 'cpu_model_state_dict.pt'),
                      'rb') as f:
                state_dict = torch.load(f)
        except FileNotFoundError as e:
            with open(os.path.join(args.load, 'model_state_dict.pt'),
                      'rb') as f:
                state_dict = torch.load(f)
        model.load_state_dict(state_dict)
        if args.restart:
            args.step_size = settings["step_size"]

    logging.warning(model)
    out_str = 'Number of parameters: {}'.format(
        sum(np.prod(p.data.numpy().shape) for p in model.parameters()))
    logging.warning(out_str)

    if torch.cuda.is_available():
        logging.warning("Moving model to GPU")
        model.cuda()
        logging.warning("Moved model to GPU")

    eh.signal_handler.set_model(model)

    ''' OPTIMIZER AND LOSS '''
    '''----------------------------------------------------------------------- '''
    logging.info("Building optimizer...")
    optimizer = Adam(model.parameters(), lr=args.step_size)
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=args.decay)
    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5)
    n_batches = int(len(X_train) // args.batch_size)
    # NOTE(review): best_score / best_model_state_dict are never updated or
    # read again in this function -- looks like a leftover checkpointing hook.
    best_score = [-np.inf]  # yuck, but works
    best_model_state_dict = copy.deepcopy(model.state_dict())

    def loss(y_pred, y):
        # Mean binary log-loss over the minibatch (y_pred squeezed to 1-D).
        l = log_loss(y, y_pred.squeeze(1)).mean()
        return l

    ''' VALIDATION '''
    '''----------------------------------------------------------------------- '''
    def callback(epoch, iteration, model):
        """Every n_batches iterations (i.e. once per epoch): probe train and
        validation loss over validation-sized windows and log the results."""
        if iteration % n_batches == 0:
            t0 = time.time()
            model.eval()

            offset = 0
            train_loss = []
            valid_loss = []
            yy, yy_pred = [], []
            for i in range(len(X_valid) // args.batch_size):
                idx = slice(offset, offset + args.batch_size)
                # Train-loss probe on the matching training slice; wrap/unwrap
                # convert to/from torch Variables -- presumably unwrap_X also
                # restores jet contents in place (TODO confirm in helpers).
                Xt, yt = X_train[idx], y_train[idx]
                X_var = wrap_X(Xt)
                y_var = wrap(yt)
                tl = unwrap(loss(model(X_var), y_var))
                train_loss.append(tl)
                X = unwrap_X(X_var)
                y = unwrap(y_var)

                # Validation loss + predictions for this window.
                Xv, yv = X_valid[idx], y_valid[idx]
                X_var = wrap_X(Xv)
                y_var = wrap(yv)
                y_pred = model(X_var)
                vl = unwrap(loss(y_pred, y_var))
                valid_loss.append(vl)
                Xv = unwrap_X(X_var)
                yv = unwrap(y_var)
                y_pred = unwrap(y_pred)
                yy.append(yv)
                yy_pred.append(y_pred)
                offset += args.batch_size

            train_loss = np.mean(np.array(train_loss))
            valid_loss = np.mean(np.array(valid_loss))
            yy = np.concatenate(yy, 0)
            yy_pred = np.concatenate(yy_pred, 0)
            t1 = time.time()

            logging.info("Modeling validation data took {}s".format(t1 - t0))

            logdict = dict(
                epoch=epoch,
                iteration=iteration,
                yy=yy,
                yy_pred=yy_pred,
                # truncated to the number of predictions actually produced
                w_valid=w_valid[:len(yy_pred)],
                #w_valid=w_valid,
                train_loss=train_loss,
                valid_loss=valid_loss,
                settings=settings,
                model=model)
            eh.log(**logdict)

            # NOTE(review): scheduler is ExponentialLR, whose step() argument
            # is an epoch index, not a metric -- passing valid_loss here looks
            # like a leftover from the commented-out ReduceLROnPlateau above,
            # and it also double-steps against the per-epoch step() below.
            # Confirm intent before changing.
            scheduler.step(valid_loss)
            model.train()

    ''' TRAINING '''
    '''----------------------------------------------------------------------- '''
    eh.save(model, settings)
    logging.warning("Training...")
    iteration = 1

    for i in range(args.epochs):
        logging.info("epoch = %d" % i)
        logging.info("step_size = %.8f" % settings['step_size'])
        t0 = time.time()
        for _ in range(n_batches):
            iteration += 1
            model.train()
            optimizer.zero_grad()
            # Random contiguous minibatch window into the training set.
            start = torch.round(
                torch.rand(1) *
                (len(X_train) - args.batch_size)).numpy()[0].astype(np.int32)
            idx = slice(start, start + args.batch_size)
            X, y = X_train[idx], y_train[idx]
            X_var = wrap_X(X)
            y_var = wrap(y)
            l = loss(model(X_var), y_var)
            l.backward()
            optimizer.step()
            X = unwrap_X(X_var)
            y = unwrap(y_var)
            callback(i, iteration, model)

        t1 = time.time()
        logging.info("Epoch took {} seconds".format(t1 - t0))

        # Per-epoch LR decay; the settings entry mirrors it for logging only.
        scheduler.step()
        settings['step_size'] = args.step_size * (args.decay)**(i + 1)

    eh.finished()
def read_file(file):
    """Load and return the contents of *file* via the project's loading module."""
    return loading.load_data(file)
def train(args):
    """Adversarially train a pileup-robust jet classifier.

    Loads three pileup-level datasets (levels 40/50/60), tags each with a
    domain label Z in {0, 1, 2}, pools and shuffles them, and trains the
    classifier jointly against an adversary that tries to predict Z from the
    classifier output, minimizing ``l_rnn - lambda * l_adv``.

    Fixes relative to the original implementation (interface unchanged):
      * ``optimizer.step()`` / ``optimizer_adv.step()`` were called BEFORE
        the corresponding ``.backward()`` calls, so both models were updated
        with freshly-zeroed gradients; the order is now backward -> step.
      * ``adversarial_model`` was only constructed on the ``args.load is
        None`` path, raising NameError on the resume path; it is now built
        on both paths.
      * ``scheduler.step(valid_loss)`` in the validation callback passed a
        float metric to ExponentialLR (which interprets its argument as an
        epoch index) and double-stepped against the per-epoch ``step()``;
        the callback call was removed.

    :param args: experiment configuration namespace. Mutated in place
        (``root_exp_dir``, ``filename``, ``pileup``; ``step_size`` on
        restart).
    """

    def _load_pileup_split(pileup_lvl, domain_label):
        """Load one pileup level: transform, subsample, split, crop.

        Reads ``args.filename`` / ``args.pileup`` as currently set by the
        caller. Returns
        (X_train, y_train, Z_train, X_valid, y_valid, Z_valid, w_valid),
        where Z_* are constant domain labels for the adversary.
        """
        tf = load_tf(args.data_dir, "{}-train.pickle".format(args.filename))
        X, y = load_data(args.data_dir,
                         "{}-train.pickle".format(args.filename))
        for jet in X:
            jet["content"] = tf.transform(jet["content"])
        if args.n_train > 0:
            indices = torch.randperm(len(X)).numpy()[:args.n_train]
            X = [X[i] for i in indices]
            y = y[indices]
        logging.warning("Splitting into train and validation...")
        X_train, X_valid_uncropped, y_train, y_valid_uncropped = \
            train_test_split(X, y, test_size=args.n_valid, random_state=0)
        logging.warning("\traw train size = %d" % len(X_train))
        logging.warning("\traw valid size = %d" % len(X_valid_uncropped))
        X_valid, y_valid, cropped_indices, w_valid = crop(
            X_valid_uncropped,
            y_valid_uncropped,
            pileup_lvl=pileup_lvl,
            return_cropped_indices=True,
            pileup=args.pileup)
        # Recycle the jets cropped out of the validation set as extra
        # training data.
        if not args.dont_add_cropped:
            X_train.extend(x for i, x in enumerate(X_valid_uncropped)
                           if i in cropped_indices)
            y_train = list(y_train)
            y_train.extend(t for i, t in enumerate(y_valid_uncropped)
                           if i in cropped_indices)
            y_train = np.array(y_train)
        Z_train = [domain_label] * len(y_train)
        Z_valid = [domain_label] * len(y_valid)
        return X_train, y_train, Z_train, X_valid, y_valid, Z_valid, w_valid

    _, Transform, model_type = TRANSFORMS[args.model_type]
    args.root_exp_dir = os.path.join(MODELS_DIR, model_type, str(args.iters))
    eh = ExperimentHandler(args)

    ''' DATA '''
    '''----------------------------------------------------------------------- '''
    # Level 40 uses args.filename / args.pileup as passed in; levels 50/60
    # override them (mutating args, as the original did).
    logging.warning("Loading pileup antikt-kt-pileup40 data...")
    (X_train_40, y_train_40, Z_train_40,
     X_valid_40, y_valid_40, Z_valid_40, w_valid_40) = \
        _load_pileup_split(40, 0)

    logging.warning("Loading pileup antikt-kt-pileup50 data...")
    args.filename = 'antikt-kt-pileup50'
    args.pileup = False
    (X_train_50, y_train_50, Z_train_50,
     X_valid_50, y_valid_50, Z_valid_50, w_valid_50) = \
        _load_pileup_split(50, 1)

    logging.warning("Loading pileup antikt-kt-pileup60 data...")
    args.filename = 'antikt-kt-pileup60'
    args.pileup = False
    (X_train_60, y_train_60, Z_train_60,
     X_valid_60, y_valid_60, Z_valid_60, w_valid_60) = \
        _load_pileup_split(60, 2)

    # Pool the three pileup levels and shuffle train/valid consistently
    # (X, y, Z stay aligned within each split).
    X_train = np.concatenate((X_train_40, X_train_50, X_train_60), axis=0)
    X_valid = np.concatenate((X_valid_40, X_valid_50, X_valid_60), axis=0)
    y_train = np.concatenate((y_train_40, y_train_50, y_train_60), axis=0)
    y_valid = np.concatenate((y_valid_40, y_valid_50, y_valid_60), axis=0)
    Z_train = np.concatenate((Z_train_40, Z_train_50, Z_train_60), axis=0)
    Z_valid = np.concatenate((Z_valid_40, Z_valid_50, Z_valid_60), axis=0)
    w_valid = np.concatenate((w_valid_40, w_valid_50, w_valid_60), axis=0)
    X_train, y_train, Z_train = shuffle(
        X_train, y_train, Z_train, random_state=0)
    X_valid, y_valid, Z_valid = shuffle(
        X_valid, y_valid, Z_valid, random_state=0)

    logging.warning("\tfinal X train size = %d" % len(X_train))
    logging.warning("\tfinal X valid size = %d" % len(X_valid))
    logging.warning("\tfinal Y train size = %d" % len(y_train))
    logging.warning("\tfinal Y valid size = %d" % len(y_valid))
    logging.warning("\tfinal Z train size = %d" % len(Z_train))
    logging.warning("\tfinal Z valid size = %d" % len(Z_valid))
    logging.warning("\tfinal w valid size = %d" % len(w_valid))

    ''' MODEL '''
    '''----------------------------------------------------------------------- '''
    logging.info("Initializing model...")
    Predict = PredictFromParticleEmbedding
    if args.load is None:
        model_kwargs = {
            'features': args.features,
            'hidden': args.hidden,
            'iters': args.iters,
            'leaves': args.leaves,
            'batch': args.batch_size,
        }
        logging.info('No previous models')
        model = Predict(Transform, **model_kwargs)
        settings = {
            "transform": Transform,
            "predict": Predict,
            "model_kwargs": model_kwargs,
            "step_size": args.step_size,
            "args": args,
        }
    else:
        # Resume: restore pickled settings and model weights.
        with open(os.path.join(args.load, 'settings.pickle'), "rb") as f:
            settings = pickle.load(f, encoding='latin-1')
        Transform = settings["transform"]
        Predict = settings["predict"]
        model_kwargs = settings["model_kwargs"]
        model = PredictFromParticleEmbedding(Transform, **model_kwargs)
        # Prefer the CPU state dict; fall back to the default one.
        try:
            with open(os.path.join(args.load, 'cpu_model_state_dict.pt'),
                      'rb') as f:
                state_dict = torch.load(f)
        except FileNotFoundError:
            with open(os.path.join(args.load, 'model_state_dict.pt'),
                      'rb') as f:
                state_dict = torch.load(f)
        model.load_state_dict(state_dict)
        if args.restart:
            args.step_size = settings["step_size"]
    # Fix: build the adversary on both paths (it was previously created only
    # when args.load is None, so resuming raised NameError below).
    adversarial_model = AdversarialParticleEmbedding(**model_kwargs)

    logging.warning(model)
    logging.warning(adversarial_model)
    out_str = 'Number of parameters: {}'.format(
        sum(np.prod(p.data.numpy().shape) for p in model.parameters()))
    out_str_adversarial = 'Number of parameters: {}'.format(
        sum(np.prod(p.data.numpy().shape)
            for p in adversarial_model.parameters()))
    logging.warning(out_str)
    logging.warning(out_str_adversarial)

    if torch.cuda.is_available():
        logging.warning("Moving model to GPU")
        model.cuda()
        logging.warning("Moved model to GPU")
    else:
        logging.warning("No cuda")

    eh.signal_handler.set_model(model)

    ''' OPTIMIZER AND LOSS '''
    '''----------------------------------------------------------------------- '''
    logging.info("Building optimizer...")
    optimizer = Adam(model.parameters(), lr=args.step_size)
    optimizer_adv = Adam(adversarial_model.parameters(), lr=args.step_size)
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=args.decay)
    scheduler_adv = lr_scheduler.ExponentialLR(optimizer_adv, gamma=args.decay)
    n_batches = int(len(X_train) // args.batch_size)
    # NOTE(review): best_score / best_model_state_dict are never read again
    # below -- leftover checkpointing hook, kept for interface parity.
    best_score = [-np.inf]
    best_model_state_dict = copy.deepcopy(model.state_dict())

    def loss_adversarial(y_pred, y):
        # Elementwise binary cross-entropy; currently unused (the adversary
        # is trained with F.nll_loss below), kept for reference.
        return -(y * torch.log(y_pred) + (1. - y) * torch.log(1. - y_pred))

    def loss(y_pred, y):
        # Mean binary log-loss over the minibatch (y_pred squeezed to 1-D).
        return log_loss(y, y_pred.squeeze(1)).mean()

    ''' VALIDATION '''
    '''----------------------------------------------------------------------- '''
    def callback(epoch, iteration, model):
        """Every n_batches iterations (once per epoch): probe train and
        validation loss over validation-sized windows and log the results."""
        if iteration % n_batches != 0:
            return
        t0 = time.time()
        model.eval()

        offset = 0
        train_loss = []
        valid_loss = []
        yy, yy_pred = [], []
        for _ in range(len(X_valid) // args.batch_size):
            idx = slice(offset, offset + args.batch_size)
            # Train-loss probe on the matching training slice.
            Xt, yt = X_train[idx], y_train[idx]
            X_var = wrap_X(Xt)
            y_var = wrap(yt)
            y_pred_1 = model(X_var)
            tl = unwrap(loss(y_pred_1, y_var))
            train_loss.append(tl)
            X = unwrap_X(X_var)
            y = unwrap(y_var)

            # Validation loss + predictions for this window.
            Xv, yv = X_valid[idx], y_valid[idx]
            X_var = wrap_X(Xv)
            y_var = wrap(yv)
            y_pred = model(X_var)
            vl = unwrap(loss(y_pred, y_var))
            valid_loss.append(vl)
            Xv = unwrap_X(X_var)
            yv = unwrap(y_var)
            y_pred = unwrap(y_pred)
            yy.append(yv)
            yy_pred.append(y_pred)
            offset += args.batch_size

        train_loss = np.mean(np.array(train_loss))
        valid_loss = np.mean(np.array(valid_loss))
        yy = np.concatenate(yy, 0)
        yy_pred = np.concatenate(yy_pred, 0)
        t1 = time.time()

        logging.info("Modeling validation data took {}s".format(t1 - t0))
        logging.info(len(yy_pred))
        logging.info(len(yy))
        logging.info(len(w_valid))

        logdict = dict(
            epoch=epoch,
            iteration=iteration,
            yy=yy,
            yy_pred=yy_pred,
            # truncated to the number of predictions actually produced
            w_valid=w_valid[:len(yy_pred)],
            train_loss=train_loss,
            valid_loss=valid_loss,
            settings=settings,
            model=model)
        eh.log(**logdict)
        # Fix: the original called scheduler.step(valid_loss) here, feeding a
        # float metric to ExponentialLR (epoch-indexed) and double-stepping
        # against the per-epoch step() in the training loop; removed.
        model.train()

    ''' TRAINING '''
    '''----------------------------------------------------------------------- '''
    eh.save(model, settings)
    logging.warning("Training...")
    iteration = 1
    loss_rnn = []
    loss_adv = []
    logging.info("Lambda selected = %.8f" % args.lmbda)

    for i in range(args.epochs):
        logging.info("epoch = %d" % i)
        logging.info("step_size = %.8f" % settings['step_size'])
        t0 = time.time()
        for _ in range(n_batches):
            iteration += 1
            model.train()
            adversarial_model.train()
            optimizer.zero_grad()
            optimizer_adv.zero_grad()

            # Random contiguous minibatch window into the training set.
            start = torch.round(
                torch.rand(1) *
                (len(X_train) - args.batch_size)).numpy()[0].astype(np.int32)
            idx = slice(start, start + args.batch_size)
            X, y, Z = X_train[idx], y_train[idx], Z_train[idx]
            X_var = wrap_X(X)
            y_var = wrap(y)
            y_pred = model(X_var)
            Z_var = Variable(torch.squeeze(torch.from_numpy(Z)))

            l_rnn = loss(y_pred, y_var)
            loss_rnn.append(l_rnn.data.cpu().numpy()[0])
            l_adv = F.nll_loss(adversarial_model(y_pred), Z_var)
            loss_adv.append(l_adv.data.cpu().numpy()[0])
            # Adversarial objective: reward the classifier for confusing
            # the domain adversary.
            l = l_rnn - (args.lmbda * l_adv)

            # Fix: gradients must be computed before stepping (the original
            # called step() first, updating with zeroed gradients).
            # Classifier update on the combined objective; retain the graph
            # so the adversary can backprop through y_pred afterwards.
            l.backward(retain_graph=True)
            optimizer.step()
            # Adversary update on its own loss.
            l_adv.backward()
            optimizer_adv.step()

            X = unwrap_X(X_var)
            y = unwrap(y_var)
            callback(i, iteration, model)

        t1 = time.time()
        logging.info("Epoch took {} seconds".format(t1 - t0))

        # Per-epoch LR decay; the settings entry mirrors it for logging only.
        scheduler.step()
        scheduler_adv.step()
        settings['step_size'] = args.step_size * (args.decay)**(i + 1)

    # Persist per-iteration loss curves under a PID-named directory.
    # NOTE(review): np.save appends '.npy', so these come out as
    # 'rnn_loss.csv.npy' / 'adv_loss.csv.npy'; names kept for compatibility.
    logging.info('PID : %d' % os.getpid())
    pathset = os.path.join(args.data_dir, str(os.getpid()))
    os.mkdir(pathset)
    np.save(os.path.join(pathset, 'rnn_loss.csv'), np.array(loss_rnn))
    np.save(os.path.join(pathset, 'adv_loss.csv'), np.array(loss_adv))
    eh.finished()
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600): """ Demonstrate stochastic gradient descent optimization of a log-linear model This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ datasets = ld.load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels # construct the logistic regression class # Each MNIST image has size 28*28 classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.negative_log_likelihood(y) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-3 ############### # TRAIN MODEL # ############### print '... 
training the model' # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) # save the best model with open('best_model.pkl', 'w') as f: cPickle.dump(classifier, f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%,' 'with test 
performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))