def generate_pred(subset):
    predictions_paths = glob.glob("predictions/" + subset + "--" + config + "-*")
    if len(predictions_paths) < times_to_run:
        for path in run_paths:
            cmd = "python predict_convnet.py %s %s %s" % (config, path, subset)
            print cmd
            os.system(cmd)
        predictions_paths = glob.glob("predictions/" + subset + "--" + config + "-*")
    assert len(predictions_paths) == times_to_run

    print "Loading %s set predictions" % subset
    predictions_list = [np.load(path) for path in predictions_paths]
    predictions_stack = np.array(predictions_list).astype("float32")  # num_sources x num_datapoints x 121
    uniform_blend = predictions_stack.mean(0)

    if subset == "valid":
        t_valid = data.labels_train[np.load("validation_split_v1.pkl")['indices_valid']]
        loss_uniform_selected = utils.log_loss(uniform_blend, t_valid)
        print
        print config
        print "%s score: %.6f" % (subset, loss_uniform_selected)
        print

    target_path = "predictions/%s--%s--%s--%s.npy" % (subset, "blend_" + config, config, "avg-prob")
    np.save(target_path, uniform_blend)
    print "saving in", target_path
def forwardPropagation(input, weights, bias, originalOutput, binarizedTruePrediction,
                       prediction, numberOfSamples, numberOfNeuronsInLayers, classes,
                       optimizer):
    # computing output of first hidden layer
    h1In = numpy.dot(input[:, :3], weights[0]) + numpy.repeat(
        numpy.array([bias[0]]), repeats=[numberOfSamples], axis=0)
    h1Output = utils.relu(h1In)

    # computing output of second hidden layer
    h2In = numpy.dot(h1Output, weights[1]) + numpy.repeat(
        numpy.array([bias[1]]), repeats=[numberOfSamples], axis=0)
    h2Output = utils.relu(h2In)

    # computing output of the output layer
    OIn = numpy.dot(h2Output, weights[2]) + numpy.repeat(
        numpy.array([bias[2]]), repeats=[numberOfSamples], axis=0)
    OOutput = utils.softmax(OIn)

    myPredictedValueListAsIntegers = numpy.argmax(OOutput, axis=1)

    # Computing overall error only for plotting graph
    if prediction == False:
        errorForGraph = utils.log_loss(binarizedTruePrediction, OOutput[:] + 0.00001)
        errorForPlottingGraphList.append(errorForGraph)
        accuracyScoreForGraph.append(
            utils.accuracy_score(originalOutput, myPredictedValueListAsIntegers))
        backPropagation(input, OOutput, originalOutput, binarizedTruePrediction,
                        h1Output, h2Output, numberOfNeuronsInLayers, classes,
                        h1In, h2In, OIn, optimizer)
    else:
        return myPredictedValueListAsIntegers
def main(verbose: int = 1,
         print_freq: int = 100,
         restore: Union[bool, str] = True,
         val_freq: int = 1,
         run_id: str = "model",
         dset_name: str = "memento_frames",
         model_name: str = "frames",
         freeze_until_it: int = 1000,
         additional_metrics: Mapping[str, Callable] = {'rc': rc},
         debug_n: Optional[int] = None,
         batch_size: int = cfg.BATCH_SIZE,
         require_strict_model_load: bool = False,
         restore_optimizer=True,
         optim_string='adam',
         lr=0.01) -> None:
    print("TRAINING MODEL {} ON DATASET {}".format(model_name, dset_name))

    ckpt_savedir = os.path.join(cfg.DATA_SAVEDIR, run_id, cfg.CKPT_DIR)
    print("Saving ckpts to {}".format(ckpt_savedir))
    logs_savepath = os.path.join(cfg.DATA_SAVEDIR, run_id, cfg.LOGDIR)
    print("Saving logs to {}".format(logs_savepath))
    utils.makedirs([ckpt_savedir, logs_savepath])
    last_ckpt_path = os.path.join(ckpt_savedir, "last_model.pth")

    device = utils.set_device()
    print('DEVICE', device)

    # model
    model = get_model(model_name, device)
    # print("model", model)
    model = DataParallel(model)
    # must call this before constructing the optimizer:
    # https://pytorch.org/docs/stable/optim.html
    model.to(device)

    # set up training
    # TODO better one?
    if optim_string == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif optim_string == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=0.9,
                                    weight_decay=0.0001)
    else:
        raise RuntimeError(
            "Unrecognized optimizer string {}".format(optim_string))

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.1)

    # criterion = MemAlphaLoss(device=device)
    # criterion = MemMSELoss()
    # criterion = lambda x, y: MemMSELoss()(x, y) +
    #     CaptionsLoss(device=device)(x, y)
    losses = {
        'mem_mse': MemMSELoss(device=device,
                              weights=np.load("memento_weights.npy")),
        'captions': CaptionsLoss(device=device,
                                 class_weights=cap_utils.get_vocab_weights())
    }

    initial_epoch = 0
    iteration = 0
    unfrozen = False

    if restore:
        ckpt_path = restore if isinstance(restore, str) else last_ckpt_path
        if os.path.exists(ckpt_path):
            print("Restoring weights from {}".format(ckpt_path))
            ckpt = torch.load(ckpt_path)
            utils.try_load_state_dict(model, ckpt['model_state_dict'],
                                      require_strict_model_load)
            if restore_optimizer:
                utils.try_load_optim_state(optimizer,
                                           ckpt['optimizer_state_dict'],
                                           require_strict_model_load)
            initial_epoch = ckpt['epoch']
            iteration = ckpt['it']
    else:
        ckpt_path = last_ckpt_path

    # dataset
    train_ds, val_ds, test_ds = get_dataset(dset_name)
    assert val_ds or test_ds

    if debug_n is not None:
        train_ds = Subset(train_ds, range(debug_n))
        test_ds = Subset(test_ds, range(debug_n))

    train_dl = DataLoader(train_ds,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=cfg.NUM_WORKERS)
    test_dl = DataLoader(test_ds,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=cfg.NUM_WORKERS)

    # training loop
    start = time.time()
    try:
        for epoch in range(initial_epoch, cfg.NUM_EPOCHS):
            logger = SummaryWriter(logs_savepath)

            # effectively puts the model in train mode.
            # Opposite of model.eval()
            model.train()
            print("Epoch {}".format(epoch))

            for i, (x, y_) in tqdm(enumerate(train_dl),
                                   total=len(train_ds) / batch_size):
                y: ModelOutput[MemModelFields] = ModelOutput(y_)
                iteration += 1

                if not unfrozen and iteration > freeze_until_it:
                    print("Unfreezing encoder")
                    unfrozen = True
                    for param in model.parameters():
                        param.requires_grad = True

                logger.add_scalar('DataTime', time.time() - start, iteration)

                x = x.to(device)
                y = y.to_device(device)
                out = ModelOutput(model(x, y.get_data()))

                loss_vals = {name: l(out, y) for name, l in losses.items()}
                # print("loss_vals", loss_vals)
                loss = torch.stack(list(loss_vals.values()))

                if verbose:
                    print("stacked loss", loss)

                loss = loss.sum()
                # loss = criterion(out, y)

                # I think this zeros out previous gradients (in case people
                # want to accumulate gradients?)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # logging
                utils.log_loss(logger, loss, loss_vals, iteration)
                logger.add_scalar('ItTime', time.time() - start, iteration)
                start = time.time()

            # display metrics
            # do some validation
            if (epoch + 1) % val_freq == 0:
                print("Validating...")
                model.eval()  # puts model in validation mode
                val_iteration = iteration

                with torch.no_grad():
                    labels: Optional[ModelOutput[MemModelFields]] = None
                    preds: Optional[ModelOutput[MemModelFields]] = None
                    val_losses = []

                    for i, (x, y_) in tqdm(enumerate(test_dl),
                                           total=len(test_ds) / batch_size):
                        val_iteration += 1

                        y = ModelOutput(y_)
                        y_numpy = y.to_numpy()
                        labels = y_numpy if labels is None else labels.merge(
                            y_numpy)

                        x = x.to(device)
                        y = y.to_device(device)
                        out = ModelOutput(model(x, y.get_data()))
                        out_numpy = out.to_device('cpu').to_numpy()
                        preds = out_numpy if preds is None else preds.merge(
                            out_numpy)

                        loss_vals = {
                            name: l(out, y)
                            for name, l in losses.items()
                        }
                        loss = torch.stack(list(loss_vals.values())).sum()
                        utils.log_loss(logger,
                                       loss,
                                       loss_vals,
                                       val_iteration,
                                       phase='val')
                        val_losses.append(loss)

                    print("Calculating validation metric...")
                    # print("preds", {k: v.shape for k, v in preds.items()})
                    # assert False
                    metrics = {
                        fname: f(labels, preds, losses)
                        for fname, f in additional_metrics.items()
                    }
                    print("Validation metrics", metrics)

                    for k, v in metrics.items():
                        if isinstance(v, numbers.Number):
                            logger.add_scalar('Metric_{}'.format(k), v,
                                              iteration)

                    metrics['total_val_loss'] = sum(val_losses)
                    ckpt_path = os.path.join(
                        ckpt_savedir, utils.get_ckpt_path(epoch, metrics))
                    save_ckpt(ckpt_path, model, epoch, iteration, optimizer,
                              dset_name, model_name, metrics)

            # end of epoch
            lr_scheduler.step()
            save_ckpt(last_ckpt_path, model, epoch, iteration, optimizer,
                      dset_name, model_name)
    except KeyboardInterrupt:
        print('Got keyboard interrupt, saving model...')
        save_ckpt(last_ckpt_path, model, epoch, iteration, optimizer,
                  dset_name, model_name)
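# Note: utils.log_loss in the PyTorch training loop above is a TensorBoard
# logging helper (it receives a SummaryWriter), not the log-loss metric used
# in the other snippets in this collection. A guessed, minimal sketch of such
# a helper; the tag names and the exact signature are assumptions, not the
# project's actual code:
def log_loss(logger, loss, loss_vals, iteration, phase='train'):
    """Write the summed loss and each loss component to TensorBoard."""
    logger.add_scalar('{}/loss'.format(phase), loss.item(), iteration)
    for name, val in loss_vals.items():
        logger.add_scalar('{}/{}'.format(phase, name), val.item(), iteration)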
import sys

import numpy as np

import data
import utils

VALIDATION_SPLIT_PATH = "validation_split_v1.pkl"

if len(sys.argv) != 2:
    sys.exit("Usage: eval_predictions.py <validation_predictions_path>")

path = sys.argv[1]

predictions = np.load(path)
split = np.load(VALIDATION_SPLIT_PATH)
labels_valid = data.labels_train[split['indices_valid']]

loss = utils.log_loss(predictions, labels_valid)
acc = utils.accuracy(predictions, labels_valid)
loss_std = utils.log_loss_std(predictions, labels_valid)

print("Validation loss:\t\t\t%.6f" % loss)
print("Classification accuracy:\t\t%.2f%%" % (acc * 100))
print("Validation loss std:\t%.6f" % loss_std)
print()

for k in range(5):
    acc_k = utils.accuracy_topn(predictions, labels_valid, n=k + 1)
    print("Top-%d accuracy:\t\t%.2f%%" % (k + 1, acc_k * 100))
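# Several snippets in this collection call utils.log_loss, utils.accuracy,
# utils.accuracy_topn and utils.log_loss_std from a project-specific utils
# module that is not shown here. A minimal sketch of what they are assumed to
# compute (predictions are (n_samples, n_classes) probabilities, labels are
# integer class indices); this is an illustration, not the original code:
import numpy as np


def log_loss(y_prob, y_true, eps=1e-15):
    """Mean multiclass cross-entropy of predicted probabilities vs. labels."""
    p = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(np.log(p[np.arange(len(y_true)), y_true]))


def log_loss_std(y_prob, y_true, eps=1e-15):
    """Standard deviation of the per-sample cross-entropy."""
    p = np.clip(y_prob, eps, 1 - eps)
    return np.std(-np.log(p[np.arange(len(y_true)), y_true]))


def accuracy(y_prob, y_true):
    """Fraction of samples whose argmax prediction matches the label."""
    return np.mean(np.argmax(y_prob, axis=1) == y_true)


def accuracy_topn(y_prob, y_true, n=5):
    """Fraction of samples whose true label is among the n highest scores."""
    topn = np.argsort(y_prob, axis=1)[:, -n:]
    return np.mean([y_true[i] in topn[i] for i in range(len(y_true))])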
def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
    """Compute the MLP loss function and its corresponding derivatives
    with respect to each parameter: weights and bias vectors.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    y : array-like, shape (n_samples,)
        The target values.

    activations : list, length = n_layers - 1
        The ith element of the list holds the values of the ith layer.

    deltas : list, length = n_layers - 1
        The ith element of the list holds the difference between the
        activations of the i + 1 layer and the backpropagated error.
        More specifically, deltas are gradients of loss with respect to z
        in each layer, where z = wx + b is the value of a particular layer
        before passing through the activation function

    coef_grads : list, length = n_layers - 1
        The ith element contains the amount of change used to update the
        coefficient parameters of the ith layer in an iteration.

    intercept_grads : list, length = n_layers - 1
        The ith element contains the amount of change used to update the
        intercept parameters of the ith layer in an iteration.

    Returns
    -------
    loss : float
    coef_grads : list, length = n_layers - 1
    intercept_grads : list, length = n_layers - 1
    """
    n_samples = X.shape[0]

    # Forward propagate
    activations = self._forward_pass(activations)

    # Get loss
    loss = log_loss(y, activations[-1])

    # Backward propagate
    last = self.n_layers_ - 2

    # The calculation of delta[last] here works with following
    # combinations of output activation and loss function:
    # sigmoid and binary cross entropy, softmax and categorical cross
    # entropy, and identity with squared loss
    deltas[last] = activations[-1] - y

    # Compute gradient for the last layer
    coef_grads, intercept_grads = self._compute_loss_grad(
        last, n_samples, activations, deltas, coef_grads, intercept_grads)

    # Iterate over the hidden layers
    for i in range(self.n_layers_ - 2, 0, -1):
        deltas[i - 1] = np.dot(deltas[i], self.coefs_[i].T)
        inplace_relu_derivative(activations[i], deltas[i - 1])

        coef_grads, intercept_grads = self._compute_loss_grad(
            i - 1, n_samples, activations, deltas, coef_grads,
            intercept_grads)

    return loss, coef_grads, intercept_grads
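# The comment inside _backprop above notes that deltas[last] = activations[-1] - y
# is the gradient of categorical cross-entropy w.r.t. the pre-activation z when
# the output activation is softmax. A small self-contained numerical check of
# that identity (illustrative only, not part of the original module):
import numpy as np


def _softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()


def _cross_entropy(z, y):
    return -np.sum(y * np.log(_softmax(z)))


z = np.array([0.3, -1.2, 2.0])
y = np.array([0.0, 1.0, 0.0])  # one-hot target

analytic = _softmax(z) - y  # the p - y shortcut used in _backprop

eps = 1e-6
numeric = np.array([
    (_cross_entropy(z + eps * np.eye(3)[i], y) -
     _cross_entropy(z - eps * np.eye(3)[i], y)) / (2 * eps)
    for i in range(3)
])

assert np.allclose(analytic, numeric, atol=1e-6)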
predictions_list = [np.load(path) for path in paths]
predictions_stack = np.array(predictions_list).astype("float32")  # num_sources x num_datapoints x 121
uniform_blend = predictions_stack.mean(0)

print paths[0]
target_path = paths[0].replace("haralick", "blend").replace("momentsinfo", "blend")
# target_path = "predictions/%s--%s--%s--%s.npy" % (subset, "blend_"+config, config, "avg-prob")

if os.path.isfile(target_path):
    sys.exit("file %s already exists" % target_path)

if subset == "valid":
    t_valid = data.labels_train[np.load("validation_split_v1.pkl")['indices_valid']]
    loss_uniform_selected = utils.log_loss(uniform_blend, t_valid)
    print
    print "%s score: %.6f" % (subset, loss_uniform_selected)
    print

np.save(target_path, uniform_blend)
print "saving in", target_path

# ["valid--blend_featharalick_convroll4_1024_lesswd--featharalick_convroll4_1024_lesswd--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_big_wd_maxout512--featharalick_convroll4_big_wd_maxout512--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_big_weightdecay--featharalick_convroll4_big_weightdecay--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_doublescale_fs5--featharalick_convroll4_doublescale_fs5--avg-prob.npy"
# ,"valid--blend_featharalick_convroll5_preinit_resume_drop@420--featharalick_convroll5_preinit_resume_drop@420--avg-prob.npy"
# ,"valid--blend_featharalick_convroll_all_broaden_7x7_weightdecay_resume--featharalick_convroll_all_broaden_7x7_weightdecay_resume--avg-prob.npy"
# ,"valid--blend_featharalick_cp8--featharalick_cp8--avg-prob.npy"
if missing_predictions:
    print('\tPlease generate the following predictions:\n\t%s' %
          '\n\t'.join(missing_predictions))
    sys.exit(0)

# loading validation predictions
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(
    theano.config.floatX)  # num_sources x num_datapoints x 121

print("Individual prediction errors")
individual_prediction_errors = [
    utils.log_loss(p, t_valid) for p in predictions_list
]
del predictions_list

for i in range(n_models):
    print(individual_prediction_errors[i],
          os.path.basename(valid_predictions_paths[i]))
print()

# optimizing weights
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets
W = T.vector('W')

s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))
weighted_avg_predictions = T.sum(X * s, axis=0)  # T.tensordot(X, s, [[0], [0]])
def train(self, learning_schedule={0: 0.015, 500: 0.0015, 800: 0.00015, 1000: 0.000015},
          momentum=0.9, max_epochs=3000, save_every=20, save_path=os.getcwd()):
    self.save_every = save_every
    self.metadata_tmp_path = save_path + "/model_params.pkl"
    self.learning_rate_schedule = learning_schedule
    self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0]))
    self.momentum = momentum

    # for trainer
    self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params,
                                                self.learning_rate, self.momentum)
    train_fn = self.nesterov_trainer()  # nesterov with momentum.

    train_set_iterator = DataLoader(os.getcwd(), train_test_valid='train')
    best_dev_loss = numpy.inf
    dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
    dev_set_iterator.build_unequal_samples_map()

    # for loading the data onto the gpu
    # create_train_gen = lambda: train_set_iterator.create_gen(max_epochs)

    patience = 1000
    patience_increase = 2.
    improvement_threshold = 0.995
    done_looping = False

    print '... training the model'
    start_time = time.clock()
    epoch = 0
    timer = None

    # for plotting
    self._costs = []
    self._train_errors = []
    self._dev_errors = []

    while (epoch < max_epochs) and (not done_looping):
        losses_train = []
        losses = []
        avg_costs = []
        timer = time.time()

        for iteration, (x, y) in enumerate(train_set_iterator):
            if iteration in self.learning_rate_schedule:
                lr = np.float32(self.learning_rate_schedule[iteration])
                print "  setting learning rate to %.7f" % lr
                self.learning_rate.set_value(lr)

            print "  load training data onto GPU"
            avg_cost = train_fn(x, y)
            if np.isnan(avg_cost):
                raise RuntimeError("NaN DETECTED.")

            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)

            # for saving the batch
            if ((iteration + 1) % save_every) == 0:
                print
                print "Saving metadata, parameters"
                with open(self.metadata_tmp_path, 'w') as f:
                    pickle.dump({'losses_train': avg_costs,
                                 'param_values': nn.layers.get_all_param_values(self.output_layer)},
                                f, pickle.HIGHEST_PROTOCOL)

        mean_train_loss = numpy.mean(avg_costs)
        # print "  mean training loss:\t\t%.6f" % mean_train_loss
        # losses_train.append(mean_train_loss)

        # accuracy assessment
        output = utils.one_hot(self.predict_(x)(), m=20)
        train_loss = utils.log_loss(output, y)
        acc = 1 - utils.accuracy(output, y)
        losses.append(train_loss)
        del output
        del x
        del y

        print(' epoch %i took %f seconds' % (epoch, time.time() - timer))
        print(' epoch %i, avg costs %f' % (epoch, mean_train_loss))
        print(' epoch %i, training error %f' % (epoch, acc))

        # for plotting
        self._costs.append(mean_train_loss)
        self._train_errors.append(acc)

        # valid accuracy
        xd, yd = dev_set_iterator.random_batch()
        valid_output = utils.one_hot(self.predict_(xd)(), m=20)
        valid_acc = 1 - utils.accuracy(valid_output, yd)
        self._dev_errors.append(valid_acc)
        del valid_output
        del xd
        del yd

        if valid_acc < best_dev_loss:
            best_dev_loss = valid_acc
            best_params = copy.deepcopy(self.all_params)
            print('!!! epoch %i, validation error of best model %f' % (epoch, valid_acc))
            print
            print "Saving best performance parameters"
            with open(self.metadata_tmp_path, 'w') as f:
                pickle.dump({'losses_train': avg_costs,
                             'param_values': nn.layers.get_all_param_values(self.output_layer)},
                            f, pickle.HIGHEST_PROTOCOL)

            if (valid_acc < best_dev_loss * improvement_threshold):
                patience = max(patience, iteration * patience_increase)

        if patience <= iteration:
            done_looping = True
            break

        epoch += 1
for x_shared, x_chunk_eval in zip(xs_shared, xs_chunk_eval):
    x_shared.set_value(x_chunk_eval)

outputs_chunk = []
for b in xrange(num_batches_chunk_eval):
    out = compute_output(b)
    outputs_chunk.append(out)

outputs_chunk = np.vstack(outputs_chunk)
outputs_chunk = outputs_chunk[:chunk_length_eval]  # truncate to the right length
outputs.append(outputs_chunk)

outputs = np.vstack(outputs)
loss = utils.log_loss(outputs, labels)
acc = utils.accuracy(outputs, labels)
print "  loss:\t%.6f" % loss
print "  acc:\t%.2f%%" % (acc * 100)
print

losses.append(loss)
del outputs

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * (float(config.num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
import sys

import numpy as np

import data
import utils

VALIDATION_SPLIT_PATH = "validation_split_v1.pkl"

if len(sys.argv) != 2:
    sys.exit("Usage: eval_predictions.py <validation_predictions_path>")

path = sys.argv[1]

predictions = np.load(path)
split = np.load(VALIDATION_SPLIT_PATH)
labels_valid = data.labels_train[split["indices_valid"]]

loss = utils.log_loss(predictions, labels_valid)
acc = utils.accuracy(predictions, labels_valid)
loss_std = utils.log_loss_std(predictions, labels_valid)

print "Validation loss:\t\t\t%.6f" % loss
print "Classification accuracy:\t\t%.2f%%" % (acc * 100)
print "Validation loss std:\t%.6f" % loss_std
print

for k in xrange(5):
    acc_k = utils.accuracy_topn(predictions, labels_valid, n=k + 1)
    print "Top-%d accuracy:\t\t%.2f%%" % (k + 1, acc_k * 100)
if not os.path.isfile(path):
    missing_predictions.append(path)

if missing_predictions:
    print '\tPlease generate the following predictions:\n\t%s' % '\n\t'.join(missing_predictions)
    sys.exit(0)

# loading validation predictions
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)  # num_sources x num_datapoints x 121

print "Individual prediction errors"
individual_prediction_errors = [utils.log_loss(p, t_valid) for p in predictions_list]
del predictions_list

for i in xrange(n_models):
    print individual_prediction_errors[i], os.path.basename(valid_predictions_paths[i])
print

# optimizing weights
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets
W = T.vector('W')

s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))
weighted_avg_predictions = T.sum(X * s, axis=0)  # T.tensordot(X, s, [[0], [0]])

error = nn_plankton.log_loss(weighted_avg_predictions, t)
grad = T.grad(error, W)
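# The Theano graph above learns blend weights that are pushed through a softmax
# so the ensemble stays a convex combination of the individual models. A rough
# NumPy/SciPy sketch of the same idea (an assumed equivalent, not the original
# script; predictions_stack is num_sources x num_datapoints x num_classes and
# t_valid holds integer labels):
import numpy as np
from scipy.optimize import minimize


def blend_log_loss(w, predictions_stack, t_valid, eps=1e-15):
    # softmax over the raw weight vector keeps the weights positive and summing to 1
    e = np.exp(w - w.max())
    s = (e / e.sum())[:, None, None]
    blended = np.clip(np.sum(predictions_stack * s, axis=0), eps, 1 - eps)
    # mean multiclass log loss of the blend against the integer labels
    return -np.mean(np.log(blended[np.arange(len(t_valid)), t_valid]))


# Example usage with the arrays defined above:
# w0 = np.zeros(predictions_stack.shape[0])
# result = minimize(blend_log_loss, w0, args=(predictions_stack, t_valid),
#                   method="Nelder-Mead")
# blend_weights = np.exp(result.x) / np.exp(result.x).sum()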
def fit(self, X_train, y_train, layers, alpha, epochs):
    """
    Main function that trains/fits the neural net by running through multiple epochs
    :param X_train: training data features
    :param y_train: training data labels; y_actual
    :param layers: number of hidden layers and neurons in each of them
    :param alpha: learning rate initialized value; later changed by adam optimizer
    :param epochs: number of iterations to be run
    :return: trained neural net parameters, to be used for predicting on test data
    """
    # Initialize Weights and Biases
    parameters = self.initialize(layers)
    cost_function, learning_curve = [], []

    # Binarize Labels
    classes = list(set(y_train))
    y_bin = ut.label_binarize(y_train, classes)  # Y = [1,0,0,0], [0,1,0,0]...

    for j in range(epochs):
        # Making batches of data
        # X = X_train
        # Y = y_train
        #
        # batch_slice = ut.gen_batches(X_train.shape[0], X_train.shape[0] / 1000)
        # for i in batch_slice:
        #     X_train = X[int(i.start):int(i.stop) + 1]
        #     y_train = Y[int(i.start):int(i.stop) + 1]

        # ---------------------------------------------------
        y_hat, parameters = self.forward_prop(X_train, parameters, layers)
        # y_hat is the predicted label probability by softmax

        # Utils: log_loss(y_true, y_prob)
        log_loss = ut.log_loss(y_bin, y_hat)

        # Back Propagation
        parameters = self.back_prop(X_train, y_bin, parameters, layers)

        # Prep variables for adam optimizer
        params, grads = self.prep_vars(layers, parameters)

        # Initialize constructor of adam optimizer
        learning_rate_init = alpha
        optimizer = AdamOptimizer(params, learning_rate_init)

        # updates weights with grads
        params = optimizer.update_params(grads)  # update weights

        # Unpack results from Adam Optimizer
        parameters = self.params_unpack(params, parameters, layers)

        # Append log loss, to plot curve later
        cost_function.append(log_loss)

        # ---------------------------------------------------
        # Mapping
        if j == 0:
            class_dict = dict()
            for i in range(len(y_bin)):
                class_dict[str(y_bin[i])] = y_train[i]

    # ---------------------------------------------------
    # Making pyplots
    # print("cost_function 0, -1", cost_function[0], cost_function[-1])
    # print("learning_curve 0, -1", learning_curve[0], learning_curve[-1])
    #
    # learning_curve = pd.DataFrame(learning_curve)
    # learning_curve.to_csv("learning_curve_blackbox21.csv", header=False, index=False)
    # print("learning_curve", learning_curve)
    #
    # plt.plot(learning_curve)
    # plt.title("Learning Curve")
    # plt.xlabel("Epochs")
    # plt.ylabel("Accuracy")
    # plt.show()
    #
    # cost_function = pd.DataFrame(cost_function)
    #
    # plt.plot(cost_function)
    # plt.title("Logistic Loss")
    # plt.xlabel("Epochs")
    # plt.ylabel("Loss")
    # plt.show()

    return parameters, class_dict
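# fit() above delegates the weight update to AdamOptimizer.update_params. As a
# rough sketch of the Adam rule it presumably applies (standard Adam, Kingma &
# Ba 2015; this is not the project's actual optimizer class):
import numpy as np


class TinyAdam:
    def __init__(self, params, learning_rate_init=0.001,
                 beta1=0.9, beta2=0.999, eps=1e-8):
        self.params = [np.asarray(p, dtype=float) for p in params]
        self.lr, self.beta1, self.beta2, self.eps = learning_rate_init, beta1, beta2, eps
        self.m = [np.zeros_like(p) for p in self.params]
        self.v = [np.zeros_like(p) for p in self.params]
        self.t = 0

    def update_params(self, grads):
        # one Adam step over all parameter arrays; returns the updated params
        self.t += 1
        for i, g in enumerate(grads):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * g
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * g ** 2
            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
            v_hat = self.v[i] / (1 - self.beta2 ** self.t)
            self.params[i] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
        return self.params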
num_batches_chunk_eval = int(np.ceil(chunk_length_eval / float(config.batch_size)))

for x_shared, x_chunk_eval in zip(xs_shared, xs_chunk_eval):
    x_shared.set_value(x_chunk_eval)

outputs_chunk = []
for b in xrange(num_batches_chunk_eval):
    out = compute_output(b)
    outputs_chunk.append(out)

outputs_chunk = np.vstack(outputs_chunk)
outputs_chunk = outputs_chunk[:chunk_length_eval]  # truncate to the right length
outputs.append(outputs_chunk)

outputs = np.vstack(outputs)
loss = utils.log_loss(outputs, labels)
acc = utils.accuracy(outputs, labels)
print "  loss:\t%.6f" % loss
print "  acc:\t%.2f%%" % (acc * 100)
print

losses.append(loss)
del outputs

now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * (float(config.num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
eta = datetime.now() + timedelta(seconds=est_time_left)