def fill_missing_bLSTM(panel, epochs=100):
    # 1 where the panel holds a value, 0 where it is NaN
    non_nans = 1 - np.isnan(panel.values)
    X_train = panel.fillna(0).values
    n_samples, n_timesteps, n_feat = X_train.shape

    # Bidirectional LSTM that reconstructs every feature at every timestep
    main_input = Input(shape=(n_timesteps, n_feat), name='main_input')
    lstm = Bidirectional(LSTM(120, return_sequences=True))(main_input)
    unmasked_outputs = TimeDistributed(Dense(n_feat))(lstm)

    # Mask the outputs so the MSE loss is computed only on observed entries
    bool_input = Input(shape=(n_timesteps, n_feat), name='isnan_inputs')
    masked_outputs = merge([unmasked_outputs, bool_input], mode='mul')

    model = Model(input=[main_input, bool_input], output=masked_outputs)
    print model.count_params()
    unmasked_model = Model(input=main_input, output=unmasked_outputs)

    model.compile(optimizer='rmsprop', loss='mse')
    early_stopping = EarlyStopping(patience=20)
    history = model.fit([X_train, non_nans], X_train, nb_epoch=epochs,
                        validation_split=0.1, callbacks=[early_stopping])
    plot_loss(history)

    # Predict with the unmasked model to obtain values for the missing entries
    unmasked_model.compile(optimizer='rmsprop', loss='mae')
    X_train = unmasked_model.predict(X_train)
    fpanel = pd.Panel(data=X_train, items=panel.axes[0],
                      major_axis=panel.axes[1], minor_axis=panel.axes[2])
    return fpanel

def iterative_fill_bLSTM(panel, epochs=5000, iterations=5):
    nans = np.isnan(panel.values) + 0   # 1 where missing
    ians = 1 - nans                     # 1 where observed
    X_train = panel.fillna(0).values
    for iteration in range(iterations):
        n_samples, n_timesteps, n_feat = X_train.shape
        main_input = Input(shape=(n_timesteps, n_feat), name='main_input')
        lstm = Bidirectional(LSTM(120, dropout_W=0.5, dropout_U=0.2,
                                  return_sequences=True))(main_input)
        # lstm2 = Bidirectional(LSTM(60, dropout_W=0.5, dropout_U=0.2, return_sequences=True))(lstm)
        unmasked_outputs = TimeDistributed(Dense(n_feat))(lstm)

        nan_input = Input(shape=(n_timesteps, n_feat), name='is nan inputs')
        ian_input = Input(shape=(n_timesteps, n_feat), name='is a num inputs')
        # predictions at observed positions plus the (zero-filled) originals at missing
        # positions, so the loss is driven by observed entries only
        masked_outputs = merge([unmasked_outputs, ian_input], mode='mul')
        only_original_outputs = merge([main_input, nan_input], mode='mul')
        final_output = merge([masked_outputs, only_original_outputs], mode='sum')

        model = Model(input=[main_input, ian_input, nan_input], output=final_output)
        model.compile(optimizer='rmsprop', loss='mse')
        early_stopping = EarlyStopping(patience=20)
        history = model.fit([X_train, ians, nans], X_train, nb_epoch=epochs,
                            validation_split=0.1, callbacks=[early_stopping])
        plot_loss(history)

        # replace the working matrix with the model's reconstruction for the next iteration
        unmasked_model = Model(input=main_input, output=unmasked_outputs)
        unmasked_model.compile(optimizer='rmsprop', loss='mse')
        X_train = unmasked_model.predict(X_train)

    fpanel = pd.Panel(data=X_train, items=panel.axes[0],
                      major_axis=panel.axes[1], minor_axis=panel.axes[2])
    return fpanel

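# A minimal usage sketch for the two imputation helpers above. It assumes the same
# Keras 1.x functional API (`merge`, `nb_epoch`) and a pandas version that still ships
# pd.Panel (removed in pandas 0.25); the synthetic panel below is purely illustrative.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
values = rng.randn(64, 50, 4).cumsum(axis=1)          # 64 series, 50 timesteps, 4 features
values[rng.rand(*values.shape) < 0.1] = np.nan        # knock out ~10% of the entries
panel = pd.Panel(values)

filled = fill_missing_bLSTM(panel, epochs=50)                        # single pass
# filled = iterative_fill_bLSTM(panel, epochs=200, iterations=3)     # or refine iteratively
assert not np.isnan(filled.values).any()
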
def train_vm(data_loader, val_loader, vm, fp, device, lr, weight_decay, iters, epochs=1,
             val_report_iter=50, model_save_path=None, loss_visualize=False,
             loss_name="dice", fg_thresh=0.5):

    # set model to train mode
    vm.feat_net.train()

    weight_num = sum(p.numel() for p in vm.feat_net.parameters() if p.requires_grad)
    logger.debug("Number of trainable parameters in VideoMatch: {}".format(weight_num))

    optimizer = optim.Adam(vm.feat_net.parameters(), lr=lr, weight_decay=weight_decay)

    stop_training = False

    # save model on SIGINT (Ctrl + c)
    def sigint_handler(signal, frame):
        logger.info("Ctrl+c caught, stopping the training and saving the model...")
        nonlocal stop_training
        stop_training = True

    signal.signal(signal.SIGINT, sigint_handler)

    logger.debug("Using foreground threshold {}".format(fg_thresh))
    logger.debug("Running untrained VideoMatch on validation set...")

    # check videomatch avg val accuracy
    vm_avg_val_score = 0.
    with torch.set_grad_enabled(False):
        for val_ref_frame, val_test_frame in tqdm(val_loader):
            # todo: waiting for optimization, it takes too long, about 12min
            (ref_img, ref_mask), (test_img, test_mask) = fp(val_ref_frame, val_test_frame)
            vm.seq_init(ref_img, ref_mask)
            fg_prob, _ = vm.predict_fg_bg(test_img)
            # vm_avg_val_score += segmentation_accuracy(fg_prob, test_mask.to(device))
            vm_avg_val_score += segmentation_IOU(fg_prob.cpu(), test_mask, fg_thresh)

    logger.debug("Untrained Videomatch IOU on validation set: {:.3f}"
                 .format(vm_avg_val_score / len(val_loader)))

    if loss_name == "dice":
        loss_function = dice_loss
    elif loss_name == "bce":
        loss_function = torch.nn.BCELoss()
    elif loss_name == "balancedbce":
        loss_function = balanced_CE_loss
    else:
        raise ValueError("Loss function {} is unknown, use 'dice', 'bce' or 'balancedbce'!".format(loss_name))
    logger.debug("Using loss function {}".format(loss_name))

    logger.debug("Training started...")
    loss_list = []
    val_score_list = []
    for epoch in range(epochs):
        logger.debug("Epoch: \t[{}/{}]".format(epoch + 1, epochs))
        avg_loss = 0.
        for i, (ref_frame, test_frame) in tqdm(enumerate(data_loader)):
            if i >= iters or stop_training:
                break

            # preprocess
            (ref_img, ref_mask), (test_img, test_mask) = fp(ref_frame, test_frame)
            test_mask = test_mask.unsqueeze(0).to(device).float()

            # initialize every time since reference image keeps changing
            vm.seq_init(ref_img, ref_mask)

            # Use softmaxed foreground probability and groundtruth to compute the loss
            fg_prob, _ = vm.predict_fg_bg(test_img)
            loss = loss_function(fg_prob, test_mask)
            avg_loss += loss.data.mean().cpu().numpy()

            # periodically evaluate on the validation set
            if ((i + 1) % val_report_iter == 0 or i + 1 == iters) and i > 0:
                vm_avg_val_score = 0.
                val_cnt = 0
                with torch.set_grad_enabled(False):
                    for val_ref_frame, val_test_frame in val_loader:
                        (ref_img, ref_mask), (test_img, test_mask) = fp(val_ref_frame, val_test_frame)
                        vm.seq_init(ref_img, ref_mask)
                        fg_prob, _ = vm.predict_fg_bg(test_img)
                        vm_avg_val_score += segmentation_IOU(fg_prob.cpu(), test_mask, fg_thresh)
                        val_cnt += 1

                logger.debug("Iter [{:5d}/{}]:\tavg loss = {:.4f},\tavg val IOU = {:.3f}"
                             .format(i + 1, iters, avg_loss / val_report_iter, vm_avg_val_score / val_cnt))
                val_score_list.append(vm_avg_val_score / val_cnt)
                loss_list.append(avg_loss / val_report_iter)
                avg_loss = 0.

            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if stop_training:
            break

    if model_save_path is not None:
        logger.info("Saving model to path {}".format(model_save_path))
        vm.save_model(model_save_path)

    if loss_visualize:
        if not loss_list:
            logger.info("Loss list is empty, omitting loss visualization!")
        else:
            bins = 0 if len(loss_list) < 500 else 50
            plot_loss(loss_list, val_score_list, val_report_iter, bins=bins)
            plt.show()

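# `dice_loss` and `balanced_CE_loss` are referenced above but not defined in this snippet.
# A common soft-Dice formulation in PyTorch looks roughly like the sketch below; the name
# `soft_dice_loss` is ours, and it is not necessarily the exact implementation used here.
import torch

def soft_dice_loss(fg_prob, target, eps=1e-6):
    """1 minus the soft Dice coefficient between predicted foreground probabilities and a binary mask."""
    fg_prob = fg_prob.contiguous().view(fg_prob.size(0), -1)
    target = target.contiguous().view(target.size(0), -1)
    intersection = (fg_prob * target).sum(dim=1)
    union = fg_prob.sum(dim=1) + target.sum(dim=1)
    return (1.0 - (2.0 * intersection + eps) / (union + eps)).mean()
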
    batch_size=opt.batch_size)
valid_generator = ImageDataGenerator().flow(x_valid, y_valid,
                                            batch_size=opt.batch_size)

model = CNN3()
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

callback = [
    # EarlyStopping(monitor='val_loss', patience=50, verbose=True),
    # ReduceLROnPlateau(monitor='lr', factor=0.1, patience=15, verbose=True),
    ModelCheckpoint('../models/cnn3_best_weights.h5', monitor='val_acc', verbose=True,
                    save_best_only=True, save_weights_only=True)
]

history_ck = model.fit_generator(
    train_generator,
    steps_per_epoch=len(y_train) // opt.batch_size,
    epochs=opt.epochs,
    validation_data=valid_generator,
    validation_steps=len(y_valid) // opt.batch_size,
    callbacks=callback)

his = history_ck
if opt.plot_history:
    plot_loss(his.history, opt.dataset)
    plot_acc(his.history, opt.dataset)

def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer, critierion,
                       metrics, params, model_dir, restore_file=None):
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir, args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)

    best_val_acc = 0.0
    best_val_metrics = []
    learning_rate_0 = params.learning_rate

    train_acc_series = []
    val_acc_series = []
    train_loss_series = []

    for epoch in range(params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # train model
        train_metrics = train(model, train_dataloader, optimizer, critierion, metrics, params)

        # learning rate exponential decay
        params.learning_rate = learning_rate_0 * np.exp(-params.exp_decay_k * epoch)

        # evaluate
        val_metrics = evaluate(model, critierion, val_dataloader, metrics, params)

        # find accuracy from validation dataset
        val_acc = val_metrics['accuracy']
        is_best = val_acc >= best_val_acc

        # save weights
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict': optimizer.state_dict()},
                              is_best=is_best,
                              checkpoint=model_dir)

        # save accuracy / loss to array for plot
        train_acc_series.append(train_metrics['accuracy'])
        val_acc_series.append(val_metrics['accuracy'])
        train_loss_series.append(train_metrics['loss'])

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc
            best_val_metrics = val_metrics

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(model_dir, "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(model_dir, "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)

        print('******************************************')

    # plot visualized performance
    visualize.plot_train_val_accuracy(train_acc_series, val_acc_series)
    visualize.plot_loss(train_loss_series)

    # save best validation F1 score plot
    visualize.plot_individual_label_f1score(best_val_metrics)

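# The exponential decay above only rewrites params.learning_rate; whether it reaches the
# optimizer depends on what train() does with it. If it does not, the decayed rate can be
# pushed into the optimizer's param groups directly, as in this sketch (helper name is ours).
import numpy as np

def apply_exponential_decay(optimizer, lr0, k, epoch):
    """Set every param group's learning rate to lr0 * exp(-k * epoch)."""
    new_lr = lr0 * float(np.exp(-k * epoch))
    for group in optimizer.param_groups:
        group['lr'] = new_lr
    return new_lr
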
def train_model(self, train_dataset, val_dataset, learning_rate, epochs, layers):
    """Train the model.
    train_dataset, val_dataset: Training and validation Dataset objects.
    learning_rate: The learning rate to train with.
    epochs: Number of training epochs. Note that previous training epochs are
        considered to be done already, so this actually determines the epochs to
        train in total rather than in this particular call.
    layers: Allows selecting which layers to train. It can be:
        - A regular expression to match layer names to train
        - One of these predefined values:
          heads: The RPN, classifier and mask heads of the network
          all: All the layers
          3+: Train Resnet stage 3 and up
          4+: Train Resnet stage 4 and up
          5+: Train Resnet stage 5 and up
    """
    # Pre-defined layer regular expressions
    layer_regex = {
        # all layers but the backbone
        "heads": r"(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
        # From a specific Resnet stage and up
        "3+": r"(fpn.C3.*)|(fpn.C4.*)|(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
        "4+": r"(fpn.C4.*)|(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
        "5+": r"(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
        # All layers
        "all": ".*",
    }
    if layers in layer_regex.keys():
        layers = layer_regex[layers]

    # Data generators
    train_set = data_generator_layer.Dataset(train_dataset, self.config, augment=True)
    train_generator = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=4)
    val_set = data_generator_layer.Dataset(val_dataset, self.config, augment=True)
    val_generator = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=True, num_workers=4)

    # Train
    utils_log.log("\nStarting at epoch {}. LR={}\n".format(self.epoch + 1, learning_rate))
    utils_log.log("Checkpoint Path: {}".format(self.checkpoint_path))
    self.set_trainable(layers)

    # Optimizer object
    # Add L2 Regularization
    # Skip gamma and beta weights of batch normalization layers.
    trainables_wo_bn = [param for name, param in self.named_parameters()
                        if param.requires_grad and 'bn' not in name]
    trainables_only_bn = [param for name, param in self.named_parameters()
                          if param.requires_grad and 'bn' in name]
    optimizer = optim.SGD([
        {'params': trainables_wo_bn, 'weight_decay': self.config.WEIGHT_DECAY},
        {'params': trainables_only_bn}
    ], lr=learning_rate, momentum=self.config.LEARNING_MOMENTUM)

    for epoch in range(self.epoch + 1, epochs + 1):
        utils_log.log("Epoch {}/{}.".format(epoch, epochs))

        # Training
        loss = self.train_epoch(train_generator, optimizer, self.config.STEPS_PER_EPOCH)

        # Validation
        val_loss = self.valid_epoch(val_generator, self.config.VALIDATION_STEPS)

        # Statistics
        self.loss_history.append(loss)
        self.val_loss_history.append(val_loss)
        visualize.plot_loss(self.loss_history, self.val_loss_history, save=True, log_dir=self.log_dir)

        # Save model
        torch.save(self.state_dict(), self.checkpoint_path.format(epoch))

    self.epoch = epochs

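# self.set_trainable(layers) is not shown in this snippet. In this style of codebase it
# typically toggles requires_grad by matching parameter names against the regex built above;
# the sketch below is an assumption of that behaviour, not the repository's exact code.
import re

def set_trainable(self, layer_regex):
    """Enable gradients only for parameters whose names match layer_regex."""
    for name, param in self.named_parameters():
        param.requires_grad = bool(re.fullmatch(layer_regex, name))
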
def main(reps, pretrained_w_path, do_module1, init_seed=0, load_t=0, num_epochs=200,
         batchsize=96, fine_tune=0, patience=500, lr_init=1e-3, optim='adagrad', toy=0,
         num_classes=374):
    res_root = '/home/hoa/Desktop/projects/resources'
    X_path = osp.join(res_root, 'datasets/corel5k/Xaug_train_b01c.npy')
    Y_path = osp.join(res_root, 'datasets/corel5k/Y_train.npy')
    MEAN_IMG_PATH = osp.join(res_root, 'models/ilsvrc_2012_mean.npy')

    snapshot = 50  # save model after every `snapshot` epochs
    drop_p = 0.5  # drop out prob.
    lambda2 = 0.0005 / 2  # l2-regularizer constant
    # step=patience/4 # decay learning after every `step` epochs
    lr_patience = 60  # for learning rate schedule, if optim=='momentum'
    if toy:  # unit testing
        num_epochs = 10
        data_multi = 3
        reps = 2
        # drop_p=0
        # lambda2=0

    # Create name tag for the experiment
    if fine_tune:
        full_or_tune = 'tune'  # description tag for storing associated files
    else:
        full_or_tune = 'full'
    time_stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    snapshot_root = '../snapshot_models/'

    # LOADING DATA
    print 'LOADING DATA ...'
    X = np.load(X_path)
    Y = np.load(Y_path)
    N = len(Y)
    print 'Raw X,Y shape', X.shape, Y.shape
    if len(X) != len(Y):
        print 'Inconsistent number of input images and labels. X is possibly augmented.'

    MEAN_IMG = np.load(MEAN_IMG_PATH).astype('float32')
    MEAN_IMG_227 = skimage.transform.resize(
        np.swapaxes(np.swapaxes(MEAN_IMG, 0, 1), 1, 2), (227, 227),
        mode='nearest', preserve_range=True)
    MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227, 1, 2), 0, 1).reshape((1, 3, 227, 227))

    all_metrics = []  # store metrics in each run
    time_profiles = {
        'train_module1': [],
        'train_module1_eff': [],
        'train_module2': [],
        'test': []
    }  # record training and testing time

    # PREPARE THEANO EXPRESSION FOR BOTH MODULES
    print 'COMPILING THEANO EXPRESSION ...'
    input_var = T.tensor4('inputs')
    target_var = T.imatrix('targets')
    network = build_model(num_classes=num_classes, input_var=input_var)

    # Create a loss expression for training
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    weights = lasagne.layers.get_all_params(network, regularizable=True)
    l2reg = theano.shared(floatX(lambda2)) * T.sum([T.sum(w ** 2) for w in weights])
    loss = loss.mean() + l2reg

    lr = theano.shared(np.array(lr_init, dtype=theano.config.floatX))
    lr_decay = np.array(1. / 3, dtype=theano.config.floatX)

    # Create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    # last-layer case is actually very simple:
    # `params` above is a list of all (W,b)-pairs
    # Therefore last layer's (W,b) is params[-2:]
    if fine_tune == 7:  # tuning params from fc7 to fc8
        params = params[-2:]
    # elif fine_tune == 6: # tuning params from fc6 to fc8
    #     params = params[-4:]
    # TODO adjust for per-layer training with local_lr

    if optim == 'momentum':
        updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9)
    elif optim == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr, rho=0.9, epsilon=1e-06)
    elif optim == 'adam':
        updates = lasagne.updates.adam(loss, params, learning_rate=lr,
                                       beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif optim == 'adagrad':
        updates = lasagne.updates.adagrad(loss, params, learning_rate=lr, epsilon=1e-06)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean() + l2reg
    # zero-one loss with threshold t = 0.5 for reference
    # zero_one_loss = T.abs_((test_prediction > theano.shared(floatX(0.5))) - target_var).sum(axis=1)
    # zero_one_loss /= target_var.shape[1].astype(theano.config.floatX)
    # zero_one_loss = zero_one_loss.mean()

    # Compile a function performing a backward pass (training step) on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    bwd_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function performing a forward pass,
    # returns validation loss, 0/1 Error, score i.e. Xout:
    fwd_fn = theano.function([input_var, target_var], test_loss)

    # Create a theano function for computing score
    score = lasagne.layers.get_output(network, deterministic=True)
    score_fn = theano.function([input_var], score)

    def compute_score(X, Y, batchsize=batchsize, shuffle=False):
        out = np.zeros(Y.shape)
        batch_id = 0
        for batch in iterate_minibatches(X, Y, batchsize, shuffle=False):
            inputs, _ = batch
            # Flip random half of the batch
            flip_idx = np.random.choice(len(inputs), size=len(inputs) / 2, replace=False)
            if len(flip_idx) > 1:
                inputs[flip_idx] = inputs[flip_idx, :, :, ::-1]
            # Subtract mean image
            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)
            if len(inputs) == batchsize:
                out[batch_id * batchsize:(batch_id + 1) * batchsize] = score_fn(inputs)
                batch_id += 1
            else:
                out[batch_id * batchsize:] = score_fn(inputs)
        return out

    try:
        # MAIN LOOP FOR EACH RUN
        for seed in np.arange(reps) + init_seed:
            snapshot_name = str(num_classes) + 'alex' + time_stamp + full_or_tune + str(seed)

            # reset learning rate
            lr.set_value(lr_init)
            print '\nRUN', seed, '...'

            # Split train/val/test set
            # indicies = np.arange(len(Y))
            # Y_train_val, Y_test, idx_train_val, idx_test = train_test_split(
            #     Y, indicies, random_state=seed, train_size=float(2)/3)
            idx_train_val = np.arange(len(Y))

            # Module 2 training set is composed of module 1 training and validation set
            idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx', N=N)
            Xaug_train_val = X
            if Xaug_train_val.shape[1] != 3:
                Xaug_train_val = b01c_to_bc01(Xaug_train_val)
            Yaug_train_val = data_aug(Y, mode='aug', isMat='Y', N=N)

            # train/val/test set for module 1
            Y_train, Y_val, idx_train, idx_val = train_test_split(Y, idx_train_val, random_state=seed)
            idx_aug_train = idx_train
            Xaug_train = Xaug_train_val[idx_aug_train]
            Yaug_train = Y_train
            idx_aug_val = idx_val
            Xaug_val = Xaug_train_val[idx_aug_val]
            Yaug_val = Y_val

            # Test set
            X_test = np.load(osp.join(res_root, 'datasets/corel5k/Xaug_test_b01c.npy'))
            if X_test.shape[1] != 3:
                X_test = b01c_to_bc01(X_test)
            Y_test = np.load(osp.join(res_root, 'datasets/corel5k/Y_test.npy'))

            print "Augmented train/val/test set size:", len(Xaug_train), len(Yaug_val), len(X_test)
            print "Augmented (X,Y) dtype:", Xaug_train.dtype, Yaug_val.dtype
            print "Processed Mean image:", MEAN_IMG.dtype, MEAN_IMG.shape

            if toy:  # try to overfit a tiny subset of the data
                Xaug_train = Xaug_train[:batchsize * data_multi + batchsize / 2]
                Yaug_train = Yaug_train[:batchsize * data_multi + batchsize / 2]
                Xaug_val = Xaug_val[:batchsize + batchsize / 2]
                Yaug_val = Yaug_val[:batchsize + batchsize / 2]

            # Init by pre-trained weights, if any
            if len(pretrained_w_path) > 0:
                layer_list = lasagne.layers.get_all_layers(network)  # 22 layers
                if pretrained_w_path.endswith('pkl'):
                    # load reference_net
                    # use case: weights initialized from pre-trained reference nets
                    f = open(pretrained_w_path, 'r')
                    w_list = pickle.load(f)  # list of 11 (W,b)-pairs
                    f.close()
                    # exclude (W,b) of fc8
                    lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2])
                    # BIG NOTE: don't be confused, it's pure coincidence that layer_list
                    # and w_list have the same index here. The last elements of layer_list are
                    # [.., fc6, drop6, fc7, drop7, fc8], while w_list is
                    # [..., W, b, W, b, W, b] where, e.g., w_list[-4] and w_list[-3] correspond to
                    # params that are associated with fc7, i.e. params that connect drop6 to fc7
                elif pretrained_w_path.endswith('npz'):
                    # load self-trained net
                    # use case: continue training from a snapshot model
                    with np.load(pretrained_w_path) as f:
                        # NOTE: only load snapshot of the same `seed`
                        w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(network, w_list)
                elif pretrained_w_path.endswith('/'):
                    # init from 1 of the 30 snapshots
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' % seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path, snapshot_name) + '.npz') as f:
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                            lasagne.layers.set_all_param_values(network, w_list)

            # START MODULE 1
            module1_time = 0
            if do_module1:
                print 'MODULE 1'
                training_history = {}
                training_history['iter_training_loss'] = []
                training_history['iter_validation_loss'] = []
                training_history['training_loss'] = []
                training_history['validation_loss'] = []
                training_history['learning_rate'] = []

                # http://deeplearning.net/tutorial/gettingstarted.html#early-stopping
                # early-stopping parameters
                n_train_batches = Xaug_train.shape[0] / batchsize
                if Xaug_train.shape[0] % batchsize != 0:
                    n_train_batches += 1
                patience = patience  # look at this many examples regardless
                patience_increase = 2  # wait this much longer when a new best is found
                lr_patience_increase = 1.01
                improvement_threshold = 0.995  # a relative improvement of this much is
                                               # considered significant; a significant test
                                               # MIGHT be better
                validation_frequency = min(n_train_batches, patience / 2)
                # go through this many minibatches before checking the network
                # on the validation set; in this case we check every epoch

                best_params = None
                epoch_validation_loss = 0  # indicates that valid_loss has not been computed yet
                best_validation_loss = np.inf
                best_iter = -1
                lr_iter = -1
                test_score = 0.
                start_time = time.time()
                done_looping = False
                epoch = 0

                # Finally, launch the training loop.
                print("Starting training...")
                # We iterate over epochs:
                print("\nEpoch\tTrain Loss\tValid Loss\tBest-ValLoss-and-Iter\tTime\tL.Rate")
                sys.setrecursionlimit(10000)
                try:  # Early-stopping implementation
                    while (not done_looping) and (epoch < num_epochs):
                        # In each epoch, we do a full pass over the training data:
                        train_err = 0
                        train_batches = 0
                        start_time = time.time()
                        for batch in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True):
                            inputs, targets = batch
                            # Horizontal flip half of the images
                            bs = inputs.shape[0]
                            indices = np.random.choice(bs, bs / 2, replace=False)
                            inputs[indices] = inputs[indices, :, :, ::-1]
                            # Subtract mean image
                            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
                            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)

                            train_err_batch = bwd_fn(inputs, targets)
                            train_err += train_err_batch
                            train_batches += 1

                            iter_now = epoch * n_train_batches + train_batches
                            training_history['iter_training_loss'].append(train_err_batch)
                            training_history['iter_validation_loss'].append(epoch_validation_loss)

                            if (iter_now + 1) % validation_frequency == 0:
                                # a full pass over the validation data:
                                val_err = 0
                                # zero_one_err = 0
                                val_batches = 0
                                for batch in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False):
                                    inputs, targets = batch
                                    # Subtract mean image
                                    inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)

                                    val_err_batch = fwd_fn(inputs, targets)
                                    val_err += val_err_batch
                                    val_batches += 1
                                epoch_validation_loss = val_err / val_batches

                                if epoch_validation_loss < best_validation_loss:
                                    if epoch_validation_loss < best_validation_loss * improvement_threshold:
                                        patience = max(patience, iter_now * patience_increase)
                                        # lr_patience *= lr_patience_increase
                                    best_params = lasagne.layers.get_all_param_values(network)
                                    best_validation_loss = epoch_validation_loss
                                    best_iter = iter_now
                                    lr_iter = best_iter
                                else:
                                    # decay learning rate if optim=='momentum'
                                    if optim == 'momentum' and (iter_now - lr_iter) > lr_patience:
                                        lr.set_value(lr.get_value() * lr_decay)
                                        lr_iter = iter_now

                            if patience <= iter_now:
                                done_looping = True
                                break

                        # Record training history
                        training_history['training_loss'].append(train_err / train_batches)
                        training_history['validation_loss'].append(epoch_validation_loss)
                        training_history['learning_rate'].append(lr.get_value())

                        epoch_time = time.time() - start_time
                        module1_time += epoch_time
                        # Then we print the results for this epoch:
                        print("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{}\t{:.3f}\t{}".format(
                            epoch + 1,
                            training_history['training_loss'][-1],
                            training_history['validation_loss'][-1],
                            best_validation_loss, best_iter + 1, epoch_time,
                            training_history['learning_rate'][-1]))

                        if (epoch + 1) % snapshot == 0:  # TODO try to save weights at best_iter
                            snapshot_path_string = snapshot_root + snapshot_name + '_' + str(iter_now + 1)
                            try:  # use case: terminate experiment before reaching `reps`
                                np.savez(snapshot_path_string + '.npz', *best_params)
                                np.savez(snapshot_path_string + '_history.npz', training_history)
                                plot_loss(training_history, snapshot_path_string + '_loss.png')
                                # plot_conv_weights(lasagne.layers.get_all_layers(network)[1],
                                #                   snapshot_path_string+'_conv1weights_')
                            except (KeyboardInterrupt, TypeError):
                                print 'Did not save', snapshot_name + '_' + str(iter_now + 1)
                                pass

                        epoch += 1
                except (KeyboardInterrupt, MemoryError):
                    # stop early but fall through to saving and evaluation below
                    pass

                print 'Training finished or KeyboardInterrupt (Training is never finished, only abandoned)'

                module1_time_eff = module1_time / iter_now * best_iter
                print('Total and Effective training time are {:.0f} and {:.0f}').format(
                    module1_time, module1_time_eff)
                time_profiles['train_module1'].append(module1_time)
                time_profiles['train_module1_eff'].append(module1_time_eff)

                # Save model after num_epochs or KeyboardInterrupt
                if (epoch + 1) % snapshot != 0:  # to avoid duplicate save
                    snapshot_path_string = snapshot_root + snapshot_name + '_' + str(iter_now + 1)
                    if not toy:
                        try:  # use case: terminate experiment before reaching `reps`
                            print 'Saving model...'
                            np.savez(snapshot_path_string + '.npz', *best_params)
                            np.savez(snapshot_path_string + '_history.npz', training_history)
                            plot_loss(training_history, snapshot_path_string + '_loss.png')
                            # plot_conv_weights(lasagne.layers.get_all_layers(network)[1],
                            #                   snapshot_path_string+'_conv1weights_')
                        except (KeyboardInterrupt, TypeError):
                            print 'Did not save', snapshot_name + '_' + str(iter_now + 1)
                            pass
                # And load them again later on like this:
                # with np.load('../snapshot_models/23alex16042023213910.npz') as f:
                #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
                #     # or
                #     # training_history = f['arr_0'].items()
                # lasagne.layers.set_all_param_values(network, param_values)
            # END OF MODULE 1

            # START MODULE 2
            print '\nMODULE 2'
            if not do_module1:
                if pretrained_w_path.endswith('pkl'):
                    snapshot_name = str(num_classes) + 'alexOTS'  # short for "off-the-shelf init"
                elif pretrained_w_path.endswith('npz'):
                    # Resume from a SINGLE snapshot
                    # extract name pattern, e.g. '23alex16042023213910full10'
                    # from string '../snapshot_models/23alex16042023213910full10_100.npz'
                    import re
                    regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+"
                    match = re.search(regex, pretrained_w_path)
                    snapshot_name = match.group(0)
                elif pretrained_w_path.endswith('/'):
                    # RESUMED FROM TRAINED MODULE 1 (ONE-TIME USE)
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' % seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path, snapshot_name) + '.npz') as f:
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                            lasagne.layers.set_all_param_values(network, w_list)
            else:
                # MAIN BRANCH - assume do_module1 is True AND have run `snapshot` epochs
                if (epoch + 1) > snapshot:
                    with np.load(snapshot_path_string + '.npz') as f:
                        # reload the best params for module 1
                        w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(network, w_list)

            score_train = compute_score(Xaug_train_val, Yaug_train_val)
            start_time = time.time()

            if load_t:
                from os import listdir
                import re
                if not pretrained_w_path.endswith('/'):
                    files = [pretrained_w_path]
                else:
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                for file_name in files:
                    regex_seed = '{0}{1}'.format(full_or_tune, seed)
                    match_seed = re.search(regex_seed, file_name)
                    if match_seed:
                        regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                        match = re.search(regex, file_name)
                        snapshot_name = match.group(0)
                        t_train = np.load(osp.join('t', '{0}.npy'.format(snapshot_name)))
            else:  # MAIN BRANCH
                thresholds = Threshold(score_train, Yaug_train_val)
                thresholds.find_t_for()  # determine t_train for each score_train. It will take a while
                # `thresholds` holds the t_train vector in its .t attribute
                t_train = np.asarray(thresholds.t)
                print 't_train is in ', t_train.min(), '..', t_train.max()
                print('t_train produced in {:.3f}s').format(time.time() - start_time)
                np.save('t/' + snapshot_name + '.npy', t_train)

            # Predictive model for t
            regr = linear_model.RidgeCV(cv=5)  # Ridge() is LinearClassifier() with L2-reg
            regr.fit(score_train, t_train)
            time_profiles['train_module2'].append(time.time() - start_time)
            # END OF MODULE 2

            # TESTING PHASE
            start_time = time.time()
            score_test = compute_score(X_test, Y_test)
            t_test = regr.predict(score_test)
            print 'original t_test is in ', min(t_test), '..', max(t_test)
            t_test[t_test > 1] = max(t_test[t_test < 1])
            t_test[t_test < 0] = min(t_test[t_test > 0])  # ! Keep t_test in [0,1]
            print 'corrected t_test is in ', min(t_test), '..', max(t_test)

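# Both Lasagne scripts in this section rely on an `iterate_minibatches` helper that is not
# shown. A sketch in the style of the Lasagne tutorial helper, written so that it also yields
# the final partial batch (which is how compute_score above consumes it), could look like this:
import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield (inputs, targets) slices of size `batchsize`, including a trailing partial batch."""
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs), batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]
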
def main(reps, pretrained_w_path, do_module1, init_seed=0, load_t=0, num_epochs=200,
         batchsize=96, fine_tune=0, patience=500, lr_init=1e-3, optim='adagrad', toy=0,
         num_classes=23):
    res_root = '/home/hoa/Desktop/projects/resources'
    X_path = osp.join(res_root, 'datasets/msrcv2/Xaug_b01c.npy')
    Y_path = osp.join(res_root, 'datasets/msrcv2/Y.npy')
    MEAN_IMG_PATH = osp.join(res_root, 'models/ilsvrc_2012_mean.npy')

    snapshot = 50  # save model after every `snapshot` epochs
    drop_p = 0.5  # drop out prob.
    lambda2 = 0.0005 / 2  # l2-regularizer constant
    # step=patience/4 # decay learning after every `step` epochs
    lr_patience = 60  # for learning rate schedule, if optim=='momentum'
    if toy:  # unit testing
        num_epochs = 10
        data_multi = 3
        reps = 2
        # drop_p=0
        # lambda2=0

    # Create name tag for the experiment
    if fine_tune:
        full_or_tune = 'tune'  # description tag for storing associated files
    else:
        full_or_tune = 'full'
    time_stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    snapshot_root = '../snapshot_models/'
    snapshot_name = str(num_classes) + 'alex' + time_stamp + full_or_tune

    # LOADING DATA
    print 'LOADING DATA ...'
    X = np.load(X_path)
    Y = np.load(Y_path)
    if X.shape[1] != 3:
        X = b01c_to_bc01(X)
    N = len(Y)
    print 'Raw X,Y shape', X.shape, Y.shape
    if len(X) != len(Y):
        print 'Inconsistent number of input images and labels. X is possibly augmented.'

    MEAN_IMG = np.load(MEAN_IMG_PATH)
    MEAN_IMG_227 = skimage.transform.resize(
        np.swapaxes(np.swapaxes(MEAN_IMG, 0, 1), 1, 2), (227, 227),
        mode='nearest', preserve_range=True)
    MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227, 1, 2), 0, 1).reshape((1, 3, 227, 227))

    all_metrics = []  # store metrics in each run
    time_profiles = {
        'train_module1': [],
        'train_module1_eff': [],
        'train_module2': [],
        'test': []
    }  # record training and testing time

    # PREPARE THEANO EXPRESSION FOR BOTH MODULES
    print 'COMPILING THEANO EXPRESSION ...'
    input_var = T.tensor4('inputs')
    target_var = T.imatrix('targets')
    network = build_model(num_classes=num_classes, input_var=input_var)

    # Create a loss expression for training
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    weights = lasagne.layers.get_all_params(network, regularizable=True)
    l2reg = theano.shared(floatX(lambda2)) * T.sum([T.sum(w ** 2) for w in weights])
    loss = loss.mean() + l2reg

    lr = theano.shared(np.array(lr_init, dtype=theano.config.floatX))
    lr_decay = np.array(1. / 3, dtype=theano.config.floatX)

    # Create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    # last-layer case is actually very simple:
    # `params` above is a list of all (W,b)-pairs
    # Therefore last layer's (W,b) is params[-2:]
    if fine_tune == 7:  # tuning params from fc7 to fc8
        params = params[-2:]
    # elif fine_tune == 6: # tuning params from fc6 to fc8
    #     params = params[-4:]
    # TODO adjust for per-layer training with local_lr

    if optim == 'momentum':
        updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9)
    elif optim == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr, rho=0.9, epsilon=1e-06)
    elif optim == 'adam':
        updates = lasagne.updates.adam(loss, params, learning_rate=lr,
                                       beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif optim == 'adagrad':
        updates = lasagne.updates.adagrad(loss, params, learning_rate=lr, epsilon=1e-06)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean() + l2reg
    # zero-one loss with threshold t = 0.5 for reference
    # zero_one_loss = T.abs_((test_prediction > theano.shared(floatX(0.5))) - target_var).sum(axis=1)
    # zero_one_loss /= target_var.shape[1].astype(theano.config.floatX)
    # zero_one_loss = zero_one_loss.mean()

    # Compile a function performing a backward pass (training step) on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    bwd_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function performing a forward pass,
    # returns validation loss, 0/1 Error, score i.e. Xout:
    fwd_fn = theano.function([input_var, target_var], test_loss)

    # Create a theano function for computing score
    score = lasagne.layers.get_output(network, deterministic=True)
    score_fn = theano.function([input_var], score)

    def compute_score(X, Y, batchsize=batchsize, shuffle=False):
        out = np.zeros(Y.shape)
        batch_id = 0
        for batch in iterate_minibatches(X, Y, batchsize, shuffle=False):
            inputs, _ = batch
            # Flip random half of the batch
            flip_idx = np.random.choice(len(inputs), size=len(inputs) / 2, replace=False)
            if len(flip_idx) > 1:
                inputs[flip_idx] = inputs[flip_idx, :, :, ::-1]
            # Subtract mean image
            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)
            if len(inputs) == batchsize:
                out[batch_id * batchsize:(batch_id + 1) * batchsize] = score_fn(inputs)
                batch_id += 1
            else:
                out[batch_id * batchsize:] = score_fn(inputs)
        return out

    try:
        # MAIN LOOP FOR EACH RUN
        for seed in np.arange(reps) + init_seed:
            # reset learning rate
            lr.set_value(lr_init)
            print '\nRUN', seed, '...'

            # Split train/val/test set
            indicies = np.arange(len(Y))
            Y_train_val, Y_test, idx_train_val, idx_test = train_test_split(
                Y, indicies, random_state=seed, train_size=float(2) / 3)
            Y_train, Y_val, idx_train, idx_val = train_test_split(
                Y_train_val, idx_train_val, random_state=seed)
            print "Train/val/test set size:", len(idx_train), len(idx_val), len(idx_test)

            idx_aug_train = data_aug(idx_train, mode='aug', isMat='idx', N=N)
            Xaug_train = X[idx_aug_train]
            Yaug_train = data_aug(Y_train, mode='aug', isMat='Y', N=N)

            idx_aug_val = data_aug(idx_val, mode='aug', isMat='idx', N=N)
            Xaug_val = X[idx_aug_val]
            Yaug_val = data_aug(Y_val, mode='aug', isMat='Y', N=N)

            # Module 2 training set is composed of module 1 training and validation set
            idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx', N=N)
            Xaug_train_val = X[idx_aug_train_val]
            Yaug_train_val = data_aug(Y_train_val, mode='aug', isMat='Y', N=N)

            # Test set
            X_test = X[idx_test]
            # Y_test is already returned in the first train_test_split

            print "Augmented train/val/test set size:", len(Xaug_train), len(Yaug_val), len(X_test)
            print "Augmented (X,Y) dtype:", Xaug_train.dtype, Yaug_val.dtype
            print "Processed Mean image:", MEAN_IMG.dtype, MEAN_IMG.shape

            if toy:  # try to overfit a tiny subset of the data
                Xaug_train = Xaug_train[:batchsize * data_multi + batchsize / 2]
                Yaug_train = Yaug_train[:batchsize * data_multi + batchsize / 2]
                Xaug_val = Xaug_val[:batchsize + batchsize / 2]
                Yaug_val = Yaug_val[:batchsize + batchsize / 2]

            # Init by pre-trained weights, if any
            if len(pretrained_w_path) > 0:
                layer_list = lasagne.layers.get_all_layers(network)  # 22 layers
                if pretrained_w_path.endswith('pkl'):
                    # load reference_net
                    # use case: weights initialized from pre-trained reference nets
                    f = open(pretrained_w_path, 'r')
                    w_list = pickle.load(f)  # list of 11 (W,b)-pairs
                    f.close()
                    # exclude (W,b) of fc8
                    lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2])
                    # BIG NOTE: don't be confused, it's pure coincidence that layer_list
                    # and w_list have the same index here. The last elements of layer_list are
                    # [.., fc6, drop6, fc7, drop7, fc8], while w_list is
                    # [..., W, b, W, b, W, b] where, e.g., w_list[-4] and w_list[-3] correspond to
                    # params that are associated with fc7, i.e. params that connect drop6 to fc7
                elif pretrained_w_path.endswith('npz'):
                    # load self-trained net
                    # use case: continue training from a snapshot model
                    with np.load(pretrained_w_path) as f:
                        # NOTE: only load snapshot of the same `seed`
                        # w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                        w_list = [f.items()['arr_%d' % i] for i in range(len(f.files))]  # load from bkviz, one-time use
                    lasagne.layers.set_all_param_values(network, w_list)
                elif pretrained_w_path.endswith('/'):
                    # init from 1 of the 30 snapshots
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' % seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path, snapshot_name) + '.npz') as f:
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                            lasagne.layers.set_all_param_values(network, w_list)

            # START MODULE 1
            module1_time = 0
            if do_module1:
                print 'MODULE 1'
                training_history = {}
                training_history['iter_training_loss'] = []
                training_history['iter_validation_loss'] = []
                training_history['training_loss'] = []
                training_history['validation_loss'] = []
                training_history['learning_rate'] = []

                # http://deeplearning.net/tutorial/gettingstarted.html#early-stopping
                # early-stopping parameters
                n_train_batches = Xaug_train.shape[0] / batchsize
                if Xaug_train.shape[0] % batchsize != 0:
                    n_train_batches += 1
                patience = patience  # look at this many examples regardless
                patience_increase = 2  # wait this much longer when a new best is found
                lr_patience_increase = 1.01
                improvement_threshold = 0.995  # a relative improvement of this much is
                                               # considered significant; a significant test
                                               # MIGHT be better
                validation_frequency = min(n_train_batches, patience / 2)
                # go through this many minibatches before checking the network
                # on the validation set; in this case we check every epoch

                best_params = None
                epoch_validation_loss = 0  # indicates that valid_loss has not been computed yet
                best_validation_loss = np.inf
                best_iter = -1
                lr_iter = -1
                test_score = 0.
                start_time = time.time()
                done_looping = False
                epoch = 0

                # Finally, launch the training loop.
                print("Starting training...")
                # We iterate over epochs:
                print("\nEpoch\tTrain Loss\tValid Loss\tBest-ValLoss-and-Iter\tTime\tL.Rate")
                sys.setrecursionlimit(10000)
                try:  # Early-stopping implementation
                    while (not done_looping) and (epoch < num_epochs):
                        # In each epoch, we do a full pass over the training data:
                        train_err = 0
                        train_batches = 0
                        start_time = time.time()
                        for batch in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True):
                            inputs, targets = batch
                            # Horizontal flip half of the images
                            bs = inputs.shape[0]
                            indices = np.random.choice(bs, bs / 2, replace=False)
                            inputs[indices] = inputs[indices, :, :, ::-1]
                            # Subtract mean image
                            # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead
                            inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)

                            train_err_batch = bwd_fn(inputs, targets)
                            train_err += train_err_batch
                            train_batches += 1

                            iter_now = epoch * n_train_batches + train_batches
                            training_history['iter_training_loss'].append(train_err_batch)
                            training_history['iter_validation_loss'].append(epoch_validation_loss)

                            if (iter_now + 1) % validation_frequency == 0:
                                # a full pass over the validation data:
                                val_err = 0
                                # zero_one_err = 0
                                val_batches = 0
                                for batch in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False):
                                    inputs, targets = batch
                                    # Subtract mean image
                                    inputs = (inputs - MEAN_IMG).astype(theano.config.floatX)

                                    val_err_batch = fwd_fn(inputs, targets)
                                    val_err += val_err_batch
                                    val_batches += 1
                                epoch_validation_loss = val_err / val_batches

                                if epoch_validation_loss < best_validation_loss:
                                    if epoch_validation_loss < best_validation_loss * improvement_threshold:
                                        patience = max(patience, iter_now * patience_increase)
                                        # lr_patience *= lr_patience_increase
                                    best_params = lasagne.layers.get_all_param_values(network)
                                    best_validation_loss = epoch_validation_loss
                                    best_iter = iter_now
                                    lr_iter = best_iter
                                else:
                                    # decay learning rate if optim=='momentum'
                                    if optim == 'momentum' and (iter_now - lr_iter) > lr_patience:
                                        lr.set_value(lr.get_value() * lr_decay)
                                        lr_iter = iter_now

                            if patience <= iter_now:
                                done_looping = True
                                break

                        # Record training history
                        training_history['training_loss'].append(train_err / train_batches)
                        training_history['validation_loss'].append(epoch_validation_loss)
                        training_history['learning_rate'].append(lr.get_value())

                        epoch_time = time.time() - start_time
                        module1_time += epoch_time
                        # Then we print the results for this epoch:
                        print("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{}\t{:.3f}\t{}".format(
                            epoch + 1,
                            training_history['training_loss'][-1],
                            training_history['validation_loss'][-1],
                            best_validation_loss, best_iter + 1, epoch_time,
                            training_history['learning_rate'][-1]))

                        if (epoch + 1) % snapshot == 0:  # TODO try to save weights at best_iter
                            snapshot_path_string = snapshot_root + snapshot_name + str(seed) + '_' + str(iter_now + 1)
                            try:  # use case: terminate experiment before reaching `reps`
                                np.savez(snapshot_path_string + '.npz', *best_params)
                                np.savez(snapshot_path_string + '_history.npz', training_history)
                                plot_loss(training_history, snapshot_path_string + '_loss.png')
                                # plot_conv_weights(lasagne.layers.get_all_layers(network)[1],
                                #                   snapshot_path_string+'_conv1weights_')
                            except (KeyboardInterrupt, TypeError):
                                print 'Did not save', snapshot_name + str(seed) + '_' + str(iter_now + 1)
                                pass

                        epoch += 1
                except (KeyboardInterrupt, MemoryError):
                    # stop early but fall through to saving and evaluation below
                    pass

                print 'Training finished or KeyboardInterrupt (Training is never finished, only abandoned)'

                module1_time_eff = module1_time / iter_now * best_iter
                print('Total and Effective training time are {:.0f} and {:.0f}').format(
                    module1_time, module1_time_eff)
                time_profiles['train_module1'].append(module1_time)
                time_profiles['train_module1_eff'].append(module1_time_eff)

                # Save model after num_epochs or KeyboardInterrupt
                if (epoch + 1) % snapshot != 0:  # to avoid duplicate save
                    snapshot_path_string = snapshot_root + snapshot_name + str(seed) + '_' + str(iter_now + 1)
                    if not toy:
                        try:  # use case: terminate experiment before reaching `reps`
                            print 'Saving model...'
                            np.savez(snapshot_path_string + '.npz', *best_params)
                            np.savez(snapshot_path_string + '_history.npz', training_history)
                            plot_loss(training_history, snapshot_path_string + '_loss.png')
                            # plot_conv_weights(lasagne.layers.get_all_layers(network)[1],
                            #                   snapshot_path_string+'_conv1weights_')
                        except (KeyboardInterrupt, TypeError):
                            print 'Did not save', snapshot_name + str(seed) + '_' + str(iter_now + 1)
                            pass
                # And load them again later on like this:
                # with np.load('../snapshot_models/23alex16042023213910.npz') as f:
                #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
                #     # or
                #     # training_history = f['arr_0'].items()
                # lasagne.layers.set_all_param_values(network, param_values)
            # END OF MODULE 1

            # START MODULE 2
            print '\nMODULE 2'
            if not do_module1:
                if pretrained_w_path.endswith('pkl'):
                    snapshot_name = str(num_classes) + 'alexOTS'  # short for "off-the-shelf init"
                elif pretrained_w_path.endswith('npz'):
                    # Resume from a SINGLE snapshot
                    # extract name pattern, e.g. '23alex16042023213910full10'
                    # from string '../snapshot_models/23alex16042023213910full10_100.npz'
                    import re
                    regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+"
                    match = re.search(regex, pretrained_w_path)
                    snapshot_name = match.group(0)
                elif pretrained_w_path.endswith('/'):
                    # RESUMED FROM TRAINED MODULE 1 (ONE-TIME USE)
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' % seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                            print snapshot_name
                            with np.load(osp.join(pretrained_w_path, snapshot_name) + '.npz') as f:
                                w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                            lasagne.layers.set_all_param_values(network, w_list)
            else:
                # MAIN BRANCH - assume do_module1 is True AND have run `snapshot` epochs
                if (epoch + 1) > snapshot:
                    with np.load(snapshot_path_string + '.npz') as f:
                        # reload the best params for module 1
                        w_list = [f['arr_%d' % i] for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(network, w_list)

            score_train = compute_score(Xaug_train_val, Yaug_train_val)
            start_time = time.time()

            if load_t:  # Server failed at the wrong time. We only have t backed-up
                if pretrained_w_path.endswith('/'):
                    from os import listdir
                    import re
                    files = [f for f in listdir(pretrained_w_path)
                             if osp.isfile(osp.join(pretrained_w_path, f))]
                    for file_name in files:
                        regex_seed = 'full%d_' % seed
                        match_seed = re.search(regex_seed, file_name)
                        if match_seed:
                            regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+"
                            match = re.search(regex, file_name)
                            snapshot_name = match.group(0)
                t_train = np.load(osp.join('t', '{0}.npy'.format(snapshot_name)))
            else:  # MAIN BRANCH
                thresholds = Threshold(score_train, Yaug_train_val)
                thresholds.find_t_for()  # determine t_train for each score_train. It will take a while
                # `thresholds` holds the t_train vector in its .t attribute
                t_train = np.asarray(thresholds.t)
                print 't_train is in ', t_train.min(), '..', t_train.max()
                print('t_train produced in {:.3f}s').format(time.time() - start_time)
                np.save('t/' + snapshot_name + str(seed) + '.npy', t_train)

            # Predictive model for t
            regr = linear_model.RidgeCV(cv=5)  # Ridge() is LinearClassifier() with L2-reg
            regr.fit(score_train, t_train)
            time_profiles['train_module2'].append(time.time() - start_time)
            # END OF MODULE 2

            # TESTING PHASE
            start_time = time.time()
            score_test = compute_score(X_test, Y_test)
            t_test = regr.predict(score_test)
            print 'original t_test is in ', min(t_test), '..', max(t_test)
            t_test[t_test > 1] = max(t_test[t_test < 1])
            t_test[t_test < 0] = min(t_test[t_test > 0])  # ! Keep t_test in [0,1]
            print 'corrected t_test is in ', min(t_test), '..', max(t_test)

            # Predict label
            metrics = predict_label(score_test, Y_test, t_test, seed, num_classes, verbose=1)
            time_profiles['test'].append(time.time() - start_time)
            all_metrics.append(metrics)

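# `predict_label` is not shown in either script. Conceptually, module 2 learns one threshold
# per example and a label is predicted positive when its score exceeds that example's
# threshold; the sketch below illustrates that idea only (the real predict_label also
# computes the evaluation metrics that get appended to all_metrics).
import numpy as np

def threshold_predictions(score_test, t_test):
    """Binarize scores per example: label j is on iff score[i, j] >= t_test[i]."""
    return (score_test >= t_test[:, np.newaxis]).astype(int)

# Y_pred = threshold_predictions(score_test, t_test)
# Y_pred can then be compared against Y_test with any multi-label metric.
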