def load_dataset_in_memory_and_resize(data_access, set, division, dataset_path, targets_path, tmp_size,
                                      final_size, batch_size):
    if data_access == "in-memory":
        with timer("Loading %s data" % set):
            dataset = InMemoryDataset(set, dataset_path, source_targets=targets_path, division=division)
            draw_data = np.copy(dataset.dataset)
            targets = np.copy(dataset.targets)
            del dataset
    elif data_access == "fuel":
        with timer("Loading %s data" % set):
            dataset = FuelDataset(set, tmp_size, batch_size=batch_size, shuffle=False, division=division)
            draw_data, targets = dataset.return_whole_dataset()
            del dataset
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)

    if tmp_size != final_size:
        # Resize images to the requested final size
        out = np.zeros((draw_data.shape[0], final_size[0], final_size[1], final_size[2]), dtype="float32")
        with timer("Resizing %s images" % set):
            for i in range(draw_data.shape[0]):
                out[i] = resize_pil(draw_data[i], final_size[0:2])
        del draw_data
        return out, targets
    else:
        return draw_data, targets
def features_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size,
                       bagging_iterator, multiple_input, preprocessing_func, preprocessing_args,
                       pretrained_model):
    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size, source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size, bagging=bagging_size,
                                    bagging_iterator=bagging_iterator)
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)
    # Generator loop
    while 1:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size, preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            features = pretrained_model.predict(processed_batch)
            yield features, labels
        else:
            features = pretrained_model.predict([processed_batch for i in range(multiple_input)])
            yield features, labels
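
# Hedged usage sketch for `features_generator` (illustration only, not part of the original
# pipeline): it draws feature batches computed by a frozen pretrained network and feeds them
# to a small top model with train_on_batch. The helper name, the `nb_batches` argument and
# the idea of passing a compiled `top_model` are assumptions; the TrainingParams attributes
# used below all appear elsewhere in this file.
def _example_train_top_model_on_features(top_model, pretrained_model, training_params, nb_batches=100):
    gen = features_generator(training_params.data_access, training_params.dataset_path,
                             training_params.targets_path, training_params.batch_size,
                             training_params.tmp_size, training_params.final_size,
                             training_params.bagging_size, training_params.bagging_iterator,
                             1,  # multiple_input
                             training_params.preprocessing_func, training_params.preprocessing_args,
                             pretrained_model)
    for _ in range(nb_batches):
        # Each item is (features, one-hot labels), ready for the top model
        features, labels = next(gen)
        top_model.train_on_batch(features, labels, accuracy=True)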
def check_preprocessed_data(data_access, dataset, targets, batch_size, tmp_size, final_size,
                            preprocessing_func, preprocessing_args, n=10):
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size, source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("test", tmp_size, batch_size=batch_size, division="leaderboard", shuffle=False)
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)
    # Compute only one batch
    start = time.time()
    batch, batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    processed_batch = np.zeros((batch.shape[0], final_size[2], final_size[0], final_size[1]), dtype="float32")
    for k in range(batch_size):
        processed_batch[k] = preprocessing_func(batch[k], *preprocessing_args).transpose(2, 0, 1)
    end = time.time()
    print "Batch Shape = ", processed_batch.shape, "with dtype =", processed_batch.dtype
    print "Targets Shape =", batch_targets.shape, "with dtype =", batch_targets.dtype
    for i in range(n):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)" % (batch_targets[i][0], batch_targets[i][1]))
        # Check the channel axis of the processed (channel-first) batch, not the spatial axis of the raw batch
        if processed_batch.shape[1] == 3:
            plt.imshow(processed_batch[i].transpose(1, 2, 0))
        else:
            plt.imshow(processed_batch[i, 0])
        plt.show()
    print "Processing 1 batch took : %.5f" % (end - start)
def multi_features_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size,
                             bagging_iterator, multiple_input, preprocessing_func, preprocessing_args,
                             pretrained_models, mode="concat"):
    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size, source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size, bagging=bagging_size,
                                    bagging_iterator=bagging_iterator)
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)
    # Generator loop
    while 1:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size, preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            features = []
            for pretrained_model in pretrained_models:
                features.append(pretrained_model.predict(processed_batch, batch_size=1))
            if mode == "concat":
                features = np.concatenate(features, axis=1)
            yield features, labels
        else:
            raise Exception("Generator does not work with multiple inputs")
def images_generator(data_access, dataset, targets, batch_size, tmp_size, final_size, bagging_size,
                     bagging_iterator, multiple_input, division, preprocessing_func, preprocessing_args):
    """
    Generator function used with the Keras 'fit_on_generator' training interface. Works with
    InMemoryDataset and FuelDataset. Yields a tuple (processed batch, targets) to the training loop.
    The preprocessing runs on the CPU, in parallel with the GPU training. See 'fit_on_generator'
    for more details.

    :param data_access: "in-memory" or "fuel"
    :param dataset: path to the dataset numpy file (not used when data_access = "fuel")
    :param targets: path to the targets numpy file (not used when data_access = "fuel")
    :param batch_size: number of images per batch
    :param tmp_size: used when data_access == "fuel"; the datastream returns images of this size
    :param final_size: size of the images used for training
    :param preprocessing_func: function applied to each training batch
    :param preprocessing_args: arguments of the preprocessing function
    :return: tuple (batch, targets)
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size, source_targets=targets,
                                        division=division)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train", tmp_size, batch_size=batch_size, bagging=bagging_size,
                                    bagging_iterator=bagging_iterator, division=division)
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s." % data_access)
    while 1:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size, final_size, preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            yield processed_batch, labels
        else:
            yield [processed_batch for i in range(multiple_input)], labels
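
# Hedged usage sketch for `images_generator` (illustration only, not part of the original code):
# it pulls a few preprocessed image batches and feeds them to a compiled Keras model with
# train_on_batch. In the real pipeline the generator is handed to the 'fit_on_generator'-style
# training loop so that CPU preprocessing overlaps with GPU training. The helper name and the
# `nb_batches` argument are assumptions for illustration.
def _example_train_on_image_batches(model, training_params, nb_batches=100):
    gen = images_generator(training_params.data_access, training_params.dataset_path,
                           training_params.targets_path, training_params.batch_size,
                           training_params.tmp_size, training_params.final_size,
                           training_params.bagging_size, training_params.bagging_iterator,
                           1,  # multiple_input
                           training_params.division,
                           training_params.preprocessing_func, training_params.preprocessing_args)
    for _ in range(nb_batches):
        # Each item is (preprocessed channel-first batch, one-hot labels)
        processed_batch, labels = next(gen)
        model.train_on_batch(processed_batch, labels, accuracy=True)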
def multiscale_predict(model, training_params, division="leaderboard", verbose=False):
    initial_input_shape = model.input_shape
    k = 0
    for test_size in training_params.test_sizes:
        if verbose:
            print "\nTesting for size :" + str(test_size)
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[1] != model.input_shape[3]:
            new_model = adapt_to_new_input(model, (test_size[2], test_size[0], test_size[1]),
                                           initial_input_shape[1:], verbose=True)
        else:
            new_model = model
        testset = FuelDataset("test", test_size, batch_size=training_params.test_batch_size, shuffle=False,
                              division=division)
        preds, labels = predict(new_model, testset, training_params, flip=False, verbose=verbose)
        if k == 0:
            final_preds = np.copy(preds)
        else:
            final_preds += preds
        k += 1.0
        # Predictions on the flipped testset
        flipped_preds, labels = predict(new_model, testset, training_params, flip=True, verbose=verbose)
        final_preds += flipped_preds
        k += 1.0
    # Arithmetic averaging of predictions
    final_preds_arithm = final_preds / k
    return final_preds_arithm, labels
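
# Hedged usage sketch for `multiscale_predict` (illustration only): it averages predictions over
# every test size and its horizontal flip, then scores the average against the labels using the
# same argmax-matching formula as `test_model_on_exp` below. The helper name is an assumption;
# `model` is assumed to be a trained Keras model matching `training_params`.
def _example_multiscale_score(model, training_params):
    preds, labels = multiscale_predict(model, training_params, division="leaderboard", verbose=False)
    n_correct = np.sum(np.argmax(labels, axis=1) - np.argmax(preds, axis=1) == 0)
    return float(n_correct) / labels.shape[0]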
def get_features_on_exp(position, mode, N, training_params, verbose=False):
    model, path_model = get_best_model_from_exp(training_params.path_out)
    initial_input_shape = model.input_shape
    print "\n" + path_model
    k = 0
    out = []
    for test_size in training_params.test_sizes:
        if verbose:
            s = "\nTesting for size :" + str(test_size)
            print s
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[1] != model.input_shape[3]:
            new_model = adapt_to_new_input(model, (test_size[2], test_size[0], test_size[1]),
                                           initial_input_shape[1:], verbose=True)
        else:
            new_model = model
        dataset = FuelDataset(mode, test_size, batch_size=training_params.test_batch_size, shuffle=False,
                              division=training_params.division)
        preds, labels = get_features(new_model, dataset, position, N, training_params, True, flip=False)
        # Features on the flipped dataset
        flipped_preds, flipped_labels = get_features(new_model, dataset, position, N, training_params, True,
                                                     flip=True)
        out.append(preds)
        out.append(flipped_preds)
    return out, labels
def launch_adversarial_training(training_params):
    """
    Load the data, and train a Keras model.
    :param training_params: a TrainingParams object which contains each parameter of the training
    :return:
    """
    if os.path.exists(training_params.path_out) is False:
        os.mkdir(os.path.abspath(training_params.path_out))

    ###### LOADING VALIDATION DATA #######
    # N.B. : load_dataset_in_memory_and_resize expects a 'division' argument; passing
    # training_params.division here fixes the original call, which omitted it.
    validset, valid_targets = load_dataset_in_memory_and_resize(training_params.data_access, "valid",
                                                                training_params.division,
                                                                training_params.dataset_path,
                                                                training_params.targets_path,
                                                                training_params.final_size,
                                                                training_params.final_size,
                                                                training_params.test_batch_size)
    valid_targets = convert_labels(valid_targets)

    ###### Preprocessing VALIDATION DATA #######
    for mode in training_params.valid_preprocessing:
        validset = preprocess_dataset(validset, training_params, mode)
    # Transpose validset >> (N, channel, X, Y)
    validset = validset.transpose(0, 3, 1, 2)
    # Multiple input ?
    if training_params.multiple_inputs > 1:
        validset = [validset for i in range(training_params.multiple_inputs)]

    ###### MODEL INITIALIZATION #######
    with timer("Model initialization"):
        model = training_params.initialize_model()
    if training_params.pretrained_model is not None:
        with timer("Pretrained Model initialization"):
            pretrained_model = training_params.initialize_pretrained_model()
        training_params.generator_args.append(pretrained_model)
        # Preprocess the validset with the pretrained model(s)
        if type(pretrained_model) is list:
            features = []
            for pmodel in pretrained_model:
                features.append(pmodel.predict(validset))
            validset = np.concatenate(features, axis=1)
        else:
            validset = pretrained_model.predict(validset)

    ###### SAVE PARAMS ######
    s = training_params.print_params()
    # Save command
    f = open(training_params.path_out + "/command.txt", "w")
    f.writelines(" ".join(sys.argv))
    f.writelines(s)
    f.close()
    # Print architecture
    print_architecture(model, path_out=training_params.path_out + "/architecture.txt")

    ###### TRAINING SET #######
    train_dataset = FuelDataset("train", training_params.tmp_size,
                                batch_size=training_params.batch_size,
                                bagging=training_params.bagging_size,
                                bagging_iterator=training_params.bagging_iterator)

    ###### ADVERSARIAL MAPPING ######
    input_ = model.layers[0].input
    y_ = model.y
    layer_output = model.layers[-1].get_output()
    xent = K.categorical_crossentropy(y_, layer_output)
    loss = xent.mean()
    grads = K.gradients(loss, input_)
    get_grads = K.function([input_, y_], [loss, grads])

    ###### TRAINING LOOP #######
    count = training_params.fine_tuning
    epoch_count = 0
    with timer("Training"):
        while training_params.learning_rate >= training_params.learning_rate_min \
                and epoch_count < training_params.nb_max_epoch:
            if count != 0:
                # Restart from the best model with a lower LR
                model = training_params.initialize_model()
                model.load_weights(training_params.path_out + "/MEM_%d/best_model.cnn" % (count - 1))
                # Recompile get_grads
                input_ = model.layers[0].input
                y_ = model.y
                layer_output = model.layers[-1].get_output()
                xent = K.categorical_crossentropy(y_, layer_output)
                loss = xent.mean()
                grads = K.gradients(loss, input_)
                get_grads = K.function([input_, y_], [loss, grads])
            best = 0.0
            patience = training_params.max_no_best
            losses = []
            adv_losses = []
            accuracies = []
            adv_accuracies = []
            valid_losses = []
            valid_accuracies = []
            epoch_count = 0
            no_best_count = 0
            path = training_params.path_out + "/MEM_%d" % count
            if os.path.exists(path) is False:
                os.mkdir(path)
            # Log file
            f = open(path + "/log.txt", "w")
            f.write("LR = %.2f\n" % training_params.learning_rate)
            f.close()
            # Config file
            open(path + "/config.netconf", 'w').write(model.to_json())

            while no_best_count < patience and epoch_count < training_params.nb_max_epoch:
                new = True
                loss = 0.0
                adv_loss = 0.0
                accuracy = 0.0
                adv_accuracy = 0.0
                # Trainset Loop
                N = training_params.Ntrain / (training_params.batch_size * 1)
                for i in range(N):
                    # Train
                    print "\rEpoch %d : Batch %d over %d" % (epoch_count, i, N),
                    processed_batch, labels = get_next_batch(train_dataset, training_params.batch_size,
                                                             training_params.final_size,
                                                             training_params.preprocessing_func,
                                                             training_params.preprocessing_args)
                    l, acc = model.train_on_batch(processed_batch, labels, accuracy=True)
                    # Update stats
                    if new:
                        loss = l
                        accuracy = acc
                    else:
                        loss = 0.9 * loss + 0.1 * l
                        accuracy = 0.9 * accuracy + 0.1 * acc
                    # Get adversarial examples
                    l, grads = get_grads([processed_batch, labels])
                    updates = np.sign(grads)
                    adversarials = processed_batch + updates
                    # Train on adv examples
                    adv_l, adv_acc = model.train_on_batch(adversarials, labels, accuracy=True)
                    # Update stats
                    if new:
                        adv_loss = adv_l
                        adv_accuracy = adv_acc
                        new = False
                    else:
                        adv_loss = 0.9 * adv_loss + 0.1 * adv_l
                        adv_accuracy = 0.9 * adv_accuracy + 0.1 * adv_acc
                # Store stats
                losses.append(loss)
                accuracies.append(accuracy)
                adv_losses.append(adv_loss)
                adv_accuracies.append(adv_accuracy)
                # Validset loss and accuracy
                out = model.predict(validset)
                valid_loss = categorical_crossentropy(valid_targets, out)
                # Use a dedicated variable so the fine-tuning counter 'count' (MEM_%d folders)
                # is not overwritten, as it was in the original code.
                n_correct = np.sum(np.argmax(valid_targets, axis=1) - np.argmax(out, axis=1) == 0)
                score = float(n_correct) / valid_targets.shape[0]
                valid_losses.append(valid_loss)
                valid_accuracies.append(score)
                # Stop criterion and Save model
                string = "***\nEpoch %d: Loss : %0.5f, Adv loss : %0.5f, Valid loss : %0.5f, " \
                         "Acc : %0.5f, Adv acc : %0.5f, Valid acc : %0.5f" % (epoch_count, losses[-1],
                                                                              adv_losses[-1], valid_losses[-1],
                                                                              accuracies[-1], adv_accuracies[-1],
                                                                              valid_accuracies[-1])
                if score > best:
                    no_best_count = 0
                    save_path = path + "/best_model.cnn"
                    if training_params.verbose > 0:
                        string = string + "\tBEST\n"
                        print string
                        write_log(path + "/log.txt", string)
                    best = score
                    model.save_weights(save_path, overwrite=True)
                else:
                    no_best_count += 1
                    save_path = path + "/last_epoch.cnn"
                    if training_params.verbose > 0:
                        string = string + "\n"
                        print string
                        write_log(path + "/log.txt", string)
                    model.save_weights(save_path, overwrite=True)
                epoch_count += 1

            # Update learning rate
            training_params.learning_rate *= 0.1
            training_params.update_model_args()
            with open(path + "/history.pkl", "w") as f:
                pickle.dump(losses, f)
                pickle.dump(adv_losses, f)
                pickle.dump(valid_losses, f)
                pickle.dump(accuracies, f)
                pickle.dump(adv_accuracies, f)
                pickle.dump(valid_accuracies, f)
            count += 1
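
# Side note on the adversarial step used above (illustration, not the author's code):
# `adversarials = processed_batch + np.sign(grads)` is the fast gradient sign method with a
# step of 1.0 per pixel. A common variant scales the perturbation by a small epsilon; the
# sketch below shows that variant. `epsilon` and the helper name are assumptions, not
# hyper-parameters defined by this project; `get_grads` is the Keras function compiled in
# launch_adversarial_training ([input_, y_] -> [loss, grads]).
def _fgsm_examples(processed_batch, labels, get_grads, epsilon=0.01):
    # Gradient of the cross-entropy loss with respect to the input batch
    _, grads = get_grads([processed_batch, labels])
    # Perturb each pixel by epsilon in the direction that increases the loss
    return processed_batch + epsilon * np.sign(grads)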
def test_model_on_exp(training_params, verbose=False, write_txt_file=False):
    model, path_model = get_best_model_from_exp(training_params.path_out)
    initial_input_shape = model.input_shape
    print "\n" + path_model
    k = 0
    lines = []
    for test_size in training_params.test_sizes:
        if verbose:
            s = "\nTesting for size :" + str(test_size)
            print s
            lines.append(s)
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[1] != model.input_shape[3]:
            new_model = adapt_to_new_input(model, (test_size[2], test_size[0], test_size[1]),
                                           initial_input_shape[1:], verbose=True)
        else:
            new_model = model
        testset = FuelDataset("valid", test_size, batch_size=training_params.test_batch_size, shuffle=False,
                              division="leaderboard")
        score, loss, preds, labels = test_model(new_model, testset, training_params, flip=False,
                                                verbose=verbose, return_preds=True)
        if write_txt_file:
            lines.append("\n\tDraw testset score = %.5f\n\tDraw testset loss = %.5f" % (score, loss))
        if k == 0:
            final_preds = np.copy(preds)
        else:
            final_preds += preds
        k += 1.0
        # Predictions on the flipped testset
        flipped_score, flipped_loss, flipped_preds, labels = test_model(new_model, testset, training_params,
                                                                        flip=True, verbose=verbose,
                                                                        return_preds=True)
        if write_txt_file:
            lines.append("\n\tFlipped testset score = %.5f\n\tFlipped testset loss = %.5f" % (flipped_score,
                                                                                              flipped_loss))
        final_preds += flipped_preds
        k += 1.0
    # Arithmetic averaging of predictions
    final_preds_arithm = final_preds / k
    count = np.sum(np.argmax(labels, axis=1) - np.argmax(final_preds_arithm, axis=1) == 0)
    final_score_arithm = float(count) / labels.shape[0]
    if verbose:
        s = "\nFinal score (arithm) =%.5f" % final_score_arithm
        print s
        lines.append(s)
    if write_txt_file:
        f = open(training_params.path_out + "/testset_score.txt", "w")
        for line in lines:
            f.writelines(line)
        f.close()
    return final_preds_arithm, final_score_arithm, labels