def evaluator_predict_on_dataset(model_base, model_top, model_settings, x, osm):
    '''
    Evaluate a model by loading all the data into memory. This is effective with OSM models,
    but with the others it will likely flood the memory and get terminated - use generators in that case.

    :param model_base: base CNN model
    :param model_top: attached custom top classifier model
    :param model_settings: settings of the model (to recognize the type of the model)
    :param x: image data
    :param osm: list of corresponding osm vectors
    :return: labeled data (labeled from images and osm)
    '''
    if model_settings["model_type"] == 'simple_cnn_with_top':
        labels_base = model_base.predict(x, batch_size=32, verbose=1)
        labels_predicted = model_top.predict(labels_base, batch_size=32, verbose=1)
    elif model_settings["model_type"] == 'img_osm_mix':
        labels_base = model_base.predict(x, batch_size=32, verbose=1)
        osm_input = osm
        labels_predicted = model_top.predict([osm_input, labels_base], batch_size=32, verbose=1)
    elif model_settings["model_type"] == 'osm_only':
        osm_input = osm
        labels_predicted = model_top.predict(osm_input, batch_size=32, verbose=1)

    print len_(labels_predicted)

    # each prediction row is a 1-element vector; flatten to a list of scalars
    labels_return = []
    for label in labels_predicted:
        labels_return.append(label[0])
    return labels_return
def evaluator_generators_predict(model_base, model_top, model_settings, img_generator, osm, size):
    '''
    Use generators to evaluate a model (keeps memory requirements bounded).

    :param model_base: base CNN model
    :param model_top: attached custom top classifier model
    :param model_settings: settings of the model (to recognize the type of the model)
    :param img_generator: image generator
    :param osm: list of corresponding osm vectors
    :param size: number of images
    :return: labeled data (labeled from images and osm)
    '''
    print img_generator, size

    if model_settings["model_type"] == 'simple_cnn_with_top':
        labels_base = model_base.predict_generator(img_generator, steps=size, verbose=1)
        print "len_(labels_base)", len_(labels_base)
        labels_predicted = model_top.predict(labels_base, batch_size=32, verbose=1)
    elif model_settings["model_type"] == 'img_osm_mix':
        labels_base = model_base.predict_generator(img_generator, steps=size, verbose=1)
        print "len_(labels_base)", len_(labels_base)
        osm_input = osm
        labels_predicted = model_top.predict([osm_input, labels_base], batch_size=32, verbose=1)

    print len_(labels_predicted)

    # each prediction row is a 1-element vector; flatten to a list of scalars
    labels_return = []
    for label in labels_predicted:
        labels_return.append(label[0])
    return labels_return
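# A minimal dispatch sketch contrasting the two evaluation paths above: in-memory
# prediction for OSM-only models versus generator-based prediction for image models.
# The helper name is hypothetical; evaluator_load_model and getImgGenerator_from_lists
# are the helpers this module already uses in evaluator() below.
def _predict_dispatch_sketch(model_base, model_top, model_settings, lists, osm):
    if model_settings["model_type"] == 'osm_only':
        # OSM vectors comfortably fit in memory
        return evaluator_predict_on_dataset(model_base, model_top, model_settings, None, osm)
    # image-based models: use generators to keep memory bounded
    img_generator = getImgGenerator_from_lists(lists)
    return evaluator_generators_predict(model_base, model_top, model_settings,
                                        img_generator[1], osm, img_generator[2])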
def predict_from_generators(test_generator, val_generator, number_in_test, number_in_val, filename_features_train, filename_features_test, model):
    # Predict using generators (memory sensitive)
    bottleneck_features_train = model.predict_generator(test_generator, steps=number_in_test, verbose=1)
    print "saving train_features of size", len_(bottleneck_features_train), " into ", filename_features_train
    np.save(open(filename_features_train, 'wb'), bottleneck_features_train)

    bottleneck_features_validation = model.predict_generator(val_generator, steps=number_in_val, verbose=1)
    print "saving val_features of size", len_(bottleneck_features_validation), " into ", filename_features_test
    np.save(open(filename_features_test, 'wb'), bottleneck_features_validation)
def predict_and_save_features(x, y, x_val, y_val, filename_features_train, filename_features_test, model):
    # dimensions of x are (num, 3, x_dim, y_dim), e.g. (75, 3, 150, 150)
    bottleneck_features_train = model.predict(x, verbose=1)
    np.save(open(filename_features_train, 'wb'), bottleneck_features_train)
    print "saving train_features of size", len_(bottleneck_features_train), " into ", filename_features_train

    bottleneck_features_validation = model.predict(x_val, verbose=1)
    print "saving val_features of size", len_(bottleneck_features_validation), " into ", filename_features_test
    np.save(open(filename_features_test, 'wb'), bottleneck_features_validation)
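# Hedged sketch of the reading side: load_features (imported from
# ModelHandler.ModelTester elsewhere in this repo) presumably reads the two .npy
# dumps back and pairs them with the labels, roughly like this (signature assumed):
def _load_features_sketch(filename_features_train, filename_features_test, y, y_val):
    import numpy as np
    train_data = np.load(open(filename_features_train, 'rb'))
    validation_data = np.load(open(filename_features_test, 'rb'))
    return [train_data, y, validation_data, y_val]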
def plot_only_averages(plt, special_histories, data_names, colors, custom_title, just='val', save=[False, '']):
    # Plot only the averaged loss curves; 'just' selects 'val', 'train' or 'both'.
    items_to_draw = []
    names_to_print = []
    linestyles = []
    for i in range(0, len(special_histories)):
        if just == 'val':
            items_to_draw.append(special_histories[i]['avg_val_loss'])
            linestyles.append('solid')
            names_to_print.append(data_names[i])
        elif just == 'train':
            items_to_draw.append(special_histories[i]['avg_loss'])
            linestyles.append('solid')
            names_to_print.append(data_names[i])
        elif just == 'both':
            items_to_draw.append(special_histories[i]['avg_loss'])
            linestyles.append('dashed')
            items_to_draw.append(special_histories[i]['avg_val_loss'])
            linestyles.append('solid')
            names_to_print.append(data_names[2*i])
            names_to_print.append(data_names[2*i+1])

    print len_(items_to_draw), items_to_draw
    print len_(names_to_print), names_to_print
    print special_histories[0].keys()

    plt.figure()
    leg = []
    [plt, leg] = draw_items_for_legend(plt, leg, items_to_draw, names_to_print, colors, linestyles)
    for i in range(0, len(items_to_draw)):
        avg = items_to_draw[i]
        plt, _ = draw_avg_data(avg, colors[i], linestyles[i], plt)
    draw_titles_legends(plt, leg, custom_title)

    save_plot(plt, save[0], save[1])
    plt.draw()
    return plt
def loadDataFromSegments(path_to_segments_file, SCORE, verbose=False, we_dont_care_about_missing_images=False):
    '''
    Load lists from Segments.

    :param path_to_segments_file: Segments file to be loaded
    :param SCORE: flag for whether we care only about scored Segments
    :param verbose: print list sizes and samples
    :param we_dont_care_about_missing_images: flag for whether we care only about those Segments with images (the OSM model doesn't need them)
    :return: lists and Segments
    '''
    Segments = DataOperations.LoadDataFile(path_to_segments_file)
    segments_dir = os.path.dirname(path_to_segments_file) + '/'
    __list_of_images, __labels, __osm, __segment_ids, flag_is_extended = KerasPreparation.LoadDataFromSegments(
        Segments, has_score=SCORE, path_to_images=segments_dir,
        we_dont_care_about_missing_images=we_dont_care_about_missing_images)
    if verbose:
        print "__list_of_images", len_(__list_of_images), __list_of_images[0:5]
        print "__labels", len_(__labels), __labels[0:5]
        print "__osm", len_(__osm)
        print "__segment_ids", len_(__segment_ids), __segment_ids[0:5]
        print "flag_is_extended", flag_is_extended
    return [__list_of_images, __labels, __osm, __segment_ids], Segments
def load_tmp_dataset():
    '''
    Example of how a dataset can be loaded.
    :return: image data x, labels y and osm vectors
    '''
    model_settings = {}  # HACK
    model_settings["dataset_name"] = "miniset_640px"
    model_settings["pixels"] = 640
    model_settings["number_of_images"] = None
    model_settings["seed"] = 13
    #Settings["models"][0]["dump_file_override"] = 'SegmentsData.dump'
    model_settings["dump_file_override"] = 'SegmentsData_mark100.dump'

    dataset = DatasetHandler.CreateDataset.load_custom(model_settings["dataset_name"], model_settings["pixels"],
                                                       desired_number=model_settings["number_of_images"],
                                                       seed=model_settings["seed"],
                                                       filename_override=model_settings["dump_file_override"])
    [x, y] = dataset.getDataLabels()
    osm = dataset.getDataLabels_only_osm()
    print len_(x), len_(y), len_(osm)
    return x, y, osm
def expandOsmDataWithMultipleRadii(self, model_settings):
    '''
    The idea is to load all the radii data we have available and add it to each of the segments.
    We assume the basic experiment definition.
    '''
    r50 = 'SegmentsData_marked_R50_4TablesN.dump'
    r100 = 'SegmentsData_marked_R100_4TablesN.dump'  # assumed: the original pointed at the R50 dump, most likely a copy-paste slip
    r200 = 'SegmentsData_marked_R200_4TablesN.dump'

    import DatasetHandler
    dataset_r50 = DatasetHandler.CreateDataset.load_custom(model_settings["dataset_name"], model_settings["pixels"],
                                                           desired_number=model_settings["number_of_images"],
                                                           seed=model_settings["seed"], filename_override=r50)
    r50osm = dataset_r50.getDataLabels_only_osm_raw()
    dataset_r100 = DatasetHandler.CreateDataset.load_custom(model_settings["dataset_name"], model_settings["pixels"],
                                                            desired_number=model_settings["number_of_images"],
                                                            seed=model_settings["seed"], filename_override=r100)
    r100osm = dataset_r100.getDataLabels_only_osm_raw()
    dataset_r200 = DatasetHandler.CreateDataset.load_custom(model_settings["dataset_name"], model_settings["pixels"],
                                                            desired_number=model_settings["number_of_images"],
                                                            seed=model_settings["seed"], filename_override=r200)
    r200osm = dataset_r200.getDataLabels_only_osm_raw()

    from Omnipresent import len_
    print "osm", len(self.__osm), len_(self.__osm), self.__osm[0][0:10]
    print "osm50", len(r50osm), len_(r50osm), r50osm[0][0:10]
    print "osm100", len(r100osm), len_(r100osm), r100osm[0][0:10]
    print "osm200", len(r200osm), len_(r200osm), r200osm[0][0:10]

    new_osm = []
    for i in range(0, len(r50osm)):
        osm_of_i = []
        if model_settings["multiple_radii_mark"] == 'I':
            osm_of_i = list(r100osm[i]) + list(r50osm[i]) + list(r200osm[i])
        elif model_settings["multiple_radii_mark"] == 'II':
            osm_of_i = list(r100osm[i]) + list(r200osm[i])
        elif model_settings["multiple_radii_mark"] == 'III':
            osm_of_i = list(r100osm[i]) + list(r50osm[i])
        new_osm.append(osm_of_i)

    print "enhanced", len(new_osm), len_(new_osm), new_osm[0][0:10]
    self.__osm = new_osm
    print "enhanced", len(self.__osm), len_(self.__osm), self.__osm[0][0:10]
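# For reference, the per-segment concatenation controlled by "multiple_radii_mark"
# works like this (toy one-element vectors, purely illustrative):
#   r50osm[i] = [a], r100osm[i] = [b], r200osm[i] = [c]
#   'I'   -> [b, a, c]   (R100 + R50 + R200)
#   'II'  -> [b, c]      (R100 + R200)
#   'III' -> [b, a]      (R100 + R50)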
def evaluator(model_file, settings_file, name_output_file, custom_target_geojson=None, show_segments_histo_stats=False, actually_save=True):
    '''
    Main Evaluator function.

    :param model_file: path to the model .h5 file
    :param settings_file: path to the settings file which was used to train this model
    :param name_output_file: name of the labeled geojson output file
    :param custom_target_geojson: optional pair [path to geojson to be marked, path to its Segments file]; defaults are used when None
    :param show_segments_histo_stats: plot histogram statistics of the altered segment scores
    :param actually_save: save the marked geojson and the y_ref/y_pred dumps
    :return: (mse, mae) - currently short-circuited to (0, 0), see below
    '''
    model_base, model_top, model_settings = evaluator_load_model(model_file, settings_file)

    # Load data!
    if custom_target_geojson is None:
        path_to_segments_file = default_segments_path()
    else:
        path_to_segments_file = custom_target_geojson[1]

    we_dont_care_about_missing_images = False
    if model_settings["model_type"] == 'osm_only':
        we_dont_care_about_missing_images = True

    lists, Segments = loadDataFromSegments(path_to_segments_file, None,
                                           we_dont_care_about_missing_images=we_dont_care_about_missing_images)
    #lists = small_lists(lists)

    print "BEFORE MARKING"
    analyze_lists(lists)
    y_ref = lists[1]
    osm = osm_from_lists(lists)
    segment_ids = lists[3]

    if model_settings["model_type"] == 'osm_only':
        x = None
        y_pred = evaluator_predict_on_dataset(model_base, model_top, model_settings, x, osm)
    else:
        # these models rely on images; use generators for reasonable memory requirements
        img_generator = getImgGenerator_from_lists(lists)
        y_pred = evaluator_generators_predict(model_base, model_top, model_settings, img_generator[1], osm, img_generator[2])

    print len(y_pred), y_pred[0:10]
    pred_list = [lists[0], y_pred, lists[2], lists[3]]
    print "AFTER MARKING"
    analyze_lists(pred_list)

    EvaluatedData = prepEvaluatedData(y_pred, segment_ids)
    Altered = AlterSegments(EvaluatedData, Segments, only_unknown_scores=True)

    if show_segments_histo_stats:
        from DatasetHandler import DatasetObj
        scores = []
        for AltSeg in Altered:
            scores.append(AltSeg.Score)
        print len_(scores)

        import DatasetHandler.DatasetVizualizators
        save_to_pdf = False
        labels = scores
        DatasetHandler.DatasetVizualizators.plotHistogram(labels, 'Score distribution histogram', num_bins=20)
        DatasetHandler.DatasetVizualizators.plotX_sortValues(labels, 'Distribution of score (sorted)', notReverse=True)
        if save_to_pdf:
            DatasetHandler.DatasetVizualizators.saveAllPlotsToPDF()
        DatasetHandler.DatasetVizualizators.show()

    if custom_target_geojson is None:
        GeoJSON = loadDefaultGEOJSON()
    else:
        geojson_to_be_marked = custom_target_geojson[0]
        GeoJSON = loadGeoJson(geojson_to_be_marked)

    evaluated_geojson = markGeoJSON(GeoJSON, Altered)
    path_geojson_out = name_output_file
    if actually_save:
        saveGeoJson(evaluated_geojson, path_geojson_out)

    # Ex post testing
    DefaultSegments = DataOperations.LoadDataFile(path_to_segments_file)
    traverseGeoJSON(evaluated_geojson, DefaultSegments)

    #print y_ref, y_pred
    if actually_save:
        np.savetxt('y_ref.out', y_ref, delimiter=',')
        np.savetxt('y_pred.out', y_pred, delimiter=',')

    # NOTE: the early return below deliberately skips the additional metrics,
    # leaving them unreachable for now.
    return 0, 0

    # Additional metrics
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    mae = mean_absolute_error(y_ref, y_pred)
    mse = mean_squared_error(y_ref, y_pred)
    return mse, mae
def train_model(model, dataset, model_settings):
    '''
    Train a model on a dataset using these settings.

    :param model: model to be trained
    :param dataset: dataset to be used
    :param model_settings: model settings to be read for specifics
    :return: training history
    '''
    history = None
    if model_settings["model_type"] == 'simple_cnn_with_top':
        filename_features_train = model_settings["filename_features_train"]
        filename_features_test = model_settings["filename_features_test"]
        [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
        [train_data, train_labels, validation_data, validation_labels] = load_features(filename_features_train, filename_features_test, y, y_val)
        print "input shape of features", len_(train_data), "and labels", len_(train_labels)

        top_model = model[1]
        history = train_top_model(top_model, model_settings, train_data, train_labels, validation_data, validation_labels)

        # Finetuning
        print len_(history)
        print history

        if model_settings["finetune"]:
            finetune_model = None
            [train_data, train_labels, validation_data, validation_labels] = [None, None, None, None]

            # Cut at certain spots - only possible where the Resnet50 structure allows it: 172, 162, 152, 140, ...
            if model_settings["finetune_DEBUG_METHOD_OF_MODEL_GEN"]:
                finetune_features_train = model_settings["finetune_features_train"]
                finetune_features_test = model_settings["finetune_features_test"]
                [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
                [train_data, train_labels, validation_data, validation_labels] = load_features(finetune_features_train, finetune_features_test, y, y_val)
                print "FINE TUNE DATA input shape of features", len_(train_data), "and labels", len_(train_labels)

                # Function: (top, cnn, features_mid) -> new_model
                model_cnn = model[0]
                top_cnn = model[1]
                cut = model_settings["finetune_num_of_cnn_layers"]
                shape = np.asarray(train_data).shape[1:]
                print shape
                finetune_model = build_finetune_model(model_cnn, top_cnn, cut, input_shape=shape)

                print "----- finetune model"
                print finetune_model.summary()
                plot_model(finetune_model, to_file='TEST_FINETUNE.png', show_shapes=True)
            else:
                # It's possible to build the joined model anywhere, but without cooked
                # features that means a lengthy evaluation here on the spot.
                n = model_settings["finetune_num_of_cnn_layers"]
                for layer in model[0].layers[:n]:
                    print layer
                    layer.trainable = False

                print "----- CNN MODEL"
                print model[0].summary()
                print "----- TOP MODEL"
                print model[1].summary()

                # The new model is made from the cnn and the top model
                finetune_model = join_two_models(model[0], model[1])
                print "----- JOINED MODEL"
                print finetune_model.summary()
                plot_model(finetune_model, to_file='TEST_MODEL.png', show_shapes=True)
                [train_data, train_labels, validation_data, validation_labels] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])

            # We have the model, now let's compute
            epochs_tmp = model_settings["epochs"]
            model_settings["epochs"] = model_settings["finetune_epochs"]
            optimizer_tmp = model_settings["optimizer"]
            model_settings["optimizer"] = model_settings["finetune_optimizer"]

            history_to_append = train_top_model(finetune_model, model_settings, train_data, train_labels, validation_data, validation_labels)

            model_settings["epochs"] = epochs_tmp
            model_settings["optimizer"] = optimizer_tmp

        # Append histories. Example history:
        # {'val_mean_absolute_error': [0.27633494684393978, 0.27673623693381116],
        #  'loss': [0.15686354677721928, 0.12237877659907737],
        #  'mean_absolute_error': [0.3303849070751238, 0.30686430593424935],
        #  'val_loss': [0.10361090554317957, 0.10128958691173875]}
        if model_settings["finetune"]:
            print history
            print history_to_append
            for key in history.keys():
                history[key] += history_to_append[key]
            print history

    elif model_settings["model_type"] == 'img_osm_mix':
        if model_settings["special_case"] == 'hack_dont_use_features':
            # Get data
            # ps: be careful about their order when enhancing...
            # ImageGenerator for multiple inputs

            # 1 Build the whole model now
            osm_shape = dataset.getShapeOfOsm()
            model = build_full_mixed_model(osm_shape)

            # 2 Train (which will take some time now...)
            [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
            [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])
            history = train_top_model(model, model_settings, [x, osm], y, [x_val, osm_val], y_val)
            print "special case scenario, mixed model, hack_dont_use_features"
        else:
            filename_features_train = model_settings["filename_features_train"]
            filename_features_test = model_settings["filename_features_test"]
            [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])
            [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
            [train_data, _, validation_data, _] = load_features(filename_features_train, filename_features_test, y, y_val)
            print len_(train_data), len_(y), len_(osm)

            top_model = model[1]
            history = train_top_model(top_model, model_settings, [osm, train_data], y, [osm_val, validation_data], y_val)

    elif model_settings["model_type"] == 'osm_only':
        [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])
        [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
        osm_model = model[0]
        history = train_top_model(osm_model, model_settings, osm, y, osm_val, y_val)

    else:
        print "Yet to be programmed."

    return history
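# The finetune history is appended key-wise onto the initial history. A tiny
# self-contained illustration of that merge (toy numbers, hypothetical helper name):
def _demo_append_histories():
    history = {'loss': [0.30, 0.25], 'val_loss': [0.35, 0.31]}
    history_to_append = {'loss': [0.20], 'val_loss': [0.28]}
    for key in history.keys():
        history[key] += history_to_append[key]
    # history is now {'loss': [0.30, 0.25, 0.20], 'val_loss': [0.35, 0.31, 0.28]}
    return history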
def cook_features(models, datasets, Settings):
    '''
    Makes sure that we have features available for each model-dataset duo in our shared
    feature folder. If not, we will cook them.

    :param models: list of models (currently without their tops)
    :param datasets: list of dataset objects
    :param Settings: settings
    :return: number of ready models
    '''
    # cooking shared data
    [x, y, x_val, y_val] = [None, None, None, None]
    index = 0
    for model in models:
        model_settings = Settings["models"][index]
        if model_settings["model_type"] == 'simple_cnn_with_top' or model_settings["model_type"] == 'img_osm_mix':
            dataset = datasets[model_settings["dataset_pointer"]]
            from ModelHandler.ModelOI import get_feature_file_names, do_we_need_to_cook
            # ps: if this import is at the top of the file, it causes a circular import - and TF yells...
            filename_features_train = model_settings["filename_features_train"]
            filename_features_test = model_settings["filename_features_test"]
            do_we_need_to_cook_bool = do_we_need_to_cook(filename_features_train, filename_features_test)
            print "Looking up files:", filename_features_train, filename_features_test

            if do_we_need_to_cook_bool:
                model_cnn = model[0]
                cooking_method = model_settings["cooking_method"]
                print "We need to cook, chosen method is", cooking_method
                if cooking_method == 'direct':
                    if x is None:
                        [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
                    print len_(x)
                    predict_and_save_features(x, y, x_val, y_val, filename_features_train, filename_features_test, model_cnn)
                elif cooking_method == 'generators':
                    [order, order_val, image_generator, size, image_generator_val, size_val] = dataset.getImageGenerator(validation_split=model_settings["validation_split"])
                    print len_(order)
                    predict_from_generators(image_generator, image_generator_val, size, size_val, filename_features_train, filename_features_test, model_cnn)
            else:
                print "No need to cook, the files already exist"

            # Finetuning also requires prepared feature files.
            if model_settings["finetune"]:
                finetune_features_train = model_settings["finetune_features_train"]
                finetune_features_test = model_settings["finetune_features_test"]
                do_we_need_to_cook_bool = do_we_need_to_cook(finetune_features_train, finetune_features_test)
                print "Looking up finetune feature files:", finetune_features_train, finetune_features_test

                if do_we_need_to_cook_bool:
                    model_cnn = model[0]
                    cooking_method = model_settings["cooking_method"]
                    #n = len(model[0].layers) - model_settings["finetune_num_of_cnn_layers"]
                    n = model_settings["finetune_num_of_cnn_layers"]

                    print "------ Omitting layers:"
                    for layer in model[0].layers[n:]:
                        print layer.get_config()['name'], layer
                    print "Saving this layer's outputs:"
                    print model_cnn.layers[n].get_config()['name'], model_cnn.layers[n], model_cnn.layers[n].get_config()

                    from keras.models import Model
                    model_middle = Model(inputs=model_cnn.input, outputs=model_cnn.layers[n].output)

                    print "We need to cook the finetuning files too, chosen method is", cooking_method
                    if cooking_method == 'direct':
                        if x is None:
                            [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
                        print len_(x)
                        predict_and_save_features(x, y, x_val, y_val, finetune_features_train, finetune_features_test, model_middle)
                    elif cooking_method == 'generators':
                        [order, order_val, image_generator, size, image_generator_val, size_val] = dataset.getImageGenerator(validation_split=model_settings["validation_split"])
                        print len_(order)
                        predict_from_generators(image_generator, image_generator_val, size, size_val, finetune_features_train, finetune_features_test, model_middle)
                else:
                    print "No need to cook finetune feature files, they already exist"

        elif model_settings["model_type"] == 'osm_only':
            # No need to cook features from images in this case
            print "Chosen model type (", model_settings["model_type"], ") doesn't require features to be cooked and loaded."
        index += 1
    return index
def k_fold_crossvalidation(model, dataset, model_settings):
    '''
    K-fold crossvalidation scheme.
    Includes proper loading of models, testing and processing of the results.
    '''
    from ModelHandler.ModelTester import load_features
    from ModelHandler.ModelTester import train_top_model
    import numpy

    k = model_settings["crossvalidation_k"]

    # The idea is to generate k (e.g. 4) folds of indices. With the dataset having been
    # shuffled already, we can just use the indices 0..number_of_images.
    number_of_images_total = dataset.num_of_images
    print "Total of ", number_of_images_total, " images."
    indices = range(0, number_of_images_total)
    indices_in_fjords = chunks(indices, k)
    #print indices_in_fjords
    print "sizes of fjords:", map(len, indices_in_fjords)

    # (tests)
    test = []
    for fjord in indices_in_fjords:
        test += fjord
    if not test == indices:
        print 'not the same!'

    # fold indices are now prepared;
    # collect all_inputs and all_outputs depending on the type of experiment we are running
    all_inputs = []
    all_outputs = []

    # outputs are always score labels
    all_outputs = dataset.getDataLabels_only_y()

    features = []
    active_model = None
    if model_settings["model_type"] == 'img_osm_mix' or model_settings["model_type"] == 'simple_cnn_with_top':
        filename_features_train = model_settings["filename_features_train"]
        filename_features_test = model_settings["filename_features_test"]
        [train_data, _, validation_data, _] = load_features(filename_features_train, filename_features_test, None, None)
        features = numpy.append(train_data, validation_data, 0)
        #arr_test = arr[0:split_at]
        #arr_val = arr[split_at:]

    # TODO: MODEL_TYPE_SPLIT
    if model_settings["model_type"] == 'simple_cnn_with_top':
        print "Prepare all_inputs and all_outputs for Image only model."
        # inputs are all the images, but for our model it's the features
        all_inputs = features
        active_model = model[1]
    elif model_settings["model_type"] == 'osm_only':
        print "Prepare all_inputs and all_outputs for OSM only model."
        # inputs are all the osm vectors
        all_inputs = dataset.getDataLabels_only_osm()
        active_model = model[0]
    elif model_settings["model_type"] == 'img_osm_mix':
        print "Prepare all_inputs and all_outputs for Mixed model."
        # inputs are a list of features and osm vectors
        osms = dataset.getDataLabels_only_osm()
        all_inputs = [osms, features]
        active_model = model[1]
        print len_(osms), "and", len_(features)
    else:
        print "Yet to be programmed."

    # save the active model's weights, so we don't cheat by accumulating better and better results...
    initial_weights = active_model.get_weights()

    print "Sizes of all_inputs:", len_(all_inputs), "and all_outputs:", len_(all_outputs)

    # variables for remembering data from histories
    last_training_errors = []
    best_training_errors = []
    last_validation_errors = []
    best_validation_errors = []
    all_histories_of_this_model = []
    last_training_measure = []
    best_training_measure = []
    last_validation_measure = []
    best_validation_measure = []

    for selected_fjord in range(0, k):
        active_model.set_weights(initial_weights)

        train_indices, valid_indices = kfold(indices_in_fjords, selected_fjord)

        train_inputs = select_data(train_indices, all_inputs)
        valid_inputs = select_data(valid_indices, all_inputs)
        train_outputs = select_data(train_indices, all_outputs)
        valid_outputs = select_data(valid_indices, all_outputs)

        print "selected_fjord", selected_fjord
        print "Sizes of train_inputs:", len_(train_inputs), "and train_outputs:", len_(train_outputs)
        print "Sizes of valid_inputs:", len_(valid_inputs), "and valid_outputs:", len_(valid_outputs)

        # into training and result collecting
        history = train_top_model(active_model, model_settings, train_inputs, train_outputs, valid_inputs, valid_outputs)
        #print history

        measure = 'mean_absolute_error'
        error = 'loss'
        val_measure = 'val_' + measure
        val_error = 'val_' + error
        '''
        Example history:
        {'val_mean_absolute_error': [0.00036219754838384688, 7.0134797169885132e-06, 3.973643103449831e-08,
                                     3.973643103449831e-08, 3.973643103449831e-08],
         'loss': [0.34813621640205383, 0.16262358427047729, 0.19959338009357452, 0.16045540571212769,
                  0.16040021181106567],
         'mean_absolute_error': [0.50139808654785156, 0.22280247509479523, 0.25902602076530457,
                                 0.21229584515094757, 0.21108284592628479],
         'val_loss': [1.7484823899849289e-07, 9.398822692352482e-11, 4.7369517129061591e-15,
                      4.7369517129061591e-15, 4.7369517129061591e-15]}
        '''
        # process history!
        all_histories_of_this_model.append(history)
        last_training_errors.append(history[error][-1])
        last_validation_errors.append(history[val_error][-1])
        last_training_measure.append(history[measure][-1])
        last_validation_measure.append(history[val_measure][-1])

        best_training_errors.append(best_min(history[error]))
        best_validation_errors.append(best_min(history[val_error]))
        best_training_measure.append(best_min(history[measure]))
        best_validation_measure.append(best_min(history[val_measure]))

    print "error", error
    print "last_training_errors", last_training_errors
    print "best_training_errors", best_training_errors
    print "last_validation_errors", last_validation_errors
    print "best_validation_errors", best_validation_errors
    print "measure", measure
    print "last_training_measure", last_training_measure
    print "best_training_measure", best_training_measure
    print "last_validation_measure", last_validation_measure
    print "best_validation_measure", best_validation_measure
    print "all_histories_of_this_model", all_histories_of_this_model

    special_history_dictionary = {}
    special_history_dictionary["last_training_errors"] = last_training_errors
    special_history_dictionary["best_training_errors"] = best_training_errors
    special_history_dictionary["last_validation_errors"] = last_validation_errors
    special_history_dictionary["best_validation_errors"] = best_validation_errors
    special_history_dictionary["last_training_measure"] = last_training_measure
    special_history_dictionary["best_training_measure"] = best_training_measure
    special_history_dictionary["last_validation_measure"] = last_validation_measure
    special_history_dictionary["best_validation_measure"] = best_validation_measure
    special_history_dictionary["all_histories_of_this_model"] = all_histories_of_this_model

    history = special_history_dictionary
    return history
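# Hedged sketches of the fold helpers used above. chunks and kfold are defined
# elsewhere in the repo; these minimal versions only illustrate the assumed
# semantics: split the indices into k roughly equal folds, then pick one fold
# as validation and concatenate the rest for training.
def _chunks_sketch(indices, k):
    import math
    fold_size = int(math.ceil(len(indices) / float(k)))
    return [indices[i:i + fold_size] for i in range(0, len(indices), fold_size)]

def _kfold_sketch(indices_in_fjords, selected_fjord):
    valid_indices = indices_in_fjords[selected_fjord]
    train_indices = []
    for i, fjord in enumerate(indices_in_fjords):
        if i != selected_fjord:
            train_indices += fjord
    return train_indices, valid_indices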
def handle_noncanon_dataset(Settings, model_settings):
    '''
    Special case scenario. We are creating a new custom dataset, instead of using one of
    the big officially used, "canon" datasets.

    :param Settings: settings for the whole experiment
    :param model_settings: settings for our one dataset
    :return:
    '''
    if model_settings["noncanon_dataset"] == 'expand_existing_dataset':
        # Idea: take an existing dataset and expand it:
        #   directly load the old segments file
        #   for each segment
        #       for each image
        #           apply the custom ImageDataGenerator to generate new images (depending on settings)
        #           save the new images into the target folder as well as into this Segment
        #   save the edited Segments array into a new SegmentsFile.dump
        debug_visual_output = False
        debug_txt_output = False

        from DatasetHandler.CreateDataset import get_path_for_dataset
        from Downloader.DataOperations import LoadDataFile
        from Downloader.KerasPreparation import LoadActualImages
        import numpy as np
        if debug_visual_output:
            from matplotlib import pyplot
            from keras.preprocessing.image import array_to_img
            import math

        target_folder = model_settings["dataset_name"]
        source_folder = model_settings["source_dataset"]
        filename_override = model_settings["dump_file_override"]

        source_segments_path = get_path_for_dataset(source_folder, filename_override)
        source_segments_dir = os.path.dirname(source_segments_path) + '/'
        if not file_exists(source_segments_dir + filename_override):
            print "WARNING !!!!"
            print '\t', "Careful, couldn't find the file", source_segments_dir + filename_override
            print '\t', "... we will instead be using ", source_segments_path

        target_segments_path = get_path_for_dataset(target_folder, '')
        target_segments_dir = os.path.dirname(target_segments_path) + '/'
        target_segments_path = target_segments_dir + model_settings["dump_file_expanded"]

        # Check whether we don't already have it
        if file_exists(target_segments_path) and folder_exists(target_segments_dir + 'images'):
            list1 = os.listdir(target_segments_dir + 'images')
            list2 = os.listdir(source_segments_dir + 'images')
            if len(list1) >= len(list2):
                # Seems like we have copied it correctly too
                print "We already have this dataset extended! (", len(list1), len(list2), ")"
                return
        else:
            print '\t', file_exists(target_segments_path), target_segments_path
            print '\t', folder_exists(target_segments_dir + 'images'), target_segments_dir + 'images'

        generated_images_folder = os.path.dirname(target_segments_path) + '/images/'

        print "source_segments_path", source_segments_path  # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px/SegmentsData.dump
        print "source_segments_dir", source_segments_dir    # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px/
        print "target_segments_path", target_segments_path  # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px_expanded/SegmentsData_images_generated_test_folder_expanded.dump
        print "target_segments_dir", target_segments_dir    # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px_expanded/

        # copy source_dataset -> target_dataset in dataset_name,
        # from source_segments_dir/images to target_segments_dir/images
        source__path = source_segments_dir + 'images'
        target__path = target_segments_dir + 'images'
        copy_folder(source__path, target__path)

        # test the success of this copy process!
        # for each file in source_segments_dir/images check that a copy exists in target_segments_dir/images
        was_ok = False
        while not was_ok:
            was_ok = True
            list_of_source_files = os.listdir(source__path)
            for item in list_of_source_files:
                file_source = source__path + '/' + item
                file_target = target__path + '/' + item
                file_ok = True
                if not file_exists(file_target):
                    file_ok = False
                else:
                    # file exists, but just to be paranoid-secure, compare md5 hashes
                    md5_1 = md5(file_source)
                    md5_2 = md5(file_target)
                    if md5_1 != md5_2:
                        file_ok = False
                if not file_ok:
                    was_ok = False
                    copy_file(file_source, file_target)
                    print '-- was missing, now fixed: ' + file_source

        size_of_batch = model_settings["noncanon_dataset_genfrom1"]
        image_generator = model_settings["noncanon_dataset_imagegenerator"]
        print "image_generator", image_generator

        Segments = LoadDataFile(source_segments_path)
        number_of_images_parsed = 0
        for Segment in Segments:
            number_of_images = Segment.number_of_images
            for i_th_image in range(0, number_of_images):
                if Segment.hasLoadedImageI(i_th_image):
                    filename = source_segments_dir + Segment.getImageFilename(i_th_image)
                    number_of_images_parsed += 1
                    print filename

                    # we have one image filepath - generate data from it
                    x = LoadActualImages([filename])
                    y = np.array([Segment.SegmentId])
                    if debug_txt_output:
                        print "ORIGINAL id", y, "ith", i_th_image, "img:", len_(x[0])

                    X_batch = []
                    y_batch = []
                    from DatasetHandler.custom_image import ImageDataGenerator as custom_ImageDataGenerator
                    number_of_images_generated = 0
                    for x_gen, y_gen in image_generator.flow(x, y, batch_size=1, save_to_dir=generated_images_folder,
                                                             save_prefix=str(y) + '_', save_format='jpg'):
                        number_of_images_generated += 1
                        image = x_gen[0]
                        filename_generated = y_gen[1][0]
                        gen_id = y_gen[0][0]
                        if debug_txt_output:
                            print gen_id, filename_generated

                        # save the image at path filename_generated into the Segments hierarchy!
                        print "Segment.number_of_images", Segment.number_of_images
                        print "Segment.LocationsIndex", Segment.LocationsIndex
                        print "Segment.DistinctLocations", Segment.DistinctLocations
                        print "Segment.DistinctNearbyVector", Segment.DistinctNearbyVector
                        print "Segment.HasLoadedImages", Segment.HasLoadedImages
                        print "Segment.ErrorMessages", Segment.ErrorMessages

                        # The offset value (1000) is the marker for generated images
                        location_index = Segment.LocationsIndex[i_th_image] + 1000
                        # accordingly we get Segment.DistinctLocations[location_index] and Segment.DistinctNearbyVector[location_index]
                        has_img = Segment.HasLoadedImages[i_th_image]
                        has_err = Segment.ErrorMessages[i_th_image]

                        # Add to this Segment
                        Segment.number_of_images += 1
                        Segment.LocationsIndex.append(location_index)
                        Segment.HasLoadedImages.append(has_img)
                        Segment.ErrorMessages.append(has_err)

                        # Change filename and path ([6:] presumably strips the leading 'images' from the relative filename)
                        new_filename_generated = target_segments_dir + 'images' + Segment.getImageFilename(Segment.number_of_images - 1)[6:]
                        if debug_txt_output:
                            print "rename", filename_generated, "to", new_filename_generated
                        shutil.move(filename_generated, new_filename_generated)
                        print ".", new_filename_generated

                        X_batch.append(image)
                        y_batch.append(gen_id)
                        #print "id", y_gen, "img:", len_(x_gen), array_md5(image)

                        if len(X_batch) == size_of_batch:
                            if debug_txt_output:
                                print "GENERATED ", len(y_batch), " images > ", len_(X_batch), y_batch
                            if debug_visual_output:
                                # create a (roughly square) grid of the generated images
                                size_for_plot = int(math.floor(math.sqrt(size_of_batch - 0.1)) + 1)
                                size_for_plot_y = size_for_plot
                                while size_of_batch <= size_for_plot * (size_for_plot_y - 1):
                                    size_for_plot_y -= 1
                                print size_for_plot, "x", size_for_plot_y, " grid"
                                for i in range(0, len(X_batch)):
                                    pyplot.subplot(size_for_plot_y, size_for_plot, i + 1)
                                    img = X_batch[i]
                                    backimg = array_to_img(img)
                                    pyplot.imshow(backimg)
                                # show the plot
                                pyplot.show()
                                break
                            break  # end generation for this one image

                    if debug_txt_output:
                        print "Save new images from id", y, " in", len_(X_batch)

        print "number_of_images_parsed", number_of_images_parsed

        from Downloader.DataOperations import SaveDataFile
        print "Saving new Segments file into ", target_segments_path
        SaveDataFile(target_segments_path, Segments)
    else:
        print "This type of noncanon dataset generation has not yet been implemented!"