Example #1
def evaluator_predict_on_dataset(model_base, model_top, model_settings, x, osm):
    '''
    Evaluate the model by loading all data into memory at once. This works well for OSM models, but with image
    models it will likely exhaust memory and get the process killed - use the generator variant in that case.

    :param model_base: base CNN model
    :param model_top: attached custom top classifier model
    :param model_settings: settings of the model (used to determine the model type)
    :param x: image data
    :param osm: list of corresponding osm vectors
    :return: predicted labels (from images and/or OSM vectors)
    '''
    if model_settings["model_type"] is 'simple_cnn_with_top':
        labels_base = model_base.predict(x, batch_size=32, verbose=1)
        labels_predicted = model_top.predict(labels_base, batch_size=32, verbose=1)
    elif model_settings["model_type"] is 'img_osm_mix':
        labels_base = model_base.predict(x, batch_size=32, verbose=1)
        osm_input = osm
        labels_predicted = model_top.predict([osm_input, labels_base], batch_size=32, verbose=1)
    elif model_settings["model_type"] is 'osm_only':
        osm_input = osm
        labels_predicted = model_top.predict(osm_input, batch_size=32, verbose=1)

    print len_(labels_predicted)
    labels_return = []
    for label in labels_predicted:
        labels_return.append(label[0])
    return labels_return
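
A minimal usage sketch. The call below is hypothetical: evaluator_load_model appears in Example #9, and the file names, images and osm_vectors are placeholders, not part of the original code.

# Hypothetical usage - file names and data variables are placeholders:
model_base, model_top, model_settings = evaluator_load_model('model.h5', 'model_settings.dump')
if model_settings["model_type"] == 'osm_only':
    scores = evaluator_predict_on_dataset(model_base, model_top, model_settings, None, osm_vectors)
else:
    scores = evaluator_predict_on_dataset(model_base, model_top, model_settings, images, osm_vectors)
print scores[0:5]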
Example #2
def evaluator_generators_predict(model_base, model_top, model_settings, img_generator, osm, size):
    '''
    Evaluate the model using generators (memory-friendly for image models).
    :param model_base: base CNN model
    :param model_top: attached custom top classifier model
    :param model_settings: settings of the model (used to determine the model type)
    :param img_generator: image generator
    :param osm: list of corresponding osm vectors
    :param size: number of images.
    :return: predicted labels (from images and OSM vectors)
    '''
    print img_generator, size

    if model_settings["model_type"] == 'simple_cnn_with_top':
        labels_base = model_base.predict_generator(img_generator, steps=size, verbose=1)
        print "len_(labels_base)", len_(labels_base)
        labels_predicted = model_top.predict(labels_base, batch_size=32, verbose=1)
    elif model_settings["model_type"] == 'img_osm_mix':
        labels_base = model_base.predict_generator(img_generator, steps=size, verbose=1)
        print "len_(labels_base)", len_(labels_base)
        osm_input = osm
        labels_predicted = model_top.predict([osm_input, labels_base], batch_size=32, verbose=1)

    print len_(labels_predicted)
    labels_return = []
    for label in labels_predicted:
        labels_return.append(label[0])
    return labels_return
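
Note that steps=size with size equal to the number of images implies the generator yields one image per step. A minimal sketch of such a generator, where load_img_as_batch is a hypothetical loader returning an array of shape (1, height, width, channels):

# Hypothetical one-image-per-step generator matching steps=size above:
def one_by_one_generator(image_paths):
    for path in image_paths:
        yield load_img_as_batch(path)  # hypothetical loader; must return a batch of exactly one image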
Example #3
def predict_from_generators(test_generator, val_generator, number_in_test, number_in_val, filename_features_train, filename_features_test, model):
    # Predict using generators (memory sensitive)
    bottleneck_features_train = model.predict_generator(test_generator, steps=number_in_test, verbose=1)
    print "saving train_features of size", len_(bottleneck_features_train), " into ", filename_features_train
    np.save(open(filename_features_train, 'wb'), bottleneck_features_train)
    bottleneck_features_validation = model.predict_generator(val_generator, steps=number_in_val, verbose=1)
    print "saving val_features of size", len_(bottleneck_features_validation), " into ", filename_features_test
    np.save(open(filename_features_test, 'wb'), bottleneck_features_validation)
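
The saved arrays are presumably read back by load_features, which later examples use; a plausible sketch of the reading side, mirroring the np.save calls above:

# Plausible counterpart to the np.save calls above (the real load_features lives elsewhere):
def load_saved_features(filename_features_train, filename_features_test):
    train_data = np.load(open(filename_features_train, 'rb'))
    validation_data = np.load(open(filename_features_test, 'rb'))
    return train_data, validation_data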
Example #4
def predict_and_save_features(x, y, x_val, y_val, filename_features_train, filename_features_test, model):
    # dimensions of x are (num,3,x_dim, y_dim) = (75, 3, 150, 150)
    bottleneck_features_train = model.predict(x, verbose=1)
    np.save(open(filename_features_train, 'wb'), bottleneck_features_train)
    print "saving train_features of size", len_(bottleneck_features_train), " into ", filename_features_train
    bottleneck_features_validation = model.predict(x_val, verbose=1)
    print "saving val_features of size", len_(bottleneck_features_validation), " into ", filename_features_test
    np.save(open(filename_features_test, 'wb'), bottleneck_features_validation)
Example #5
def plot_only_averages(plt, special_histories, data_names, colors, custom_title, just='val', save=[False,'']):
    items_to_draw = []
    names_to_print = []
    linestyles = []

    for i in range(0,len(special_histories)):
        if just=='val':
            items_to_draw.append(special_histories[i]['avg_val_loss'])
            linestyles.append('solid')
            names_to_print.append(data_names[i])

        if just == 'train':
            items_to_draw.append(special_histories[i]['avg_loss'])
            linestyles.append('solid')
            names_to_print.append(data_names[i])

        if just == 'both':
            items_to_draw.append(special_histories[i]['avg_loss'])
            linestyles.append('dashed')
            items_to_draw.append(special_histories[i]['avg_val_loss'])
            linestyles.append('solid')

            names_to_print.append(data_names[2*i])
            names_to_print.append(data_names[2*i+1])

    print len_(items_to_draw), items_to_draw
    print len_(names_to_print), names_to_print
    print special_histories[0].keys()

    plt.figure()

    leg = []
    [plt, leg] = draw_items_for_legend(plt, leg, items_to_draw, names_to_print, colors, linestyles)

    for i in range(0,len(items_to_draw)):
        avg = items_to_draw[i]
        plt, _ = draw_avg_data(avg, colors[i], linestyles[i], plt)

    draw_titles_legends(plt, leg, custom_title)

    save_plot(plt, save[0], save[1])

    plt.draw()
    return plt
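
The expected structure of special_histories can be inferred from the key lookups above; a toy input with illustrative values:

# Toy input for plot_only_averages - keys inferred from the lookups above:
import matplotlib.pyplot as plt
special_histories = [
    {'avg_loss': [0.40, 0.31, 0.27], 'avg_val_loss': [0.45, 0.38, 0.36]},
    {'avg_loss': [0.35, 0.28, 0.24], 'avg_val_loss': [0.41, 0.37, 0.35]},
]
plot_only_averages(plt, special_histories, ['model A', 'model B'], ['red', 'blue'],
                   'Average validation loss', just='val', save=[False, ''])

Note that for just='both' the data_names list must hold two entries per history (train and validation), as the 2*i indexing above shows.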
Example #6
def loadDataFromSegments(path_to_segments_file, SCORE, verbose=False, we_dont_care_about_missing_images=False):
    '''
    Load lists from Segments
    :param path_to_segments_file: Segments file to be loaded.
    :param SCORE: flag indicating we only want scored Segments
    :param verbose:
    :param we_dont_care_about_missing_images: flag indicating we also accept Segments without images
    (the OSM-only model doesn't need them)
    :return: lists and Segments
    '''
    Segments = DataOperations.LoadDataFile(path_to_segments_file)
    segments_dir = os.path.dirname(path_to_segments_file) + '/'
    __list_of_images, __labels, __osm, __segment_ids, flag_is_extended = KerasPreparation.LoadDataFromSegments(Segments, has_score=SCORE, path_to_images=segments_dir, we_dont_care_about_missing_images=we_dont_care_about_missing_images)

    if verbose:
        print "__list_of_images", len_(__list_of_images), __list_of_images[0:5]
        print "__labels", len_(__labels), __labels[0:5]
        print "__osm", len_(__osm)
        print "__segment_ids", len_(__segment_ids), __segment_ids[0:5]
        print "flag_is_extended", flag_is_extended

    return [__list_of_images, __labels, __osm, __segment_ids], Segments
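
A hedged usage sketch; the path is a placeholder:

# Hypothetical call - the path is a placeholder:
lists, Segments = loadDataFromSegments('/path/to/SegmentsData.dump', SCORE=True, verbose=True)
[list_of_images, labels, osm, segment_ids] = lists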
Example #7
def load_tmp_dataset():
    '''
    Example of how a dataset can be loaded.
    :return:
    '''
    model_settings = {}
    # HACK
    model_settings["dataset_name"] = "miniset_640px"
    model_settings["pixels"] = 640
    model_settings["number_of_images"] = None
    model_settings["seed"] = 13

    #Settings["models"][0]["dump_file_override"] = 'SegmentsData.dump'
    model_settings["dump_file_override"] = 'SegmentsData_mark100.dump'

    dataset = DatasetHandler.CreateDataset.load_custom(model_settings["dataset_name"], model_settings["pixels"],
                                                       desired_number=model_settings["number_of_images"],
                                                       seed=model_settings["seed"],
                                                       filename_override=model_settings["dump_file_override"])
    [x, y] = dataset.getDataLabels()
    osm = dataset.getDataLabels_only_osm()
    print len_(x), len_(y), len_(osm)
    return x, y, osm
Example #8
    def expandOsmDataWithMultipleRadii(self, model_settings):
        '''
        The idea is to load all the radii data we have available and append it to each of the segments.
        We assume the basic experiment definition.
        '''
        r50 = 'SegmentsData_marked_R50_4TablesN.dump'
        r100 = 'SegmentsData_marked_R100_4TablesN.dump'
        r200 = 'SegmentsData_marked_R200_4TablesN.dump'

        import DatasetHandler
        dataset_r50 = DatasetHandler.CreateDataset.load_custom(
            model_settings["dataset_name"],
            model_settings["pixels"],
            desired_number=model_settings["number_of_images"],
            seed=model_settings["seed"],
            filename_override=r50)
        r50osm = dataset_r50.getDataLabels_only_osm_raw()
        dataset_r100 = DatasetHandler.CreateDataset.load_custom(
            model_settings["dataset_name"],
            model_settings["pixels"],
            desired_number=model_settings["number_of_images"],
            seed=model_settings["seed"],
            filename_override=r100)
        r100osm = dataset_r100.getDataLabels_only_osm_raw()
        dataset_r200 = DatasetHandler.CreateDataset.load_custom(
            model_settings["dataset_name"],
            model_settings["pixels"],
            desired_number=model_settings["number_of_images"],
            seed=model_settings["seed"],
            filename_override=r200)
        r200osm = dataset_r200.getDataLabels_only_osm_raw()

        from Omnipresent import len_
        print "osm", len(self.__osm), len_(self.__osm), self.__osm[0][0:10]
        print "osm50", len(r50osm), len_(r50osm), r50osm[0][0:10]
        print "osm50", len(r100osm), len_(r100osm), r100osm[0][0:10]
        print "osm200", len(r200osm), len_(r200osm), r200osm[0][0:10]

        new_osm = []
        for i in range(0, len(r50osm)):
            osm_of_i = []
            if model_settings["multiple_radii_mark"] == 'I':
                osm_of_i = list(r100osm[i]) + list(r50osm[i]) + list(
                    r200osm[i])
            elif model_settings["multiple_radii_mark"] == 'II':
                osm_of_i = list(r100osm[i]) + list(r200osm[i])
            elif model_settings["multiple_radii_mark"] == 'III':
                osm_of_i = list(r100osm[i]) + list(r50osm[i])

            new_osm.append(osm_of_i)

        print "enhanced", len(new_osm), len_(new_osm), new_osm[0][0:10]
        self.__osm = new_osm
        print "enhanced", len(self.__osm), len_(
            self.__osm), self.__osm[0][0:10]
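
A toy illustration of the mark 'I' concatenation above, with two-element vectors for brevity:

# Toy illustration of the radii concatenation (mark 'I'):
r100_i = [1.0, 2.0]; r50_i = [3.0, 4.0]; r200_i = [5.0, 6.0]
osm_of_i = list(r100_i) + list(r50_i) + list(r200_i)
print osm_of_i  # [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]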
Example #9
def evaluator(model_file, settings_file, name_output_file, custom_target_geojson = None, show_segments_histo_stats = False, actually_save = True):
    '''
    Main Evaluator function.
    :param model_file: path to model .h5 file.
    :param settings_file: path to settings file which was used to train this model.
    :param name_output_file: name of the output labeled GeoJSON file
    :return:
    '''
    model_base, model_top, model_settings = evaluator_load_model(model_file, settings_file)

    # Load data!
    if custom_target_geojson is None:
        path_to_segments_file = default_segments_path()
    else:
        path_to_segments_file = custom_target_geojson[1]

    we_dont_care_about_missing_images = False
    if model_settings["model_type"] is 'osm_only':
        we_dont_care_about_missing_images = True
    lists, Segments = loadDataFromSegments(path_to_segments_file, None, we_dont_care_about_missing_images=we_dont_care_about_missing_images)
    #lists = small_lists(lists)

    print "BEFORE MARKING"
    analyze_lists(lists)

    y_ref = lists[1]
    osm = osm_from_lists(lists)
    segment_ids = lists[3]

    if model_settings["model_type"] is 'osm_only':
        x = None
        y_pred = evaluator_predict_on_dataset(model_base, model_top, model_settings, x, osm)

    else:
        # these models rely on images, let's use generators to keep memory requirements reasonable
        img_generator = getImgGenerator_from_lists(lists)

        y_pred = evaluator_generators_predict(model_base, model_top, model_settings, img_generator[1], osm, img_generator[2])

    print len(y_pred), y_pred[0:10]

    pred_list = [lists[0], y_pred, lists[2], lists[3]]
    print "AFTER MARKING"
    analyze_lists(pred_list)

    EvaluatedData = prepEvaluatedData(y_pred, segment_ids)
    Altered = AlterSegments(EvaluatedData, Segments, only_unknown_scores=True)

    if show_segments_histo_stats:
        from DatasetHandler import DatasetObj

        scores = []
        for AltSeg in Altered:
            scores.append( AltSeg.Score )

        print len_(scores)

        import DatasetHandler.DatasetVizualizators
        save_to_pdf = False

        labels = scores
        DatasetHandler.DatasetVizualizators.plotHistogram(labels, 'Score distribution histogram', num_bins=20)
        DatasetHandler.DatasetVizualizators.plotX_sortValues(labels, 'Distribution of score (sorted)', notReverse=True)
        if save_to_pdf:
            DatasetHandler.DatasetVizualizators.saveAllPlotsToPDF()
        DatasetHandler.DatasetVizualizators.show()

    if custom_target_geojson is None:
        GeoJSON = loadDefaultGEOJSON()
    else:
        geojson_to_be_marked = custom_target_geojson[0]
        GeoJSON = loadGeoJson(geojson_to_be_marked)
    evaluated_geojson = markGeoJSON(GeoJSON, Altered)
    path_geojson_out = name_output_file

    if actually_save:
        saveGeoJson(evaluated_geojson, path_geojson_out)

    # Ex post testing
    DefaultSegments = DataOperations.LoadDataFile(path_to_segments_file)
    traverseGeoJSON(evaluated_geojson, DefaultSegments)


    #print y_ref, y_pred

    if actually_save:
        np.savetxt('y_ref.out', y_ref, delimiter=',')
        np.savetxt('y_pred.out', y_pred, delimiter=',')

    # Additional metrics
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    mae = mean_absolute_error(y_ref, y_pred)
    mse = mean_squared_error(y_ref, y_pred)

    return mse, mae
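
A hedged usage sketch; all file names are placeholders:

# Hypothetical invocation - file names are placeholders:
mse, mae = evaluator('model.h5', 'model_settings.dump', 'labeled_output.geojson',
                     show_segments_histo_stats=True, actually_save=True)
print "MSE:", mse, "MAE:", mae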
Example #10
def train_model(model, dataset, model_settings):
    '''
    Train model on a dataset using these settings
    :param model: model to be trained
    :param dataset: dataset to be used
    :param model_settings: model setting to be read for specifics
    :return:
    '''
    history = None

    if model_settings["model_type"] == 'simple_cnn_with_top':

        filename_features_train = model_settings["filename_features_train"]
        filename_features_test = model_settings["filename_features_test"]

        [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
        [train_data, train_labels, validation_data, validation_labels] = load_features(filename_features_train, filename_features_test, y, y_val)

        print "input shape of features", len_(train_data), "and labels", len_(train_labels)

        top_model = model[1]
        history = train_top_model(top_model, model_settings, train_data, train_labels, validation_data, validation_labels)

        # Finetuning
        print len_(history)
        print history

        if model_settings["finetune"]:
            finetune_model = None
            [train_data, train_labels, validation_data, validation_labels] = [None, None, None, None]

            # Cut at certain spots - only possible where the ResNet50 structure allows it: layers 172, 162, 152, 140, ...
            if model_settings["finetune_DEBUG_METHOD_OF_MODEL_GEN"]:
                finetune_features_train = model_settings["finetune_features_train"]
                finetune_features_test = model_settings["finetune_features_test"]

                [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
                [train_data, train_labels, validation_data, validation_labels] = load_features(finetune_features_train, finetune_features_test, y, y_val)

                print "FINE TUNE DATA input shape of features", len_(train_data), "and labels", len_(train_labels)

                # Fce (top, cnn, features_mid) > new_model
                model_cnn = model[0]
                top_cnn = model[1]
                cut = model_settings["finetune_num_of_cnn_layers"]
                shape = np.asarray(train_data).shape[1:]
                print shape

                finetune_model = build_finetune_model(model_cnn, top_cnn, cut, input_shape=shape)
                print "----- finetune model"
                print finetune_model.summary()

                plot_model(finetune_model, to_file='TEST_FINETUNE.png', show_shapes=True)

            # It's possible to cut anywhere, but that means a lengthy feature evaluation here on the spot, without pre-cooked features
            else:

                n = model_settings["finetune_num_of_cnn_layers"]

                for layer in model[0].layers[:n]:
                    print layer
                    layer.trainable = False

                print "----- CNN MODEL"
                print model[0].summary()
                print "----- TOP MODEL"
                print model[1].summary()

                # New model is made from the cnn and top model
                finetune_model = join_two_models(model[0], model[1])

                print "----- JOINED MODEL"
                print finetune_model.summary()

                plot_model(finetune_model, to_file='TEST_MODEL.png', show_shapes=True)

                [train_data, train_labels, validation_data, validation_labels] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])

            # We have the model, now lets compute

            epochs_tmp = model_settings["epochs"]
            model_settings["epochs"] = model_settings["finetune_epochs"]
            optimizer_tmp = model_settings["optimizer"]
            model_settings["optimizer"] = model_settings["finetune_optimizer"]


            history_to_append = train_top_model(finetune_model, model_settings, train_data, train_labels, validation_data, validation_labels)
            model_settings["epochs"] = epochs_tmp
            model_settings["optimizer"] = optimizer_tmp

        # Append histories

        #{'val_mean_absolute_error': [0.27633494684393978, 0.27673623693381116], 'loss': [0.15686354677721928, 0.12237877659907737], 'mean_absolute_error': [0.3303849070751238, 0.30686430593424935], 'val_loss': [0.10361090554317957, 0.10128958691173875]}
        if model_settings["finetune"]:
            print history
            print history_to_append
            for key in history.keys():
                history[key] += history_to_append[key]
            print history


    elif model_settings["model_type"] == 'img_osm_mix':

        if (model_settings["special_case"] is 'hack_dont_use_features'):
            # Get data
            # ps: be careful about their order when enhancing...
            # ImageGenerator for multiple inputs
            # 1 Build whole model now
            osm_shape = dataset.getShapeOfOsm()
            model = build_full_mixed_model(osm_shape)

            # 2 Train (which will take some time now...)
            [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
            [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])

            history = train_top_model(model, model_settings, [x, osm], y, [x_val, osm_val], y_val)

            print "special case scenario, mixed model, hack_dont_use_features"
        else:

            filename_features_train = model_settings["filename_features_train"]
            filename_features_test = model_settings["filename_features_test"]

            [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])
            [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])
            [train_data, _, validation_data, _] = load_features(filename_features_train, filename_features_test, y, y_val)

            print len_(train_data), len_(y), len_(osm)

            top_model = model[1]
            history = train_top_model(top_model, model_settings, [osm, train_data], y, [osm_val, validation_data], y_val)

    elif model_settings["model_type"] == 'osm_only':

        [osm, osm_val] = dataset.getDataLabels_split_only_osm(validation_split=model_settings["validation_split"])
        [y, y_val] = dataset.getDataLabels_split_only_y(validation_split=model_settings["validation_split"])

        osm_model = model[0]
        history = train_top_model(osm_model, model_settings, osm, y, osm_val, y_val)

    else:
        print "Yet to be programmed."

    return history
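
A toy illustration of the history merging done in the finetune branch above:

# Toy illustration of appending a finetune history to the base history:
history = {'loss': [0.50, 0.40], 'val_loss': [0.55, 0.45]}
history_to_append = {'loss': [0.30], 'val_loss': [0.35]}
for key in history.keys():
    history[key] += history_to_append[key]
# each list now holds the base epochs followed by the finetune epochs
print history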
Example #11
def cook_features(models, datasets, Settings):
    '''
    Makes sure that we have features available for the duo of model-dataset in our shared feature folder.
    If not, we will cook them.
    :param models: list of models (currently without their tops)
    :param datasets: list of dataset object
    :param Settings: settings
    :return: number of ready models
    '''
    # cooking shared data
    [x, y, x_val, y_val] = [None, None, None, None]

    index = 0
    for model in models:
        model_settings = Settings["models"][index]

        if model_settings["model_type"] is 'simple_cnn_with_top' or model_settings["model_type"] is 'img_osm_mix':

            dataset = datasets[ model_settings["dataset_pointer"] ]
            from ModelHandler.ModelOI import get_feature_file_names, do_we_need_to_cook
            # ps: if this import sits at the top of the file, it causes a circular import - and TF complains...

            filename_features_train = model_settings["filename_features_train"]
            filename_features_test = model_settings["filename_features_test"]
            do_we_need_to_cook_bool = do_we_need_to_cook(filename_features_train, filename_features_test)
            print "Looking up files:", filename_features_train, filename_features_test

            if do_we_need_to_cook_bool:
                model_cnn = model[0]
                cooking_method = model_settings["cooking_method"]

                print "We need to cook, chosen method is", cooking_method
                #if True:
                if cooking_method == 'direct':
                    if x is None:
                        [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
                        print len_(x)

                    predict_and_save_features(x, y, x_val, y_val, filename_features_train, filename_features_test, model_cnn)

                #if True:
                elif cooking_method == 'generators':
                    [order, order_val, image_generator, size, image_generator_val, size_val] = dataset.getImageGenerator(validation_split=model_settings["validation_split"])
                    print len_(order)

                    predict_from_generators(image_generator, image_generator_val, size, size_val, filename_features_train, filename_features_test, model_cnn)
            else:
                print "No need to cook, the files already exist"

            # Finetuning also requires prepared feature files.
            if model_settings["finetune"]:
                finetune_features_train = model_settings["finetune_features_train"]
                finetune_features_test = model_settings["finetune_features_test"]
                do_we_need_to_cook_bool = do_we_need_to_cook(finetune_features_train, finetune_features_test)
                print "Looking up finetune feature files:", finetune_features_train, finetune_features_test

                if do_we_need_to_cook_bool:
                    model_cnn = model[0]
                    cooking_method = model_settings["cooking_method"]

                    #n = len(model[0].layers) - model_settings["finetune_num_of_cnn_layers"]
                    n = model_settings["finetune_num_of_cnn_layers"]

                    print "------ Omitting layers:"
                    for layer in model[0].layers[n:]:
                        print layer.get_config()['name'], layer

                    print "Saving this layers outputs:"
                    print model_cnn.layers[n].get_config()['name'], model_cnn.layers[n], model_cnn.layers[n].get_config()

                    from keras.models import Model
                    model_middle = Model(inputs=model_cnn.input, outputs=model_cnn.layers[n].output)

                    print "We need to for finetuning files too, chosen method is", cooking_method
                    if cooking_method == 'direct':
                        if x is None:
                            [x, y, x_val, y_val] = dataset.getDataLabels_split(validation_split=model_settings["validation_split"])
                            print len_(x)
                        predict_and_save_features(x, y, x_val, y_val, finetune_features_train, finetune_features_test, model_middle)

                    elif cooking_method == 'generators':
                        [order, order_val, image_generator, size, image_generator_val, size_val] = dataset.getImageGenerator(validation_split=model_settings["validation_split"])
                        print len_(order)
                        predict_from_generators(image_generator, image_generator_val, size, size_val, finetune_features_train, finetune_features_test, model_middle)
                else:
                    print "No need to cook finetune feature files, they already exist"


        elif model_settings["model_type"] == 'osm_only':
            # No need to cook features from images in this case
            print "Chosen model type (", model_settings["model_type"] ,") doesn't require features to be cooked and loaded."
        index += 1
    return index
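
do_we_need_to_cook is imported from ModelHandler.ModelOI and not shown here; a plausible minimal version, assuming it only checks that both feature files exist:

# Plausible minimal sketch - the real do_we_need_to_cook lives in ModelHandler.ModelOI:
import os.path
def do_we_need_to_cook_sketch(filename_features_train, filename_features_test):
    return not (os.path.isfile(filename_features_train) and os.path.isfile(filename_features_test))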
Example #12
def k_fold_crossvalidation(model, dataset, model_settings):
    '''
    # K fold crossvalidation scheme
    # includes proper loading of models, testing and processing of the results.
    '''
    from ModelHandler.ModelTester import load_features
    from ModelHandler.ModelTester import train_top_model

    k = model_settings["crossvalidation_k"]

    # idea is to generate k=4 folds of indices
    # with dataset having been shuffled already, we can just use the indices 0-number_of_images

    number_of_images_total = dataset.num_of_images

    print "Total of ", number_of_images_total, " images."

    indices = range(0, number_of_images_total)

    indices_in_fjords = chunks(indices, k)
    #print indices_in_fjords
    print "sizes of fjords:", map(len, indices_in_fjords)

    # (tests)
    test = []
    for fjord in indices_in_fjords:
        test += fjord
    if not test == indices:
        print 'not the same!'

    # fold indices are now prepared
    # collect all_inputs and all_outputs depending on the type of experiment we are running.
    all_inputs = []
    all_outputs = []  # outputs are always score labels
    all_outputs = dataset.getDataLabels_only_y()

    features = []
    active_model = None
    if model_settings["model_type"] is 'img_osm_mix' or model_settings[
            "model_type"] is 'simple_cnn_with_top':
        filename_features_train = model_settings["filename_features_train"]
        filename_features_test = model_settings["filename_features_test"]
        [train_data, _, validation_data, _] = load_features(filename_features_train, filename_features_test, None, None)

        features = numpy.append(train_data, validation_data, 0)

    #arr_test = arr[0:split_at]
    #arr_val = arr[split_at:]

    # TODO: MODEL_TYPE_SPLIT
    if model_settings["model_type"] == 'simple_cnn_with_top':
        print "Prepare all_inputs and all_outputs for Image only model."
        # inputs are all the images, but for our model its the features
        all_inputs = features
        active_model = model[1]

    elif model_settings["model_type"] == 'osm_only':
        print "Prepare all_inputs and all_outputs for OSM only model."
        # inputs are all the osm vectors
        all_inputs = dataset.getDataLabels_only_osm()
        active_model = model[0]

    elif model_settings["model_type"] == 'img_osm_mix':
        print "Prepare all_inputs and all_outputs for Mixed model."
        # inputs list of features and osm vectors
        osms = dataset.getDataLabels_only_osm()
        all_inputs = [osms, features]
        active_model = model[1]

        print len_(osms), "and", len_(features)
    else:
        print "Yet to be programmed."

    # save active_models weights, so we don't cheat by cumulating better and better results...
    initial_weights = active_model.get_weights()

    print "Sizes of all_inputs:", len_(all_inputs), "and all_outputs:", len_(
        all_outputs)

    # variables for remembering data from histories
    last_training_errors = []
    best_training_errors = []
    last_validation_errors = []
    best_validation_errors = []
    all_histories_of_this_model = []

    last_training_measure = []
    best_training_measure = []
    last_validation_measure = []
    best_validation_measure = []

    for selected_fjord in range(0, k):
        active_model.set_weights(initial_weights)

        train_indices, valid_indices = kfold(indices_in_fjords, selected_fjord)

        train_inputs = select_data(train_indices, all_inputs)
        valid_inputs = select_data(valid_indices, all_inputs)
        train_outputs = select_data(train_indices, all_outputs)
        valid_outputs = select_data(valid_indices, all_outputs)

        print "selected_fjord", selected_fjord
        print "Sizes of train_inputs:", len_(
            train_inputs), "and train_outputs:", len_(train_outputs)
        print "Sizes of valid_inputs:", len_(
            valid_inputs), "and valid_outputs:", len_(valid_outputs)

        # into training and result collecting
        history = train_top_model(active_model, model_settings, train_inputs,
                                  train_outputs, valid_inputs, valid_outputs)
        #print history

        measure = 'mean_absolute_error'
        error = 'loss'
        val_measure = 'val_' + measure
        val_error = 'val_' + error
        '''
         {'val_mean_absolute_error':
             [0.00036219754838384688, 7.0134797169885132e-06, 3.973643103449831e-08, 3.973643103449831e-08, 3.973643103449831e-08],
         'loss':
             [0.34813621640205383, 0.16262358427047729, 0.19959338009357452, 0.16045540571212769, 0.16040021181106567],
         'mean_absolute_error':
             [0.50139808654785156, 0.22280247509479523, 0.25902602076530457, 0.21229584515094757, 0.21108284592628479],
         'val_loss':
             [1.7484823899849289e-07, 9.398822692352482e-11, 4.7369517129061591e-15, 4.7369517129061591e-15, 4.7369517129061591e-15]
         }
        '''

        # process history!
        all_histories_of_this_model.append(history)
        last_training_errors.append(history[error][-1])
        last_validation_errors.append(history[val_error][-1])

        last_training_measure.append(history[measure][-1])
        last_validation_measure.append(history[val_measure][-1])

        best_training_errors.append(best_min(history[error]))
        best_validation_errors.append(best_min(history[val_error]))

        best_training_measure.append(best_min(history[measure]))
        best_validation_measure.append(best_min(history[val_measure]))

    print "error", error
    print "last_training_errors", last_training_errors
    print "best_training_errors", best_training_errors
    print "last_validation_errors", last_validation_errors
    print "best_validation_errors", best_validation_errors

    print "measure", measure
    print "last_training_measure", last_training_measure
    print "best_training_measure", best_training_measure
    print "last_validation_measure", last_validation_measure
    print "best_validation_measure", best_validation_measure

    print "all_histories_of_this_model", all_histories_of_this_model

    special_history_dictionary = {}
    special_history_dictionary["last_training_errors"] = last_training_errors
    special_history_dictionary["best_training_errors"] = best_training_errors
    special_history_dictionary["last_validation_errors"] = last_validation_errors
    special_history_dictionary["best_validation_errors"] = best_validation_errors

    special_history_dictionary["last_training_measure"] = last_training_measure
    special_history_dictionary["best_training_measure"] = best_training_measure
    special_history_dictionary["last_validation_measure"] = last_validation_measure
    special_history_dictionary["best_validation_measure"] = best_validation_measure

    special_history_dictionary["all_histories_of_this_model"] = all_histories_of_this_model

    history = special_history_dictionary
    return history
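
chunks and kfold are used above but defined elsewhere in the repo; plausible sketches consistent with how they are called (k contiguous folds whose concatenation reproduces the original indices, and one fold held out for validation):

# Plausible sketches - the real chunks/kfold live elsewhere in the repo:
def chunks_sketch(lst, k):
    # split lst into k contiguous, roughly equal folds
    n = len(lst)
    bounds = [n * i // k for i in range(k + 1)]
    return [lst[bounds[i]:bounds[i + 1]] for i in range(k)]

def kfold_sketch(fjords, selected):
    # fold `selected` becomes validation, the remaining folds become training
    valid_indices = fjords[selected]
    train_indices = []
    for j in range(len(fjords)):
        if j != selected:
            train_indices += fjords[j]
    return train_indices, valid_indices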
Example #13
def handle_noncanon_dataset(Settings, model_settings):
    '''
    Special case scenario.
    We create a new custom dataset instead of using one of the big, officially used "canon" datasets.
    :param Settings: Setting for the whole experiment
    :param model_settings: Setting for our one dataset
    :return:
    '''

    if model_settings["noncanon_dataset"] == 'expand_existing_dataset':
    if model_settings["noncanon_dataset"] == 'expand_existing_dataset':
        # Idea: take an existing dataset and expand it via image augmentation (a custom ImageDataGenerator)

        # Directly load the old segments file

        # for each segment
        #   for each image
        #       apply the custom ImageDataGenerator to generate new images (depending of settings)
        #       save the new images into target folder as well as into this Segment
        # save edited Segments array into new SegmentsFile.dump

        debug_visual_output = False
        debug_txt_output = False

        from DatasetHandler.CreateDataset import get_path_for_dataset
        from Downloader.DataOperations import LoadDataFile
        from Downloader.KerasPreparation import LoadActualImages
        import numpy as np

        if debug_visual_output:
            from matplotlib import pyplot
            from keras.preprocessing.image import array_to_img
            import math

        target_folder = model_settings["dataset_name"]
        source_folder = model_settings["source_dataset"]

        filename_override = model_settings["dump_file_override"]
        source_segments_path = get_path_for_dataset(source_folder, filename_override)
        source_segments_dir = os.path.dirname(source_segments_path) + '/'

        if not file_exists(source_segments_dir + filename_override):
            print "WARNING !!!!"
            print '\t',"Careful, couldn't find the file", source_segments_dir + filename_override
            print '\t',"... we will instead be using   ", source_segments_path


        target_segments_path = get_path_for_dataset(target_folder, '')
        target_segments_dir = os.path.dirname(target_segments_path) + '/'
        target_segments_path = target_segments_dir+model_settings["dump_file_expanded"]

        # Check whether we already have it
        if (file_exists(target_segments_path) and folder_exists(target_segments_dir+'images')):
            list1 = os.listdir(target_segments_dir+'images')
            list2 = os.listdir(source_segments_dir+'images')
            if len(list1)>=len(list2):
                # Seems like we have copied it correctly too

                print "We already have this dataset extended! (", len(list1), len(list2), ")"

                return
        else:
            print '\t', file_exists(target_segments_path), target_segments_path
            print '\t', folder_exists(target_segments_dir+'images'), target_segments_dir+'images'


        generated_images_folder = os.path.dirname(target_segments_path) + '/images/'

        print "source_segments_path", source_segments_path  # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px/SegmentsData.dump
        print "source_segments_dir", source_segments_dir    # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px/
        print "target_segments_path", target_segments_path  # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px_expanded/SegmentsData_images_generated_test_folder_expanded.dump
        print "target_segments_dir", target_segments_dir    # /home/ekmek/Vitek/MGR-Project-Code/Data/StreetViewData/miniset_640px_expanded/

        # copy source_dataset -> target_dataset in dataset_name
        # from source_segments_dir/images to  target_segments_dir/images
        source__path = source_segments_dir+'images'
        target__path = target_segments_dir+'images'

        copy_folder(source__path, target__path)

        # test the success of this copy process!
        # for each file in source_segments_dir/images check for a copy in target_segments_dir/images

        was_ok = False

        while not was_ok:
            was_ok = True
            list_of_source_files=os.listdir(source__path)
            for item in list_of_source_files:
                file_source = source__path + '/' + item
                file_target = target__path + '/' + item

                if not file_exists(file_target):
                    was_ok = False
                else:
                    # file exists, but just to be paranoid, compare md5 hashes
                    md5_1 = md5(file_source)
                    md5_2 = md5(file_target)

                    if md5_1 != md5_2:
                        was_ok = False

                if not was_ok:
                    copy_file(file_source, file_target)

                    print '-- was missing, now fixed:' + file_source

        size_of_batch = model_settings["noncanon_dataset_genfrom1"]

        image_generator = model_settings["noncanon_dataset_imagegenerator"]
        print "image_generator", image_generator

        Segments = LoadDataFile(source_segments_path)

        number_of_images_parsed = 0
        for Segment in Segments:
            number_of_images = Segment.number_of_images
            for i_th_image in range(0,number_of_images):
                if Segment.hasLoadedImageI(i_th_image):
                    filename = source_segments_dir+Segment.getImageFilename(i_th_image)
                    number_of_images_parsed += 1
                    print filename

                    # we have one image filepath - generate data
                    x = LoadActualImages([filename])
                    y = np.array([Segment.SegmentId])
                    if debug_txt_output:
                        print "ORIGINAL id", y, "ith", i_th_image, "img:", len_(x[0])

                    X_batch = []
                    y_batch = []

                    from DatasetHandler.custom_image import ImageDataGenerator as custom_ImageDataGenerator
                    number_of_images_generated = 0
                    for x_gen, y_gen in image_generator.flow(x, y, batch_size=1, save_to_dir=generated_images_folder, save_prefix=str(y)+'_', save_format='jpg'):
                        number_of_images_generated += 1
                        image = x_gen[0]

                        filename_generated = y_gen[1][0]
                        id = y_gen[0][0]

                        if debug_txt_output:
                            print id, filename_generated

                            # save image on path filename_generated to the Segments hierarchy!
                            print "Segment.number_of_images", Segment.number_of_images
                            print "Segment.LocationsIndex", Segment.LocationsIndex
                            print "Segment.DistinctLocations", Segment.DistinctLocations
                            print "Segment.DistinctNearbyVector", Segment.DistinctNearbyVector
                            print "Segment.HasLoadedImages", Segment.HasLoadedImages
                            print "Segment.ErrorMessages", Segment.ErrorMessages

                        # The added offset (1000) marks this location index as belonging to a generated image
                        location_index = Segment.LocationsIndex[i_th_image] + 1000
                        # accordingly we get Segment.DistinctLocations[location_index] and Segment.DistinctNearbyVector[location_index]
                        has_img = Segment.HasLoadedImages[i_th_image]
                        has_err = Segment.ErrorMessages[i_th_image]

                        # Add to this Segment
                        Segment.number_of_images += 1
                        Segment.LocationsIndex.append(location_index)
                        Segment.HasLoadedImages.append(has_img)
                        Segment.ErrorMessages.append(has_err)


                        # Change filename and path
                        new_filename_generated = target_segments_dir + 'images' + Segment.getImageFilename(Segment.number_of_images-1)[6:]
                        if debug_txt_output:
                            print "rename", filename_generated, "to", new_filename_generated

                        shutil.move(filename_generated, new_filename_generated)

                        print ".", new_filename_generated

                        X_batch.append(image)
                        y_batch.append(id)

                        #print "id", y_gen, "img:", len_(x_gen), array_md5(image)

                        if len(X_batch) == size_of_batch:

                            if debug_txt_output:
                                print "GENERATED ", len(y_batch), " images > ", len_(X_batch), y_batch

                            if debug_visual_output:
                                # create a grid of 3x3 images
                                size_for_plot = int(math.floor(math.sqrt(size_of_batch-0.1))+1)
                                size_for_plot_y = size_for_plot
                                while size_of_batch <= size_for_plot*(size_for_plot_y-1):
                                    size_for_plot_y -= 1

                                print size_for_plot, "x", size_for_plot_y, " grid"

                                for i in range(0, len(X_batch)):
                                    pyplot.subplot(size_for_plot_y,size_for_plot,i+1)

                                    img = X_batch[i]
                                    backimg = array_to_img(img)
                                    pyplot.imshow(backimg)
                                # show the plot
                                pyplot.show()
                                break

                            break # end generation for this one image
                    if debug_txt_output:
                        print "Save new images from id", y, " in", len_(X_batch)
        print "number_of_images_parsed", number_of_images_parsed

        from Downloader.DataOperations import SaveDataFile
        print "Saving new Segments file into ", target_segments_path
        SaveDataFile(target_segments_path, Segments)

    else:
        print "This type of noncanon dataset generation has not yet been implemented!"