#assumes module-level imports: os, sys, json (DatasetClass is imported below)
def GetBackgroundSamples(self, dataset_name):
        if (dataset_name in self.dataset_tool_dict):
            return self.dataset_tool_dict[dataset_name]
        else:

            def GetProjectExplicitBase(base_dir_name="p5_afm_2018_demo"):
                cwd = os.getcwd()
                split_cwd = cwd.split("/")

                base_path_list = []
                for i in range(1, len(split_cwd)):
                    if (split_cwd[-i] == base_dir_name):
                        #keep everything up to and including the base dir
                        #(the original slice [:-i + 1] returned [] when i == 1,
                        #i.e. when running from the project root itself)
                        base_path_list = split_cwd[:len(split_cwd) - i + 1]

                if (base_path_list == []):
                    raise IOError(
                        'base project path could not be constructed. Are you running within '
                        + base_dir_name + '?')

                base_dir_path = "/".join(base_path_list)

                return base_dir_path

            base_dir = GetProjectExplicitBase(base_dir_name="p5_afm_2018_demo")

            #add dataset folder to sys path to allow for easy import
            datasets_path = os.path.join(base_dir, "datasets")
            sys.path.append(datasets_path)
            ###

            #import dataset tool
            from DatasetClass import DataSet

            #### load dataset json
            data_json_path = os.path.join(datasets_path, "datasets.json")

            datasets_json = None
            with open(data_json_path, "r") as f:
                datasets_json = json.load(f)

            ### get dataset details (assumes dataset_name exists in datasets.json)
            dataset_json = [
                dataset for dataset in datasets_json["datasets"]
                if dataset["dataset_name"] == dataset_name
            ][0]

            ### gather required information about the dataset
            if ("default_training_allocation_path" in dataset_json.keys()):
                file_path = dataset_json["default_training_allocation_path"]
                load_split = True
            else:
                file_path = dataset_json["ground_truth_csv_path"]
                load_split = False

            image_url_column = "image_path"
            ground_truth_column = "label"
            label_names = [
                label["label"] for label in dataset_json["labels"]
            ]  # gets all labels in dataset. To use a subset of labels, build a list manually
            label_names.sort()
            print(label_names)

            ### instantiate dataset tool
            csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
            print(csv_path)
            dataset_images_dir_path = os.path.join(datasets_path,
                                                   "dataset_images")

            dataset_tool = DataSet(csv_path,
                                   image_url_column,
                                   ground_truth_column,
                                   explicit_path_suffix=dataset_images_dir_path
                                   )  #instantiates a dataset tool

            dataset_tool.CreateLiveDataSet(
                dataset_max_size=-1,
                even_examples=True,
                y_labels_to_use=label_names
            )  #creates an organised list of dataset observations, evenly split between labels

            if (load_split):
                dataset_tool.ProduceDataFromTrainingSplitFile(
                    csv_path, explicit_path_suffix=dataset_images_dir_path)
            else:
                dataset_tool.SplitLiveData(
                    train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1
                )  #splits the live dataset examples into train, validation and test sets

        source = "train"
        train_x, train_y = dataset_tool.GetBatch(batch_size=1024,
                                                 even_examples=True,
                                                 y_labels_to_use=label_names,
                                                 split_batch=True,
                                                 split_one_hot=True,
                                                 batch_source=source)

        self.dataset_tool_dict[dataset_name] = train_x  #cache the sampled batch for reuse

        return train_x
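
A minimal usage sketch for the method above, not part of the original listing: the host class name, its cache attribute, and the dataset name are assumptions; the source shows only the method body, which lazily builds the dataset tool, samples an even 1024-image training batch, and caches it per dataset name.

class BackgroundProvider:  #hypothetical host class; only the method above appears in the source
    def __init__(self):
        self.dataset_tool_dict = {}  #cache: dataset_name -> sampled background batch

    GetBackgroundSamples = GetBackgroundSamples  #bind the module-level function above as a method

provider = BackgroundProvider()
background_x = provider.GetBackgroundSamples("example_dataset")  #hypothetical dataset name
background_x_again = provider.GetBackgroundSamples("example_dataset")  #served from the cache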
Example #2
### this snippet starts mid-script; the call is completed to match Example #1, and
### dataset_tool, label_names, cv2, os and json are assumed to be defined/imported above
dataset_tool.SplitLiveData(
    train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1
)  #splits the live dataset examples into train, validation and test sets

### get example batch and display an image
display_example_image = False

if (display_example_image):
    ##select the source for the example
    # source = "full"
    # source = "train"
    # source = "validation"
    source = "test"

    x, y = dataset_tool.GetBatch(batch_size=128,
                                 even_examples=True,
                                 y_labels_to_use=label_names,
                                 split_batch=True,
                                 split_one_hot=True,
                                 batch_source=source)

    print(y[0])
    cv2_image = cv2.cvtColor(x[0], cv2.COLOR_RGB2BGR)
    cv2.imshow("image 0", cv2_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

### instantiate the model (models_path is assumed to be defined earlier in the original script)
model_json_path = os.path.join(models_path, "models.json")

models_json = None
with open(model_json_path, "r") as f:
    models_json = json.load(f)
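
A hedged follow-on sketch: selecting one model entry from the loaded models_json, mirroring the dataset lookup in Example #1. The "models" and "model_name" keys and the model name are assumptions inferred from the datasets.json pattern, not confirmed by this listing.

model_name = "example_model"  #hypothetical model name
model_json = [
    model for model in models_json["models"]  #"models" key assumed by analogy with datasets.json
    if model["model_name"] == model_name
][0]
print(model_json)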
Example #3
### this snippet also starts mid-script; the call below is completed to match Example #1,
### with csv_path, image_url_column, ground_truth_column, dataset_images_dir_path,
### datasets_path, dataset_json and DataSet assumed to be defined earlier
dataset_tool = DataSet(csv_path,
                       image_url_column,
                       ground_truth_column,
                       explicit_path_suffix=dataset_images_dir_path)  #instantiates a dataset tool
dataset_tool.CreateLiveDataSet(
    dataset_max_size=-1, even_examples=True, y_labels_to_use=label_names
)  #creates an organised list of dataset observations, evenly split between labels

#load existing split
training_split_file = dataset_json["default_training_allocation_path"]
training_split_file_path = os.path.join(datasets_path, "dataset_csvs",
                                        training_split_file)
dataset_tool.ProduceDataFromTrainingSplitFile(
    training_split_file_path, explicit_path_suffix=dataset_images_dir_path)

source = "train"
train_x, train_y = dataset_tool.GetBatch(batch_size=-1,  #-1 presumably requests the full split
                                         even_examples=True,
                                         y_labels_to_use=label_names,
                                         split_batch=True,
                                         split_one_hot=True,
                                         batch_source=source)

#assumes keras is imported at module level (e.g. from tensorflow import keras)
train_data_gen = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=90,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    validation_split=0,
    fill_mode="constant")

#fit() computes data-dependent statistics; it is only required when featurewise
#normalisation or ZCA whitening is enabled, so with the settings above it is effectively a no-op
train_data_gen.fit(train_x)
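
A short continuation sketch showing how the fitted generator could feed augmented batches into training; `model` is assumed to be a compiled keras.Model defined elsewhere in the original script, and the batch size and epoch count are placeholders.

batch_size = 32
model.fit(train_data_gen.flow(train_x, train_y, batch_size=batch_size),  #yields augmented batches
          steps_per_epoch=len(train_x) // batch_size,
          epochs=10)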