def LoadDatasetFromJson(dataset_json, model_name=""):
    """Build a DataSet tool for the dataset described by ``dataset_json``.

    Args:
        dataset_json: dict entry from datasets.json; must provide
            "ground_truth_csv_path", "labels" and
            "default_training_allocation_path".
        model_name: reserved for selecting a model-specific training split.
            Currently unused — the default allocation is always loaded
            (see TODO below), so passing a name changes nothing yet.

    Returns:
        (dataset_tool, label_names): the populated DataSet instance and the
        list of label strings in json order (NOT sorted).

    Note: relies on the module-level ``datasets_path`` and ``DataSet``.
    """
    file_path = dataset_json["ground_truth_csv_path"]
    image_url_column = "image_path"
    ground_truth_column = "label"
    # All labels in the dataset. To use a subset, build the list manually.
    label_names = [label["label"] for label in dataset_json["labels"]]

    ### instantiate dataset tool
    csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
    dataset_images_dir_path = os.path.join(datasets_path, "dataset_images")
    dataset_tool = DataSet(csv_path, image_url_column, ground_truth_column,
                           explicit_path_suffix=dataset_images_dir_path)
    # Organised list of dataset observations, evenly split between labels.
    dataset_tool.CreateLiveDataSet(dataset_max_size=-1, even_examples=True,
                                   y_labels_to_use=label_names)

    # TODO: when model_name is given, load that model's own training split.
    # The original if/else branches were byte-identical, so they have been
    # collapsed into this single path until the model-specific loading exists.
    training_split_file = dataset_json["default_training_allocation_path"]
    training_split_file_path = os.path.join(datasets_path, "dataset_csvs",
                                            training_split_file)
    dataset_tool.ProduceDataFromTrainingSplitFile(
        training_split_file_path,
        explicit_path_suffix=dataset_images_dir_path)

    # Code to create a new split if required:
    # dataset_tool.SplitLiveData(train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1)

    return dataset_tool, label_names
def LoadFrameworkDataset(self, dataset_name, load_split_if_available=True, train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1):
    """Locate a dataset by name and return (dataset_json, dataset_tool).

    Loads the existing train/validation/test allocation when one is recorded
    in the dataset's json entry (and ``load_split_if_available`` is True);
    otherwise creates a fresh split with the given ratios and writes the
    allocation next to the source csv.
    """
    ### gather required information about the dataset
    matches = [d for d in self.datasets_json["datasets"]
               if d["dataset_name"] == dataset_name]
    dataset_json = matches[0]

    use_existing_split = (load_split_if_available
                          and "default_training_allocation_path" in dataset_json)
    if use_existing_split:
        source_csv = dataset_json["default_training_allocation_path"]
    else:
        source_csv = dataset_json["ground_truth_csv_path"]
        print("new training split will be created")

    # Optional per-dataset normalisation statistics (None when absent).
    norm_mean = dataset_json.get("mean")
    norm_std = dataset_json.get("std")

    image_url_column = "image_path"
    ground_truth_column = "label"

    ### instantiate dataset tool
    csv_path = os.path.join(self.datasets_path, "dataset_csvs", source_csv)
    images_dir = os.path.join(self.datasets_path, "dataset_images")
    dataset_tool = DataSet(csv_path, image_url_column, ground_truth_column,
                           explicit_path_suffix=images_dir,
                           mean=norm_mean, std=norm_std)

    if use_existing_split:
        dataset_tool.ProduceDataFromTrainingSplitFile(
            csv_path, explicit_path_suffix=images_dir)
    else:
        # Split the live dataset examples into train/validation/test sets
        # and persist the new allocation alongside the source csv.
        dataset_tool.SplitLiveData(train_ratio=train_ratio,
                                   validation_ratio=validation_ratio,
                                   test_ratio=test_ratio)
        dataset_tool.OutputTrainingSplitAllocation(
            csv_path.replace(".csv", "_split.csv"))

    return dataset_json, dataset_tool
label["label"] for label in dataset_json["labels"] ] # gets all labels in dataset. To use a subset of labels, build a list manually label_names.sort() print(label_names) input_image_height = dataset_json["image_y"] input_image_width = dataset_json["image_x"] input_image_channels = dataset_json["image_channels"] ### instantiate dataset tool csv_path = os.path.join(datasets_path, "dataset_csvs", file_path) dataset_images_dir_path = os.path.join(datasets_path, "dataset_images") dataset_tool = DataSet( csv_path, image_url_column, ground_truth_column, explicit_path_suffix=dataset_images_dir_path) #instantiates a dataset tool dataset_tool.CreateLiveDataSet( dataset_max_size=-1, even_examples=True, y_labels_to_use=label_names ) #creates an organised list of dataset observations, evenly split between labels if (load_split): dataset_tool.ProduceDataFromTrainingSplitFile( csv_path, explicit_path_suffix=dataset_images_dir_path) else: dataset_tool.SplitLiveData( train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1 ) #splits the live dataset examples in to train, validation and test sets
import os
from DatasetClass import DataSet

# Script fragment: prepares a dataset-image resize (and optional centre-crop)
# pass. The body of the crop branch continues beyond this excerpt.
dataset_name = "imagenet_vehicles_birds_10"
file_path = os.path.join("dataset_csvs", dataset_name + ".csv")
image_url_column = "image_path"
ground_truth_column = "label"
dataset_tool = DataSet(file_path, image_url_column, ground_truth_column)

# Source and destination image directories for the resize pass.
image_dir = os.path.join("dataset_images", dataset_name)
output_dir = os.path.join("dataset_images", dataset_name + "_resized")
target_width = 128
target_height = 128

#cropping
#original image size
width = 128 #y
height = 128 #x
crop_images_before_resize = False

if (crop_images_before_resize):
    ## calculate crop around centre
    # NOTE(review): mid_x is computed from height and mid_y from width, and the
    # axis comments above label width as "y" and height as "x" — these look
    # swapped. Harmless while width == height == 128, but confirm the intended
    # axes before using non-square sizes.
    mid_x = height / 2
    mid_y = width / 2
def GetBackgroundSamples(self, dataset_name):
    """Return a batch of background training samples for ``dataset_name``.

    The first call locates the project root, imports the dataset tooling,
    builds the dataset (reusing an existing split file when the dataset's
    json entry records one), draws a 1024-example even batch from the train
    split, and caches it. Later calls return the cached batch directly.
    """
    # Serve from cache — the cached value is the batch itself, despite the
    # dict's name.
    if dataset_name in self.dataset_tool_dict:
        return self.dataset_tool_dict[dataset_name]

    def _project_base_path(base_dir_name="p5_afm_2018_demo"):
        # Walk cwd components right-to-left to find the project root folder.
        parts = os.getcwd().split("/")
        found = []
        for offset in range(1, len(parts)):
            if parts[-offset] == base_dir_name:
                found = parts[:-offset + 1]
        if found == []:
            raise IOError(
                'base project path could not be constructed. Are you running within: '
                + base_dir_name)
        return "/".join(found)

    base_dir = _project_base_path(base_dir_name="p5_afm_2018_demo")

    # Add the datasets folder to sys.path so its tooling can be imported.
    datasets_path = os.path.join(base_dir, "datasets")
    sys.path.append(datasets_path)
    from DatasetClass import DataSet

    #### load dataset json and pick out this dataset's entry
    data_json_path = os.path.join(datasets_path, "datasets.json")
    with open(data_json_path, "r") as f:
        datasets_json = json.load(f)
    dataset_json = [d for d in datasets_json["datasets"]
                    if d["dataset_name"] == dataset_name][0]

    ### gather required information about the dataset
    if "default_training_allocation_path" in dataset_json:
        file_path = dataset_json["default_training_allocation_path"]
        load_split = True
    else:
        file_path = dataset_json["ground_truth_csv_path"]
        load_split = False

    image_url_column = "image_path"
    ground_truth_column = "label"

    # All labels in the dataset, sorted. To use a subset, build the list manually.
    label_names = [entry["label"] for entry in dataset_json["labels"]]
    label_names.sort()
    print(label_names)

    ### instantiate dataset tool
    csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
    print(csv_path)
    images_dir = os.path.join(datasets_path, "dataset_images")
    dataset_tool = DataSet(csv_path, image_url_column, ground_truth_column,
                           explicit_path_suffix=images_dir)
    # Organised list of dataset observations, evenly split between labels.
    dataset_tool.CreateLiveDataSet(dataset_max_size=-1, even_examples=True,
                                   y_labels_to_use=label_names)

    if load_split:
        dataset_tool.ProduceDataFromTrainingSplitFile(
            csv_path, explicit_path_suffix=images_dir)
    else:
        # Split live dataset examples into train, validation and test sets.
        dataset_tool.SplitLiveData(train_ratio=0.8, validation_ratio=0.1,
                                   test_ratio=0.1)

    train_x, train_y = dataset_tool.GetBatch(batch_size=1024,
                                             even_examples=True,
                                             y_labels_to_use=label_names,
                                             split_batch=True,
                                             split_one_hot=True,
                                             batch_source="train")

    self.dataset_tool_dict[dataset_name] = train_x
    return train_x
label_names = [ label["label"] for label in dataset_json["labels"] ] # gets all labels in dataset. To use a subset of labels, build a list manually label_names.sort() print(label_names) input_image_height = dataset_json["image_y"] input_image_width = dataset_json["image_x"] input_image_channels = dataset_json["image_channels"] ### instantiate dataset tool csv_path = os.path.join(datasets_path, "dataset_csvs", file_path) dataset_images_dir_path = os.path.join(datasets_path, "dataset_images") dataset_tool = DataSet( csv_path, image_url_column, ground_truth_column, explicit_path_suffix=dataset_images_dir_path) #instantiates a dataset tool dataset_tool.CreateLiveDataSet( dataset_max_size=-1, even_examples=True, y_labels_to_use=label_names ) #creates an organised list of dataset observations, evenly split between labels #load exisiting split training_split_file = dataset_json["default_training_allocation_path"] training_split_file_path = os.path.join(datasets_path, "dataset_csvs", training_split_file) dataset_tool.ProduceDataFromTrainingSplitFile( training_split_file_path, explicit_path_suffix=dataset_images_dir_path) #CREATE NEW SPLIT #dataset_tool.SplitLiveData(train_ratio=0.8,validation_ratio=0.1,test_ratio=0.1) #splits the live dataset examples in to train, validation and test sets
### gather required information about the dataset file_path = dataset_json["ground_truth_csv_path"] image_url_column = "image_path" ground_truth_column = "label" label_names = [label["label"] for label in dataset_json["labels"]] # gets all labels in dataset. To use a subset of labels, build a list manually label_names.sort() print(label_names) input_image_height = dataset_json["image_y"] input_image_width = dataset_json["image_x"] input_image_channels = dataset_json["image_channels"] ### instantiate dataset tool csv_path = os.path.join(datasets_path,"dataset_csvs",file_path) dataset_images_dir_path = os.path.join(datasets_path,"dataset_images") dataset_tool = DataSet(csv_path,image_url_column,ground_truth_column,explicit_path_suffix =dataset_images_dir_path) #instantiates a dataset tool dataset_tool.CreateLiveDataSet(dataset_max_size = -1, even_examples=True, y_labels_to_use=label_names) #creates an organised list of dataset observations, evenly split between labels dataset_tool.SplitLiveData(train_ratio=0.8,validation_ratio=0.1,test_ratio=0.1) #splits the live dataset examples in to train, validation and test sets ### get example batch and display an image display_example_image = False if(display_example_image): ##select the source for the example # source = "full" # source = "train" # source = "validation" source = "test" x, y = dataset_tool.GetBatch(batch_size = 128,even_examples=True, y_labels_to_use=label_names, split_batch = True,split_one_hot = True, batch_source = source)