def main():
    """Create a seed-only copy of the configured dataset for each crop/size pair.

    For every (crop, size) combination in the config, loads the source hdf5
    dataset, keeps only the images whose filename starts with
    FNAME_START_WITH (dropping classes left empty), and saves the result
    as a new '<DATASET>_so' hdf5 dataset.
    """
    config.init()
    for cs, _stamp in zip(config.ALL_CROP_SIZE, config.ALL_CROP_SIZE_STAMP):
        crop, size = cs['crop'], cs['size']
        src_path = common.dataset_path(config.DATASET, crop, size)
        dst_path = common.dataset_path(config.DATASET + '_so', crop, size)

        print("")
        print("")
        print("Seed only dataset.")
        print("Original dataset: " + src_path)
        print("Out seed only dataset: " + dst_path)
        print("")

        dataset = ImageDataset()
        print("loading hdf5 dataset set: {}".format(src_path))
        dataset.load_hdf5(src_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seeds_only = dataset.sub_dataset_from_filename(
            filename_start_with=FNAME_START_WITH,
            remove_empty_classes=True)

        print("Saving merged dataset in: " + dst_path)
        seeds_only.save_hdf5(dst_path)
        print("Done.")

    print("")
    print("All done.")
def split_dataset_helper(
        dataset_path,                      # type: basestring
        out_training_path,                 # type: basestring
        out_valid_path,                    # type: basestring
        split_options=None,                # type: list(SplitOptions)
        exclude_file_starting_with=None    # type: list(basestring)
):
    """Split an hdf5 image dataset into a training set and a validation set.

    :param dataset_path: path of the source hdf5 dataset.
    :param out_training_path: hdf5 file path for the training split.
    :param out_valid_path: hdf5 file path for the validation split.
    :param split_options: list of SplitOptions driving the per-class split;
        defaults to a single ``SplitOptions("", 0.3)`` (30% validation).
    :param exclude_file_starting_with: filename prefixes excluded from the
        split; defaults to no exclusions.
    """
    # Fix: the previous defaults were mutable objects evaluated once at
    # definition time — shared across every call, and the SplitOptions
    # instance was constructed at import time. Use None sentinels and
    # build the defaults per call instead.
    if split_options is None:
        split_options = [SplitOptions("", 0.3)]
    if exclude_file_starting_with is None:
        exclude_file_starting_with = []

    print("")
    print("")
    print("Split dataset -> train/valid")
    print("Dataset: " + dataset_path)
    print("")
    print("Train: " + out_training_path)
    print("Valid: " + out_valid_path)

    dataset = ImageDataset()
    print("loading dataset hdf5 file: {}".format(dataset_path))
    dataset.load_hdf5(dataset_path)
    print("hdf5 file loaded.")

    print("Splitting the dataset")
    training, validation = dataset.validation_per_class_split(
        split_options, exclude_file_starting_with)
    print("Dataset splitted.")
    print("Training set length: {}".format(len(training.data)))
    print("Validation set length: {}".format(len(validation.data)))
    print("")

    # Fix: corrected the "trainig" typo in this log message.
    print("Saving training on hdf5 file: " + out_training_path)
    training.save_hdf5(out_training_path)
    print("Saving validation on hdf5 file: " + out_valid_path)
    validation.save_hdf5(out_valid_path)
def duplicate_dataset_imgs_helper(dataset_path, out_dataset_path, fname_start, fname_end):
    """Duplicate a filename-filtered subset of a dataset into an output copy.

    Loads the hdf5 training set resolved from ``dataset_path``, extracts the
    sub-dataset whose filenames start with ``fname_start`` and end with
    ``fname_end``, merges that subset back into the full set (so the matching
    images appear twice), and saves the result under ``out_dataset_path``.

    NOTE(review): despite their names, ``dataset_path``/``out_dataset_path``
    are passed through ``common.dataset_path(...)`` below, so they appear to
    be dataset *names*, not filesystem paths — confirm against the callers.
    """
    print("")
    print("")
    print("Duplicate seed on:")
    print("Dataset: " + dataset_path)
    print("")
    print("Out Training Set: " + out_dataset_path)

    # NOTE(review): `net` is neither a parameter nor a local here — this only
    # works if a module-level `net` global exists; otherwise it raises
    # NameError. Confirm, and consider passing it in explicitly.
    training_path = common.dataset_path(dataset_path, net)
    out_training_path = common.dataset_path(out_dataset_path, net)

    trainingset = ImageDataset()
    print("loading hdf5 training set: {}".format(training_path))
    trainingset.load_hdf5(training_path)
    print("hdf5 file loaded.")

    print("Getting sub dataset from filename filters...")
    seeds_dataset = trainingset.sub_dataset_from_filename(
        filename_start_with=fname_start, filename_end_with=fname_end)

    # Merging the subset back duplicates the matched images in the output.
    print("Merging seed-only sub dataset with original dataset")
    trainingset.merge_with_dataset(seeds_dataset)

    print("Saving merged dataset in: " + out_training_path)
    trainingset.save_hdf5(out_training_path)
    print("Done.")
def exp_duplicate_seed():
    """For each crop/size, save a '<DATASET>_train_ds' training set in which
    every seed image (filename starting with FNAME_START_WITH) is duplicated.
    """
    dataset = cfg.DATASET
    # NOTE(review): `cfg.all_crop_sizer` looks like it may be a typo for the
    # crop-size list used elsewhere (cf. config.ALL_CROP_SIZE) — confirm the
    # attribute name against the cfg module.
    for cs in cfg.all_crop_sizer:
        crop, size = cs['crop'], cs['size']
        train_path = common.dataset_path(dataset + '_train', crop, size)
        train_path_ds = common.dataset_path(dataset + '_train_ds', crop, size)

        print("")
        print("")
        print("Duplicate seed on:")
        print("Training Set: " + train_path)
        print("")
        print("Out Training Set: " + train_path_ds)

        trainingset = ImageDataset()
        print("loading hdf5 training set: {}".format(train_path))
        trainingset.load_hdf5(train_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seed_subset = trainingset.sub_dataset_from_filename(
            filename_start_with=FNAME_START_WITH)

        # Merging the seed subset back in means each seed image is present twice.
        print("Merging seed-only sub dataset with original dataset")
        trainingset.merge_with_dataset(seed_subset)

        print("Saving merged dataset in: " + train_path_ds)
        trainingset.save_hdf5(train_path_ds)
        print("Done.")

    print("")
    print("All done.")
def main():
    """Build seed+google train/valid sets for every configured crop/size.

    The google subset is split ~67/33 into train/valid; the seed subset is
    added twice to the train split and once to the valid split. Results are
    saved as '<DATASET>_sg_train' and '<DATASET>_sg_valid' hdf5 datasets.
    """
    config.init()
    for cs, _stamp in zip(config.ALL_CROP_SIZE, config.ALL_CROP_SIZE_STAMP):
        crop, size = cs['crop'], cs['size']
        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_validset_path = common.dataset_path(config.DATASET + '_sg_valid', crop, size)
        out_trainset_path = common.dataset_path(config.DATASET + '_sg_train', crop, size)

        print("")
        print("")
        print("Seed + Google train/valid set.")
        print("Original dataset: " + dataset_path)
        print("Out s+g trainset: " + out_trainset_path)
        print("Out s+g validset: " + out_validset_path)
        print("")

        source = ImageDataset()
        print("loading hdf5 dataset set: {}".format(dataset_path))
        source.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset (seed dataset)")
        seeds_dataset = source.sub_dataset_from_filename(
            filename_start_with="seed")

        print("Getting sub dataset (google dataset)")
        google_dataset = source.sub_dataset_from_filename_multi(
            filename_start_with=["google"], filename_end_with=FNAME_END)

        print("Splitting google dataset in train/valid")
        google_train, google_valid = google_dataset.validation_per_class_split(
            [SplitOptions("", 0.33)])

        # Seeds appear twice in the train split, once in the valid split.
        print("Creating double_seeds_dataset")
        double_seeds = ImageDataset.merge_datasets(seeds_dataset, seeds_dataset)

        print(
            "Creating train dataset (merge google_train with double_seeds_dataset)"
        )
        train = ImageDataset.merge_datasets(google_train, double_seeds)
        print("Creating valid dataset (merge google_valid with seeds_dataset)")
        valid = ImageDataset.merge_datasets(google_valid, seeds_dataset)

        print("Saving train on h5")
        train.save_hdf5(out_trainset_path)
        print("Saving valid on h5")
        valid.save_hdf5(out_validset_path)
        print("Done.")

    print("")
    print("All done.")
def features_extraction_helper(net, dataset_path, out_dataset_path,
                               alternative_out_layer=None, batch_size=32,
                               verbose=True):
    """Extract feature vectors from a dataset with a trained network.

    Loads the hdf5 dataset at ``dataset_path``, runs the trained model
    registered for ``net`` up to ``alternative_out_layer`` (or the net's
    configured feature layer when None), and saves the resulting feature
    dataset to ``out_dataset_path``. Prints a message and returns early if
    the dataset file cannot be opened.
    """
    def printv(msg):
        if verbose:
            print(msg)

    source = ImageDataset()
    printv("\nloading dataset: " + dataset_path)
    try:
        source.load_hdf5(dataset_path)
    except IOError:
        # Deliberate best-effort: report and bail out instead of raising.
        print("Can't open selected file.")
        return
    printv("dataset loaded.")

    model = config.trained_net_dict[net]()
    if alternative_out_layer is None:
        alternative_out_layer = config.feature_layer_dict[net]

    features = net_utils.extract_features(
        model, source, alternative_out_layer, batch_size, verbose)
    features.save_hdf5(out_dataset_path)
    printv("Feature extracted dataset saved in: " + out_dataset_path)