def duplicate_dataset_imgs_helper(dataset_path, out_dataset_path, fname_start, fname_end):
    print("")
    print("")
    print("Duplicate seed on:")
    print("Dataset: " + dataset_path)
    print("")
    print("Out Training Set: " + out_dataset_path)

    training_path = common.dataset_path(dataset_path, net)
    out_training_path = common.dataset_path(out_dataset_path, net)

    trainingset = ImageDataset()
    print("loading hdf5 training set: {}".format(training_path))
    trainingset.load_hdf5(training_path)
    print("hdf5 file loaded.")

    print("Getting sub dataset from filename filters...")
    seeds_dataset = trainingset.sub_dataset_from_filename(
        filename_start_with=fname_start,
        filename_end_with=fname_end)

    print("Merging seed-only sub dataset with original dataset")
    trainingset.merge_with_dataset(seeds_dataset)

    print("Saving merged dataset in: " + out_training_path)
    trainingset.save_hdf5(out_training_path)
    print("Done.")
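# Hypothetical usage sketch for duplicate_dataset_imgs_helper (the dataset
# names and the ".jpg" suffix below are illustrative assumptions, not values
# taken from this repository; "seed" matches the seed-file prefix filtered on
# elsewhere in these scripts). The helper also relies on a module-level `net`
# variable, which common.dataset_path() receives to resolve the hdf5 paths.
#
# duplicate_dataset_imgs_helper(dataset_path='mydataset_train',
#                               out_dataset_path='mydataset_train_ds',
#                               fname_start='seed',
#                               fname_end='.jpg')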
def main():
    config.init()
    for crop_size, crop_size_stamp in zip(config.ALL_CROP_SIZE, config.ALL_CROP_SIZE_STAMP):
        crop = crop_size['crop']
        size = crop_size['size']
        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_dataset_path = common.dataset_path(config.DATASET + '_so', crop, size)

        print("")
        print("")
        print("Seed only dataset.")
        print("Original dataset: " + dataset_path)
        print("Out seed only dataset: " + out_dataset_path)
        print("")

        trainingset = ImageDataset()
        print("loading hdf5 dataset: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seeds_dataset = trainingset.sub_dataset_from_filename(filename_start_with=FNAME_START_WITH,
                                                              remove_empty_classes=True)

        print("Saving seed-only dataset in: " + out_dataset_path)
        seeds_dataset.save_hdf5(out_dataset_path)
        print("Done.")
        print("")

    print("All done.")
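# Note: FNAME_START_WITH (and FNAME_END, used further below) are module-level
# constants; judging from the seed+google script below, which filters on
# filename_start_with="seed", FNAME_START_WITH presumably holds the "seed"
# filename prefix.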
def exp_duplicate_seed():
    dataset = cfg.DATASET
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']
        train_path = common.dataset_path(dataset + '_train', crop, size)
        train_path_ds = common.dataset_path(dataset + '_train_ds', crop, size)

        print("")
        print("")
        print("Duplicate seed on:")
        print("Training Set: " + train_path)
        print("")
        print("Out Training Set: " + train_path_ds)

        trainingset = ImageDataset()
        print("loading hdf5 training set: {}".format(train_path))
        trainingset.load_hdf5(train_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seeds_dataset = trainingset.sub_dataset_from_filename(filename_start_with=FNAME_START_WITH)

        print("Merging seed-only sub dataset with original dataset")
        trainingset.merge_with_dataset(seeds_dataset)

        print("Saving merged dataset in: " + train_path_ds)
        trainingset.save_hdf5(train_path_ds)
        print("Done.")
        print("")

    print("All done.")
def main():
    config.init()
    for crop_size, crop_size_stamp in zip(config.ALL_CROP_SIZE, config.ALL_CROP_SIZE_STAMP):
        crop = crop_size['crop']
        size = crop_size['size']
        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_validset_path = common.dataset_path(config.DATASET + '_sg_valid', crop, size)
        out_trainset_path = common.dataset_path(config.DATASET + '_sg_train', crop, size)

        print("")
        print("")
        print("Seed + Google train/valid set.")
        print("Original dataset: " + dataset_path)
        print("Out s+g trainset: " + out_trainset_path)
        print("Out s+g validset: " + out_validset_path)
        print("")

        trainingset = ImageDataset()
        print("loading hdf5 dataset: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset (seed dataset)")
        seeds_dataset = trainingset.sub_dataset_from_filename(filename_start_with="seed")

        print("Getting sub dataset (google dataset)")
        google_dataset = trainingset.sub_dataset_from_filename_multi(filename_start_with=["google"],
                                                                     filename_end_with=FNAME_END)

        print("Splitting google dataset in train/valid")
        google_train, google_valid = google_dataset.validation_per_class_split([SplitOptions("", 0.33)])

        print("Creating double_seeds_dataset")
        double_seeds_dataset = ImageDataset.merge_datasets(seeds_dataset, seeds_dataset)

        print("Creating train dataset (merge google_train with double_seeds_dataset)")
        train = ImageDataset.merge_datasets(google_train, double_seeds_dataset)

        print("Creating valid dataset (merge google_valid with seeds_dataset)")
        valid = ImageDataset.merge_datasets(google_valid, seeds_dataset)

        print("Saving train on h5")
        train.save_hdf5(out_trainset_path)
        print("Saving valid on h5")
        valid.save_hdf5(out_validset_path)
        print("Done.")
        print("")

    print("All done.")
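# Composition of the resulting splits (follows directly from the merges above,
# assuming validation_per_class_split() assigns the 0.33 fraction to the
# validation side):
#   train = ~67% of the google images + every seed image twice
#   valid = ~33% of the google images + every seed image once
# Duplicating the seed images in the training split gives each of them roughly
# double the sampling weight of a single google image, assuming uniform
# sampling over the merged set.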
def exp_split_dataset():
    dataset = cfg.dataset
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']
        dataset_path = common.dataset_path(dataset, crop, size)
        train_path = common.dataset_path(dataset + '_train', crop, size)
        valid_path = common.dataset_path(dataset + '_valid', crop, size)
        split_dataset_helper(dataset_path, train_path, valid_path,
                             split_options, exclude_file_starting_with)
        print("")

    print("All done.")
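# split_dataset_helper(), split_options and exclude_file_starting_with are
# module-level names. A hypothetical configuration, reusing the SplitOptions
# helper seen in the seed+google script above (the values are illustrative
# assumptions only):
#
# split_options = [SplitOptions("", 0.3)]
# exclude_file_starting_with = "seed"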
def main():
    config.init()
    print(NETS)
    for feat_net in NETS:
        crop, size = config.crop_size(net=feat_net)
        train_name = config.DATASET + "_so"
        test_name = config.DATASET + '_so_test'
        DATASETS = [train_name, test_name]
        # DATASETS = [test_name]
        print(DATASETS)

        for dataset in DATASETS:
            in_dataset_path = common.dataset_path(dataset, crop, size)
            out_dataset_path = common.feat_path(dataset, feat_net)

            print("")
            print("Features net: {}".format(feat_net))
            print("Input dataset: {}".format(in_dataset_path))
            print("Out feat dataset: {}".format(out_dataset_path))

            features_extraction_helper(feat_net, in_dataset_path, out_dataset_path,
                                       alternative_out_layer=None, verbose=True)
            print("Done.")
            print("")

    print("")
    print("All done.")
def exp_convert_folder_to_dataset():
    dataset = cfg.dataset
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']
        folder_dataset_path = common.folder_dataset_path(dataset)
        out_h5 = common.dataset_path(dataset, crop, size)
        imdir_label_to_hdf5_dataset(folder_dataset_path, out_h5,
                                    im_crop=[crop, crop],
                                    im_size=[size, size],
                                    remove_label_with_no_imgs=False,
                                    # chunk_size_in_ram=300,
                                    # skip_big_imgs=False,
                                    # big_images_pixels=10000 * 10000,
                                    verbose=True)
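# Hypothetical configuration sketch (the numeric values are illustrative
# assumptions): the loops in these scripts expect cfg.all_crop_size /
# config.ALL_CROP_SIZE to be a list of dicts with 'crop' and 'size' keys,
# e.g.:
#
# all_crop_size = [
#     {'crop': 224, 'size': 224},
#     {'crop': 256, 'size': 224},
# ]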