def duplicate_dataset_imgs_helper(dataset_path, out_dataset_path, fname_start,
                                  fname_end):
    print("")
    print("")
    print("Duplicate seed on:")
    print("Dataset: " + dataset_path)
    print("")
    print("Out Training Set: " + out_dataset_path)

    # NOTE: `net` is not a parameter of this helper; it is assumed to be a
    # module-level global set elsewhere in the script.
    training_path = common.dataset_path(dataset_path, net)
    out_training_path = common.dataset_path(out_dataset_path, net)

    trainingset = ImageDataset()
    print("loading hdf5 training set: {}".format(training_path))
    trainingset.load_hdf5(training_path)
    print("hdf5 file loaded.")

    print("Getting sub dataset from filename filters...")
    seeds_dataset = trainingset.sub_dataset_from_filename(
        filename_start_with=fname_start, filename_end_with=fname_end)
    print("Merging seed-only sub dataset with original dataset")
    trainingset.merge_with_dataset(seeds_dataset)
    print("Saving merged dataset in: " + out_training_path)
    trainingset.save_hdf5(out_training_path)
    print("Done.")
Example #2
def main():
    config.init()

    for crop_size, crop_size_stamp in zip(config.ALL_CROP_SIZE, config.ALL_CROP_SIZE_STAMP):
        crop = crop_size['crop']
        size = crop_size['size']

        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_dataset_path = common.dataset_path(config.DATASET + '_so', crop, size)

        print("")
        print("")
        print("Seed only dataset.")
        print("Original dataset: " + dataset_path)
        print("Out seed only dataset: " + out_dataset_path)
        print("")


        trainingset = ImageDataset()
        print("loading hdf5 dataset set: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seeds_dataset = trainingset.sub_dataset_from_filename(
            filename_start_with=FNAME_START_WITH, remove_empty_classes=True)
        print("Saving seed-only dataset in: " + out_dataset_path)
        seeds_dataset.save_hdf5(out_dataset_path)
        print("Done.")

    print("")
    print("All done.")
def exp_duplicate_seed():
    dataset = cfg.dataset
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']

        train_path = common.dataset_path(dataset + '_train', crop, size)
        train_path_ds = common.dataset_path(dataset + '_train_ds', crop, size)

        print("")
        print("")
        print("Duplicate seed on:")
        print("Training Set: " + train_path)
        print("")
        print("Out Training Set: " + train_path_ds)

        trainingset = ImageDataset()
        print("loading hdf5 training set: {}".format(train_path))
        trainingset.load_hdf5(train_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset from filter (seed files)...")
        seeds_dataset = trainingset.sub_dataset_from_filename(filename_start_with=FNAME_START_WITH)
        print("Merging seed-only sub dataset with original dataset")
        trainingset.merge_with_dataset(seeds_dataset)
        print("Saving merged dataset in: " + train_path_ds)
        trainingset.save_hdf5(train_path_ds)
        print("Done.")

    print("")
    print("All done.")
def main():
    config.init()

    for crop_size, crop_size_stamp in zip(config.ALL_CROP_SIZE,
                                          config.ALL_CROP_SIZE_STAMP):
        crop = crop_size['crop']
        size = crop_size['size']

        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_validset_path = common.dataset_path(config.DATASET + '_sg_valid',
                                                crop, size)
        out_trainset_path = common.dataset_path(config.DATASET + '_sg_train',
                                                crop, size)

        print("")
        print("")
        print("Seed + Google train/valid set.")
        print("Original dataset: " + dataset_path)
        print("Out s+g trainset: " + out_trainset_path)
        print("Out s+g validset: " + out_validset_path)
        print("")

        trainingset = ImageDataset()
        print("loading hdf5 dataset set: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        print("Getting sub dataset (seed dataset)")
        seeds_dataset = trainingset.sub_dataset_from_filename(
            filename_start_with="seed")
        print("Getting sub dataset (google dataset)")
        google_dataset = trainingset.sub_dataset_from_filename_multi(
            filename_start_with=["google"], filename_end_with=FNAME_END)
        print("Splitting google dataset in train/valid")
        google_train, google_valid = google_dataset.validation_per_class_split(
            [SplitOptions("", 0.33)])

        print("Creating double_seeds_dataset")
        double_seeds_dataset = ImageDataset.merge_datasets(
            seeds_dataset, seeds_dataset)
        print(
            "Creating train dataset (merge google_train with double_seeds_dataset)"
        )
        train = ImageDataset.merge_datasets(google_train, double_seeds_dataset)
        print("Creating valid dataset (merge google_valid with seeds_dataset)")
        valid = ImageDataset.merge_datasets(google_valid, seeds_dataset)

        print("Saving train on h5")
        train.save_hdf5(out_trainset_path)
        print("Saving valid on h5")
        valid.save_hdf5(out_validset_path)
        print("Done.")

    print("")
    print("All done.")
Example #5
def exp_split_dataset():
    dataset = cfg.dataset
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']

        dataset_path = common.dataset_path(dataset, crop, size)
        train_path = common.dataset_path(dataset + '_train', crop, size)
        valid_path = common.dataset_path(dataset + '_valid', crop, size)
        # `split_options` and `exclude_file_starting_with` are assumed to be
        # module-level globals defined alongside this experiment.
        split_dataset_helper(dataset_path, train_path, valid_path,
                             split_options, exclude_file_starting_with)

    print("")
    print("All done.")
def main():
    config.init()

    print(NETS)
    for feat_net in NETS:
        crop, size = config.crop_size(net=feat_net)

        train_name = config.DATASET + "_so"
        test_name = config.DATASET + '_so_test'

        DATASETS = [train_name, test_name]
        # DATASETS = [test_name]

        print(DATASETS)
        for dataset in DATASETS:
            in_dataset_path = common.dataset_path(dataset, crop, size)
            out_dataset_path = common.feat_path(dataset, feat_net)
            print("")
            print("Features net:     {}".format(feat_net))
            print("Input dataset:    {}".format(in_dataset_path))
            print("Out feat dataset: {}".format(out_dataset_path))

            features_extraction_helper(feat_net, in_dataset_path,
                                       out_dataset_path,
                                       alternative_out_layer=None,
                                       verbose=True)
            print("Done.")
        print("")

    print("")
    print("All done.")
Example #7
def exp_convert_folder_to_dataset():
    dataset = cfg.dataset
    for crop_size in cfg.all_crop_size:
        crop = crop_size['crop']
        size = crop_size['size']

        folder_dataset_path = common.folder_dataset_path(dataset)
        out_h5 = common.dataset_path(dataset, crop, size)

        imdir_label_to_hdf5_dataset(
            folder_dataset_path,
            out_h5,
            im_crop=[crop, crop],
            im_size=[size, size],
            remove_label_with_no_imgs=False,
            # chunk_size_in_ram=300,
            # skip_big_imgs=False,
            # big_images_pixels=10000 * 10000,
            verbose=True)
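imdir_label_to_hdf5_dataset presumably reads a folder tree with one subdirectory per label; a self-contained sketch that enumerates (image_path, label) pairs from such a layout (the layout itself is an assumption based on the function's name):

import os

def iter_labeled_images(root):
    """Yield (image_path, label) pairs from a label-per-subfolder layout."""
    for label in sorted(os.listdir(root)):
        label_dir = os.path.join(root, label)
        if not os.path.isdir(label_dir):
            continue
        for fname in sorted(os.listdir(label_dir)):
            yield os.path.join(label_dir, fname), label

# Usage: for path, label in iter_labeled_images("/path/to/folder_dataset"): ...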