Example #1
def load_pyradigms(dataset_paths, sub_group=None):
    """Reads in a list of datasets in pyradigm format.

    Parameters
    ----------
    dataset_paths : iterable
        List of paths to pyradigm datasets.

    sub_group : iterable
        Subset of classes to return. Default: return all classes.
        If sub_group is specified, returns only that subset of classes
        for all datasets.

    Returns
    -------
    datasets : list
        Loaded MLDataset instances, in the same order as dataset_paths.

    Raises
    ------
        IOError
            If any of the given paths does not exist.
        ValueError
            If all the datasets do not contain the request subset of classes.

    """

    # normalize once so subset/equality comparisons below are set-vs-set
    if sub_group is not None:
        sub_group = set(sub_group)

    # loading datasets
    datasets = list()
    for fp in dataset_paths:
        if not pexists(fp):
            raise IOError("Dataset @ {} does not exist.".format(fp))

        try:
            # there is an internal validation of dataset
            ds_in = MLDataset(fp)
        except Exception:
            # narrowed from a bare except so SystemExit/KeyboardInterrupt
            # still propagate untouched; real load errors are annotated
            # with the offending path and re-raised
            print("Dataset @ {} is not a valid MLDataset!".format(fp))
            raise

        class_set = set(ds_in.class_set)
        if sub_group is None or sub_group == class_set:
            # no filtering requested (or requested set is the full set)
            ds_out = ds_in
        elif sub_group < class_set:  # < on sets is an issubset operation
            ds_out = ds_in.get_class(sub_group)
        else:
            # requested classes are not all present in this dataset
            raise ValueError(
                'One or more classes in {} does not exist in\n{}'.format(
                    sub_group, fp))

        # add the valid dataset to list
        datasets.append(ds_out)

    return datasets
Example #2
# Smoke-test script exercising the MLDataset API end to end: formatting,
# class summarization, save/reload, copy-construction, class-wise subsetting,
# set-like +/- arithmetic, and empty construction.
# NOTE(review): assumes test_dataset, out_file, MLDataset and np were defined
# earlier in the file -- confirm against the full script.
test_dataset.description = 'test dataset'
# __format__ is expected to accept the default, 'full' and 's' specs
print(test_dataset)
print('default format:\n {}'.format(test_dataset))
print('full repr     :\n {:full}'.format(test_dataset))
print('string/short  :\n {:s}'.format(test_dataset))

# per-class summary: class names, numeric labels and per-class sample counts
class_set, label_set, class_sizes = test_dataset.summarize_classes()

# round-trip: reload the dataset previously saved at out_file
reloaded_dataset = MLDataset(filepath=out_file,
                             description='reloaded test_dataset')

# copy-construct from an existing in-memory dataset
copy_dataset = MLDataset(in_dataset=test_dataset)

# pick one class at random and extract it as its own dataset
rand_index = np.random.randint(0, len(class_set), 1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)

# '-' removes the samples of random_class_ds, leaving the other classes
other_classes_ds = test_dataset - random_class_ds

# get_class also accepts a collection of class names
other_class_set = set(class_set) - set([random_class_name])
other_classes_get_with_list = test_dataset.get_class(other_class_set)

# '+' merges two disjoint datasets back together
recombined = other_classes_ds + random_class_ds

# no-argument construction yields an empty dataset
empty_dataset = MLDataset()

test2 = MLDataset()
test3 = MLDataset()

# TODO write tests for CLI
Example #3
# Smoke-test script (near-duplicate of the previous example) exercising the
# MLDataset API: formatting, class summarization, save/reload, copying,
# class-wise subsetting, set-like +/- arithmetic, and empty construction.
# NOTE(review): assumes test_dataset, out_file, MLDataset and np were defined
# earlier in the file -- confirm against the full script.
test_dataset.description = 'test dataset'
# __format__ is expected to accept the default, 'full' and 's' specs
print(test_dataset)
print('default format:\n {}'.format(test_dataset))
print('full repr     :\n {:full}'.format(test_dataset))
print('string/short  :\n {:s}'.format(test_dataset))

# per-class summary: class names, numeric labels and per-class sample counts
class_set, label_set, class_sizes = test_dataset.summarize_classes()

# round-trip: reload the dataset previously saved at out_file
reloaded_dataset = MLDataset(filepath=out_file, description='reloaded test_dataset')

# copy-construct from an existing in-memory dataset
copy_dataset = MLDataset(in_dataset=test_dataset)

# pick one class at random and extract it as its own dataset
rand_index = np.random.randint(0,len(class_set),1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)

# '-' removes the samples of random_class_ds, leaving the other classes
other_classes_ds = test_dataset - random_class_ds

# get_class also accepts a collection of class names
other_class_set = set(class_set)-set([random_class_name])
other_classes_get_with_list = test_dataset.get_class(other_class_set)

# '+' merges two disjoint datasets back together
recombined = other_classes_ds + random_class_ds

# no-argument construction yields an empty dataset
empty_dataset = MLDataset()

test2 = MLDataset()
test3 = MLDataset()

# TODO write tests for CLI