def load_pyradigms(dataset_paths, sub_group=None):
    """Reads in a list of datasets in pyradigm format.

    Parameters
    ----------
    dataset_paths : iterable
        List of paths to pyradigm datasets.

    sub_group : iterable
        Subset of classes to return. Default: return all classes.
        If sub_group is specified, returns only that subset of classes
        for all datasets.

    Returns
    -------
    datasets : list
        List of loaded (and possibly class-subsetted) MLDataset instances,
        in the same order as ``dataset_paths``.

    Raises
    ------
    FileNotFoundError
        If any of the given paths does not exist.
    ValueError
        If any of the datasets does not contain the requested subset of classes.

    """

    if sub_group is not None:
        # normalize to a set so subset/equality comparisons below are well-defined
        sub_group = set(sub_group)

    # loading datasets
    datasets = list()
    for fp in dataset_paths:
        if not pexists(fp):
            # FileNotFoundError is a subclass of OSError/IOError, so callers
            # catching IOError still work.
            raise FileNotFoundError("Dataset @ {} does not exist.".format(fp))

        try:
            # there is an internal validation of dataset
            ds_in = MLDataset(fp)
        except Exception:
            # announce which file failed, then let the original error propagate
            print("Dataset @ {} is not a valid MLDataset!".format(fp))
            raise

        class_set = set(ds_in.class_set)
        if sub_group is None or sub_group == class_set:
            # requested everything (or exactly what the dataset has): no filtering
            ds_out = ds_in
        elif sub_group < class_set:
            # < on sets is a proper-subset test
            ds_out = ds_in.get_class(sub_group)
        else:
            raise ValueError(
                'One or more classes in {} does not exist in\n{}'.format(
                    sub_group, fp))

        # add the valid dataset to list
        datasets.append(ds_out)

    return datasets
# Smoke-test script: exercises MLDataset formatting, (de)serialization,
# class extraction and set-like arithmetic on an existing `test_dataset`.
test_dataset.description = 'test dataset'

# __format__ support: default, full and short representations
print(test_dataset)
print(f'default format:\n {test_dataset}')
print(f'full repr :\n {test_dataset:full}')
print(f'string/short :\n {test_dataset:s}')

class_set, label_set, class_sizes = test_dataset.summarize_classes()

# round-trip from disk and an in-memory copy
reloaded_dataset = MLDataset(filepath=out_file,
                             description='reloaded test_dataset')
copy_dataset = MLDataset(in_dataset=test_dataset)

# pick one class at random, split it out, then recombine
rand_index = np.random.randint(0, len(class_set), 1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)
other_classes_ds = test_dataset - random_class_ds

other_class_set = set(class_set) - {random_class_name}
other_classes_get_with_list = test_dataset.get_class(other_class_set)

recombined = other_classes_ds + random_class_ds

# constructors with no arguments
empty_dataset = MLDataset()
test2 = MLDataset()
test3 = MLDataset()

# TODO write tests for CLI
# NOTE(review): this block is a near-verbatim duplicate of the preceding
# smoke-test section (likely a merge/paste artifact) — consider removing one.
test_dataset.description = 'test dataset'

# exercise the various string representations
print(test_dataset)
print('default format:\n {}'.format(test_dataset))
print('full repr :\n {:full}'.format(test_dataset))
print('string/short :\n {:s}'.format(test_dataset))

# class summary, reload from disk, and copy-construction
class_set, label_set, class_sizes = test_dataset.summarize_classes()
reloaded_dataset = MLDataset(filepath=out_file,
                             description='reloaded test_dataset')
copy_dataset = MLDataset(in_dataset=test_dataset)

# choose a class at random and carve the dataset around it
rand_index = np.random.randint(0, len(class_set), 1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)

other_classes_ds = test_dataset - random_class_ds
other_class_set = set(class_set) - {random_class_name}
other_classes_get_with_list = test_dataset.get_class(other_class_set)
recombined = other_classes_ds + random_class_ds

# empty constructions
empty_dataset = MLDataset()
test2 = MLDataset()
test3 = MLDataset()

# TODO write tests for CLI