def parse_dataset(dataset_name):
    """Find a dataset defined in this module by name and load it.

    Candidates are every member of the current module that is either a
    ``DataLoader`` subclass or whose attribute name starts with ``load_``.
    Both the query and the candidate names (with ``load_`` removed) are
    passed through ``string_normalize(lower=True, remove_whitespace='',
    remove_non_alphanumeric=True)`` before comparison.

    An exact match on the normalized name wins; if there is none, candidates
    whose normalized name starts with the normalized query are considered.

    Parameters
    ----------
    dataset_name : str
        Name (or name prefix) of the dataset to load.

    Returns
    -------
    Whatever the matched loader returns: ``SomeDataLoader.load()`` for a
    ``DataLoader`` subclass, or ``load_xxx()`` for a module-level function.

    Raises
    ------
    RuntimeError
        If no candidate matches, or if more than one candidate matches.
    """

    def fn_norm(text):
        return string_normalize(text,
                                lower=True,
                                remove_whitespace='',
                                remove_non_alphanumeric=True)

    dataset_name = fn_norm(dataset_name)
    # ====== get all datasets ====== #
    all_datasets = []
    for name, member in inspect.getmembers(sys.modules[__name__]):
        is_loader_class = (isinstance(member, type) and
                           issubclass(member, DataLoader))
        if is_loader_class or name[:5] == 'load_':
            all_datasets.append((fn_norm(name.replace('load_', '')), member))
    # ====== search for best match ====== #
    match_dataset = [name for name, _ in all_datasets if name == dataset_name]
    if not match_dataset:
        # no exact hit: fall back to prefix matching
        match_dataset = [
            name for name, _ in all_datasets
            if dataset_name == name[:len(dataset_name)]
        ]
    if not match_dataset:
        raise RuntimeError(
            "Cannot find dataset with name '%s', "
            "all available datasets are: %s" %
            (dataset_name, ', '.join([name for name, _ in all_datasets])))
    if len(match_dataset) > 1:
        raise RuntimeError("Found multiple dataset for name: '%s', "
                           "all the candidates are: %s" %
                           (dataset_name, ', '.join(match_dataset)))
    # ====== extract the found dataset ====== #
    match_dataset = match_dataset[0]
    dataset = [ds for name, ds in all_datasets if name == match_dataset][0]
    # Dispatch on the member itself. The previous check looked for 'load_'
    # inside the normalized name (where it has already been stripped) and
    # then tried to call that string, so ``load_*`` functions always ended
    # up in ``dataset.load()`` and failed with AttributeError.
    if isinstance(dataset, type) and issubclass(dataset, DataLoader):
        return dataset.load()
    return dataset()
def get_distribution_description(dist_name):
    """Resolve a distribution description from a (possibly partial) name.

    The query is run through ``string_normalize(lower=True,
    remove_non_alphanumeric=True, remove_whitespace='')`` and then has any
    underscores removed. It is compared with the ``normalized_name`` of every
    entry returned by ``get_described_distributions()``: exact matches take
    precedence, otherwise entries whose normalized name begins with the query
    are used.

    Returns the single matching entry.

    Raises ``RuntimeError`` when nothing matches or when the query is
    ambiguous (more than one match).
    """
    query = string_normalize(dist_name,
                             lower=True,
                             remove_non_alphanumeric=True,
                             remove_whitespace='').replace('_', '')
    candidates = get_described_distributions()

    # exact match first
    found = [d for d in candidates if d.normalized_name == query]
    if not found:
        # fall back to prefix match
        found = [
            d for d in candidates
            if query == d.normalized_name[:len(query)]
        ]

    if not found:
        raise RuntimeError(
            "Cannot find distribution with name: '%s', all distributions are: %s"
            % (query, ', '.join([d.normalized_name for d in candidates])))
    if len(found) > 1:
        raise RuntimeError(
            "Found multiple distribution for name: '%s' which are: %s" %
            (query, ', '.join([d.normalized_name for d in found])))
    return found[0]
def get_distribution_description(dist_name):
    """Return the described distribution that matches ``dist_name``.

    ``dist_name`` is normalized with ``string_normalize`` (``lower=True``,
    ``remove_non_alphanumeric=True``, ``remove_whitespace=''``) and stripped
    of underscores. Two matching strategies are tried in order against each
    candidate's ``normalized_name``: equality, then "candidate starts with
    the query". The first strategy that yields any hit decides the result.

    Raises ``RuntimeError`` if no candidate matches or if several do.
    """
    key = string_normalize(dist_name,
                           lower=True,
                           remove_non_alphanumeric=True,
                           remove_whitespace='')
    key = key.replace('_', '')
    pool = get_described_distributions()

    def _is_exact(desc):
        return desc.normalized_name == key

    def _is_prefix(desc):
        return key == desc.normalized_name[:len(key)]

    # try the strategies in priority order, stopping at the first with hits
    hits = []
    for accept in (_is_exact, _is_prefix):
        hits = [desc for desc in pool if accept(desc)]
        if hits:
            break

    n_hits = len(hits)
    if n_hits == 0:
        all_names = ', '.join([desc.normalized_name for desc in pool])
        raise RuntimeError(
            "Cannot find distribution with name: '%s', all distributions are: %s"
            % (key, all_names))
    if n_hits > 1:
        hit_names = ', '.join([desc.normalized_name for desc in hits])
        raise RuntimeError(
            "Found multiple distribution for name: '%s' which are: %s" %
            (key, hit_names))
    return hits[0]
def normalized_name(self):
    """Return the normalized module-level variable name bound to ``self``.

    Walks ``globals()`` looking for a ``DistributionDescription`` value that
    is this exact object (identity, not equality). The name of the first such
    binding is returned after ``string_normalize(lower=True,
    remove_whitespace='', remove_non_alphanumeric=True)``.

    Returns ``None`` when no global refers to this object.
    """
    for var_name, value in globals().items():
        if not isinstance(value, DistributionDescription):
            continue
        if value is not self:
            continue
        return string_normalize(var_name,
                                lower=True,
                                remove_whitespace='',
                                remove_non_alphanumeric=True)
    return None
def parse_dataset(dataset_name):
    """Look up a dataset in the current module by name and load it.

    Every module member that is a ``DataLoader`` subclass, or whose attribute
    name begins with ``load_``, is a candidate. Candidate names have
    ``load_`` removed and, like the query, are normalized with
    ``string_normalize(lower=True, remove_whitespace='',
    remove_non_alphanumeric=True)``.

    Matching is exact first; if nothing matches exactly, candidates whose
    normalized name starts with the normalized query are accepted.

    Parameters
    ----------
    dataset_name : str
        Full name or unambiguous prefix of the dataset.

    Returns
    -------
    The result of ``<DataLoader subclass>.load()`` when the match is a
    ``DataLoader`` subclass, otherwise the result of calling the matched
    ``load_*`` member.

    Raises
    ------
    RuntimeError
        If there is no matching candidate or the name is ambiguous.
    """

    def fn_norm(text):
        return string_normalize(text,
                                lower=True,
                                remove_whitespace='',
                                remove_non_alphanumeric=True)

    def is_loader_class(obj):
        return isinstance(obj, type) and issubclass(obj, DataLoader)

    dataset_name = fn_norm(dataset_name)
    # ====== get all datasets ====== #
    all_datasets = [
        (fn_norm(name.replace('load_', '')), member)
        for name, member in inspect.getmembers(sys.modules[__name__])
        if is_loader_class(member) or name[:5] == 'load_'
    ]
    # ====== search for best match ====== #
    match_dataset = [name for name, _ in all_datasets if name == dataset_name]
    if not match_dataset:
        # nothing matched exactly, so accept prefix matches instead
        match_dataset = [
            name for name, _ in all_datasets
            if dataset_name == name[:len(dataset_name)]
        ]
    if not match_dataset:
        raise RuntimeError("Cannot find dataset with name '%s', "
                           "all available datasets are: %s" % (
                               dataset_name,
                               ', '.join([name for name, _ in all_datasets])))
    if len(match_dataset) > 1:
        raise RuntimeError("Found multiple dataset for name: '%s', "
                           "all the candidates are: %s" % (
                               dataset_name, ', '.join(match_dataset)))
    # ====== extract the found dataset ====== #
    match_dataset = match_dataset[0]
    dataset = [ds for name, ds in all_datasets if name == match_dataset][0]
    # Decide how to invoke the match from the object, not from its name:
    # the normalized name no longer contains 'load_', and it is a string
    # that cannot be called, so the old name-based check sent ``load_*``
    # functions to ``dataset.load()`` where they raised AttributeError.
    if is_loader_class(dataset):
        return dataset.load()
    return dataset()