Example #1
0
def parse_dataset(dataset_name):
    """Find a dataset defined in this module by name and load it.

    Candidates are collected from the members of the current module:
    every class that is a subclass of ``DataLoader`` and every callable
    whose name starts with ``load_`` (registered without that prefix).
    The query and every candidate name are passed through
    ``string_normalize(lower=True, remove_whitespace='',
    remove_non_alphanumeric=True)`` before being compared.

    An exact match on the normalized name is preferred; if there is none,
    candidates whose normalized name starts with the query are used.

    Args:
        dataset_name: name (or unambiguous name prefix) of the dataset.

    Returns:
        The result of ``member.load()`` when the match is a ``DataLoader``
        subclass, otherwise the result of calling the matched ``load_*``
        callable without arguments.

    Raises:
        RuntimeError: if no candidate matches, or more than one does.
    """

    def fn_norm(text):
        return string_normalize(text,
                                lower=True,
                                remove_whitespace='',
                                remove_non_alphanumeric=True)

    dataset_name = fn_norm(dataset_name)
    # ====== get all datasets ====== #
    # each entry is (normalized name, member, is_loader_class)
    all_datasets = []
    for name, member in inspect.getmembers(sys.modules[__name__]):
        is_loader_class = (isinstance(member, type) and
                           issubclass(member, DataLoader))
        # plain module attributes that merely happen to be named `load_*`
        # cannot be used to load anything, so only callables are kept
        is_load_function = name.startswith('load_') and callable(member)
        if not (is_loader_class or is_load_function):
            continue
        # strip only the leading prefix; replacing every 'load_' occurrence
        # would mangle names such as `load_download_x`
        if name.startswith('load_'):
            name = name[len('load_'):]
        all_datasets.append((fn_norm(name), member, is_loader_class))
    # ====== search for best match ====== #
    matches = [entry for entry in all_datasets if entry[0] == dataset_name]
    if not matches:
        matches = [
            entry for entry in all_datasets
            if entry[0].startswith(dataset_name)
        ]
    if not matches:
        raise RuntimeError(
            "Cannot find dataset with name '%s', "
            "all available datasets are: %s" %
            (dataset_name, ', '.join([entry[0] for entry in all_datasets])))
    if len(matches) > 1:
        raise RuntimeError("Found multiple dataset for name: '%s', "
                           "all the candidates are: %s" %
                           (dataset_name, ', '.join(
                               [entry[0] for entry in matches])))
    # ====== extract the found dataset ====== #
    _, dataset, is_loader_class = matches[0]
    # The registered name no longer carries the 'load_' prefix, so the kind
    # of loader must be decided from the member itself, not from its name.
    if is_loader_class:
        return dataset.load()
    return dataset()
Example #2
0
def get_distribution_description(dist_name):
    """Return the single described distribution matching ``dist_name``.

    The query is normalized with ``string_normalize`` (lower-cased,
    whitespace and non-alphanumeric characters removed) and stripped of
    underscores, then compared against the ``normalized_name`` of every
    object returned by ``get_described_distributions()``. Exact matches
    are tried first; only when there are none, names that begin with the
    query are accepted.

    Raises:
        RuntimeError: if nothing matches, or if more than one
            distribution matches.
    """
    query = string_normalize(dist_name,
                             lower=True,
                             remove_non_alphanumeric=True,
                             remove_whitespace='').replace('_', '')
    dist_name = query
    dists = get_described_distributions()
    # first pass: exact name
    matches = [d for d in dists if d.normalized_name == dist_name]
    # second pass: the query is a prefix of the name
    if not matches:
        n_chars = len(dist_name)
        matches = [
            d for d in dists if dist_name == d.normalized_name[:n_chars]
        ]
    # nothing found at all
    if not matches:
        all_names = ', '.join([i.normalized_name for i in dists])
        raise RuntimeError(
            "Cannot find distribution with name: '%s', all distributions are: %s"
            % (dist_name, all_names))
    # ambiguous query
    if len(matches) > 1:
        found_names = ', '.join([i.normalized_name for i in matches])
        raise RuntimeError(
            "Found multiple distribution for name: '%s' which are: %s" %
            (dist_name, found_names))
    return matches[0]
Example #3
0
def get_distribution_description(dist_name):
  """Look up exactly one described distribution by (partial) name.

  ``dist_name`` is normalized via ``string_normalize`` (lower-case, no
  whitespace, no non-alphanumeric characters) and has its underscores
  removed. It is then matched against ``normalized_name`` of each item
  from ``get_described_distributions()``: exact equality first, and,
  failing that, items whose name starts with the normalized query.

  Raises:
    RuntimeError: when no distribution, or more than one, matches.
  """
  normalized = string_normalize(dist_name, lower=True,
                                remove_non_alphanumeric=True,
                                remove_whitespace='')
  dist_name = normalized.replace('_', '')
  dists = get_described_distributions()
  # exact matches take priority
  matches = [dist for dist in dists if dist.normalized_name == dist_name]
  if not matches:
    # fall back to prefix matches
    matches = [dist for dist in dists
               if dist_name == dist.normalized_name[:len(dist_name)]]
  if not matches:
    raise RuntimeError(
        "Cannot find distribution with name: '%s', all distributions are: %s" %
        (dist_name, ', '.join([i.normalized_name for i in dists])))
  if len(matches) > 1:
    raise RuntimeError(
        "Found multiple distribution for name: '%s' which are: %s" %
        (dist_name, ', '.join([i.normalized_name for i in matches])))
  # exactly one candidate is left
  (match,) = matches
  return match
Example #4
0
 def normalized_name(self):
   """Normalized name of the module-level global bound to this object.

   Scans ``globals()`` for a ``DistributionDescription`` instance that is
   this very object and returns the name of the first such global after
   passing it through ``string_normalize`` (lower-cased, whitespace and
   non-alphanumeric characters removed). Returns ``None`` when no global
   refers to this object.
   """
   for var_name, value in globals().items():
     if not isinstance(value, DistributionDescription):
       continue
     if value is not self:
       continue
     return string_normalize(var_name, lower=True,
                             remove_whitespace='',
                             remove_non_alphanumeric=True)
   return None
Example #5
0
 def normalized_name(self):
     """Return the normalized name of the global variable holding ``self``.

     The globals of this module are searched for a
     ``DistributionDescription`` instance identical to ``self``; the name
     of the first one found is normalized with ``string_normalize``
     (lower-case, whitespace and non-alphanumeric characters removed).
     ``None`` is returned if no such global exists.
     """
     for global_name, obj in globals().items():
         is_description = isinstance(obj, DistributionDescription)
         if is_description and obj is self:
             normalized = string_normalize(global_name,
                                           lower=True,
                                           remove_whitespace='',
                                           remove_non_alphanumeric=True)
             return normalized
     return None
Example #6
0
def parse_dataset(dataset_name):
  """Find a dataset defined in this module by name and load it.

  Candidates are collected from the members of the current module: every
  class that is a subclass of ``DataLoader`` and every callable whose name
  starts with ``load_`` (registered without that prefix). The query and
  every candidate name are passed through
  ``string_normalize(lower=True, remove_whitespace='',
  remove_non_alphanumeric=True)`` before being compared.

  An exact match on the normalized name is preferred; if there is none,
  candidates whose normalized name starts with the query are used.

  Args:
    dataset_name: name (or unambiguous name prefix) of the dataset.

  Returns:
    The result of ``member.load()`` when the match is a ``DataLoader``
    subclass, otherwise the result of calling the matched ``load_*``
    callable without arguments.

  Raises:
    RuntimeError: if no candidate matches, or more than one does.
  """

  def fn_norm(text):
    return string_normalize(text, lower=True,
                            remove_whitespace='',
                            remove_non_alphanumeric=True)

  dataset_name = fn_norm(dataset_name)
  # ====== get all datasets ====== #
  # each entry is (normalized name, member, is_loader_class)
  all_datasets = []
  for name, member in inspect.getmembers(sys.modules[__name__]):
    is_loader_class = (isinstance(member, type) and
                       issubclass(member, DataLoader))
    # plain module attributes that merely happen to be named `load_*`
    # cannot be used to load anything, so only callables are kept
    is_load_function = name.startswith('load_') and callable(member)
    if not (is_loader_class or is_load_function):
      continue
    # strip only the leading prefix; replacing every 'load_' occurrence
    # would mangle names such as `load_download_x`
    if name.startswith('load_'):
      name = name[len('load_'):]
    all_datasets.append((fn_norm(name), member, is_loader_class))
  # ====== search for best match ====== #
  matches = [entry for entry in all_datasets
             if entry[0] == dataset_name]
  if not matches:
    matches = [entry for entry in all_datasets
               if entry[0].startswith(dataset_name)]
  if not matches:
    raise RuntimeError("Cannot find dataset with name '%s', "
      "all available datasets are: %s" % (
          dataset_name,
          ', '.join([entry[0] for entry in all_datasets])))
  if len(matches) > 1:
    raise RuntimeError("Found multiple dataset for name: '%s', "
      "all the candidates are: %s" % (
          dataset_name,
          ', '.join([entry[0] for entry in matches])))
  # ====== extract the found dataset ====== #
  _, dataset, is_loader_class = matches[0]
  # The registered name no longer carries the 'load_' prefix, so the kind
  # of loader must be decided from the member itself, not from its name.
  if is_loader_class:
    return dataset.load()
  return dataset()