def main():
    """Generate the documentation's markdown tables.

    Writes ``tables/datasets.md`` (one row per dataset) plus one table per
    metadata dimension (activities, features, ...) under the ``tables``
    build directory.
    """
    # Ensure the output directory exists. exist_ok avoids the TOCTOU race
    # between a separate exists() check and makedirs().
    root = build_path("tables")
    os.makedirs(root, exist_ok=True)

    # Current list of datasets.
    lines = []
    datasets = load_datasets_metadata()
    for dataset in datasets:
        dataset = DatasetMeta(dataset)
        # NOTE(review): head/space come from the *last* iteration; if the
        # dataset list were ever empty this would raise NameError below —
        # confirm load_datasets_metadata() is always non-empty.
        head, space, line = make_dataset_row(dataset)
        lines.append(line)
    with open(build_path("tables", "datasets.md"), "w") as fil:
        fil.write("{}\n".format(head))
        fil.write("{}\n".format(space))
        for line in lines:
            fil.write("{}\n".format(line))

    # Iterate over the other data tables, one markdown file per dimension.
    dims = [
        "activities",
        "features",
        "locations",
        "models",
        "pipelines",
        "transformers",
        "visualisations",
    ]
    for dim in dims:
        with open(build_path("tables", f"{dim}.md"), "w") as fil:
            data = load_metadata(f"{dim}.yaml")
            # capitalize() == first char upper + rest lower for these
            # all-lowercase ASCII dimension names.
            fil.write(f"| Index | {dim.capitalize()} | value | \n")
            fil.write("| ----- | ----- | ----- | \n")
            if isinstance(data, dict):
                for ki, (key, value) in enumerate(data.items()):
                    # Dict-valued entries carry a 'description' field that
                    # is rendered with inline links.
                    if isinstance(value, dict) and "description" in value:
                        value = make_links(value["description"])
                    fil.write(f"| {ki} | {key} | {value} | \n")
def main():
    """Build the markdown tables used by the documentation.

    Produces ``tables/datasets.md`` and one table file per metadata
    dimension inside the ``tables`` build directory.
    """
    # Create the output directory; exist_ok=True removes the race between
    # checking os.path.exists and calling os.makedirs.
    root = build_path('tables')
    os.makedirs(root, exist_ok=True)

    # Current list of datasets.
    lines = []
    datasets = load_datasets_metadata()
    for dataset in datasets:
        dataset = DatasetMeta(dataset)
        # NOTE(review): head/space are taken from the last loop iteration;
        # an empty dataset list would raise NameError below — presumably
        # the metadata is never empty; verify against the loader.
        head, space, line = make_dataset_row(dataset)
        lines.append(line)
    with open(build_path('tables', 'datasets.md'), 'w') as fil:
        fil.write('{}\n'.format(head))
        fil.write('{}\n'.format(space))
        # Batch the row writes instead of many tiny fil.write calls.
        fil.writelines('{}\n'.format(line) for line in lines)

    # Iterate over the other data tables.
    dims = [
        'activities',
        'features',
        'locations',
        'models',
        'pipelines',
        'transformers',
        'visualisations',
    ]
    for dim in dims:
        with open(build_path('tables', f'{dim}.md'), 'w') as fil:
            data = load_metadata(f'{dim}.yaml')
            # str.capitalize() is equivalent to upper-casing the first
            # character and lower-casing the remainder.
            fil.write(f'| Index | {dim.capitalize()} | value | \n')
            fil.write('| ----- | ----- | ----- | \n')
            if isinstance(data, dict):
                for ki, (key, value) in enumerate(data.items()):
                    # Expand dict entries to their linked description.
                    if isinstance(value, dict) and 'description' in value:
                        value = make_links(value['description'])
                    fil.write(f'| {ki} | {key} | {value} | \n')
def build_path(self, key):
    """Build the on-disk path for *key*, rooted at this object's identifier.

    Args:
        key: A non-empty ``Key`` whose first element is expected to be a
            string.

    Returns:
        The path produced by the module-level ``build_path`` helper for
        ``(self.identifier, str(key))``.
    """
    assert isinstance(key, Key)
    assert len(key) > 0
    if not isinstance(key[0], str):
        # Bug fix: the original f-string referenced the undefined name
        # `args`, raising NameError whenever this branch executed; the
        # inspected value is key[0]. Per the file's convention the error
        # is logged, not raised, and the path is still built below.
        logger.exception(
            ValueError(
                f'The argument for `build_path` must be strings, but got the type: {type(key[0])}'
            ))
    return build_path(self.identifier, str(key))
def __init__(self, name, parent=None, meta=None, default_backend='numpy'):
    """Initialise the graph: resolve metadata, register storage backends.

    Args:
        name: Name of the graph; also used as the metadata path when no
            explicit ``meta`` is supplied.
        parent: Optional parent graph, forwarded to the computational
            collections.
        meta: Metadata specification — a ``BaseMeta`` instance, a path
            (``str``/``Path``), or ``None`` to fall back on ``name``.
        default_backend: Key of the backend selected by default.
    """
    super(BaseGraph, self).__init__(name=name)

    # Resolve the metadata from whichever form the caller provided.
    if isinstance(meta, BaseMeta):
        self.meta = meta
    elif isinstance(meta, (str, Path)):
        self.meta = BaseMeta(path=meta)
    elif isinstance(name, (str, Path)):
        self.meta = BaseMeta(path=name)
    else:
        # File convention: log the problem rather than raising.
        logger.exception(
            ValueError(
                f'Ambiguous metadata specification with name={name} and meta={meta}'
            ))

    self.parent = parent
    self.fs_root = build_path()

    # Register the serialisation backends, all rooted at fs_root.
    registered_backends = (
        ('pickle', PickleBackend(self.fs_root)),
        ('pandas', PandasBackend(self.fs_root)),
        ('numpy', NumpyBackend(self.fs_root)),
        ('json', JsonBackend(self.fs_root, cls=NumpyEncoder)),
        ('sklearn', ScikitLearnBackend(self.fs_root)),
        ('png', PNGBackend(self.fs_root)),
    )
    for backend_key, backend in registered_backends:
        self.add_backend(backend_key, backend)
    self.set_default_backend(default_backend)

    self.collections = ComputationalCollection(
        index=IndexSet(graph=self, parent=parent),
        outputs=ComputationalSet(graph=self, parent=parent),
    )
from src.keys import Key from src.meta import BaseMeta from src.utils.decorators import DecoratorBase from src.utils.loaders import build_path from src.utils.loaders import get_yaml_file_list from src.utils.misc import NumpyEncoder from src.utils.misc import randomised_order __all__ = ["ExecutionGraph", "get_ancestral_metadata"] INDEX_FILES_SET = set( get_yaml_file_list("indices", stem=True) + get_yaml_file_list("tasks", stem=True) + get_yaml_file_list("data_partitions", stem=True)) DATA_ROOT: Path = build_path() BACKEND_DICT = dict( none=VolatileBackend(), pickle=PickleBackend(DATA_ROOT), pandas=PandasBackend(DATA_ROOT), numpy=NumpyBackend(DATA_ROOT), json=JsonBackend(DATA_ROOT, cls=NumpyEncoder), sklearn=ScikitLearnBackend(DATA_ROOT), png=PNGBackend(DATA_ROOT), yaml=YamlBackend(DATA_ROOT), ) @lru_cache(2**16) def is_index_key(key: Optional[Union[Key, str]]) -> bool: