Example 1
import os

from src.utils.loaders import build_path

# load_datasets_metadata, DatasetMeta, make_dataset_row, load_metadata and
# make_links are project-local helpers; their import paths are not shown in
# these examples.


def main():
    # Ensure the output directory exists
    root = build_path("tables")
    os.makedirs(root, exist_ok=True)

    # Build one markdown row per dataset; make_dataset_row also returns the
    # header and separator rows, which are identical for every dataset.
    lines = []
    datasets = load_datasets_metadata()
    for dataset in datasets:
        meta = DatasetMeta(dataset)
        head, space, line = make_dataset_row(meta)
        lines.append(line)
    with open(build_path("tables", "datasets.md"), "w") as fil:
        fil.write(f"{head}\n")
        fil.write(f"{space}\n")
        for line in lines:
            fil.write(f"{line}\n")

    # Iterate over the other data tables
    dims = [
        "activities",
        "features",
        "locations",
        "models",
        "pipelines",
        "transformers",
        "visualisations",
    ]

    for dim in dims:
        with open(build_path("tables", f"{dim}.md"), "w") as fil:
            data = load_metadata(f"{dim}.yaml")
            fil.write(f"| Index | {dim.capitalize()} | Value |\n")
            fil.write("| ----- | ----- | ----- |\n")
            if isinstance(data, dict):
                for ki, (key, value) in enumerate(data.items()):
                    # Prefer the human-readable description when one is given
                    if isinstance(value, dict) and "description" in value:
                        value = make_links(value["description"])
                    fil.write(f"| {ki} | {key} | {value} |\n")
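For reference, a minimal sketch of the `make_dataset_row` contract assumed above: it returns the markdown header row, the separator row, and the data row for a single dataset. The `DatasetMeta` fields used here (`name`, `modalities`) are illustrative assumptions, not the real schema.

def make_dataset_row(meta):
    # Each column is a (header, value) pair; extend to match the real schema
    columns = [("Name", meta.name), ("Modalities", meta.modalities)]
    head = "| " + " | ".join(header for header, _ in columns) + " |"
    space = "| " + " | ".join("-----" for _ in columns) + " |"
    line = "| " + " | ".join(str(value) for _, value in columns) + " |"
    return head, space, line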
Example 2
    def build_path(self, key):
        """Build the filesystem path for `key` under this graph's identifier.

        Args:
            key: A non-empty `Key` whose first component must be a string.

        Returns:
            The path built from `self.identifier` and the string form of `key`.
        """
        assert isinstance(key, Key)
        assert len(key) > 0

        if not isinstance(key[0], str):
            logger.exception(
                ValueError(
                    f'The first component of a `build_path` key must be a '
                    f'string, but got the type: {type(key[0])}'))

        return build_path(self.identifier, str(key))
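The `Key` type is imported from `src.keys` (see the imports in Example 4) but its definition is not shown; this is a minimal sketch, written as an assumption rather than the real implementation, of the behaviour the method above relies on (length, indexing, and stringification):

class Key:
    """Hypothetical sketch: an ordered sequence of path components."""

    def __init__(self, *components):
        self.components = tuple(components)

    def __len__(self):
        return len(self.components)

    def __getitem__(self, index):
        return self.components[index]

    def __str__(self):
        # Stringify to a path-like identifier
        return '/'.join(map(str, self.components))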
Example 3
    def __init__(self, name, parent=None, meta=None, default_backend='numpy'):
        """Initialise the graph, resolve its metadata, and register backends.

        Args:
            name: The graph name; also used as the metadata path when `meta`
                is not given.
            parent: An optional parent graph.
            meta: A `BaseMeta` instance, or a path from which to load one.
            default_backend: The name of the backend to use when none is
                specified explicitly.
        """
        super().__init__(name=name)

        # Resolve the metadata: prefer `meta`, falling back to `name` as a path
        if isinstance(meta, BaseMeta):
            self.meta = meta

        elif isinstance(meta, (str, Path)):
            self.meta = BaseMeta(path=meta)

        elif isinstance(name, (str, Path)):
            self.meta = BaseMeta(path=name)

        else:
            logger.exception(
                ValueError(
                    f'Ambiguous metadata specification with name={name} and meta={meta}'
                ))

        self.parent = parent

        self.fs_root = build_path()

        self.add_backend('pickle', PickleBackend(self.fs_root))
        self.add_backend('pandas', PandasBackend(self.fs_root))
        self.add_backend('numpy', NumpyBackend(self.fs_root))
        self.add_backend('json', JsonBackend(self.fs_root, cls=NumpyEncoder))
        self.add_backend('sklearn', ScikitLearnBackend(self.fs_root))
        self.add_backend('png', PNGBackend(self.fs_root))

        self.set_default_backend(default_backend)

        self.collections = ComputationalCollection(
            index=IndexSet(graph=self, parent=parent),
            outputs=ComputationalSet(graph=self, parent=parent),
        )
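For context, a minimal sketch of the backend-registry pattern the constructor relies on; `add_backend` and `set_default_backend` are assumed to behave roughly like this (the real methods live on the graph base class, which is not shown in these examples):

class BackendRegistryMixin:
    """Hypothetical sketch of the backend registration behaviour."""

    def __init__(self):
        self.backends = {}
        self.default_backend = None

    def add_backend(self, name, backend):
        # Register a serialisation backend under a short name, e.g. 'numpy'
        self.backends[name] = backend

    def set_default_backend(self, name):
        # Only a previously registered backend may become the default
        if name not in self.backends:
            raise KeyError(f'Unknown backend: {name}')
        self.default_backend = name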
Example 4
from functools import lru_cache
from pathlib import Path
from typing import Optional
from typing import Union

from src.keys import Key
from src.meta import BaseMeta
from src.utils.decorators import DecoratorBase
from src.utils.loaders import build_path
from src.utils.loaders import get_yaml_file_list
from src.utils.misc import NumpyEncoder
from src.utils.misc import randomised_order

# The backend classes used below (VolatileBackend, PickleBackend, PandasBackend,
# NumpyBackend, JsonBackend, ScikitLearnBackend, PNGBackend, YamlBackend) are
# project-local; their import lines are not included in this excerpt.

__all__ = ["ExecutionGraph", "get_ancestral_metadata"]

INDEX_FILES_SET = set(
    get_yaml_file_list("indices", stem=True) +
    get_yaml_file_list("tasks", stem=True) +
    get_yaml_file_list("data_partitions", stem=True))
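# INDEX_FILES_SET holds the stems of every YAML file under the indices, tasks
# and data_partitions directories, presumably so that is_index_key below can
# answer membership queries with a cheap set lookup.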

DATA_ROOT: Path = build_path()

BACKEND_DICT = dict(
    none=VolatileBackend(),
    pickle=PickleBackend(DATA_ROOT),
    pandas=PandasBackend(DATA_ROOT),
    numpy=NumpyBackend(DATA_ROOT),
    json=JsonBackend(DATA_ROOT, cls=NumpyEncoder),
    sklearn=ScikitLearnBackend(DATA_ROOT),
    png=PNGBackend(DATA_ROOT),
    yaml=YamlBackend(DATA_ROOT),
)
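# BACKEND_DICT centralises one instance of every serialisation backend, keyed
# by the short names used throughout these examples (e.g. 'numpy', 'json').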


@lru_cache(2**16)
def is_index_key(key: Optional[Union[Key, str]]) -> bool: