Esempio n. 1
0
def create(entity, entity_name, categories, mutability, **kwargs):
    """Create the workspace structure (data and spec file) for an entity and set the store configurations.

        Example:
            create('dataset', 'dataset-ex', categories=['computer-vision', 'images'], mutability='strict')

        Args:
            entity (str): The type of an ML entity. (dataset, labels or model).
            entity_name (str): An ml-git entity name to identify a ML entity.
            categories (list): Artifact's category name.
            mutability (str): Mutability type. The mutability options are strict, flexible and mutable.
            store_type (str, optional): Data store type [default: StoreType.S3H.value].
            version (int, optional): Version number assigned to the created artifact [default: 1].
            import_path (str, optional): Path to be imported to the project.
            bucket_name (str, optional): Bucket name.
            import_url (str, optional): Import data from a google drive url.
            credentials_path (str, optional): Directory of credentials.json.
            unzip (bool, optional): Unzip imported zipped files [default: False].
    """

    def option(name, default=None):
        # Optional settings arrive through **kwargs; absent ones fall back to the default.
        return kwargs.get(name, default)

    # Keys are the names expected by Repository.create; the wizard is always disabled here.
    args = {
        'artifact_name': entity_name,
        'category': categories,
        'mutability': mutability,
        'version_number': option('version', 1),
        'import': option('import_path'),
        'store_type': option('store_type', StoreType.S3H.value),
        'bucket_name': option('bucket_name'),
        'unzip': option('unzip', False),
        'import_url': option('import_url'),
        'credentials_path': option('credentials_path'),
        'wizard_config': False,
    }

    Repository(config_load(), entity).create(args)
Esempio n. 2
0
def remote_add(entity, remote_url, global_configuration=False):
    """Add a remote used to store the metadata of this ml-git project.

        Examples:
            remote_add('dataset', 'https://github.com/example/mlgit-datasets')

        Args:
            entity (str): The type of an ML entity. (repository, dataset, labels or model).
            remote_url (str): URL of an existing remote git repository.
            global_configuration (bool, optional): Use this option to set configuration at global level [default: False].
    """

    # The entity is used both to build the repository and as an argument of the call.
    Repository(config_load(), entity).repo_remote_add(entity, remote_url, global_configuration)
Esempio n. 3
0
def push(entity, entity_name, retries=2, clear_on_fail=False):
    """Push the data of a specific version of an ML entity.

        Example:
            push('dataset', 'dataset-ex')

        Args:
            entity (str): The type of an ML entity. (dataset, labels or model).
            entity_name (str): An ml-git entity name to identify a ML entity.
            retries (int, optional): Number of retries to upload the files to the storage [default: 2].
            clear_on_fail (bool, optional): Remove the files from the store in case of failure during the push operation [default: False].
    """

    configuration = config_load()
    repository = Repository(configuration, entity)
    repository.push(entity_name, retries, clear_on_fail)
Esempio n. 4
0
def add(entity_type, entity_name, bumpversion=False, fsck=False, file_path=None):
    """This command will add all the files under the directory into the ml-git index/staging area.

    Example:
        add('dataset', 'dataset-ex', bumpversion=True)

    Args:
        entity_type (str): The type of an ML entity. (dataset, labels or model)
        entity_name (str): The name of the ML entity you want to add the files.
        bumpversion (bool, optional): Increment the entity version number when adding more files [default: False].
        fsck (bool, optional): Run fsck after command execution [default: False].
        file_path (list, optional): List of files that must be added by the command [default: all files].
    """

    # Build a fresh list on every call: a literal ``[]`` default is created once
    # and shared between calls, so any mutation of it would leak into later calls.
    files = [] if file_path is None else file_path

    repo = Repository(config_load(), entity_type)
    repo.add(entity_name, files, bumpversion, fsck)
Esempio n. 5
0
def get_repository_instance(repo_type):
    """Return a Repository for ``repo_type`` after validating the type.

    Args:
        repo_type (str): One of the values in ``EntityType.to_list()`` or the literal 'project'.

    Returns:
        Repository: Repository built from the loaded configuration and ``repo_type``.

    Raises:
        RuntimeError: If ``repo_type`` is neither a known entity type nor 'project'.
    """
    project_repo_type = 'project'
    if repo_type in EntityType.to_list() or repo_type == project_repo_type:
        return Repository(config_load(), repo_type)

    # Unknown type: report the list of accepted entity types in the error message.
    message = output_messages['ERROR_INVALID_ENTITY_TYPE'] % EntityType.to_list()
    raise RuntimeError(message)
Esempio n. 6
0
    def test_tag_exist(self):
        # Lay out <test_dir>/metadata/dataset-ex with a spec file and a manifest.
        metadata_root = os.path.join(self.test_dir, 'metadata')
        spec_name = 'dataset-ex'
        spec_dir = os.path.join(metadata_root, spec_name)
        ensure_path_exists(spec_dir)
        shutil.copy('hdata/dataset-ex.spec', spec_dir + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(spec_dir, 'MANIFEST.yaml'))

        # Point the shared config at the test directory before building the objects.
        config['mlgit_path'] = self.test_dir
        metadata_handler = Metadata(spec_name, metadata_root, config, repotype)
        Repository(config, repotype).init()

        # tag_exists returns a 3-tuple; only its last element is checked here.
        _, _, metadata = metadata_handler.tag_exists(self.test_dir)
        self.assertFalse(metadata is None)
Esempio n. 7
0
    def test_get_tag(self):
        # Lay out <test_dir>/metadata/dataset-ex with a spec file and a manifest.
        metadata_root = os.path.join(self.test_dir, 'metadata')
        spec_name = 'dataset-ex'
        spec_dir = os.path.join(metadata_root, spec_name)
        ensure_path_exists(spec_dir)
        shutil.copy('hdata/dataset-ex.spec', spec_dir + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(spec_dir, 'MANIFEST.yaml'))

        # Point the shared config at the test directory before building the objects.
        config['mlgit_path'] = self.test_dir
        metadata_handler = Metadata(spec_name, metadata_root, config, DATASETS)
        Repository(config, DATASETS).init()

        # list_tags is patched so get_tag resolves against a single known tag.
        known_tags = ['computer__images__dataset-ex__1']
        with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=known_tags):
            target_tag = metadata_handler.get_tag(spec_name, -1)
        self.assertEqual(target_tag, known_tags[0])
        clear(metadata_handler.path)
Esempio n. 8
0
def init(entity):
    """Initialize the ml-git repository or one of its entities.

        Examples:
            init('repository')
            init('dataset')

        Args:
            entity (str): The type of entity that will be initialized (repository, dataset, labels or model).
    """

    # The literal 'repository' initializes the ml-git project itself.
    if entity == 'repository':
        init_mlgit()
        return

    # Anything that is not a known entity type is only logged as an error.
    if entity not in EntityType.to_list():
        log.error('The type of entity entered is invalid. Valid types are: [repository, dataset, labels or model]')
        return

    Repository(config_load(), entity).init()
Esempio n. 9
0
def clone(repository_url, folder=None, track=False):
    """This command will clone minimal configuration files from repository-url with valid .ml-git/config.yaml,
    then initialize the metadata according to configurations.

    Example:
        clone('https://github.com/example/mlgit-repository')

    Args:
        repository_url (str): The git repository that will be cloned.
        folder (str, optional): Directory that can be created to execute the clone command [default: current path].
        track (bool, optional): Set if the tracking of the cloned repository should be kept [default: False].

    """

    repo = Repository(config_load(), 'project')
    if folder is not None:
        repo.clone_config(repository_url, folder, track)
        return

    # No target folder: clone into a temporary directory created under the
    # current directory and move only the '.ml-git' folder next to the caller.
    current_directory = os.getcwd()
    with tempfile.TemporaryDirectory(dir=current_directory) as tempdir:
        mlgit_path = os.path.join(tempdir, 'mlgit')
        try:
            repo.clone_config(repository_url, mlgit_path, track)
            # An already existing '.ml-git' in the current directory is left untouched.
            if not os.path.exists(os.path.join(current_directory, '.ml-git')):
                shutil.move(os.path.join(mlgit_path, '.ml-git'),
                            current_directory)
        finally:
            # Restore the working directory even when the clone or the move fails,
            # so the process is never left inside the temporary directory while it
            # is being removed (clone_config presumably changes the cwd -- TODO confirm).
            os.chdir(current_directory)
Esempio n. 10
0
def checkout(entity, tag, sampling=None, retries=2, force=False, dataset=False, labels=False, version=-1):
    """Retrieve the data of a specific version of an ML entity.

    Example:
        checkout('dataset', 'computer-vision__images3__imagenet__1')

    Args:
        entity (str): The type of an ML entity. (dataset, labels or model)
        tag (str): An ml-git tag to identify a specific version of an ML entity.
        sampling (dict): group: <amount>:<group> The group sample option consists of amount and group used to
                                 download a sample.\n
                         range: <start:stop:step> The range sample option consists of start, stop and step used
                                to download a sample. The start parameter can be equal or greater than zero. The
                                stop parameter can be 'all', -1 or any integer above zero.\n
                         random: <amount:frequency> The random sample option consists of amount and frequency
                                used to download a sample.
                         seed: The seed is used to initialize the pseudorandom numbers.
        retries (int, optional): Number of retries to download the files from the storage [default: 2].
        force (bool, optional): Force checkout command to delete untracked/uncommitted files from the local repository [default: False].
        dataset (bool, optional): If exist a dataset related with the model or labels, this one must be downloaded [default: False].
        labels (bool, optional): If exist labels related with the model, they must be downloaded [default: False].
        version (int, optional): Forwarded to the repository checkout as the 'version' option [default: -1].

    Returns:
        str: The path where the data was checked out, or None when the sampling
        is rejected by validate_sample or the expected path does not exist.

    """

    repo = Repository(config_load(), entity)
    repo.update()

    # A sampling is only validated when one was supplied.
    sampling_is_valid = sampling is None or validate_sample(sampling)
    if not sampling_is_valid:
        return None

    options = {
        'with_dataset': dataset,
        'with_labels': labels,
        'retry': retries,
        'force': force,
        'bare': False,
        'version': version,
    }
    repo.checkout(tag, sampling, options)

    # The expected location is <entity>/<every '__'-separated tag part except the last>.
    *tag_path_parts, _ = tag.split('__')
    data_path = os.path.join(entity, *tag_path_parts)
    return data_path if os.path.exists(data_path) else None
Esempio n. 11
0
def get_repository_instance(repo_type):
    """Return a Repository for ``repo_type`` built from the loaded configuration."""
    configuration = config_load()
    return Repository(configuration, repo_type)
Esempio n. 12
0
def init_repository(entity_type=DATASETS):
    """Return a Repository for ``entity_type`` (default: ``DATASETS``) built from the loaded configuration."""
    configuration = config_load()
    return Repository(configuration, entity_type)
Esempio n. 13
0
def init_repository(entity_type='dataset'):
    """Return a Repository for ``entity_type`` (default: 'dataset') built from the loaded configuration."""
    configuration = config_load()
    return Repository(configuration, entity_type)
Esempio n. 14
0
def init_repository(entity_type=DATASETS):
    """Return a Repository for ``entity_type`` (default: ``DATASETS``), loading the configuration with ``hide_logs=True``."""
    configuration = config_load(hide_logs=True)
    return Repository(configuration, entity_type)