Esempio n. 1
0
    def export(self, bucket, tag, retry):
        """Export the entity version identified by ``tag`` to ``bucket``.

        The tag is checked out, ``LocalRepository.export_tag`` is run against
        it, and ``_checkout_ref()`` is called again without arguments afterwards.

        Args:
            bucket: Destination forwarded unchanged to ``export_tag``.
            tag: Tag of the version to export; must pass ``_tag_exists``.
            retry: Retry setting forwarded unchanged to ``export_tag``.

        Returns:
            ``(None, None)`` when validation or the checkout fails; otherwise
            the function falls off the end and returns ``None``.
        """
        try:
            # The parsed parts are unused; the call is kept so that a tag that
            # cannot be parsed is reported through the generic handler below.
            _, _, _ = spec_parse(tag)
            get_root_path()
            if not self._tag_exists(tag):
                return None, None
        except InvalidGitRepositoryError:
            log.error('You are not in an initialized ml-git repository.',
                      class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None

        try:
            self._checkout_ref(tag)
        except Exception:
            log.error('Unable to checkout to %s' % tag,
                      class_name=REPOSITORY_CLASS_NAME)
            return None, None

        try:
            local = LocalRepository(
                self.__config,
                get_objects_path(self.__config, self.__repo_type),
                self.__repo_type)
            local.export_tag(
                get_metadata_path(self.__config, self.__repo_type),
                tag, bucket, retry)
        finally:
            # Undo the checkout of `tag` even when the export raises, so the
            # metadata repository is not left on the tag after a failure.
            self._checkout_ref()
Esempio n. 2
0
def init_mlgit():
    """Create the ``.ml-git`` directory and save the initial configuration.

    When ``get_root_path()`` succeeds the project is already initialized: an
    informational message is logged and nothing else happens. A
    ``PermissionError`` while creating the directory is logged and aborts the
    initialization; an already existing directory is tolerated.
    """
    try:
        existing_root = get_root_path()
        already_there = 'You already are in a ml-git repository (%s)' % (
            os.path.join(existing_root, ROOT_FILE_NAME))
        log.info(already_there, class_name=ADMIN_CLASS_NAME)
        return
    except Exception:
        # No root could be resolved: carry on and initialize one here.
        pass

    try:
        os.mkdir('.ml-git')
    except FileExistsError:
        # The directory is already present; only the config has to be saved.
        pass
    except PermissionError:
        log.error(
            'Permission denied. You need write permission to initialize ml-git in this directory.',
            class_name=ADMIN_CLASS_NAME)
        return

    mlgit_config_save()
    new_root = get_root_path()
    initialized = 'Initialized empty ml-git repository in %s' % (
        os.path.join(new_root, ROOT_FILE_NAME))
    log.info(initialized, class_name=ADMIN_CLASS_NAME)
Esempio n. 3
0
    def test_get_root_path(self):
        """get_root_path must raise RootPathException when config.yaml is missing.

        The config file is temporarily renamed to simulate its absence.
        """
        path = get_root_path()
        yaml_path_src = os.path.join(path, '.ml-git', 'config.yaml')
        yaml_path_dst = os.path.join(path, '.ml-git', 'coasdasdasnfig.ylma')
        os.rename(yaml_path_src, yaml_path_dst)
        try:
            self.assertRaises(RootPathException, get_root_path)
        finally:
            # Put the config file back even if the assertion fails; otherwise
            # the renamed file would remain on disk after this test.
            os.rename(yaml_path_dst, yaml_path_src)
Esempio n. 4
0
def check_successfully_clone(project_dir, git_dir):
    """Check that ``project_dir`` resolves to a project root after a clone.

    Changes the working directory to ``project_dir`` and calls
    ``get_root_path()``. On ``RootPathException`` both ``project_dir`` and
    ``git_dir`` are passed to ``clear`` and an error is logged.

    Returns:
        bool: ``True`` when the root path was resolved, ``False`` otherwise.
    """
    clone_is_valid = True
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clone_is_valid = False
        clear(project_dir)
        log.error(output_messages['ERROR_MINIMAL_CONFIGURATION'],
                  class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
    return clone_is_valid
Esempio n. 5
0
def check_successfully_clone(project_dir, git_dir):
    """Check that ``project_dir`` resolves to a project root after a clone.

    Changes the working directory to ``project_dir`` and calls
    ``get_root_path()``. On ``RootPathException`` both ``project_dir`` and
    ``git_dir`` are passed to ``clear`` and an error is logged.

    Returns:
        bool: ``True`` when the root path was resolved, ``False`` otherwise.
    """
    clone_is_valid = True
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clone_is_valid = False
        clear(project_dir)
        log.error('Wrong minimal configuration files!',
                  class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
    return clone_is_valid
Esempio n. 6
0
def merged_config_load(hide_logs=False):
    """Load the configuration, combining local and global settings when possible.

    Inside a project the global configuration is merged with the module-level
    ``mlgit_config`` and the result is then merged with the local
    configuration via ``merge_conf``. If a ``RootPathException`` is raised
    along the way, only the global configuration is returned.

    Args:
        hide_logs (bool): Forwarded to the loaders; also suppresses the debug
            message emitted before the final merge.
    """
    global mlgit_config
    try:
        get_root_path()
        global_layer = merge_conf(global_config_load(hide_logs), mlgit_config)
        local_layer = mlgit_config_load(hide_logs)
        if not hide_logs:
            log.debug(output_messages['DEBUG_MERGING_LOCAL_AND_GLOBAL_CONFIG'])
        return merge_conf(local_layer, global_layer)
    except RootPathException:
        return global_config_load(hide_logs)
Esempio n. 7
0
def clone_config_repository(url, folder, track):
    """Clone the repository at ``url`` into an empty directory and validate it.

    Args:
        url: Remote passed to ``Repo.clone_from``.
        folder: Optional sub-folder of the current directory to clone into;
            when ``None`` the current directory itself is used.
        track: When falsy, the ``.git`` directory of the clone is cleared.

    Returns:
        bool: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        if get_root_path():
            log.error('You are in initialized ml-git project.',
                      class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # No project root found, so cloning here is allowed.
        pass

    git_dir = '.git'

    try:
        if folder is None:
            project_dir = os.getcwd()
        else:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)

        if os.listdir(project_dir):
            log.error(
                'The path [%s] is not an empty directory. Consider using --folder to create an empty folder.'
                % project_dir,
                class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        error_msg = str(e)
        is_git_error = e.__class__ == GitCommandError
        permission_denied = (
            (is_git_error and 'Permission denied' in str(e.args[2]))
            or e.__class__ == PermissionError)
        if permission_denied:
            error_msg = 'Permission denied in folder %s' % project_dir
        else:
            # Only a folder requested by the caller is cleared; the current
            # directory is left alone.
            if folder is not None:
                clear(project_dir)
            if is_git_error:
                error_msg = 'Could not read from remote repository.'
        log.error(error_msg, class_name=ADMIN_CLASS_NAME)
        return False

    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!',
                  class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False

    if not track:
        clear(os.path.join(project_dir, git_dir))

    return True
Esempio n. 8
0
 def __init_manager(self, type_entity):
     """Create ``self._manager`` for ``type_entity`` and initialize it if needed.

     A warning is logged and no manager is created when the loaded config has
     a falsy ``git`` entry for the entity. Any exception is logged as an error
     instead of being propagated.
     """
     try:
         get_root_path()
         config = config_load()
         if config[type_entity]['git']:
             self._manager = MetadataManager(config, repo_type=type_entity)
             if not self._manager.check_exists():
                 self._manager.init()
         else:
             log.warn(
                 output_messages['WARN_REPOSITORY_NOT_FOUND_FOR_ENTITY'] %
                 type_entity,
                 class_name=LocalEntityManager.__name__)
     except Exception as e:
         log.error(e, class_name=LocalEntityManager.__name__)
Esempio n. 9
0
def __get_log_files_path():
    """Return ``LOG_FILES_PATH`` joined to the project root.

    Falls back to the current working directory when ``get_root_path()``
    raises ``RootPathException``.
    """
    try:
        base_dir = get_root_path()
    except RootPathException:
        base_dir = os.getcwd()
    return os.path.join(base_dir, LOG_FILES_PATH)
Esempio n. 10
0
    def garbage_collector(self):
        """Run the cache and objects garbage collectors for every entity type.

        Entity types for which ``metadata_exists`` is false are skipped. The
        removed-file counts and reclaimed sizes reported by both collectors are
        summed and logged at the end. If no entity type has metadata, an error
        is logged instead.
        """
        found_metadata = False
        total_removed = 0
        total_reclaimed = 0
        for entity in EntityType:
            repo_type = entity.value
            if not self.metadata_exists(repo_type):
                continue

            log.info(output_messages['INFO_STARTING_GC'] % repo_type,
                     class_name=REPOSITORY_CLASS_NAME)
            found_metadata = True
            index_path = get_index_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            hashes = self._get_blobs_hashes(index_path, objects_path,
                                            repo_type)

            cache_store = Cache(get_cache_path(self.__config, repo_type))
            cache_removed, cache_reclaimed = cache_store.garbage_collector(hashes)
            objects_store = Objects('', objects_path)
            objects_removed, objects_reclaimed = objects_store.garbage_collector(hashes)

            total_reclaimed += objects_reclaimed + cache_reclaimed
            total_removed += objects_removed + cache_removed

        if not found_metadata:
            log.error(output_messages['ERROR_UNINITIALIZED_METADATA'],
                      class_name=REPOSITORY_CLASS_NAME)
            return

        removed_summary = (humanize.intword(total_removed),
                           os.path.join(get_root_path(), '.ml-git'))
        log.info(output_messages['INFO_REMOVED_FILES'] % removed_summary,
                 class_name=REPOSITORY_CLASS_NAME)
        log.info(output_messages['INFO_RECLAIMED_SPACE'] %
                 humanize.naturalsize(total_reclaimed),
                 class_name=REPOSITORY_CLASS_NAME)
Esempio n. 11
0
def clone_config_repository(url, folder, track):
    """Clone the repository at ``url`` into an empty directory and validate it.

    Args:
        url: Remote passed to ``Repo.clone_from``.
        folder: Optional sub-folder of the current directory to clone into;
            when ``None`` the current directory itself is used.
        track: When falsy, the ``.git`` directory of the clone is cleared.

    Returns:
        bool: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        if get_root_path():
            log.error(output_messages['ERROR_IN_INTIALIZED_PROJECT'],
                      class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # No project root found, so cloning here is allowed.
        pass

    git_dir = '.git'

    try:
        if folder is None:
            project_dir = os.getcwd()
        else:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)

        if os.listdir(project_dir):
            log.error(output_messages['ERROR_PATH_NOT_EMPTY'] % project_dir,
                      class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        log.error(handle_clone_exception(e, folder, project_dir),
                  class_name=ADMIN_CLASS_NAME)
        return False

    if not check_successfully_clone(project_dir, git_dir):
        return False

    if not track:
        clear(os.path.join(project_dir, git_dir))

    return True
Esempio n. 12
0
def search_spec_file(repotype, spec, categories_path):
    """Locate the spec file of ``spec`` inside the ``repotype`` workspace.

    The directory ``<root>/<repotype>/<categories_path>/<spec>`` is listed
    first; if that fails, ``<root>/<repotype>/<spec>`` is tried. The first
    listed entry whose name contains ``spec`` is returned.

    Returns:
        tuple: ``(directory, file_name)`` of the matching entry.

    Raises:
        SearchSpecException: If neither directory can be listed or no entry
            name contains ``spec``.
    """
    base = get_root_path()
    with_categories = os.path.join(base, repotype, categories_path, spec)
    without_categories = os.path.join(base, repotype, spec)

    try:
        entries = os.listdir(with_categories)
        found_dir = with_categories
    except Exception:
        try:
            entries = os.listdir(without_categories)
            found_dir = without_categories
        except Exception:  # TODO: search '.' path as well
            # Neither candidate directory could be listed, which means that
            # the entity name passed is wrong.
            raise SearchSpecException('The entity name passed is wrong. Please check again')

    match = next((entry for entry in entries if spec in entry), None)
    if match is None:
        raise SearchSpecException('The entity name passed is wrong. Please check again')

    log.debug('search spec file: found [%s]-[%s]' % (found_dir, match), class_name=ML_GIT_PROJECT_NAME)
    return found_dir, match
Esempio n. 13
0
def get_config_path(global_config=False):
    """Return the path of the global or the project configuration file.

    ``get_root_path()`` is called in both cases, so any exception it raises
    propagates even when the global path is requested.
    """
    project_root = get_root_path()
    if global_config:
        return get_global_config_path()
    return os.path.join(project_root, CONFIG_FILE)
Esempio n. 14
0
def search_spec_file(repotype, spec, root_path=None):
    """Walk a directory tree looking for the file ``spec + SPEC_EXTENSION``.

    Args:
        repotype: Sub-directory of the project root searched by default.
        spec: Spec name without extension.
        root_path: Directory to walk; defaults to ``<root>/<repotype>``.

    Returns:
        tuple: ``(directory, file_name)`` of the first match found.

    Raises:
        SearchSpecException: If the walk finishes without a match.
    """
    search_root = root_path
    if search_root is None:
        search_root = os.path.join(get_root_path(), repotype)
    target = spec + SPEC_EXTENSION
    for current_dir, _subdirs, file_names in os.walk(search_root):
        if target in file_names:
            return current_dir, target
    raise SearchSpecException(output_messages['ERROR_WRONG_NAME'])
Esempio n. 15
0
def check_entity_exists(context, entity_type, entity_name):
    """Look for the spec file of ``entity_name`` under ``<root>/<entity_type>``.

    Returns:
        tuple: ``(directory, file_name)`` of the first match found. When the
        walk finds nothing, an error is logged and ``context.exit()`` is called.
    """
    search_root = os.path.join(get_root_path(), entity_type)
    target = entity_name + SPEC_EXTENSION
    for current_dir, _subdirs, file_names in os.walk(search_root):
        if target in file_names:
            return current_dir, target
    log.error(output_messages['ERROR_WRONG_NAME'])
    context.exit()
Esempio n. 16
0
 def test_import_dir(self):
     """import_dir must leave files in both the source and the destination."""
     root_path = get_root_path()
     src = os.path.join(root_path, 'hdata')
     dst = os.path.join(root_path, 'dst_dir')
     try:
         import_dir(src, dst)
         self.assertGreater(len(os.listdir(dst)), 0)
         self.assertGreater(len(os.listdir(src)), 0)
     finally:
         # Remove the destination even when an assertion fails, so that it
         # is not left on disk after the test.
         if os.path.exists(dst):
             shutil.rmtree(dst)
Esempio n. 17
0
def checkout(entity,
             tag,
             sampling=None,
             retries=2,
             force=False,
             dataset=False,
             labels=False,
             version=-1):
    """Retrieve the data of a specific version of an ML entity.

    Example:
        checkout('datasets', 'computer-vision__images3__imagenet__1')

    Args:
        entity (str): The type of an ML entity. (datasets, labels or models)
        tag (str): An ml-git tag to identify a specific version of an ML entity.
        sampling (dict, optional): Sample selection, checked with
            ``validate_sample`` before use. Supported keys:
            group: <amount>:<group>, amount and group used to download a sample.
            range: <start:stop:step>, where start is zero or greater and stop
                is 'all', -1 or any integer above zero.
            random: <amount:frequency>, amount and frequency used to download
                a sample.
            seed: value used to initialize the pseudorandom numbers.
        retries (int, optional): Number of retries to download the files from the storage [default: 2].
        force (bool, optional): Force checkout command to delete untracked/uncommitted files from the local repository [default: False].
        dataset (bool, optional): If exist a dataset related with the model or labels, this one must be downloaded [default: False].
        labels (bool, optional): If exist labels related with the model, they must be downloaded [default: False].
        version (int, optional): Passed to the repository checkout as the ``version`` option [default: -1].

    Returns:
        str: The checked-out data path relative to the project root, or
        ``None`` when the sampling is invalid or that path does not exist.

    """

    repo = get_repository_instance(entity)
    repo.update()
    if sampling is not None and not validate_sample(sampling):
        return None

    checkout_options = {
        'with_dataset': dataset,
        'with_labels': labels,
        'retry': retries,
        'force': force,
        'bare': False,
        'version': version,
    }
    repo.checkout(tag, sampling, checkout_options)

    # A full tag carries the spec name as its middle component; anything else
    # is used as the spec name as-is.
    if re.search(RGX_TAG_FORMAT, tag):
        _, spec_name, _ = spec_parse(tag)
    else:
        spec_name = tag
    spec_dir, _ = search_spec_file(entity, spec_name)
    relative_dir = os.path.relpath(spec_dir, get_root_path())
    return relative_dir if os.path.exists(relative_dir) else None
Esempio n. 18
0
def __get_conf_filepath():
    """Build the path of the configuration file.

    The base directory comes from the ``MLMODELS_PATH`` environment variable,
    or from the ``mlgit_path`` key when the variable is unset. The result is
    anchored at the project root when it can be resolved; otherwise the
    unanchored path is returned.
    """
    base_dir = os.getenv('MLMODELS_PATH')
    if base_dir is None:
        base_dir = get_key('mlgit_path')
    try:
        project_root = get_root_path()
        conf_relative = os.sep.join([base_dir, get_key('mlgit_conf')])
        return os.path.join(project_root, conf_relative)
    except Exception:
        return os.sep.join([base_dir, get_key('mlgit_conf')])
Esempio n. 19
0
def init_mlgit():
    """Create the ``.ml-git`` directory and save the initial configuration.

    When ``get_root_path()`` succeeds the project is already initialized: an
    informational message is logged and nothing else happens. A
    ``PermissionError`` while creating the directory is logged and aborts the
    initialization; an already existing directory is tolerated.
    """
    try:
        get_root_path()
        log.info(output_messages['INFO_ALREADY_IN_RESPOSITORY'], class_name=ADMIN_CLASS_NAME)
        return
    except Exception:
        # No root could be resolved: carry on and initialize one here.
        pass

    try:
        os.mkdir('.ml-git')
    except FileExistsError:
        # The directory is already present; only the config has to be saved.
        pass
    except PermissionError:
        log.error(output_messages['ERROR_PERMISSION_DENIED_INITIALIZE_DIRECTORY'],
                  class_name=ADMIN_CLASS_NAME)
        return

    mlgit_config_save()
    root_file = os.path.join(get_root_path(), ROOT_FILE_NAME)
    log.info(output_messages['INFO_INITIALIZED_PROJECT_IN'] % root_file,
             class_name=ADMIN_CLASS_NAME)
Esempio n. 20
0
def update_store_spec(repotype, artefact_name, store_type, bucket):
    """Rewrite the ``manifest.store`` entry of an entity spec file.

    The spec at ``<root>/<repotype>/<artefact_name>/<artefact_name>.spec`` is
    loaded, its store is set to ``<store_type>://<bucket>`` and it is saved back.

    Raises:
        Exception: Whatever ``get_root_path()`` raises, after it is logged.
    """
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, class_name=ML_GIT_PROJECT_NAME)
        # Without a root there is no spec to update. Re-raise the real cause
        # instead of failing later with a TypeError from os.path.join(None, ...).
        raise

    spec_path = os.path.join(path, repotype, artefact_name, artefact_name + '.spec')
    spec_hash = utils.yaml_load(spec_path)
    spec_hash[repotype]['manifest']['store'] = store_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
Esempio n. 21
0
def update_storage_spec(repo_type, artifact_name, storage_type, bucket, entity_dir=''):
    """Rewrite the storage entry in the manifest of an entity spec file.

    The spec at
    ``<root>/<repo_type>/<entity_dir>/<artifact_name>/<artifact_name><SPEC_EXTENSION>``
    is loaded, its ``STORAGE_SPEC_KEY`` manifest entry is set to
    ``<storage_type>://<bucket>`` and it is saved back.

    Raises:
        Exception: Whatever ``get_root_path()`` raises, after it is logged.
    """
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, class_name=ML_GIT_PROJECT_NAME)
        # Without a root there is no spec to update. Re-raise the real cause
        # instead of failing later with a TypeError from os.path.join(None, ...).
        raise

    spec_path = os.path.join(path, repo_type, entity_dir, artifact_name, artifact_name + SPEC_EXTENSION)
    spec_hash = utils.yaml_load(spec_path)

    entity_spec_key = get_spec_key(repo_type)
    spec_hash[entity_spec_key]['manifest'][STORAGE_SPEC_KEY] = storage_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
Esempio n. 22
0
def create_workspace_tree_structure(repo_type,
                                    artifact_name,
                                    categories,
                                    storage_type,
                                    bucket_name,
                                    version,
                                    imported_dir,
                                    mutability,
                                    entity_dir=''):
    """Create the workspace directories, spec file and README for a new entity.

    Args:
        repo_type: Entity type; names the directory under the project root.
        artifact_name: Entity name; names the directory and the spec file.
        categories: Stored under ``categories`` in the spec.
        storage_type: Scheme part of the storage URI written to the spec.
        bucket_name: Bucket part of the URI; ``FAKE_STORAGE`` when ``None``.
        version: Stored under ``version`` in the spec.
        imported_dir: Directory imported as the entity's ``data`` folder; when
            ``None`` an empty ``data`` folder is created.
        mutability: Stored under ``mutability`` in the spec.
        entity_dir: Optional path between the type directory and the entity.

    Returns:
        bool: ``True`` when the spec and README were written, ``False`` when a
        spec file was already present after the data import.

    Raises:
        Exception: If the entity path is not inside the type directory.
        PermissionError: If the entity path already exists.
    """
    # Everything is created relative to the project root.
    type_root = os.path.join(get_root_path(), repo_type)
    entity_path = os.path.join(type_root, entity_dir, artifact_name)
    if not path_is_parent(type_root, entity_path):
        raise Exception(
            output_messages['ERROR_INVALID_ENTITY_DIR'].format(entity_dir))
    if os.path.exists(entity_path):
        raise PermissionError(output_messages['INFO_ENTITY_NAME_EXISTS'])

    data_path = os.path.join(entity_path, 'data')
    if imported_dir is None:
        os.makedirs(data_path)
    else:
        # Bring in the files from the directory supplied by the caller.
        import_dir(imported_dir, data_path)

    spec_path = os.path.join(entity_path, artifact_name + SPEC_EXTENSION)
    readme_path = os.path.join(entity_path, 'README.md')
    spec_already_present = os.path.isfile(spec_path)

    bucket = bucket_name if bucket_name is not None else FAKE_STORAGE
    storage_uri = '%s://%s' % (storage_type, bucket)
    entity_spec_key = get_spec_key(repo_type)

    if spec_already_present:
        return False

    spec_content = {
        entity_spec_key: {
            'categories': categories,
            'manifest': {
                STORAGE_SPEC_KEY: storage_uri
            },
            'name': artifact_name,
            'mutability': mutability,
            'version': version
        }
    }
    yaml_save(spec_content, spec_path)
    # Create (or truncate) an empty README next to the spec.
    with open(readme_path, 'w'):
        pass
    return True
Esempio n. 23
0
    def test_create_workspace_tree_structure(self):
        """The created spec must carry the storage, name, mutability and version given."""
        root_path = get_root_path()
        import_path = os.path.join(os.getcwd(), 'test', 'src')
        os.makedirs(import_path)
        try:
            self.assertTrue(create_workspace_tree_structure(DATASETS, 'artefact_name',
                                                            ['imgs', 'old', 'blue'], S3H, 'minio', 2, import_path, STRICT))

            spec_path = os.path.join(os.getcwd(), os.sep.join([DATASETS, 'artefact_name', 'artefact_name.spec']))
            spec1 = yaml_load(spec_path)
            self.assertEqual(spec1[DATASET_SPEC_KEY]['manifest'][STORAGE_SPEC_KEY], 's3h://minio')
            self.assertEqual(spec1[DATASET_SPEC_KEY]['name'], 'artefact_name')
            self.assertEqual(spec1[DATASET_SPEC_KEY]['mutability'], STRICT)
            self.assertEqual(spec1[DATASET_SPEC_KEY]['version'], 2)
        finally:
            # Clean up even when an assertion fails. ignore_errors keeps a
            # missing directory from masking the original failure.
            shutil.rmtree(import_path, ignore_errors=True)
            shutil.rmtree(os.path.join(root_path, DATASETS), ignore_errors=True)
Esempio n. 24
0
 def create(self, kwargs):
     """Create a new entity workspace from the options in ``kwargs``.

     Keys read from ``kwargs``: ``artifact_name``, ``category``,
     ``version_number``, ``import``, ``store_type``, ``bucket_name``,
     ``wizard_config``, ``import_url``, ``unzip``, ``credentials_path`` and
     ``mutability``.

     Failures raised while creating the workspace are logged instead of
     propagated. Unless the failure is a ``PermissionError``, the entity
     directory is cleared first.
     """
     artifact_name = kwargs['artifact_name']
     categories = list(kwargs['category'])
     version = int(kwargs['version_number'])
     imported_dir = kwargs['import']
     store_type = kwargs['store_type']
     bucket_name = kwargs['bucket_name']
     start_wizard = kwargs['wizard_config']
     import_url = kwargs['import_url']
     unzip_file = kwargs['unzip']
     credentials_path = kwargs['credentials_path']
     repo_type = self.__repo_type
     try:
         create_workspace_tree_structure(repo_type, artifact_name,
                                         categories, store_type,
                                         bucket_name, version, imported_dir,
                                         kwargs['mutability'])
         if start_wizard:
             has_new_store, store_type, bucket, profile, endpoint_url, git_repo = start_wizard_questions(
                 repo_type)
             if has_new_store:
                 store_add(store_type, bucket, profile, endpoint_url)
             update_store_spec(repo_type, artifact_name, store_type, bucket)
             remote_add(repo_type, git_repo)
         if import_url:
             self.create_config_store('gdrive', credentials_path)
             local = LocalRepository(
                 self.__config, get_objects_path(self.__config, repo_type))
             destine_path = os.path.join(repo_type, artifact_name, 'data')
             local.import_file_from_url(destine_path, import_url,
                                        StoreType.GDRIVE.value)
         if unzip_file:
             log.info('Unzipping files', class_name=REPOSITORY_CLASS_NAME)
             data_path = os.path.join(get_root_path(), repo_type,
                                      artifact_name, 'data')
             unzip_files_in_directory(data_path)
         log.info("Project Created.", class_name=REPOSITORY_CLASS_NAME)
     except KeyboardInterrupt:
         # KeyboardInterrupt does not derive from Exception, so it needs its
         # own clause for the abort message and the cleanup to run.
         clear(os.path.join(repo_type, artifact_name))
         log.info("Create command aborted!",
                  class_name=REPOSITORY_CLASS_NAME)
     except Exception as e:
         # PermissionError is what create_workspace_tree_structure raises for
         # an entity that already exists, so its directory must be kept.
         if not isinstance(e, PermissionError):
             clear(os.path.join(repo_type, artifact_name))
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
Esempio n. 25
0
 def __init__(self, git, path):
     """Store the git reference and make sure ``<root>/<path>`` exists.

     Raises:
         RootPathException: Re-raised after being logged. Any other
             exception is logged and swallowed.
     """
     try:
         self.__path = os.path.join(get_root_path(), path)
         self.__git = git
         ensure_path_exists(self.__path)
     except RootPathException as e:
         log.error(e, class_name=METADATA_MANAGER_CLASS_NAME)
         raise e
     except Exception as e:
         # This specific attribute error is reported with a friendlier message.
         missing_git_attr = str(
             e
         ) == '\'Metadata\' object has no attribute \'_MetadataRepo__git\''
         if missing_git_attr:
             log.error('You are not in an initialized ml-git repository.',
                       class_name=METADATA_MANAGER_CLASS_NAME)
         else:
             log.error(e, class_name=METADATA_MANAGER_CLASS_NAME)
Esempio n. 26
0
 def __init__(self, git, path, repo_type):
     """Store the git reference and repo type and make sure ``<root>/<path>`` exists.

     Raises:
         RootPathException: Re-raised after being logged. Any other
             exception is logged and swallowed.
     """
     self.__repo_type = repo_type
     try:
         self.__path = os.path.join(get_root_path(), path)
         self.__git = git
         ensure_path_exists(self.__path)
     except RootPathException as e:
         log.error(e, class_name=METADATA_MANAGER_CLASS_NAME)
         raise e
     except Exception as e:
         # This specific attribute error is reported with a friendlier message.
         missing_git_attr = str(
             e
         ) == '\'Metadata\' object has no attribute \'_MetadataRepo__git\''
         if missing_git_attr:
             log.error(output_messages['ERROR_NOT_IN_RESPOSITORY'],
                       class_name=METADATA_MANAGER_CLASS_NAME)
         else:
             log.error(e, class_name=METADATA_MANAGER_CLASS_NAME)
Esempio n. 27
0
    def test_create_workspace_tree_structure(self):
        """The created spec must carry the store, name and version given."""
        root_path = get_root_path()
        import_path = os.path.join(os.getcwd(), 'test', 'src')
        os.makedirs(import_path)
        try:
            self.assertTrue(
                create_workspace_tree_structure('repotype', 'artefact_name',
                                                ['imgs', 'old', 'blue'], 's3h',
                                                'minio', 2, import_path))

            spec_path = os.path.join(
                os.getcwd(),
                os.sep.join(['repotype', 'artefact_name', 'artefact_name.spec']))
            spec1 = yaml_load(spec_path)
            self.assertEqual(spec1['repotype']['manifest']['store'], 's3h://minio')
            self.assertEqual(spec1['repotype']['name'], 'artefact_name')
            self.assertEqual(spec1['repotype']['version'], 2)
        finally:
            # Clean up even when an assertion fails. ignore_errors keeps a
            # missing directory from masking the original failure.
            shutil.rmtree(import_path, ignore_errors=True)
            shutil.rmtree(os.path.join(root_path, 'repotype'), ignore_errors=True)
Esempio n. 28
0
    def get_log(self):
        """Return the stripped lines of the store log file.

        Reading stops at the first line that is empty after stripping. An
        empty list is returned when the log file does not exist.

        Raises:
            Exception: Whatever building the log path raises, after logging it.
        """
        log.debug('Loading log file', class_name=HASH_FS_CLASS_NAME)
        try:
            log_file = os.path.join(get_root_path(), self._logpath, 'store.log')
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            raise e

        entries = []
        if not os.path.exists(log_file):
            return entries

        with open(log_file, 'r') as f:
            for raw_line in f:
                entry = raw_line.strip()
                if not entry:
                    # A blank line ends the read, exactly like end of file.
                    break
                entries.append(entry)
        return entries
Esempio n. 29
0
    def get_log(self):
        """Return the stripped lines of the storage log file.

        Reading stops at the first line that is empty after stripping. An
        empty list is returned when the log file does not exist.

        Raises:
            Exception: Whatever building the log path raises, after logging it.
        """
        log.debug(output_messages['DEBUG_LOADING_LOG'], class_name=HASH_FS_CLASS_NAME)
        try:
            log_file = os.path.join(get_root_path(), self._logpath, STORAGE_LOG)
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            raise e

        entries = []
        if not os.path.exists(log_file):
            return entries

        with open(log_file, 'r') as f:
            for raw_line in f:
                entry = raw_line.strip()
                if not entry:
                    # A blank line ends the read, exactly like end of file.
                    break
                entries.append(entry)
        return entries
Esempio n. 30
0
def create_workspace_tree_structure(repo_type, artifact_name, categories,
                                    store_type, bucket_name, version,
                                    imported_dir, mutability):
    """Create the workspace directories, spec file and README for a new entity.

    Args:
        repo_type: Entity type; names the directory under the project root and
            the top-level key of the spec.
        artifact_name: Entity name; names the directory and the spec file.
        categories: Stored under ``categories`` in the spec.
        store_type: Scheme part of the store URI written to the spec.
        bucket_name: Bucket part of the URI; ``FAKE_STORE`` when ``None``.
        version: Stored under ``version`` in the spec.
        imported_dir: Directory imported as the entity's ``data`` folder; when
            ``None`` an empty ``data`` folder is created.
        mutability: Stored under ``mutability`` in the spec.

    Returns:
        bool: ``True`` when the spec and README were written, ``False`` when a
        spec file was already present after the data import.

    Raises:
        PermissionError: If the entity path already exists.
    """
    # Everything is created relative to the project root.
    entity_path = os.path.join(get_root_path(), repo_type, artifact_name)
    if os.path.exists(entity_path):
        raise PermissionError('An entity with that name already exists.')

    data_path = os.path.join(entity_path, 'data')
    if imported_dir is None:
        os.makedirs(data_path)
    else:
        # Bring in the files from the directory supplied by the caller.
        import_dir(imported_dir, data_path)

    spec_path = os.path.join(entity_path, artifact_name + SPEC_EXTENSION)
    readme_path = os.path.join(entity_path, 'README.md')
    if os.path.isfile(spec_path):
        return False

    bucket = bucket_name if bucket_name is not None else FAKE_STORE
    spec_content = {
        repo_type: {
            'categories': categories,
            'manifest': {
                'store': '%s://%s' % (store_type, bucket)
            },
            'name': artifact_name,
            'mutability': mutability,
            'version': version
        }
    }
    yaml_save(spec_content, spec_path)
    # Create (or truncate) an empty README next to the spec.
    with open(readme_path, 'w'):
        pass
    return True