コード例 #1
0
 def repo_remote_del(self, global_conf=False):
     """Remove the configured git remote for this repository type.

     ``remote_del`` is only invoked when ``Metadata.delete_git_reference()``
     returns a truthy value. Any exception raised along the way is logged
     and swallowed.

     Args:
         global_conf: forwarded unchanged to ``remote_del``.

     Returns:
         None in every case.
     """
     try:
         meta = Metadata('', get_metadata_path(self.__config), self.__config,
                         self.__repo_type)
         reference_deleted = meta.delete_git_reference()
         if reference_deleted:
             remote_del(self.__repo_type, global_conf)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
コード例 #2
0
    def get(self, file_path, reference):
        """Download the file known as ``reference`` into ``file_path``.

        Args:
            file_path: destination handed to ``download_file``.
            reference: name looked up through ``get_file_info_by_name``.

        Returns:
            True once ``download_file`` has been called; False (after
            logging an error) when the lookup yields a falsy result.
        """
        info = self.get_file_info_by_name(reference)
        if info:
            self.download_file(file_path, info)
            return True

        log.error(output_messages['ERROR_NOT_FOUND'] % reference,
                  class_name=GDRIVE_STORAGE)
        return False
コード例 #3
0
 def repo_remote_add(self, repo_type, mlgit_remote, global_conf=False):
     """Register ``mlgit_remote`` and set it as the metadata remote URL.

     Calls ``remote_add``, reloads the configuration into this instance
     and then calls ``Metadata.remote_set_url``. Any exception is logged
     and swallowed.

     Args:
         repo_type: forwarded to ``remote_add``.
         mlgit_remote: remote URL to register.
         global_conf: forwarded to ``remote_add``.

     Returns:
         None in every case.
     """
     try:
         remote_add(repo_type, mlgit_remote, global_conf)
         # Reload after remote_add (presumably it persisted the new remote).
         self.__config = config_load()
         Metadata('', get_metadata_path(self.__config), self.__config,
                  self.__repo_type).remote_set_url(mlgit_remote)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
コード例 #4
0
 def _check_integrity(self, cid, data):
     """Compare ``cid`` against the digest computed from ``data``.

     Args:
         cid: expected identifier of the chunk.
         data: payload passed to ``self._digest``.

     Returns:
         True when ``cid`` equals the computed digest (logged at debug
         level), False otherwise (logged as an error).
     """
     computed = self._digest(data)
     matches = cid == computed
     if not matches:
         log.error('Corruption detected for chunk [%s] - got [%s]' %
                   (cid, computed),
                   class_name=HASH_FS_CLASS_NAME)
         return False

     log.debug('Checksum verified for chunk [%s]' % cid,
               class_name=HASH_FS_CLASS_NAME)
     return True
コード例 #5
0
def incr_version(file, repotype='dataset'):
    """Increment the ``version`` field stored in a spec file.

    Args:
        file: path of the YAML spec file to load and rewrite.
        repotype: top-level key of the spec that holds ``version``.

    Returns:
        The incremented version, or -1 (after logging an error) when
        ``is_valid_version`` rejects the loaded spec.
    """
    spec = utils.yaml_load(file)
    if not is_valid_version(spec, repotype):
        log.error('Invalid version, could not increment.  File:\n     %s' % file, class_name=ML_GIT_PROJECT_NAME)
        return -1

    entity = spec[repotype]
    entity['version'] += 1
    utils.yaml_save(spec, file)
    new_version = entity['version']
    log.debug('Version incremented to %s.' % new_version, class_name=ML_GIT_PROJECT_NAME)
    return new_version
コード例 #6
0
 def check_integrity(self, cid, ncid):
     """Report whether the expected and the computed chunk ids agree.

     Args:
         cid: expected chunk identifier.
         ncid: identifier computed by the caller.

     Returns:
         True when both are equal (logged at debug level), False
         otherwise (logged as an error).
     """
     matches = cid == ncid
     if not matches:
         log.error('Corruption detected for chunk [%s] - got [%s]' %
                   (cid, ncid),
                   class_name=MULTI_HASH_STORE_NAME)
         return False

     log.debug('Checksum verified for chunk [%s]' % cid,
               class_name=MULTI_HASH_STORE_NAME)
     return True
コード例 #7
0
def check_successfully_clone(project_dir, git_dir):
    """Check that ``project_dir`` resolves to a project root after a clone.

    Changes the working directory to ``project_dir`` and calls
    ``get_root_path``. When that raises ``RootPathException`` both
    ``project_dir`` and ``git_dir`` are passed to ``clear`` and an error
    is logged.

    Returns:
        True when no ``RootPathException`` was raised, False otherwise.
    """
    cloned = True
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        cloned = False
        clear(project_dir)
        log.error(output_messages['ERROR_MINIMAL_CONFIGURATION'], class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
    return cloned
コード例 #8
0
 def _copy_to_metadata_path(self, src_path, full_metadata_path, file_name):
     if os.path.exists(src_path):
         dst_path = os.path.join(full_metadata_path, file_name)
         try:
             shutil.copy2(src_path, dst_path)
         except Exception as e:
             log.error(output_messages['ERROR_COULD_NOT_FIND_FILE'] %
                       file_name,
                       class_name=METADATA_CLASS_NAME)
             raise e
コード例 #9
0
 def list_tag(self, spec):
     """Print, one per line, the tags ``Metadata.list_tags`` yields for ``spec``.

     Any exception raised while building the metadata object or listing
     the tags is logged and swallowed.

     Returns:
         None in every case.
     """
     kind = self.__repo_type
     try:
         meta = Metadata(spec, get_metadata_path(self.__config, kind),
                         self.__config, kind)
         for name in meta.list_tags(spec):
             print(name)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
コード例 #10
0
def check_successfully_clone(project_dir, git_dir):
    """Tell whether a freshly cloned project has a resolvable root path.

    The working directory is switched to ``project_dir`` before
    ``get_root_path`` is called. On ``RootPathException`` the function
    clears ``project_dir``, logs an error and clears ``git_dir``.

    Returns:
        False when ``RootPathException`` was raised, True otherwise.
    """
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!',
                  class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False
    else:
        return True
コード例 #11
0
def validate_sample(sampling):
    """Validate the keys of a ``sampling`` option mapping.

    ``group`` and ``random`` sampling require a ``seed`` key; otherwise a
    ``range`` key must be present.

    Args:
        sampling: container supporting ``in`` (e.g. a dict of options).

    Returns:
        True when the combination of keys is acceptable, False (after
        logging an error) otherwise.
    """
    needs_seed = 'group' in sampling or 'random' in sampling
    if needs_seed:
        if 'seed' in sampling:
            return True
        log.error('It is necessary to pass the attribute \'seed\' in \'sampling\'. Example: {\'group\': \'1:2\', '
                  '\'seed\': \'10\'}.')
        return False

    if 'range' in sampling:
        return True
    log.error('To use the sampling option, you must pass a valid type of sampling (group, '
              'random or range).')
    return False
コード例 #12
0
ファイル: spec.py プロジェクト: tspthomas/ml-git
def incr_version(file, repo_type=DATASETS):
    """Increment the ``version`` field of the entity described by a spec file.

    Args:
        file: path of the YAML spec file to load and rewrite.
        repo_type: entity type; translated to the spec key through
            ``get_spec_key``.

    Returns:
        The incremented version, or -1 (after logging an error) when
        ``is_valid_version`` rejects the loaded spec.
    """
    spec = utils.yaml_load(file)
    spec_key = get_spec_key(repo_type)
    if not is_valid_version(spec, spec_key):
        log.error(output_messages['ERROR_INVALID_VERSION_INCREMENT'] % file, class_name=ML_GIT_PROJECT_NAME)
        return -1

    entity = spec[spec_key]
    entity['version'] += 1
    utils.yaml_save(spec, file)
    new_version = entity['version']
    log.debug(output_messages['DEBUG_VERSION_INCREMENTED_TO'] % new_version, class_name=ML_GIT_PROJECT_NAME)
    return new_version
コード例 #13
0
def storage_add(storage_type,
                bucket,
                credentials_profile,
                global_conf=False,
                endpoint_url=None,
                sftp_configs=None):
    """Register ``bucket`` under ``storage_type`` in the configuration file.

    The entry for ``bucket`` is always recreated from scratch. Which keys
    it receives depends on the storage type:

    * S3 / S3H: ``aws-credentials.profile``, ``region`` and ``endpoint-url``
    * GDRIVEH: ``credentials-path`` (taken from ``credentials_profile``)
    * SFTPH: ``endpoint-url``, ``username``, ``private-key`` and ``port``
    * any other valid type: an empty mapping

    Args:
        storage_type: storage type string; rejected silently when
            ``valid_storage_type`` returns a falsy value.
        bucket: bucket (or equivalent) name used as the configuration key.
        credentials_profile: AWS profile for S3 types, credentials path
            for GDRIVEH.
        global_conf: forwarded to ``get_config_path``.
        endpoint_url: stored for S3 and SFTPH storages.
        sftp_configs: mapping with ``username``, ``private_key`` and
            ``port``; it is indexed directly, so it must be provided when
            ``storage_type`` is SFTPH.

    Returns:
        None. Failures while locating or loading the configuration file
        are logged and abort the operation.
    """
    if not valid_storage_type(storage_type):
        return

    is_s3 = storage_type in (StorageType.S3H.value, StorageType.S3.value)

    # The region is only ever stored for S3-like storages, so the lookup is
    # skipped for every other type instead of being attempted and discarded.
    region = None
    if is_s3:
        try:
            region = get_bucket_region(bucket, credentials_profile)
        except Exception:
            # Best effort: an unknown region is stored as None.
            region = None

    if not is_s3 or credentials_profile is None:
        log.info(output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] %
                 (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_STORAGE'] %
                 (storage_type, bucket, credentials_profile),
                 class_name=ADMIN_CLASS_NAME)

    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return

    if STORAGE_CONFIG_KEY not in conf:
        conf[STORAGE_CONFIG_KEY] = {}
    if storage_type not in conf[STORAGE_CONFIG_KEY]:
        conf[STORAGE_CONFIG_KEY][storage_type] = {}

    # Fill the entry through a local alias rather than re-indexing
    # conf[STORAGE_CONFIG_KEY][storage_type][bucket] for every key.
    bucket_conf = {}
    conf[STORAGE_CONFIG_KEY][storage_type][bucket] = bucket_conf

    if is_s3:
        bucket_conf['aws-credentials'] = {'profile': credentials_profile}
        bucket_conf['region'] = region
        bucket_conf['endpoint-url'] = endpoint_url
    elif storage_type == StorageType.GDRIVEH.value:
        bucket_conf['credentials-path'] = credentials_profile
    elif storage_type == StorageType.SFTPH.value:
        bucket_conf['endpoint-url'] = endpoint_url
        bucket_conf['username'] = sftp_configs['username']
        bucket_conf['private-key'] = sftp_configs['private_key']
        bucket_conf['port'] = sftp_configs['port']

    yaml_save(conf, config_path)
コード例 #14
0
 def _fetch(self, tag, samples, retries=2, bare=False):
     """Delegate a fetch of ``tag`` to a ``LocalRepository``.

     Args:
         tag: tag to fetch.
         samples: sampling options forwarded to ``LocalRepository.fetch``.
         retries: forwarded to ``LocalRepository.fetch``.
         bare: forwarded to ``LocalRepository.fetch``.

     Returns:
         Whatever ``LocalRepository.fetch`` returns, or None when an
         exception was raised (the exception is logged and swallowed).
     """
     kind = self.__repo_type
     try:
         objects_dir = get_objects_path(self.__config, kind)
         metadata_dir = get_metadata_path(self.__config, kind)
         repository = LocalRepository(self.__config, objects_dir, kind)
         return repository.fetch(metadata_dir, tag, samples, retries, bare)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
コード例 #15
0
    def _is_valid_hashpath(self, path, file):
        """ Checks if the file is placed in a valid directory following the structure created in the _get_hashpath method """
        hashpath = self._get_hashpath(file)
        actual_fullpath = os.path.join(path, file)

        is_valid = hashpath.lower() == actual_fullpath.lower()

        if not is_valid:
            log.error(output_messages['ERROR_CHUNK_WRONG_DIRECTORY'] % (hashpath, actual_fullpath),
                      class_name=HASH_FS_CLASS_NAME)

        return is_valid
コード例 #16
0
def update_store_spec(repotype, artefact_name, store_type, bucket):
    """Point an entity's spec file at a different store.

    Loads ``<root>/<repotype>/<artefact_name>/<artefact_name>.spec``, sets
    ``spec[repotype]['manifest']['store']`` to ``'<store_type>://<bucket>'``
    and writes the file back.

    When the project root cannot be resolved the error is logged and the
    function returns without touching any file. Continuing would hand
    ``None`` to ``os.path.join``, which raises ``TypeError``.

    Args:
        repotype: entity type; used both as a directory name and as the
            top-level key of the spec.
        artefact_name: entity name (directory and spec file base name).
        store_type: scheme part of the new store URL.
        bucket: bucket part of the new store URL.

    Returns:
        None.
    """
    try:
        path = get_root_path()
    except Exception as e:
        # Use the ``class_name`` keyword, as every other log call does.
        log.error(e, class_name=ML_GIT_PROJECT_NAME)
        return

    spec_path = os.path.join(path, repotype, artefact_name, artefact_name + '.spec')
    spec_hash = utils.yaml_load(spec_path)
    spec_hash[repotype]['manifest']['store'] = store_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
コード例 #17
0
ファイル: s3_storage.py プロジェクト: tspthomas/ml-git
 def bucket_exists(self):
     """Check bucket availability with a ``head_bucket`` request.

     Returns:
         True when the request succeeds. False when it raises
         ``ClientError``; the logged message is specialised for the error
         codes ``'404'`` and ``'403'`` and otherwise is the message
         carried by the error response.
     """
     try:
         self._storage.meta.client.head_bucket(Bucket=self._bucket)
     except ClientError as e:
         details = e.response['Error']
         message = details['Message']
         code = details['Code']
         if code == '404':
             message = output_messages['ERROR_BUCKET_DOES_NOT_EXIST'] % self._bucket
         elif code == '403':
             message = output_messages['ERROR_AWS_KEY_NOT_EXIST']
         log.error(message, class_name=STORAGE_FACTORY_CLASS_NAME)
         return False
     return True
コード例 #18
0
 def get(self, file_path, reference):
     """Download blob ``reference`` to ``file_path`` and verify its digest.

     Args:
         file_path: local file that is opened for binary writing.
         reference: blob name, also used as the expected digest.

     Returns:
         True when the download succeeded and ``check_integrity``
         accepted the digest of the data; False when the check failed or
         any exception was raised (the exception is logged).
     """
     try:
         client = self._storage.get_blob_client(container=self._bucket, blob=reference)
         with open(file_path, 'wb') as target:
             payload = client.download_blob().readall()
             target.write(payload)
         verified = self.check_integrity(reference, self.digest(payload))
     except Exception as e:
         log.error(e, class_name=AZURE_STORAGE_NAME)
         return False
     return True if verified else False
コード例 #19
0
    def commit_metadata(self, index_path, tags, commit_msg, changed_files,
                        mutability, ws_path):
        """Commit the manifest and metadata of the current spec and tag the commit.

        Args:
            index_path: index directory that contains ``metadata/<spec>``.
            tags: forwarded to the private metadata commit step.
            commit_msg: commit message; when None or empty a message is
                generated with ``metadata_message``.
            changed_files: forwarded to the private manifest commit step.
            mutability: forwarded to the private manifest commit step.
            ws_path: forwarded to the private metadata commit step.

        Returns:
            ``(str(tag), str(sha))`` on success. ``(None, None)`` when the
            metadata path cannot be resolved, the manifest step returns
            False, the metadata step raises, or the tag already exists.
        """
        spec_file = os.path.join(index_path, 'metadata', self._spec,
                                 self._spec + SPEC_EXTENSION)
        resolved = self._full_metadata_path(spec_file)
        full_metadata_path, categories_sub_path, metadata = resolved
        log.debug('Metadata path [%s]' % full_metadata_path,
                  class_name=METADATA_CLASS_NAME)

        if full_metadata_path is None or categories_sub_path is None:
            return None, None

        ensure_path_exists(full_metadata_path)

        manifest_committed = self.__commit_manifest(
            full_metadata_path, index_path, changed_files, mutability)
        if manifest_committed is False:
            log.info('No files to commit for [%s]' % self._spec,
                     class_name=METADATA_CLASS_NAME)
            return None, None

        try:
            self.__commit_metadata(full_metadata_path, index_path, metadata,
                                   tags, ws_path)
        except Exception:
            return None, None

        # Tag that will be attached to the commit.
        tag = self.metadata_tag(metadata)

        # Refuse to reuse a tag already present in the ml-git repository.
        existing_tags = self._tag_exists(tag)
        if len(existing_tags) > 0:
            log.error(
                'Tag [%s] already exists in the ml-git repository. '
                'Consider using --bumpversion parameter to increment the version number for your dataset.'
                % tag,
                class_name=METADATA_CLASS_NAME)
            for existing in existing_tags:
                log.error('\t%s' % existing)
            return None, None

        has_message = commit_msg is not None and len(commit_msg) > 0
        # Fall back to a generated message when the caller gave none.
        msg = commit_msg if has_message else self.metadata_message(metadata)
        log.debug('Commit message [%s]' % msg, class_name=METADATA_CLASS_NAME)

        sha = self.commit(categories_sub_path, msg)
        self.tag_add(tag)
        return str(tag), str(sha)
コード例 #20
0
 def _is_spec_valid(self, spec_path):
     """Validate the spec file at ``spec_path`` for this repository type.

     The loaded spec must pass ``validate_spec_hash`` and its entity
     section must pass ``validate_bucket_name``.

     Returns:
         True when both validations succeed, False otherwise. An error
         showing a sample spec is logged when the first one fails.
     """
     kind = self.__repo_type
     spec = yaml_load(spec_path)
     if not validate_spec_hash(spec, kind):
         sample = get_sample_spec_doc('somebucket', kind)
         log.error(
             'Invalid %s spec in %s.  It should look something like this:\n%s'
             % (kind, spec_path, sample),
             class_name=REPOSITORY_CLASS_NAME)
         return False

     if validate_bucket_name(spec[kind], self.__config):
         return True
     return False
コード例 #21
0
 def get_account(self):
     """Look up the Azure storage connection string.

     The ``AZURE_STORAGE_CONNECTION_STRING`` environment variable takes
     precedence; otherwise ``~/.azure/config`` is parsed as TOML and its
     ``connection_string`` entry under ``STORAGE_SPEC_KEY`` is used when
     it is not empty.

     Returns:
         The connection string, or None (after logging an error) when
         neither source provides one.
     """
     from_env = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
     if from_env is not None:
         return from_env

     try:
         config_file = os.path.join(
             os.path.expanduser(os.path.join('~', '.azure')), 'config')
         stored = toml.load(config_file)[STORAGE_SPEC_KEY]['connection_string']
         if stored != '':
             return stored
     except Exception:
         log.debug(output_messages['DEBUG_AZURE_CLI_NOT_FIND'], class_name=AZURE_STORAGE_NAME)

     log.error(output_messages['ERROR_AZURE_CREDENTIALS_NOT_FOUND'], class_name=AZURE_STORAGE_NAME)
コード例 #22
0
 def update(self):
     """Call ``Metadata.update()`` for this repository type.

     A ``GitError`` is logged together with its ``stderr`` and a hint
     about the remote configuration; any other exception is logged as
     is. Nothing is re-raised.

     Returns:
         None in every case.
     """
     kind = self.__repo_type
     try:
         Metadata('', get_metadata_path(self.__config, kind), self.__config,
                  kind).update()
     except GitError as error:
         log.error(
             'Could not update metadata. Check your remote configuration. %s'
             % error.stderr,
             class_name=REPOSITORY_CLASS_NAME)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
コード例 #23
0
 def _check_corrupted_files(self, spec, repo):
     try:
         corrupted_files = repo.get_corrupted_files(spec)
         if corrupted_files is not None and len(corrupted_files) > 0:
             print('\n')
             log.warn(
                 'The following files cannot be added because they are corrupted:',
                 class_name=REPOSITORY_CLASS_NAME)
             for file in corrupted_files:
                 print('\t %s' % file)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
         return
コード例 #24
0
ファイル: spec.py プロジェクト: tspthomas/ml-git
def update_storage_spec(repo_type, artifact_name, storage_type, bucket, entity_dir=''):
    """Point an entity's spec file at a different storage.

    Loads ``<root>/<repo_type>/<entity_dir>/<artifact_name>/<artifact_name>``
    plus ``SPEC_EXTENSION``, sets the ``STORAGE_SPEC_KEY`` entry of the
    entity's ``manifest`` to ``'<storage_type>://<bucket>'`` and writes
    the file back.

    When the project root cannot be resolved the error is logged and the
    function returns without touching any file. Continuing would hand
    ``None`` to ``os.path.join``, which raises ``TypeError``.

    Args:
        repo_type: entity type; used as a directory name and translated
            to the spec key through ``get_spec_key``.
        artifact_name: entity name (directory and spec file base name).
        storage_type: scheme part of the new storage URL.
        bucket: bucket part of the new storage URL.
        entity_dir: optional sub-directory between the type directory and
            the entity directory.

    Returns:
        None.
    """
    try:
        path = get_root_path()
    except Exception as e:
        # Use the ``class_name`` keyword, as every other log call does.
        log.error(e, class_name=ML_GIT_PROJECT_NAME)
        return

    spec_path = os.path.join(path, repo_type, entity_dir, artifact_name, artifact_name + SPEC_EXTENSION)
    spec_hash = utils.yaml_load(spec_path)

    entity_spec_key = get_spec_key(repo_type)
    spec_hash[entity_spec_key]['manifest'][STORAGE_SPEC_KEY] = storage_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
コード例 #25
0
    def commit_metadata(self, index_path, tags, commit_msg, changed_files,
                        mutability, ws_path):
        """Commit the manifest and metadata of the current spec and tag the commit.

        Args:
            index_path: index directory that contains ``metadata/<spec>``.
            tags: forwarded to the private metadata commit step.
            commit_msg: commit message; when None or empty a message is
                generated with ``metadata_message``.
            changed_files: forwarded to the private manifest commit step.
            mutability: forwarded to the private manifest commit step.
            ws_path: forwarded to the private metadata commit step.

        Returns:
            ``(str(tag), str(sha))`` on success. ``(None, None)`` when the
            metadata path cannot be resolved, the manifest step returns
            False, the metadata step raises, or the tag already exists.
        """
        spec_file = os.path.join(index_path, 'metadata', self._spec,
                                 self._spec + SPEC_EXTENSION)
        resolved = self._full_metadata_path(spec_file)
        full_metadata_path, entity_sub_path, metadata = resolved
        log.debug(output_messages['DEBUG_METADATA_PATH'] % full_metadata_path,
                  class_name=METADATA_CLASS_NAME)

        if full_metadata_path is None or entity_sub_path is None:
            return None, None

        ensure_path_exists(full_metadata_path)

        manifest_committed = self.__commit_manifest(
            full_metadata_path, index_path, changed_files, mutability)
        if manifest_committed is False:
            log.info(output_messages['INFO_NO_FILES_COMMIT_FOR'] % self._spec,
                     class_name=METADATA_CLASS_NAME)
            return None, None

        try:
            self.__commit_metadata(full_metadata_path, index_path, metadata,
                                   tags, ws_path)
        except Exception:
            return None, None

        # Tag that will be attached to the commit.
        tag = self.metadata_tag(metadata)

        # Refuse to reuse a tag already present in the ml-git repository.
        existing_tags = self._tag_exists(tag)
        if len(existing_tags) > 0:
            log.error(
                output_messages['ERROR_TAG_ALREADY_EXISTS_CONSIDER_USER_VERSION'] % tag,
                class_name=METADATA_CLASS_NAME)
            for existing in existing_tags:
                log.error(output_messages['ERROR_METADATA_MESSAGE'] % existing)
            return None, None

        has_message = commit_msg is not None and len(commit_msg) > 0
        # Fall back to a generated message when the caller gave none.
        msg = commit_msg if has_message else self.metadata_message(metadata)
        log.debug(output_messages['DEBUG_COMMIT_MESSAGE'] % msg,
                  class_name=METADATA_CLASS_NAME)

        sha = self.commit(entity_sub_path, msg)
        self.tag_add(tag)
        return str(tag), str(sha)
コード例 #26
0
    def get_by_id(self, file_path, file_id):
        """Download the file identified by ``file_id`` into directory ``file_path``.

        The file keeps the ``name`` reported by the store.

        Args:
            file_path: destination directory.
            file_id: identifier passed to the store as ``fileId``.

        Returns:
            True once ``download_file`` has been called; False (after
            logging an error) when the lookup raises ``errors.HttpError``
            or returns a falsy result.
        """
        try:
            info = self._store.files().get(fileId=file_id).execute()
        except errors.HttpError as error:
            log.error('%s' % error, class_name=GDRIVE_STORE)
            return False

        if info:
            destination = os.path.join(file_path, info.get('name'))
            self.download_file(destination, info)
            return True

        log.error('[%s] not found.' % file_id, class_name=GDRIVE_STORE)
        return False
コード例 #27
0
    def _full_metadata_path(self, spec_file):
        """Resolve where the metadata of ``spec_file`` belongs.

        Args:
            spec_file: path of the spec file to load.

        Returns:
            ``(full_metadata_path, categories_path, metadata)``, where the
            full path is this object's base path joined with the sub-path
            from ``metadata_subpath``. ``(None, None, None)`` is returned,
            after logging an error, when the spec loads as an empty dict
            or no sub-path can be derived from it.
        """
        log.debug('Getting subpath from categories in specfile [%s]' % spec_file, class_name=METADATA_CLASS_NAME)

        metadata = yaml_load(spec_file)
        if metadata == {}:
            log.error('The entity name passed it\'s wrong. Please check again', class_name=METADATA_CLASS_NAME)
            return None, None, None

        sub_path = self.metadata_subpath(metadata)
        if sub_path is not None:
            return os.path.join(self.__path, sub_path), sub_path, metadata

        log.error('You must place at least one category in the entity .spec file', class_name=METADATA_CLASS_NAME)
        return None, None, None
コード例 #28
0
    def _is_valid_hashpath(self, path, file):
        """ Checks if the file is placed in a valid directory following the structure created in the _get_hashpath method """
        hashpath = self._get_hashpath(file)
        actual_fullpath = os.path.join(path, file)

        is_valid = hashpath.lower() == actual_fullpath.lower()

        if not is_valid:
            log.error(
                'Chunk found in wrong directory. Expected [%s]. Found [%s]' %
                (hashpath, actual_fullpath),
                class_name=HASH_FS_CLASS_NAME)

        return is_valid
コード例 #29
0
def remote_del(repo_type, global_conf=False):
    """Blank the ``git`` remote configured for ``repo_type``.

    Args:
        repo_type: top-level key of the configuration to update.
        global_conf: forwarded to ``get_config_path``.

    Returns:
        None. An error is logged, and nothing is saved, when
        ``repo_type`` is missing from the configuration or its ``git``
        entry is None or empty.
    """
    config_path = get_config_path(global_conf)
    conf = yaml_load(config_path)

    if repo_type not in conf:
        log.error(output_messages['ERROR_ENTITY_NOT_FOUND'] % repo_type, class_name=ADMIN_CLASS_NAME)
        return

    entity_conf = conf[repo_type]
    git_url = entity_conf['git']
    if git_url is None or len(git_url) == 0:
        log.error(output_messages['ERROR_REMOTE_UNCONFIGURED'] % repo_type, class_name=ADMIN_CLASS_NAME)
        return

    log.info(output_messages['INFO_REMOVE_REMOTE'] % (git_url, repo_type), class_name=ADMIN_CLASS_NAME)
    entity_conf['git'] = ''
    yaml_save(conf, config_path)
コード例 #30
0
    def list_tags(self, spec, full_info=False):
        """Collect the repository tags whose text contains ``__<spec>__``.

        Args:
            spec: entity name searched for between double underscores.
            full_info: when true the items come from ``repo.tags``;
                otherwise they are the lines of ``git tag`` sorted by
                ``creatordate``.

        Returns:
            The list of matching tags. If an exception is raised, an
            error is logged and the tags gathered so far (possibly none)
            are returned.
        """
        matching = []
        try:
            repo = Repo(self.__path)
            if full_info:
                candidates = repo.tags
            else:
                candidates = repo.git.tag(sort='creatordate').split('\n')

            marker = f'__{spec}__'
            for candidate in candidates:
                if marker in str(candidate):
                    matching.append(candidate)
        except Exception:
            log.error('Invalid ml-git repository!',
                      class_name=METADATA_MANAGER_CLASS_NAME)
        return matching