コード例 #1
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
 def func(name, dset):
     """
     Check one HDF5 object against the attributes required for its tag.

     Objects that are not ``h5py.Dataset`` instances, or whose name has no
     entry in ``required_tags``, are ignored. For a matching dataset the tag
     is removed from ``tag_list`` and every required key is checked:
     ``'dim'`` is delegated to ``check_dim``; any other key is compared with
     the dataset attribute of the same name. A mismatch logs a warning and
     sets ``res.res`` to False. Keys that are absent from the dataset are
     left in ``attrib_list`` and handed to ``report_items``.

     Parameters
     ----------
     name : str
         Unused; ``dset.name`` is used as the tag instead.

     dset : object
         Candidate object; only ``h5py.Dataset`` instances are checked.

     Returns
     -------
     None
     """
     if not isinstance(dset, h5py.Dataset):
         return
     tag = dset.name
     tag_attribs = required_tags.get(tag)
     if tag_attribs is None:
         return

     tag_list.remove(tag)
     attrib_list = utils.key_list(tag_attribs)
     for key in tag_attribs:
         if not attrib_list:
             # Nothing left to verify; the list only shrinks in this loop.
             break
         if key == 'dim':
             attrib_list.remove(key)
             check_dim(dset, tag_attribs)
             continue
         attr = dset.attrs.get(key)
         if attr is None:
             # Missing attribute: keep the key so it is reported below.
             continue
         # String attributes may come back as bytes or as str depending on
         # the h5py version and storage type; only bytes can be decoded.
         if isinstance(attr, bytes):
             attr_str = attr.decode('utf-8')
         else:
             attr_str = str(attr)
         expected = tag_attribs.get(key)
         if attr_str != expected:
             logger.warning(
                 'incorrect attribute in {0}: is {1}:{2} but should be '
                 '{1}:{3}'.format(tag, key, attr_str, expected))
             res.res = False
         attrib_list.remove(key)
     report_items(
         attrib_list,
         'the following attributes are missing in tag ',
         tag,
         logger)
コード例 #2
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
 def func(name, dset):
     """
     Check one HDF5 object against the attributes required for its tag.

     Objects that are not ``h5py.Dataset`` instances, or whose name has no
     entry in ``required_tags``, are ignored. For a matching dataset the tag
     is removed from ``tag_list`` and every required key is checked:
     ``'dim'`` is delegated to ``check_dim``; any other key is compared with
     the dataset attribute of the same name. A mismatch logs a warning and
     sets ``res.res`` to False. Keys that are absent from the dataset are
     left in ``attrib_list`` and handed to ``report_items``.

     Parameters
     ----------
     name : str
         Unused; ``dset.name`` is used as the tag instead.

     dset : object
         Candidate object; only ``h5py.Dataset`` instances are checked.

     Returns
     -------
     None
     """
     if not isinstance(dset, h5py.Dataset):
         return
     tag = dset.name
     tag_attribs = required_tags.get(tag)
     if tag_attribs is None:
         return

     tag_list.remove(tag)
     attrib_list = utils.key_list(tag_attribs)
     for key in tag_attribs:
         if not attrib_list:
             # Nothing left to verify; the list only shrinks in this loop.
             break
         if key == 'dim':
             attrib_list.remove(key)
             check_dim(dset, tag_attribs)
             continue
         attr = dset.attrs.get(key)
         if attr is None:
             # Missing attribute: keep the key so it is reported below.
             continue
         # String attributes may come back as bytes or as str depending on
         # the h5py version and storage type; only bytes can be decoded.
         if isinstance(attr, bytes):
             attr_str = attr.decode('utf-8')
         else:
             attr_str = str(attr)
         expected = tag_attribs.get(key)
         if attr_str != expected:
             logger.warning(
                 'incorrect attribute in {0}: is {1}:{2} but should be '
                 '{1}:{3}'.format(tag, key, attr_str, expected))
             res.res = False
         attrib_list.remove(key)
     report_items(attrib_list,
                  'the following attributes are missing in tag ',
                  tag, logger)
コード例 #3
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
def tags(file, required_tags, logger):
    """
    Check that every required tag exists as a dataset in an HDF file.

    The file is walked with ``visititems`` and the name of every
    ``h5py.Dataset`` found is collected. Each tag obtained from
    ``utils.key_list(required_tags)`` is then looked up among those names,
    and a warning is logged for every tag that is absent. All tags are
    checked; the function does not stop at the first missing one.

    Parameters
    ----------
    file : str
        File Name including path

    required_tags : dict
        Required tags, passed to ``utils.key_list`` to obtain the tag names.
        Presumably keyed by dataset path as loaded from the schema file --
        confirm against the caller.

    logger : Logger
        a Logger instance

    Returns
    -------
    bool
        True if every required tag was found, False otherwise.

    """

    tag_list = utils.key_list(required_tags)
    filetags = set()

    def func(name, dset):
        # Only datasets count as tags; other visited objects are skipped.
        if isinstance(dset, h5py.Dataset):
            filetags.add(dset.name)

    # The context manager closes the file even if the walk raises.
    with h5py.File(file, 'r') as file_h5:
        file_h5.visititems(func)

    verified = True
    for tag in tag_list:
        if tag not in filetags:
            logger.warning('tag ' + tag + ' not found')
            verified = False

    return verified
コード例 #4
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
def tags(file, required_tags, logger):
    """
    Check that every required tag exists as a dataset in an HDF file.

    The file is walked with ``visititems`` and the name of every
    ``h5py.Dataset`` found is collected. Each tag obtained from
    ``utils.key_list(required_tags)`` is then looked up among those names,
    and a warning is logged for every tag that is absent. All tags are
    checked; the function does not stop at the first missing one.

    Parameters
    ----------
    file : str
        File Name including path

    required_tags : dict
        Required tags, passed to ``utils.key_list`` to obtain the tag names.
        Presumably keyed by dataset path as loaded from the schema file --
        confirm against the caller.

    logger : Logger
        a Logger instance

    Returns
    -------
    bool
        True if every required tag was found, False otherwise.

    """

    tag_list = utils.key_list(required_tags)
    filetags = set()

    def func(name, dset):
        # Only datasets count as tags; other visited objects are skipped.
        if isinstance(dset, h5py.Dataset):
            filetags.add(dset.name)

    # The context manager closes the file even if the walk raises.
    with h5py.File(file, 'r') as file_h5:
        file_h5.visititems(func)

    verified = True
    for tag in tag_list:
        if tag not in filetags:
            logger.warning('tag ' + tag + ' not found')
            verified = False

    return verified
コード例 #5
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
def structure(file, required_tags, logger):
    """
    This method is used when a file of hdf type is given.
    Dataset attributes and array dimensions are verified against a schema.
    (see :download:`schemas/tags.json <../../../config/default/schemas/tags.json>`
    example file).

    Parameters
    ----------
    file : str
        File Name including path

    required_tags : dict
        Mapping from dataset name to a mapping of required attributes for
        that dataset. The key ``'dim'`` holds the required dimensions; any
        other key names an attribute whose value must match.

    logger : Logger
        a Logger instance

    Returns
    -------
    bool
        True if no dimension or attribute mismatch was found,
        False otherwise.

    """
    class Result():
        # Mutable holder so the nested callbacks can record a failure.
        res = True

    def check_dim(dset, attr):
        """Compare the shape of ``dset`` with the required ``'dim'`` entry."""
        required_dim = attr.get('dim')
        remaining = utils.copy_list(required_dim)
        dim = dset.shape
        if len(dim) != len(required_dim):
            logger.warning('The dataset ' + dset.name + ' dimensions: ' +
                           str(dset.shape) + ' but should be ' +
                           str(required_dim))
            res.res = False
            return
        for i, size in enumerate(dim):
            try:
                # Each actual size consumes one equal entry from the copy
                # of the required sizes.
                remaining.remove(size)
            except ValueError:
                logger.warning('ValueError: The dataset ' + dset.name +
                               ' dimension ' + str(i) +
                               ' is wrong: it is [' + str(size) +
                               '] but should be [' + str(required_dim[i]) +
                               ']')
                res.res = False

    def func(name, dset):
        """Visitor callback: verify one dataset against its required tag."""
        if not isinstance(dset, h5py.Dataset):
            return
        tag = dset.name
        tag_attribs = required_tags.get(tag)
        if tag_attribs is None:
            return

        tag_list.remove(tag)
        attrib_list = utils.key_list(tag_attribs)
        for key in tag_attribs:
            if not attrib_list:
                # Nothing left to verify; the list only shrinks here.
                break
            if key == 'dim':
                attrib_list.remove(key)
                check_dim(dset, tag_attribs)
                continue
            attr = dset.attrs.get(key)
            if attr is None:
                # Missing attribute: keep the key so it is reported below.
                continue
            # String attributes may come back as bytes or as str depending
            # on the h5py version and storage type; only bytes can be
            # decoded.
            if isinstance(attr, bytes):
                attr_str = attr.decode('utf-8')
            else:
                attr_str = str(attr)
            expected = tag_attribs.get(key)
            if attr_str != expected:
                logger.warning(
                    'incorrect attribute in {0}: is {1}:{2} but should be '
                    '{1}:{3}'.format(tag, key, attr_str, expected))
                res.res = False
            attrib_list.remove(key)
        report_items(
            attrib_list,
            'the following attributes are missing in tag ',
            tag,
            logger)

    res = Result()
    tag_list = utils.key_list(required_tags)
    # The context manager closes the file even if a check raises.
    with h5py.File(file, 'r') as file_h5:
        file_h5.visititems(func)
    if res.res:
        return True
    # NOTE(review): tags left in tag_list (never seen in the file) are only
    # reported when some other check has already failed; on their own they
    # do not make this function return False. Confirm this is intended.
    report_items(tag_list, 'the following tags are missing: ', '', logger)
    return False
コード例 #6
0
ファイル: hdf.py プロジェクト: decarlof/data-quality
def structure(file, required_tags, logger):
    """
    This method is used when a file of hdf type is given.
    Dataset attributes and array dimensions are verified against a schema.
    (see :download:`schemas/tags.json <../../../config/default/schemas/tags.json>`
    example file).

    Parameters
    ----------
    file : str
        File Name including path

    required_tags : dict
        Mapping from dataset name to a mapping of required attributes for
        that dataset. The key ``'dim'`` holds the required dimensions; any
        other key names an attribute whose value must match.

    logger : Logger
        a Logger instance

    Returns
    -------
    bool
        True if no dimension or attribute mismatch was found,
        False otherwise.

    """
    class Result():
        # Mutable holder so the nested callbacks can record a failure.
        res = True

    def check_dim(dset, attr):
        """Compare the shape of ``dset`` with the required ``'dim'`` entry."""
        required_dim = attr.get('dim')
        remaining = utils.copy_list(required_dim)
        dim = dset.shape
        if len(dim) != len(required_dim):
            logger.warning('The dataset ' + dset.name + ' dimensions: ' +
                           str(dset.shape) + ' but should be ' +
                           str(required_dim))
            res.res = False
            return
        for i, size in enumerate(dim):
            try:
                # Each actual size consumes one equal entry from the copy
                # of the required sizes.
                remaining.remove(size)
            except ValueError:
                logger.warning('ValueError: The dataset ' + dset.name +
                               ' dimension ' + str(i) +
                               ' is wrong: it is [' + str(size) +
                               '] but should be [' + str(required_dim[i]) +
                               ']')
                res.res = False

    def func(name, dset):
        """Visitor callback: verify one dataset against its required tag."""
        if not isinstance(dset, h5py.Dataset):
            return
        tag = dset.name
        tag_attribs = required_tags.get(tag)
        if tag_attribs is None:
            return

        tag_list.remove(tag)
        attrib_list = utils.key_list(tag_attribs)
        for key in tag_attribs:
            if not attrib_list:
                # Nothing left to verify; the list only shrinks here.
                break
            if key == 'dim':
                attrib_list.remove(key)
                check_dim(dset, tag_attribs)
                continue
            attr = dset.attrs.get(key)
            if attr is None:
                # Missing attribute: keep the key so it is reported below.
                continue
            # String attributes may come back as bytes or as str depending
            # on the h5py version and storage type; only bytes can be
            # decoded.
            if isinstance(attr, bytes):
                attr_str = attr.decode('utf-8')
            else:
                attr_str = str(attr)
            expected = tag_attribs.get(key)
            if attr_str != expected:
                logger.warning(
                    'incorrect attribute in {0}: is {1}:{2} but should be '
                    '{1}:{3}'.format(tag, key, attr_str, expected))
                res.res = False
            attrib_list.remove(key)
        report_items(attrib_list,
                     'the following attributes are missing in tag ',
                     tag, logger)

    res = Result()
    tag_list = utils.key_list(required_tags)
    # The context manager closes the file even if a check raises.
    with h5py.File(file, 'r') as file_h5:
        file_h5.visititems(func)
    if res.res:
        return True
    # NOTE(review): tags left in tag_list (never seen in the file) are only
    # reported when some other check has already failed; on their own they
    # do not make this function return False. Confirm this is intended.
    report_items(tag_list, 'the following tags are missing: ', '', logger)
    return False