Esempio n. 1
0
    def __init__(self, onixfile, profile):
        """Parse the ONIX input and load the YAML profile to validate against.

        :param onixfile: File object with ONIX data or path to an ONIX file
        :type onixfile: file or str
        :param profile: Path to the YAML profile
        :type profile: str
        """
        # Objects exposing ``name`` (e.g. open files) are labelled by that
        # attribute; anything else is treated as a path.
        if hasattr(onixfile, 'name'):
            self.basename = basename(onixfile.name)
        else:
            self.basename = basename(onixfile)

        # Rewind seekable inputs so parsing starts at the beginning.
        if hasattr(onixfile, 'seek'):
            onixfile.seek(0)

        self._onix_file = OnixFile(onixfile)
        self.onix = self._onix_file.xml_tree().getroot()

        # Get rid of namespaces for easier xpath search
        # (``iter()`` replaces the deprecated ``getiterator()``).
        for elem in self.onix.iter():
            tag = elem.tag
            # Nodes whose tag is not string-like have nothing to strip.
            if not hasattr(tag, 'find'):
                continue
            i = tag.find('}')
            if i >= 0:
                elem.tag = tag[i + 1:]
        objectify.deannotate(self.onix, cleanup_namespaces=True)

        # Close the profile file deterministically. ``safe_load`` is used
        # because ``yaml.load`` without an explicit Loader is rejected by
        # current PyYAML and can construct arbitrary Python objects.
        with open(profile, 'rb') as profile_file:
            self.spec = yaml.safe_load(profile_file)

        # The validator name reported in messages is the upper-cased profile
        # file name without its extension.
        self.name = splitext(basename(profile))[0].upper()
        self.urls_checked = set()
        self.errors = []
        self.warnings = []
Esempio n. 2
0
def validate(infile, schemas=('xsd', )):
    """Validate an ONIX file.

    Schema checks (``'xsd'``, ``'rng'``) run first, followed by the YAML
    profile checks (``'google'``, ``'biblon'``). Any other entry in
    `schemas` is ignored.

    :param infile: File or path to file
    :type infile: file or str
    :param schemas: Validator name, or iterable of validator names
    :type  schemas: str or collections.Iterable[str]
    :return: List of `Message` objects (invalid ONIX) or empty list (valid ONIX)
    :rtype: list[Message]
    """
    if isinstance(schemas, str):
        # A bare string would otherwise be iterated character by character,
        # match no validator and make any file look valid.
        schemas = (schemas, )
    else:
        # The names are scanned twice below; materialise them so a one-shot
        # iterator is not already exhausted for the second pass.
        schemas = tuple(schemas)

    if hasattr(infile, 'name'):
        filename = basename(infile.name)
    else:
        filename = basename(infile)

    try:
        onix_file = OnixFile(infile)
    except (OnixError, XMLSyntaxError) as e:
        # An unparsable file yields a single message and no further checks.
        return [Message.from_exception(e, filename)]

    messages = []

    # First pass: schema validators obtained from the parsed file.
    for s in schemas:
        if s not in ('xsd', 'rng'):
            continue
        try:
            validator = onix_file.get_validator(s)
        except OnixError as e:
            messages.append(Message.from_exception(e, filename))
            continue
        validator(onix_file.xml_tree())
        messages.extend(
            Message.from_logentry(err, filename)
            for err in validator.error_log
        )

    # Second pass: YAML profile validators; only their errors are reported.
    for s in schemas:
        if s == 'google':
            profile = schema.GOOGLE_O30_YML_REFERENCE
        elif s == 'biblon':
            profile = schema.BIBLON_O30_YML_REFERENCE
        else:
            continue

        profile_validator = OnixFix(infile, profile)
        profile_validator.validate()
        messages.extend(profile_validator.errors)

    return messages
Esempio n. 3
0
    def __init__(self, onixfile, profile):
        """Parse the ONIX input and load the YAML profile to validate against.

        :param onixfile: File object with ONIX data or path to an ONIX file
        :type onixfile: file or str
        :param profile: Path to the YAML profile
        :type profile: str
        """
        # Objects exposing ``name`` (e.g. open files) are labelled by that
        # attribute; anything else is treated as a path.
        if hasattr(onixfile, 'name'):
            self.basename = basename(onixfile.name)
        else:
            self.basename = basename(onixfile)

        # Rewind seekable inputs so parsing starts at the beginning.
        if hasattr(onixfile, 'seek'):
            onixfile.seek(0)

        self._onix_file = OnixFile(onixfile)
        self.onix = self._onix_file.xml_tree().getroot()

        # Get rid of namespaces for easier xpath search
        # (``iter()`` replaces the deprecated ``getiterator()``).
        for elem in self.onix.iter():
            tag = elem.tag
            # Nodes whose tag is not string-like have nothing to strip.
            if not hasattr(tag, 'find'):
                continue
            i = tag.find('}')
            if i >= 0:
                elem.tag = tag[i + 1:]
        objectify.deannotate(self.onix, cleanup_namespaces=True)

        # Close the profile file deterministically. ``safe_load`` is used
        # because ``yaml.load`` without an explicit Loader is rejected by
        # current PyYAML and can construct arbitrary Python objects.
        with open(profile, 'rb') as profile_file:
            self.spec = yaml.safe_load(profile_file)

        # The validator name reported in messages is the upper-cased profile
        # file name without its extension.
        self.name = splitext(basename(profile))[0].upper()
        self.urls_checked = set()
        self.errors = []
        self.warnings = []
Esempio n. 4
0
def validate(infile, schemas=("xsd",)):
    """Validate an ONIX file.

    Schema checks (``"xsd"``, ``"rng"``) run first, followed by the YAML
    profile checks (``"google"``, ``"biblon"``). Any other entry in
    `schemas` is ignored.

    :param infile: File or path to file
    :type infile: file or str
    :param schemas: Validator name, or iterable of validator names
    :type  schemas: str or collections.Iterable[str]
    :return: List of `Message` objects (invalid ONIX) or empty list (valid ONIX)
    :rtype: list[Message]
    """
    if isinstance(schemas, str):
        # A bare string would otherwise be iterated character by character,
        # match no validator and make any file look valid.
        schemas = (schemas,)
    else:
        # The names are scanned twice below; materialise them so a one-shot
        # iterator is not already exhausted for the second pass.
        schemas = tuple(schemas)

    if hasattr(infile, "name"):
        filename = basename(infile.name)
    else:
        filename = basename(infile)

    try:
        onix_file = OnixFile(infile)
    except (OnixError, XMLSyntaxError) as e:
        # An unparsable file yields a single message and no further checks.
        return [Message.from_exception(e, filename)]

    messages = []

    # First pass: schema validators obtained from the parsed file.
    for s in schemas:
        if s not in ("xsd", "rng"):
            continue
        try:
            validator = onix_file.get_validator(s)
        except OnixError as e:
            messages.append(Message.from_exception(e, filename))
            continue
        validator(onix_file.xml_tree())
        messages.extend(
            Message.from_logentry(err, filename)
            for err in validator.error_log
        )

    # Second pass: YAML profile validators; only their errors are reported.
    for s in schemas:
        if s == "google":
            profile = schema.GOOGLE_O30_YML_REFERENCE
        elif s == "biblon":
            profile = schema.BIBLON_O30_YML_REFERENCE
        else:
            continue

        profile_validator = OnixFix(infile, profile)
        profile_validator.validate()
        messages.extend(profile_validator.errors)

    return messages
Esempio n. 5
0
def test_get_validator_o3():
    """``get_validator()`` returns an ``etree.XMLSchema`` for the
    VALID_ONIX3_REF and VALID_ONIX3_SHORT fixtures."""
    for fixture in (data.VALID_ONIX3_REF, data.VALID_ONIX3_SHORT):
        schema_validator = OnixFile(fixture).get_validator()
        assert isinstance(schema_validator, etree.XMLSchema)
Esempio n. 6
0
def test_onix_file_with_path():
    """An ``OnixFile`` built from the VALID_ONIX3_REF fixture yields an
    ``etree._ElementTree`` from ``xml_tree()``."""
    from_path = OnixFile(data.VALID_ONIX3_REF)
    assert isinstance(from_path, OnixFile)
    parsed = from_path.xml_tree()
    assert isinstance(parsed, etree._ElementTree)
Esempio n. 7
0
def test_message_from_logentry():
    """The first validator log entry for INVALID_ONIX3_REF converts into a
    ``Message`` via ``Message.from_logentry``."""
    invalid_onix = OnixFile(data.INVALID_ONIX3_REF)
    check = invalid_onix.get_validator()
    # Running the validator populates its error log.
    check(invalid_onix.xml_tree())
    first_entry = check.error_log[0]
    converted = Message.from_logentry(first_entry)
    assert isinstance(converted, Message)
Esempio n. 8
0
def test_get_validator_o2():
    """``get_validator()`` returns an ``etree.XMLSchema`` for the
    VALID_ONIX2_REF fixture."""
    schema_validator = OnixFile(data.VALID_ONIX2_REF).get_validator()
    assert isinstance(schema_validator, etree.XMLSchema)
Esempio n. 9
0
def test_onix_file_with_file_obj():
    """An ``OnixFile`` built from an open binary file object yields an
    ``etree._ElementTree`` from ``xml_tree()``."""
    with open(data.VALID_ONIX3_REF, 'rb') as handle:
        wrapped = OnixFile(handle)
        # The tree is requested while the file is still open.
        parsed = wrapped.xml_tree()
        assert isinstance(parsed, etree._ElementTree)
    assert isinstance(wrapped, OnixFile)
Esempio n. 10
0
class OnixFix(object):
    """
    Validates ONIX XML against custom YAML-based subspecifications and
    provides useful error and warning messages.

    :param onixfile: File obj with onix data or path to onix file
    :type onixfile: file or str
    :param profile: Path to yaml profile
    :type profile: str
    """

    def __init__(self, onixfile, profile):
        # Objects exposing ``name`` (e.g. open files) are labelled by that
        # attribute; anything else is treated as a path.
        if hasattr(onixfile, 'name'):
            self.basename = basename(onixfile.name)
        else:
            self.basename = basename(onixfile)

        # Rewind seekable inputs so parsing starts at the beginning.
        if hasattr(onixfile, 'seek'):
            onixfile.seek(0)

        self._onix_file = OnixFile(onixfile)
        self.onix = self._onix_file.xml_tree().getroot()

        # Get rid of namespaces for easier xpath search
        # (``iter()`` replaces the deprecated ``getiterator()``).
        for elem in self.onix.iter():
            tag = elem.tag
            # Nodes whose tag is not string-like have nothing to strip.
            if not hasattr(tag, 'find'):
                continue
            i = tag.find('}')
            if i >= 0:
                elem.tag = tag[i + 1:]
        objectify.deannotate(self.onix, cleanup_namespaces=True)

        # Close the profile file deterministically. ``safe_load`` is used
        # because ``yaml.load`` without an explicit Loader is rejected by
        # current PyYAML and can construct arbitrary Python objects.
        with open(profile, 'rb') as profile_file:
            self.spec = yaml.safe_load(profile_file)

        # The validator name reported in messages is the upper-cased profile
        # file name without its extension.
        self.name = splitext(basename(profile))[0].upper()
        self.urls_checked = set()
        self.errors = []
        self.warnings = []

    @property
    def messages(self):
        """All collected messages: errors first, then warnings."""
        return self.errors + self.warnings

    def add_message(self, message, path, el=None, level='ERROR'):
        """
        Record a message in ``self.errors`` or ``self.warnings``.

        The location is ``<basename>:<sourceline>`` when `el` is given and
        has a source line, otherwise ``<basename>:<path>``.

        :param message: Text of the message
        :param path: Spec path the message refers to
        :param el: Element the message refers to, if any
        :param level: ``'ERROR'`` or ``'WARNING'``
        """
        assert level in ('ERROR', 'WARNING')

        if el is not None and el.sourceline is not None:
            location = '%s' % el.sourceline
        else:
            location = path

        location = '%s:%s' % (self.basename, location)

        msg = Message(level=level,
                      validator=self.name,
                      location=location,
                      message=message,
                      error_type='SPECIFICATION ERROR')
        if level == 'ERROR':
            self.errors.append(msg)
        elif level == 'WARNING':
            self.warnings.append(msg)

    def validate(self):
        """
        Validate the ONIX tree against the spec file.

        Only top-level spec keys starting with ``/`` are processed.
        Warnings are collected in ``self.warnings`` but not returned.

        :return list[Message]: list of error messages if any else empty list.
        """
        for path_or_keyword in self.spec:
            if path_or_keyword.startswith('/'):
                path = path_or_keyword.lstrip('/')
                specnode = self.spec[path_or_keyword]
                self.validate_block(path, specnode, self.onix)
        return self.errors

    def validate_block(self, path, specnode, xmlnode):
        """
        Apply every rule of `specnode` to `path`, evaluated below `xmlnode`.

        * ``description`` / ``example`` keys are skipped.
        * ``required`` / ``recommended`` are checked against `xmlnode`
          itself.
        * Keys starting with ``/`` are nested blocks; they are validated
          for every node matching `path`.
        * Any other key ``<name>`` is dispatched to ``handle_<name>`` once
          per element matching `path`.
        """
        for pok in specnode:
            if pok in ('description', 'example'):
                continue
            elif pok in ('required', 'recommended'):
                # Presence checks need the containing node. ``multiple`` is
                # deliberately not listed here: ``handle_multiple`` expects
                # the matched element and is reached via the final branch.
                func = getattr(self, 'handle_' + pok)
                func(path, xmlnode, specnode.get(pok))
            elif pok.startswith('/'):
                sub_path = pok.lstrip('/')
                for node in xmlnode.xpath(path):
                    self.validate_block(sub_path, specnode[pok], node)
            else:
                func = getattr(self, 'handle_' + pok)
                value = specnode.get(pok)
                for element in xmlnode.xpath(path):
                    func(path, element, value)

    def handle_required(self, path, el, val):
        """Report an error if `val` is truthy and `path` matches nothing
        below `el`."""
        if val:
            result = el.xpath(path)
            if len(result) == 0:
                msg = 'Missing element %s within %s' % (path, el.tag)
                self.add_message(msg, path, el)

    def handle_requires_one_of(self, path, el, val):
        """Report an error unless exactly one of the names in `val` matches
        exactly one element below `el`."""
        if val:
            results = []
            for elname in val:
                result = el.xpath(elname)
                if len(result) == 1:
                    results.append(result)
            if len(results) != 1:
                msg = 'Exactly one of %s must be within %s' % (val, el.tag)
                self.add_message(msg, path)

    def handle_recommended(self, path, el, val):
        """Record a warning if `val` is truthy and `path` matches nothing
        below `el`."""
        if val:
            result = el.xpath(path)
            if len(result) == 0:
                msg = 'Missing recommended element %s within %s' % (path,
                                                                    el.tag)
                self.add_message(msg, path, level='WARNING')

    def handle_multiple(self, path, el, val):
        """Report an error if `val` is falsy and the parent of `el`
        contains more than one match for `path`."""
        if not val:
            result = el.getparent().xpath(path)
            if len(result) > 1:
                msg = 'Only one "%s" allowed per "%s" found %s' % (
                    path, el.getparent().tag, len(result))
                self.add_message(msg, path)

    def handle_content(self, path, el, val):
        """Report an error if the text of `el` differs from `val`."""
        if el.text != val:
            msg = 'Expected content "%s" does not match "%s"' % (val, el.text)
            self.add_message(msg, path, el)

    def handle_length(self, path, el, val):
        """Report an error if the text of `el` is not `val` characters
        long."""
        # An element without text counts as length 0 instead of raising.
        if len(el.text or '') != val:
            msg = 'Expected %s characters for "%s"' % (val, el.text)
            self.add_message(msg, path, el)

    def handle_choice(self, path, el, val):
        """Report an error if the text of `el` is not contained in `val`."""
        if el.text not in val:
            msg = '"%s" not one of "%s"' % (el.text, val)
            self.add_message(msg, path, el)

    def handle_dateformat(self, path, el, val):
        """Report an error if the text of `el` does not parse with the
        ``strptime`` format `val`."""
        try:
            # Missing text is parsed as '' so it is reported, not raised.
            datetime.strptime(el.text or '', val)
        except ValueError as e:
            # ``str(e)`` instead of ``e.message``: exceptions have no
            # ``message`` attribute on Python 3.
            self.add_message(str(e), path, el)

    def handle_range(self, path, el, val):
        """Report an error if the text of `el` is not an integer inside
        ``range(*val)``."""
        try:
            number = int(el.text)
        except (TypeError, ValueError):
            # Missing or non-numeric text cannot be inside the range.
            number = None
        if number is None or number not in range(*val):
            msg = '"%s" out of range' % el.text
            self.add_message(msg, path, el)

    def handle_type(self, path, el, val):
        """Dispatch to the ``check_<val>`` method for the named type."""
        assert val in ('int', 'isbn13', 'isbn', 'decimal')
        getattr(self, 'check_' + val)(path, el)

    def check_int(self, path, el):
        """Report an error unless the text of `el` consists only of
        digits."""
        text = el.text or ''
        invalid = [c for c in text if c not in string.digits]
        # Missing text is not an integer either.
        if invalid or not text:
            msg = 'Not an Integer. Invalid chars %s.' % invalid
            self.add_message(msg, path, el)

    def check_isbn13(self, path, el):
        """Report an error if ``isbnlib`` rejects the text as ISBN-13."""
        if not isbnlib.is_isbn13(el.text):
            msg = 'Invalid ISBN13: %s' % el.text
            self.add_message(msg, path, el)

    def check_isbn(self, path, el):
        """Report an error if ``isbnlib`` rejects the text as both ISBN-13
        and ISBN-10."""
        if not isbnlib.is_isbn13(el.text) and not isbnlib.is_isbn10(el.text):
            msg = 'Invalid ISBN: %s' % el.text
            self.add_message(msg, path, el)

    def check_decimal(self, path, el):
        """Report an error if the text of `el` cannot be converted to
        ``Decimal``."""
        try:
            Decimal(el.text)
        except (InvalidOperation, TypeError, ValueError):
            # TypeError covers elements without text (``el.text is None``).
            msg = 'Invalid decimal value %s' % el.text
            self.add_message(msg, path, el)
Esempio n. 11
0
class OnixFix(object):
    """
    Validates ONIX XML against custom YAML-based subspecifications and
    provides useful error and warning messages.

    :param onixfile: File obj with onix data or path to onix file
    :type onixfile: file or str
    :param profile: Path to yaml profile
    :type profile: str
    """

    def __init__(self, onixfile, profile):
        # Objects exposing ``name`` (e.g. open files) are labelled by that
        # attribute; anything else is treated as a path.
        if hasattr(onixfile, 'name'):
            self.basename = basename(onixfile.name)
        else:
            self.basename = basename(onixfile)

        # Rewind seekable inputs so parsing starts at the beginning.
        if hasattr(onixfile, 'seek'):
            onixfile.seek(0)

        self._onix_file = OnixFile(onixfile)
        self.onix = self._onix_file.xml_tree().getroot()

        # Get rid of namespaces for easier xpath search
        # (``iter()`` replaces the deprecated ``getiterator()``).
        for elem in self.onix.iter():
            tag = elem.tag
            # Nodes whose tag is not string-like have nothing to strip.
            if not hasattr(tag, 'find'):
                continue
            i = tag.find('}')
            if i >= 0:
                elem.tag = tag[i + 1:]
        objectify.deannotate(self.onix, cleanup_namespaces=True)

        # Close the profile file deterministically. ``safe_load`` is used
        # because ``yaml.load`` without an explicit Loader is rejected by
        # current PyYAML and can construct arbitrary Python objects.
        with open(profile, 'rb') as profile_file:
            self.spec = yaml.safe_load(profile_file)

        # The validator name reported in messages is the upper-cased profile
        # file name without its extension.
        self.name = splitext(basename(profile))[0].upper()
        self.urls_checked = set()
        self.errors = []
        self.warnings = []

    @property
    def messages(self):
        """All collected messages: errors first, then warnings."""
        return self.errors + self.warnings

    def add_message(self, message, path, el=None, level='ERROR'):
        """
        Record a message in ``self.errors`` or ``self.warnings``.

        The location is ``<basename>:<sourceline>`` when `el` is given and
        has a source line, otherwise ``<basename>:<path>``.

        :param message: Text of the message
        :param path: Spec path the message refers to
        :param el: Element the message refers to, if any
        :param level: ``'ERROR'`` or ``'WARNING'``
        """
        assert level in ('ERROR', 'WARNING',)

        if el is not None and el.sourceline is not None:
            location = '%s' % el.sourceline
        else:
            location = path

        location = '%s:%s' % (self.basename, location)

        msg = Message(
            level=level,
            validator=self.name,
            location=location,
            message=message,
            error_type='SPECIFICATION ERROR'
        )
        if level == 'ERROR':
            self.errors.append(msg)
        elif level == 'WARNING':
            self.warnings.append(msg)

    def validate(self):
        """
        Validate the ONIX tree against the spec file.

        Only top-level spec keys starting with ``/`` are processed.
        Warnings are collected in ``self.warnings`` but not returned.

        :return list[Message]: list of error messages if any else empty list.
        """
        for path_or_keyword in self.spec:
            if path_or_keyword.startswith('/'):
                path = path_or_keyword.lstrip('/')
                specnode = self.spec[path_or_keyword]
                self.validate_block(path, specnode, self.onix)
        return self.errors

    def validate_block(self, path, specnode, xmlnode):
        """
        Apply every rule of `specnode` to `path`, evaluated below `xmlnode`.

        * ``description`` / ``example`` keys are skipped.
        * ``required`` / ``recommended`` are checked against `xmlnode`
          itself.
        * Keys starting with ``/`` are nested blocks; they are validated
          for every node matching `path`.
        * Any other key ``<name>`` is dispatched to ``handle_<name>`` once
          per element matching `path`.
        """
        for pok in specnode:
            if pok in ('description', 'example'):
                continue
            elif pok in ('required', 'recommended'):
                # Presence checks need the containing node. ``multiple`` is
                # deliberately not listed here: ``handle_multiple`` expects
                # the matched element and is reached via the final branch.
                func = getattr(self, 'handle_' + pok)
                func(path, xmlnode, specnode.get(pok))
            elif pok.startswith('/'):
                sub_path = pok.lstrip('/')
                for node in xmlnode.xpath(path):
                    self.validate_block(sub_path, specnode[pok], node)
            else:
                func = getattr(self, 'handle_' + pok)
                value = specnode.get(pok)
                for element in xmlnode.xpath(path):
                    func(path, element, value)

    def handle_required(self, path, el, val):
        """Report an error if `val` is truthy and `path` matches nothing
        below `el`."""
        if val:
            result = el.xpath(path)
            if len(result) == 0:
                msg = 'Missing element %s within %s' % (path, el.tag)
                self.add_message(msg, path, el)

    def handle_requires_one_of(self, path, el, val):
        """Report an error unless exactly one of the names in `val` matches
        exactly one element below `el`."""
        if val:
            results = []
            for elname in val:
                result = el.xpath(elname)
                if len(result) == 1:
                    results.append(result)
            if len(results) != 1:
                msg = 'Exactly one of %s must be within %s' % (val, el.tag)
                self.add_message(msg, path)

    def handle_recommended(self, path, el, val):
        """Record a warning if `val` is truthy and `path` matches nothing
        below `el`."""
        if val:
            result = el.xpath(path)
            if len(result) == 0:
                msg = 'Missing recommended element %s within %s' % (path, el.tag)
                self.add_message(msg, path, level='WARNING')

    def handle_multiple(self, path, el, val):
        """Report an error if `val` is falsy and the parent of `el`
        contains more than one match for `path`."""
        if not val:
            result = el.getparent().xpath(path)
            if len(result) > 1:
                msg = 'Only one "%s" allowed per "%s" found %s' % (path, el.getparent().tag, len(result))
                self.add_message(msg, path)

    def handle_content(self, path, el, val):
        """Report an error if the text of `el` differs from `val`."""
        if el.text != val:
            msg = 'Expected content "%s" does not match "%s"' % (val, el.text)
            self.add_message(msg, path, el)

    def handle_length(self, path, el, val):
        """Report an error if the text of `el` is not `val` characters
        long."""
        # An element without text counts as length 0 instead of raising.
        if len(el.text or '') != val:
            msg = 'Expected %s characters for "%s"' % (val, el.text)
            self.add_message(msg, path, el)

    def handle_choice(self, path, el, val):
        """Report an error if the text of `el` is not contained in `val`."""
        if el.text not in val:
            msg = '"%s" not one of "%s"' % (el.text, val)
            self.add_message(msg, path, el)

    def handle_dateformat(self, path, el, val):
        """Report an error if the text of `el` does not parse with the
        ``strptime`` format `val`."""
        try:
            # Missing text is parsed as '' so it is reported, not raised.
            datetime.strptime(el.text or '', val)
        except ValueError as e:
            # ``str(e)`` instead of ``e.message``: exceptions have no
            # ``message`` attribute on Python 3.
            self.add_message(str(e), path, el)

    def handle_range(self, path, el, val):
        """Report an error if the text of `el` is not an integer inside
        ``range(*val)``."""
        try:
            number = int(el.text)
        except (TypeError, ValueError):
            # Missing or non-numeric text cannot be inside the range.
            number = None
        if number is None or number not in range(*val):
            msg = '"%s" out of range' % el.text
            self.add_message(msg, path, el)

    def handle_type(self, path, el, val):
        """Dispatch to the ``check_<val>`` method for the named type."""
        assert val in ('int', 'isbn13', 'isbn', 'decimal')
        getattr(self, 'check_' + val)(path, el)

    def check_int(self, path, el):
        """Report an error unless the text of `el` consists only of
        digits."""
        text = el.text or ''
        invalid = [c for c in text if c not in string.digits]
        # Missing text is not an integer either.
        if invalid or not text:
            msg = 'Not an Integer. Invalid chars %s.' % invalid
            self.add_message(msg, path, el)

    def check_isbn13(self, path, el):
        """Report an error if ``isbnlib`` rejects the text as ISBN-13."""
        if not isbnlib.is_isbn13(el.text):
            msg = 'Invalid ISBN13: %s' % el.text
            self.add_message(msg, path, el)

    def check_isbn(self, path, el):
        """Report an error if ``isbnlib`` rejects the text as both ISBN-13
        and ISBN-10."""
        if not isbnlib.is_isbn13(el.text) and not isbnlib.is_isbn10(el.text):
            msg = 'Invalid ISBN: %s' % el.text
            self.add_message(msg, path, el)

    def check_decimal(self, path, el):
        """Report an error if the text of `el` cannot be converted to
        ``Decimal``."""
        try:
            Decimal(el.text)
        except (InvalidOperation, TypeError, ValueError):
            # TypeError covers elements without text (``el.text is None``).
            msg = 'Invalid decimal value %s' % el.text
            self.add_message(msg, path, el)