Example #1
0
    def claims_for_parse(cls, file_):
        """Estimate how likely this parser is to handle `file_`.

        `file_` is a readable file-like object; closing it remains the
        responsibility of the caller.

        Return an int between 0 and 99 expressing the likelihood that this
        parser can correctly handle the given file. 70 is the conventional
        value for "pretty sure"; a content-type matching `cls.MIMETYPE`
        yields 80 regardless of the file name.
        """
        # File-likes without an `info()` method simply carry no headers.
        headers = getattr(file_, "info", dict)()
        content_type = headers.get("content-type", "")

        # An exact mimetype match settles the question on its own.
        if content_type.startswith(cls.MIMETYPE):
            return 80

        score = 0
        if content_type.startswith(("application/json", "text/javascript")):
            score += 20

        # Only the first matching suffix contributes to the score.
        location = get_path(file_)
        for suffix, bonus in ((cls.EXTENSION, 50), (".json", 20), (".js", 10)):
            if location.endswith(suffix):
                score += bonus
                break

        return score
Example #2
0
    def claims_for_parse(cls, file_):
        """Is this parser likely to parse that file-like object?

        `file_` is a readable file-like object. It is the responsibility of the
        caller to close it. If `file_` is seekable, its position is restored
        before returning, even when inspecting the archive raises.

        Return an int claim-score (higher means more confident):

        * 80 if the content-type starts with `cls.MIMETYPE`;
        * otherwise, for a generic ``application/x-zip`` content-type, 30 plus
          40 (path ends with `cls.EXTENSION`) or 20 (path ends with ".zip");
        * if `file_` is seekable, the score is then adjusted by looking inside
          the archive: raised to at least 70 when the "mimetype" member
          matches, raised to at least 20 when only "content.xml" is present,
          and divided by 5 otherwise (including when it is not a ZIP file).
        """
        r = 0

        info = getattr(file_, "info", lambda: {})()
        mimetype = info.get("content-type", "")
        if mimetype.startswith(cls.MIMETYPE):
            r = 80  # overrides extension
        elif mimetype.startswith("application/x-zip"):
            r += 30
            fpath = get_path(file_)
            if fpath.endswith(cls.EXTENSION):
                r += 40
            elif fpath.endswith(".zip"):
                r += 20

        if hasattr(file_, "seek"):
            # If possible, inspect ZIP file to adjust the claim-score.
            # NB: if those tests fail, we do not drop the claim-score to 0,
            # but merely reduce it. This is because, if no other parser claims
            # that file, a ParseError will be more informative than a
            # NoClaimError.
            old_pos = file_.tell()
            try:
                try:
                    z = ZipFile(file_, "r")
                except BadZipfile:
                    # Floor division keeps the score an int.
                    r //= 5
                else:
                    try:
                        names = z.namelist()
                        if "mimetype" in names:
                            if z.read("mimetype").startswith(cls.MIMETYPE):
                                r = max(r, 70)
                            else:
                                r //= 5
                        elif "content.xml" in names:
                            r = max(r, 20)
                            # wait for other information to make up our mind
                        else:
                            r //= 5
                    finally:
                        z.close()
            finally:
                # Always give the stream back where we found it.
                file_.seek(old_pos)

        return r
Example #3
0
 def parse(self):
     """Do the actual parsing.

     If the parsed file is a local "content.xml" that sits next to a
     "mimetype" file whose content equals `self.MIMETYPE`, the containing
     directory is recorded on the package as PACKAGED_ROOT before delegating
     to `XmlParserBase.parse`. `self.standalone_xml` is set to True when no
     PACKAGED_ROOT metadata is present on the package.
     """
     file_ = self.file
     fpath = get_path(file_)
     if is_local(file_) and fpath.endswith("content.xml"):
         # looks like this is a manually-unzipped package,
         dirname = path.split(fpath)[0]
         mfn = path.join(dirname, "mimetype")
         if exists(mfn):
             f = open(mfn)
             try:
                 mimetype = f.read()
             finally:
                 # Release the handle even if reading fails.
                 f.close()
             if mimetype == self.MIMETYPE:
                 self.package.set_meta(PACKAGED_ROOT, dirname)
     # NB: PACKAGED_ROOT may have been set elsewhere, so:
     self.standalone_xml = not self.package.get_meta(PACKAGED_ROOT, None)
     XmlParserBase.parse(self)
Example #4
0
    def claims_for_parse(cls, file_):
        """Is this parser likely to parse that file-like object?

        `file_` is a readable file-like object. It is the responsability of the
        caller to close it.

        Return an int between 00 and 99, indicating the likelyhood of this parser
        to handle correctly the given URL. 70 is used as a standard value when the
        parser is pretty sure it can handle the URL.
        """
        r = 0
        info = getattr(file_, "info", lambda: {})()
        mimetype = info.get("content-type", "")
        if mimetype.startswith(cls.MIMETYPE):
            r = 80
        else:
            if mimetype.startswith("application/xml") \
            or mimetype.startswith("text/xml"):
                r += 20
            fpath = get_path(file_)
            if fpath.endswith(cls.EXTENSION):
                r += 50
            elif fpath.endswith(".xml"):
                r += 20

        clone = clone_filelike(file_)
        if clone:
            # If possible, inspect XML file to adjust the claim-score.
            # NB: if those tests fail, we do not drop the claim-score to 0,
            # but merely reduce it. This is because, if no other parser claims
            # that file, a ParseError will be more informative than a
            # NoClaimError.
            it = iterparse(clone, events=("start",))
            try:
                ev, el = it.next()
            except XmlParseError, e:
                r /= 5
            else:
                if el.tag != "{%s}package" % cls._NAMESPACE_URI:
                    r /= 2
                else:
                    r = max(70, r)
            finally: