コード例 #1
0
ファイル: document.py プロジェクト: xiayansummer/openxmllib
    def __init__(self, file_, mime_type=None):
        """**Creating a new document**

        Unpacks the zip archive read from ``file_`` into a fresh temporary
        directory and loads the ``[Content_Types].xml`` found at its root.

        :param file_: an opened file(-like) object on the zipped document.
          It is closed once the archive has been processed, whether or not
          the extraction succeeded. A ``urllib.addinfourl`` object is first
          copied to a temporary file.
        :param mime_type: optional MIME type of the document; mandatory when
          ``file_`` has no ``name`` attribute.
        :raises ValueError: when ``file_`` has no ``name`` attribute and
          ``mime_type`` is ``None``.
        """
        #: The MIME type of the document
        self.mime_type = mime_type

        # Preliminary settings depending on input
        #: The file name of the document
        self.filename = getattr(file_, 'name', None)
        if self.filename is None and mime_type is None:
            raise ValueError(
                "Cannot guess mime type from such object, you should use the mime_type constructor arg."
            )

        # Need to make a real file for urllib.urlopen objects
        if isinstance(file_, urllib.addinfourl):
            fd, self._cache_file = tempfile.mkstemp()
            fh = os.fdopen(fd, 'wb')
            try:
                fh.write(file_.read())
            finally:
                # Release both handles even when the copy fails
                fh.close()
                file_.close()
            file_ = open(self._cache_file, 'rb')

        # Inflating the zipped file
        self._cache_dir = tempfile.mkdtemp()
        try:
            openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
            try:
                # extractall() creates the intermediate directories, copes
                # with explicit directory entries ("word/") and, on
                # Python >= 2.7.4, keeps members with absolute or ".." names
                # inside the cache directory.
                openxmldoc.extractall(self._cache_dir)
            finally:
                openxmldoc.close()
        finally:
            file_.close()

        # Getting the content types declarations
        ct_file = os.path.join(self._cache_dir, '[Content_Types].xml')

        #: A :class:`openxmllib.contenttypes.ContentTypes` object for this document
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
コード例 #2
0
    def __init__(self, file_, mime_type=None):
        """**Creating a new document**

        The zip archive behind ``file_`` is inflated into a new temporary
        directory, then its root ``[Content_Types].xml`` is loaded.

        :param file_: an opened file(-like) object on the zipped document.
          The constructor closes it after processing the archive, on success
          as well as on failure. A ``urllib.addinfourl`` object is spooled to
          a temporary file beforehand.
        :param mime_type: optional MIME type of the document; it must be
          given when ``file_`` has no ``name`` attribute.
        :raises ValueError: when neither a ``name`` attribute on ``file_``
          nor ``mime_type`` is available.
        """
        #: The MIME type of the document
        self.mime_type = mime_type

        # Preliminary settings depending on input
        #: The file name of the document
        self.filename = getattr(file_, 'name', None)
        if self.filename is None and mime_type is None:
            raise ValueError("Cannot guess mime type from such object, you should use the mime_type constructor arg.")

        # Need to make a real file for urllib.urlopen objects
        if isinstance(file_, urllib.addinfourl):
            fd, self._cache_file = tempfile.mkstemp()
            spool = os.fdopen(fd, 'wb')
            try:
                spool.write(file_.read())
            finally:
                # Neither handle may stay open if reading or writing fails
                spool.close()
                file_.close()
            file_ = open(self._cache_file, 'rb')

        # Inflating the zipped file
        self._cache_dir = tempfile.mkdtemp()
        try:
            openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
            try:
                # The standard extractor replaces the former hand-written
                # loop: it builds missing directories, accepts directory
                # entries and (Python >= 2.7.4) refuses to write members
                # with absolute or ".." names outside of the target.
                openxmldoc.extractall(self._cache_dir)
            finally:
                openxmldoc.close()
        finally:
            file_.close()

        # Getting the content types declarations
        ct_file = os.path.join(self._cache_dir, '[Content_Types].xml')

        #: A :class:`openxmllib.contenttypes.ContentTypes` object for this document
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
コード例 #3
0
ファイル: contenttypes.py プロジェクト: 5up3rc/spiderfoot
    def getTreesFor(self, document, content_type):
        """Provides all XML documents for that content type

        This is a generator: each file is opened and parsed only when the
        caller iterates over the result.

        @param document: a Document or subclass object; its _cache_dir
        attribute is the base directory for the files
        @param content_type: a MIME content type, used as key in self.overrides
        @return: iterator over the parsed trees (results of etree.parse) of
        that content type
        """
        for part_name in self.overrides[content_type]:
            # Strip every leading path separator, not just the first one:
            # os.path.join discards the cache directory as soon as it is
            # handed a component that still looks absolute. lstrip also
            # accepts an empty name where indexing [0] raised IndexError.
            rel_path = part_name.lstrip('/\\')
            file_path = os.path.join(document._cache_dir, rel_path)
            yield etree.parse(utils.xmlFile(file_path, 'rb'))
コード例 #4
0
    def getTreesFor(self, document, content_type):
        """Provides all XML documents for that content type

        Implemented as a generator, so nothing is read from disk before the
        caller starts iterating.

        @param document: a Document or subclass object (its _cache_dir
        attribute locates the files)
        @param content_type: a MIME content type (a key of self.overrides)
        @return: iterator yielding one etree.parse result per file registered
        for that content type
        """
        for declared_path in self.overrides[content_type]:
            # os.path.join drops document._cache_dir when the second
            # component starts with a separator, so all leading "/" and "\"
            # are removed (the former code removed a single one and raised
            # IndexError on an empty path).
            rel_path = declared_path.lstrip('/\\')
            file_path = os.path.join(document._cache_dir, rel_path)
            yield etree.parse(utils.xmlFile(file_path, 'rb'))
コード例 #5
0
    def __init__(self, file_, mime_type=None):
        """Creating a new document

        Inflates the zip archive into a new temporary directory and loads
        the '[Content_Types].xml' stored at its root.

        @param file_: An opened file(like) obj to the document
        A file must be opened in 'rb' mode. It is closed by this constructor
        once the archive has been processed, even when extraction fails.
        @param mime_type: optional MIME type of the document, mandatory when
        file_ has no 'name' attribute
        @raise ValueError: file_ has no 'name' attribute and mime_type is None
        """
        self.mime_type = mime_type

        # Preliminary settings depending on input
        self.filename = getattr(file_, 'name', None)
        if self.filename is None and mime_type is None:
            raise ValueError(
                "Cannot guess mime type from such object, you should use the mime_type constructor arg."
            )

        # Need to make a real file for urllib.urlopen objects
        if isinstance(file_, urllib.addinfourl):
            fd, self._cache_file = tempfile.mkstemp()
            fh = os.fdopen(fd, 'wb')
            try:
                fh.write(file_.read())
            finally:
                # Close both ends even if the transfer breaks midway
                fh.close()
                file_.close()
            file_ = open(self._cache_file, 'rb')

        # Inflating the file
        self._cache_dir = tempfile.mkdtemp()
        try:
            openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
            try:
                # extractall() makes sure the target dirs exist, handles
                # directory entries and, on Python >= 2.7.4, sanitizes
                # absolute or ".." member names so that nothing is written
                # outside of the cache directory.
                openxmldoc.extractall(self._cache_dir)
            finally:
                openxmldoc.close()
        finally:
            file_.close()

        # Getting the content types decl
        ct_file = os.path.join(self._cache_dir, '[Content_Types].xml')
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
コード例 #6
0
ファイル: document.py プロジェクト: 5up3rc/spiderfoot
    def __init__(self, file_, mime_type=None):
        """Creating a new document

        The zipped document is unpacked into a fresh temporary directory,
        then the '[Content_Types].xml' at its root is loaded.

        @param file_: An opened file(like) obj to the document
        A file must be opened in 'rb' mode. The constructor closes it after
        the archive has been processed, on success and on failure alike.
        @param mime_type: optional MIME type of the document, required when
        file_ has no 'name' attribute
        @raise ValueError: neither file_.name nor mime_type is available
        """
        self.mime_type = mime_type

        # Preliminary settings depending on input
        self.filename = getattr(file_, 'name', None)
        if self.filename is None and mime_type is None:
            raise ValueError("Cannot guess mime type from such object, you should use the mime_type constructor arg.")

        # Need to make a real file for urllib.urlopen objects
        if isinstance(file_, urllib.addinfourl):
            fd, self._cache_file = tempfile.mkstemp()
            spool = os.fdopen(fd, 'wb')
            try:
                spool.write(file_.read())
            finally:
                # Do not leave either handle open when the copy fails
                spool.close()
                file_.close()
            file_ = open(self._cache_file, 'rb')

        # Inflating the file
        self._cache_dir = tempfile.mkdtemp()
        try:
            openxmldoc = zipfile.ZipFile(file_, 'r', zipfile.ZIP_DEFLATED)
            try:
                # Standard extraction instead of a manual loop: target dirs
                # are created as needed, directory entries are accepted and
                # (Python >= 2.7.4) absolute or ".." member names cannot
                # escape the cache directory.
                openxmldoc.extractall(self._cache_dir)
            finally:
                openxmldoc.close()
        finally:
            file_.close()

        # Getting the content types decl
        ct_file = os.path.join(self._cache_dir, '[Content_Types].xml')
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
コード例 #7
0
ファイル: document.py プロジェクト: kdeldycke/openxmllib
    def documentCover(self):
        """Cover page image

        Looks for the thumbnail relationship in the ``_rels/.rels`` file of
        the cache directory and opens the file it targets.

        :return: (file extension, file object) tuple, or ``None`` when no
          thumbnail relationship is declared. The file object is opened in
          binary mode and positioned at its start.
        """
        rels_file = xmlFile(os.path.join(self._cache_dir, "_rels", ".rels"), 'rb')
        rels_tree = lxml.etree.parse(rels_file)
        find_thumbnails = lxml.etree.XPath(
            "relationships:Relationship[@Type='%s']" % ns_map["thumbnails"],
            namespaces=ns_map)
        try:
            thumbnail_rel = find_thumbnails(rels_tree)[0]
        except IndexError:
            # No thumbnail relationship declared
            return None
        target = thumbnail_rel.attrib["Target"]

        image_fp = open(self._cache_dir + os.sep + target, "rb")
        # Sniff the image type from the first bytes, then rewind for the caller
        header = image_fp.read(32)
        detected = imghdr.what(None, h=header)
        image_fp.seek(0)

        # some MS docs say the type can be JPEG which is ok,
        # or WMF, which imghdr does not recognize...
        if detected:
            extension = detected.replace("jpeg", "jpg")
        else:
            # Fall back on whatever follows the last dot of the target
            extension = target.rsplit('.', 1)[-1]
        return (extension, image_fp)
コード例 #8
0
ファイル: document.py プロジェクト: glenfant/openxmllib
    def documentCover(self):
        """Cover page image

        Reads ``_rels/.rels`` from the cache directory, picks the first
        relationship of the thumbnail type and opens its target file.

        :return: (file extension, file object) tuple; ``None`` if the
          relationships file declares no thumbnail. The returned file object
          is opened in ``"rb"`` mode and rewound to offset 0.
        """
        rels_location = os.path.join(self._cache_dir, "_rels", ".rels")
        relationships = lxml.etree.parse(xmlFile(rels_location, 'rb'))
        query = "relationships:Relationship[@Type='%s']" % ns_map["thumbnails"]
        hits = lxml.etree.XPath(query, namespaces=ns_map)(relationships)
        try:
            cover_target = hits[0].attrib["Target"]
        except IndexError:
            # Empty result: the document has no thumbnail
            return None

        cover_file = open(self._cache_dir + os.sep + cover_target, "rb")
        # The first 32 bytes are used for type detection only
        sniffed = imghdr.what(None, h=cover_file.read(32))
        cover_file.seek(0)

        # some MS docs say the type can be JPEG which is ok,
        # or WMF, which imghdr does not recognize...
        if sniffed:
            return (sniffed.replace("jpeg", "jpg"), cover_file)
        # Unrecognized content: use the text after the last dot of the target
        return (cover_target.split('.')[-1], cover_file)
コード例 #9
0
        for outpath in openxmldoc.namelist():
            # We need to be sure that target dir exists
            rel_outpath = op_sep.join(outpath.split('/'))
            abs_outpath = op_join(self._cache_dir, rel_outpath)
            abs_outdir = op_dirname(abs_outpath)
            if not op_isdir(abs_outdir):
                os.makedirs(abs_outdir)
            fh = file(abs_outpath, 'wb')
            fh.write(openxmldoc.read(outpath))
            fh.close()
        openxmldoc.close()
        in_file.close()

        # Getting the content types decl
        ct_file = op_join(self._cache_dir, '[Content_Types].xml')
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
        return


    @property
    def mimeType(self):
        """The official MIME type for this document
        @return: 'application/xxx' for this file, or None when no MIME type
        was provided and the file name is unknown or matches none of the
        patterns of self._extpattern_to_mime
        """

        if self.mime_type:
            # Supposed validated by the factory
            return self.mime_type
        if self.filename is None:
            # Nothing to match against: fnmatch cannot handle None
            return None
        for pattern, mime_type in self._extpattern_to_mime.items():
            if fnmatch.fnmatch(self.filename, pattern):
                return mime_type
        return None
コード例 #10
0
        for outpath in openxmldoc.namelist():
            # We need to be sure that target dir exists
            rel_outpath = op_sep.join(outpath.split('/'))
            abs_outpath = op_join(self._cache_dir, rel_outpath)
            abs_outdir = op_dirname(abs_outpath)
            if not op_isdir(abs_outdir):
                os.makedirs(abs_outdir)
            fh = file(abs_outpath, 'wb')
            fh.write(openxmldoc.read(outpath))
            fh.close()
        openxmldoc.close()
        in_file.close()

        # Getting the content types decl
        ct_file = op_join(self._cache_dir, '[Content_Types].xml')
        self.content_types = contenttypes.ContentTypes(xmlFile(ct_file, 'rb'))
        return

    @property
    def mimeType(self):
        """The official MIME type for this document
        @return: 'application/xxx' for this file; None if no MIME type was
        given and the file name is missing or matches no pattern of
        self._extpattern_to_mime
        """

        if self.mime_type:
            # Supposed validated by the factory
            return self.mime_type
        if self.filename is None:
            # Without a file name there is nothing to match; fnmatch would
            # raise on None
            return None
        for pattern, mime_type in self._extpattern_to_mime.items():
            if fnmatch.fnmatch(self.filename, pattern):
                return mime_type
        return None