Exemple #1
0
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` back into the DOCX held by ``stream``.

    The part named by the first value returned from
    ``get_document_properties_names()`` is always parsed, updated through
    ``update_doc_props`` and replaced. The part named by the second value is
    replaced only when it could be read; in that case every existing
    ``Company`` child is dropped and a single new one carrying
    ``mi.publisher`` is appended.

    The stream is rewound to offset 0 before ``safe_replace`` is invoked.
    """
    from calibre.utils.zipfile import safe_replace

    container = DOCX(stream, extract=False)
    dp_name, ap_name = container.get_document_properties_names()
    primary_raw = container.read(dp_name)
    try:
        secondary_raw = container.read(ap_name)
    except Exception:
        # Any read failure is treated as "nothing to update" for this part.
        secondary_raw = None

    primary_root = etree.fromstring(primary_raw)
    update_doc_props(primary_root, mi)

    extra = {}
    if secondary_raw is not None:
        secondary_root = etree.fromstring(secondary_raw)
        company = secondary_root.makeelement('{%s}Company' % namespaces['ep'])
        # Collect the matching children first so that removing them does not
        # disturb the iteration over the parent.
        stale = [node for node in secondary_root if node.tag == company.tag]
        for node in stale:
            secondary_root.remove(node)
        company.text = mi.publisher
        secondary_root.append(company)
        extra[ap_name] = BytesIO(xml2str(secondary_root))

    stream.seek(0)
    safe_replace(
        stream,
        dp_name,
        BytesIO(xml2str(primary_root)),
        extra_replacements=extra,
    )
Exemple #2
0
 def __init__(self,
              path_or_stream,
              dest_dir=None,
              log=None,
              notes_text=None):
     """Open the DOCX input and build the empty HTML skeleton.

     :param path_or_stream: Forwarded unchanged to ``DOCX``.
     :param dest_dir: Stored as ``self.dest_dir``; when falsy the current
         working directory is used instead.
     :param log: Forwarded to ``DOCX``; ``self.log`` is then taken from the
         resulting object's ``log`` attribute.
     :param notes_text: Stored as ``self.notes_text``; when falsy the
         translated string ``'Notes'`` is used.
     """
     self.docx = DOCX(path_or_stream, log=log)
     self.log = self.docx.log
     self.notes_text = notes_text or _('Notes')
     # os.getcwdu() only exists on Python 2; fall back to os.getcwd() (which
     # already returns text on Python 3) so a missing dest_dir does not raise
     # AttributeError there. Python 2 behaviour is unchanged.
     self.dest_dir = dest_dir or getattr(os, 'getcwdu', os.getcwd)()
     self.mi = self.docx.metadata
     self.body = BODY()
     self.styles = Styles()
     self.images = Images()
     self.tables = Tables()
     self.object_map = OrderedDict()
     self.html = HTML(
         HEAD(
             META(charset='utf-8'),
             TITLE(self.mi.title or _('Unknown')),
             LINK(rel='stylesheet', type='text/css', href='docx.css'),
         ), self.body)

     # Whitespace-only text/tail values so the serialized skeleton is
     # indented. The last child of the first element gets a shallower tail,
     # so it must be set after the loop.
     head = self.html[0]
     self.html.text = '\n\t'
     head.text = '\n\t\t'
     head.tail = '\n'
     for child in head:
         child.tail = '\n\t\t'
     head[-1].tail = '\n\t'
     self.html[1].text = self.html[1].tail = '\n'

     # Set the root "lang" attribute only when the metadata language
     # canonicalizes to something other than 'und' and lang_as_iso639_1
     # returns a truthy code for it.
     lang = canonicalize_lang(self.mi.language)
     if lang and lang != 'und':
         lang = lang_as_iso639_1(lang)
         if lang:
             self.html.set('lang', lang)
Exemple #3
0
def get_metadata(stream):
    """Read metadata, and a cover if one can be found, from a DOCX stream.

    :param stream: Passed to ``DOCX(stream, extract=False)``; it is rewound
        to offset 0 once the container has been closed.
    :return: The ``metadata`` object of the container, with ``cover_data``
        set when ``get_cover`` returned something other than ``None``.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
        try:
            cdata = get_cover(c)
        except Exception:
            # Cover lookup is best-effort: report the failure and carry on
            # with the metadata alone.
            cdata = None
            import traceback
            traceback.print_exc()
    finally:
        # Close the container even when reading the metadata raises, so the
        # failure path does not leak it.
        c.close()
    stream.seek(0)
    if cdata is not None:
        mi.cover_data = cdata

    return mi
Exemple #4
0
 def __init__(self,
              path_or_stream,
              dest_dir=None,
              log=None,
              detect_cover=True,
              notes_text=None,
              notes_nopb=False,
              nosupsub=False):
     """Open the DOCX input, create the helper objects and the HTML skeleton.

     :param path_or_stream: Forwarded unchanged to ``DOCX``.
     :param dest_dir: Stored as ``self.dest_dir``; when falsy the current
         working directory is used instead.
     :param log: Forwarded to ``DOCX``; ``self.log`` is then taken from the
         resulting object's ``log`` attribute.
     :param detect_cover: Stored as ``self.detect_cover``.
     :param notes_text: Stored as ``self.notes_text``; when falsy the
         translated string ``'Notes'`` is used.
     :param notes_nopb: Stored as ``self.notes_nopb``.
     :param nosupsub: Stored as ``self.nosupsub``.
     """
     self.docx = DOCX(path_or_stream, log=log)
     self.namespace = self.docx.namespace
     self.ms_pat = re.compile(r'\s{2,}')
     self.ws_pat = re.compile(r'[\n\r\t]')
     self.log = self.docx.log
     self.detect_cover = detect_cover
     self.notes_text = notes_text or _('Notes')
     self.notes_nopb = notes_nopb
     self.nosupsub = nosupsub
     # os.getcwdu() only exists on Python 2; fall back to os.getcwd() (which
     # already returns text on Python 3) so a missing dest_dir does not raise
     # AttributeError there. Python 2 behaviour is unchanged.
     self.dest_dir = dest_dir or getattr(os, 'getcwdu', os.getcwd)()
     self.mi = self.docx.metadata
     self.body = BODY()
     # Helper objects all share the namespace object taken from the DOCX
     # container; Styles additionally receives the Tables instance and
     # Images the logger.
     self.theme = Theme(self.namespace)
     self.settings = Settings(self.namespace)
     self.tables = Tables(self.namespace)
     self.fields = Fields(self.namespace)
     self.styles = Styles(self.namespace, self.tables)
     self.images = Images(self.namespace, self.log)
     self.object_map = OrderedDict()
     self.html = HTML(
         HEAD(
             META(charset='utf-8'),
             TITLE(self.mi.title or _('Unknown')),
             LINK(rel='stylesheet', type='text/css', href='docx.css'),
         ), self.body)

     # Whitespace-only text/tail values so the serialized skeleton is
     # indented. The last child of the first element gets a shallower tail,
     # so it must be set after the loop.
     head = self.html[0]
     self.html.text = '\n\t'
     head.text = '\n\t\t'
     head.tail = '\n'
     for child in head:
         child.tail = '\n\t\t'
     head[-1].tail = '\n\t'
     self.html[1].text = self.html[1].tail = '\n'

     # Record the document language; the root "lang" attribute is only set
     # when html_lang() yields a truthy value, otherwise doc_lang is None.
     lang = html_lang(self.mi.language)
     if lang:
         self.html.set('lang', lang)
         self.doc_lang = lang
     else:
         self.doc_lang = None
Exemple #5
0
def get_metadata(stream):
    """Read metadata from a DOCX stream and pick a cover image by heuristic.

    After the metadata is read, the stream is reopened as a zip archive and
    its entries are scanned in archive order. The first entry that has a
    ``jpeg``/``jpg``/``png``/``gif`` extension, can be identified by
    ``identify_data``, has a height/width ratio between 0.8 and 1.8
    inclusive and an area of at least 12000 pixels is used as the cover.

    :param stream: Passed to ``DOCX(stream, extract=False)``, rewound to
        offset 0 and then opened with ``ZipFile``.
    :return: The ``metadata`` object of the container, with ``cover_data``
        set to ``(fmt, raw_bytes)`` when a suitable image was found.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Close the container even when reading the metadata raises.
        c.close()
    stream.seek(0)

    image_exts = {'jpeg', 'jpg', 'png', 'gif'}
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in image_exts:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                # Unidentifiable image data: skip it. A bare except would
                # also have swallowed KeyboardInterrupt/SystemExit.
                continue
            if width <= 0:
                # Guards the ratio computation against ZeroDivisionError.
                continue
            if 0.8 <= height / width <= 1.8 and height * width >= 12000:
                cdata = (fmt, raw)
                # The first match wins, so the remaining entries need not be
                # visited.
                break
        if cdata is not None:
            mi.cover_data = cdata

    return mi