def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the DOCX file held in ``stream``.

    The first properties part reported by the container is parsed, updated
    through ``update_doc_props`` and always written back.  The second
    properties part is optional: when it can be read, every existing
    ``Company`` element in it is dropped and a single new one carrying
    ``mi.publisher`` is appended.

    :param stream: Seekable file-like object containing the DOCX; it is
        rewound and modified in place via ``safe_replace``.
    :param mi: Metadata object; ``mi.publisher`` is read here and the whole
        object is handed to ``update_doc_props``.
    """
    from calibre.utils.zipfile import safe_replace

    container = DOCX(stream, extract=False)
    dp_name, ap_name = container.get_document_properties_names()
    core_raw = container.read(dp_name)

    # The second part may be absent or unreadable; that is not an error,
    # it simply means there is nothing extra to replace.
    try:
        app_raw = container.read(ap_name)
    except Exception:
        app_raw = None

    core_root = etree.fromstring(core_raw)
    update_doc_props(core_root, mi)

    extra = {}
    if app_raw is not None:
        app_root = etree.fromstring(app_raw)
        company = app_root.makeelement('{%s}Company' % namespaces['ep'])
        # Collect the stale elements first so the tree is not mutated while
        # it is being iterated.
        stale = [node for node in app_root if node.tag == company.tag]
        for node in stale:
            app_root.remove(node)
        company.text = mi.publisher
        app_root.append(company)
        extra[ap_name] = BytesIO(xml2str(app_root))

    stream.seek(0)
    safe_replace(
        stream,
        dp_name,
        BytesIO(xml2str(core_root)),
        extra_replacements=extra,
    )
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    """Open the DOCX container and build the empty HTML skeleton.

    :param path_or_stream: Passed straight to ``DOCX``.
    :param dest_dir: Output directory; falls back to ``os.getcwdu()`` when
        falsy.
    :param log: Passed to ``DOCX``; the log actually used afterwards is the
        one exposed by the container (``self.docx.log``).
    :param notes_text: Heading text for notes; falls back to
        ``_('Notes')`` when falsy.
    """
    self.docx = DOCX(path_or_stream, log=log)
    self.log = self.docx.log
    self.notes_text = notes_text or _('Notes')
    # NOTE(review): os.getcwdu exists only on Python 2 -- confirm the
    # interpreter this module targets.
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata

    self.body = BODY()
    self.styles = Styles()
    self.images = Images()
    self.tables = Tables()
    self.object_map = OrderedDict()

    self.html = HTML(
        HEAD(
            META(charset='utf-8'),
            TITLE(self.mi.title or _('Unknown')),
            LINK(rel='stylesheet', type='text/css', href='docx.css'),
        ),
        self.body,
    )

    # Whitespace-only text/tail values so the serialized skeleton is
    # indented with tabs and newlines.
    root = self.html
    head = root[0]
    root.text = '\n\t'
    head.text = '\n\t\t'
    head.tail = '\n'
    for node in head:
        node.tail = '\n\t\t'
    head[-1].tail = '\n\t'
    body = root[1]
    body.text = body.tail = '\n'

    # Only a known language (not 'und') that lang_as_iso639_1 can map to a
    # truthy code ends up as the lang attribute on the root element.
    lang = canonicalize_lang(self.mi.language)
    iso_code = lang_as_iso639_1(lang) if lang and lang != 'und' else None
    if iso_code:
        self.html.set('lang', iso_code)
def get_metadata(stream):
    """Read metadata, and a cover when one can be found, from a DOCX stream.

    :param stream: File-like object containing the DOCX.  It is rewound to
        offset 0 before this function returns normally.
    :return: The container's ``metadata`` object.  Its ``cover_data``
        attribute is set only when ``get_cover`` returned something other
        than ``None``.
    :raises Exception: Whatever ``DOCX(...)`` or ``c.metadata`` raise is
        propagated; the container is closed first.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
        try:
            cdata = get_cover(c)
        except Exception:
            # Cover detection is best-effort: report the failure but still
            # return the metadata that was read successfully.
            cdata = None
            import traceback
            traceback.print_exc()
    finally:
        # Close the container even when reading the metadata fails, so it
        # is never left open on the error path.
        c.close()
    stream.seek(0)
    if cdata is not None:
        mi.cover_data = cdata
    return mi
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True,
             notes_text=None, notes_nopb=False, nosupsub=False):
    """Open the DOCX container, create the helper objects and the HTML skeleton.

    :param path_or_stream: Passed straight to ``DOCX``.
    :param dest_dir: Output directory; falls back to ``os.getcwdu()`` when
        falsy.
    :param log: Passed to ``DOCX``; the log used afterwards is the one
        exposed by the container (``self.docx.log``).
    :param detect_cover: Stored unchanged as ``self.detect_cover``.
    :param notes_text: Heading text for notes; falls back to
        ``_('Notes')`` when falsy.
    :param notes_nopb: Stored unchanged as ``self.notes_nopb``.
    :param nosupsub: Stored unchanged as ``self.nosupsub``.
    """
    self.docx = DOCX(path_or_stream, log=log)
    self.namespace = self.docx.namespace

    # Pre-compiled patterns: runs of two or more whitespace characters, and
    # single newline / carriage-return / tab characters.
    self.ms_pat = re.compile(r'\s{2,}')
    self.ws_pat = re.compile(r'[\n\r\t]')

    self.log = self.docx.log
    self.detect_cover = detect_cover
    self.notes_text = notes_text or _('Notes')
    self.notes_nopb = notes_nopb
    self.nosupsub = nosupsub
    # NOTE(review): os.getcwdu exists only on Python 2 -- confirm the
    # interpreter this module targets.
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()

    # Every helper shares the container's namespace object; Styles also
    # receives the Tables instance and Images the log.
    ns = self.namespace
    self.theme = Theme(ns)
    self.settings = Settings(ns)
    self.tables = Tables(ns)
    self.fields = Fields(ns)
    self.styles = Styles(ns, self.tables)
    self.images = Images(ns, self.log)
    self.object_map = OrderedDict()

    self.html = HTML(
        HEAD(
            META(charset='utf-8'),
            TITLE(self.mi.title or _('Unknown')),
            LINK(rel='stylesheet', type='text/css', href='docx.css'),
        ),
        self.body,
    )

    # Whitespace-only text/tail values so the serialized skeleton is
    # indented with tabs and newlines.
    root = self.html
    head = root[0]
    root.text = '\n\t'
    head.text = '\n\t\t'
    head.tail = '\n'
    for node in head:
        node.tail = '\n\t\t'
    head[-1].tail = '\n\t'
    body = root[1]
    body.text = body.tail = '\n'

    # Record the document language, or None when html_lang gives nothing
    # usable; the root element gets a lang attribute only in the former case.
    lang = html_lang(self.mi.language)
    if lang:
        self.html.set('lang', lang)
    self.doc_lang = lang if lang else None
def get_metadata(stream):
    """Read metadata from a DOCX stream and guess a cover image.

    The cover is the first archive entry, in ``infolist()`` order, that

    * has a ``jpeg``, ``jpg``, ``png`` or ``gif`` extension
      (case-insensitive),
    * can be identified by ``identify_data``,
    * has a height/width ratio between 0.8 and 1.8 inclusive, and
    * covers at least 12000 pixels (``height * width``).

    :param stream: Seekable file-like object containing the DOCX.
    :return: The container's ``metadata`` object, with ``cover_data`` set
        to ``(fmt, raw_bytes)`` when a suitable image was found.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Close the container even when reading the metadata fails.
        c.close()
    stream.seek(0)

    image_exts = {'jpeg', 'jpg', 'png', 'gif'}
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in image_exts:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                # Unidentifiable image data: skip this entry.  Catching
                # Exception rather than everything lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
            if width <= 0:
                # A degenerate width would make the ratio test below raise
                # ZeroDivisionError; such an image cannot be a cover.
                continue
            if 0.8 <= height / width <= 1.8 and height * width >= 12000:
                cdata = (fmt, raw)
                # First match wins, so the remaining entries need not be
                # inspected.
                break

    if cdata is not None:
        mi.cover_data = cdata
    return mi