def _extract_metadata(self, files=None):
    """Store dimensions, EXIF tags and IPTC tags of the processing file as node attributes.

    The file to inspect is selected by ``self._find_processing_file(files)``.
    EXIF tags whose name contains one of the markers returned by
    ``Image.get_unwanted_exif_attributes()`` and tags with a falsy value
    are not stored.
    """
    source_file = self._find_processing_file(files)
    img_width, img_height = get_image_dimensions(source_file)

    # XXX: this is a bit redundant...
    self.set("origwidth", img_width)
    self.set("origheight", img_height)
    self.set("origsize", source_file.size)
    self.set("width", img_width)
    self.set("height", img_height)

    # Exif
    skip_markers = Image.get_unwanted_exif_attributes()
    with open(source_file.abspath, 'rb') as exif_stream:
        exif_tags = EXIF.process_file(exif_stream)

    for tag_name in exif_tags.keys():
        # don't set unwanted exif attributes
        if any(marker in tag_name for marker in skip_markers):
            continue
        tag_value = exif_tags[tag_name]
        if not tag_value:
            continue
        attr_name = "exif_" + tag_name.replace(" ", "_")
        self.set(attr_name, utf8_decode_escape(str(tag_value)))

    # IPTC
    iptc_tags = lib.iptc.IPTC.get_iptc_tags(source_file.abspath)
    if iptc_tags is None:
        return
    for iptc_name, iptc_value in iteritems(iptc_tags):
        self.set('iptc_' + iptc_name, iptc_value)
def mkdir(self, path):
    """Add a new "directory" node for the last component of ``path``.

    The parent part of ``path`` is resolved with ``self.cwd``; the current
    directory and node are put back afterwards, so only the tree changes.

    Raises IOError if ``self.cwd`` reports that the parent does not exist.
    """
    parent_path, dirname = utils.splitpath(path)
    saved_dir, saved_node = self.dir, self.node

    if not self.cwd(parent_path):
        raise IOError("no such directory: " + parent_path)

    # self.cwd() moved us into the parent; remember it, then step back
    parent_node = self.node
    self.dir = saved_dir
    self.node = saved_node

    new_directory = tree.Node(utf8_decode_escape(dirname), type="directory")
    parent_node.addChild(new_directory)
def file_to_node(file_node, upload_dir):
    '''
    Converts the FileNode object in the upload_dir into a Node with the FileNode as an attachment.

    Files of type 'other' or 'zip' are skipped. For every other file the
    first 'ftp_' in the file name is dropped, the file is renamed on disk
    (best effort), a new node typed '<file type>/<schema>' is created and
    the file is moved from upload_dir to that node. The schema is read from
    the 'system.ftp.<file type>' attribute of the first parent of
    upload_dir and defaults to 'file'.

    @param file_node: FileNode
    @param upload_dir: Node
    @return: Node if one was created
    '''
    home_dir = upload_dir.getParents()[0]
    file_type = file_node.getType()
    if file_type in ('other', 'zip'):
        return

    path = file_node.retrieveFile().split('/')
    new_name = path.pop().replace('ftp_', '', 1)
    path.append(new_name)
    new_path = '/'.join(path)
    try:
        os.rename(file_node.retrieveFile(), new_path)
    except OSError:
        # Best effort, as before: a failed rename must not abort the
        # conversion. Only OS-level errors are ignored now instead of
        # every exception (including KeyboardInterrupt).
        # NOTE(review): the stored path below is still rewritten to the
        # new name in this case - confirm that this is intended.
        pass

    schema = home_dir.get('system.ftp.{}'.format(file_type)).lstrip('/')
    if not schema:
        schema = 'file'

    new_node = tree.Node(utf8_decode_escape(new_name),
                         type='/'.join([file_node.getType(), schema]))
    upload_dir.removeFile(file_node)

    relative_path = file_node._path.replace(config.get('paths.datadir'), '')
    # Swap only the last path component. str.replace() on the whole path
    # would also rewrite directory names that contain the file name.
    head, sep, _old_basename = relative_path.rpartition('/')
    file_node._path = head + sep + new_node.getName()

    new_node.addFile(file_node)
    new_node.event_files_changed()
    upload_dir.addChild(new_node)
    return new_node
def file_to_node(file_node, upload_dir):
    '''
    Converts the FileNode object in the upload_dir into a Node with the FileNode as an attachment.

    Files of type 'other' or 'zip' are skipped. For every other file the
    first 'ftp_' in the file name is dropped, the file is renamed on disk
    (best effort), a new node typed '<file type>/<schema>' is created and
    the file is moved from upload_dir to that node. The schema is read from
    the 'system.ftp.<file type>' attribute of the first parent of
    upload_dir and defaults to 'file'.

    @param file_node: FileNode
    @param upload_dir: Node
    @return: Node if one was created
    '''
    home_dir = upload_dir.getParents()[0]
    file_type = file_node.getType()
    if file_type in ('other', 'zip'):
        return

    path = file_node.retrieveFile().split('/')
    new_name = path.pop().replace('ftp_', '', 1)
    path.append(new_name)
    new_path = '/'.join(path)
    try:
        os.rename(file_node.retrieveFile(), new_path)
    except OSError:
        # Best effort, as before: a failed rename must not abort the
        # conversion. Only OS-level errors are ignored now instead of
        # every exception (including KeyboardInterrupt).
        # NOTE(review): the stored path below is still rewritten to the
        # new name in this case - confirm that this is intended.
        pass

    schema = home_dir.get('system.ftp.{}'.format(file_type)).lstrip('/')
    if not schema:
        schema = 'file'

    new_node = tree.Node(utf8_decode_escape(new_name),
                         type='/'.join([file_node.getType(), schema]))
    upload_dir.removeFile(file_node)

    relative_path = file_node._path.replace(config.get('paths.datadir'), '')
    # Swap only the last path component. str.replace() on the whole path
    # would also rewrite directory names that contain the file name.
    head, sep, _old_basename = relative_path.rpartition('/')
    file_node._path = head + sep + new_node.getName()

    new_node.addFile(file_node)
    new_node.event_files_changed()
    upload_dir.addChild(new_node)
    return new_node
def importBibTeX(infile, node=None, req=None): if req: try: user = users.getUserFromRequest(req) msg = "bibtex import: import started by user '%s'" % (user.name) except: msg = "bibtex import: starting import (unable to identify user)" else: msg = "bibtex import: starting import (%s)" % str(sys.argv) logger.info(msg) print msg bibtextypes = getbibtexmappings() result = [] entries = [] shortcut = {} if isinstance(infile, list): entries = infile else: if not node: node = tree.Node(name=utf8_decode_escape(os.path.basename(infile)), type="directory") try: entries = getentries(infile) except: logger.error("getentries failed", exc_info=1) msg = "bibtex import: getentries failed, import stopped (encoding error)" logger.error(msg) raise ValueError("getentries failed") logger.info("bibtex import: %d entries" % len(entries)) counter = 0 for doctype, docid, fields in entries: counter += 1 docid_utf8 = utf8_decode_escape(docid) mytype = detecttype(doctype, fields) if doctype == "string": if VERBOSE: logger.info( "bibtex import: processing %s: %s, %s --> (is string)" % (str(counter), doctype, docid)) continue if mytype: fieldnames = {} datefields = {} if mytype == "string": continue elif mytype not in bibtextypes: msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype logger.error("bibtex import: " + msg) raise MissingMapping(msg) result += [(mytype.lower(), fields)] metatype = bibtextypes[mytype] # check for mask configuration mask = getMetaType(metatype).getMask("bibtex_import") if not mask: mask = getMetaType(metatype).getMask("bibtex") if mask: for f in mask.getMaskFields(): try: _bib_name = tree.getNode( f.get("mappingfield")).getName() _mfield = tree.getNode(f.get("attribute")) _med_name = _mfield.getName() if _mfield.get("type") == "date": datefields[_med_name] = _mfield.get("valuelist") except tree.NoSuchNodeError as e: msg = "bibtex import docid='%s': field error for bibtex mask for type %s and bibtex-type '%s': %s: " % ( docid_utf8, metatype, 
mytype, str(e)) msg = msg + "_bib_name='%s', _mfield='%s', _med_name='%s'" % ( str(_bib_name), str(_mfield), str(_med_name)) logger.error(msg) continue fieldnames[_bib_name] = _med_name doc = tree.Node(docid_utf8, type="document/" + metatype) for k, v in fields.items(): if k in fieldnames.keys(): k = fieldnames[k] # map bibtex name if k in datefields.keys(): # format date field v = parse_date(v, datefields[k]) doc.set(k, utf8_decode_escape(v)) child_id = None child_type = None try: node.addChild(doc) doc.setDirty() child_id = doc.id child_type = doc.type except Exception as e: logger.error("bibtex import: %s" % (str(e))) raise ValueError() if VERBOSE: try: logger.info( "bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % (str(counter), doctype, docid, str(child_type), str(child_id))) except Exception as e: try: logger.info( "bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % (str(counter), doctype, docid.decode("utf8", "replace"), str(child_type), str(child_id))) except Exception as e: logger.info( "bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % (str(counter), doctype, "'not printable bibtex key'", str(child_type), str(child_id))) msg = "bibtex import: finished import" logger.info(msg) print msg return node
def event_files_changed(self):
    """Post-process the files of an image node after its file list changed.

    Steps, in order:
    1. an SVG file is rendered to a temporary PNG ("tmppng") and registered
       as both "original" and "image"
    2. if no "original" file exists, one is derived from the "image" file
       (TIFF images get a PNG as the new "image")
    3. original size attributes and the JPEG comment are stored
    4. thumbnail and presentation files are created if no "thumb" exists
    5. EXIF tags of the "original" file are stored as "exif_*" attributes
    6. zoom tiles are requested if dozoom() asks for them and none exist
    7. IPTC values of the "original" file are stored as "iptc_*" attributes
    8. the temporary PNG file entry is removed again
    """
    print "Postprocessing node", self.id
    if "image" in self.type:
        for f in self.getFiles():
            if f.getName().lower().endswith('svg'):
                # render the svg next to the source file (same path, ".png" suffix)
                self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                self.removeFile(f)
                self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                break

        # find out which derived files already exist
        orig = 0
        thumb = 0
        for f in self.getFiles():
            if f.type == "original":
                orig = 1
            if f.type == "thumb":
                thumb = 1

        if orig == 0:
            for f in self.getFiles():
                if f.type == "image":
                    # tiff by mimetype, or by file extension when the mimetype is missing/generic
                    if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download")
                                                      and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))):
                        # move old file to "original", create a new png to be used as "image"
                        self.removeFile(f)

                        path, ext = splitfilename(f.retrieveFile())
                        pngname = path + ".png"

                        # an existing png is reused instead of being generated again
                        if not os.path.isfile(pngname):
                            makeOriginalFormat(f.retrieveFile(), pngname)

                            width, height = getImageDimensions(pngname)
                            self.set("width", width)
                            self.set("height", height)
                        else:
                            width, height = getImageDimensions(pngname)
                            self.set("width", width)
                            self.set("height", height)

                        print 'png: ', pngname
                        self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                        self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                        break
                    else:
                        # every other image format serves as its own original
                        self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

        # retrieve technical metadata.
        for f in self.getFiles():
            if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                width, height = getImageDimensions(f.retrieveFile())
                self.set("origwidth", width)
                self.set("origheight", height)
                self.set("origsize", f.getSize())

                if f.mimetype == "image/jpeg":
                    # NOTE(review): 0xFE is presumably the JPEG comment (COM) marker - confirm
                    self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

        if thumb == 0:
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    # random 8 character base name for the derived files in the import directory
                    basename = hashlib.md5(str(random.random())).hexdigest()[0:8]
                    path = os.path.join(getImportDir(), basename)

                    thumbname = path + ".thumb"
                    thumbname2 = path + ".thumb2"
                    print 'tumb: ', thumbname
                    print 'presentation: ', thumbname2

                    # the random name must not collide with an existing file
                    assert not os.path.isfile(thumbname)
                    assert not os.path.isfile(thumbname2)
                    width, height = getImageDimensions(f.retrieveFile())
                    makeThumbNail(f.retrieveFile(), thumbname)
                    makePresentationFormat(f.retrieveFile(), thumbname2)
                    if f.mimetype is None:
                        # guess the mimetype from the file extension
                        if f.getName().lower().endswith("jpg"):
                            f.mimetype = "image/jpeg"
                        else:
                            f.mimetype = "image/tiff"
                    self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                    self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                    self.set("width", width)
                    self.set("height", height)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        # Exif
        # NOTE(review): every error in this section is silently discarded by
        # the bare except below, and the opened file is never closed.
        try:
            files = self.getFiles()

            for file in files:
                if file.type == "original":
                    f = open(file.retrieveFile(), 'rb')
                    tags = EXIF.process_file(f)
                    # NOTE(review): sorts a temporary key list; the result is not used
                    tags.keys().sort()

                    for k in tags.keys():
                        # don't set unwanted exif attributes
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if tags[k] != "" and k != "JPEGThumbnail":
                            self.set("exif_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k])))
                        elif k == "JPEGThumbnail":
                            # only record whether an embedded thumbnail exists
                            if tags[k] != "":
                                self.set("Thumbnail", "True")
                            else:
                                self.set("Thumbnail", "False")
        except:
            None

        if dozoom(self) == 1:
            # request zoom tiles unless a "tile*" file already exists
            tileok = 0
            for f in self.getFiles():
                if f.type.startswith("tile"):
                    tileok = 1
            if not tileok and self.get("width") and self.get("height"):
                zoom.getImage(self.id, 1)

        # IPTC
        for f in self.getFiles():
            if f.getType() == 'original':
                wanted_tags = lib.iptc.IPTC.get_wanted_iptc_tags()
                tags_in_upload = lib.iptc.IPTC.get_iptc_values(f.retrieveFile(), wanted_tags)

                # names of "meta" fields (with more than one value list entry)
                # whose iptc_* attribute is already filled on this node
                with_value = []
                for field in getMetaType(self.getSchema()).getMetaFields():
                    if field.get('type') == "meta" and len(field.getValueList()) > 1:
                        value = self.get('iptc_{}'.format(field.getName()))

                        if len(value) > 0:
                            with_value.append(field.getName())

                if tags_in_upload:
                    for key in tags_in_upload.keys():
                        if tags_in_upload[key] != '':
                            # do not overwrite values that are already set
                            if key not in with_value:
                                self.set('iptc_{}'.format(key.replace(' ', '_')), tags_in_upload[key])

        # drop the temporary png that was registered for an svg source
        for f in self.getFiles():
            if f.getName().lower().endswith("png") and f.type == "tmppng":
                self.removeFile(f)
                break
def event_files_changed(self):
    """Post-process the files of a document node after its file list changed.

    Without a "doc"/"document" file, all derived files (thumb,
    presentation, fileinfo, fulltext) are removed. With one, and if any
    derived file is missing, the PDF is parsed with parsepdf.parsePDF2(),
    the "key: value" lines of the generated ".info" file are stored as
    "pdf_<key>" attributes (unless the key contains an unwanted tag), and
    the four derived files are registered on the node.

    Raises OperationException if parsepdf reports a PDFException.
    """
    # a single pre-formatted argument prints the same text under both
    # print-statement and print-function semantics
    print("Postprocessing node %s" % (self.id,))

    thumb = 0
    fulltext = 0
    doc = None
    present = 0
    fileinfo = 0
    for f in self.getFiles():
        if f.type == "thumb":
            thumb = 1
        elif f.type.startswith("present"):
            present = 1
        elif f.type == "fulltext":
            fulltext = 1
        elif f.type == "fileinfo":
            fileinfo = 1
        elif f.type in ("doc", "document"):
            doc = f

    if not doc:
        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following each removed file.
        for f in list(self.getFiles()):
            if (f.type in ("thumb", "fileinfo", "fulltext")
                    or f.type.startswith("present")):
                self.removeFile(f)

    # fetch unwanted tags to be omitted
    unwanted_attrs = self.unwanted_attributes()

    if doc:
        path, _ext = splitfilename(doc.retrieveFile())

        if not (thumb and present and fulltext and fileinfo):
            thumbname = path + ".thumb"
            thumb2name = path + ".thumb2"
            fulltextname = path + ".txt"
            infoname = path + ".info"
            tempdir = config.get("paths.tempdir")

            try:
                parsepdf.parsePDF2(doc.retrieveFile(), tempdir)
            except parsepdf.PDFException as ex:
                raise OperationException(ex.value)

            # "with" closes the info file even if self.set() raises
            with open(infoname, "rb") as fi:
                for line in fi:
                    i = line.find(':')
                    if i <= 0:
                        continue
                    key = line[0:i].strip().lower()
                    if any(tag in key for tag in unwanted_attrs):
                        continue
                    self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

            self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
            self.addFile(FileNode(name=thumb2name, type="presentation", mimetype="image/jpeg"))
            self.addFile(FileNode(name=fulltextname, type="fulltext", mimetype="text/plain"))
            self.addFile(FileNode(name=infoname, type="fileinfo", mimetype="text/plain"))
def importBibTeX(infile, node=None, req=None):
    """Import BibTeX entries as Document nodes appended to ``node.children``.

    Each entry is a dict; its "ID" becomes the document name (and the "key"
    field) and its "ENTRYTYPE" is passed to detecttype(). Field names are
    translated through the "bibtex_import" (or "bibtex") mask of the mapped
    metadata type, date fields are normalized with parse_date().

    :param infile: list of already parsed entries, or the path of a BibTeX
        file that is parsed with getentries()
    :param node: container for the documents; if a path is given and node
        is falsy, a Directory named after the file is created
    :param req: optional request used to determine the importing user
    :returns: the node the documents were appended to
    :raises ValueError: "bibtex_unspecified_error" if getentries() fails,
        "bibtex_date_error" if a date cannot be parsed, and without message
        if a document cannot be appended
    :raises MissingMapping: if a detected BibTeX type has no mapping
    """
    user = None
    if req:
        try:
            user = users.getUserFromRequest(req)
            msg = "bibtex import: import started by user '%s'" % (user.name)
        except:
            msg = "bibtex import: starting import (unable to identify user)"
    else:
        msg = "bibtex import: starting import (%s)" % ustr(sys.argv)
    logg.info(msg)

    bibtextypes = getbibtexmappings()
    result = []
    entries = []

    if isinstance(infile, list):
        entries = infile
    else:
        node = node or Directory(utf8_decode_escape(os.path.basename(infile)))
        try:
            entries = getentries(infile)
        except:
            # XXX TODO This reports *everything* as encoding error
            # XXX TODO (even things like full disk or other parsing errors).
            # XXX TODO We should at least reformulate the error message,
            # XXX TODO and -- even better -- only catch errors that are to be expected.
            logg.error("getentries failed", exc_info=1)
            msg = "bibtex import: getentries failed, import stopped (encoding error)"
            logg.error(msg)
            raise ValueError("bibtex_unspecified_error")

    logg.info("bibtex import: %d entries", len(entries))

    for count, fields in enumerate(entries):
        # "ID" is renamed to "key", "ENTRYTYPE" is consumed here; both
        # changes are made in place on the entry dict
        docid_utf8 = fields["ID"]
        fields[u"key"] = fields.pop("ID")
        doctype = fields.pop("ENTRYTYPE")
        mytype = detecttype(doctype, fields)

        # entries without a detected type are skipped
        if mytype:
            fieldnames = {}  # bibtex field name -> node attribute name
            datefields = {}  # node attribute name -> "valuelist" of the date field
            if mytype not in bibtextypes:
                logg.error("bibtex mapping of bibtex type '%s' not defined - import stopped", mytype)
                msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype
                raise MissingMapping(msg)
            result += [(mytype.lower(), fields)]

            metatype = bibtextypes[mytype]

            # check for mask configuration
            metadatatype = q(Metadatatype).filter_by(name=metatype).one()
            mask = metadatatype.get_mask(u"bibtex_import") or metadatatype.get_mask(u"bibtex")
            if mask:
                for f in mask.all_maskitems:
                    try:
                        _bib_name = q(Node).get(f.get(u"mappingfield")).name
                        _mfield = q(Node).get(f.get(u"attribute"))
                        _med_name = _mfield.name
                        if _mfield.get(u"type") == u"date":
                            datefields[_med_name] = _mfield.get(u"valuelist")
                    except AttributeError as e:
                        # NOTE(review): presumably raised when one of the lookups
                        # returns None; the mask item is logged and skipped
                        msg = "bibtex import docid='{}': field error for bibtex mask for type {} and bibtex-type '{}': {}"
                        msg = msg.format(docid_utf8, metatype, mytype, e)
                        logg.error(msg)
                    else:
                        fieldnames[_bib_name] = _med_name

            doc = Document(docid_utf8, schema=metatype)
            for k, v in fields.items():
                if k in fieldnames.keys():
                    k = fieldnames[k]  # map bibtex name

                if k in datefields.keys():  # format date field
                    try:
                        v = str(parse_date(v, datefields[k]))
                        # if date format does not contains '%' the valid digit of the result must not be longer than the date format
                        # e.g. if datefields[k] is 'yyyy' then the result v must be clipped after 4 characters
                        # afterwards the result is expanded again (without the invalid digits)
                        if datefields[k].find('%') < 0:
                            v = v[:len(datefields[k])]
                            v = str(parse_date(v, datefields[k]))
                    except ValueError as e:
                        logg.exception("bibtex exception: %s: %s", k, v)
                        raise ValueError("bibtex_date_error")

                doc.set(k, v)

            # because the bibtex import contains only a subset of the metadata defined in metadatatype,
            # all other metadata are created and set to default values.
            # this will be done in the same manner as if the document is loaded in editor and saved without
            # any changes (required fields are not considered)
            editmask = metadatatype.get_mask(u"editmask")
            if editmask and hasattr(editmask, 'set_default_metadata'):
                editmask.set_default_metadata(doc)

            try:
                node.children.append(doc)
                if user:
                    doc.set("creator", user.login_name)
                doc.set("creationtime", unicode(time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))))
            except Exception as e:
                logg.exception("bibtex exception")
                raise ValueError()

    logg.debug("bibtex import: finished import")
    # prints the most recent value of msg: the start message, or the last
    # mask field error message if one was logged above
    print msg

    return node
def importBibTeX(infile, node=None, req=None): if req: try: user = users.getUserFromRequest(req) msg = "bibtex import: import started by user '%s'" % (user.name) except: msg = "bibtex import: starting import (unable to identify user)" else: msg = "bibtex import: starting import (%s)" % str(sys.argv) logger.info(msg) print msg bibtextypes = getbibtexmappings() result = [] entries = [] shortcut = {} if isinstance(infile, list): entries = infile else: if not node: node = tree.Node(name=utf8_decode_escape(os.path.basename(infile)), type="directory") try: entries = getentries(infile) except: logger.error("getentries failed", exc_info=1) msg = "bibtex import: getentries failed, import stopped (encoding error)" logger.error(msg) raise ValueError("getentries failed") logger.info("bibtex import: %d entries" % len(entries)) counter = 0 for doctype, docid, fields in entries: counter += 1 docid_utf8 = utf8_decode_escape(docid) mytype = detecttype(doctype, fields) if doctype == "string": if VERBOSE: logger.info( "bibtex import: processing %s: %s, %s --> (is string)" % (str(counter), doctype, docid)) continue if mytype: fieldnames = {} datefields = {} if mytype == "string": continue elif mytype not in bibtextypes: msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype logger.error("bibtex import: " + msg) raise MissingMapping(msg) result += [(mytype.lower(), fields)] metatype = bibtextypes[mytype] # check for mask configuration mask = getMetaType(metatype).getMask("bibtex_import") if not mask: mask = getMetaType(metatype).getMask("bibtex") if mask: for f in mask.getMaskFields(): try: _bib_name = tree.getNode(f.get("mappingfield")).getName() _mfield = tree.getNode(f.get("attribute")) _med_name = _mfield.getName() if _mfield.get("type") == "date": datefields[_med_name] = _mfield.get("valuelist") except tree.NoSuchNodeError as e: msg = "bibtex import docid='%s': field error for bibtex mask for type %s and bibtex-type '%s': %s: " % ( docid_utf8, metatype, 
mytype, str(e)) msg = msg + "_bib_name='%s', _mfield='%s', _med_name='%s'" % ( str(_bib_name), str(_mfield), str(_med_name)) logger.error(msg) continue fieldnames[_bib_name] = _med_name doc = tree.Node(docid_utf8, type="document/" + metatype) for k, v in fields.items(): if k in fieldnames.keys(): k = fieldnames[k] # map bibtex name if k in datefields.keys(): # format date field v = parse_date(v, datefields[k]) doc.set(k, utf8_decode_escape(v)) child_id = None child_type = None try: node.addChild(doc) doc.setDirty() child_id = doc.id child_type = doc.type except Exception as e: logger.error("bibtex import: %s" % (str(e))) raise ValueError() if VERBOSE: try: logger.info("bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % ( str(counter), doctype, docid, str(child_type), str(child_id))) except Exception as e: try: logger.info("bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % ( str(counter), doctype, docid.decode("utf8", "replace"), str(child_type), str(child_id))) except Exception as e: logger.info("bibtex import: done processing %s: %s, %s --> type=%s, id=%s" % ( str(counter), doctype, "'not printable bibtex key'", str(child_type), str(child_id))) msg = "bibtex import: finished import" logger.info(msg) print msg return node
def importBibTeX(infile, node=None, req=None): if req: try: user = users.getUserFromRequest(req) msg = "bibtex import: import started by user '%s'" % (user.name) except: msg = "bibtex import: starting import (unable to identify user)" else: msg = "bibtex import: starting import (%s)" % ustr(sys.argv) logg.info(msg) bibtextypes = getbibtexmappings() result = [] entries = [] if isinstance(infile, list): entries = infile else: node = node or Directory(utf8_decode_escape(os.path.basename(infile))) try: entries = getentries(infile) except: logg.error("getentries failed", exc_info=1) msg = "bibtex import: getentries failed, import stopped (encoding error)" logg.error(msg) raise ValueError("encoding_error") logg.info("bibtex import: %d entries", len(entries)) for count, fields in enumerate(entries): docid_utf8 = fields["ID"] fields["key"] = fields.pop("ID") doctype = fields.pop("ENTRYTYPE") mytype = detecttype(doctype, fields) if mytype: fieldnames = {} datefields = {} if mytype not in bibtextypes: logg.error("bibtex mapping of bibtex type '%s' not defined - import stopped", mytype) msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype raise MissingMapping(msg) result += [(mytype.lower(), fields)] metatype = bibtextypes[mytype] # check for mask configuration metadatatype = q(Metadatatype).filter_by(name=metatype).one() mask = metadatatype.get_mask(u"bibtex_import") or metadatatype.get_mask(u"bibtex") if mask: for f in mask.all_maskitems: try: _bib_name = q(Node).get(f.get(u"mappingfield")).name _mfield = q(Node).get(f.get(u"attribute")) _med_name = _mfield.name if _mfield.get(u"type") == u"date": datefields[_med_name] = _mfield.get(u"valuelist") except AttributeError as e: msg = "bibtex import docid='{}': field error for bibtex mask for type {} and bibtex-type '{}': {}" msg = msg.format(docid_utf8, metatype, mytype, e) logg.error(msg) else: fieldnames[_bib_name] = _med_name doc = Document(docid_utf8,schema=metatype) for k, v in fields.items(): 
if k in fieldnames.keys(): k = fieldnames[k] # map bibtex name if k in datefields.keys(): # format date field try: v = str(parse_date(v, datefields[k])) # if date format does not contains '%' the result must not be longer than the date format # e.g. if datefields[k] is 'yyyy' then the result v must be clipped after 4 characters if datefields[k].find('%') < 0: v = v[:len(datefields[k])] except ValueError as e: logg.exception("bibtex exception: %s: %s", k, v) raise ValueError("ValueError: " + k + ": " + v) doc.set(k, v) child_id = None child_type = None try: node.children.append(doc) child_id = doc.id child_type = doc.type except Exception as e: logg.exception("bibtex exception") raise ValueError() logg.debug("bibtex import: finished import") print msg return node
def event_files_changed(self):
    """Post-process the files of a document node after its file list changed.

    Without a "doc"/"document" file, all derived files (thumb,
    presentation, fileinfo, fulltext) are removed. With one, and if any
    derived file is missing, the PDF is parsed with parsepdf.parsePDF2(),
    the "key: value" lines of the generated ".info" file are stored as
    "pdf_<key>" attributes (unless the key contains an unwanted tag), and
    the four derived files are registered on the node.

    Raises OperationException if parsepdf reports a PDFException.
    """
    # a single pre-formatted argument prints the same text under both
    # print-statement and print-function semantics
    print("Postprocessing node %s" % (self.id,))

    thumb = 0
    fulltext = 0
    doc = None
    present = 0
    fileinfo = 0
    for f in self.getFiles():
        if f.type == "thumb":
            thumb = 1
        elif f.type.startswith("present"):
            present = 1
        elif f.type == "fulltext":
            fulltext = 1
        elif f.type == "fileinfo":
            fileinfo = 1
        elif f.type in ("doc", "document"):
            doc = f

    if not doc:
        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following each removed file.
        for f in list(self.getFiles()):
            if (f.type in ("thumb", "fileinfo", "fulltext")
                    or f.type.startswith("present")):
                self.removeFile(f)

    # fetch unwanted tags to be omitted
    unwanted_attrs = self.unwanted_attributes()

    if doc:
        path, _ext = splitfilename(doc.retrieveFile())

        if not (thumb and present and fulltext and fileinfo):
            thumbname = path + ".thumb"
            thumb2name = path + ".thumb2"
            fulltextname = path + ".txt"
            infoname = path + ".info"
            tempdir = config.get("paths.tempdir")

            try:
                parsepdf.parsePDF2(doc.retrieveFile(), tempdir)
            except parsepdf.PDFException as ex:
                raise OperationException(ex.value)

            # "with" closes the info file even if self.set() raises
            with open(infoname, "rb") as fi:
                for line in fi:
                    i = line.find(':')
                    if i <= 0:
                        continue
                    key = line[0:i].strip().lower()
                    if any(tag in key for tag in unwanted_attrs):
                        continue
                    self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

            self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
            self.addFile(FileNode(name=thumb2name, type="presentation", mimetype="image/jpeg"))
            self.addFile(FileNode(name=fulltextname, type="fulltext", mimetype="text/plain"))
            self.addFile(FileNode(name=infoname, type="fileinfo", mimetype="text/plain"))
def event_files_changed(self):
    """Post-process the files of a document node after its file list changed.

    Without a "document" file, all derived files (thumb, presentation,
    fileinfo, fulltext) are removed. With one, and if any derived file is
    missing, the PDF is parsed with parsepdf.parsePDFExternal(), the
    "key: value" lines of the generated ".info" file are stored as
    "pdf_<key>" attributes (unless the key contains an unwanted tag), and
    the four derived files are appended to ``self.files``. Finally the
    fulltext is imported and the database session is committed.

    An encrypted document (PDFException value 'error:document encrypted')
    is accepted as is: the session is committed and nothing is derived.

    Raises OperationException for every other PDFException.
    """
    logg.debug("Postprocessing node %s", self.id)

    thumb = 0
    fulltext = 0
    doc = None
    present = 0
    fileinfo = 0
    for f in self.files:
        if f.type == "thumb":
            thumb = 1
        elif f.type.startswith("present"):
            present = 1
        elif f.type == "fulltext":
            fulltext = 1
        elif f.type == "fileinfo":
            fileinfo = 1
        elif f.type == "document":
            doc = f

    if not doc:
        # Iterate over a snapshot: removing from the collection that is
        # being iterated would skip the element following each removed file.
        for f in list(self.files):
            if (f.type in ("thumb", "fileinfo", "fulltext")
                    or f.type.startswith("present")):
                self.files.remove(f)

    # fetch unwanted tags to be omitted
    unwanted_attrs = self.get_unwanted_exif_attributes()

    if doc:
        path, _ext = splitfilename(doc.abspath)

        if not (thumb and present and fulltext and fileinfo):
            thumbname = path + ".thumb"
            thumb2name = path + ".thumb2"
            fulltextname = path + ".txt"
            infoname = path + ".info"
            tempdir = config.get("paths.tempdir")

            try:
                parsepdf.parsePDFExternal(doc.abspath, tempdir)
            except parsepdf.PDFException as ex:
                if ex.value == 'error:document encrypted':
                    # allow upload of encrypted document
                    db.session.commit()
                    return
                raise OperationException(ex.value)

            with codecs.open(infoname, "rb", encoding='utf8') as fi:
                for line in fi.readlines():
                    i = line.find(':')
                    if i <= 0:
                        continue
                    key = line[0:i].strip().lower()
                    if any(tag in key for tag in unwanted_attrs):
                        continue
                    self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

            self.files.append(File(thumbname, "thumb", "image/jpeg"))
            self.files.append(File(thumb2name, "presentation", "image/jpeg"))
            self.files.append(File(fulltextname, "fulltext", "text/plain"))
            self.files.append(File(infoname, "fileinfo", "text/plain"))

    if doc:
        import_node_fulltext(self, overwrite=True)

    db.session.commit()
def event_files_changed(self):
    """Post-process the files of an image node after its file list changed.

    Steps, in order:
    1. an SVG file is rendered to a temporary PNG ("tmppng") and registered
       as both "original" and "image"
    2. if no "original" file exists, one is derived from the "image" file
       (TIFF images get a PNG as the new "image")
    3. original size attributes and the JPEG comment are stored
    4. thumbnail and presentation files are created if no "thumb" exists
    5. EXIF tags of the "original" file are stored as "exif_*" attributes
    6. zoom tiles are requested if dozoom() asks for them and none exist
    7. IPTC tags of the "original" file are stored as "iptc_*" attributes
    8. the temporary PNG file entry is removed again
    """
    print "Postprocessing node", self.id
    if "image" in self.type:
        for f in self.getFiles():
            if f.getName().lower().endswith('svg'):
                # render the svg next to the source file (same path, ".png" suffix)
                self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                self.removeFile(f)
                self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                break

        # find out which derived files already exist
        orig = 0
        thumb = 0
        for f in self.getFiles():
            if f.type == "original":
                orig = 1
            if f.type == "thumb":
                thumb = 1

        if orig == 0:
            for f in self.getFiles():
                if f.type == "image":
                    # tiff by mimetype, or by file extension when the mimetype is missing/generic
                    if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download")
                                                      and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))):
                        # move old file to "original", create a new png to be used as "image"
                        self.removeFile(f)

                        path, ext = splitfilename(f.retrieveFile())
                        pngname = path + ".png"

                        # an existing png is reused instead of being generated again
                        if not os.path.isfile(pngname):
                            makeOriginalFormat(f.retrieveFile(), pngname)

                            width, height = getImageDimensions(pngname)
                            self.set("width", width)
                            self.set("height", height)
                        else:
                            width, height = getImageDimensions(pngname)
                            self.set("width", width)
                            self.set("height", height)

                        print 'png name/path: ', pngname
                        self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                        self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                        break
                    else:
                        # every other image format serves as its own original
                        self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

        # retrieve technical metadata.
        for f in self.getFiles():
            if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                width, height = getImageDimensions(f.retrieveFile())
                self.set("origwidth", width)
                self.set("origheight", height)
                self.set("origsize", f.getSize())

                if f.mimetype == "image/jpeg":
                    # NOTE(review): 0xFE is presumably the JPEG comment (COM) marker - confirm
                    self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

        if thumb == 0:
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    # the split result is overwritten below; only the random name is used
                    path, ext = splitfilename(f.retrieveFile())
                    # random 8 character base name for the derived files
                    basename = hashlib.md5(str(random.random())).hexdigest()[0:8]

                    # path = os.path.join(getImportDir(),os.path.basename(path))
                    path = os.path.join(getImportDir(), basename)

                    thumbname = path + ".thumb"
                    thumbname2 = path + ".thumb2"
                    print 'tumb: ', thumbname
                    print 'presentation: ', thumbname2

                    # the random name must not collide with an existing file
                    assert not os.path.isfile(thumbname)
                    assert not os.path.isfile(thumbname2)
                    width, height = getImageDimensions(f.retrieveFile())
                    makeThumbNail(f.retrieveFile(), thumbname)
                    makePresentationFormat(f.retrieveFile(), thumbname2)
                    if f.mimetype is None:
                        # guess the mimetype from the file extension
                        if f.getName().lower().endswith("jpg"):
                            f.mimetype = "image/jpeg"
                        else:
                            f.mimetype = "image/tiff"
                    self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                    self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                    self.set("width", width)
                    self.set("height", height)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        # Exif
        # NOTE(review): every error in this section (including a failing
        # import) is silently discarded by the bare except below, and the
        # opened file is never closed.
        try:
            from lib.Exif import EXIF
            files = self.getFiles()

            for file in files:
                if file.type == "original":
                    f = open(file.retrieveFile(), 'rb')
                    tags = EXIF.process_file(f)
                    # NOTE(review): sorts a temporary key list; the result is not used
                    tags.keys().sort()

                    for k in tags.keys():
                        # don't set unwanted exif attributes
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if tags[k] != "" and k != "JPEGThumbnail":
                            self.set("exif_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k])))
                        elif k == "JPEGThumbnail":
                            # only record whether an embedded thumbnail exists
                            if tags[k] != "":
                                self.set("Thumbnail", "True")
                            else:
                                self.set("Thumbnail", "False")
        except:
            None

        if dozoom(self) == 1:
            # request zoom tiles unless a "tile*" file already exists
            tileok = 0
            for f in self.getFiles():
                if f.type.startswith("tile"):
                    tileok = 1
            if not tileok and self.get("width") and self.get("height"):
                zoom.getImage(self.id, 1)

        # iptc
        # NOTE(review): errors are silently discarded here as well
        try:
            from lib.iptc import IPTC
            files = self.getFiles()

            for file in files:
                if file.type == "original":
                    tags = IPTC.getIPTCValues(file.retrieveFile())
                    # NOTE(review): sorts a temporary key list; the result is not used
                    tags.keys().sort()
                    for k in tags.keys():
                        # skip unknown iptc tags
                        if 'IPTC_' in k:
                            continue

                        if any(tag in k for tag in unwanted_attrs):
                            continue

                        # list values are flattened into one comma separated string
                        if isinstance(tags[k], list):
                            tags[k] = ', '.join(tags[k])

                        if tags[k] != "":
                            self.set("iptc_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k])))
        except:
            None

        # drop the temporary png that was registered for an svg source
        for f in self.getFiles():
            if f.getName().lower().endswith("png") and f.type == "tmppng":
                self.removeFile(f)
                break
def event_files_changed(self):
    """Post-process the files of a document node after its file list changed.

    Without a "document" file, all derived files (thumb, presentation,
    fileinfo, fulltext) are removed. With one, and if any derived file is
    missing, the PDF is parsed with parsepdf.parsePDFExternal(), the
    "key: value" lines of the generated ".info" file are stored as
    "pdf_<key>" attributes (unless the key contains an unwanted tag), and
    the four derived files are appended to ``self.files``. Finally the
    fulltext is imported and the database session is committed.

    An encrypted document (PDFException value 'error:document encrypted')
    is accepted as is: the session is committed and nothing is derived.

    Raises OperationException for every other PDFException.
    """
    logg.debug("Postprocessing node %s", self.id)

    thumb = 0
    fulltext = 0
    doc = None
    present = 0
    fileinfo = 0
    for f in self.files:
        if f.type == "thumb":
            thumb = 1
        elif f.type.startswith("present"):
            present = 1
        elif f.type == "fulltext":
            fulltext = 1
        elif f.type == "fileinfo":
            fileinfo = 1
        elif f.type == "document":
            doc = f

    if not doc:
        # Iterate over a snapshot: removing from the collection that is
        # being iterated would skip the element following each removed file.
        for f in list(self.files):
            if (f.type in ("thumb", "fileinfo", "fulltext")
                    or f.type.startswith("present")):
                self.files.remove(f)

    # fetch unwanted tags to be omitted
    unwanted_attrs = self.get_unwanted_exif_attributes()

    if doc:
        path, _ext = splitfilename(doc.abspath)

        if not (thumb and present and fulltext and fileinfo):
            thumbname = path + ".thumb"
            thumb2name = path + ".thumb2"
            fulltextname = path + ".txt"
            infoname = path + ".info"
            tempdir = config.get("paths.tempdir")

            try:
                parsepdf.parsePDFExternal(doc.abspath, tempdir)
            except parsepdf.PDFException as ex:
                if ex.value == 'error:document encrypted':
                    # allow upload of encrypted document
                    db.session.commit()
                    return
                raise OperationException(ex.value)

            with codecs.open(infoname, "rb", encoding='utf8') as fi:
                for line in fi.readlines():
                    i = line.find(':')
                    if i <= 0:
                        continue
                    key = line[0:i].strip().lower()
                    if any(tag in key for tag in unwanted_attrs):
                        continue
                    self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

            self.files.append(File(thumbname, "thumb", "image/jpeg"))
            self.files.append(File(thumb2name, "presentation", "image/jpeg"))
            self.files.append(File(fulltextname, "fulltext", "text/plain"))
            self.files.append(File(infoname, "fileinfo", "text/plain"))

    if doc:
        import_node_fulltext(self, overwrite=True)

    db.session.commit()
def event_files_changed(self):
    """Post-process an image node after its files changed.

    Only prints a progress line and returns unless ``"image"`` occurs in
    ``self.type``. Otherwise the following steps run in order:

    1. The first SVG file is converted with ``self.svg_to_png`` and
       re-registered as ``original`` and ``image``; the PNG is attached as
       a temporary ``tmppng`` file.
    2. If no ``original`` file exists, each ``image`` file is registered as
       ``original``. A TIFF is additionally replaced by a PNG as the
       ``image`` file, and ``width``/``height`` are set from that PNG.
    3. ``origwidth``, ``origheight`` and ``origsize`` are set from each
       non-SVG ``image`` file or ``tmppng`` file; JPEGs also get
       ``jpg_comment``.
    4. If no ``thumb`` file exists, ``makeThumbNail`` and
       ``makePresentationFormat`` are called and the resulting files are
       attached as ``thumb`` and ``presentation``.
    5. EXIF tags of each ``original`` file are stored as ``exif_*``
       attributes, skipping keys matched by ``self.unwanted_attributes()``.
    6. If ``dozoom(self) == 1``, no ``tile*`` file exists and width and
       height are set, ``zoom.getImage`` is called.
    7. IPTC tags of each ``original`` file are stored as ``iptc_*``
       attributes, with the same filtering.
    8. The temporary ``tmppng`` file is detached again.

    EXIF and IPTC extraction are best effort: any ``Exception`` raised
    there is logged and otherwise ignored.
    """
    # Local import keeps this block self-contained.
    import logging

    # Same output as the Python 2 statement `print "...", self.id`, but
    # also valid syntax on Python 3.
    print("Postprocessing node %s" % (self.id,))
    if "image" not in self.type:
        return

    log = logging.getLogger(__name__)

    # 1. SVG: render to PNG, keep the SVG as both "original" and "image".
    for f in self.getFiles():
        if f.getName().lower().endswith('svg'):
            svg_path = f.retrieveFile()
            png_path = svg_path[:-4] + ".png"
            self.svg_to_png(svg_path, png_path)
            self.removeFile(f)
            self.addFile(
                FileNode(name=svg_path, type="original", mimetype=f.mimetype))
            self.addFile(
                FileNode(name=svg_path, type="image", mimetype=f.mimetype))
            self.addFile(
                FileNode(name=png_path, type="tmppng", mimetype="image/png"))
            break

    orig = 0
    thumb = 0
    for f in self.getFiles():
        if f.type == "original":
            orig = 1
        if f.type == "thumb":
            thumb = 1

    # 2. Make sure an "original" file exists.
    if orig == 0:
        for f in self.getFiles():
            if f.type != "image":
                continue
            is_tiff = f.mimetype == "image/tiff" or (
                (f.mimetype is None
                 or f.mimetype == "application/x-download")
                and f.getName().lower().endswith(("tif", "tiff")))
            if is_tiff:
                # move old file to "original", create a new png to be used
                # as "image"
                self.removeFile(f)
                path, _ext = splitfilename(f.retrieveFile())
                pngname = path + ".png"
                if not os.path.isfile(pngname):
                    makeOriginalFormat(f.retrieveFile(), pngname)
                width, height = getImageDimensions(pngname)
                self.set("width", width)
                self.set("height", height)
                self.addFile(
                    FileNode(name=pngname, type="image",
                             mimetype="image/png"))
                self.addFile(
                    FileNode(name=f.retrieveFile(), type="original",
                             mimetype="image/tiff"))
                break
            else:
                self.addFile(
                    FileNode(name=f.retrieveFile(), type="original",
                             mimetype=f.mimetype))

    # 3. retrieve technical metadata.
    for f in self.getFiles():
        if (f.type == "image"
                and not f.getName().lower().endswith("svg")
                ) or f.type == "tmppng":
            width, height = getImageDimensions(f.retrieveFile())
            self.set("origwidth", width)
            self.set("origheight", height)
            self.set("origsize", f.getSize())
            if f.mimetype == "image/jpeg":
                self.set(
                    "jpg_comment",
                    iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

    # 4. Thumbnail and presentation image.
    if thumb == 0:
        for f in self.getFiles():
            if (f.type == "image"
                    and not f.getName().lower().endswith("svg")
                    ) or f.type == "tmppng":
                # Random 8 hex digit base name inside the import directory.
                # The encode() keeps md5() usable on Python 3 and is a
                # no-op for this ASCII string on Python 2.
                basename = hashlib.md5(
                    str(random.random()).encode("ascii")).hexdigest()[0:8]
                path = os.path.join(getImportDir(), basename)
                thumbname = path + ".thumb"
                thumbname2 = path + ".thumb2"
                # A name collision aborts instead of overwriting a file.
                assert not os.path.isfile(thumbname)
                assert not os.path.isfile(thumbname2)
                width, height = getImageDimensions(f.retrieveFile())
                makeThumbNail(f.retrieveFile(), thumbname)
                makePresentationFormat(f.retrieveFile(), thumbname2)
                if f.mimetype is None:
                    if f.getName().lower().endswith("jpg"):
                        f.mimetype = "image/jpeg"
                    else:
                        f.mimetype = "image/tiff"
                self.addFile(
                    FileNode(name=thumbname, type="thumb",
                             mimetype="image/jpeg"))
                self.addFile(
                    FileNode(name=thumbname2, type="presentation",
                             mimetype="image/jpeg"))
                self.set("width", width)
                self.set("height", height)

    # fetch unwanted tags to be omitted
    unwanted_attrs = self.unwanted_attributes()

    # 5. Exif (best effort)
    try:
        from lib.Exif import EXIF
        for file_node in self.getFiles():
            if file_node.type != "original":
                continue
            # The handle is closed as soon as the tags have been stored.
            with open(file_node.retrieveFile(), 'rb') as fh:
                tags = EXIF.process_file(fh)
                # sorted() gives the ordering that the former
                # `tags.keys().sort()` computed on a throwaway list.
                for k in sorted(tags):
                    # don't set unwanted exif attributes
                    if any(tag in k for tag in unwanted_attrs):
                        continue
                    if tags[k] != "" and k != "JPEGThumbnail":
                        self.set("exif_" + k.replace(" ", "_"),
                                 utf8_decode_escape(str(tags[k])))
                    elif k == "JPEGThumbnail":
                        if tags[k] != "":
                            self.set("Thumbnail", "True")
                        else:
                            self.set("Thumbnail", "False")
    except Exception:
        # Metadata is optional, so a failure must not abort
        # post-processing; it is recorded instead of silently dropped.
        log.warning("EXIF extraction failed for node %s", self.id,
                    exc_info=True)

    # 6. Zoom tiles.
    if dozoom(self) == 1:
        tileok = 0
        for f in self.getFiles():
            if f.type.startswith("tile"):
                tileok = 1
        if not tileok and self.get("width") and self.get("height"):
            zoom.getImage(self.id, 1)

    # 7. iptc (best effort)
    try:
        from lib.iptc import IPTC
        for file_node in self.getFiles():
            if file_node.type != "original":
                continue
            tags = IPTC.getIPTCValues(file_node.retrieveFile())
            for k in sorted(tags):
                # skip unknown iptc tags
                if 'IPTC_' in k:
                    continue
                if any(tag in k for tag in unwanted_attrs):
                    continue
                value = tags[k]
                if isinstance(value, list):
                    value = ', '.join(value)
                if value != "":
                    self.set("iptc_" + k.replace(" ", "_"),
                             utf8_decode_escape(str(value)))
    except Exception:
        log.warning("IPTC extraction failed for node %s", self.id,
                    exc_info=True)

    # 8. Drop the temporary PNG that was rendered from an SVG.
    for f in self.getFiles():
        if f.getName().lower().endswith("png") and f.type == "tmppng":
            self.removeFile(f)
            break