def nodeToFulltextSearch(self, node): # build fulltext index from node if not node.getContentType() in ("document", "dissertation"): # only build fulltext of document nodes # print "object is no document" return True r = re.compile("[a-zA-Z0-9]+") for file in node.getFiles(): w = '' if file.getType() == "fulltext" and os.path.exists( file.retrieveFile()): data = {} content = '' f = open(file.retrieveFile()) try: for line in f: if FULLTEXT_INDEX_MODE == 0: content += u(line) else: for w in re.findall(r, line): if w not in data.keys(): data[w] = 1 try: data[w] += 1 except KeyError: data[w] = 1 finally: f.close() if FULLTEXT_INDEX_MODE == 1: for key in data.keys(): content += key + " " elif FULLTEXT_INDEX_MODE == 2: for key in data.keys(): content += key + " [" + str(data[key]) + "] " sql = "" if len(content) > 0: try: sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format( node.id, node.getContentType(), node.getSchema(), iso2utf8(esc(content))) self.db.execute(sql) except: print "error", node.id, "\n" return False else: print "no Content" return True
def event_files_changed(self): print "Postprocessing node", self.id if "image" in self.type: for f in self.getFiles(): if f.getName().lower().endswith('svg'): self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png") self.removeFile(f) self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype)) self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png")) break orig = 0 thumb = 0 for f in self.getFiles(): if f.type == "original": orig = 1 if f.type == "thumb": thumb = 1 if orig == 0: for f in self.getFiles(): if f.type == "image": if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download") and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))): # move old file to "original", create a new png to be used as "image" self.removeFile(f) path, ext = splitfilename(f.retrieveFile()) pngname = path + ".png" if not os.path.isfile(pngname): makeOriginalFormat(f.retrieveFile(), pngname) width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) else: width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) print 'png name/path: ', pngname self.addFile(FileNode(name=pngname, type="image", mimetype="image/png")) self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff")) break else: self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) # retrieve technical metadata. for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng": width, height = getImageDimensions(f.retrieveFile()) self.set("origwidth", width) self.set("origheight", height) self.set("origsize", f.getSize()) if f.mimetype == "image/jpeg": self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip())) if thumb == 0: for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng": path, ext = splitfilename(f.retrieveFile()) basename = hashlib.md5(str(random.random())).hexdigest()[0:8] # path = os.path.join(getImportDir(),os.path.basename(path)) path = os.path.join(getImportDir(), basename) thumbname = path + ".thumb" thumbname2 = path + ".thumb2" print 'tumb: ', thumbname print 'presentation: ', thumbname2 assert not os.path.isfile(thumbname) assert not os.path.isfile(thumbname2) width, height = getImageDimensions(f.retrieveFile()) makeThumbNail(f.retrieveFile(), thumbname) makePresentationFormat(f.retrieveFile(), thumbname2) if f.mimetype is None: if f.getName().lower().endswith("jpg"): f.mimetype = "image/jpeg" else: f.mimetype = "image/tiff" self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg")) self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg")) self.set("width", width) self.set("height", height) #fetch unwanted tags to be omitted unwanted_attrs = self.unwanted_attributes() # Exif try: from lib.Exif import EXIF files = self.getFiles() for file in files: if file.type == "original": f = open(file.retrieveFile(), 'rb') tags = EXIF.process_file(f) tags.keys().sort() for k in tags.keys(): # don't set unwanted exif attributes if any(tag in k for tag in unwanted_attrs): continue if tags[k] != "" and k != "JPEGThumbnail": self.set("exif_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k]))) elif k == "JPEGThumbnail": if tags[k] != "": self.set("Thumbnail", "True") else: self.set("Thumbnail", "False") except: None if dozoom(self) == 1: tileok = 0 for f in self.getFiles(): if f.type.startswith("tile"): tileok = 1 if not tileok and self.get("width") and self.get("height"): zoom.getImage(self.id, 1) # iptc try: from lib.iptc import IPTC files = self.getFiles() for file in files: if file.type == "original": tags = IPTC.getIPTCValues(file.retrieveFile()) tags.keys().sort() for k in tags.keys(): # skip unknown iptc tags if 'IPTC_' in k: continue if any(tag in k for tag in unwanted_attrs): continue if isinstance(tags[k], list): tags[k] = ', '.join(tags[k]) if tags[k] != "": self.set("iptc_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k]))) except: None for f in self.getFiles(): if f.getName().lower().endswith("png") and f.type == "tmppng": self.removeFile(f) break
def nodeToFulltextSearch(self, node): # build fulltext index from node if not node.getContentType() in ("document", "dissertation"): # only build fulltext of document nodes # print "object is no document" return True r = re.compile("[a-zA-Z0-9]+") for file in node.getFiles(): w = '' if file.getType() == "fulltext" and os.path.exists(file.retrieveFile()): data = {} content = '' f = open(file.retrieveFile()) try: for line in f: if FULLTEXT_INDEX_MODE == 0: content += u(line) else: for w in re.findall(r, line): if w not in data.keys(): data[w] = 1 try: data[w] += 1 except KeyError: data[w] = 1 finally: f.close() if FULLTEXT_INDEX_MODE == 1: for key in data.keys(): content += key + " " elif FULLTEXT_INDEX_MODE == 2: for key in data.keys(): content += key + " [" + str(data[key]) + "] " sql = "" if len(content) > 0: try: sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format(node.id, node.getContentType(), node.getSchema(), iso2utf8(esc(content))) self.db.execute(sql) except: print "error", node.id, "\n" return False else: print "no Content" return True
def event_files_changed(self): print "Postprocessing node", self.id if "image" in self.type: for f in self.getFiles(): if f.getName().lower().endswith('svg'): self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png") self.removeFile(f) self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype)) self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png")) break orig = 0 thumb = 0 for f in self.getFiles(): if f.type == "original": orig = 1 if f.type == "thumb": thumb = 1 if orig == 0: for f in self.getFiles(): if f.type == "image": if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download") and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))): # move old file to "original", create a new png to be used as "image" self.removeFile(f) path, ext = splitfilename(f.retrieveFile()) pngname = path + ".png" if not os.path.isfile(pngname): makeOriginalFormat(f.retrieveFile(), pngname) width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) else: width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) print 'png: ', pngname self.addFile(FileNode(name=pngname, type="image", mimetype="image/png")) self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff")) break else: self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) # retrieve technical metadata. for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng": width, height = getImageDimensions(f.retrieveFile()) self.set("origwidth", width) self.set("origheight", height) self.set("origsize", f.getSize()) if f.mimetype == "image/jpeg": self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip())) if thumb == 0: for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng": basename = hashlib.md5(str(random.random())).hexdigest()[0:8] path = os.path.join(getImportDir(), basename) thumbname = path + ".thumb" thumbname2 = path + ".thumb2" print 'tumb: ', thumbname print 'presentation: ', thumbname2 assert not os.path.isfile(thumbname) assert not os.path.isfile(thumbname2) width, height = getImageDimensions(f.retrieveFile()) makeThumbNail(f.retrieveFile(), thumbname) makePresentationFormat(f.retrieveFile(), thumbname2) if f.mimetype is None: if f.getName().lower().endswith("jpg"): f.mimetype = "image/jpeg" else: f.mimetype = "image/tiff" self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg")) self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg")) self.set("width", width) self.set("height", height) # fetch unwanted tags to be omitted unwanted_attrs = self.unwanted_attributes() # Exif try: files = self.getFiles() for file in files: if file.type == "original": f = open(file.retrieveFile(), 'rb') tags = EXIF.process_file(f) tags.keys().sort() for k in tags.keys(): # don't set unwanted exif attributes if any(tag in k for tag in unwanted_attrs): continue if tags[k] != "" and k != "JPEGThumbnail": self.set("exif_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k]))) elif k == "JPEGThumbnail": if tags[k] != "": self.set("Thumbnail", "True") else: self.set("Thumbnail", "False") except: None if dozoom(self) == 1: tileok = 0 for f in self.getFiles(): if f.type.startswith("tile"): tileok = 1 if not tileok and self.get("width") and self.get("height"): zoom.getImage(self.id, 1) for f in self.getFiles(): if f.getType() == 'original': wanted_tags = lib.iptc.IPTC.get_wanted_iptc_tags() tags_in_upload = lib.iptc.IPTC.get_iptc_values(f.retrieveFile(), wanted_tags) with_value = [] for field in getMetaType(self.getSchema()).getMetaFields(): if field.get('type') == "meta" and len(field.getValueList()) > 1: value = self.get('iptc_{}'.format(field.getName())) if len(value) > 0: with_value.append(field.getName()) if tags_in_upload: for key in tags_in_upload.keys(): if tags_in_upload[key] != '': if key not in with_value: self.set('iptc_{}'.format(key.replace(' ', '_')), tags_in_upload[key]) for f in self.getFiles(): if f.getName().lower().endswith("png") and f.type == "tmppng": self.removeFile(f) break
def event_files_changed(self): print "Postprocessing node", self.id if "image" in self.type: for f in self.getFiles(): if f.getName().lower().endswith('svg'): self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png") self.removeFile(f) self.addFile( FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) self.addFile( FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype)) self.addFile( FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png")) break orig = 0 thumb = 0 for f in self.getFiles(): if f.type == "original": orig = 1 if f.type == "thumb": thumb = 1 if orig == 0: for f in self.getFiles(): if f.type == "image": if f.mimetype == "image/tiff" or ( (f.mimetype is None or f.mimetype == "application/x-download") and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))): # move old file to "original", create a new png to be used as "image" self.removeFile(f) path, ext = splitfilename(f.retrieveFile()) pngname = path + ".png" if not os.path.isfile(pngname): makeOriginalFormat(f.retrieveFile(), pngname) width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) else: width, height = getImageDimensions(pngname) self.set("width", width) self.set("height", height) self.addFile( FileNode(name=pngname, type="image", mimetype="image/png")) self.addFile( FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff")) break else: self.addFile( FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype)) # retrieve technical metadata. for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg") ) or f.type == "tmppng": width, height = getImageDimensions(f.retrieveFile()) self.set("origwidth", width) self.set("origheight", height) self.set("origsize", f.getSize()) if f.mimetype == "image/jpeg": self.set( "jpg_comment", iso2utf8( getJpegSection(f.retrieveFile(), 0xFE).strip())) if thumb == 0: for f in self.getFiles(): if (f.type == "image" and not f.getName().lower().endswith("svg") ) or f.type == "tmppng": path, ext = splitfilename(f.retrieveFile()) basename = hashlib.md5(str( random.random())).hexdigest()[0:8] #path = os.path.join(getImportDir(),os.path.basename(path)) path = os.path.join(getImportDir(), basename) thumbname = path + ".thumb" thumbname2 = path + ".thumb2" assert not os.path.isfile(thumbname) assert not os.path.isfile(thumbname2) width, height = getImageDimensions(f.retrieveFile()) makeThumbNail(f.retrieveFile(), thumbname) makePresentationFormat(f.retrieveFile(), thumbname2) if f.mimetype is None: if f.getName().lower().endswith("jpg"): f.mimetype = "image/jpeg" else: f.mimetype = "image/tiff" self.addFile( FileNode(name=thumbname, type="thumb", mimetype="image/jpeg")) self.addFile( FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg")) self.set("width", width) self.set("height", height) #fetch unwanted tags to be omitted unwanted_attrs = self.unwanted_attributes() # Exif try: from lib.Exif import EXIF files = self.getFiles() for file in files: if file.type == "original": f = open(file.retrieveFile(), 'rb') tags = EXIF.process_file(f) tags.keys().sort() for k in tags.keys(): # don't set unwanted exif attributes if any(tag in k for tag in unwanted_attrs): continue if tags[k] != "" and k != "JPEGThumbnail": self.set("exif_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k]))) elif k == "JPEGThumbnail": if tags[k] != "": self.set("Thumbnail", "True") else: self.set("Thumbnail", "False") except: None if dozoom(self) == 1: tileok = 0 for f in self.getFiles(): if f.type.startswith("tile"): tileok = 1 if not tileok and self.get("width") and self.get("height"): zoom.getImage(self.id, 1) # iptc try: from lib.iptc import IPTC files = self.getFiles() for file in files: if file.type == "original": tags = IPTC.getIPTCValues(file.retrieveFile()) tags.keys().sort() for k in tags.keys(): # skip unknown iptc tags if 'IPTC_' in k: continue if any(tag in k for tag in unwanted_attrs): continue if isinstance(tags[k], list): tags[k] = ', '.join(tags[k]) if tags[k] != "": self.set("iptc_" + k.replace(" ", "_"), utf8_decode_escape(str(tags[k]))) except: None for f in self.getFiles(): if f.getName().lower().endswith("png") and f.type == "tmppng": self.removeFile(f) break