Example #1
0
    def nodeToFulltextSearch(self, node):
        # build fulltext index from node

        if not node.getContentType() in ("document", "dissertation"):
            # only build fulltext of document nodes
            # print "object is no document"
            return True
        r = re.compile("[a-zA-Z0-9]+")

        for file in node.getFiles():
            w = ''
            if file.getType() == "fulltext" and os.path.exists(
                    file.retrieveFile()):
                data = {}
                content = ''
                f = open(file.retrieveFile())
                try:
                    for line in f:
                        if FULLTEXT_INDEX_MODE == 0:
                            content += u(line)
                        else:
                            for w in re.findall(r, line):
                                if w not in data.keys():
                                    data[w] = 1
                            try:
                                data[w] += 1
                            except KeyError:
                                data[w] = 1
                finally:
                    f.close()

                if FULLTEXT_INDEX_MODE == 1:
                    for key in data.keys():
                        content += key + " "
                elif FULLTEXT_INDEX_MODE == 2:
                    for key in data.keys():
                        content += key + " [" + str(data[key]) + "] "
                sql = ""
                if len(content) > 0:
                    try:
                        sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format(
                            node.id, node.getContentType(), node.getSchema(),
                            iso2utf8(esc(content)))
                        self.db.execute(sql)
                    except:
                        print "error", node.id, "\n"
                        return False
                else:
                    print "no Content"
        return True
Example #2
0
    def event_files_changed(self):
        print "Postprocessing node", self.id
        if "image" in self.type:
            for f in self.getFiles():
                if f.getName().lower().endswith('svg'):
                    self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                    self.removeFile(f)
                    self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                    break
            orig = 0
            thumb = 0
            for f in self.getFiles():
                if f.type == "original":
                    orig = 1
                if f.type == "thumb":
                    thumb = 1

            if orig == 0:
                for f in self.getFiles():
                    if f.type == "image":

                        if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download")
                                                          and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))):

                            # move old file to "original", create a new png to be used as "image"
                            self.removeFile(f)

                            path, ext = splitfilename(f.retrieveFile())
                            pngname = path + ".png"

                            if not os.path.isfile(pngname):
                                makeOriginalFormat(f.retrieveFile(), pngname)

                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            else:
                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            print 'png name/path: ', pngname

                            self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                            break
                        else:
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

            # retrieve technical metadata.
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    width, height = getImageDimensions(f.retrieveFile())
                    self.set("origwidth", width)
                    self.set("origheight", height)
                    self.set("origsize", f.getSize())

                    if f.mimetype == "image/jpeg":
                        self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

            if thumb == 0:
                for f in self.getFiles():
                    if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                        path, ext = splitfilename(f.retrieveFile())
                        basename = hashlib.md5(str(random.random())).hexdigest()[0:8]

                        # path = os.path.join(getImportDir(),os.path.basename(path))
                        path = os.path.join(getImportDir(), basename)

                        thumbname = path + ".thumb"
                        thumbname2 = path + ".thumb2"

                        print 'tumb: ', thumbname
                        print 'presentation: ', thumbname2

                        assert not os.path.isfile(thumbname)
                        assert not os.path.isfile(thumbname2)
                        width, height = getImageDimensions(f.retrieveFile())
                        makeThumbNail(f.retrieveFile(), thumbname)
                        makePresentationFormat(f.retrieveFile(), thumbname2)
                        if f.mimetype is None:
                            if f.getName().lower().endswith("jpg"):
                                f.mimetype = "image/jpeg"
                            else:
                                f.mimetype = "image/tiff"
                        self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                        self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                        self.set("width", width)
                        self.set("height", height)

            #fetch unwanted tags to be omitted
            unwanted_attrs = self.unwanted_attributes()

            # Exif
            try:
                from lib.Exif import EXIF
                files = self.getFiles()

                for file in files:
                    if file.type == "original":
                        f = open(file.retrieveFile(), 'rb')
                        tags = EXIF.process_file(f)
                        tags.keys().sort()

                        for k in tags.keys():
                            # don't set unwanted exif attributes
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if tags[k] != "" and k != "JPEGThumbnail":
                                self.set("exif_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
                            elif k == "JPEGThumbnail":
                                if tags[k] != "":
                                    self.set("Thumbnail", "True")
                                else:
                                    self.set("Thumbnail", "False")

            except:
                None

            if dozoom(self) == 1:
                tileok = 0
                for f in self.getFiles():
                    if f.type.startswith("tile"):
                        tileok = 1
                if not tileok and self.get("width") and self.get("height"):
                    zoom.getImage(self.id, 1)

            # iptc
            try:
                from lib.iptc import IPTC
                files = self.getFiles()

                for file in files:
                    if file.type == "original":
                        tags = IPTC.getIPTCValues(file.retrieveFile())
                        tags.keys().sort()
                        for k in tags.keys():
                            # skip unknown iptc tags
                            if 'IPTC_' in k:
                                continue
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if isinstance(tags[k], list):
                                tags[k] = ', '.join(tags[k])
                            if tags[k] != "":
                                self.set("iptc_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
            except:
                None

            for f in self.getFiles():
                if f.getName().lower().endswith("png") and f.type == "tmppng":
                    self.removeFile(f)
                    break
Example #3
0
    def nodeToFulltextSearch(self, node):
        # build fulltext index from node

        if not node.getContentType() in ("document", "dissertation"):
            # only build fulltext of document nodes
            # print "object is no document"
            return True
        r = re.compile("[a-zA-Z0-9]+")

        for file in node.getFiles():
            w = ''
            if file.getType() == "fulltext" and os.path.exists(file.retrieveFile()):
                data = {}
                content = ''
                f = open(file.retrieveFile())
                try:
                    for line in f:
                        if FULLTEXT_INDEX_MODE == 0:
                            content += u(line)
                        else:
                            for w in re.findall(r, line):
                                if w not in data.keys():
                                    data[w] = 1
                            try:
                                data[w] += 1
                            except KeyError:
                                data[w] = 1
                finally:
                    f.close()

                if FULLTEXT_INDEX_MODE == 1:
                    for key in data.keys():
                        content += key + " "
                elif FULLTEXT_INDEX_MODE == 2:
                    for key in data.keys():
                        content += key + " [" + str(data[key]) + "] "
                sql = ""
                if len(content) > 0:
                    try:
                        sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format(node.id,
                                                                                                                           node.getContentType(),
                                                                                                                           node.getSchema(),
                                                                                                                           iso2utf8(esc(content)))
                        self.db.execute(sql)
                    except:
                        print "error", node.id, "\n"
                        return False
                else:
                    print "no Content"
        return True
Example #4
0
    def event_files_changed(self):
        print "Postprocessing node", self.id
        if "image" in self.type:
            for f in self.getFiles():
                if f.getName().lower().endswith('svg'):
                    self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                    self.removeFile(f)
                    self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                    break
            orig = 0
            thumb = 0
            for f in self.getFiles():
                if f.type == "original":
                    orig = 1
                if f.type == "thumb":
                    thumb = 1

            if orig == 0:
                for f in self.getFiles():
                    if f.type == "image":
                        if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download")
                                                          and (f.getName().lower().endswith("tif") or f.getName().lower().endswith("tiff"))):

                            # move old file to "original", create a new png to be used as "image"
                            self.removeFile(f)

                            path, ext = splitfilename(f.retrieveFile())
                            pngname = path + ".png"

                            if not os.path.isfile(pngname):
                                makeOriginalFormat(f.retrieveFile(), pngname)

                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            else:
                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            print 'png: ', pngname

                            self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                            break
                        else:
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

            # retrieve technical metadata.
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    width, height = getImageDimensions(f.retrieveFile())
                    self.set("origwidth", width)
                    self.set("origheight", height)
                    self.set("origsize", f.getSize())

                    if f.mimetype == "image/jpeg":
                        self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

            if thumb == 0:
                for f in self.getFiles():
                    if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                        basename = hashlib.md5(str(random.random())).hexdigest()[0:8]

                        path = os.path.join(getImportDir(), basename)

                        thumbname = path + ".thumb"
                        thumbname2 = path + ".thumb2"

                        print 'tumb: ', thumbname
                        print 'presentation: ', thumbname2

                        assert not os.path.isfile(thumbname)
                        assert not os.path.isfile(thumbname2)
                        width, height = getImageDimensions(f.retrieveFile())
                        makeThumbNail(f.retrieveFile(), thumbname)
                        makePresentationFormat(f.retrieveFile(), thumbname2)
                        if f.mimetype is None:
                            if f.getName().lower().endswith("jpg"):
                                f.mimetype = "image/jpeg"
                            else:
                                f.mimetype = "image/tiff"
                        self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                        self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                        self.set("width", width)
                        self.set("height", height)

            # fetch unwanted tags to be omitted
            unwanted_attrs = self.unwanted_attributes()

            # Exif
            try:
                files = self.getFiles()

                for file in files:
                    if file.type == "original":
                        f = open(file.retrieveFile(), 'rb')
                        tags = EXIF.process_file(f)
                        tags.keys().sort()

                        for k in tags.keys():
                            # don't set unwanted exif attributes
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if tags[k] != "" and k != "JPEGThumbnail":
                                self.set("exif_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
                            elif k == "JPEGThumbnail":
                                if tags[k] != "":
                                    self.set("Thumbnail", "True")
                                else:
                                    self.set("Thumbnail", "False")
            except:
                None

            if dozoom(self) == 1:
                tileok = 0
                for f in self.getFiles():
                    if f.type.startswith("tile"):
                        tileok = 1
                if not tileok and self.get("width") and self.get("height"):
                    zoom.getImage(self.id, 1)

            for f in self.getFiles():
                if f.getType() == 'original':

                    wanted_tags = lib.iptc.IPTC.get_wanted_iptc_tags()

                    tags_in_upload = lib.iptc.IPTC.get_iptc_values(f.retrieveFile(), wanted_tags)

                    with_value = []
                    for field in getMetaType(self.getSchema()).getMetaFields():
                        if field.get('type') == "meta" and len(field.getValueList()) > 1:
                            value = self.get('iptc_{}'.format(field.getName()))

                            if len(value) > 0:
                                with_value.append(field.getName())

                    if tags_in_upload:
                        for key in tags_in_upload.keys():
                            if tags_in_upload[key] != '':

                                if key not in with_value:
                                    self.set('iptc_{}'.format(key.replace(' ', '_')),
                                             tags_in_upload[key])

            for f in self.getFiles():
                if f.getName().lower().endswith("png") and f.type == "tmppng":
                    self.removeFile(f)
                    break
Example #5
0
    def event_files_changed(self):
        print "Postprocessing node", self.id
        if "image" in self.type:
            for f in self.getFiles():
                if f.getName().lower().endswith('svg'):
                    self.svg_to_png(f.retrieveFile(),
                                    f.retrieveFile()[:-4] + ".png")
                    self.removeFile(f)
                    self.addFile(
                        FileNode(name=f.retrieveFile(),
                                 type="original",
                                 mimetype=f.mimetype))
                    self.addFile(
                        FileNode(name=f.retrieveFile(),
                                 type="image",
                                 mimetype=f.mimetype))
                    self.addFile(
                        FileNode(name=f.retrieveFile()[:-4] + ".png",
                                 type="tmppng",
                                 mimetype="image/png"))
                    break
            orig = 0
            thumb = 0
            for f in self.getFiles():
                if f.type == "original":
                    orig = 1
                if f.type == "thumb":
                    thumb = 1
            if orig == 0:
                for f in self.getFiles():
                    if f.type == "image":
                        if f.mimetype == "image/tiff" or (
                            (f.mimetype is None
                             or f.mimetype == "application/x-download") and
                            (f.getName().lower().endswith("tif")
                             or f.getName().lower().endswith("tiff"))):
                            # move old file to "original", create a new png to be used as "image"
                            self.removeFile(f)

                            path, ext = splitfilename(f.retrieveFile())
                            pngname = path + ".png"
                            if not os.path.isfile(pngname):
                                makeOriginalFormat(f.retrieveFile(), pngname)

                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            else:
                                width, height = getImageDimensions(pngname)
                                self.set("width", width)
                                self.set("height", height)

                            self.addFile(
                                FileNode(name=pngname,
                                         type="image",
                                         mimetype="image/png"))
                            self.addFile(
                                FileNode(name=f.retrieveFile(),
                                         type="original",
                                         mimetype="image/tiff"))
                            break
                        else:
                            self.addFile(
                                FileNode(name=f.retrieveFile(),
                                         type="original",
                                         mimetype=f.mimetype))

            # retrieve technical metadata.
            for f in self.getFiles():
                if (f.type == "image"
                        and not f.getName().lower().endswith("svg")
                    ) or f.type == "tmppng":
                    width, height = getImageDimensions(f.retrieveFile())
                    self.set("origwidth", width)
                    self.set("origheight", height)
                    self.set("origsize", f.getSize())

                    if f.mimetype == "image/jpeg":
                        self.set(
                            "jpg_comment",
                            iso2utf8(
                                getJpegSection(f.retrieveFile(),
                                               0xFE).strip()))

            if thumb == 0:
                for f in self.getFiles():
                    if (f.type == "image"
                            and not f.getName().lower().endswith("svg")
                        ) or f.type == "tmppng":
                        path, ext = splitfilename(f.retrieveFile())
                        basename = hashlib.md5(str(
                            random.random())).hexdigest()[0:8]

                        #path = os.path.join(getImportDir(),os.path.basename(path))
                        path = os.path.join(getImportDir(), basename)

                        thumbname = path + ".thumb"
                        thumbname2 = path + ".thumb2"

                        assert not os.path.isfile(thumbname)
                        assert not os.path.isfile(thumbname2)
                        width, height = getImageDimensions(f.retrieveFile())
                        makeThumbNail(f.retrieveFile(), thumbname)
                        makePresentationFormat(f.retrieveFile(), thumbname2)
                        if f.mimetype is None:
                            if f.getName().lower().endswith("jpg"):
                                f.mimetype = "image/jpeg"
                            else:
                                f.mimetype = "image/tiff"
                        self.addFile(
                            FileNode(name=thumbname,
                                     type="thumb",
                                     mimetype="image/jpeg"))
                        self.addFile(
                            FileNode(name=thumbname2,
                                     type="presentation",
                                     mimetype="image/jpeg"))
                        self.set("width", width)
                        self.set("height", height)

            #fetch unwanted tags to be omitted
            unwanted_attrs = self.unwanted_attributes()

            # Exif
            try:
                from lib.Exif import EXIF
                files = self.getFiles()

                for file in files:
                    if file.type == "original":
                        f = open(file.retrieveFile(), 'rb')
                        tags = EXIF.process_file(f)
                        tags.keys().sort()

                        for k in tags.keys():
                            # don't set unwanted exif attributes
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if tags[k] != "" and k != "JPEGThumbnail":
                                self.set("exif_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
                            elif k == "JPEGThumbnail":
                                if tags[k] != "":
                                    self.set("Thumbnail", "True")
                                else:
                                    self.set("Thumbnail", "False")

            except:
                None

            if dozoom(self) == 1:
                tileok = 0
                for f in self.getFiles():
                    if f.type.startswith("tile"):
                        tileok = 1
                if not tileok and self.get("width") and self.get("height"):
                    zoom.getImage(self.id, 1)

            # iptc
            try:
                from lib.iptc import IPTC
                files = self.getFiles()

                for file in files:
                    if file.type == "original":
                        tags = IPTC.getIPTCValues(file.retrieveFile())
                        tags.keys().sort()
                        for k in tags.keys():
                            # skip unknown iptc tags
                            if 'IPTC_' in k:
                                continue
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if isinstance(tags[k], list):
                                tags[k] = ', '.join(tags[k])
                            if tags[k] != "":
                                self.set("iptc_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
            except:
                None

            for f in self.getFiles():
                if f.getName().lower().endswith("png") and f.type == "tmppng":
                    self.removeFile(f)
                    break