Example #1
0
    def _extract_metadata(self, files=None):
        """Copy dimensions, file size, EXIF and IPTC tags of the image onto this node.

        The image to inspect is whatever ``self._find_processing_file(files)``
        returns; ``files`` is passed to that helper unchanged. Every value is
        stored through ``self.set``.
        """
        source = self._find_processing_file(files)
        px_width, px_height = get_image_dimensions(source)
        # XXX: this is a bit redundant...
        self.set("origwidth", px_width)
        self.set("origheight", px_height)
        self.set("origsize", source.size)
        self.set("width", px_width)
        self.set("height", px_height)

        # Exif
        blocked_fragments = Image.get_unwanted_exif_attributes()

        with open(source.abspath, 'rb') as stream:
            exif_tags = EXIF.process_file(stream)

        for tag_name in exif_tags.keys():
            # don't set unwanted exif attributes
            is_blocked = any(fragment in tag_name for fragment in blocked_fragments)
            if is_blocked:
                continue
            tag_value = exif_tags[tag_name]
            if not tag_value:
                continue
            attr_name = "exif_" + tag_name.replace(" ", "_")
            self.set(attr_name, utf8_decode_escape(str(tag_value)))

        # IPTC
        iptc_tags = lib.iptc.IPTC.get_iptc_tags(source.abspath)
        if iptc_tags is None:
            return
        for iptc_key, iptc_value in iteritems(iptc_tags):
            self.set('iptc_' + iptc_key, iptc_value)
Example #2
0
    def _extract_metadata(self, files=None):
        """Record technical metadata of the processing file as node attributes.

        Stores the pixel dimensions (twice, as ``orig*`` and plain), the file
        size, every wanted non-empty EXIF tag (``exif_`` prefix) and every
        IPTC tag (``iptc_`` prefix) via ``self.set``. ``files`` is only
        forwarded to ``self._find_processing_file``.
        """
        img = self._find_processing_file(files)
        w, h = get_image_dimensions(img)
        # XXX: this is a bit redundant...
        self.set("origwidth", w)
        self.set("origheight", h)
        self.set("origsize", img.size)
        self.set("width", w)
        self.set("height", h)

        # Exif
        unwanted = Image.get_unwanted_exif_attributes()

        with open(img.abspath, 'rb') as fh:
            exif = EXIF.process_file(fh)

        for name in exif.keys():
            # don't set unwanted exif attributes
            wanted = not any(part in name for part in unwanted)
            if wanted and exif[name]:
                key = "exif_" + name.replace(" ", "_")
                self.set(key, utf8_decode_escape(str(exif[name])))

        # IPTC
        iptc = lib.iptc.IPTC.get_iptc_tags(img.abspath)
        if iptc is not None:
            for name, value in iteritems(iptc):
                self.set('iptc_' + name, value)
Example #3
0
 def mkdir(self, path):
     """Add a "directory" node named after the last component of ``path``.

     ``utils.splitpath`` splits ``path`` into a parent part and a name.
     ``self.cwd`` is used to locate the parent node; after a successful
     lookup ``self.dir`` and ``self.node`` are put back to their previous
     values, so the current directory is unchanged.

     Raises IOError when ``self.cwd`` reports that the parent does not exist.
     """
     parent_path, dirname = utils.splitpath(path)
     saved_dir, saved_node = self.dir, self.node
     if not self.cwd(parent_path):
         raise IOError("no such directory: " + parent_path)
     parent_node = self.node
     # cwd() was only borrowed to resolve the parent node
     self.dir, self.node = saved_dir, saved_node
     parent_node.addChild(
         tree.Node(utf8_decode_escape(dirname), type="directory"))
Example #4
0
 def mkdir(self, path):
     """Create a new "directory" child node for ``path``.

     The parent portion of ``path`` is resolved with ``self.cwd``; the
     previous ``self.dir`` / ``self.node`` are restored once the parent node
     has been picked up. The new node is named after the remaining component
     (passed through ``utf8_decode_escape``).

     Raises IOError if ``self.cwd`` returns a false value for the parent.
     """
     parent, name = utils.splitpath(path)
     previous = (self.dir, self.node)
     if not self.cwd(parent):
         raise IOError("no such directory: " + parent)
     target = self.node
     self.dir = previous[0]
     self.node = previous[1]
     target.addChild(tree.Node(utf8_decode_escape(name), type="directory"))
Example #5
0
def file_to_node(file_node, upload_dir):
    '''
    Converts the FileNode object in the upload_dir into a Node with the FileNode as an attachment.

    The first ``ftp_`` marker is stripped from the file name, both on disk
    (best effort) and in the name of the new node. The FileNode is removed
    from ``upload_dir``, its ``_path`` is rewritten (data directory removed,
    last component set to the new node's name) and it is attached to the new
    node, which is then added as a child of ``upload_dir``.

    @param file_node: FileNode
    @param upload_dir: Node
    @return: Node if one was created; None for files of type 'other' or 'zip'
    '''

    home_dir = upload_dir.getParents()[0]
    file_type = file_node.getType()

    if file_type in ('other', 'zip'):
        return

    old_path = file_node.retrieveFile()
    directory, separator, old_name = old_path.rpartition('/')
    new_name = old_name.replace('ftp_', '', 1)
    new_path = directory + separator + new_name

    try:
        os.rename(old_path, new_path)
    except (OSError, UnicodeError):
        # Best effort, as before: a failed rename must not abort the
        # conversion. Only rename-related errors are tolerated now, so
        # KeyboardInterrupt/SystemExit and programming errors still surface.
        pass

    schema = home_dir.get('system.ftp.{}'.format(file_type)).lstrip('/')
    if not schema:
        schema = 'file'

    new_node = tree.Node(utf8_decode_escape(new_name),
                         type='/'.join([file_type, schema]))
    upload_dir.removeFile(file_node)
    file_node._path = file_node._path.replace(config.get('paths.datadir'), '')
    # Swap only the last path component. str.replace() on the basename would
    # also rewrite a directory that happens to contain the same text (and
    # mangles the whole path when the basename is empty).
    parent, slash, _old_basename = file_node._path.rpartition('/')
    file_node._path = parent + slash + new_node.getName()
    new_node.addFile(file_node)
    new_node.event_files_changed()
    upload_dir.addChild(new_node)

    return new_node
Example #6
0
def file_to_node(file_node, upload_dir):
    '''
    Converts the FileNode object in the upload_dir into a Node with the FileNode as an attachment.

    Files of type 'other' or 'zip' are ignored. For every other file the
    first ``ftp_`` marker is dropped from its name (the on-disk rename is
    best effort), a new node of type ``<file type>/<schema>`` is created, the
    FileNode is moved from ``upload_dir`` to that node and the node becomes a
    child of ``upload_dir``. The schema is read from the ``system.ftp.<type>``
    attribute of the first parent of ``upload_dir`` and defaults to 'file'.

    @param file_node: FileNode
    @param upload_dir: Node
    @return: Node if one was created, otherwise None
    '''

    home_dir = upload_dir.getParents()[0]
    file_type = file_node.getType()

    if file_type in ('other', 'zip'):
        return

    source_path = file_node.retrieveFile()
    folder, sep, basename = source_path.rpartition('/')
    new_name = basename.replace('ftp_', '', 1)

    try:
        os.rename(source_path, folder + sep + new_name)
    except (OSError, UnicodeError):
        # A failed rename is tolerated (as it always was), but only
        # rename-related errors are swallowed instead of everything.
        pass

    schema = home_dir.get('system.ftp.{}'.format(file_type)).lstrip('/') or 'file'

    new_node = tree.Node(utf8_decode_escape(new_name),
                         type='/'.join([file_type, schema]))
    upload_dir.removeFile(file_node)
    file_node._path = file_node._path.replace(config.get('paths.datadir'), '')
    # Replace just the final component; replacing the basename text anywhere
    # in the path could also alter a directory name containing it.
    head, slash, _tail = file_node._path.rpartition('/')
    file_node._path = head + slash + new_node.getName()
    new_node.addFile(file_node)
    new_node.event_files_changed()
    upload_dir.addChild(new_node)

    return new_node
Example #7
0
def importBibTeX(infile, node=None, req=None):
    """
    Imports BibTeX entries as document nodes below a node.

    @param infile: either a list of (doctype, docid, fields) entries, or a
        value accepted by getentries() (its basename names the new directory
        node, so presumably a file path)
    @param node: Node that receives the documents; if infile is not a list
        and no node is given, a new "directory" node is created
    @param req: optional request, only used to name the user in the log
    @return: the node the documents were added to
    @raise ValueError: if getentries() fails or a document cannot be added
    @raise MissingMapping: if a detected bibtex type has no configured mapping
    """
    if req:
        try:
            user = users.getUserFromRequest(req)
            msg = "bibtex import: import started by user '%s'" % (user.name)
        except Exception:
            # the user only decorates the log line; never abort the import for it
            msg = "bibtex import: starting import (unable to identify user)"
    else:
        msg = "bibtex import: starting import (%s)" % str(sys.argv)
    logger.info(msg)
    print(msg)

    bibtextypes = getbibtexmappings()

    if isinstance(infile, list):
        entries = infile
    else:
        if not node:
            node = tree.Node(name=utf8_decode_escape(os.path.basename(infile)),
                             type="directory")
        try:
            entries = getentries(infile)
        except Exception:
            logger.error("getentries failed", exc_info=1)
            msg = "bibtex import: getentries failed, import stopped (encoding error)"
            logger.error(msg)
            raise ValueError("getentries failed")

    logger.info("bibtex import: %d entries" % len(entries))

    for counter, (doctype, docid, fields) in enumerate(entries, 1):
        docid_utf8 = utf8_decode_escape(docid)

        mytype = detecttype(doctype, fields)

        if doctype == "string":
            if VERBOSE:
                logger.info(
                    "bibtex import:       processing %s: %s, %s --> (is string)"
                    % (str(counter), doctype, docid))
            continue

        # entries without a detected type, and "string" types, are skipped
        if not mytype or mytype == "string":
            continue

        if mytype not in bibtextypes:
            msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype
            logger.error("bibtex import: " + msg)
            raise MissingMapping(msg)

        metatype = bibtextypes[mytype]
        fieldnames = {}  # bibtex field name -> metadata field name
        datefields = {}  # metadata field name -> date format ("valuelist")

        # check for mask configuration
        mask = getMetaType(metatype).getMask("bibtex_import")
        if not mask:
            mask = getMetaType(metatype).getMask("bibtex")
        if mask:
            for f in mask.getMaskFields():
                # Reset per field: the error message below must not hit an
                # unbound local when the first lookup fails, nor report
                # values left over from the previous field.
                _bib_name = _mfield = _med_name = None
                try:
                    _bib_name = tree.getNode(
                        f.get("mappingfield")).getName()
                    _mfield = tree.getNode(f.get("attribute"))
                    _med_name = _mfield.getName()

                    if _mfield.get("type") == "date":
                        datefields[_med_name] = _mfield.get("valuelist")

                except tree.NoSuchNodeError as e:
                    msg = "bibtex import docid='%s': field error for bibtex mask for type %s and bibtex-type '%s': %s: " % (
                        docid_utf8, metatype, mytype, str(e))
                    msg = msg + "_bib_name='%s', _mfield='%s', _med_name='%s'" % (
                        str(_bib_name), str(_mfield), str(_med_name))
                    logger.error(msg)
                    continue

                fieldnames[_bib_name] = _med_name

        doc = tree.Node(docid_utf8, type="document/" + metatype)
        for k, v in fields.items():
            if k in fieldnames:
                k = fieldnames[k]  # map bibtex name

            if k in datefields:  # format date field
                v = parse_date(v, datefields[k])

            doc.set(k, utf8_decode_escape(v))

        child_id = None
        child_type = None
        try:
            node.addChild(doc)
            doc.setDirty()
            child_id = doc.id
            child_type = doc.type
        except Exception as e:
            logger.error("bibtex import: %s" % (str(e)))
            raise ValueError()

        if VERBOSE:
            # the bibtex key may not be printable; degrade step by step
            try:
                logger.info(
                    "bibtex import: done  processing %s: %s, %s --> type=%s, id=%s"
                    % (str(counter), doctype, docid, str(child_type),
                       str(child_id)))
            except Exception:
                try:
                    logger.info(
                        "bibtex import: done  processing %s: %s, %s --> type=%s, id=%s"
                        % (str(counter), doctype,
                           docid.decode("utf8", "replace"),
                           str(child_type), str(child_id)))
                except Exception:
                    logger.info(
                        "bibtex import: done  processing %s: %s, %s --> type=%s, id=%s"
                        % (str(counter), doctype,
                           "'not printable bibtex key'", str(child_type),
                           str(child_id)))
    msg = "bibtex import: finished import"
    logger.info(msg)
    print(msg)

    return node
Example #8
0
    def event_files_changed(self):
        """Post-process the files attached to this node if it is an image node.

        Nothing but the log line happens unless "image" is part of
        ``self.type``. Otherwise, in this order:

        1. an attached SVG is converted to a temporary PNG ("tmppng"),
        2. an "original" file is registered if none exists (TIFF images get a
           PNG working copy as "image"),
        3. original dimensions and size are stored; JPEGs also get their
           comment section stored,
        4. thumbnail and presentation files are created if no "thumb" exists,
        5. EXIF tags of the "original" files are stored (best effort),
        6. zoom tiles are requested when ``dozoom`` says so and none exist,
        7. IPTC tags of the "original" files are stored,
        8. the temporary PNG is detached again.
        """
        print("Postprocessing node %s" % (self.id,))
        if "image" in self.type:
            for f in self.getFiles():
                if f.getName().lower().endswith('svg'):
                    self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                    self.removeFile(f)
                    self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                    self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                    break
            orig = 0
            thumb = 0
            for f in self.getFiles():
                if f.type == "original":
                    orig = 1
                if f.type == "thumb":
                    thumb = 1

            if orig == 0:
                for f in self.getFiles():
                    if f.type == "image":
                        if f.mimetype == "image/tiff" or ((f.mimetype is None or f.mimetype == "application/x-download")
                                                          and f.getName().lower().endswith(("tif", "tiff"))):

                            # move old file to "original", create a new png to be used as "image"
                            self.removeFile(f)

                            path, ext = splitfilename(f.retrieveFile())
                            pngname = path + ".png"

                            # reuse an existing png, otherwise create it first
                            if not os.path.isfile(pngname):
                                makeOriginalFormat(f.retrieveFile(), pngname)

                            width, height = getImageDimensions(pngname)
                            self.set("width", width)
                            self.set("height", height)

                            print('png:  %s' % (pngname,))

                            self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                            break
                        else:
                            self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

            # retrieve technical metadata.
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    width, height = getImageDimensions(f.retrieveFile())
                    self.set("origwidth", width)
                    self.set("origheight", height)
                    self.set("origsize", f.getSize())

                    if f.mimetype == "image/jpeg":
                        self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

            if thumb == 0:
                for f in self.getFiles():
                    if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                        # random 8 character base name for the generated files
                        basename = hashlib.md5(str(random.random())).hexdigest()[0:8]

                        path = os.path.join(getImportDir(), basename)

                        thumbname = path + ".thumb"
                        thumbname2 = path + ".thumb2"

                        print('tumb:  %s' % (thumbname,))
                        print('presentation:  %s' % (thumbname2,))

                        assert not os.path.isfile(thumbname)
                        assert not os.path.isfile(thumbname2)
                        width, height = getImageDimensions(f.retrieveFile())
                        makeThumbNail(f.retrieveFile(), thumbname)
                        makePresentationFormat(f.retrieveFile(), thumbname2)
                        if f.mimetype is None:
                            if f.getName().lower().endswith("jpg"):
                                f.mimetype = "image/jpeg"
                            else:
                                f.mimetype = "image/tiff"
                        self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                        self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                        self.set("width", width)
                        self.set("height", height)

            # fetch unwanted tags to be omitted
            unwanted_attrs = self.unwanted_attributes()

            # Exif
            try:
                for fnode in self.getFiles():
                    if fnode.type == "original":
                        # the handle is closed as soon as the tags are read
                        with open(fnode.retrieveFile(), 'rb') as stream:
                            tags = EXIF.process_file(stream)

                        for k in tags.keys():
                            # don't set unwanted exif attributes
                            if any(tag in k for tag in unwanted_attrs):
                                continue
                            if tags[k] != "" and k != "JPEGThumbnail":
                                self.set("exif_" + k.replace(" ", "_"),
                                         utf8_decode_escape(str(tags[k])))
                            elif k == "JPEGThumbnail":
                                if tags[k] != "":
                                    self.set("Thumbnail", "True")
                                else:
                                    self.set("Thumbnail", "False")
            except Exception:
                # EXIF extraction stays best effort: a broken or unreadable
                # file must not abort the remaining post-processing
                pass

            if dozoom(self) == 1:
                tileok = 0
                for f in self.getFiles():
                    if f.type.startswith("tile"):
                        tileok = 1
                if not tileok and self.get("width") and self.get("height"):
                    zoom.getImage(self.id, 1)

            for f in self.getFiles():
                if f.getType() == 'original':

                    wanted_tags = lib.iptc.IPTC.get_wanted_iptc_tags()

                    tags_in_upload = lib.iptc.IPTC.get_iptc_values(f.retrieveFile(), wanted_tags)

                    # names of "meta" fields whose iptc_ attribute already holds a value
                    with_value = []
                    for field in getMetaType(self.getSchema()).getMetaFields():
                        if field.get('type') == "meta" and len(field.getValueList()) > 1:
                            value = self.get('iptc_{}'.format(field.getName()))

                            if len(value) > 0:
                                with_value.append(field.getName())

                    if tags_in_upload:
                        for key in tags_in_upload.keys():
                            if tags_in_upload[key] != '':

                                if key not in with_value:
                                    self.set('iptc_{}'.format(key.replace(' ', '_')),
                                             tags_in_upload[key])

            for f in self.getFiles():
                if f.getName().lower().endswith("png") and f.type == "tmppng":
                    self.removeFile(f)
                    break
Example #9
0
    def event_files_changed(self):
        """Post-process the files attached to a document node.

        Without a "doc"/"document" file, all derived files (thumb,
        present*, fileinfo, fulltext) are removed. With one, and unless all
        four derived kinds are already attached, the document is run through
        ``parsepdf.parsePDF2``, the ``key: value`` lines of the ``.info`` file
        are stored as ``pdf_<key>`` attributes (unwanted keys skipped) and the
        ``.thumb``, ``.thumb2``, ``.txt`` and ``.info`` files are attached.

        Raises OperationException when parsePDF2 raises a PDFException.
        """
        print("Postprocessing node %s" % (self.id,))

        thumb = 0
        fulltext = 0
        doc = None
        present = 0
        fileinfo = 0
        for f in self.getFiles():
            if f.type == "thumb":
                thumb = 1
            elif f.type.startswith("present"):
                present = 1
            elif f.type == "fulltext":
                fulltext = 1
            elif f.type == "fileinfo":
                fileinfo = 1
            elif f.type in ("doc", "document"):
                doc = f
        if not doc:
            # no document left: the derived files are meaningless
            for f in self.getFiles():
                if f.type in ("thumb", "fileinfo", "fulltext") or f.type.startswith("present"):
                    self.removeFile(f)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        if doc:
            path, ext = splitfilename(doc.retrieveFile())

            if not (thumb and present and fulltext and fileinfo):
                thumbname = path + ".thumb"
                thumb2name = path + ".thumb2"
                fulltextname = path + ".txt"
                infoname = path + ".info"
                tempdir = config.get("paths.tempdir")

                # NOTE(review): presumably this call writes the four files
                # named above next to the document - confirm in parsepdf
                try:
                    parsepdf.parsePDF2(doc.retrieveFile(), tempdir)
                except parsepdf.PDFException as ex:
                    raise OperationException(ex.value)

                # "with" closes the info file even if storing a value fails
                with open(infoname, "rb") as fi:
                    for line in fi:
                        i = line.find(':')
                        if i > 0:
                            key = line[0:i].strip().lower()
                            if any(tag in key for tag in unwanted_attrs):
                                continue
                            self.set("pdf_" + key,
                                     utf8_decode_escape(line[i + 1:].strip()))
                self.addFile(
                    FileNode(name=thumbname,
                             type="thumb",
                             mimetype="image/jpeg"))
                self.addFile(
                    FileNode(name=thumb2name,
                             type="presentation",
                             mimetype="image/jpeg"))
                self.addFile(
                    FileNode(name=fulltextname,
                             type="fulltext",
                             mimetype="text/plain"))
                self.addFile(
                    FileNode(name=infoname,
                             type="fileinfo",
                             mimetype="text/plain"))
Example #10
0
def importBibTeX(infile, node=None, req=None):
    """
    Imports BibTeX entries as Document nodes below a node.

    @param infile: either a list of entry dicts (each with "ID" and
        "ENTRYTYPE" keys, which are consumed), or a value accepted by
        getentries() (its basename names the new Directory, so presumably a
        file path)
    @param node: node that receives the documents; if infile is not a list
        and no node is given, a new Directory is created
    @param req: optional request; the user found for it is logged and stored
        as "creator" of every imported document
    @return: the node the documents were added to
    @raise ValueError: "bibtex_unspecified_error" if getentries() fails,
        "bibtex_date_error" if a date field cannot be parsed, and an empty
        ValueError if a document cannot be added
    @raise MissingMapping: if a detected bibtex type has no configured mapping
    """
    user = None
    if req:
        try:
            user = users.getUserFromRequest(req)
            msg = "bibtex import: import started by user '%s'" % (user.name)
        except Exception:
            msg = "bibtex import: starting import (unable to identify user)"
    else:
        msg = "bibtex import: starting import (%s)" % ustr(sys.argv)
    logg.info(msg)

    bibtextypes = getbibtexmappings()

    if isinstance(infile, list):
        entries = infile
    else:
        node = node or Directory(utf8_decode_escape(os.path.basename(infile)))
        try:
            entries = getentries(infile)
        except Exception:
            # XXX TODO This reports *everything* as encoding error
            # XXX TODO (even things like full disk or other parsing errors).
            # XXX TODO We should at least reformulate the error message,
            # XXX TODO and -- even better -- only catch errors that are to be expected.
            logg.error("getentries failed", exc_info=1)
            msg = "bibtex import: getentries failed, import stopped (encoding error)"
            logg.error(msg)
            raise ValueError("bibtex_unspecified_error")

    logg.info("bibtex import: %d entries", len(entries))

    for fields in entries:
        # the entry id doubles as document name and as "key" attribute
        docid_utf8 = fields[u"key"] = fields.pop("ID")
        doctype = fields.pop("ENTRYTYPE")
        mytype = detecttype(doctype, fields)

        if not mytype:
            continue

        if mytype not in bibtextypes:
            logg.error(
                "bibtex mapping of bibtex type '%s' not defined - import stopped",
                mytype)
            msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype
            raise MissingMapping(msg)

        metatype = bibtextypes[mytype]
        fieldnames = {}  # bibtex field name -> metadata field name
        datefields = {}  # metadata field name -> date format ("valuelist")

        # check for mask configuration
        metadatatype = q(Metadatatype).filter_by(name=metatype).one()
        mask = metadatatype.get_mask(
            u"bibtex_import") or metadatatype.get_mask(u"bibtex")
        if mask:
            for f in mask.all_maskitems:
                try:
                    _bib_name = q(Node).get(f.get(u"mappingfield")).name
                    _mfield = q(Node).get(f.get(u"attribute"))
                    _med_name = _mfield.name
                    if _mfield.get(u"type") == u"date":
                        datefields[_med_name] = _mfield.get(u"valuelist")
                except AttributeError as e:
                    msg = "bibtex import docid='{}': field error for bibtex mask for type {} and bibtex-type '{}': {}"
                    msg = msg.format(docid_utf8, metatype, mytype, e)
                    logg.error(msg)
                else:
                    fieldnames[_bib_name] = _med_name

        doc = Document(docid_utf8, schema=metatype)
        for k, v in fields.items():
            if k in fieldnames:
                k = fieldnames[k]  # map bibtex name

            if k in datefields:  # format date field
                try:
                    v = str(parse_date(v, datefields[k]))
                    # if date format does not contains '%' the valid digit of the result must not be longer than the date format
                    # e.g. if datefields[k] is 'yyyy' then the result v must be clipped after 4 characters
                    # afterwards the result is expanded again (without the invalid digits)
                    if datefields[k].find('%') < 0:
                        v = v[:len(datefields[k])]
                        v = str(parse_date(v, datefields[k]))
                except ValueError:
                    logg.exception("bibtex exception: %s: %s", k, v)
                    raise ValueError("bibtex_date_error")

            doc.set(k, v)

        # because the bibtex import contains only a subset of the metadata defined in metadatatype,
        # all other metadata are created and set to default values.
        # this will be done in the same manner as if the document is loaded in editor and saved without
        # any changes (required fields are not considered)
        editmask = metadatatype.get_mask(u"editmask")
        if editmask and hasattr(editmask, 'set_default_metadata'):
            editmask.set_default_metadata(doc)

        try:
            node.children.append(doc)
            if user:
                doc.set("creator", user.login_name)
            # strftime() without a time tuple formats the current local time
            doc.set("creationtime",
                    unicode(time.strftime('%Y-%m-%dT%H:%M:%S')))
        except Exception:
            logg.exception("bibtex exception")
            raise ValueError()

    msg = "bibtex import: finished import"
    logg.debug(msg)
    # Print the finish message. The old code printed whatever ``msg`` last
    # held, i.e. the start message or the last field error.
    print(msg)

    return node
Example #11
0
def importBibTeX(infile, node=None, req=None):
    """
    Imports BibTeX entries as document nodes below a node.

    @param infile: either a list of (doctype, docid, fields) entries, or a
        value accepted by getentries() (its basename names the new directory
        node, so presumably a file path)
    @param node: Node that receives the documents; if infile is not a list
        and no node is given, a new "directory" node is created
    @param req: optional request, only used to name the user in the log
    @return: the node the documents were added to
    @raise ValueError: if getentries() fails or a document cannot be added
    @raise MissingMapping: if a detected bibtex type has no configured mapping
    """
    if req:
        try:
            user = users.getUserFromRequest(req)
            msg = "bibtex import: import started by user '%s'" % (user.name)
        except Exception:
            # the user only decorates the log line; never abort the import for it
            msg = "bibtex import: starting import (unable to identify user)"
    else:
        msg = "bibtex import: starting import (%s)" % str(sys.argv)
    logger.info(msg)
    print(msg)

    bibtextypes = getbibtexmappings()

    if isinstance(infile, list):
        entries = infile
    else:
        if not node:
            node = tree.Node(name=utf8_decode_escape(os.path.basename(infile)),
                             type="directory")
        try:
            entries = getentries(infile)
        except Exception:
            logger.error("getentries failed", exc_info=1)
            msg = "bibtex import: getentries failed, import stopped (encoding error)"
            logger.error(msg)
            raise ValueError("getentries failed")

    logger.info("bibtex import: %d entries" % len(entries))

    for counter, (doctype, docid, fields) in enumerate(entries, 1):
        docid_utf8 = utf8_decode_escape(docid)

        mytype = detecttype(doctype, fields)

        if doctype == "string":
            if VERBOSE:
                logger.info(
                    "bibtex import:       processing %s: %s, %s --> (is string)" % (str(counter), doctype, docid))
            continue

        # entries without a detected type, and "string" types, are skipped
        if not mytype or mytype == "string":
            continue

        if mytype not in bibtextypes:
            msg = "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype
            logger.error("bibtex import: " + msg)
            raise MissingMapping(msg)

        metatype = bibtextypes[mytype]
        fieldnames = {}  # bibtex field name -> metadata field name
        datefields = {}  # metadata field name -> date format ("valuelist")

        # check for mask configuration
        mask = getMetaType(metatype).getMask("bibtex_import")
        if not mask:
            mask = getMetaType(metatype).getMask("bibtex")
        if mask:
            for f in mask.getMaskFields():
                # Reset per field: the error message below must not hit an
                # unbound local when the first lookup fails, nor report
                # values left over from the previous field.
                _bib_name = _mfield = _med_name = None
                try:
                    _bib_name = tree.getNode(f.get("mappingfield")).getName()
                    _mfield = tree.getNode(f.get("attribute"))
                    _med_name = _mfield.getName()

                    if _mfield.get("type") == "date":
                        datefields[_med_name] = _mfield.get("valuelist")

                except tree.NoSuchNodeError as e:
                    msg = "bibtex import docid='%s': field error for bibtex mask for type %s and bibtex-type '%s': %s: " % (
                        docid_utf8, metatype, mytype, str(e))
                    msg = msg + "_bib_name='%s', _mfield='%s', _med_name='%s'" % (
                        str(_bib_name), str(_mfield), str(_med_name))
                    logger.error(msg)
                    continue

                fieldnames[_bib_name] = _med_name

        doc = tree.Node(docid_utf8, type="document/" + metatype)
        for k, v in fields.items():
            if k in fieldnames:
                k = fieldnames[k]  # map bibtex name

            if k in datefields:  # format date field
                v = parse_date(v, datefields[k])

            doc.set(k, utf8_decode_escape(v))

        child_id = None
        child_type = None
        try:
            node.addChild(doc)
            doc.setDirty()
            child_id = doc.id
            child_type = doc.type
        except Exception as e:
            logger.error("bibtex import: %s" % (str(e)))
            raise ValueError()

        if VERBOSE:
            # the bibtex key may not be printable; degrade step by step
            try:
                logger.info("bibtex import: done  processing %s: %s, %s --> type=%s, id=%s" % (
                    str(counter), doctype, docid, str(child_type), str(child_id)))
            except Exception:
                try:
                    logger.info("bibtex import: done  processing %s: %s, %s --> type=%s, id=%s" % (
                        str(counter), doctype, docid.decode("utf8", "replace"), str(child_type), str(child_id)))
                except Exception:
                    logger.info("bibtex import: done  processing %s: %s, %s --> type=%s, id=%s" % (
                        str(counter), doctype, "'not printable bibtex key'", str(child_type), str(child_id)))
    msg = "bibtex import: finished import"
    logger.info(msg)
    print(msg)

    return node
Example #12
0
def importBibTeX(infile, node=None, req=None):
    """Import BibTeX entries as ``Document`` children of *node*.

    :param infile: either a list of already parsed entries (dicts that
        contain at least the keys ``"ID"`` and ``"ENTRYTYPE"``) or the path
        of a BibTeX file, which is then parsed with ``getentries``.
    :param node: container the new documents are appended to.  If *infile*
        is a path and *node* is falsy, a new ``Directory`` named after the
        file's basename is created.  If *infile* is a list, *node* must be
        given, otherwise appending fails and surfaces as ``ValueError``.
    :param req: optional request; only used to name the importing user in
        the log.
    :returns: the node the documents were appended to.
    :raises ValueError: ``"encoding_error"`` if the file cannot be parsed,
        or if a date field cannot be converted, or if a document cannot be
        appended to *node*.
    :raises MissingMapping: if a detected BibTeX type has no mapping.
    """
    if req:
        try:
            user = users.getUserFromRequest(req)
            msg = "bibtex import: import started by user '%s'" % (user.name)
        except Exception:
            msg = "bibtex import: starting import (unable to identify user)"
    else:
        msg = "bibtex import: starting import (%s)" % ustr(sys.argv)
    logg.info(msg)

    bibtextypes = getbibtexmappings()

    if isinstance(infile, list):
        entries = infile
    else:
        node = node or Directory(utf8_decode_escape(os.path.basename(infile)))
        try:
            entries = getentries(infile)
        except Exception:
            logg.error("getentries failed", exc_info=1)
            logg.error("bibtex import: getentries failed, import stopped (encoding error)")
            raise ValueError("encoding_error")

    logg.info("bibtex import: %d entries", len(entries))

    for fields in entries:
        # the BibTeX citation key is stored as the "key" attribute and also
        # used as the name of the new document
        docid_utf8 = fields.pop("ID")
        fields["key"] = docid_utf8
        doctype = fields.pop("ENTRYTYPE")
        mytype = detecttype(doctype, fields)

        if not mytype:
            # entries whose type cannot be detected are skipped
            continue

        if mytype not in bibtextypes:
            logg.error("bibtex mapping of bibtex type '%s' not defined - import stopped", mytype)
            raise MissingMapping(
                "bibtex mapping of bibtex type '%s' not defined - import stopped" % mytype)

        metatype = bibtextypes[mytype]

        # bibtex field name -> attribute name of the target schema
        fieldnames = {}
        # attribute name -> date format, for attributes of type "date"
        datefields = {}

        # check for mask configuration
        metadatatype = q(Metadatatype).filter_by(name=metatype).one()
        mask = metadatatype.get_mask(u"bibtex_import") or metadatatype.get_mask(u"bibtex")
        if mask:
            for f in mask.all_maskitems:
                try:
                    _bib_name = q(Node).get(f.get(u"mappingfield")).name
                    _mfield = q(Node).get(f.get(u"attribute"))
                    _med_name = _mfield.name
                    if _mfield.get(u"type") == u"date":
                        datefields[_med_name] = _mfield.get(u"valuelist")
                except AttributeError as e:
                    # a mask item pointing to a missing node is logged and
                    # ignored; the remaining items are still used
                    logg.error(
                        "bibtex import docid='{}': field error for bibtex mask for type {} and bibtex-type '{}': {}".format(
                            docid_utf8, metatype, mytype, e))
                else:
                    fieldnames[_bib_name] = _med_name

        doc = Document(docid_utf8, schema=metatype)
        for k, v in fields.items():
            k = fieldnames.get(k, k)  # map bibtex name

            if k in datefields:  # format date field
                try:
                    v = str(parse_date(v, datefields[k]))
                    # if date format does not contain '%' the result must not be longer than the date format
                    # e.g. if datefields[k] is 'yyyy' then the result v must be clipped after 4 characters
                    if datefields[k].find('%') < 0:
                        v = v[:len(datefields[k])]
                except ValueError:
                    logg.exception("bibtex exception: %s: %s", k, v)
                    raise ValueError("ValueError: " + k + ": " + v)

            doc.set(k, v)

        try:
            node.children.append(doc)
        except Exception:
            logg.exception("bibtex exception")
            raise ValueError()

    # announce completion; previously the stale start/error message held in
    # ``msg`` was printed here
    finished_msg = "bibtex import: finished import"
    logg.debug(finished_msg)
    print(finished_msg)

    return node
Example #13
0
    def event_files_changed(self):
        """Bring the derived files of this node in line with its document file.

        If the node has no file of type ``"doc"``/``"document"``, all derived
        files (thumb, presentation, fileinfo, fulltext) are removed.
        Otherwise, if at least one derived file is missing, the document is
        run through ``parsepdf.parsePDF2``, the resulting ``.info`` file is
        read into ``pdf_*`` attributes (except unwanted ones) and the four
        derived files are registered on the node.

        :raises OperationException: if ``parsepdf`` reports a ``PDFException``.
        """
        print("Postprocessing node %s" % (self.id,))

        thumb = 0
        fulltext = 0
        doc = None
        present = 0
        fileinfo = 0
        for f in self.getFiles():
            if f.type == "thumb":
                thumb = 1
            elif f.type.startswith("present"):
                present = 1
            elif f.type == "fulltext":
                fulltext = 1
            elif f.type == "fileinfo":
                fileinfo = 1
            elif f.type in ("doc", "document"):
                doc = f

        if not doc:
            # Without a document the derived files are orphans.  Iterate over
            # a snapshot so that removing files cannot disturb the iteration
            # in case getFiles() hands out the live list.
            for f in list(self.getFiles()):
                if f.type in ("thumb", "fileinfo", "fulltext") or f.type.startswith("present"):
                    self.removeFile(f)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        if doc:
            path, _ext = splitfilename(doc.retrieveFile())

            if not (thumb and present and fulltext and fileinfo):
                thumbname = path + ".thumb"
                thumb2name = path + ".thumb2"
                fulltextname = path + ".txt"
                infoname = path + ".info"
                tempdir = config.get("paths.tempdir")

                # NOTE(review): parsePDF2 is presumably what writes the
                # .thumb/.thumb2/.txt/.info files registered below - confirm.
                try:
                    parsepdf.parsePDF2(doc.retrieveFile(), tempdir)
                except parsepdf.PDFException as ex:
                    raise OperationException(ex.value)

                # "key: value" lines of the info file become pdf_<key> attributes
                with open(infoname, "rb") as fi:
                    for line in fi:
                        i = line.find(':')
                        if i > 0:
                            key = line[0:i].strip().lower()
                            if any(tag in key for tag in unwanted_attrs):
                                continue
                            self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

                self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                self.addFile(FileNode(name=thumb2name, type="presentation", mimetype="image/jpeg"))
                self.addFile(FileNode(name=fulltextname, type="fulltext", mimetype="text/plain"))
                self.addFile(FileNode(name=infoname, type="fileinfo", mimetype="text/plain"))
Example #14
0
    def event_files_changed(self):
        """Bring the derived files of this node in line with its document file.

        If the node has no file of type ``"document"``, all derived files
        (thumb, presentation, fileinfo, fulltext) are removed.  Otherwise,
        if at least one derived file is missing, the document is run through
        ``parsepdf.parsePDFExternal``, the resulting ``.info`` file is read
        into ``pdf_*`` attributes (except unwanted ones) and the four derived
        files are registered.  Finally the fulltext is (re)imported and the
        session is committed.

        An encrypted document is accepted as is: the session is committed
        and no derived data is created.

        :raises OperationException: for every other ``PDFException``.
        """
        logg.debug("Postprocessing node %s", self.id)

        thumb = 0
        fulltext = 0
        doc = None
        present = 0
        fileinfo = 0
        for f in self.files:
            if f.type == "thumb":
                thumb = 1
            elif f.type.startswith("present"):
                present = 1
            elif f.type == "fulltext":
                fulltext = 1
            elif f.type == "fileinfo":
                fileinfo = 1
            elif f.type == "document":
                doc = f

        if not doc:
            # Iterate over a snapshot: removing from self.files while looping
            # over it would skip the element following each removed one.
            for f in list(self.files):
                if f.type in ("thumb", "fileinfo", "fulltext") or f.type.startswith("present"):
                    self.files.remove(f)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.get_unwanted_exif_attributes()

        if doc:
            path, _ext = splitfilename(doc.abspath)

            if not (thumb and present and fulltext and fileinfo):
                thumbname = path + ".thumb"
                thumb2name = path + ".thumb2"
                fulltextname = path + ".txt"
                infoname = path + ".info"
                tempdir = config.get("paths.tempdir")

                # NOTE(review): parsePDFExternal is presumably what writes the
                # .thumb/.thumb2/.txt/.info files registered below - confirm.
                try:
                    parsepdf.parsePDFExternal(doc.abspath, tempdir)
                except parsepdf.PDFException as ex:
                    if ex.value == 'error:document encrypted':
                        # allow upload of encrypted document
                        db.session.commit()
                        return
                    raise OperationException(ex.value)

                # "key: value" lines of the info file become pdf_<key> attributes
                with codecs.open(infoname, "rb", encoding='utf8') as fi:
                    for line in fi.readlines():
                        i = line.find(':')
                        if i > 0:
                            key = line[0:i].strip().lower()
                            if any(tag in key for tag in unwanted_attrs):
                                continue
                            self.set("pdf_" + key,
                                     utf8_decode_escape(line[i + 1:].strip()))

                self.files.append(File(thumbname, "thumb", "image/jpeg"))
                self.files.append(
                    File(thumb2name, "presentation", "image/jpeg"))
                self.files.append(File(fulltextname, "fulltext", "text/plain"))
                self.files.append(File(infoname, "fileinfo", "text/plain"))

            import_node_fulltext(self, overwrite=True)

        db.session.commit()
Example #15
0
    def event_files_changed(self):
        """Post-process the files of an image node.

        Does nothing unless ``"image"`` occurs in ``self.type``.  Otherwise:

        * an SVG file is converted with ``svg_to_png``; the SVG is registered
          as "original" and "image", the PNG as temporary "tmppng" file
          (removed again at the end);
        * if there is no "original" file yet, TIFF images are moved to
          "original" and a PNG (created if missing) becomes the "image";
          any other image is additionally registered as "original";
        * ``origwidth``/``origheight``/``origsize`` (and ``jpg_comment`` for
          JPEGs) are set from the image file;
        * if there is no "thumb" file, thumbnail and presentation images are
          created in the import directory and ``width``/``height`` are set;
        * EXIF and IPTC tags of the "original" files are stored as
          ``exif_*``/``iptc_*`` attributes, skipping unwanted ones;
        * zoom tiles are requested via ``zoom.getImage`` if ``dozoom`` asks
          for it, no tile file exists and width and height are set.

        EXIF and IPTC extraction are best effort: failures are logged and
        otherwise ignored.
        """
        import logging  # local import: only needed by the best-effort handlers below

        print("Postprocessing node %s" % (self.id,))
        if "image" not in self.type:
            return

        for f in self.getFiles():
            if f.getName().lower().endswith('svg'):
                self.svg_to_png(f.retrieveFile(), f.retrieveFile()[:-4] + ".png")
                self.removeFile(f)
                self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile(), type="image", mimetype=f.mimetype))
                self.addFile(FileNode(name=f.retrieveFile()[:-4] + ".png", type="tmppng", mimetype="image/png"))
                break

        orig = 0
        thumb = 0
        for f in self.getFiles():
            if f.type == "original":
                orig = 1
            if f.type == "thumb":
                thumb = 1

        if orig == 0:
            for f in self.getFiles():
                if f.type != "image":
                    continue

                if f.mimetype == "image/tiff" or (
                        (f.mimetype is None or f.mimetype == "application/x-download")
                        and f.getName().lower().endswith(("tif", "tiff"))):

                    # move old file to "original", create a new png to be used as "image"
                    self.removeFile(f)

                    path, _ext = splitfilename(f.retrieveFile())
                    pngname = path + ".png"

                    # an already existing png is reused
                    if not os.path.isfile(pngname):
                        makeOriginalFormat(f.retrieveFile(), pngname)

                    width, height = getImageDimensions(pngname)
                    self.set("width", width)
                    self.set("height", height)

                    print('png name/path:  %s' % (pngname,))

                    self.addFile(FileNode(name=pngname, type="image", mimetype="image/png"))
                    self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype="image/tiff"))
                    break
                else:
                    self.addFile(FileNode(name=f.retrieveFile(), type="original", mimetype=f.mimetype))

        # retrieve technical metadata.
        for f in self.getFiles():
            if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                width, height = getImageDimensions(f.retrieveFile())
                self.set("origwidth", width)
                self.set("origheight", height)
                self.set("origsize", f.getSize())

                if f.mimetype == "image/jpeg":
                    self.set("jpg_comment", iso2utf8(getJpegSection(f.retrieveFile(), 0xFE).strip()))

        if thumb == 0:
            for f in self.getFiles():
                if (f.type == "image" and not f.getName().lower().endswith("svg")) or f.type == "tmppng":
                    # derived images get a random name inside the import directory
                    basename = hashlib.md5(str(random.random())).hexdigest()[0:8]
                    path = os.path.join(getImportDir(), basename)

                    thumbname = path + ".thumb"
                    thumbname2 = path + ".thumb2"

                    print('tumb:  %s' % (thumbname,))
                    print('presentation:  %s' % (thumbname2,))

                    assert not os.path.isfile(thumbname)
                    assert not os.path.isfile(thumbname2)
                    width, height = getImageDimensions(f.retrieveFile())
                    makeThumbNail(f.retrieveFile(), thumbname)
                    makePresentationFormat(f.retrieveFile(), thumbname2)
                    if f.mimetype is None:
                        if f.getName().lower().endswith("jpg"):
                            f.mimetype = "image/jpeg"
                        else:
                            f.mimetype = "image/tiff"
                    self.addFile(FileNode(name=thumbname, type="thumb", mimetype="image/jpeg"))
                    self.addFile(FileNode(name=thumbname2, type="presentation", mimetype="image/jpeg"))
                    self.set("width", width)
                    self.set("height", height)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        # Exif
        try:
            from lib.Exif import EXIF

            for original in self.getFiles():
                if original.type == "original":
                    # close the handle as soon as the tags have been read
                    with open(original.retrieveFile(), 'rb') as fh:
                        tags = EXIF.process_file(fh)

                    # process the tags in sorted order
                    for k in sorted(tags.keys()):
                        # don't set unwanted exif attributes
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if k == "JPEGThumbnail":
                            # only record whether an embedded thumbnail exists
                            if tags[k] != "":
                                self.set("Thumbnail", "True")
                            else:
                                self.set("Thumbnail", "False")
                        elif tags[k] != "":
                            self.set("exif_" + k.replace(" ", "_"),
                                     utf8_decode_escape(str(tags[k])))
        except Exception:
            # best effort: missing or unreadable EXIF data must not abort the postprocessing
            logging.getLogger(__name__).warning(
                "EXIF extraction failed for node %s", self.id, exc_info=True)

        if dozoom(self) == 1:
            tileok = any(f.type.startswith("tile") for f in self.getFiles())
            if not tileok and self.get("width") and self.get("height"):
                zoom.getImage(self.id, 1)

        # iptc
        try:
            from lib.iptc import IPTC

            for original in self.getFiles():
                if original.type == "original":
                    tags = IPTC.getIPTCValues(original.retrieveFile())
                    # process the tags in sorted order
                    for k in sorted(tags.keys()):
                        # skip unknown iptc tags
                        if 'IPTC_' in k:
                            continue
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if isinstance(tags[k], list):
                            tags[k] = ', '.join(tags[k])
                        if tags[k] != "":
                            self.set("iptc_" + k.replace(" ", "_"),
                                     utf8_decode_escape(str(tags[k])))
        except Exception:
            # best effort: missing or unreadable IPTC data must not abort the postprocessing
            logging.getLogger(__name__).warning(
                "IPTC extraction failed for node %s", self.id, exc_info=True)

        # the png created for an svg was only needed for the steps above
        for f in self.getFiles():
            if f.getName().lower().endswith("png") and f.type == "tmppng":
                self.removeFile(f)
                break
Example #16
0
    def event_files_changed(self):
        """Bring the derived files of this node in line with its document file.

        Without a file of type ``"document"`` all derived files (thumb,
        presentation, fileinfo, fulltext) are dropped.  With a document and
        at least one derived file missing, ``parsepdf.parsePDFExternal`` is
        run, the ``.info`` file is read into ``pdf_*`` attributes (unwanted
        ones are skipped) and the four derived files are registered.  With a
        document the fulltext is then (re)imported.  The session is committed
        at the end.

        An encrypted document is accepted as is: the session is committed
        and the method returns without creating derived data.

        :raises OperationException: for every other ``PDFException``.
        """
        logg.debug("Postprocessing node %s", self.id)

        thumb = 0
        fulltext = 0
        doc = None
        present = 0
        fileinfo = 0
        for f in self.files:
            if f.type == "thumb":
                thumb = 1
            elif f.type.startswith("present"):
                present = 1
            elif f.type == "fulltext":
                fulltext = 1
            elif f.type == "fileinfo":
                fileinfo = 1
            elif f.type == "document":
                doc = f

        if not doc:
            # Loop over a copy: removing entries from self.files during
            # iteration would make the loop skip the entry that follows
            # each removed one, leaving stale derived files behind.
            for f in list(self.files):
                if f.type in ("thumb", "fileinfo", "fulltext") or f.type.startswith("present"):
                    self.files.remove(f)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.get_unwanted_exif_attributes()

        if doc:
            path, _ext = splitfilename(doc.abspath)

            if not (thumb and present and fulltext and fileinfo):
                thumbname = path + ".thumb"
                thumb2name = path + ".thumb2"
                fulltextname = path + ".txt"
                infoname = path + ".info"
                tempdir = config.get("paths.tempdir")

                # NOTE(review): parsePDFExternal is presumably what writes the
                # .thumb/.thumb2/.txt/.info files registered below - confirm.
                try:
                    parsepdf.parsePDFExternal(doc.abspath, tempdir)
                except parsepdf.PDFException as ex:
                    if ex.value == 'error:document encrypted':
                        # allow upload of encrypted document
                        db.session.commit()
                        return
                    raise OperationException(ex.value)

                # "key: value" lines of the info file become pdf_<key> attributes
                with codecs.open(infoname, "rb", encoding='utf8') as fi:
                    for line in fi.readlines():
                        i = line.find(':')
                        if i > 0:
                            key = line[0:i].strip().lower()
                            if any(tag in key for tag in unwanted_attrs):
                                continue
                            self.set("pdf_" + key, utf8_decode_escape(line[i + 1:].strip()))

                self.files.append(File(thumbname, "thumb", "image/jpeg"))
                self.files.append(File(thumb2name, "presentation", "image/jpeg"))
                self.files.append(File(fulltextname, "fulltext", "text/plain"))
                self.files.append(File(infoname, "fileinfo", "text/plain"))

            import_node_fulltext(self, overwrite=True)

        db.session.commit()
Example #17
0
    def event_files_changed(self):
        """Post-process the files of an image node.

        Nothing happens unless ``"image"`` occurs in ``self.type``.
        Otherwise, in this order:

        * an SVG file is converted with ``svg_to_png``; the SVG is registered
          as "original" and "image", the PNG as temporary "tmppng" file
          (removed again at the end);
        * without an "original" file, TIFF images are moved to "original"
          and a PNG (created if missing) becomes the "image"; any other
          image is additionally registered as "original";
        * ``origwidth``/``origheight``/``origsize`` (and ``jpg_comment`` for
          JPEGs) are set from the image file;
        * without a "thumb" file, thumbnail and presentation images are
          created in the import directory and ``width``/``height`` are set;
        * EXIF and IPTC tags of the "original" files are stored as
          ``exif_*``/``iptc_*`` attributes, skipping unwanted ones;
        * zoom tiles are requested via ``zoom.getImage`` if ``dozoom`` asks
          for it, no tile file exists and width and height are set.

        EXIF and IPTC extraction are best effort: failures are logged and
        otherwise ignored.
        """
        import logging  # local import: only needed by the best-effort handlers below

        print("Postprocessing node %s" % (self.id,))
        if "image" not in self.type:
            return

        for f in self.getFiles():
            if f.getName().lower().endswith('svg'):
                self.svg_to_png(f.retrieveFile(),
                                f.retrieveFile()[:-4] + ".png")
                self.removeFile(f)
                self.addFile(
                    FileNode(name=f.retrieveFile(),
                             type="original",
                             mimetype=f.mimetype))
                self.addFile(
                    FileNode(name=f.retrieveFile(),
                             type="image",
                             mimetype=f.mimetype))
                self.addFile(
                    FileNode(name=f.retrieveFile()[:-4] + ".png",
                             type="tmppng",
                             mimetype="image/png"))
                break

        orig = 0
        thumb = 0
        for f in self.getFiles():
            if f.type == "original":
                orig = 1
            if f.type == "thumb":
                thumb = 1

        if orig == 0:
            for f in self.getFiles():
                if f.type != "image":
                    continue

                if f.mimetype == "image/tiff" or (
                        (f.mimetype is None
                         or f.mimetype == "application/x-download")
                        and f.getName().lower().endswith(("tif", "tiff"))):
                    # move old file to "original", create a new png to be used as "image"
                    self.removeFile(f)

                    path, _ext = splitfilename(f.retrieveFile())
                    pngname = path + ".png"

                    # an already existing png is reused
                    if not os.path.isfile(pngname):
                        makeOriginalFormat(f.retrieveFile(), pngname)

                    width, height = getImageDimensions(pngname)
                    self.set("width", width)
                    self.set("height", height)

                    self.addFile(
                        FileNode(name=pngname,
                                 type="image",
                                 mimetype="image/png"))
                    self.addFile(
                        FileNode(name=f.retrieveFile(),
                                 type="original",
                                 mimetype="image/tiff"))
                    break
                else:
                    self.addFile(
                        FileNode(name=f.retrieveFile(),
                                 type="original",
                                 mimetype=f.mimetype))

        # retrieve technical metadata.
        for f in self.getFiles():
            if (f.type == "image"
                    and not f.getName().lower().endswith("svg")
                ) or f.type == "tmppng":
                width, height = getImageDimensions(f.retrieveFile())
                self.set("origwidth", width)
                self.set("origheight", height)
                self.set("origsize", f.getSize())

                if f.mimetype == "image/jpeg":
                    self.set(
                        "jpg_comment",
                        iso2utf8(
                            getJpegSection(f.retrieveFile(),
                                           0xFE).strip()))

        if thumb == 0:
            for f in self.getFiles():
                if (f.type == "image"
                        and not f.getName().lower().endswith("svg")
                    ) or f.type == "tmppng":
                    # derived images get a random name inside the import directory
                    basename = hashlib.md5(str(
                        random.random())).hexdigest()[0:8]
                    path = os.path.join(getImportDir(), basename)

                    thumbname = path + ".thumb"
                    thumbname2 = path + ".thumb2"

                    assert not os.path.isfile(thumbname)
                    assert not os.path.isfile(thumbname2)
                    width, height = getImageDimensions(f.retrieveFile())
                    makeThumbNail(f.retrieveFile(), thumbname)
                    makePresentationFormat(f.retrieveFile(), thumbname2)
                    if f.mimetype is None:
                        if f.getName().lower().endswith("jpg"):
                            f.mimetype = "image/jpeg"
                        else:
                            f.mimetype = "image/tiff"
                    self.addFile(
                        FileNode(name=thumbname,
                                 type="thumb",
                                 mimetype="image/jpeg"))
                    self.addFile(
                        FileNode(name=thumbname2,
                                 type="presentation",
                                 mimetype="image/jpeg"))
                    self.set("width", width)
                    self.set("height", height)

        # fetch unwanted tags to be omitted
        unwanted_attrs = self.unwanted_attributes()

        # Exif
        try:
            from lib.Exif import EXIF

            for original in self.getFiles():
                if original.type == "original":
                    # close the handle as soon as the tags have been read
                    with open(original.retrieveFile(), 'rb') as fh:
                        tags = EXIF.process_file(fh)

                    # process the tags in sorted order
                    for k in sorted(tags.keys()):
                        # don't set unwanted exif attributes
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if k == "JPEGThumbnail":
                            # only record whether an embedded thumbnail exists
                            if tags[k] != "":
                                self.set("Thumbnail", "True")
                            else:
                                self.set("Thumbnail", "False")
                        elif tags[k] != "":
                            self.set("exif_" + k.replace(" ", "_"),
                                     utf8_decode_escape(str(tags[k])))
        except Exception:
            # best effort: missing or unreadable EXIF data must not abort the postprocessing
            logging.getLogger(__name__).warning(
                "EXIF extraction failed for node %s", self.id, exc_info=True)

        if dozoom(self) == 1:
            tileok = any(f.type.startswith("tile") for f in self.getFiles())
            if not tileok and self.get("width") and self.get("height"):
                zoom.getImage(self.id, 1)

        # iptc
        try:
            from lib.iptc import IPTC

            for original in self.getFiles():
                if original.type == "original":
                    tags = IPTC.getIPTCValues(original.retrieveFile())
                    # process the tags in sorted order
                    for k in sorted(tags.keys()):
                        # skip unknown iptc tags
                        if 'IPTC_' in k:
                            continue
                        if any(tag in k for tag in unwanted_attrs):
                            continue
                        if isinstance(tags[k], list):
                            tags[k] = ', '.join(tags[k])
                        if tags[k] != "":
                            self.set("iptc_" + k.replace(" ", "_"),
                                     utf8_decode_escape(str(tags[k])))
        except Exception:
            # best effort: missing or unreadable IPTC data must not abort the postprocessing
            logging.getLogger(__name__).warning(
                "IPTC extraction failed for node %s", self.id, exc_info=True)

        # the png created for an svg was only needed for the steps above
        for f in self.getFiles():
            if f.getName().lower().endswith("png") and f.type == "tmppng":
                self.removeFile(f)
                break