Exemple #1
0
def process_PDFs(site):
    """Strip metadata from every PDF ``File`` object found in the catalog.

    Each PDF's blob is copied into a private temporary directory and run
    through ``gs_pdf``, ``exiftool`` and ``qpdf``, in that order. Only when
    all three succeed is the cleaned file written back to the object as a
    new ``NamedBlobFile`` under its original filename. If any tool raises,
    a warning with the traceback is logged, the remaining tools are skipped
    and the object is left unmodified.

    The transaction is committed after every 50 rewritten PDFs and once
    more after the loop.

    :param site: object whose ``portal_catalog`` attribute is callable with
        index queries and returns brains providing ``getObject()`` and
        ``getURL()``.
    """
    # Applied in order; the first failure skips the remaining tools.
    # Templates are passed to the logger as lazy %-style arguments so that a
    # byte-string path with non-ASCII characters is never coerced into a
    # unicode template (which would raise inside the except handler).
    strippers = (
        (gs_pdf, 'Could not strip additional metadata with gs %s'),
        (exiftool, 'Could not strip metadata with exiftool %s'),
        (qpdf, 'Could not strip additional metadata with qpdf %s'),
    )

    catalog = site.portal_catalog
    filebrains = catalog(portal_type='File', contentType='application/pdf')
    print('Processing {} PDFs'.format(len(filebrains)))
    count = 0
    for brain in filebrains:
        obj = brain.getObject()
        filename = obj.file.filename
        tmp_dir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmp_dir, filename).encode('utf-8').strip()

            # Dump the stored blob to disk so the external tools can work
            # on a regular file.
            blob = obj.file.open('r')
            try:
                with open(filepath, 'wb') as tmp_file:
                    tmp_file.write(blob.read())
            finally:
                blob.close()

            stripped_ok = True
            for strip_tool, warning in strippers:
                try:
                    strip_tool(filepath)
                except Exception:
                    logger.warning(warning, filepath, exc_info=True)
                    stripped_ok = False
                    break

            if stripped_ok:
                with open(filepath, 'rb') as stripped:
                    obj.file = NamedBlobFile(stripped, filename=filename)
                count += 1
                if count % 50 == 0:
                    print('Processed {} PDFs'.format(count))
                    transaction.commit()
            else:
                logger.warning(
                    '{sitepath} will not be modified because an exception occured.'
                    .format(sitepath=brain.getURL()))
        finally:
            # Always remove the scratch directory, even when reading the
            # blob or writing the object back fails.
            shutil.rmtree(tmp_dir)
    transaction.commit()
    print('Done.')
Exemple #2
0
    def create_object(self, folder, type_, info):
        """Create a content object in ``folder`` from an uploaded file.

        The object id is chosen by the folder's ``INameChooser`` from the
        lower-cased upload filename with every ``'aq_'`` substring removed.
        For ``Image``, ``File``, ``Video`` and ``Audio`` types the temporary
        file is first passed through ``exiftool`` (when available) on a
        best-effort basis: a failure is logged and the upload continues.

        Types whose name contains ``'Image'`` get a ``NamedBlobImage`` in the
        ``image`` field (with an optional focal point taken from the request
        form values ``focalX``/``focalY``); all other types get a
        ``NamedBlobFile`` in the ``file`` field. Non-empty request form
        values for the fields returned by ``get_upload_fields()`` are passed
        through to the new object; ``tags``/``subject`` are split on ``';'``
        and stored as ``subject``.

        :param folder: container the new object is created in.
        :param type_: portal type name of the object to create.
        :param info: mapping with ``'name'`` (upload filename) and
            ``'tmp_file'`` (path of the uploaded data on disk).
        :returns: the result of ``api.content.create``.
        """
        # safe_unicode accepts both byte and unicode filenames, unlike a
        # bare ``.decode()`` which only works on byte strings.
        filename = ploneutils.safe_unicode(info['name'])
        chooser = INameChooser(folder)
        chooser_name = filename.lower().replace('aq_', '')
        newid = chooser.chooseName(chooser_name, folder.aq_parent)

        # strip metadata from file
        if (type_ in ('Image', 'File', 'Video', 'Audio')
                and exiftool is not None and 'tmp_file' in info):
            try:
                exiftool(info['tmp_file'])
            except Exception:
                # Best effort: an upload must not fail because metadata
                # could not be removed.
                logger.warning('Could not strip metadata from file: %s',
                               info['tmp_file'], exc_info=True)

        # Binary mode: the upload is raw file content, not text.
        with open(info['tmp_file'], 'rb') as fi:
            # Try to determine which kind of NamedBlob we need
            # This will suffice for standard p.a.contenttypes File/Image
            # and any other custom type that would have 'File' or 'Image' in
            # its type name
            form = self.request.form
            create_opts = dict(type=type_, id=newid, container=folder)
            if 'Image' in type_:
                image = NamedBlobImage(data=fi, filename=filename)
                try:
                    image.focal_point = [
                        float(form.get('focalX')),
                        float(form.get('focalY'))
                    ]
                except Exception:
                    # Focal point is optional; missing or malformed
                    # coordinates simply leave it unset.
                    pass
                create_opts['image'] = image
            else:
                create_opts['file'] = NamedBlobFile(data=fi, filename=filename)

            for field in get_upload_fields():
                field_name = field.get('name')
                if not field_name:
                    continue
                value = form.get(field_name)
                if not value:
                    continue
                if field_name in ('tags', 'subject'):
                    # tags needs to be converted
                    create_opts['subject'] = value.split(';')
                else:
                    create_opts[field_name] = value
            return api.content.create(**create_opts)