def _strip_pdf_metadata(filepath):
    """Run the metadata-stripping tools over the PDF at *filepath*.

    The tools run in the order gs_pdf, exiftool, qpdf. The first tool that
    raises is logged together with its traceback and the remaining tools are
    skipped.

    Returns True when all three tools ran without raising, False otherwise.
    """
    try:
        gs_pdf(filepath)
    except Exception:
        logger.warn(
            u'Could not strip additional metadata with gs {}'.format(
                filepath),
            exc_info=True)
        return False

    try:
        exiftool(filepath)
    except Exception:
        logger.warn(
            u'Could not strip metadata with exiftool {}'.format(
                filepath),
            exc_info=True)
        return False

    try:
        qpdf(filepath)
    except Exception:
        logger.warn(
            u'Could not strip additional metadata with qpdf {}'.format(
                filepath),
            exc_info=True)
        return False

    return True


def process_PDFs(site):
    """Strip metadata from every PDF ``File`` found in the site catalog.

    For each catalog result with ``portal_type='File'`` and
    ``contentType='application/pdf'`` the blob is copied into a fresh
    temporary directory, run through the stripping tools, and -- only when
    every tool succeeded -- written back to the object as a new
    ``NamedBlobFile`` under the original filename. Objects whose processing
    failed are left untouched and a warning is logged.

    The transaction is committed after every 50 replaced PDFs and once more
    after the loop. Progress is reported with ``print``.

    :param site: object exposing ``portal_catalog``.
    """
    cat = site.portal_catalog
    filebrains = cat(portal_type='File', contentType='application/pdf')
    print('Processing {} PDFs'.format(len(filebrains)))
    count = 0
    for brain in filebrains:
        obj = brain.getObject()
        tmp_dir = tempfile.mkdtemp()
        # mkdtemp() leaves cleanup to the caller; the finally clause makes
        # sure the directory is removed even when copying or replacing the
        # blob raises.
        try:
            filepath = os.path.join(
                tmp_dir, obj.file.filename).encode('utf-8').strip()

            # Copy the stored blob to disk so the external tools can work
            # on a regular file.
            blob = obj.file.open('r')
            try:
                with open(filepath, 'wb') as tmp_file:
                    tmp_file.write(blob.read())
            finally:
                blob.close()

            if _strip_pdf_metadata(filepath):
                with open(filepath, 'rb') as cleaned:
                    obj.file = NamedBlobFile(
                        cleaned, filename=obj.file.filename)
                count += 1
                # Commit in batches of 50 replaced PDFs.
                if count % 50 == 0:
                    print('Processed {} PDFs'.format(count))
                    transaction.commit()
            else:
                logger.warn(
                    '{sitepath} will not be modified because an exception occured.'
                    .format(sitepath=brain.getURL()))
        finally:
            shutil.rmtree(tmp_dir)
    transaction.commit()
    print('Done.')
def create_object(self, folder, type_, info):
    """Create a content object of *type_* in *folder* from an uploaded file.

    :param folder: container the new object is created in.
    :param type_: portal type name; a name containing ``'Image'`` stores the
        upload in the ``image`` field, anything else in the ``file`` field.
    :param info: mapping with the upload's ``'name'`` (original filename)
        and ``'tmp_file'`` (path of the uploaded data on disk).
    :returns: whatever ``api.content.create`` returns for the new object.
    :raises KeyError: if ``info`` lacks ``'name'`` or ``'tmp_file'``.
    """
    # Normalize the filename to text once. Unlike a bare ``.decode('utf8')``
    # this also works when the name already is text.
    filename = ploneutils.safe_unicode(info['name'])

    chooser = INameChooser(folder)
    # NOTE(review): 'aq_' is removed from the id candidate, presumably to
    # avoid clashing with acquisition attribute names -- confirm.
    chooser_name = filename.lower().replace('aq_', '')
    newid = chooser.chooseName(chooser_name, folder.aq_parent)

    # Strip metadata from the file. This is best effort: a failure is logged
    # and the upload continues with the unmodified file.
    strippable = type_ in ('Image', 'File', 'Video', 'Audio')
    if strippable and exiftool is not None and 'tmp_file' in info:
        try:
            exiftool(info['tmp_file'])
        except Exception:
            logger.warn('Could not strip metadata from file: %s' %
                        info['tmp_file'])

    # Uploads are binary data (images, video, ...), so read them in binary
    # mode; text mode may translate newlines or attempt to decode the bytes.
    with open(info['tmp_file'], 'rb') as fi:
        # Try to determine which kind of NamedBlob we need.
        # This will suffice for standard p.a.contenttypes File/Image
        # and any other custom type that would have 'File' or 'Image' in
        # its type name.
        create_opts = dict(type=type_, id=newid, container=folder)
        if 'Image' in type_:
            image = NamedBlobImage(data=fi, filename=filename)
            try:
                image.focal_point = [
                    float(self.request.form.get('focalX')),
                    float(self.request.form.get('focalY'))
                ]
            except Exception:
                # The focal point is optional: missing or malformed
                # coordinates simply leave it unset.
                pass
            create_opts['image'] = image
        else:
            create_opts['file'] = NamedBlobFile(data=fi, filename=filename)

        # Copy any additional, non-empty upload fields from the request.
        for field in get_upload_fields():
            field_name = field.get('name')
            if not field_name:
                continue
            value = self.request.form.get(field_name)
            if not value:
                continue
            if field_name in ('tags', 'subject'):
                # tags arrive as one ';'-separated string and are stored
                # as a list under 'subject'
                create_opts['subject'] = value.split(';')
            else:
                create_opts[field_name] = value
        return api.content.create(**create_opts)