def page_index(request):
    """Return the data for the page index: all pages ordered by date.

    Pages are queried from the default database of the
    ``document_storage`` feature and ordered by ``date_time``.

    :param request: the incoming request; not used by this function.
    :returns: a dict with two keys: ``pages`` (the ordered page query) and
        ``thumbnail`` (the ``_get_thumbnail`` helper).
    """
    import logging

    logger = logging.getLogger(__name__)

    db = app.get_feature('document_storage').default_db
    pages = Page.objects(db).order_by('date_time')

    # Per-page diagnostics go through ``logging`` instead of a bare
    # ``print``: printing a unicode summary to a stdout without a declared
    # encoding raises UnicodeEncodeError and would abort this function.
    # ``%r`` keeps the output ASCII-safe, and the lazy arguments are only
    # formatted when debug logging is actually enabled.
    for p in pages:
        logger.debug('page pk=%r: %r, summary=%r, image=%r',
                     p.pk, p, p.summary, p.image)

    return {'pages': pages, 'thumbnail': _get_thumbnail}
def add_pages(args):
    """Import image files as ``Page`` documents, yielding progress messages.

    This is a generator: nothing happens until it is iterated. For every
    path in ``args.paths`` it computes the file hash, skips files whose hash
    is already stored, optionally runs OCR, converts the image to a
    temporary file and saves the page to the default database of the
    ``document_storage`` feature.

    :param args: argument namespace; the attributes read here are
        ``paths``, ``language``, ``summary``, ``summary_prefix``,
        ``no_ocr``, ``skip_ocr_errors`` and ``format``.
    :returns: a generator of human-readable status strings.
    :raises CommandError: if one of the paths does not exist, or if OCR
        fails and ``args.skip_ocr_errors`` is not set.
    """
    _args_to_unicode(args, ['language', 'summary', 'summary_prefix'])

    db = app.get_feature('document_storage').default_db

    # Check that every file exists before importing anything. An explicit
    # raise is used instead of ``assert`` because assertions are stripped
    # when Python runs with -O, which would silently disable the check.
    for path in args.paths:
        if not os.path.exists(path):
            raise CommandError('File not found: {0}'.format(path))

    # import
    for path in args.paths:
        yield '* importing {0} (language {1})'.format(path, args.language)

        # Close the source file as soon as it has been hashed.
        with open(path, 'rb') as source:
            fingerprint = get_file_hash(source)

        # check file hash uniqueness
        if Page.objects(db).where(source_fingerprint=fingerprint):
            yield '...already in the database.'
            continue

        p = Page()
        p.summary = args.summary or get_summary_from_path(path)
        if args.summary_prefix:
            p.summary = u'{0} {1}'.format(args.summary_prefix, p.summary)
        p.language = args.language or None
        p.source_fingerprint = fingerprint

        if not args.no_ocr:
            try:
                p.details = image_to_text(path=path, language=p.language)
            except RuntimeError as e:
                if not args.skip_ocr_errors:
                    raise CommandError(e)
                yield '(OCR failed, saving only image itself)'

        # usually we don't need heavy formats like ppm or tiff even for OCR
        img = Image.open(path)
        if args.format:
            fmt = args.format
        elif img.format not in IMAGE_FORMATS:
            fmt = IMAGE_FORMATS[0]
        else:
            fmt = img.format
        img.save(TMP_FILENAME, fmt)

        # The temporary file is rewritten on the next iteration, so the
        # handle must not outlive this one; close it once the page is saved
        # (or if saving fails).
        image_file = open(TMP_FILENAME, 'rb')
        try:
            p['image'] = image_file
            # provide original path so that the resulting filename is
            # inherited
            p['image'].path = path
            p.save(db)
        finally:
            image_file.close()