def get_zip_preview(file_id, path):
    """Return the path of the JSON preview generated for the file at *path*.

    ``file_id`` is accepted for interface parity with the sibling preview
    helpers but is not used by this function.
    """
    manager = PreviewManager(pathconst.TEMP, create_folder=True)
    return manager.get_json_preview(file_path=path)
def test_to_pdf():
    """Forcing a PDF preview of an image must raise UnavailablePreviewType."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_pdf_preview(file_path=IMAGE_FILE_PATH, force=True)
def get_one(self, file_id, revision_id=None):
    """Render a single file (optionally at a given revision).

    Fetches the file content, writes it to a local cache folder, counts its
    preview pages, and returns a DictLikeClass with the dictified file, a
    fake-api context, the page count, and one preview URL per page.

    :param file_id: id of the content to display (coerced to int below).
    :param revision_id: optional revision; when set, content is read from
        that revision instead of the current one.
    """
    user = tmpl_context.current_user
    file_id = int(file_id)
    # NOTE(review): duplicate of the assignment two lines above — one of the
    # two `user = tmpl_context.current_user` lines is redundant.
    user = tmpl_context.current_user
    workspace = tmpl_context.workspace
    # Archived/deleted content is intentionally visible here.
    content_api = ContentApi(
        user,
        show_archived=True,
        show_deleted=True,
    )
    if revision_id:
        file_content = content_api.get_one_from_revision(
            file_id, self._item_type, workspace, revision_id).file_content
    else:
        file_content = content_api.get_one(
            file_id, self._item_type, workspace).file_content
    # NOTE(review): this second lookup omits the workspace argument, unlike
    # the calls above — confirm that is intentional.
    file_name = content_api.get_one(file_id, self._item_type).file_name
    # NOTE(review): hard-coded developer path; should come from configuration.
    cache_path = '/home/alexis/Pictures/cache/'
    # NOTE(review): `file` shadows the builtin; it is also re-bound to a
    # content object further down.
    file = BytesIO()
    file.write(file_content)
    current_user_content = Context(CTX.CURRENT_USER, current_user=user).toDict(user)
    current_user_content.roles.sort(key=lambda role: role.workspace.name)
    # Copy the in-memory content into the cache folder in 1 KiB chunks so the
    # preview generator can read it from disk.
    with open('{}{}'.format(cache_path, file_name), 'wb') as temp_file:
        file.seek(0, 0)
        buffer = file.read(1024)
        while buffer:
            temp_file.write(buffer)
            buffer = file.read(1024)
    preview_manager = PreviewManager(cache_path, create_folder=True)
    nb_page = preview_manager.get_nb_page(
        file_path='/home/alexis/Pictures/cache/{}'.format(file_name),
    )
    fake_api_breadcrumb = self.get_breadcrumb(file_id)
    fake_api_content = DictLikeClass(
        breadcrumb=fake_api_breadcrumb, current_user=current_user_content)
    fake_api = Context(CTX.FOLDER, current_user=user).toDict(fake_api_content)
    # Re-fetch the content object (not just its bytes) for serialization.
    if revision_id:
        file = content_api.get_one_from_revision(
            file_id, self._item_type, workspace, revision_id)
    else:
        file = content_api.get_one(file_id, self._item_type, workspace)
    dictified_file = Context(self._get_one_context, current_user=user).toDict(file, 'file')
    # One preview URL per page of the document.
    url = []
    for i in range(int(nb_page)):
        url.append('/previews/{}/pages/{}'.format(file_id, i))
    return DictLikeClass(
        result=dictified_file,
        fake_api=fake_api,
        nb_page=nb_page,
        url=url,
    )
def test_cache_dir_is_created() -> None:
    """Instantiating PreviewManager with create_folder=True must create the cache dir."""
    # Wipe any leftover cache first so the test really exercises creation.
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    _ = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert os.path.exists(CACHE_DIR)
def test_get_file_hash_with_page() -> None:
    """_get_file_hash must suffix '-pageN' onto the file hash."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    # `file_hash` instead of `hash`: avoid shadowing the builtin.
    file_hash = preview_manager._get_file_hash("/tmp/image.jpeg", page=3)
    assert file_hash == "7f8df7223d8be60a7ac8a9bf7bd1df2a-page3"
def test_page_number__extension_forced() -> None:
    """An explicit file_ext must drive pagination when the file has no extension."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(
        file_path=HTML_FILE_PATH_NO_EXTENSION, file_ext=".html"
    ) == 7
def test_to_text() -> None:
    """Images advertise no text preview, and forcing one must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.has_text_preview(file_path=IMAGE_FILE_PATH) is False
    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_text_preview(file_path=IMAGE_FILE_PATH, force=True)
def test_to_pdf__err_timeout() -> None:
    """Converting the ODT fixture must hit the configured subprocess timeout.

    The manager is built *outside* the ``pytest.raises`` block: the original
    constructed it inside, so an unexpected failure in
    ``PreviewManager.__init__`` would have been silently accepted as the
    expected ``TimeoutExpired``. Only the conversion call should be guarded.
    """
    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(subprocess.TimeoutExpired):
        manager.get_pdf_preview(file_path=ODT_FILE_PATH, force=True)
def test_get_nb_page() -> None:
    """The ODT fixture contains exactly two pages."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(file_path=ODT_FILE_PATH) == 2
def test_to_pdf_no_extension():
    """Extension-less image: no PDF preview advertised, and generation raises."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.has_pdf_preview(file_path=IMAGE_FILE_PATH_NO_EXTENSION) is False
    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_pdf_preview(file_path=IMAGE_FILE_PATH_NO_EXTENSION)
def preview_router(index_name: str, cache_path: str, path_property: str, tags: List[str] = ["preview"]):
    """Build a FastAPI router exposing JPEG previews of indexed documents.

    Documents are looked up in the Elasticsearch index *index_name*; the
    field *path_property* of the hit holds the document's filesystem path.
    Previews are generated (and cached under *cache_path*) with
    preview_generator's PreviewManager.

    NOTE(review): the mutable default ``tags=["preview"]`` is shared across
    calls; a ``None`` sentinel would be safer.
    """
    router = APIRouter()
    query_builder = ElasticsearchAPIQueryBuilder()
    # NOTE(review): `conf` is never used in this factory — confirm whether
    # get_config() is called for its side effects only.
    conf = get_config()
    manager = PreviewManager(cache_path, create_folder=True)

    # Restrict every query built by query_builder to the document id from
    # the URL path.
    @query_builder.filter()
    def filter_config(id: str = Path(None, description="Id of the document to preview.")):
        return {
            "ids": {
                "values": [id]
            }
        }

    @router.get("/preview/{id}", tags=tags)
    async def preview(
            page: Optional[int] = Query(0, ge=0, description="The page of the document to generate the preview."),
            width: Optional[int] = Query(300, ge=1, le=1024, description="The width of the generated preview."),
            height: Optional[int] = Query(200, ge=1, le=1024, description="The height of the generated preview."),
            query_body: Dict = Depends(query_builder.build(source=[path_property])),
            es_client: Elasticsearch = Depends(get_client),
            auth_header: Dict = Depends(get_auth_header)) -> FileResponse:
        """Return the JPEG preview of one page of the document, or 404."""
        resp = es_client.search(
            body=query_body,
            headers=auth_header,
            index=index_name
        )
        if resp["hits"]["total"]["value"] > 0:
            # Only the first hit is considered; ids filter makes it unique.
            document_path = resp["hits"]["hits"][0]["_source"][path_property]
            path_to_preview_image = manager.get_jpeg_preview(document_path,
                                                             page=page,
                                                             width=width,
                                                             height=height,
                                                             )
            return FileResponse(path_to_preview_image)
        else:
            raise HTTPException(status_code=404, detail="Document not found")

    @router.get("/preview/info/{id}", tags=tags, response_model=PreviewInfoModel)
    async def preview_info(
            query_body: Dict = Depends(query_builder.build(source=[path_property])),
            es_client: Elasticsearch = Depends(get_client),
            auth_header: Dict = Depends(get_auth_header)) -> FileResponse:
        """Report whether a preview is supported and how many pages exist."""
        resp = es_client.search(
            body=query_body,
            headers=auth_header,
            index=index_name
        )
        if resp["hits"]["total"]["value"] > 0:
            document_path = resp["hits"]["hits"][0]["_source"][path_property]
            if os.path.isfile(document_path):
                supported = manager.has_jpeg_preview(document_path)
                pages = manager.get_page_nb(document_path)
                return PreviewInfoModel(supported=supported, pages=pages)
            else:
                # Indexed but missing on disk: report "not supported".
                return PreviewInfoModel(supported=False, pages=0)
        else:
            raise HTTPException(status_code=404, detail="Document not found")

    return router
def test_page_number():
    """A plain image counts as a single page."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH) == 1
def test_page_number__no_extension():
    """An extension-less image still counts as a single page."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH_NO_EXTENSION) == 1
def build_preview_manager():
    """Build a PreviewManager rooted at CACHE_PATH, creating the folder if needed."""
    manager = PreviewManager(CACHE_PATH, create_folder=True)
    return manager
def test_zip_to_text():
    """A zip archive must yield a non-empty text preview on disk."""
    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    path_to_file = manager.get_text_preview(
        file_path=os.path.join(CURRENT_DIR, 'the_zip.zip'))
    # Assert truthiness directly: `== True` comparison is an anti-pattern
    # (flake8 E712); os.path.exists already returns a bool.
    assert os.path.exists(path_to_file)
    assert os.path.getsize(path_to_file) > 0
def test_get_nb_page_no_extension() -> None:
    """Page counting works when the extension is supplied via file_ext."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(
        file_path=ODT_FILE_PATH_NO_EXTENSION, file_ext=ODT_FILE_EXT
    ) == 2
def test_page_number() -> None:
    """The HTML fixture paginates into seven pages."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.get_page_nb(file_path=HTML_FILE_PATH) == 7
def test_get_nb_page(file_path):
    """Counting pages of an unsupported mime type must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_page_nb(file_path=file_path, file_ext=".bin")
def test_get_preview_name_with_page() -> None:
    """_get_preview_name must append '-pageN' to the context hash."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    context_hash = preview_manager.get_preview_context(
        "/tmp/image.jpeg", file_ext=".jpeg").hash
    # `preview_name` instead of `hash`: avoid shadowing the builtin.
    preview_name = preview_manager._get_preview_name(context_hash, page=3)
    assert preview_name == "7f8df7223d8be60a7ac8a9bf7bd1df2a-page3"
def test_to_jpeg(file_path: str) -> None:
    """JPEG preview of an unsupported mime type must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_jpeg_preview(file_path=file_path, height=256, width=512)
def test_to_json() -> None:
    """Images advertise a JSON preview, and forced generation succeeds."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert preview_manager.has_json_preview(file_path=IMAGE_FILE_PATH) is True
    preview_manager.get_json_preview(file_path=IMAGE_FILE_PATH, force=True)
def test_to_jpeg__default_size(file_path: str) -> None:
    """Default-size JPEG preview of an unsupported mime type must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_jpeg_preview(file_path=file_path, file_ext=".bin")
def test_get_file_hash_with_size() -> None:
    """_get_file_hash must suffix 'WxH' when dimensions are given."""
    # Import kept local, as in the original — presumably to avoid a
    # module-import cost or cycle; TODO confirm before hoisting.
    from preview_generator.utils import ImgDims

    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    dims = ImgDims(width=512, height=256)
    assert preview_manager._get_file_hash(
        "/tmp/image.jpeg", dims
    ) == "7f8df7223d8be60a7ac8a9bf7bd1df2a-512x256"
def test_to_pdf(file_path: str) -> None:
    """Forced PDF preview of an unsupported mime type must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_pdf_preview(file_path=file_path, force=True)
def test_get_nb_page():
    """An image fixture counts as one page."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    # FIXME must add parameter force=True/False in the API
    assert preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH) == 1
def test_has_html_preview(file_path: str) -> None:
    """Probing HTML-preview support for an unsupported mime type must raise."""
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    with pytest.raises(UnsupportedMimeType):
        preview_manager.has_html_preview(file_path=file_path, file_ext=".bin")
def test_zip_to_text():
    """An image must advertise and produce a non-empty text preview."""
    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert manager.has_text_preview(file_path=IMAGE_FILE_PATH) is True
    path_to_file = manager.get_text_preview(file_path=IMAGE_FILE_PATH)
    # Assert truthiness directly: `== True` comparison is an anti-pattern
    # (flake8 E712); os.path.exists already returns a bool.
    assert os.path.exists(path_to_file)
    assert os.path.getsize(path_to_file) > 0
def test_to_pdf():
    """An image must advertise and produce a PDF preview."""
    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert manager.has_pdf_preview(file_path=IMAGE_FILE_PATH) is True
    path_to_file = manager.get_pdf_preview(file_path=IMAGE_FILE_PATH, force=True)
    # The original assigned the result but never inspected it; verify the
    # preview file was actually written to disk.
    assert os.path.exists(path_to_file)
def lambda_handler(event, context):
    """AWS Lambda entry point: generate resized previews of an S3 document.

    Expected ``event`` keys (all strings unless noted): ``src_bucket``,
    ``src_path``, ``dest_bucket``, ``dest_path``, ``dimensions`` (list of
    dicts with ``width``/``height``), ``format``, ``ok_sns``, ``error_sns``,
    ``request_id``. Empty ``src_bucket``/``dest_bucket`` switches to local
    filesystem paths for development. Returns "OK" on success, "ERROR" on
    any failure (after publishing to the error SNS topic).
    """
    ### Inputs
    print("#### Received event is: " + str(event))
    src_bucket = event["src_bucket"]
    src_path = event["src_path"]
    dest_bucket = event["dest_bucket"]
    dest_path = event["dest_path"]
    dimensions = event["dimensions"]
    output_format = event["format"]
    ok_sns = event["ok_sns"]
    error_sns = event["error_sns"]
    request_id = event["request_id"]
    try:
        ### Remove the leading slash from src_path and dest_path
        # if src_path[0] == "/":
        #     scr_path = src_path[1:]
        # if dest_path[0] == "/":
        #     dest_path = dest_path[1:]
        ### Get the maximum size from the dimensions received
        # NOTE(review): height is only updated when width sets a new maximum,
        # so the pair tracks the widest dimension entry — confirm intended.
        maximum_width = 0
        maximum_height = 0
        for dimension in dimensions:
            if int(dimension["width"]) > maximum_width:
                maximum_width = int(dimension["width"])
                maximum_height = int(dimension["height"])
        ### Copy the file from S3:
        print("Copy file from source S3")
        if src_bucket != "":
            client = boto3.client('s3')
            source_file = "/tmp/" + src_path.split("/")[-1]
            client.download_file(src_bucket, src_path, source_file)
        else:
            # If the src_bucket == 0 we will not copy from S3, and use local path.
            # This is for local testing
            source_file = src_path
        print("Successfully copied file from source S3")
        ### If file is Excel and it is big, make it smaller.
        if ".xls" in src_path and os.path.getsize(source_file) > 5000000: #5MB
            print("It seems to be a big Excel file. Try to keep only first 100 lines and columns")
            # opening the source excel file
            wb1 = openpyxl.load_workbook(source_file)
            ws1 = wb1.worksheets[0]
            # opening the destination excel file
            truncated_file = source_file + ".truncated." + source_file.split(".")[-1]
            # Create new empty file
            wb = openpyxl.Workbook()
            ws = wb.active
            ws.title = "Changed Sheet"
            wb.save(filename = truncated_file)
            wb2 = openpyxl.load_workbook(truncated_file)
            ws2 = wb2.active
            # We will write only the first 100 lines and columns
            mr = 100
            mc = 100
            # copying the cell values from source
            # excel file to destination excel file
            for i in range (1, mr + 1):
                for j in range (1, mc + 1):
                    # reading cell value from source excel file
                    c = ws1.cell(row = i, column = j)
                    # writing the read value to destination excel file
                    ws2.cell(row = i, column = j).value = c.value
            # saving the destination excel file
            print("Saving truncated file to: " + truncated_file)
            wb2.save(str(truncated_file))
            print("Saved")
            source_file = truncated_file
            print("Successfuly truncated the big Excel file")
        ### Generate preview
        print("Generating preview")
        manager = PreviewManager("/tmp/cache", create_folder= True)
        preview_image = manager.get_jpeg_preview(source_file, width=maximum_width, height=maximum_height)
        print("Successfully generated preview")
        ### Make sure the image has the expected size and format
        print("Resize and change the format")
        # Open the preview image
        img = Image.open(preview_image)
        # Calculate the differences between actual sizes and wanted sizes
        delta_w = maximum_width - img.width
        delta_h = maximum_height - img.height
        # Then actual sizes is odd (not even), the we need to add one more pixel
        precision_pixel_w = maximum_width - img.width - int(delta_w/2) - int(delta_w-(delta_w/2))
        precision_pixel_h = maximum_height - img.height - int(delta_h/2) - int(delta_h-(delta_h/2))
        # Resize the image by padding with a white border up to the target size
        ltrb_border=(int(delta_w/2), int(delta_h/2), int(delta_w-(delta_w/2)) + precision_pixel_w, int(delta_h-(delta_h/2)) + precision_pixel_h)
        img_with_border = ImageOps.expand(img, border=ltrb_border, fill='white')
        output_file = "/tmp/output_preview." + output_format
        img_with_border.convert('RGB').save(output_file)
        print("Successfully resized and formatted for the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") wanted size")
        ### Copy to S3
        print("Copy the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") preview to destination bucket")
        if dest_bucket != "":
            client = boto3.client('s3')
            client.upload_file(output_file, dest_bucket, dest_path.replace("#{width}", str(maximum_width)).replace("#{height}", str(maximum_height)))
        else:
            # If the dest_bucket == 0 we will not copy from S3, but will copy on local path
            copyfile(output_file, dest_path.replace("#{width}", str(maximum_width)).replace("#{height}", str(maximum_height)))
        print("Successfully copied the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") preview to destination bucket")
        ### Transform, and copy all other wanted dimensions to S3 bucket
        for dimension in dimensions:
            # Get dimensions
            current_width = int(dimension["width"])
            current_height = int(dimension["height"])
            # Skip if the dimension is the maximum one
            if current_width == maximum_width and current_height == maximum_height:
                continue
            # Open image (the maximum-size preview is the downscale source)
            image = Image.open(output_file)
            new_image = image.resize((current_width, current_height))
            output_file_resized = "/tmp/output_preview_resized." + output_format
            new_image.save(output_file_resized)
            ### Copy to S3
            print("Copy the dimension (" + str(current_width) + ":" + str(current_height) + ") to destination bucket")
            if dest_bucket != "":
                client = boto3.client('s3')
                client.upload_file(output_file_resized, dest_bucket, dest_path.replace("#{width}", str(current_width)).replace("#{height}", str(current_height)))
            else:
                # If the dest_bucket == 0 we will not copy from S3, but will copy on local path
                copyfile(output_file_resized, dest_path.replace("#{width}", str(current_width)).replace("#{height}", str(current_height)))
            print("Successfully copied the dimension (" + str(current_width) + ":" + str(current_height) + ") preview to destination bucket")
        ### Send ok message to ok SNS
        print("Sending OK to SNS")
        if src_bucket != "":
            # If the src_bucket is not empty, then the function is running locally for dev purposes, so no need to send SNS
            # Compose the OK JSON
            okJSON = {
                "success": "true",
                "request_id": request_id,
                "src_bucket": src_bucket,
                "src_path": src_path
            }
            client = boto3.client('sns')
            response = client.publish(
                TopicArn=ok_sns,
                Message=str(okJSON)
            )
            print("Successfully send OK to SNS")
        return "OK"
    except Exception as e:
        # Broad catch is deliberate: the Lambda must report ERROR to SNS
        # rather than crash on any failure.
        print("Error: {}".format(e))
        tracebackError = traceback.format_exc()
        print(tracebackError)
        ### Send error message to error SNS
        print("Sending ERROR to SNS")
        # Compose the ERROR JSON
        errorJSON = {
            "success": "false",
            "request_id": request_id,
            "src_bucket": src_bucket,
            "src_path": src_path
        }
        if src_bucket != "":
            # If the src_bucket is not empty, then the function is running locally for dev purposes, so no need to send SNS
            client = boto3.client('sns')
            response = client.publish(
                TopicArn=error_sns,
                Message=str(errorJSON)
            )
            print("Successfully send ERROR to SNS")
        return "ERROR"
def get_docs_preview(file_id, path):
    """Return the path of the PDF preview generated for the file at *path*.

    ``file_id`` is accepted for interface parity with the sibling preview
    helpers but is not used by this function.
    """
    manager = PreviewManager(pathconst.TEMP, create_folder=True)
    return manager.get_pdf_preview(file_path=path)