Exemple #1
0
def get_zip_preview(file_id, path):
    """Return the path of the JSON preview generated for the file at *path*.

    The preview is produced by a ``PreviewManager`` whose cache folder is
    ``pathconst.TEMP`` (created on demand). ``file_id`` is accepted but not
    used by this function.
    """
    preview_manager = PreviewManager(pathconst.TEMP, create_folder=True)
    return preview_manager.get_json_preview(file_path=path)
Exemple #2
0
def test_to_pdf():
    """A forced PDF preview of IMAGE_FILE_PATH must raise UnavailablePreviewType."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_pdf_preview(file_path=IMAGE_FILE_PATH, force=True)
Exemple #3
0
    def get_one(self, file_id, revision_id=None):
        """Collect the data needed to display a file together with its preview pages.

        The file content (taken from ``revision_id`` when one is given) is
        copied into the preview cache folder, the number of preview pages is
        computed from that copy, and one preview URL is generated per page.

        :param file_id: identifier of the file; converted with ``int()``.
        :param revision_id: optional revision whose content is previewed.
        :return: a ``DictLikeClass`` with ``result`` (the serialized file),
            ``fake_api`` (breadcrumb and current user), ``nb_page`` and
            ``url`` (list of ``/previews/<file_id>/pages/<n>`` paths).
        """
        user = tmpl_context.current_user
        file_id = int(file_id)
        workspace = tmpl_context.workspace
        content_api = ContentApi(
            user,
            show_archived=True,
            show_deleted=True,
        )
        if revision_id:
            file_content = content_api.get_one_from_revision(
                file_id, self._item_type, workspace, revision_id).file_content
        else:
            file_content = content_api.get_one(file_id, self._item_type,
                                               workspace).file_content

        # The name is always read from get_one() without a workspace, even
        # when the content of a specific revision is previewed.
        file_name = content_api.get_one(file_id, self._item_type).file_name

        # TODO(review): developer-specific absolute path; this should come
        # from configuration. Left unchanged here because other code may read
        # previews from the same location.
        cache_path = '/home/alexis/Pictures/cache/'
        # NOTE(review): file_name is inserted into the path as-is; confirm it
        # can never contain path separators.
        cached_file_path = '{}{}'.format(cache_path, file_name)

        current_user_content = Context(CTX.CURRENT_USER,
                                       current_user=user).toDict(user)
        current_user_content.roles.sort(key=lambda role: role.workspace.name)

        # Build the manager before writing: it is asked to create the cache
        # folder, which the open() below needs to exist.
        preview_manager = PreviewManager(cache_path, create_folder=True)
        with open(cached_file_path, 'wb') as temp_file:
            temp_file.write(file_content)

        nb_page = preview_manager.get_nb_page(file_path=cached_file_path)

        fake_api_breadcrumb = self.get_breadcrumb(file_id)
        fake_api_content = DictLikeClass(breadcrumb=fake_api_breadcrumb,
                                         current_user=current_user_content)
        fake_api = Context(CTX.FOLDER,
                           current_user=user).toDict(fake_api_content)

        if revision_id:
            content = content_api.get_one_from_revision(
                file_id, self._item_type, workspace, revision_id)
        else:
            content = content_api.get_one(file_id, self._item_type, workspace)

        dictified_file = Context(self._get_one_context,
                                 current_user=user).toDict(content, 'file')

        # One preview URL per page, numbered from 0.
        url = ['/previews/{}/pages/{}'.format(file_id, page)
               for page in range(int(nb_page))]

        return DictLikeClass(
            result=dictified_file,
            fake_api=fake_api,
            nb_page=nb_page,
            url=url,
        )
def test_cache_dir_is_created() -> None:
    """Building a PreviewManager with create_folder=True must create CACHE_DIR."""
    # Remove any leftover folder so its existence afterwards is meaningful.
    shutil.rmtree(CACHE_DIR, ignore_errors=True)

    PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)

    cache_dir_exists = os.path.exists(CACHE_DIR)
    assert cache_dir_exists
def test_get_file_hash_with_page() -> None:
    """The hash computed for a given page must end with the ``-page<N>`` suffix."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    expected_hash = "7f8df7223d8be60a7ac8a9bf7bd1df2a-page3"

    file_hash = preview_manager._get_file_hash("/tmp/image.jpeg", page=3)

    assert file_hash == expected_hash
Exemple #6
0
def test_page_number__extension_forced() -> None:
    """Passing ``file_ext=".html"`` for the extension-less HTML fixture yields 7 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(
        file_path=HTML_FILE_PATH_NO_EXTENSION,
        file_ext=".html",
    )
    assert page_count == 7
Exemple #7
0
def test_to_text() -> None:
    """The image fixture has no text preview, and requesting one must raise."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    text_available = preview_manager.has_text_preview(file_path=IMAGE_FILE_PATH)
    assert text_available is False

    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_text_preview(file_path=IMAGE_FILE_PATH, force=True)
Exemple #8
0
def test_to_pdf__err_timeout() -> None:
    """Building the manager and forcing a PDF preview of the ODT fixture must time out.

    NOTE(review): whatever makes the conversion time out is configured
    outside this function — confirm where.
    """
    # Both calls stay inside the raises block, as in the original test.
    with pytest.raises(subprocess.TimeoutExpired):
        preview_manager = PreviewManager(
            cache_folder_path=CACHE_DIR,
            create_folder=True,
        )
        preview_manager.get_pdf_preview(file_path=ODT_FILE_PATH, force=True)
Exemple #9
0
def test_get_nb_page() -> None:
    """The ODT fixture is expected to have two pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(file_path=ODT_FILE_PATH)
    assert page_count == 2
def test_to_pdf_no_extension():
    """The extension-less image fixture has no PDF preview, and asking for one raises."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    pdf_available = preview_manager.has_pdf_preview(
        file_path=IMAGE_FILE_PATH_NO_EXTENSION,
    )
    assert pdf_available is False

    with pytest.raises(UnavailablePreviewType):
        preview_manager.get_pdf_preview(file_path=IMAGE_FILE_PATH_NO_EXTENSION)
Exemple #11
0
def preview_router(index_name: str, cache_path: str, path_property: str,
                   tags: Optional[List[str]] = None):
    """Build an ``APIRouter`` exposing preview endpoints for indexed documents.

    Two routes are registered:

    * ``GET /preview/{id}`` returns a JPEG preview of one page of the document.
    * ``GET /preview/info/{id}`` reports whether a JPEG preview is supported
      and how many pages the document has.

    Both routes look the document up in Elasticsearch by id and read the file
    location from the ``path_property`` field of the first hit.

    :param index_name: Elasticsearch index that is searched.
    :param cache_path: folder handed to ``PreviewManager`` (created on demand).
    :param path_property: name of the ``_source`` field holding the file path.
    :param tags: tags attached to both routes; defaults to ``["preview"]``.
    :return: the configured router.
    """
    # A fresh list per call, so no mutable default is shared between routers.
    if tags is None:
        tags = ["preview"]

    router = APIRouter()
    query_builder = ElasticsearchAPIQueryBuilder()

    # NOTE(review): the returned configuration was never used; the call is
    # kept in case it has side effects — confirm whether it can be removed.
    get_config()
    manager = PreviewManager(cache_path, create_folder=True)

    @query_builder.filter()
    def filter_config(id: str = Path(...,
                                     description="Id of the document to preview.")):
        # Path parameters are always required, hence ``...`` as the default.
        return {
            "ids": {
                "values": [id]
            }
        }

    # NOTE(review): the Elasticsearch search and the preview generation are
    # called synchronously (no await) inside these async handlers.
    @router.get("/preview/{id}", tags=tags)
    async def preview(
            page: Optional[int] = Query(0,
                                        ge=0,
                                        description="The page of the document to generate the preview."),
            width: Optional[int] = Query(300,
                                         ge=1,
                                         le=1024,
                                         description="The width of the generated preview."),
            height: Optional[int] = Query(200,
                                          ge=1,
                                          le=1024,
                                          description="The height of the generated preview."),
            query_body: Dict = Depends(query_builder.build(source=[path_property])),
            es_client: Elasticsearch = Depends(get_client),
            auth_header: Dict = Depends(get_auth_header)) -> FileResponse:
        """Return the JPEG preview of the requested page, or 404 when unavailable."""
        resp = es_client.search(
            body=query_body,
            headers=auth_header,
            index=index_name
        )
        if not resp["hits"]["total"]["value"] > 0:
            raise HTTPException(status_code=404, detail="Document not found")

        document_path = resp["hits"]["hits"][0]["_source"][path_property]
        # Same guard as preview_info: an indexed document whose file is gone
        # is reported as a 404 instead of failing inside the preview manager.
        if not os.path.isfile(document_path):
            raise HTTPException(status_code=404,
                                detail="Document file not found")

        path_to_preview_image = manager.get_jpeg_preview(document_path,
                                                         page=page,
                                                         width=width,
                                                         height=height,
                                                         )
        return FileResponse(path_to_preview_image)

    @router.get("/preview/info/{id}", tags=tags, response_model=PreviewInfoModel)
    async def preview_info(
            query_body: Dict = Depends(query_builder.build(source=[path_property])),
            es_client: Elasticsearch = Depends(get_client),
            auth_header: Dict = Depends(get_auth_header)) -> PreviewInfoModel:
        """Report preview support and page count for the requested document."""
        resp = es_client.search(
            body=query_body,
            headers=auth_header,
            index=index_name
        )
        if not resp["hits"]["total"]["value"] > 0:
            raise HTTPException(status_code=404, detail="Document not found")

        document_path = resp["hits"]["hits"][0]["_source"][path_property]
        if not os.path.isfile(document_path):
            # Indexed, but the file no longer exists on disk.
            return PreviewInfoModel(supported=False, pages=0)

        supported = manager.has_jpeg_preview(document_path)
        pages = manager.get_page_nb(document_path)
        return PreviewInfoModel(supported=supported, pages=pages)

    return router
def test_page_number():
    """The image fixture is expected to have exactly one page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    assert page_count == 1
def test_page_number__no_extension():
    """The extension-less image fixture is expected to have exactly one page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(
        file_path=IMAGE_FILE_PATH_NO_EXTENSION)
    assert page_count == 1
Exemple #14
0
def build_preview_manager():
    """Create a ``PreviewManager`` caching under ``CACHE_PATH`` (folder created on demand)."""
    preview_manager = PreviewManager(CACHE_PATH, create_folder=True)
    return preview_manager
Exemple #15
0
def test_zip_to_text():
    """A text preview of ``the_zip.zip`` must be written to disk and be non-empty."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    zip_path = os.path.join(CURRENT_DIR, 'the_zip.zip')

    text_preview_path = preview_manager.get_text_preview(file_path=zip_path)

    assert os.path.exists(text_preview_path)
    assert os.path.getsize(text_preview_path) > 0
Exemple #16
0
def test_get_nb_page_no_extension() -> None:
    """With ``file_ext`` supplied, the extension-less ODT fixture yields two pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(
        file_path=ODT_FILE_PATH_NO_EXTENSION,
        file_ext=ODT_FILE_EXT,
    )
    assert page_count == 2
Exemple #17
0
def test_page_number() -> None:
    """The HTML fixture is expected to have seven pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(file_path=HTML_FILE_PATH)
    assert page_count == 7
Exemple #18
0
def test_get_nb_page(file_path):
    """Counting pages with ``file_ext=".bin"`` must raise UnsupportedMimeType.

    ``file_path`` is supplied by the caller (presumably a pytest fixture or
    parametrization defined outside this function).
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_page_nb(file_path=file_path, file_ext=".bin")
def test_get_preview_name_with_page() -> None:
    """The preview name built for page 3 is the file hash followed by ``-page3``."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    preview_context = preview_manager.get_preview_context(
        "/tmp/image.jpeg", file_ext=".jpeg")

    preview_name = preview_manager._get_preview_name(
        preview_context.hash, page=3)

    assert preview_name == "7f8df7223d8be60a7ac8a9bf7bd1df2a-page3"
Exemple #20
0
def test_to_jpeg(file_path: str) -> None:
    """A 512x256 JPEG preview of ``file_path`` must raise UnsupportedMimeType.

    ``file_path`` is supplied by the caller (presumably a pytest fixture or
    parametrization defined outside this function).
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_jpeg_preview(
            file_path=file_path,
            height=256,
            width=512,
        )
Exemple #21
0
def test_to_json() -> None:
    """The image fixture has a JSON preview, and forcing its generation succeeds."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    json_available = preview_manager.has_json_preview(file_path=IMAGE_FILE_PATH)
    assert json_available is True

    preview_manager.get_json_preview(file_path=IMAGE_FILE_PATH, force=True)
Exemple #22
0
def test_to_jpeg__default_size(file_path: str) -> None:
    """A JPEG preview requested with ``file_ext=".bin"`` must raise UnsupportedMimeType.

    No width or height is passed. ``file_path`` is supplied by the caller
    (presumably a pytest fixture or parametrization defined outside this
    function).
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_jpeg_preview(file_path=file_path, file_ext=".bin")
def test_get_file_hash_with_size() -> None:
    """The hash computed for given dimensions must end with ``-<width>x<height>``."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    # Imported locally, after the manager is built, as in the original test.
    from preview_generator.utils import ImgDims

    dimensions = ImgDims(width=512, height=256)
    file_hash = preview_manager._get_file_hash("/tmp/image.jpeg", dimensions)

    assert file_hash == "7f8df7223d8be60a7ac8a9bf7bd1df2a-512x256"
Exemple #24
0
def test_to_pdf(file_path: str) -> None:
    """A forced PDF preview of ``file_path`` must raise UnsupportedMimeType.

    ``file_path`` is supplied by the caller (presumably a pytest fixture or
    parametrization defined outside this function).
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_pdf_preview(file_path=file_path, force=True)
Exemple #25
0
def test_get_nb_page():
    """The image fixture is expected to report a single page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    # FIXME must add parameter force=True/False in the API
    assert page_count == 1
Exemple #26
0
def test_has_html_preview(file_path: str) -> None:
    """Checking HTML preview support with ``file_ext=".bin"`` must raise UnsupportedMimeType.

    ``file_path`` is supplied by the caller (presumably a pytest fixture or
    parametrization defined outside this function).
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.has_html_preview(file_path=file_path, file_ext=".bin")
Exemple #27
0
def test_zip_to_text():
    """A text preview of IMAGE_FILE_PATH is available, written to disk and non-empty.

    Despite the function name, the fixture used here is IMAGE_FILE_PATH.
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    text_available = preview_manager.has_text_preview(file_path=IMAGE_FILE_PATH)
    assert text_available is True

    text_preview_path = preview_manager.get_text_preview(
        file_path=IMAGE_FILE_PATH)
    assert os.path.exists(text_preview_path)
    assert os.path.getsize(text_preview_path) > 0
Exemple #28
0
def test_to_pdf():
    """IMAGE_FILE_PATH has a PDF preview, and forcing its generation succeeds."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    pdf_available = preview_manager.has_pdf_preview(file_path=IMAGE_FILE_PATH)
    assert pdf_available is True

    preview_manager.get_pdf_preview(file_path=IMAGE_FILE_PATH, force=True)
Exemple #29
0
def _largest_dimension(dimensions):
  """Return ``(width, height)`` of the widest entry of *dimensions*.

  Only the width is compared; on equal widths the first entry wins, and
  ``(0, 0)`` is returned when no entry has a positive width.
  """
  maximum_width = 0
  maximum_height = 0
  for dimension in dimensions:
    width = int(dimension["width"])
    if width > maximum_width:
      maximum_width = width
      maximum_height = int(dimension["height"])
  return maximum_width, maximum_height


def _truncate_excel(source_file, max_rows=100, max_columns=100):
  """Copy the top-left cells of the first sheet of *source_file* into a new workbook.

  Returns the path of the truncated file, written next to *source_file*.
  """
  source_sheet = openpyxl.load_workbook(source_file).worksheets[0]

  truncated_file = source_file + ".truncated." + source_file.split(".")[-1]
  truncated_workbook = openpyxl.Workbook()
  truncated_sheet = truncated_workbook.active
  truncated_sheet.title = "Changed Sheet"

  # Copy cell values only (no styles), one cell at a time.
  for row in range(1, max_rows + 1):
    for column in range(1, max_columns + 1):
      value = source_sheet.cell(row=row, column=column).value
      truncated_sheet.cell(row=row, column=column).value = value

  print("Saving truncated file to: " + truncated_file)
  truncated_workbook.save(truncated_file)
  print("Saved")
  return truncated_file


def _pad_to_size(img, width, height):
  """Return *img* surrounded by a white border so that it measures *width* x *height*.

  The missing pixels are split between both sides; when the difference is
  odd, the extra pixel goes to the right / bottom side.
  """
  delta_w = width - img.width
  delta_h = height - img.height
  left = int(delta_w / 2)
  top = int(delta_h / 2)
  ltrb_border = (left, top, delta_w - left, delta_h - top)
  return ImageOps.expand(img, border=ltrb_border, fill='white')


def _deliver(s3_client, local_file, dest_bucket, dest_path, width, height):
  """Send *local_file* to its destination, filling the size placeholders of *dest_path*.

  Uploads to *dest_bucket* when it is not empty, otherwise copies the file
  to a local path (used for local testing).
  """
  target = dest_path.replace("#{width}", str(width)).replace("#{height}", str(height))
  if dest_bucket != "":
    s3_client.upload_file(local_file, dest_bucket, target)
  else:
    copyfile(local_file, target)


def lambda_handler(event, context):
  """Generate JPEG previews of a document in every requested size.

  Keys read from *event*: ``src_bucket``, ``src_path``, ``dest_bucket``,
  ``dest_path``, ``dimensions`` (entries with ``width`` and ``height``),
  ``format``, ``ok_sns``, ``error_sns`` and ``request_id``. A missing key
  raises ``KeyError`` before any processing starts.

  The preview is generated once at the widest requested size, padded with
  white to exactly that size, then resized for every other dimension. Each
  result is stored under ``dest_path`` with ``#{width}`` / ``#{height}``
  replaced by the actual size. An empty ``src_bucket`` / ``dest_bucket``
  means "use the local file system" (local testing).

  ``context`` is not used.

  Returns ``"OK"`` on success and ``"ERROR"`` when any step raised; the
  matching SNS topic is notified only when ``src_bucket`` is not empty.
  """

  ### Inputs
  print("#### Received event is: " + str(event))
  src_bucket = event["src_bucket"]
  src_path = event["src_path"]
  dest_bucket = event["dest_bucket"]
  dest_path = event["dest_path"]
  dimensions = event["dimensions"]
  output_format = event["format"]
  ok_sns = event["ok_sns"]
  error_sns = event["error_sns"]
  request_id = event["request_id"]

  try:
    # NOTE: leading slashes of src_path and dest_path are passed through unchanged.

    ### Get the maximum size from the dimensions received
    maximum_width, maximum_height = _largest_dimension(dimensions)

    ### Copy the file from S3:
    print("Copy file from source S3")
    # One S3 client serves the whole invocation; none is created for purely local runs.
    s3_client = None
    if src_bucket != "" or dest_bucket != "":
      s3_client = boto3.client('s3')
    if src_bucket != "":
      source_file = "/tmp/" + src_path.split("/")[-1]
      s3_client.download_file(src_bucket, src_path, source_file)
    else:
      # An empty src_bucket means nothing is copied from S3 and src_path is
      # used as a local path. This is for local testing.
      source_file = src_path
    print("Successfully copied file from source S3")

    ### If file is Excel and it is big, make it smaller.
    if ".xls" in src_path and os.path.getsize(source_file) > 5000000: #5MB
      print("It seems to be a big Excel file. Try to keep only first 100 lines and columns")
      source_file = _truncate_excel(source_file)
      print("Successfuly truncated the big Excel file")

    ### Generate preview
    # NOTE(review): source_file reuses the same /tmp/<basename> path across
    # invocations and the preview cache lives in /tmp/cache; if previews are
    # cached per path, a warm container could serve a stale preview — confirm.
    print("Generating preview")
    manager = PreviewManager("/tmp/cache", create_folder=True)
    preview_image = manager.get_jpeg_preview(source_file, width=maximum_width, height=maximum_height)
    print("Successfully generated preview")

    ### Make sure the image has the expected size and format
    print("Resize and change the format")
    output_file = "/tmp/output_preview." + output_format
    # The context manager closes the underlying file once the padded copy is saved.
    with Image.open(preview_image) as img:
      img_with_border = _pad_to_size(img, maximum_width, maximum_height)
      img_with_border.convert('RGB').save(output_file)
    print("Successfully resized and formatted for the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") wanted size")

    ### Copy to S3
    print("Copy the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") preview to destination bucket")
    _deliver(s3_client, output_file, dest_bucket, dest_path, maximum_width, maximum_height)
    print("Successfully copied the maximum (" + str(maximum_width) + ":" + str(maximum_height) + ") preview to destination bucket")

    ### Transform, and copy all other wanted dimensions to S3 bucket
    output_file_resized = "/tmp/output_preview_resized." + output_format
    for dimension in dimensions:
      # Get dimensions
      current_width = int(dimension["width"])
      current_height = int(dimension["height"])

      # Skip if the dimension is the maximum one
      if current_width == maximum_width and current_height == maximum_height:
        continue

      # Resize the maximum-size preview to the current dimension
      with Image.open(output_file) as image:
        image.resize((current_width, current_height)).save(output_file_resized)

      ### Copy to S3
      print("Copy the dimension (" + str(current_width) + ":" + str(current_height) + ") to destination bucket")
      _deliver(s3_client, output_file_resized, dest_bucket, dest_path, current_width, current_height)
      print("Successfully copied the dimension (" + str(current_width) + ":" + str(current_height) + ") preview to destination bucket")

    ### Send ok message to ok SNS
    print("Sending OK to SNS")
    if src_bucket != "":
      # An empty src_bucket means the function is running locally for dev
      # purposes, so SNS is only notified when a source bucket is set.
      okJSON = {
        "success": "true",
        "request_id": request_id,
        "src_bucket": src_bucket,
        "src_path": src_path
      }
      # NOTE(review): str() of a dict is its Python repr, not JSON; kept
      # as-is because subscribers may depend on the current format.
      boto3.client('sns').publish(
          TopicArn=ok_sns,
          Message=str(okJSON)
      )
    print("Successfully send OK to SNS")
    return "OK"

  except Exception as e:
    # Top-level boundary: log the failure and report it instead of crashing.
    print("Error: {}".format(e))
    tracebackError = traceback.format_exc()
    print(tracebackError)
    ### Send error message to error SNS
    print("Sending ERROR to SNS")
    # Compose the ERROR JSON
    errorJSON = {
      "success": "false",
      "request_id": request_id,
      "src_bucket": src_bucket,
      "src_path": src_path
    }
    if src_bucket != "":
      # Same rule as for the OK message: no SNS notification for local runs.
      boto3.client('sns').publish(
          TopicArn=error_sns,
          Message=str(errorJSON)
      )
    print("Successfully send ERROR to SNS")
    return "ERROR"
Exemple #30
0
def get_docs_preview(file_id, path):
    """Return the path of the PDF preview generated for the document at *path*.

    The preview is produced by a ``PreviewManager`` whose cache folder is
    ``pathconst.TEMP`` (created on demand). ``file_id`` is accepted but not
    used by this function.
    """
    preview_manager = PreviewManager(pathconst.TEMP, create_folder=True)
    return preview_manager.get_pdf_preview(file_path=path)