Example #1
0
def extract_image(file_obj, **kwargs):
    """Render the pages of a PDF to PNG and copy them into the thumbnail cache.

    The file behind ``file_obj.name`` is renamed with a ``.pdf`` suffix while
    Docsplit runs (presumably because the tool relies on the extension --
    confirm) and is renamed back afterwards, even when the conversion fails.
    Docsplit is asked to write into ``/tmp``; the rendered pages are then
    looked up under ``/tmp/<size>/`` and removed once they have been copied.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the document
            on disk.
        **kwargs: Thumbnail parameters. ``width`` and ``height`` must both be
            present; a falsy ``width`` constrains only the height and a falsy
            ``height`` constrains only the width. An optional ``max_previews``
            entry is dropped; the remaining entries plus a ``page`` number
            are forwarded to ``get_thumb_path_for_kwargs``.

    Returns:
        The number of page images written to the cache.

    Raises:
        KeyError: if ``width`` or ``height`` is missing from ``kwargs``.

    ``abort(400, ...)`` is called when Docsplit raises ``RunError``.
    """
    if kwargs['width']:
        if kwargs['height']:
            size = '%dx%d' % (kwargs['width'], kwargs['height'])
        else:
            size = '%dx' % kwargs['width']
    else:
        size = 'x%d' % kwargs['height']

    pdf_name = file_obj.name
    renamed_pdf = '%s.pdf' % pdf_name

    params = kwargs.copy()
    params.pop('max_previews', None)

    # Extract PDF pages as images.  The original name is restored in the
    # ``finally`` clause so that a failed conversion (abort() raising) does
    # not leave the file stranded under its temporary ``.pdf`` name.
    os.rename(pdf_name, renamed_pdf)
    try:
        docsplit = Docsplit()
        try:
            docsplit.extract_images(str(renamed_pdf), output='/tmp', sizes=[size], formats=['png'])
        except RunError:
            abort(400, 'Please enter a pdf file')
    finally:
        os.rename(renamed_pdf, pdf_name)

    # Keep only files that carry a ``_<page>.png`` suffix; anything else the
    # glob happens to match is not a rendered page and is left alone.
    pages = []
    for png_path in glob('/tmp/%s/%s*.png' % (size, os.path.basename(pdf_name))):
        match = re.search(r'_(\d+)\.png$', png_path)
        if match is not None:
            pages.append((int(match.group(1)), png_path))

    try:
        # For each page, we create the image in the cache.
        for page_number, png_path in pages:
            params['page'] = page_number
            cache_path = get_thumb_path_for_kwargs(**params)
            # PNG is binary data: copy it through binary-mode handles.
            with open(png_path, 'rb') as source:
                with open(cache_path, 'wb') as cache_file:
                    cache_file.write(source.read())
    finally:
        # Always drop the temporary renderings, even if caching failed.
        for _, png_path in pages:
            os.remove(png_path)

    return len(pages)
Example #2
0
def extract_image(file_obj, **kwargs):
    """Render the pages of a PDF to PNG and copy them into the thumbnail cache.

    The file behind ``file_obj.name`` is renamed with a ``.pdf`` suffix while
    Docsplit runs (presumably because the tool relies on the extension --
    confirm) and is renamed back afterwards, even when the conversion fails.
    Docsplit is asked to write into ``/tmp``; the rendered pages are then
    looked up under ``/tmp/<size>/`` and removed once they have been copied.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the document
            on disk.
        **kwargs: Thumbnail parameters. ``width`` and ``height`` must both be
            present; a falsy ``width`` constrains only the height and a falsy
            ``height`` constrains only the width. An optional ``max_previews``
            entry is dropped; the remaining entries plus a ``page`` number
            are forwarded to ``get_thumb_path_for_kwargs``.

    Returns:
        The number of page images written to the cache.

    Raises:
        KeyError: if ``width`` or ``height`` is missing from ``kwargs``.

    ``abort(400, ...)`` is called when Docsplit raises ``RunError``.
    """
    if kwargs["width"]:
        if kwargs["height"]:
            size = "%dx%d" % (kwargs["width"], kwargs["height"])
        else:
            size = "%dx" % kwargs["width"]
    else:
        size = "x%d" % kwargs["height"]

    pdf_name = file_obj.name
    renamed_pdf = "%s.pdf" % pdf_name

    params = kwargs.copy()
    params.pop("max_previews", None)

    # Extract PDF pages as images.  The original name is restored in the
    # ``finally`` clause so that a failed conversion (abort() raising) does
    # not leave the file stranded under its temporary ``.pdf`` name.
    os.rename(pdf_name, renamed_pdf)
    try:
        docsplit = Docsplit()
        try:
            docsplit.extract_images(str(renamed_pdf), output="/tmp", sizes=[size], formats=["png"])
        except RunError:
            abort(400, "Please enter a pdf file")
    finally:
        os.rename(renamed_pdf, pdf_name)

    # Keep only files that carry a ``_<page>.png`` suffix; anything else the
    # glob happens to match is not a rendered page and is left alone.
    pages = []
    for png_path in glob("/tmp/%s/%s*.png" % (size, os.path.basename(pdf_name))):
        match = re.search(r"_(\d+)\.png$", png_path)
        if match is not None:
            pages.append((int(match.group(1)), png_path))

    try:
        # For each page, we create the image in the cache.
        for page_number, png_path in pages:
            params["page"] = page_number
            cache_path = get_thumb_path_for_kwargs(**params)
            # PNG is binary data: copy it through binary-mode handles.
            with open(png_path, "rb") as source:
                with open(cache_path, "wb") as cache_file:
                    cache_file.write(source.read())
    finally:
        # Always drop the temporary renderings, even if caching failed.
        for _, png_path in pages:
            os.remove(png_path)

    return len(pages)
Example #3
0
def extract_image(file_obj, **kwargs):
    """Render the pages of a PDF to PNG and copy them into the thumbnail cache.

    The file behind ``file_obj.name`` is renamed with a ``.pdf`` suffix while
    Docsplit runs (presumably because the tool relies on the extension --
    confirm) and is renamed back afterwards, even when the conversion fails.
    Docsplit is asked to write into ``/tmp``; the rendered pages are then
    looked up under ``/tmp/<size>/`` and removed once they have been copied.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the document
            on disk.
        **kwargs: Thumbnail parameters. ``width`` and ``height`` must both be
            present; a falsy ``width`` constrains only the height and a falsy
            ``height`` constrains only the width. An optional ``max_previews``
            entry is dropped; the remaining entries plus a ``page`` number
            are forwarded to ``get_thumb_path_for_kwargs``.

    Returns:
        The number of page images written to the cache.

    Raises:
        KeyError: if ``width`` or ``height`` is missing from ``kwargs``.

    ``abort(400, ...)`` is called when Docsplit raises ``RunError``.
    """
    if kwargs['width']:
        if kwargs['height']:
            size = '%dx%d' % (kwargs['width'], kwargs['height'])
        else:
            size = '%dx' % kwargs['width']
    else:
        size = 'x%d' % kwargs['height']

    pdf_name = file_obj.name
    renamed_pdf = '%s.pdf' % pdf_name

    params = kwargs.copy()
    params.pop('max_previews', None)

    # Extract PDF pages as images.  The original name is restored in the
    # ``finally`` clause so that a failed conversion (abort() raising) does
    # not leave the file stranded under its temporary ``.pdf`` name.
    os.rename(pdf_name, renamed_pdf)
    try:
        docsplit = Docsplit()
        try:
            docsplit.extract_images(str(renamed_pdf), output='/tmp', sizes=[size], formats=['png'])
        except RunError:
            abort(400, 'Please enter a pdf file')
    finally:
        os.rename(renamed_pdf, pdf_name)

    # Keep only files that carry a ``_<page>.png`` suffix; anything else the
    # glob happens to match is not a rendered page and is left alone.
    pages = []
    for png_path in glob('/tmp/%s/%s*.png' % (size, os.path.basename(pdf_name))):
        match = re.search(r'_(\d+)\.png$', png_path)
        if match is not None:
            pages.append((int(match.group(1)), png_path))

    try:
        # For each page, we create the image in the cache.
        for page_number, png_path in pages:
            params['page'] = page_number
            cache_path = get_thumb_path_for_kwargs(**params)
            # PNG is binary data: copy it through binary-mode handles.
            with open(png_path, 'rb') as source:
                with open(cache_path, 'wb') as cache_file:
                    cache_file.write(source.read())
    finally:
        # Always drop the temporary renderings, even if caching failed.
        for _, png_path in pages:
            os.remove(png_path)

    return len(pages)
Example #4
0
def extract_image(url, width, height):
    """Download the document at ``url`` and return its first page as PNG data.

    The downloaded bytes are written next to a ``NamedTemporaryFile`` (same
    path plus the extension taken from ``url``), Docsplit renders page 1 into
    ``/tmp/<size>/`` and the resulting PNG is read back.  The PDF copy and
    the rendered files are removed before returning, including when the
    conversion fails.

    Args:
        url: Location of the document; its extension is reused for the
            temporary copy.
        width: Target width; when falsy only ``height`` is constrained.
        height: Target height; when falsy only ``width`` is constrained.

    Returns:
        The raw contents of the rendered PNG of page 1.

    ``abort(400, ...)`` is called when Docsplit raises ``RunError``.
    """
    data = download_from_url(url)
    ext = os.path.splitext(url)[1]
    docsplit = Docsplit()

    if width:
        if height:
            size = '%dx%d' % (width, height)
        else:
            size = '%dx' % width
    else:
        size = 'x%d' % height

    tmp = NamedTemporaryFile()
    pdf_name = u'%s%s' % (tmp.name, ext)
    filename = os.path.splitext(pdf_name)[0]
    # Docsplit output is looked up by base name only; interpolating the full
    # temp path into '/tmp/...' would never match anything.
    rendered_prefix = '/tmp/%s/%s' % (size, os.path.basename(filename))

    try:
        with open(pdf_name, 'wb') as f:
            f.write(data.getvalue())

        try:
            docsplit.extract_images(str(pdf_name), output='/tmp', sizes=[size], formats=['png'], pages=[1])
        except RunError:
            abort(400, 'Please enter a pdf file')

        # PNG is binary data: read it in binary mode.
        with open('%s_1.png' % rendered_prefix, 'rb') as f:
            return f.read()
    finally:
        for remove_file in glob('%s*' % rendered_prefix):
            os.remove(remove_file)
        # When the URL has no extension the copy *is* the temporary file,
        # which tmp.close() deletes itself; removing it here would make
        # close() fail on the missing file.
        if pdf_name != tmp.name and os.path.exists(pdf_name):
            os.remove(pdf_name)
        tmp.close()