Beispiel #1
0
def pdf_compress(data):
    """Try to shrink a PDF with ghostscript's ``ps2pdf14``.

    The input is written to a temporary file, ``ps2pdf14`` converts it into
    a second temporary file, and the result is read back.  The converted
    data is returned only if the tool exited with status 0, the output
    passes ``check_file_type(..., 'PDF document')``, it is at least
    ``ukconfig.FILE_SIZE_MINIMUM`` bytes long and it is strictly smaller
    than the input.  In every other case ``data`` is returned unchanged.

    :param data: raw content of the PDF (a byte string).
    :return: the compressed PDF bytes, or ``data`` itself.
    """
    # Imported locally so this function does not rely on a module-level
    # import that may not exist.
    import subprocess

    src = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
    dst = None
    try:
        src.write(data)
        src.close()

        # Only the name of the second file is needed; ps2pdf14 writes to it.
        dst = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
        dst.close()

        try:
            # Argument list instead of a shell string: no quoting problems
            # with unusual temporary directory names.
            ret = subprocess.call(['ps2pdf14', src.name, dst.name])
        except OSError:
            # The executable could not be started (e.g. not installed);
            # handle it like any other failed run.
            ret = -1

        newdata = None
        if ret != 0:
            log_err("Compress: ps2pdf14 failed!")
        else:
            # A PDF is binary data: read bytes and close the handle at once.
            with open(dst.name, 'rb') as fin:
                newdata = fin.read()

        # Has to be evaluated while the output file still exists.
        file_succ = newdata is not None and \
            check_file_type(dst.name, 'PDF document') and \
            len(newdata) >= ukconfig.FILE_SIZE_MINIMUM
    finally:
        # Always clean up both temporary files, independently of each other,
        # even when something above raised.
        for tmp in (src, dst):
            if tmp is None:
                continue
            try:
                tmp.close()
            except (OSError, IOError):
                pass
            try:
                os.remove(tmp.name)
            except OSError:
                pass

    if file_succ and len(newdata) < len(data):
        log_info("Compress succeed: {0}->{1}".format(
            parse_file_size(len(data)), parse_file_size(len(newdata))))
        return newdata
    return data
Beispiel #2
0
def pdf_compress(data):
    """Try to shrink a PDF with ghostscript's ``ps2pdf14``.

    The input is written to a temporary file, ``ps2pdf14`` converts it into
    a second temporary file, and the result is read back.  The converted
    data is returned only if the tool exited with status 0, the output
    passes ``check_file_type(..., 'PDF document')``, it is at least
    ``ukconfig.FILE_SIZE_MINIMUM`` bytes long and it is strictly smaller
    than the input.  In every other case ``data`` is returned unchanged.

    :param data: raw content of the PDF (a byte string).
    :return: the compressed PDF bytes, or ``data`` itself.
    """
    # Imported locally so this function does not rely on a module-level
    # import that may not exist.
    import subprocess

    src = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
    dst = None
    try:
        src.write(data)
        src.close()

        # Only the name of the second file is needed; ps2pdf14 writes to it.
        dst = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
        dst.close()

        try:
            # Argument list instead of a shell string: no quoting problems
            # with unusual temporary directory names.
            ret = subprocess.call(['ps2pdf14', src.name, dst.name])
        except OSError:
            # The executable could not be started (e.g. not installed);
            # handle it like any other failed run.
            ret = -1

        newdata = None
        if ret != 0:
            log_err("Compress: ps2pdf14 failed!")
        else:
            # A PDF is binary data: read bytes and close the handle at once.
            with open(dst.name, 'rb') as fin:
                newdata = fin.read()

        # Has to be evaluated while the output file still exists.
        file_succ = newdata is not None and \
            check_file_type(dst.name, 'PDF document') and \
            len(newdata) >= ukconfig.FILE_SIZE_MINIMUM
    finally:
        # Always clean up both temporary files, independently of each other,
        # even when something above raised.
        for tmp in (src, dst):
            if tmp is None:
                continue
            try:
                tmp.close()
            except (OSError, IOError):
                pass
            try:
                os.remove(tmp.name)
            except OSError:
                pass

    if file_succ and len(newdata) < len(data):
        log_info("Compress succeed: {0}->{1}".format(
            parse_file_size(len(data)), parse_file_size(len(newdata))))
        return newdata
    return data
Beispiel #3
0
def requests_download(url, progress_updater, headers=None):
    """Download ``url`` with ``requests`` and return the response body.

    If the server sends no ``content-length`` header the whole body is
    fetched in one go.  Otherwise the announced size is checked against
    ``ukconfig.FILE_SIZE_MINIMUM`` / ``ukconfig.FILE_SIZE_MAXIMUM`` before
    the body is streamed, and ``progress_updater`` is informed along the way.

    :param url: address to fetch.
    :param progress_updater: object providing ``set_total(size)``,
        ``update(downloaded)`` and ``finish(data)``.
    :param headers: optional dict of HTTP headers passed to ``requests.get``.
    :return: the downloaded body as a byte string.
    :raises FileCorrupted: if the announced size is outside the allowed range.
    """
    resp = requests.get(url, stream=True, headers=headers)
    try:
        total_length = resp.headers.get('content-length')
        if total_length is None:
            data = resp.content
            progress_updater.finish(data)
            return data

        total_length = int(total_length)
        if total_length < ukconfig.FILE_SIZE_MINIMUM:
            raise FileCorrupted(
                "File too small: " + parse_file_size(total_length))
        if total_length > ukconfig.FILE_SIZE_MAXIMUM:
            raise FileCorrupted(
                "File too large: " + parse_file_size(total_length))
        progress_updater.set_total(total_length)

        dl = 0
        chunks = []
        # iter_content() defaults to 1-byte chunks; ask for a sensible size.
        for chunk in resp.iter_content(chunk_size=8192):
            dl += len(chunk)
            chunks.append(chunk)
            progress_updater.update(dl)
        # Join once instead of repeated concatenation; b"" keeps the result
        # a byte string on both Python 2 and Python 3.
        ret = b"".join(chunks)
        # Report the complete payload, as the no-content-length branch does
        # (previously only the last chunk was passed).
        progress_updater.finish(ret)
        return ret
    finally:
        # With stream=True the connection is held until the response is
        # closed, so release it on every path, including the error ones.
        resp.close()
Beispiel #4
0
def do_compress(data, pid):
    """Compress a PDF if possible and store it on the paper record.

    Storing the PDF *must* succeed, so compression is best effort: if
    ``pdf_compress`` raises, the error is logged and the original ``data``
    is stored instead.

    :param data: raw PDF content.
    :param pid: ``_id`` of the document to update.
    :return: the data that was stored (compressed or original).
    """
    try:
        data = pdf_compress(data)
    except Exception as exc:
        # Deliberately non-fatal, but no longer silent; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        log_err("Compress: failed for pdf {0}, keeping original: {1!r}".format(
            pid, exc))

    # NOTE(review): get_mongo('paper') presumably returns a pymongo
    # collection -- confirm before changing the update call.
    db = get_mongo('paper')
    db.update({'_id': pid}, {'$set': {'pdf': Binary(data)}})
    log_info("Updated pdf {0}: size={1}".format(pid,
                                                parse_file_size(len(data))))
    return data
Beispiel #5
0
def do_compress(data, pid):
    """Compress a PDF if possible and store it on the paper record.

    Storing the PDF *must* succeed, so compression is best effort: if
    ``pdf_compress`` raises, the error is logged and the original ``data``
    is stored instead.

    :param data: raw PDF content.
    :param pid: ``_id`` of the document to update.
    :return: the data that was stored (compressed or original).
    """
    try:
        data = pdf_compress(data)
    except Exception as exc:
        # Deliberately non-fatal, but no longer silent; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        log_err("Compress: failed for pdf {0}, keeping original: {1!r}".format(
            pid, exc))

    # NOTE(review): get_mongo('paper') presumably returns a pymongo
    # collection -- confirm before changing the update call.
    db = get_mongo('paper')
    db.update({'_id': pid}, {'$set': {'pdf': Binary(data)}})
    log_info("Updated pdf {0}: size={1}".format(
        pid, parse_file_size(len(data))))
    return data
Beispiel #6
0
def direct_download(url, progress_updater, headers=None):
    """Fetch ``url`` with the backend named by ``ukconfig.download_method``.

    When ``headers`` is omitted a default set (Host, User-Agent,
    Connection) is built from the URL and ``ukconfig.USER_AGENT``.

    :return: the downloaded data.
    :raises FileCorrupted: if the result is shorter than
        ``ukconfig.FILE_SIZE_MINIMUM``.
    """
    log_info("Directly Download with URL {0} ...".format(url))

    if headers is None:
        headers = {
            'Host': urlparse(url).netloc,
            'User-Agent': ukconfig.USER_AGENT,
            'Connection': 'Keep-Alive',
        }

    # The wget backend is meant for tests and command line tools only.
    use_wget = ukconfig.download_method == 'wget'
    fetch = wget_download if use_wget else requests_download
    payload = fetch(url, progress_updater, headers)

    size = len(payload)
    if size >= ukconfig.FILE_SIZE_MINIMUM:
        return payload
    raise FileCorrupted("File too small: " + parse_file_size(size))
Beispiel #7
0
 def set_total(self, size):
     """Log the expected download size and store it in ``self.total``.

     size: number of bytes
     """
     readable = parse_file_size(size)
     message = "File size is {0}".format(readable)
     log_info(message)
     self.total = size