def main():
    """Compress the PDF named by the command line in place.

    Reads ``args.file``, runs it through :func:`pdf_compress`, and
    replaces the original only when the compressed output is strictly
    smaller.  Side effects: may remove and recreate ``args.file`` and
    temporarily creates ``args.file + '.compressed'``.
    """
    global args
    args = get_args()
    # Binary mode on both ends: a PDF is binary data, and text-mode
    # I/O (the original used the default mode and 'w') corrupts it on
    # platforms that translate line endings.
    with open(args.file, 'rb') as fin:
        data = fin.read()
    newdata = pdf_compress(data)
    if len(newdata) < len(data):
        newfilename = args.file + '.compressed'
        with open(newfilename, 'wb') as fout:
            fout.write(newdata)
        # Remove first so the rename succeeds on platforms where
        # os.rename refuses to overwrite an existing file.
        os.remove(args.file)
        os.rename(newfilename, args.file)
def main():
    """Find and download a paper, then print any metadata gathered.

    ``args.title`` is either a paper title (searched via every
    registered searcher, in parallel) or a direct http(s) URL (search
    skipped).  Candidate (parser, result) pairs are tried in
    descending parser priority; the first successful download is
    compressed and written to ``args.directory`` (or renamed to
    ``args.output`` when given).
    """
    global args
    args = get_args()
    query = args.title
    directory = args.directory
    searchers = searcher.register_searcher.get_searcher_list()
    parsers = fetcher.register_parser.get_parser_list()
    download_candidates = []

    if re.match('^http[s]?://', query):
        # Direct URL given: skip search, ask each parser directly.
        ctx = JobContext("")
        sr = SearchResult(None, query)
        for parser in parsers:
            if parser.can_handle(sr):
                parser.fetch_info(ctx, sr)  # will update title
                download_candidates.append((parser, sr))
    else:
        ctx = JobContext(query)
        # Fan the same context out to every searcher concurrently.
        search_args = zip(searchers, [ctx] * len(searchers))
        pool = Pool()
        as_results = [pool.apply_async(searcher_run, arg)
                      for arg in search_args]
        for s in as_results:
            s = s.get()
            if s is None:
                continue
            # BUG FIX: dropped the stray `print s['ctx_update']` debug
            # leftover that dumped internal state to stdout.
            ctx.update_meta_dict(s['ctx_update'])
            ctx.try_update_title_from_search_result(s)
            for sr in s['results']:
                for parser in parsers:
                    if parser.can_handle(sr):
                        parser.fetch_info(ctx, sr)  # will update title
                        download_candidates.append((parser, sr))
        pool.terminate()

    # Try the highest-priority parsers first.
    download_candidates = sorted(
        download_candidates, key=lambda x: x[0].priority, reverse=True)
    for (parser, sr) in download_candidates:
        data = parser.download(sr)
        if not data:
            continue
        data = pdf_compress(data)
        if ctx.title:
            ctx.title = finalize_filename(ctx.title)
        else:
            log_info("Failed to guess paper title!")
            ctx.title = "Unnamed Paper {}".format(md5(data))
        filename = os.path.join(directory, ctx.title + ".pdf")
        if os.path.exists(filename):
            log_err("File \"{}\" exists! overwrite? (y/n)".format(
                os.path.basename(filename)))
            resp = raw_input()
            if resp not in ['y', 'Y']:
                log_info("No file written. \nExiting...")
                break
        with open(filename, 'wb') as f:
            f.write(data)
        if args.output:
            os.rename(filename, args.output)
            # BUG FIX: report the path the file actually ends up at,
            # not its pre-rename location.
            filename = args.output
        log_info("Successfully downloaded to {0}".format(filename))
        break
    else:
        # for/else: reached only when no candidate download succeeded.
        log_err("Failed to download {0}".format(ctx.title))

    if ctx.meta.get('bibtex'):
        log_info("Bibtex:\n{}".format(ctx.meta['bibtex']))
    if ctx.meta.get('author'):
        log_info("Author: {0}".format(ctx.meta['author']))
    if ctx.meta.get('citecnt'):
        log_info("Cite count: {0}".format(ctx.meta['citecnt']))
def main():
    """Find and download a paper, then print any metadata gathered.

    ``args.title`` is either a paper title (searched via every
    registered searcher, in parallel) or a direct http(s) URL (search
    skipped).  Candidate (parser, result) pairs are tried in
    descending parser priority; the first successful download is
    compressed and written to ``args.directory`` (or renamed to
    ``args.output`` when given).
    """
    global args
    args = get_args()
    query = args.title
    directory = args.directory
    searchers = searcher.register_searcher.get_searcher_list()
    parsers = fetcher.register_parser.get_parser_list()
    download_candidates = []

    if re.match('^http[s]?://', query):
        # Direct URL given: skip search, ask each parser directly.
        ctx = JobContext("")
        sr = SearchResult(None, query)
        for parser in parsers:
            if parser.can_handle(sr):
                parser.fetch_info(ctx, sr)  # will update title
                download_candidates.append((parser, sr))
    else:
        ctx = JobContext(query)
        # Fan the same context out to every searcher concurrently.
        search_args = zip(searchers, [ctx] * len(searchers))
        pool = Pool()
        as_results = [pool.apply_async(searcher_run, arg)
                      for arg in search_args]
        for s in as_results:
            s = s.get()
            if s is None:
                continue
            # BUG FIX: dropped the stray `print s['ctx_update']` debug
            # leftover that dumped internal state to stdout.
            ctx.update_meta_dict(s['ctx_update'])
            ctx.try_update_title_from_search_result(s)
            for sr in s['results']:
                for parser in parsers:
                    if parser.can_handle(sr):
                        parser.fetch_info(ctx, sr)  # will update title
                        download_candidates.append((parser, sr))
        pool.terminate()

    # Try the highest-priority parsers first.
    download_candidates = sorted(
        download_candidates, key=lambda x: x[0].priority, reverse=True)
    for (parser, sr) in download_candidates:
        data = parser.download(sr)
        if not data:
            continue
        data = pdf_compress(data)
        if ctx.title:
            ctx.title = finalize_filename(ctx.title)
        else:
            log_info("Failed to guess paper title!")
            ctx.title = "Unnamed Paper {}".format(md5(data))
        filename = os.path.join(directory, ctx.title + ".pdf")
        if os.path.exists(filename):
            log_err("File \"{}\" exists! overwrite? (y/n)".format(
                os.path.basename(filename)))
            resp = raw_input()
            if resp not in ['y', 'Y']:
                log_info("No file written. \nExiting...")
                break
        with open(filename, 'wb') as f:
            f.write(data)
        if args.output:
            os.rename(filename, args.output)
            # BUG FIX: report the path the file actually ends up at,
            # not its pre-rename location.
            filename = args.output
        log_info("Successfully downloaded to {0}".format(filename))
        break
    else:
        # for/else: reached only when no candidate download succeeded.
        log_err("Failed to download {0}".format(ctx.title))

    if ctx.meta.get('bibtex'):
        log_info("Bibtex:\n{}".format(ctx.meta['bibtex']))
    if ctx.meta.get('author'):
        log_info("Author: {0}".format(ctx.meta['author']))
    if ctx.meta.get('citecnt'):
        log_info("Cite count: {0}".format(ctx.meta['citecnt']))