Ejemplo n.º 1
0
def _get_kindcode_and_startpage(country, patnum, kindcode, target_folder):
    """
    Get kindcode and startpage of a given patent.

    Iterates through a list of candidate kindcodes; for every candidate, the
    first page of the patent document is requested via
    ``download.get_pdf_page``. A truthy result is taken as the kindcode and
    used to query ``download.get_meta_data``. The search stops at the first
    candidate whose metadata is truthy; its ``'DESCRIPTION'`` entry becomes
    the start page.

    - country: Country code
    - patnum: Patent number
    - kindcode: Kindcode (e.g. "A1", "B", ...) to try first. May be falsy, in
      which case only the default candidates are tried.
    - target_folder: Folder that the first page is downloaded to

    Returns a ``(kindcode, startpage)`` tuple. ``startpage`` is None when no
    candidate produced metadata. In that case ``kindcode`` is whatever the
    last completed download attempt returned (or the value passed in when
    every attempt raised), so callers should check ``startpage`` before
    relying on ``kindcode``.
    """
    startpage = None

    # The caller-supplied kindcode goes first, followed by the defaults.
    # Repeats are skipped so the same document is not requested twice.
    possible_kindcodes = []
    for candidate in [kindcode, 'A1', 'A2', 'A3', 'A4', 'B1']:
        if candidate and candidate not in possible_kindcodes:
            possible_kindcodes.append(candidate)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(possible_kindcodes)
    for code in possible_kindcodes:
        try:
            kindcode = download.get_pdf_page(country, patnum, code, 1, target_folder)
            if kindcode:
                meta = download.get_meta_data(country, patnum, 'DESCRIPTION', kindcode, skip=True)
                print(meta)
                if meta:
                    startpage = meta['DESCRIPTION']
                    break
        except Exception as exc:
            # Best effort: a failing candidate must not abort the search, but
            # the failure is reported instead of being dropped silently.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            sys.stderr.write("Unexpected error for kindcode %s: %r\n" % (code, exc))
    print("%s %s" % (kindcode, startpage))
    return kindcode, startpage
Ejemplo n.º 2
0
def _parse_pages_to_download(locations, country, patnum, kindcode, folder):
    """
    Parses input and tries to retrieve pages of the description part of a patent.
    """
    pages, columns = parser.get_pages(locations)
    target_folder = _create_target_folder(country, patnum, folder)
    kindcode, startpage = _get_kindcode_and_startpage(country, patnum, kindcode, target_folder)
    if startpage and kindcode:
        offset = 0
        if country != "EP": offset += startpage - 1
        for page in pages:
            download.get_pdf_page(country, patnum, kindcode, page + offset, target_folder, "page-" + str(page))
        for column in columns:
            page = int(math.ceil(column / 2.))
            offset = startpage - 1
            print country, patnum, kindcode
            download.get_pdf_page(country, patnum, kindcode, page + offset, target_folder, "column-" + str(column))
    return kindcode, startpage