Exemple #1
0
def get_page_patents(keyword, page_num):
    """Fetch one page of search results for ``keyword`` and parse it.

    Args:
        keyword: Search term handed to ``SoopatSpider.soopat_search``.
        page_num: Page number; it is multiplied by 10 to form the index
            passed to the search call (presumably zero-based with 10
            results per page -- confirm against the spider).

    Returns:
        Whatever ``Parser.get_patents()`` produces for the fetched content.
    """
    # Index forwarded to the search call: advances by ten per page.
    start_index = 10 * page_num
    page_content = SoopatSpider().soopat_search(keyword, start_index)
    page_parser = Parser(page_content)
    logger.info("get page %s patents ok" % page_num)
    return page_parser.get_patents()
Exemple #2
0
def get_page_patents(keyword, page_num):
    """Fetch one page of search results for ``keyword`` and parse it.

    Args:
        keyword: Search term handed to ``SoopatSpider.soopat_search``.
        page_num: Page number; it is multiplied by 10 to form the index
            passed to the search call (presumably zero-based with 10
            results per page -- confirm against the spider).

    Returns:
        Whatever ``Parser.get_patents()`` produces for the fetched content.
    """
    # Index forwarded to the search call: advances by ten per page.
    start_index = 10 * page_num
    page_content = SoopatSpider().soopat_search(keyword, start_index)
    page_parser = Parser(page_content)
    logger.info("get page %s patents ok" % page_num)
    return page_parser.get_patents()
Exemple #3
0
def get_all_page_patents(keyword):
    """Collect the patents from every result page for ``keyword``.

    Runs an initial search to read the total result count, converts that
    count into a number of pages with ``get_patent_page_num``, then fetches
    each page through ``get_page_patents``. A random pause of 5 to 20
    seconds (inclusive) is taken before every page request.

    Args:
        keyword: Search term passed to the spider and to each page fetch.

    Returns:
        A list holding the patents of all pages, in page order.
    """
    all_patents = []
    logger.info("start to get patents, keyword %s" % keyword)
    spider = SoopatSpider()
    content = spider.soopat_search(keyword)
    search_result_num = spider.get_search_result_num(content)
    page_num = get_patent_page_num(search_result_num)

    for i in range(page_num):
        # Randomised delay between requests (presumably to avoid being
        # throttled or blocked by the site -- confirm).
        sleep_seconds = random.randint(5, 20)
        logger.info("sleep for %s seconds" % sleep_seconds)
        time.sleep(sleep_seconds)
        # extend() appends the whole page at once instead of item by item.
        all_patents.extend(get_page_patents(keyword, i))
    logger.info("get %s patents, keyword %s" % (len(all_patents), keyword))
    logger.info("end to get patents, keyword %s" % keyword)
    return all_patents
Exemple #4
0
def get_all_page_patents(keyword):
    """Collect the patents from every result page for ``keyword``.

    Runs an initial search to read the total result count, converts that
    count into a number of pages with ``get_patent_page_num``, then fetches
    each page through ``get_page_patents``. A random pause of 5 to 20
    seconds (inclusive) is taken before every page request.

    Args:
        keyword: Search term passed to the spider and to each page fetch.

    Returns:
        A list holding the patents of all pages, in page order.
    """
    all_patents = []
    logger.info("start to get patents, keyword %s" % keyword)
    spider = SoopatSpider()
    content = spider.soopat_search(keyword)
    search_result_num = spider.get_search_result_num(content)
    page_num = get_patent_page_num(search_result_num)

    for i in range(page_num):
        # Randomised delay between requests (presumably to avoid being
        # throttled or blocked by the site -- confirm).
        sleep_seconds = random.randint(5, 20)
        logger.info("sleep for %s seconds" % sleep_seconds)
        time.sleep(sleep_seconds)
        # extend() appends the whole page at once instead of item by item.
        all_patents.extend(get_page_patents(keyword, i))
    logger.info("get %s patents, keyword %s" % (len(all_patents), keyword))
    logger.info("end to get patents, keyword %s" % keyword)
    return all_patents