def get_page_patents(keyword, page_num):
    """Fetch one page of search results and return the patents parsed from it.

    Args:
        keyword: Search keyword forwarded to ``SoopatSpider.soopat_search``.
        page_num: Zero-based page number; the search is issued with the
            offset ``page_num * 10``.

    Returns:
        Whatever ``Parser.get_patents()`` yields for the fetched content.
    """
    # The search takes a result offset rather than a page number
    # (presumably ten results per page -- confirm against the site).
    start_offset = page_num * 10
    page_content = SoopatSpider().soopat_search(keyword, start_offset)
    page_parser = Parser(page_content)
    logger.info("get page %s patents ok" % page_num)
    return page_parser.get_patents()
def get_all_page_patents(keyword):
    """Collect the patents from every result page of a keyword search.

    An initial search is issued to read the number of results, which
    ``get_patent_page_num`` turns into a page count. Every page is then
    fetched through ``get_page_patents`` after a random 5-20 second pause.

    Args:
        keyword: Search keyword forwarded to the spider.

    Returns:
        A list holding the patents of all pages, in page order.
    """
    logger.info("start to get patents, keyword %s" % keyword)

    searcher = SoopatSpider()
    first_response = searcher.soopat_search(keyword)
    total_pages = get_patent_page_num(
        searcher.get_search_result_num(first_response)
    )

    collected = []
    for page_index in range(total_pages):
        # Random pause before each page request (presumably to avoid
        # hammering the remote site -- confirm the intended throttling).
        pause = random.randint(5, 20)
        logger.info("sleep for %s seconds" % pause)
        time.sleep(pause)
        collected.extend(get_page_patents(keyword, page_index))

    # NOTE(review): the original formatting was lost; this total-count log is
    # assumed to run once after the paging loop -- confirm it was not per page.
    logger.info("get %s patents, keyword %s" % (len(collected), keyword))
    logger.info("end to get patents, keyword %s" % keyword)
    return collected