def __init__(self, USPTO_patent_property) -> None:
    """Load the Google Patents page for a patent and keep its parsed parts.

    The page URL is built from ``USPTO_patent_property.number``. The
    instance stores a wrapped copy of the given property, the parser
    returned by the crawler, the parser's ``head`` element, and every
    ``section`` element found in the document.

    Args:
        USPTO_patent_property: Object exposing a ``number`` attribute that
            identifies the patent (presumably a USPTOPatentProperty --
            confirm against callers).
    """
    page_url = "https://patents.google.com/patent/%s" % USPTO_patent_property.number
    page_crawler = handler.Crawler(page_url)

    self.__patent_property = PatentAnalyzer.__wrapping_patent_property(
        USPTO_patent_property)

    # Parse once, then keep the pieces the rest of the class reads.
    soup = page_crawler.get_beautiful_soup_parser()
    self.__parser = soup
    self.__head_elem = soup.head
    self.__section_elems = soup.find_all('section')
def __try_find_total_patent_count(cls, encoded_query) -> int:
    """Try to read the total number of patents matching a query.

    The class URL pattern is filled with ``(1, 1, encoded_query)``, the
    resulting page is parsed, and the text of the last ``<strong>`` element
    is interpreted as the total count.

    Args:
        encoded_query: Query string substituted into the URL pattern
            (presumably already URL-encoded -- confirm against callers).

    Returns:
        The parsed count, or ``-1`` when the page has no ``<strong>``
        element or the last one does not hold an integer.
    """
    url_for_max_patent_count = cls.__BASIC_URL_PATTERN % (1, 1, encoded_query)
    parser = handler.Crawler(url_for_max_patent_count).get_beautiful_soup_parser()
    strong_elems = parser.find_all("strong")

    # find_all() yields an empty result set (never None) when nothing
    # matches, so test for emptiness instead of identity with None.
    if not strong_elems:
        return -1

    try:
        return int(strong_elems[-1].get_text())
    except ValueError:
        # The last <strong> is not a number; report "not found" through
        # the same sentinel instead of crashing.
        return -1
def __try_find_total_patent_count(self, default_total_size) -> int:
    """Try to auto-detect the total number of patents for the stored query.

    The URL pattern is filled with ``(1, 1, <stored encoded query>)``, the
    resulting page is parsed, and the text of the last ``<strong>`` element
    is interpreted as the total count. Progress is reported with ``print``.

    Args:
        default_total_size: Value returned when detection fails.

    Returns:
        The detected count, or ``default_total_size`` when the page has no
        ``<strong>`` element or the last one does not hold an integer.
    """
    url_for_max_patent_count = self.__BASIC_URL_PATTERN % (1, 1, self.__encoded_query)
    print("try auto detect 'the total number of patent'...")
    parser = handler.Crawler(url_for_max_patent_count).get_beautiful_soup_parser()
    strong_elems = parser.find_all("strong")

    # find_all() yields an empty result set (never None) when nothing
    # matches, so test for emptiness instead of identity with None.
    if not strong_elems:
        print("auto detect failed...")
        return default_total_size

    try:
        total_patent_count = int(strong_elems[-1].get_text())
    except ValueError:
        # The last <strong> is not a number; treat it as a failed detection.
        print("auto detect failed...")
        return default_total_size

    print("auto detect success !! total number of patent is %d" % total_patent_count)
    return total_patent_count
def build_USPTO_patent_property_list(cls, patent_url) -> List[USPTOPatentProperty]:
    """Build patent properties from the top-aligned table cells of a page.

    Every ``<td valign="top">`` cell on the page is consumed in groups of
    three: the first cell of a group starts a new ``USPTOPatentProperty``,
    the second supplies its ``number`` and the third its ``title``.

    Args:
        patent_url: URL of the page handed to the crawler.

    Returns:
        The properties in document order; an empty list when the page has
        no matching cells. A trailing incomplete group yields a property
        with only the fields that were present.
    """
    parser = handler.Crawler(patent_url).get_beautiful_soup_parser()
    patent_property_list = []

    for index, elem in enumerate(parser.find_all('td', {"valign": "top"})):
        position = index % 3

        if position == 0:
            # The leading cell only marks the start of a record; its text
            # is never used, so it is not extracted.
            patent_property_list.append(USPTOPatentProperty())
            continue

        # .string is None when the cell holds more than one child node;
        # fall back to the concatenated text instead of failing on .strip().
        text = elem.string
        if text is None:
            text = elem.get_text()
        data = text.strip()

        if position == 1:
            patent_property_list[-1].number = data
        else:
            patent_property_list[-1].title = data

    return patent_property_list