Ejemplo n.º 1
0
    def __init__(self, USPTO_patent_property) -> None:
        """Load the Google Patents page for a patent and keep its parsed parts.

        The patent number is substituted into a ``patents.google.com`` URL,
        that URL is handed to ``handler.Crawler``, and the parser the crawler
        returns is stored together with its ``head`` element and every
        ``section`` element it contains.

        Args:
            USPTO_patent_property: Object whose ``number`` attribute is placed
                into the page URL; it is also passed through
                ``__wrapping_patent_property`` and the result is stored.
        """
        page_url = "https://patents.google.com/patent/%s" % USPTO_patent_property.number
        page_crawler = handler.Crawler(page_url)

        wrapped_property = PatentAnalyzer.__wrapping_patent_property(
            USPTO_patent_property)
        self.__patent_property = wrapped_property

        soup = page_crawler.get_beautiful_soup_parser()
        self.__parser = soup
        self.__head_elem = soup.head
        self.__section_elems = soup.find_all('section')
Ejemplo n.º 2
0
    def __try_find_total_patent_count(cls, encoded_query) -> int:
        """Try to read the total number of patents matching a query.

        Builds a URL from ``__BASIC_URL_PATTERN`` with ``(1, 1, encoded_query)``,
        fetches it through ``handler.Crawler`` and parses the text of the last
        ``<strong>`` element on the page as an integer.

        Args:
            encoded_query: Query text substituted into the URL pattern
                (presumably already URL-encoded -- confirm against callers).

        Returns:
            The integer found in the last ``<strong>`` element, or ``-1`` when
            the page contains no ``<strong>`` element at all.

        Raises:
            ValueError: If the last ``<strong>`` element's text is not an
                integer literal.
        """
        url_for_max_patent_count = cls.__BASIC_URL_PATTERN % (1, 1,
                                                              encoded_query)

        crawler = handler.Crawler(url_for_max_patent_count)
        parser = crawler.get_beautiful_soup_parser()
        strong_elems = parser.find_all("strong")

        # Assuming a Beautiful Soup parser, find_all() returns an empty result
        # set (not None) when nothing matches. Testing for emptiness covers
        # both cases and avoids an IndexError on the lookup below.
        if not strong_elems:
            return -1

        return int(strong_elems[-1].get_text())
Ejemplo n.º 3
0
    def __try_find_total_patent_count(self, default_total_size) -> int:
        """Try to auto-detect the total number of patents for the stored query.

        Builds a URL from ``__BASIC_URL_PATTERN`` with
        ``(1, 1, self.__encoded_query)``, fetches it through ``handler.Crawler``
        and parses the text of the last ``<strong>`` element on the page as an
        integer. Progress is reported on standard output.

        Args:
            default_total_size: Value returned when the page contains no
                ``<strong>`` element.

        Returns:
            The integer found in the last ``<strong>`` element, or
            ``default_total_size`` when detection fails.

        Raises:
            ValueError: If the last ``<strong>`` element's text is not an
                integer literal.
        """
        url_for_max_patent_count = self.__BASIC_URL_PATTERN % (1, 1, self.__encoded_query)

        print("try auto detect 'the total number of patent'...")
        crawler = handler.Crawler(url_for_max_patent_count)
        parser = crawler.get_beautiful_soup_parser()
        strong_elems = parser.find_all("strong")

        # Assuming a Beautiful Soup parser, find_all() returns an empty result
        # set (not None) when nothing matches. Testing for emptiness makes the
        # fallback reachable and avoids an IndexError on the lookup below.
        if not strong_elems:
            print("auto detect failed...")
            return default_total_size

        total_patent_count = int(strong_elems[-1].get_text())
        print("auto detect success !! total number of patent is %d" % total_patent_count)
        return total_patent_count
Ejemplo n.º 4
0
    def build_USPTO_patent_property_list(cls, patent_url) -> List[USPTOPatentProperty]:
        """Build patent property objects from the table cells of a result page.

        The page at ``patent_url`` is fetched through ``handler.Crawler`` and
        every ``<td valign="top">`` cell is read in document order. Cells are
        consumed in groups of three: the first cell of a group starts a new
        ``USPTOPatentProperty``, the second supplies its ``number`` and the
        third supplies its ``title``.

        Args:
            patent_url: URL of the page to crawl.

        Returns:
            One ``USPTOPatentProperty`` per group of cells, in page order. A
            trailing incomplete group still yields an object, with only the
            attributes whose cells were present assigned here.
        """
        patent_property_list = []

        parser = handler.Crawler(patent_url).get_beautiful_soup_parser()
        table_td = parser.find_all('td', {"valign": "top"})

        for index, elem in enumerate(table_td):
            position = index % 3

            if position == 0:
                # The first cell of each triple only marks the start of a new
                # record; its text is not stored.
                patent_property_list.append(USPTOPatentProperty())
                continue

            # get_text() is used instead of .string because .string is None
            # for a cell holding more than one child node, which would make
            # .strip() raise AttributeError.
            data = elem.get_text().strip()

            if position == 1:
                patent_property_list[-1].number = data
            else:
                patent_property_list[-1].title = data

        return patent_property_list