Python Crawler Examples

Programming Language: Python

Namespace/Package Name: patent.handler

Method/Function: Crawler

Examples at hotexamples.com: 4

Python Crawler - 4 examples found. These are the top-rated real-world Python examples of patent.handler.Crawler extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

    def __init__(self, USPTO_patent_property) -> None:
        """Set up the analyzer from a patent's Google Patents page.

        Builds the page URL from ``USPTO_patent_property.number``, creates a
        ``handler.Crawler`` for that URL, and stores on the instance: the
        wrapped patent property, the parser obtained from the crawler, the
        parser's ``head`` element, and the result of ``find_all('section')``.

        Args:
            USPTO_patent_property: Object exposing a ``number`` attribute
                that is substituted into the Google Patents URL.
        """
        page_url = "https://patents.google.com/patent/%s" % USPTO_patent_property.number
        page_crawler = handler.Crawler(page_url)

        # Wrap the property after creating the crawler but before asking it
        # for a parser, so the calls happen in the same sequence as before.
        wrapped_property = PatentAnalyzer.__wrapping_patent_property(
            USPTO_patent_property)
        self.__patent_property = wrapped_property

        soup = page_crawler.get_beautiful_soup_parser()
        self.__parser = soup
        self.__head_elem = soup.head
        self.__section_elems = soup.find_all('section')

Example #2

Show file

File: USPTO.py Project: growingfuture/data-science-project

    def __try_find_total_patent_count(cls, encoded_query) -> int:
        """Try to read the total patent count from a search result page.

        Formats ``cls.__BASIC_URL_PATTERN`` with ``(1, 1, encoded_query)``,
        crawls that URL and parses the text of the last ``<strong>`` element
        on the page as an integer.

        Args:
            encoded_query: Query string substituted into the URL pattern
                (presumably already URL-encoded; verify against callers).

        Returns:
            The parsed count, or ``-1`` when the page has no ``<strong>``
            element or the last one does not contain an integer.
        """
        url_for_max_patent_count = cls.__BASIC_URL_PATTERN % (1, 1,
                                                              encoded_query)

        crawler = handler.Crawler(url_for_max_patent_count)
        parser = crawler.get_beautiful_soup_parser()

        # find_all() yields a (possibly empty) list rather than None, so the
        # failure case has to be detected by emptiness; indexing an empty
        # result would raise IndexError instead of returning the sentinel.
        strong_elems = parser.find_all("strong")
        if not strong_elems:
            return -1

        try:
            return int(strong_elems[-1].get_text())
        except ValueError:
            # The last <strong> held something other than a plain integer.
            return -1

Example #3

Show file

File: USPTO.py Project: orgPatentRoot/data-science-project

    def __try_find_total_patent_count(self, default_total_size) -> int:
        """Try to auto-detect the total number of patents for the query.

        Formats ``self.__BASIC_URL_PATTERN`` with
        ``(1, 1, self.__encoded_query)``, crawls that URL and parses the text
        of the last ``<strong>`` element on the page as an integer. Progress
        messages are printed to stdout.

        Args:
            default_total_size: Value returned when detection fails.

        Returns:
            The detected count, or ``default_total_size`` when the page has
            no ``<strong>`` element or the last one does not contain an
            integer.
        """
        url_for_max_patent_count = self.__BASIC_URL_PATTERN % (1, 1, self.__encoded_query)

        print("try auto detect 'the total number of patent'...")
        crawler = handler.Crawler(url_for_max_patent_count)
        parser = crawler.get_beautiful_soup_parser()

        # find_all() yields a (possibly empty) list rather than None, so the
        # failure case has to be detected by emptiness; indexing an empty
        # result would raise IndexError instead of using the default.
        strong_elems = parser.find_all("strong")
        if not strong_elems:
            print("auto detect failed...")
            return default_total_size

        try:
            total_patent_count = int(strong_elems[-1].get_text())
        except ValueError:
            # The last <strong> held something other than a plain integer.
            print("auto detect failed...")
            return default_total_size

        print("auto detect success !! total number of patent is %d" % total_patent_count)
        return total_patent_count

Example #4

Show file

File: USPTO.py Project: orgPatentRoot/data-science-project

    def build_USPTO_patent_property_list(cls, patent_url) -> List[USPTOPatentProperty]:
        """Build patent properties from the table cells of a result page.

        Crawls ``patent_url`` and collects every ``<td valign="top">`` cell.
        The cells are consumed in groups of three: the first cell of each
        group starts a new ``USPTOPatentProperty``, the second supplies its
        ``number`` and the third supplies its ``title``.

        Args:
            patent_url: URL of the page to crawl.

        Returns:
            One ``USPTOPatentProperty`` per group of cells, in page order.
            Empty when the page has no matching cells.
        """
        patent_property_list = []

        parser = handler.Crawler(patent_url).get_beautiful_soup_parser()
        table_td = parser.find_all('td', {"valign": "top"})

        for index, elem in enumerate(table_td):
            position = index % 3

            if position == 0:
                # First cell of a triple only marks the start of a new entry;
                # its text is not used.
                patent_property_list.append(USPTOPatentProperty())
                continue

            # .string is None when the cell has more than one child (e.g.
            # inline markup inside a title); fall back to the concatenated
            # text instead of failing on None.strip().
            text = elem.string
            if text is None:
                text = elem.get_text()
            data = text.strip()

            if position == 1:
                patent_property_list[-1].number = data
            else:
                patent_property_list[-1].title = data

        return patent_property_list