예제 #1
0
 def pre_load(self, data, *args, **kwargs):
     """Flatten a raw ``exchange-document`` record before it is loaded.

     The ``bibliographic-data`` sub-tree is removed from the document and
     the parts of interest are copied onto top-level keys:
     ``publications``, ``ipc_classes``, ``cpc_classes`` / ``us_classes``
     (only set when patent classifications were found), ``applications``,
     ``priority_claims``, ``applicants``, ``inventors`` and ``title``.

     Args:
         data: Mapping holding an ``"exchange-document"`` entry; that entry
             is modified in place.
         *args: Ignored.
         **kwargs: Ignored.

     Returns:
         The modified ``exchange-document`` mapping.
     """
     data = data["exchange-document"]
     bib = data.pop("bibliographic-data")

     data["publications"] = resolve_list(
         bib, "publication-reference.document-id")
     ipc_class = resolve_list(bib,
                              "classifications-ipcr.classification-ipcr")
     data["ipc_classes"] = [c["text"] for c in ipc_class]

     classifications = resolve_list(
         bib, "patent-classifications.patent-classification")
     if classifications:
         # Split the mixed classification list by its scheme attribute.
         data["cpc_classes"] = [
             c for c in classifications
             if resolve(c, "classification-scheme.@scheme") == "CPCI"
         ]
         data["us_classes"] = [
             c for c in classifications
             if resolve(c, "classification-scheme.@scheme") == "UC"
         ]

     data["applications"] = resolve_list(
         bib, "application-reference.document-id")
     data["priority_claims"] = self.pre_load_priority_claims(bib)
     data["applicants"] = resolve_list(bib, "parties.applicants.applicant")
     data["inventors"] = resolve_list(bib, "parties.inventors.inventor")

     titles = resolve(bib, "invention-title")
     if isinstance(titles, list):
         # Prefer the English title.  A bare next() would raise
         # StopIteration when none of the titles is English, so fall back
         # to the first listed title instead.
         title = next(
             (t["#text"] for t in titles if t["@lang"] == "en"), None)
         if title is None and titles:
             title = titles[0]["#text"]
         data["title"] = title
     elif isinstance(titles, str):
         data["title"] = titles
     else:
         # NOTE(review): assumes a single title mapping here; a missing
         # title (None) would still fail on this lookup -- confirm what
         # ``resolve`` returns for an absent path.
         data["title"] = titles["#text"]
     return data
예제 #2
0
 def __len__(self):
     """Return the result count, capped by the configured limit.

     The total is read from the ``@total-result-count`` entry of page 0.
     A falsy ``self.config["limit"]`` means that no cap is applied.
     """
     first_page = self.get_page(0)
     total = int(resolve(first_page, "@total-result-count"))
     cap = self.config["limit"]
     if not cap:
         return total
     # The smaller of the two wins; on a tie the total is returned.
     return min(total, cap)
예제 #3
0
 def get_page(self, page_number):
     """Return search-result page ``page_number``, fetching it on first use.

     A page already stored in ``self.pages`` is returned as-is.  Otherwise
     ``self.search_url`` is requested with the query parameters for that
     page, the response text is parsed with ``xmltodict`` and the
     ``world-patent-data.biblio-search`` node is stored in ``self.pages``
     before being returned.
     """
     if page_number in self.pages:
         return self.pages[page_number]

     params = self.query_params(page_number)
     # NOTE(review): the HTTP status is not checked before parsing, so an
     # error response would be parsed and stored like a normal page.
     response = session.get(self.search_url, params=params, timeout=10)
     parsed = xmltodict.parse(
         response.text,
         process_namespaces=True,
         namespaces=NS,
     )
     self.pages[page_number] = resolve(
         parsed, "world-patent-data.biblio-search")
     return self.pages[page_number]
예제 #4
0
 def pre_load_priority_claims(self, bib):
     """Flatten the priority claims found in the bibliographic data.

     The fields of each claim's ``document-id`` entry (the first entry when
     there are several) are merged into the claim itself and the
     ``document-id`` key is dropped.

     Args:
         bib: The ``bibliographic-data`` mapping of a document.

     Returns:
         A list with one flattened dict per priority claim; empty when no
         claim was resolved.
     """
     claims = resolve(bib, "priority-claims.priority-claim")
     if claims is None:
         # Presumably what ``resolve`` yields for a document without
         # priority claims (verify against the helper).  Wrapping None in
         # a list and subscripting it would raise TypeError.
         return []
     if not isinstance(claims, list):
         # A single claim arrives as a bare mapping rather than a list.
         claims = [claims]

     flattened = []
     for claim in claims:
         doc_id = claim["document-id"]
         if isinstance(doc_id, list):
             doc_id = doc_id[0]
         # Build a new dict so the claim in ``bib`` is left untouched.
         merged = {**claim, **doc_id}
         del merged["document-id"]
         flattened.append(merged)
     return flattened
예제 #5
0
 def result_gen(offset, limit):
     """Yield ``(index, item)`` pairs from the page containing ``offset`` on.

     ``self`` is taken from the enclosing scope.  Indices start at the first
     position of the page that contains ``offset``, not at ``offset``
     itself.  Within list-valued pages, items whose index has reached a
     truthy ``self.config["limit"]`` are counted but not yielded.  The
     ``limit`` argument is not used.
     """
     total_pages = math.ceil(len(self) / self.page_size)
     first_page = int(offset / self.page_size)
     index = first_page * self.page_size
     for page_no in range(first_page, total_pages):
         items = resolve(self.get_page(page_no), self.item_path)
         if isinstance(items, list):
             for item in items:
                 cap = self.config["limit"]
                 # Skip only when a cap is set and has been reached.
                 if not (cap and index >= cap):
                     yield index, item
                 index += 1
         else:
             # A page resolving to a single object is yielded as one item.
             yield index, items
             index += 1