def download(self, path=None):
    """Download this object's PDF (``self.pdf_url``) and save it to disk.

    Args:
        path: Destination file, given either as a ``str`` or as a ``Path``.
            When ``None``, the file is written to
            ``<publication_number>.pdf`` relative to the current working
            directory.

    Returns:
        Path: The location the PDF was written to.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status
        (``requests.HTTPError`` if ``session`` is a requests session --
        confirm against the module's session setup).
    """
    # Coerce so that plain string paths work as well as Path objects.
    if path is None:
        path = Path(f"{self.publication_number}.pdf")
    else:
        path = Path(path)

    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the close,
    # including when raise_for_status() or the file write fails.
    with session.get(self.pdf_url, stream=True) as response:
        response.raise_for_status()
        with path.open("wb") as f:
            for chunk in response.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return path
def get_page(self, page_no):
    """Fetch one page of results as XML and return its parsed contents.

    Side effect: ``self._len`` is overwritten with the first element of the
    pair returned by ``self.parser.parse`` for this page.

    Args:
        page_no: Page identifier forwarded to ``self.get_query``.

    Returns:
        The second element of the pair returned by ``self.parser.parse``.
    """
    query_params = self.get_query(page_no)
    # NOTE(review): verify=False switches off TLS certificate verification
    # for this request -- confirm that this is still required.
    xml_text = session.get(
        self.url,
        params=query_params,
        verify=False,
        headers={"Accept": "application/xml"},
    ).text
    parsed = self.parser.parse(xml_text)
    self._len, page = parsed
    return page
def fields(self):
    """Return the search-field mapping, fetching it at most once per class.

    The mapping is cached on the class as ``_fields``; when that attribute
    already exists it is returned without any network access. Otherwise the
    search-fields endpoint is queried and every key of the JSON reply is
    passed through ``inflection.underscore`` before being stored.

    Returns:
        dict: Converted field name -> value as returned by the endpoint.

    Raises:
        ValueError: If the endpoint's response is not ``ok``.
    """
    cls = self.__class__
    if hasattr(cls, "_fields"):
        return cls._fields

    response = session.get("https://ped.uspto.gov/api/search-fields")
    if not response.ok:
        raise ValueError("Can't get fields!")

    cls._fields = {
        inflection.underscore(name): spec
        for name, spec in response.json().items()
    }
    return cls._fields
def is_online(self):
    """Check that the search-fields endpoint answers successfully.

    The request is made with the session cache disabled so that a live
    response is inspected.

    Returns:
        bool: ``True`` when the response is ``ok``. The method never returns
        ``False``; every failure is reported by raising.

    Raises:
        NotAvailableException: When the response is not ``ok``. The message
            is chosen from known phrases found in the response body, with a
            generic fallback.
    """
    with session.cache_disabled():
        response = session.get("https://ped.uspto.gov/api/search-fields")
        if response.ok:
            return True

        body = response.text
        if "requested resource is not available" in body:
            message = (
                "Patent Examination Data is Offline - this is a USPTO problem"
            )
        elif "attempt failed or the origin closed the connection" in body:
            message = (
                "The Patent Examination Data API is Broken! - this is a USPTO problem"
            )
        else:
            message = "There is a USPTO problem"
        raise NotAvailableException(message)
def download(self, path=".", include_appl_id=True):
    """Download this document as a PDF and return where it was saved.

    Args:
        path: Either a target file whose name ends in ``.pdf``
            (case-insensitive), which is used as given, or a directory in
            which a file name is generated.
        include_appl_id: When a file name is generated, prefix it with
            ``self.appl_id``. Ignored if ``path`` already names a PDF file.

    Returns:
        Path: The file the document was written to.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status; in that
        case the output file is not opened.
    """
    names_a_pdf = str(path)[-4:].lower() == ".pdf"
    if names_a_pdf:
        # A specific filename was supplied, so use it untouched
        out_file = Path(path)
    else:
        directory = Path(path)
        if include_appl_id:
            filename = f"{self.appl_id} - {self.mail_room_date} - {self.code} - {self.description[:40]}.pdf"
        else:
            filename = f"{self.mail_room_date} - {self.code} - {self.description[:40]}.pdf"
        out_file = directory / filename

    with session.get(self.base_url + self.url, stream=True) as response:
        response.raise_for_status()
        with out_file.open("wb") as target:
            for block in response.iter_content(chunk_size=8192):
                target.write(block)
    return out_file
def _len(self):
    """Return the ``recordTotalQuantity`` value reported for the current query.

    Issues one GET request to ``self.url + self.path`` with the parameters
    from ``self.query()`` and reads the count out of the JSON reply.
    """
    endpoint = self.url + self.path
    payload = session.get(endpoint, params=self.query()).json()
    return payload["recordTotalQuantity"]
def get_page(self, page_no):
    """Return the ``results`` entry of the JSON reply for one page.

    Args:
        page_no: Page index; the request's ``recordStartNumber`` is set to
            ``page_no * self.page_size``.

    Returns:
        The value stored under ``"results"`` in the JSON response.
    """
    params = self.query()
    # Written into the object returned by self.query(), exactly as given
    params["recordStartNumber"] = page_no * self.page_size
    endpoint = self.url + self.path
    payload = session.get(endpoint, params=params).json()
    return payload["results"]
def _get_results(self) -> Iterator[USApplication]:
    """Yield one loaded object per item in the JSON reply.

    The request URL is ``self.query_url`` followed by the first ``appl_id``
    entry of ``self.config["filter"]``. Each item of the JSON response is
    passed through ``self.__schema__.load`` lazily, as the generator is
    consumed.
    """
    appl_id = self.config["filter"]["appl_id"][0]
    items = session.get(self.query_url + appl_id).json()
    yield from (self.__schema__.load(entry) for entry in items)
def __len__(self):
    """Return the number of items in the JSON reply for the filtered ``appl_id``.

    Performs a GET request to ``self.query_url`` followed by the first
    ``appl_id`` entry of ``self.config["filter"]`` and counts the decoded
    JSON payload with ``len``.
    """
    appl_id = self.config["filter"]["appl_id"][0]
    items = session.get(self.query_url + appl_id).json()
    return len(items)
def download(self):
    """Download the PDF associated with the assignment to the current working directory.

    The file is fetched from ``self._image_url`` and written to
    ``<id>.pdf``, a relative path that resolves against the current working
    directory.

    Returns:
        None

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status
        (``requests.HTTPError`` if ``session`` is a requests session --
        confirm against the module's session setup). The output file is not
        created in that case.
    """
    # The context manager releases the streamed connection on every path.
    with session.get(self._image_url, stream=True) as response:
        # Check the status before opening the file so that an HTTP error
        # page is never saved under a .pdf name.
        response.raise_for_status()
        with open(f"{self.id}.pdf", "wb") as f:
            # iter_content applies content decoding (e.g. gzip) and keeps
            # memory bounded by streaming the body in fixed-size chunks.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)