def test_deletes_http_cache(http_pretty, tmpdir_setup):
    assert len(http_client().cache.responses) == 0
    httpretty.register_uri(httpretty.GET, 'http://example.com')
    http_client().get('http://example.com')
    assert len(http_client().cache.responses) == 1

    CliRunner().invoke(clear)
    assert len(http_client().cache.responses) == 0

def fetch_notice_json(cfr_title, cfr_part, only_final=False,
                      max_effective_date=None):
    """Search through all articles associated with this part. Right now,
    limited to 1000; could use paging to fix this in the future."""
    params = {
        "conditions[cfr][title]": cfr_title,
        "conditions[cfr][part]": cfr_part,
        "per_page": 1000,
        "order": "oldest",
        "fields[]": FULL_NOTICE_FIELDS
    }
    if only_final:
        params["conditions[type][]"] = 'RULE'
    if max_effective_date:
        params["conditions[effective_date][lte]"] = max_effective_date
    url = API_BASE + "articles"
    logger.info("Fetching notices - URL: %s Params: %r", url, params)
    response = http_client().get(url, params=params).json()
    logger.debug("Fetching notices response - %r", response)
    if 'results' in response:
        return response['results']
    else:
        return []

def meta_data(document_number, fields=None):
    """Return the requested meta data for a specific Federal Register
    document. Accounts for a bad document number by throwing an exception"""
    url = "{}articles/{}".format(API_BASE, document_number)
    params = {}     # default fields are generally good
    if fields:
        params["fields[]"] = fields
    response = http_client().get(url, params=params)
    response.raise_for_status()
    return response.json()

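# Hedged usage sketch (not from the source): because meta_data() calls
# raise_for_status(), a bad document number surfaces as an HTTPError rather
# than a return value, so callers presumably guard the lookup. The field name
# and helper below are illustrative assumptions.
def example_effective_date(document_number):
    try:
        data = meta_data(document_number, fields=["effective_on"])
        return data.get("effective_on")
    except requests.exceptions.HTTPError:
        logger.warning("No Federal Register document found: %s",
                       document_number)
        return None
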
def check_url(url):
    """Verify that content exists at a given URL"""
    client = http_client()
    response = client.head(url)
    if response.status_code == requests.codes.not_implemented:
        response = client.get(url)
    if response.status_code == requests.codes.ok:
        return url

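# Hedged usage sketch (assumption, not from the source): check_url() returns
# the URL on a 200 and implicitly None otherwise, so it can act as a predicate
# when picking the first reachable URL from a list of candidates.
def first_reachable_url(candidate_urls):
    for candidate in candidate_urls:
        if check_url(candidate):
            return candidate
    return None
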
def _get(self, suffix):
    """Actually make the GET request. Assume the result is JSON. Right now,
    there is no error handling"""
    if self.base_url.startswith('http'):    # API
        json_str = http_client().get(self.base_url + suffix).text
    else:   # file system
        if os.path.isdir(self.base_url + suffix):
            suffix = suffix + "/index.html"
        with open(self.base_url + suffix) as f:
            json_str = f.read()
    return json.loads(json_str, object_hook=node_decode_hook)

def clear(path):
    """Delete intermediate and cache data. Only PATH arguments are cleared
    unless no arguments are present, then everything is wiped.

    \b
    $ eregs clear                   # clears everything
    $ eregs clear diff/27 trees     # deletes all cached trees and all CFR
                                    # title 27 diffs
    """
    if path:
        paths = [os.path.join(settings.EREGS_INDEX_ROOT, p) for p in path]
        # Deleting cascades
        DependencyNode.objects.filter(pk__in=paths).delete()
        for path in paths:
            DependencyNode.objects.filter(pk__startswith=path).delete()
    else:
        DependencyNode.objects.all().delete()
        http_client().cache.clear()

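# Hedged usage sketch: the clear command can also be driven programmatically
# with Click's test runner, mirroring the test at the top of this section; the
# arguments echo the docstring example and are otherwise illustrative.
def example_clear_cfr_27_diffs():
    result = CliRunner().invoke(clear, ['diff/27', 'trees'])
    assert result.exit_code == 0
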
def docs(docket_id, filter_fn=None):
    """Fetch RegsGovDocs representing documents within this docket. Grab all
    types except public submissions. Use `filter_fn` to limit the results"""
    # Use a list for consistent ordering, which is useful for caching
    params = [('api_key', REGS_GOV_KEY), ('dktid', docket_id), ('rpp', 1000),
              ('sb', 'docId'), ('so', 'ASC'), ('dct', 'N+PR+FR+O+SR')]
    results = http_client().get(REGS_GOV_DOC_API, params=params).json()
    if results.get('error'):
        logger.warning("Error retrieving data from regs.gov: %s",
                       results['error'].get('message'))
    for doc_dict in results.get('documents', []):
        if filter_fn is None or filter_fn(doc_dict):
            yield RegsGovDoc(doc_dict['documentId'], doc_dict['title'])

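# Hedged usage sketch: docs() is a generator of RegsGovDoc objects that can be
# narrowed via filter_fn, which receives the raw regs.gov document dict. The
# 'documentType' field name and its value are assumptions about that API, not
# taken from the source.
def proposed_rules_only(docket_id):
    return list(docs(
        docket_id,
        filter_fn=lambda d: d.get('documentType') == 'Proposed Rule'))
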
def notice_xmls_for_url(notice_url):
    """Find, preprocess, and return the XML(s) associated with a particular
    FR notice url"""
    local_notices = local_copies(notice_url)
    if local_notices:
        logger.info("using local xml for %s", notice_url)
        for local_notice_file in local_notices:
            with open(local_notice_file, 'rb') as f:
                yield NoticeXML(f.read(), local_notice_file).preprocess()
    else:
        # ignore initial slash
        path_parts = urlparse(notice_url).path[1:].split('/')
        client = http_client()

        first_try_url = settings.XML_REPO_PREFIX + '/'.join(path_parts)
        logger.info('trying to fetch notice xml from %s', first_try_url)
        response = client.get(first_try_url)

        if response.status_code != requests.codes.ok:
            logger.info('failed. fetching from %s', notice_url)
            response = client.get(notice_url)

        yield NoticeXML(response.content, notice_url).preprocess()

def notice_xmls_for_url(doc_num, notice_url):
    """Find, preprocess, and return the XML(s) associated with a particular
    FR notice url"""
    local_notices = local_copies(notice_url)
    if local_notices:
        logger.info("using local xml for %s", notice_url)
        for local_notice_file in local_notices:
            with open(local_notice_file, "rb") as f:
                yield NoticeXML(f.read(), local_notice_file).preprocess()
    else:
        # ignore initial slash
        path_parts = urlparse(notice_url).path[1:].split("/")
        client = http_client()

        first_try_url = settings.XML_REPO_PREFIX + "/".join(path_parts)
        logger.info("trying to fetch notice xml from %s", first_try_url)
        response = client.get(first_try_url)

        if response.status_code != requests.codes.ok:
            logger.info("failed. fetching from %s", notice_url)
            response = client.get(notice_url)

        yield NoticeXML(response.content, notice_url).preprocess()

def find_part_xml(self, part):
    """Pull the XML for an annual edition, first checking locally"""
    logger.info("Find Part xml for %s CFR %s", self.title, part)
    url = CFR_PART_URL.format(year=self.year, title=self.title,
                              volume=self.vol_num, part=part)
    filename = url.split('/')[-1]
    for xml_path in settings.LOCAL_XML_PATHS:
        xml_path = os.path.join(xml_path, 'annual', filename)
        logger.debug("Checking locally for file %s", xml_path)
        if os.path.isfile(xml_path):
            with open(xml_path, 'rb') as f:
                return XMLWrapper(f.read(), xml_path)

    client = http_client()
    first_try_url = settings.XML_REPO_PREFIX + 'annual/' + filename
    logger.info('trying to fetch annual edition from %s', first_try_url)
    response = client.get(first_try_url)

    if response.status_code != requests.codes.ok:
        logger.info('failed. fetching from %s', url)
        response = client.get(url)

    if response.status_code == requests.codes.ok:
        return XMLWrapper(response.content, url)

def response(self):
    """Issue a streamed GET request for this object's URL"""
    logger.debug("GET %s", self.url)
    return http_client().get(self.url, stream=True)
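
# Hedged usage sketch: because the request above is streamed, the body can be
# written to disk in chunks instead of being held in memory. The object and
# path names below are illustrative assumptions, not part of the source.
def save_streamed_body(entry, target_path):
    with open(target_path, 'wb') as f:
        for chunk in entry.response().iter_content(chunk_size=8192):
            f.write(chunk)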