def download_file(api_key, url):
    """Fetch a raw download from regulations.gov.

    *url* is expected to already carry a query string; the API key is
    appended as an extra query parameter. Returns the full Response
    object so callers can pick the representation they need.
    """
    LOGGER.info('Requesting download from regulations.gov')
    response = requests.get(f'{url}&api_key={api_key}')
    status_code_check.check_status(response.status_code)
    LOGGER.info('download has been retrieved')
    return response
def find_documents_data(manager, job, job_id):
    """Fetch the page of documents described by *job* and package it.

    Uses the manager's API key and client id; returns the packaged
    results ready to be posted back to the server.
    """
    print("Getting documents from regulations.gov...\n")
    documents = get_documents(manager.api_key, job["page_offset"],
                              job["start_date"], job["end_date"])
    LOGGER.info("Job#%s: Packaging documents...", str(job_id))
    return package_documents(documents, manager.client_id, job_id)
def find_download_data(manager, job, job_id):
    """Download the file named by *job* and package it for the server.

    The packaged payload carries the destination folder/file metadata
    from the job alongside the downloaded text body.
    """
    print("Getting download from regulations.gov...\n")
    response = download_file(manager.api_key, job['url'])
    payload = {
        'folder_name': job['folder_name'],
        'file_name': job['file_name'],
        'file_type': job['file_type'],
        'data': response.text,
    }
    LOGGER.info("Job#%s: Packaging downloads..", str(job_id))
    return package_downloads(payload, manager.client_id, job_id)
def download_document(api_key, document_id):
    """
    downloads a file based on a url, api key and document_id (if given)
    """
    # Build the query separately instead of overwriting the parameters.
    base_url = "https://api.data.gov:443/regulations/v3/document.json?"
    query = "&api_key=" + api_key + "&documentId=" + document_id
    LOGGER.info('Requesting document from regulations.gov')
    response = requests.get(base_url + query)
    status_code_check.check_status(response.status_code)
    LOGGER.info('document has been retrieved')
    return response.json()
def get_documents_data(api_key, offset, date):
    """
    Makes call to regulations.gov and retrieves the documents data
    """
    LOGGER.info('Requesting document from regulations.gov')
    endpoint = ('https://api.data.gov:443/regulations'
                '/v3/documents.json?rpp=1000&api_key=' + api_key
                + '&po=' + str(offset) + '&crd=' + date)
    response = requests.get(endpoint)
    check_status(response.status_code)
    LOGGER.info('document has been retrieved')
    return response.json()
def get_docket_data(api_key, docket_id):
    """
    Makes call to regulations.gov and retrieves the docket data
    """
    LOGGER.info('Requesting docket from regulations.gov')
    endpoint = ("https://api.data.gov:443/"
                "regulations/v3/docket.json?api_key=" + api_key
                + "&docketId=" + docket_id)
    response = requests.get(endpoint)
    check_status(response.status_code)
    LOGGER.info('docket has been retrieved')
    return response.json()
def check_status(status_code):
    """Raise a descriptive exception for known regulations.gov errors.

    Status codes not in the table fall through without raising.
    """
    # Exception classes are looked up by name only when needed, so a
    # healthy status never touches the error module.
    failures = {
        400: ('The given ID is incorrect', 'IncorrectIDPatternException'),
        403: ('The given API key is incorrect', 'IncorrectApiKeyException'),
        404: ('The given Doc ID was bad', 'BadDocIDException'),
        429: ('The 1000 call an hour limit has been exceeded',
              'ExceedCallLimitException'),
    }
    if status_code in failures:
        message, exception_name = failures[status_code]
        LOGGER.error(message)
        raise getattr(reggov_api_doc_error, exception_name)
def do_job(api_key):
    """
    Gets a job from the server and handles the job based on the
    type of job

    Raises NoConnectionError (chained to the underlying failure) if the
    job server cannot be reached or returns an unparseable body.
    """
    try:
        LOGGER.info('Getting job from server...')
        response = requests.get('http://capstone.cs.moravian.edu:5000/get_job')
        job = response.json()
        LOGGER.info("Job has been acquired")
    except Exception as error:
        LOGGER.error("A connection error has occurred")
        # Chain the original exception so the root cause survives in
        # the traceback instead of being silently replaced.
        raise NoConnectionError from error
    results = handle_specific_job(job, api_key)
    if results is None:
        # Nothing was produced for this job; there is nothing to post.
        return
    post_job(results)
def post_job(results):
    """Post packaged *results* back to the capstone job server."""
    LOGGER.info("Packaging successful!")
    LOGGER.info("Posting job to server")
    endpoint = 'http://capstone.cs.moravian.edu:5000/return_result'
    requests.post(endpoint, json=results)
    LOGGER.info("Job has successfully been posted!")
def find_docket_data(manager, job, job_id):
    """Fetch the docket identified by *job* and package it.

    Returns the packaged results ready to be posted to the server.
    """
    print("Getting docket from regulations.gov...\n")
    docket = get_docket(manager.api_key, job['docket_id'])
    LOGGER.info("Job#%s: Packaging docket..", str(job_id))
    return package_docket(docket, manager.client_id, job_id)