def test_callfailexception(mock_req):
    """
    A 403 response must surface as CallFailException, both for the
    client-keyed and for the server-keyed form of the base url
    """
    for add_key in (client_add_api_key, server_add_api_key):
        mock_req.get(add_key(base_url), status_code=403)
        with pytest.raises(CallFailException):
            api_call_manager(add_key(base_url))
def test_retry_calls_failure(mock_req):
    """
    A 304 response must end in CallFailException, both for the
    client-keyed and for the server-keyed form of the base url
    """
    for add_key in (client_add_api_key, server_add_api_key):
        mock_req.get(add_key(base_url), status_code=304)
        with pytest.raises(CallFailException):
            api_call_manager(add_key(base_url))
def documents_processor(urls, job_id, client_id):
    """
    Fetch every url in turn, pass each response to process_results and
    wrap the module-level workfiles list in the json sent back to the server
    :param urls: list of urls that have to be called
    :param job_id: the id of the job that is being worked on currently
    :param client_id: id of the client calling this function
    :return result: the json to be returned to the server after each call
                    is processed
    """
    # The module-level list is rebound to a fresh list on every call;
    # presumably process_results fills it - verify against that function.
    global workfiles
    workfiles = []

    for target in urls:
        try:
            process_results(api_call_manager(client_add_api_key(target)))
        except Exception:
            # Best effort: a failing url is logged and the rest still run.
            logger.error('Error - URL processing failed')

    payload = {}
    payload['job_id'] = job_id
    payload['type'] = 'docs'
    payload['data'] = workfiles
    payload['client_id'] = client_id
    payload['version'] = version
    return payload
def download_attachments(dirpath, doc_json, documentId):
    """
    Fetch and save every file format of every attachment of a document
    :param dirpath: path to the directory where the download will be saved
    :param doc_json: the json from a single document api call
    :param documentId: the string of a documentId
    :return: the number of entries in doc_json['attachments'], or 0 when
             that key is missing
    """
    request_count = 0
    try:
        attachments = doc_json['attachments']
        request_count += len(attachments)
        for entry in attachments:
            for file_format in entry['fileFormats']:
                # Pause before each download request.
                time.sleep(30)
                format_url = str(file_format)
                # 12 skips the 11 characters of 'contentType' plus the
                # one character that follows it in the url.
                content_type = format_url[format_url.index('contentType') + 12:]
                response = api_call_manager(client_add_api_key(format_url))
                download_document(dirpath, documentId, response, content_type)
    except KeyError:
        # Missing 'attachments' / 'fileFormats' keys are silently ignored.
        pass
    except CallFailException:
        # A failed call ends the remaining downloads; the count is kept.
        logger.error('Error - API call failed')
    return request_count
def test_collect_attachments(mock_req, workfile_tempdir):
    """
    A document with one attachment offered in two file formats makes
    get_extra_documents return 1 (time.sleep is patched out)
    """
    msw_url = ("https://api.data.gov/regulations/v3/download?documentId="
               "FDA-2015-N-0540-0004&attachmentNumber=1&contentType=msw12")
    pdf_url = ("https://api.data.gov/regulations/v3/download?documentId="
               "FDA-2015-N-0540-0004&attachmentNumber=1&contentType=pdf")
    document_body = ('{ "attachments": [ { "fileFormats": [ "'
                     + msw_url + '", "' + pdf_url + '" ] } ] }')

    # The document call itself, then one download per file format.
    mock_req.get(client_add_api_key(make_doc_url("DOCUMENT")),
                 status_code=200, text=document_body)
    mock_req.get(client_add_api_key(msw_url),
                 status_code=200, text='Document!')
    mock_req.get(client_add_api_key(pdf_url),
                 status_code=200, text='Document!')

    with mock.patch('time.sleep'):
        result = get_extra_documents(
            api_call_manager(client_add_api_key(make_doc_url("DOCUMENT"))),
            workfile_tempdir,
            "FDA-2015-N-0540-0004")

    assert result == 1
def test_download_document(workfile_tempdir, mock_req):
    """
    Downloading with content type "msw12" must leave the file
    doc.FDA-2015-N-0540-0004.doc in the work directory
    """
    url = ("https://api.data.gov/regulations/v3/download?documentId="
           "FDA-2015-N-0540-0004&attachmentNumber=1&contentType=msw12")
    mock_req.get(client_add_api_key(url), status_code=200, reason="")

    response = api_call_manager(client_add_api_key(url))
    content_type = "msw12"
    download_document(workfile_tempdir, "FDA-2015-N-0540-0004",
                      response, content_type)

    expected_path = workfile_tempdir + "/doc.FDA-2015-N-0540-0004.doc"
    assert os.path.exists(expected_path)
def get_work(client_id):
    """
    Ask the server's /get_work endpoint for work to process
    :param client_id: the id of the client calling /get_work
    :return: the result of making a call to get work
    """
    # The base address comes from the module-level serverurl.
    endpoint = "".join([serverurl, "/get_work?client_id=", str(client_id)])
    work = man.api_call_manager(endpoint)
    logger.critical('Obtained work from server.')
    return work
def get_work(server_url, client_id):
    """
    Calls the /get_work endpoint of the server to fetch work to process
    :param server_url: base url of the work server, without a trailing
                       '/get_work'
    :param client_id: the id of the client calling /get_work; any value
                      is accepted and converted with str()
    :return: the result of making a call to get work
    """
    # str() keeps non-string ids (e.g. ints) from raising TypeError on
    # concatenation; string ids produce exactly the same url as before.
    url = server_url + '/get_work?client_id=' + str(client_id)
    result = man.api_call_manager(url)
    logger.warning('Obtained work from server.')
    return result
def monolith(): """ Runs the script. This is one monolithic function (aptly named) as the script just needs to be run; however, there is a certain point where I need to break out of the program if an error occurs, and I wasn't sure how exactly sys.exit() would work and whether or not it would mess with things outside of / calling this script, so I just made one giant method so I can return when needed. :return: """ url_base = 'https://api.data.gov/regulations/v3/documents.json?rpp=1000' r = redis_manager.RedisManager() regulations_key = config.server_read_value('api key') current_page = 0 if regulations_key != '': # Gets number of documents available to download try: url = \ 'https://api.data.gov/regulations/v3/documents.json?api_key=' \ + regulations_key + '&countsOnly=1' record_count = \ api_manager.api_call_manager(url).json()['totalNumRecords'] except api_manager.CallFailException: logger.error('Error occured with API request') print('Error occurred with docs_work_gen regulations API request.') return 0 # Gets the max page we'll go to; each page is 1000 documents max_page_hit = record_count // 1000 # This loop generates lists of URLs, sending out a job and # writing them to the work server every 1000 URLs. # It will stop and send whatever's left if we hit the max page limit. while current_page < max_page_hit: url_list = [] for i in range(1000): current_page += 1 url_full = url_base + '&po=' + str(current_page * 1000) url_list.append(url_full) if current_page == max_page_hit: break # Makes a JSON from the list of URLs and send # it to the queue as a job docs_work = [ ''.join( random.choices(string.ascii_letters + string.digits, k=16)), 'docs', url_list ] r.add_to_queue(endpoints.generate_json(docs_work)) else: print('No API Key!')
def test_user_out_of_api_calls_sleeps(mock_req):
    """
    When the first response is a 429 and the second a 200, the call must
    finish with the body of the 200; checked for the client-keyed and the
    server-keyed url
    """
    def throttled_then_ok():
        # A fresh response list per registration, as in two literals.
        return [{'text': 'resp1', 'status_code': 429},
                {'text': '{}', 'status_code': 200}]

    for add_key in (client_add_api_key, server_add_api_key):
        mock_req.register_uri('GET', add_key(base_url), throttled_then_ok())
        assert api_call_manager(add_key(base_url)).text == '{}'
def test_valid_results(mock_req):
    """
    A 200 response whose body lists two documents is handed to
    process_results, and the value it returns must be truthy
    """
    urls = [base_url]  # not read again inside this test
    # NOTE(review): the backslash inside the fourth literal is laid out
    # here as an in-string line continuation - confirm against the
    # original, un-collapsed file.
    mock_req.get(client_add_api_key(base_url), status_code=200,
                 text='{"documents": '
                      '[{"documentId": '
                      '"CMS-2005-0001-0001", '
                      '"attachmentCount": 4},\
 {"documentId": '
                      '"CMS-2005-0001-0002", '
                      '"attachmentCount": 999}]}')
    result = process_results(api_call_manager(client_add_api_key(base_url)))
    assert result
def document_processor(doc_ids):
    """
    This process takes all of the document ids given to it and saves all
    of the data for the documents in a temporary directory.
    :param doc_ids: list of document ids that have to be collected.
    :return: the tempfile.TemporaryDirectory object the data was written
             to; its path is available as ``.name``
    """
    dirpath = tempfile.TemporaryDirectory()
    for doc_id in doc_ids:
        try:
            result = api_call_manager(client_add_api_key(make_doc_url(doc_id)))
            # The count returned by get_extra_documents is not needed here.
            get_extra_documents(result, dirpath.name, doc_id)
        except CallFailException:
            # A failing id is logged and the remaining ids are still tried.
            logger.error('Error - Bad document ID')
    return dirpath
def test_collect_extra_documents(mock_req, workfile_tempdir):
    """
    A document exposing a single pdf file format makes
    get_extra_documents return 1
    """
    pdf_url = ("https://api.data.gov/regulations/v3/download?documentId="
               "OSHA-H117-2006-0947-0647&attachmentNumber=1&contentType=pdf")
    document_body = '{ "fileFormats": ["' + pdf_url + '"] }'

    mock_req.get(client_add_api_key(make_doc_url("DOCUMENT")),
                 status_code=200, text=document_body)
    mock_req.get(client_add_api_key(pdf_url),
                 status_code=200, text='Document!')

    result = get_extra_documents(
        api_call_manager(client_add_api_key(make_doc_url("DOCUMENT"))),
        workfile_tempdir,
        "OSHA-H117-2006-0947-0647")

    assert result == 1
def test_call_fail_raises_exception(mock_req):
    """
    A 407 response from the base url must raise CallFailException
    """
    mock_req.get(base_url, text='{}', status_code=407)

    with pytest.raises(CallFailException):
        api_call_manager(base_url)
def test_successful_call(mock_req):
    """
    A 200 response is returned to the caller with its body intact
    """
    mock_req.get(base_url, text='{}', status_code=200)

    response = api_call_manager(base_url)
    assert response.text == '{}'
def test_empty_json(mock_req):
    """
    An empty 200 body must raise json.JSONDecodeError when the response
    is fetched and processed
    """
    mock_req.get(base_url, text='', status_code=200)

    # The fetch stays inside the block, so the error may come from either call.
    with pytest.raises(json.JSONDecodeError):
        process_results(api_call_manager(base_url))
def test_bad_json_format(mock_req):
    """
    A 200 body that is not valid json (unquoted key) must raise
    json.JSONDecodeError when the response is fetched and processed
    """
    malformed_body = '{information: [{},{}]}'
    mock_req.get(base_url, text=malformed_body, status_code=200)

    # The fetch stays inside the block, so the error may come from either call.
    with pytest.raises(json.JSONDecodeError):
        process_results(api_call_manager(base_url))
def test_success(mock_req):
    """
    A 200 response body is returned unchanged, both for the client-keyed
    and for the server-keyed form of the base url
    """
    for add_key in (client_add_api_key, server_add_api_key):
        mock_req.get(add_key(base_url), status_code=200, text='{}')
        assert api_call_manager(add_key(base_url)).text == '{}'