Exemple #1
0
def test_callfailexception(mock_req):
    """A 403 reply raises CallFailException for client- and server-keyed urls."""
    for add_api_key in (client_add_api_key, server_add_api_key):
        keyed_url = add_api_key(base_url)
        mock_req.get(keyed_url, status_code=403)
        with pytest.raises(CallFailException):
            api_call_manager(keyed_url)
Exemple #2
0
def test_retry_calls_failure(mock_req):
    """A 304 reply raises CallFailException for client- and server-keyed urls."""
    for add_api_key in (client_add_api_key, server_add_api_key):
        keyed_url = add_api_key(base_url)
        mock_req.get(keyed_url, status_code=304)
        with pytest.raises(CallFailException):
            api_call_manager(keyed_url)
Exemple #3
0
def documents_processor(urls, job_id, client_id):
    """
    Fetch every url with the client api key, pass each response to
    process_results, and build the reply that reports the job's work files.
    A url whose call or processing raises is logged and skipped.
    :param urls: list of urls that have to be called
    :param job_id: the id of the job that is being worked on currently
    :param client_id: id of the client calling this function
    :return: dict with the job id, the type 'docs', the module-level
             workfiles list, the client id and the version
    """
    global workfiles
    # Reset the module-level list for this job.
    # Presumably process_results fills it - confirm against its definition.
    workfiles = []
    for target in urls:
        try:
            response = api_call_manager(client_add_api_key(target))
            process_results(response)
        except Exception:
            logger.error('Error - URL processing failed')

    reply = dict(
        job_id=job_id,
        type='docs',
        data=workfiles,
        client_id=client_id,
        version=version,
    )
    return reply
Exemple #4
0
def download_attachments(dirpath, doc_json, documentId):
    """
    Download the other attachments for the document
    :param dirpath: path to the directory where the download will be saved
    :param doc_json: the json from a single document api call
    :param documentId: the string of a documentId
    :return: the number of entries in doc_json['attachments'] (0 when the
             key is missing). The count is taken before any download
             starts, so it is returned even if a later download fails.
    """
    # Offset from the start of 'contentType' to the value after the '='.
    value_offset = len('contentType=')
    total_requests = 0
    try:
        extra_attachments = doc_json['attachments']
        total_requests += len(extra_attachments)
        for attachment in extra_attachments:
            for a_format in attachment['fileFormats']:
                # Pause before every download request.
                time.sleep(30)
                format_url = str(a_format)
                # Everything after 'contentType=' is the content type.
                value_start = format_url.index('contentType') + value_offset
                content_type = format_url[value_start:]
                result = api_call_manager(client_add_api_key(format_url))
                download_document(dirpath, documentId, result, content_type)
    except KeyError:
        # A missing 'attachments' or 'fileFormats' key ends the download
        # quietly; whatever was counted so far is still returned.
        pass
    except CallFailException:
        logger.error('Error - API call failed')
    return total_requests
def test_collect_attachments(mock_req, workfile_tempdir):
    """
    A document with one attachment in two file formats makes
    get_extra_documents return 1; time.sleep is patched out.
    """
    download_base = ("https://api.data.gov/regulations/v3/download?"
                     "documentId=FDA-2015-N-0540-0004"
                     "&attachmentNumber=1&contentType=")
    msw_url = download_base + "msw12"
    pdf_url = download_base + "pdf"
    doc_url = client_add_api_key(make_doc_url("DOCUMENT"))

    # The document lists both format urls under a single attachment.
    doc_body = ('{ "attachments": [ { "fileFormats": [ "' + msw_url +
                '", "' + pdf_url + '" ] } ] }')
    mock_req.get(doc_url, status_code=200, text=doc_body)
    for format_url in (msw_url, pdf_url):
        mock_req.get(client_add_api_key(format_url),
                     status_code=200,
                     text='Document!')

    with mock.patch('time.sleep'):
        result = get_extra_documents(api_call_manager(doc_url),
                                     workfile_tempdir,
                                     "FDA-2015-N-0540-0004")

    assert result == 1
def test_download_document(workfile_tempdir, mock_req):
    """Downloading an msw12 response leaves a doc.<id>.doc file in the dir."""
    doc_id = "FDA-2015-N-0540-0004"
    url = ("https://api.data.gov/regulations/v3/download?documentId="
           "FDA-2015-N-0540-0004&attachmentNumber=1&contentType=msw12")
    keyed_url = client_add_api_key(url)
    mock_req.get(keyed_url, status_code=200, reason="")
    response = api_call_manager(keyed_url)
    download_document(workfile_tempdir, doc_id, response, "msw12")
    expected_file = workfile_tempdir + "/doc." + doc_id + ".doc"
    assert os.path.exists(expected_file)
Exemple #7
0
def get_work(client_id):
    """
    Ask the server's /get_work endpoint for work to process
    :param client_id: the id of the client calling /get_work;
                      converted with str() before it is put in the url
    :return: whatever man.api_call_manager returns for the request
    """
    query = "/get_work?client_id=" + str(client_id)
    work = man.api_call_manager(serverurl + query)
    logger.critical('Obtained work from server.')
    return work
Exemple #8
0
def get_work(server_url, client_id):
    """
    Ask the server's /get_work endpoint for work to process
    :param server_url: base url of the server; the endpoint path is appended
    :param client_id: the id of the client calling /get_work; it is
                      concatenated as-is, so it must already be a string
    :return: whatever man.api_call_manager returns for the request
    """
    query = '/get_work?client_id=' + client_id
    work = man.api_call_manager(server_url + query)
    logger.warning('Obtained work from server.')
    return work
Exemple #9
0
def monolith():
    """
    Generate document-listing jobs and add them to the work queue.

    Reads the record count from the regulations API, builds one listing
    url per 1000-record page, and queues the urls in jobs holding at most
    1000 urls each. It is kept as a single function so that it can simply
    return when there is no api key or the count request fails.
    :return: 0 if the record-count request fails, otherwise None
    """
    url_base = 'https://api.data.gov/regulations/v3/documents.json?rpp=1000'
    r = redis_manager.RedisManager()
    regulations_key = config.server_read_value('api key')

    if regulations_key == '':
        print('No API Key!')
        return

    # Ask only for the counts to learn how many documents are available.
    count_url = (
        'https://api.data.gov/regulations/v3/documents.json?api_key='
        + regulations_key + '&countsOnly=1')
    try:
        count_response = api_manager.api_call_manager(count_url)
        record_count = count_response.json()['totalNumRecords']
    except api_manager.CallFailException:
        logger.error('Error occured with API request')
        print('Error occurred with docs_work_gen regulations API request.')
        return 0

    # Each page is 1000 documents, so this is the last page number used.
    max_page_hit = record_count // 1000

    # NOTE(review): the page number is incremented before it is used, so
    # the first url carries po=1000 and po=0 is never requested; a record
    # count below 1000 queues nothing. Confirm this is intended.
    job_id_chars = string.ascii_letters + string.digits
    current_page = 0
    while current_page < max_page_hit:
        # Collect up to 1000 urls for one job, or fewer on the last job.
        url_list = []
        while len(url_list) < 1000 and current_page < max_page_hit:
            current_page += 1
            url_list.append(url_base + '&po=' + str(current_page * 1000))

        # A job is [random 16-character id, 'docs', urls].
        job_id = ''.join(random.choices(job_id_chars, k=16))
        docs_work = [job_id, 'docs', url_list]
        r.add_to_queue(endpoints.generate_json(docs_work))
Exemple #10
0
def test_user_out_of_api_calls_sleeps(mock_req):
    """
    A 429 reply followed by a 200 reply ends with the 200 body being
    returned, for client- and server-keyed urls alike.
    """
    for add_api_key in (client_add_api_key, server_add_api_key):
        keyed_url = add_api_key(base_url)
        # Responses are served in order: first the 429, then the 200.
        replies = [
            {'text': 'resp1', 'status_code': 429},
            {'text': '{}', 'status_code': 200},
        ]
        mock_req.register_uri('GET', keyed_url, replies)
        assert api_call_manager(keyed_url).text == '{}'
def test_valid_results(mock_req):
    """process_results returns a truthy value for a two-document listing."""
    keyed_url = client_add_api_key(base_url)
    # The backslash continuation inside the literal keeps the next line's
    # leading spaces as part of the mocked text, between the two documents.
    mock_req.get(keyed_url,
                 status_code=200,
                 text='{"documents": '
                 '[{"documentId": '
                 '"CMS-2005-0001-0001", '
                 '"attachmentCount": 4},\
                                         {"documentId": '
                 '"CMS-2005-0001-0002", '
                 '"attachmentCount": 999}]}')
    result = process_results(api_call_manager(keyed_url))
    assert result
Exemple #12
0
def document_processor(doc_ids):
    """
    This process takes all of the document ids given to it and
    saves all of the data for the documents in a temporary directory.
    A document whose api call raises CallFailException is logged
    and skipped; the remaining ids are still processed.
    :param doc_ids: list of document ids that have to be collected.
    :return: the tempfile.TemporaryDirectory object that the data was
             written to (its path is available as .name).
    """
    dirpath = tempfile.TemporaryDirectory()
    for doc_id in doc_ids:
        try:
            result = api_call_manager(client_add_api_key(make_doc_url(doc_id)))
            # The value returned by get_extra_documents is not needed here.
            get_extra_documents(result, dirpath.name, doc_id)
        except CallFailException:
            logger.error('Error - Bad document ID')
    return dirpath
def test_collect_extra_documents(mock_req, workfile_tempdir):
    """A document with one pdf file format makes get_extra_documents return 1."""
    pdf_url = ("https://api.data.gov/regulations/v3/download?documentId="
               "OSHA-H117-2006-0947-0647&attachmentNumber=1&contentType=pdf")
    doc_url = client_add_api_key(make_doc_url("DOCUMENT"))

    mock_req.get(doc_url,
                 status_code=200,
                 text='{ "fileFormats": ["' + pdf_url + '"] }')
    mock_req.get(client_add_api_key(pdf_url),
                 status_code=200,
                 text='Document!')

    doc_response = api_call_manager(doc_url)
    result = get_extra_documents(doc_response, workfile_tempdir,
                                 "OSHA-H117-2006-0947-0647")

    assert result == 1
def test_call_fail_raises_exception(mock_req):
    """A 407 reply for the plain base_url raises CallFailException."""
    mock_req.get(base_url, text='{}', status_code=407)
    with pytest.raises(CallFailException):
        api_call_manager(base_url)
def test_successful_call(mock_req):
    """A 200 reply for the plain base_url is returned with its body intact."""
    mock_req.get(base_url, text='{}', status_code=200)
    response = api_call_manager(base_url)
    assert response.text == '{}'
def test_empty_json(mock_req):
    """An empty 200 body raises json.JSONDecodeError when processed."""
    mock_req.get(base_url, text='', status_code=200)
    with pytest.raises(json.JSONDecodeError):
        response = api_call_manager(base_url)
        process_results(response)
def test_bad_json_format(mock_req):
    """A body with an unquoted key raises json.JSONDecodeError when processed."""
    malformed_body = '{information: [{},{}]}'
    mock_req.get(base_url, text=malformed_body, status_code=200)
    with pytest.raises(json.JSONDecodeError):
        response = api_call_manager(base_url)
        process_results(response)
Exemple #18
0
def test_success(mock_req):
    """A 200 reply is returned unchanged for client- and server-keyed urls."""
    for add_api_key in (client_add_api_key, server_add_api_key):
        keyed_url = add_api_key(base_url)
        mock_req.get(keyed_url, status_code=200, text='{}')
        assert api_call_manager(keyed_url).text == '{}'