コード例 #1
0
ファイル: docs_filter.py プロジェクト: bjcoleman/mirrulations
def check_document_exists(json_data):
    """
    Drop documents that are already saved locally from every workfile.

    For each entry of each workfile in ``json_data["data"]`` the expected
    on-disk location of the document is built under
    ``$HOME/regulations_data/`` and handed to ``local_files_check``. Entries
    for which that check reports a hit are removed from their workfile in
    place. The whole structure is then passed through ``remove_empty_lists``
    (presumably to strip workfiles that ended up empty -- confirm against
    that helper).

    Only the local file check is performed here; no queue check is made.

    :param json_data: mapping whose ``"data"`` value is an iterable of
        workfiles, each workfile being a list of entries with an ``"id"`` key
    :return: whatever ``remove_empty_lists(json_data)`` returns
    """

    # NOTE(review): the message names workfile_length_checker although this
    # is check_document_exists -- left untouched because it is runtime output.
    logger.warning(
        'Function Successful: % s',
        'workfile_length_checker: workfile_length_checker successfully called from process_docs',
        extra=d)

    home = os.getenv("HOME")
    path = home + "/regulations_data/"
    for workfile in json_data["data"]:
        count = 0
        remaining = []
        for line in workfile:
            document = line["id"]
            file_name = "doc." + document + ".json"
            alpha_doc_org, docket_id, document_id = df.get_doc_attributes(
                file_name)
            full_path = (path + alpha_doc_org + "/" + docket_id + "/" +
                         document_id + "/" + file_name)

            # count is still threaded through because local_files_check
            # takes and returns it; it is no longer used as a pop index.
            count, local_verdict = local_files_check(full_path, count)
            # TODO: also consult the queues (redis_files_check) once available.

            if not local_verdict:
                remaining.append(line)

        # Replace the contents in place instead of popping while iterating:
        # removing items from a list that is being looped over makes the
        # iterator skip the element right after each removal.
        workfile[:] = remaining

    json_data = remove_empty_lists(json_data)
    return json_data
コード例 #2
0
def test_local_save(workfile_tempdir, savefile_tempdir):
    """
    After df.local_save, the file must exist at
    <save dir>/<org>/<docket id>/<document id>/<filename>.
    """
    filename = "doc.FMCSA-1997-2350-21654.json"
    source = "/".join((workfile_tempdir, filename))
    with open(source, 'w') as handle:
        handle.write("Stuff was written here")

    org, docket_id, document_id = df.get_doc_attributes(filename)
    df.local_save(source, savefile_tempdir + '/')

    # Expected destination is derived from the attributes parsed out of the name.
    expected = "/".join(
        (savefile_tempdir, org, docket_id, document_id, filename))
    assert os.path.exists(expected)
コード例 #3
0
def test_get_file_list_and_bad_number_work(savefile_tempdir):
    """
    The file list extracted from Bad_Number_Archive.zip must contain at least
    one entry that fails the name checks (``doc.`` prefix, document id ending
    check, document id beginning check).
    """
    archive = PATH + "Bad_Number_Archive.zip"
    target_dir = savefile_tempdir
    listing = df.get_file_list(archive, target_dir + "/", '123')

    all_valid = True
    for name in listing[0]:
        _org, _docket_id, document_id = df.get_doc_attributes(name)

        well_formed = (name.startswith("doc.")
                       and df.ending_is_number(document_id)
                       and df.beginning_is_letter(document_id))
        if not well_formed:
            # Keep scanning: every entry is still parsed, as before.
            all_valid = False

    assert all_valid is False
コード例 #4
0
def test_get_doc_attributes_other_special():
    """A docket id with an extra letter segment ("-N-") is split correctly."""
    filename = 'doc.FDA-2018-N-0073-0002.json'
    org, docket, document = df.get_doc_attributes(filename)
    assert (org, docket, document) == (
        "FDA",
        "FDA-2018-N-0073",
        "FDA-2018-N-0073-0002",
    )
コード例 #5
0
def test_get_doc_attributes_special():
    """An underscore-separated FRDOC-style id is split correctly."""
    filename = 'doc.AHRQ_FRDOC_0001-0001.json'
    org, docket, document = df.get_doc_attributes(filename)
    assert (org, docket, document) == (
        "AHRQ_FRDOC",
        "AHRQ_FRDOC_0001",
        "AHRQ_FRDOC_0001-0001",
    )
コード例 #6
0
def test_get_doc_attributes_multiple_agencies():
    """An id with two agency prefixes yields the expected org, docket and document."""
    filename = 'doc.mesd-abcd-2018-234234-0001.json'
    org, docket, document = df.get_doc_attributes(filename)
    assert (org, docket, document) == (
        "abcd-mesd",
        "mesd-abcd-2018-234234",
        "mesd-abcd-2018-234234-0001",
    )
コード例 #7
0
def test_get_doc_attributes():
    """A plain single-agency id is split into org, docket id and document id."""
    filename = 'doc.mesd-2018-234234-0001.json'
    org, docket, document = df.get_doc_attributes(filename)
    assert (org, docket, document) == (
        "mesd",
        "mesd-2018-234234",
        "mesd-2018-234234-0001",
    )