def check_document_exists(json_data):
    """
    Drop documents that are already saved locally from every workfile.

    For each entry of each workfile in ``json_data["data"]`` the expected
    location of the saved document under ``$HOME/regulations_data/`` is built
    and handed to ``local_files_check``. Entries whose verdict is truthy are
    removed from their workfile (the workfile list is updated in place).
    The whole structure is then passed through ``remove_empty_lists``,
    presumably so that no empty doc jobs remain.

    NOTE: only the local-files check is performed here; no queue (redis)
    check is made.

    :param json_data: the json containing the work files; ``json_data["data"]``
        is iterated as a collection of workfiles, each a list of entries
        that carry an ``"id"`` key
    :return: the value returned by ``remove_empty_lists(json_data)``
    """
    # NOTE(review): the message names workfile_length_checker although this is
    # check_document_exists; the text is kept unchanged in case log consumers
    # match on it.
    logger.warning(
        'Function Successful: % s',
        'workfile_length_checker: workfile_length_checker successfully called from process_docs',
        extra=d)

    home = os.getenv("HOME")
    path = home + "/regulations_data/"

    for workfile in json_data["data"]:
        count = 0
        remaining = []
        for line in workfile:
            document = line["id"]
            file_name = "doc." + document + ".json"
            alpha_doc_org, docket_id, document_id = df.get_doc_attributes(
                file_name)
            full_path = path + "/".join(
                (alpha_doc_org, docket_id, document_id, file_name))

            # count is still threaded through the helper exactly as before;
            # it is no longer used as a pop index.
            count, local_verdict = local_files_check(full_path, count)
            if not local_verdict:
                remaining.append(line)

        # Removing entries while iterating the same list skipped the entry
        # after each removal, so the survivors are collected first and then
        # written back into the same list object.
        workfile[:] = remaining

    json_data = remove_empty_lists(json_data)
    return json_data
def test_local_save(workfile_tempdir, savefile_tempdir):
    """
    Write a dummy document into the work directory, hand it to
    ``df.local_save`` and check that it shows up under
    ``<save dir>/<org>/<docket id>/<document id>/``.
    """
    filename = "doc.FMCSA-1997-2350-21654.json"
    source_file = workfile_tempdir + '/' + filename
    save_root = savefile_tempdir + '/'

    with open(source_file, 'w') as handle:
        handle.write("Stuff was written here")

    org, docket_id, document_id = df.get_doc_attributes(filename)
    df.local_save(source_file, save_root)

    expected_location = (
        save_root + org + '/' + docket_id + '/' + document_id + '/' + filename)
    assert os.path.exists(expected_location)
def test_get_file_list_and_bad_number_work(savefile_tempdir):
    """
    Run ``df.get_file_list`` on ``Bad_Number_Archive.zip`` and expect at least
    one listed file to fail the name checks (``doc.`` prefix, document id
    ending in a number and beginning with a letter).
    """
    archive = PATH + "Bad_Number_Archive.zip"
    target_dir = savefile_tempdir
    listing = df.get_file_list(archive, target_dir + "/", '123')

    condition = True
    # Every file is inspected (no early exit), so attributes are extracted
    # for each entry of the listing.
    for name in listing[0]:
        _org, _docket_id, document_id = df.get_doc_attributes(name)
        well_formed = (
            name.startswith("doc.")
            and df.ending_is_number(document_id)
            and df.beginning_is_letter(document_id)
        )
        if not well_formed:
            condition = False

    assert condition is False
def test_get_doc_attributes_other_special():
    """
    ``doc.FDA-2018-N-0073-0002.json`` must yield org ``FDA``, docket
    ``FDA-2018-N-0073`` and document ``FDA-2018-N-0073-0002``.
    """
    file_name = 'doc.FDA-2018-N-0073-0002.json'
    org, docket, document = df.get_doc_attributes(file_name)

    assert org == "FDA"
    assert docket == "FDA-2018-N-0073"
    assert document == "FDA-2018-N-0073-0002"
def test_get_doc_attributes_special():
    """
    ``doc.AHRQ_FRDOC_0001-0001.json`` must yield org ``AHRQ_FRDOC``, docket
    ``AHRQ_FRDOC_0001`` and document ``AHRQ_FRDOC_0001-0001``.
    """
    file_name = 'doc.AHRQ_FRDOC_0001-0001.json'
    org, docket, document = df.get_doc_attributes(file_name)

    assert org == "AHRQ_FRDOC"
    assert docket == "AHRQ_FRDOC_0001"
    assert document == "AHRQ_FRDOC_0001-0001"
def test_get_doc_attributes_multiple_agencies():
    """
    ``doc.mesd-abcd-2018-234234-0001.json`` must yield org ``abcd-mesd``,
    docket ``mesd-abcd-2018-234234`` and document
    ``mesd-abcd-2018-234234-0001``.
    """
    file_name = 'doc.mesd-abcd-2018-234234-0001.json'
    org, docket, document = df.get_doc_attributes(file_name)

    assert org == "abcd-mesd"
    assert docket == "mesd-abcd-2018-234234"
    assert document == "mesd-abcd-2018-234234-0001"
def test_get_doc_attributes():
    """
    ``doc.mesd-2018-234234-0001.json`` must yield org ``mesd``, docket
    ``mesd-2018-234234`` and document ``mesd-2018-234234-0001``.
    """
    file_name = 'doc.mesd-2018-234234-0001.json'
    org, docket, document = df.get_doc_attributes(file_name)

    assert org == "mesd"
    assert docket == "mesd-2018-234234"
    assert document == "mesd-2018-234234-0001"