def check_if_document_needs_renew(file, json_data, path):
    """
    Decide whether a document file fails our naming conventions.

    The naming checks pass when the file name starts with ``doc.``, the
    document id passes ``document_id_beginning_is_letter`` and
    ``document_id_ending_is_number``, and the job type is ``doc``.
    When the name also ends with ``.json`` the result additionally depends
    on ``document_id_matches_json_id``.

    :param file: name of the file being checked
    :param json_data: job data; its ``'type'`` entry is compared to ``'doc'``
    :param path: prefix concatenated with ``file`` and handed to
        ``document_id_matches_json_id``
    :return: False when every applicable check passes, True otherwise
    """
    doc_id = dc.get_doc_attributes(get_document_id(file))[2]

    # Each individual check is evaluated exactly once, before combining.
    has_doc_prefix = file.startswith('doc.')
    id_start_ok = document_id_beginning_is_letter(doc_id)
    id_end_ok = document_id_ending_is_number(doc_id)
    has_json_suffix = file.endswith('.json')
    is_doc_job = json_data['type'] == 'doc'

    naming_ok = has_doc_prefix and id_start_ok and id_end_ok and is_doc_job

    if naming_ok and has_json_suffix:
        # For json files the id inside the file decides the outcome.
        return not document_id_matches_json_id(path + file, doc_id)

    return not naming_ok
def check_document_exists(json_data, path=HOME_REGULATION_PATH):
    """
    Remove documents that already exist locally from their workfiles.

    For every entry of every workfile in ``json_data["data"]`` the expected
    location ``<path><org>/<docket_id>/<document_id>/doc.<id>.json`` is
    built and passed to ``check_if_file_exists_locally``. Entries reported
    as present are removed from the workfile in place. Afterwards the data
    is passed through ``remove_empty_lists`` so that no empty doc jobs
    remain.

    :param json_data: the json containing the workfiles under ``"data"``;
        each workfile is a list of entries with an ``'id'`` key
    :param path: prefix of the directory tree; it is joined by plain string
        concatenation, no separator is inserted after it
    :return: the result of ``remove_empty_lists`` applied to ``json_data``
    """
    for workfile in json_data["data"]:
        count = 0
        # Iterate over a snapshot: popping from the list that is being
        # iterated makes the iterator skip the entry right after each
        # removed one, and ``count`` then no longer refers to the entry
        # that was just checked.
        for line in list(workfile):
            document = line['id']
            alpha_doc_org, docket_id, document_id = \
                dc.get_doc_attributes(document)
            full_path = \
                path + alpha_doc_org + '/' + docket_id + '/' + document_id + \
                '/' + 'doc.' + document + '.json'
            # NOTE(review): assumes the helper returns ``count`` unchanged
            # for an existing file and advanced otherwise, so that it is the
            # live index of the current entry -- confirm against the helper.
            count, local_verdict = \
                check_if_file_exists_locally(full_path, count)
            if local_verdict:
                workfile.pop(count)
    json_data = remove_empty_lists(json_data)
    return json_data
def check_document_exists(json_data):
    """
    Remove documents that already exist locally from their workfiles.

    The directory tree is rooted at ``$HOME/regulations_data/``. For every
    entry of every workfile in ``json_data["data"]`` the expected location
    ``<root><org>/<docket_id>/<document_id>/doc.<id>.json`` is built and
    passed to ``local_files_check``. Entries reported as present are removed
    from the workfile in place. Afterwards the data is passed through
    ``remove_empty_lists`` so that no empty doc jobs remain.

    :param json_data: the json containing the work files under ``"data"``;
        each workfile is a list of entries with an ``"id"`` key
    :return: the result of ``remove_empty_lists`` applied to ``json_data``
    """
    # NOTE(review): the message below names workfile_length_checker although
    # it is emitted from check_document_exists; left unchanged in case log
    # consumers depend on the text -- confirm and correct separately.
    logger.warning(
        'Function Successful: % s',
        'workfile_length_checker: workfile_length_checker successfully called from process_docs',
        extra=d)

    home = os.getenv("HOME")
    path = home + "/regulations_data/"

    for workfile in json_data["data"]:
        count = 0
        # Iterate over a snapshot: popping from the list that is being
        # iterated makes the iterator skip the entry right after each
        # removed one, and ``count`` then no longer refers to the entry
        # that was just checked.
        for line in list(workfile):
            document = line["id"]
            alpha_doc_org, docket_id, document_id = dc.get_doc_attributes(
                "doc." + document + ".json")
            full_path = path + alpha_doc_org + "/" + docket_id + "/" + document_id + "/" + "doc." + document + ".json"
            # NOTE(review): assumes the helper returns ``count`` unchanged
            # for an existing file and advanced otherwise, so that it is the
            # live index of the current entry -- confirm against the helper.
            count, local_verdict = local_files_check(full_path, count)
            # Only the local check is active; a redis-based check was
            # sketched here but never enabled.
            if local_verdict:
                workfile.pop(count)
    json_data = remove_empty_lists(json_data)
    return json_data
def check_single_document(file, json_data, path):
    """
    Check a single document file against our naming conventions.

    The naming checks are: the file name starts with ``doc.``, the document
    id passes ``beginning_is_letter`` and ``ending_is_number``, and the job
    type is ``doc``. For names that also end with ``.json`` the id is
    compared through ``id_matches`` and the outcome is logged.

    :param file: name of the file being checked
    :param json_data: job data; its ``"type"`` entry is compared to ``"doc"``
    :param path: prefix concatenated with ``file`` and handed to
        ``id_matches``
    :return: True straight away when the json id comparison fails,
        otherwise the combined result of the naming checks
    """
    _, _, document_id = dc.get_doc_attributes(file)

    # Each individual check is evaluated exactly once, before combining.
    starts_with_doc = file.startswith("doc.")
    begins_with_letter = beginning_is_letter(document_id)
    ends_with_number = ending_is_number(document_id)
    ends_with_json = file.endswith(".json")
    job_type = json_data["type"] == "doc"

    document_checks = (
        starts_with_doc and begins_with_letter
        and ends_with_number and job_type
    )

    if document_checks and ends_with_json:
        if not id_matches(path + file, document_id):
            logger.debug('Variable Failure: %s',
                         'process_doc: id_matches is False', extra=d)
            # NOTE(review): a failed id comparison reports True here --
            # confirm this is the intended result for callers.
            return True
        logger.debug(
            'Variable Success: %s',
            'process_doc: ifFileStartsWithDoc, ifBeginWithDocLetter, ifEndIsDocNum, '
            'and job_type are True',
            extra=d)

    write_documents_checks_into_logger(
        begins_with_letter, ends_with_number, starts_with_doc, job_type)
    return document_checks
def check_document_exists(json_data, path):
    """
    Remove documents that already exist locally from their workfiles.

    For every entry of every workfile in ``json_data['data']`` the expected
    location ``<path><org>/<docket_id>/<document_id>/doc.<id>.json`` is
    built and passed to ``check_if_file_exists_locally``. Entries reported
    as present are removed from the workfile in place. Afterwards the data
    is passed through ``remove_empty_lists`` so that no empty doc jobs
    remain.

    :param json_data: the json containing the workfiles under ``'data'``;
        each workfile is a list of entries with an ``'id'`` key
    :param path: prefix of the directory tree; it is joined by plain string
        concatenation, no separator is inserted after it
    :return: the result of ``remove_empty_lists`` applied to ``json_data``
    """
    for workfile in json_data['data']:
        count = 0
        # Iterate over a snapshot: popping from the list that is being
        # iterated makes the iterator skip the entry right after each
        # removed one, and ``count`` then no longer refers to the entry
        # that was just checked.
        for line in list(workfile):
            document = line['id']
            alpha_doc_org, docket_id, document_id = \
                dc.get_doc_attributes(document)
            full_path = \
                path + alpha_doc_org + '/' + docket_id + '/' + document_id + \
                '/' + 'doc.' + document + '.json'
            # NOTE(review): assumes the helper returns ``count`` unchanged
            # for an existing file and advanced otherwise, so that it is the
            # live index of the current entry -- confirm against the helper.
            count, local_verdict = \
                check_if_file_exists_locally(full_path, count)
            if local_verdict:
                workfile.pop(count)
    json_data = remove_empty_lists(json_data)
    return json_data
def save_single_file_locally(cur_path, destination):
    """
    Copy one file into the org/docket/document directory tree.

    The target directory is ``<destination><org>/<docket_id>/<document_id>/``
    where the three parts come from ``dc.get_doc_attributes`` applied to the
    document id derived from the file name. The directory is created through
    ``create_new_dir`` and the file is then copied with ``shutil.copy``.
    Every step is traced with a pair of debug log records.

    :param cur_path: location of the file to be saved
    :param destination: prefix of the directory tree; joined by plain string
        concatenation, no separator is inserted after it
    :return: None
    """
    logger.debug(
        'Function Successful: % s',
        'save_single_file_locally: save_single_file_locally successfully called from process_doc',
        extra=d)

    # Step 1: file name of the source.
    logger.debug(
        'Calling Function: % s',
        'save_single_file_locally: save_single_file_locally calling get_file_name',
        extra=d)
    name = get_file_name(cur_path)
    logger.debug(
        'Function Successful: % s',
        'save_single_file_locally: save_single_file_locally successfully called get_file_name',
        extra=d)

    # Step 2: organisation, docket and document parts of the id.
    logger.debug(
        'Calling Function: % s',
        'save_single_file_locally: save_single_file_locally calling get_doc_attributes',
        extra=d)
    org, docket_id, document_id = dc.get_doc_attributes(get_document_id(name))
    logger.debug(
        'Function Successful: % s',
        'save_single_file_locally: save_single_file_locally successfully called get_doc_attributes',
        extra=d)

    sub_directory = org + "/" + docket_id + "/" + document_id + "/"
    target_dir = destination + sub_directory

    # Step 3: make sure the target directory exists.
    logger.debug(
        'Calling Function: % s',
        'save_single_file_locally: save_single_file_locally calling create_new_dir',
        extra=d)
    create_new_dir(target_dir)
    logger.debug(
        'Function Successful: % s',
        'save_single_file_locally: save_single_file_locally successfully called create_new_dir',
        extra=d)

    # Step 4: copy. target_dir already ends with a separator, so the target
    # path contains a doubled one; kept as in the established behaviour.
    logger.debug(
        'Calling Function: % s',
        'save_single_file_locally: save_single_file_locally calling copy',
        extra=d)
    shutil.copy(cur_path, target_dir + '/' + name)
    logger.debug(
        'Function Successful: % s',
        'save_single_file_locally: save_single_file_locally successfully called copy',
        extra=d)
def save_single_file_locally(current_path, destination):
    """
    Copy one file into the org/docket/document directory tree.

    The target directory is ``<destination><org>/<docket_id>/<document_id>/``
    where the three parts come from ``dc.get_doc_attributes`` applied to the
    document id derived from the file name. The directory is created through
    ``create_new_directory_for_path`` before the file is copied into it.

    :param current_path: location of the file to be saved
    :param destination: prefix of the directory tree; joined by plain string
        concatenation, no separator is inserted after it
    :return: None
    """
    name = get_file_name(current_path)
    org, docket_id, document_id = \
        dc.get_doc_attributes(get_document_id(name))

    sub_directory = org + '/' + docket_id + '/' + document_id + '/'
    target_dir = destination + sub_directory
    create_new_directory_for_path(target_dir)

    # target_dir already ends with a separator, so the target path contains
    # a doubled one; kept as in the established behaviour.
    target_file = target_dir + '/' + name
    shutil.copy(current_path, target_file)
def search_for_document_in_directory(document_id, directory_path=PATH):
    """
    Look up a document in the directory structure.

    The document is expected at
    ``<directory_path><org>/<docket_id>/<doc_id>/doc.<doc_id>.json``, with
    the three parts taken from ``dc.get_doc_attributes``.

    :param document_id: the document id being searched
    :param directory_path: prefix of the directory tree being searched;
        joined by plain string concatenation
    :return: the document's directory (without trailing separator) if its
        json file exists, else an empty string
    """
    orgs, dock_id, doc_id = dc.get_doc_attributes(document_id)
    document_dir = directory_path + orgs + "/" + dock_id + "/" + doc_id
    json_file = document_dir + "/" + "doc." + doc_id + ".json"
    return document_dir if os.path.isfile(json_file) else ""
def search_for_document_in_directory(document_id,
                                     directory_path=HOME_REGULATION_PATH):
    """
    Look up a document in the directory structure.

    The document is expected at
    ``<directory_path><org>/<docket_id>/<doc_id>/doc.<doc_id>.json``, with
    the three parts taken from ``dc.get_doc_attributes``.

    :param document_id: the document id being searched
    :param directory_path: prefix of the directory tree being searched;
        joined by plain string concatenation
    :return: the document's directory (without trailing separator) if its
        json file exists, else an empty string
    """
    organisations, dock_id, doc_id = dc.get_doc_attributes(document_id)
    document_dir = \
        directory_path + organisations + '/' + dock_id + '/' + doc_id
    json_file = document_dir + '/' + 'doc.' + doc_id + '.json'
    return document_dir if os.path.isfile(json_file) else ''
def test_get_doc_attributes_other_special():
    """An id with a letter segment in its docket part is split correctly."""
    org, docket, document = dc.get_doc_attributes('FDA-2018-N-0073-0002')
    expected = ("FDA", "FDA-2018-N-0073", "FDA-2018-N-0073-0002")
    assert (org, docket, document) == expected
def test_get_doc_attributes_special():
    """An underscore-separated FRDOC id is split correctly."""
    org, docket, document = dc.get_doc_attributes('AHRQ_FRDOC_0001-0001')
    expected = ("AHRQ_FRDOC", "AHRQ_FRDOC_0001", "AHRQ_FRDOC_0001-0001")
    assert (org, docket, document) == expected
def test_get_doc_attributes_multiple_agencies():
    """An id with two agency prefixes yields the org ``abcd-mesd``."""
    org, docket, document = \
        dc.get_doc_attributes('mesd-abcd-2018-234234-0001')
    expected = (
        "abcd-mesd",
        "mesd-abcd-2018-234234",
        "mesd-abcd-2018-234234-0001",
    )
    assert (org, docket, document) == expected
def test_get_doc_attributes():
    """A plain single-agency id is split into org, docket and document."""
    org, docket, document = dc.get_doc_attributes('mesd-2018-234234-0001')
    expected = ("mesd", "mesd-2018-234234", "mesd-2018-234234-0001")
    assert (org, docket, document) == expected