Example #1
0
def check_if_document_needs_renew(file, json_data, path):
    """
    Decide whether a document file fails the naming conventions.

    The file passes the naming checks when its name starts with 'doc.',
    its document id satisfies both ``document_id_beginning_is_letter``
    and ``document_id_ending_is_number``, and ``json_data['type']`` is
    'doc'.

    :param file: name of the document file being checked
    :param json_data: mapping with a 'type' entry describing the job
    :param path: directory prefix; ``path + file`` is handed to
        ``document_id_matches_json_id``
    :return: True when any naming check fails, or when a '.json' file
        passes them but ``document_id_matches_json_id`` reports a
        mismatch; False otherwise
    """
    document_id = dc.get_doc_attributes(get_document_id(file))[2]

    # Every check is evaluated up front, in this order, before combining.
    has_doc_prefix = file.startswith('doc.')
    id_starts_with_letter = document_id_beginning_is_letter(document_id)
    id_ends_with_number = document_id_ending_is_number(document_id)
    has_json_suffix = file.endswith('.json')
    is_doc_job = json_data['type'] == 'doc'

    passes_naming_checks = (
        has_doc_prefix
        and id_starts_with_letter
        and id_ends_with_number
        and is_doc_job
    )

    if not passes_naming_checks:
        return True

    # Well-named files without a '.json' suffix are accepted as they are.
    if not has_json_suffix:
        return False

    # For '.json' files the id is additionally compared with the file.
    if document_id_matches_json_id(path + file, document_id):
        return False
    return True
Example #2
0
def check_document_exists(json_data, path=HOME_REGULATION_PATH):
    """
    Remove documents that already exist locally from every workfile.

    For each entry of each workfile in ``json_data["data"]`` the location
    of its ``doc.<id>.json`` file under ``path`` is built from the values
    returned by ``dc.get_doc_attributes`` and passed to
    ``check_if_file_exists_locally``.  Entries reported as present are
    removed from their workfile; the workfile list is updated in place.
    Finally the data is passed through ``remove_empty_lists``.

    :param json_data: mapping whose "data" entry is an iterable of
        workfiles, each a list of mappings carrying an 'id' key
    :param path: root of the local document tree; no separator is
        inserted after it, so it should already end with '/'
    :return: the value returned by ``remove_empty_lists(json_data)``
    """
    for workfile in json_data["data"]:
        # Collect the surviving entries instead of popping from the list
        # while iterating it: popping mid-iteration makes the iterator
        # skip the entry that follows every removed one.
        remaining = []
        for line in workfile:
            document = line['id']
            alpha_doc_org, docket_id, document_id = \
                dc.get_doc_attributes(document)
            full_path = \
                path + alpha_doc_org + '/' + docket_id + '/' + document_id + \
                '/' + 'doc.' + document + '.json'

            # len(remaining) is the index this entry would occupy in the
            # filtered workfile.  NOTE(review): assumes the verdict depends
            # only on full_path, not on the count argument - confirm.
            _, local_verdict = \
                check_if_file_exists_locally(full_path, len(remaining))

            if not local_verdict:
                remaining.append(line)

        # Slice assignment keeps the same list object inside json_data.
        workfile[:] = remaining

    json_data = remove_empty_lists(json_data)
    return json_data
Example #3
0
def check_document_exists(json_data):
    """
    Remove documents that already exist locally from every workfile.

    The local document tree is ``$HOME/regulations_data/``.  For each entry
    of each workfile in ``json_data["data"]`` the location of its
    ``doc.<id>.json`` file is built from the values returned by
    ``dc.get_doc_attributes`` and passed to ``local_files_check``.  Entries
    reported as present are removed from their workfile; the workfile list
    is updated in place.  Finally the data is passed through
    ``remove_empty_lists``.

    :param json_data: the json containing the work files; its "data" entry
        is an iterable of workfiles, each a list of mappings with an "id"
    :return: the value returned by ``remove_empty_lists(json_data)``
    """

    logger.warning(
        'Function Successful: % s',
        'workfile_length_checker: workfile_length_checker successfully called from process_docs',
        extra=d)

    home = os.getenv("HOME")
    path = home + "/regulations_data/"
    for workfile in json_data["data"]:
        # Collect the surviving entries instead of popping from the list
        # while iterating it: popping mid-iteration makes the iterator
        # skip the entry that follows every removed one.
        remaining = []
        for line in workfile:
            document = line["id"]
            file_name = "doc." + document + ".json"
            alpha_doc_org, docket_id, document_id = dc.get_doc_attributes(
                file_name)
            full_path = (path + alpha_doc_org + "/" + docket_id + "/"
                         + document_id + "/" + file_name)

            # len(remaining) is the index this entry would occupy in the
            # filtered workfile.  NOTE(review): assumes the verdict depends
            # only on full_path, not on the count argument - confirm.
            _, local_verdict = local_files_check(full_path, len(remaining))

            if not local_verdict:
                remaining.append(line)

        # Slice assignment keeps the same list object inside json_data.
        workfile[:] = remaining

    json_data = remove_empty_lists(json_data)
    return json_data
Example #4
0
def check_single_document(file, json_data, path):
    """
    Checks whether a document file conforms to the naming conventions.

    The combined check requires the file name to start with "doc.", the
    document id to satisfy ``beginning_is_letter`` and ``ending_is_number``,
    and ``json_data["type"]`` to be "doc".

    :param file: name of the document file being checked
    :param json_data: mapping with a "type" entry describing the job
    :param path: directory prefix; ``path + file`` is handed to ``id_matches``
    :return: True straight away when a ".json" file passes the combined
        check but ``id_matches`` fails; otherwise the value of the combined
        check, after the individual results have been logged
    """
    _, _, document_id = dc.get_doc_attributes(file)

    # Every check is evaluated up front, in this order, before combining.
    has_doc_prefix = file.startswith("doc.")
    id_starts_with_letter = beginning_is_letter(document_id)
    id_ends_with_number = ending_is_number(document_id)
    has_json_suffix = file.endswith(".json")
    is_doc_job = json_data["type"] == "doc"

    passes_checks = (
        has_doc_prefix
        and id_starts_with_letter
        and id_ends_with_number
        and is_doc_job
    )

    if passes_checks and has_json_suffix:
        if not id_matches(path + file, document_id):
            logger.debug('Variable Failure: %s',
                         'process_doc: id_matches is False',
                         extra=d)
            return True

        logger.debug(
            'Variable Success: %s',
            'process_doc: ifFileStartsWithDoc, ifBeginWithDocLetter, ifEndIsDocNum, '
            'and job_type are True',
            extra=d)

    write_documents_checks_into_logger(id_starts_with_letter,
                                       id_ends_with_number,
                                       has_doc_prefix,
                                       is_doc_job)
    return passes_checks
Example #5
0
def check_document_exists(json_data, path):
    """
    Remove documents that already exist locally from every workfile.

    For each entry of each workfile in ``json_data['data']`` the location
    of its ``doc.<id>.json`` file under ``path`` is built from the values
    returned by ``dc.get_doc_attributes`` and passed to
    ``check_if_file_exists_locally``.  Entries reported as present are
    removed from their workfile; the workfile list is updated in place.
    Finally the data is passed through ``remove_empty_lists``.

    :param json_data: mapping whose 'data' entry is an iterable of
        workfiles, each a list of mappings carrying an 'id' key
    :param path: root of the local document tree; no separator is
        inserted after it, so it should already end with '/'
    :return: the value returned by ``remove_empty_lists(json_data)``
    """
    for workfile in json_data['data']:
        # Collect the surviving entries instead of popping from the list
        # while iterating it: popping mid-iteration makes the iterator
        # skip the entry that follows every removed one.
        remaining = []
        for line in workfile:
            document = line['id']
            alpha_doc_org, docket_id, document_id = \
                dc.get_doc_attributes(document)
            full_path = \
                path + alpha_doc_org + '/' + docket_id + '/' + document_id + \
                '/' + 'doc.' + document + '.json'

            # len(remaining) is the index this entry would occupy in the
            # filtered workfile.  NOTE(review): assumes the verdict depends
            # only on full_path, not on the count argument - confirm.
            _, local_verdict = \
                check_if_file_exists_locally(full_path, len(remaining))

            if not local_verdict:
                remaining.append(line)

        # Slice assignment keeps the same list object inside json_data.
        workfile[:] = remaining

    json_data = remove_empty_lists(json_data)
    return json_data
Example #6
0
def save_single_file_locally(cur_path, destination):
    """
    Copy the file at ``cur_path`` into the document tree under ``destination``.

    The target directory is ``destination`` followed by the three values
    returned by ``dc.get_doc_attributes`` for the file's document id, each
    terminated by "/".  The directory is created with ``create_new_dir``
    and the file is copied into it under its own name.  Every step is
    bracketed by debug log entries.

    :param cur_path: location of the file to be saved
    :param destination: root directory the file should be saved under; no
        separator is inserted after it
    :return: None
    """
    calling = 'Calling Function: % s'
    succeeded = 'Function Successful: % s'

    logger.debug(
        succeeded,
        'save_single_file_locally: save_single_file_locally successfully called from process_doc',
        extra=d)

    # Step 1: name of the file being saved.
    logger.debug(
        calling,
        'save_single_file_locally: save_single_file_locally calling get_file_name',
        extra=d)
    source_name = get_file_name(cur_path)
    logger.debug(
        succeeded,
        'save_single_file_locally: save_single_file_locally successfully called get_file_name',
        extra=d)

    # Step 2: attributes that determine the directory layout.
    logger.debug(
        calling,
        'save_single_file_locally: save_single_file_locally calling get_doc_attributes',
        extra=d)
    org, docket_id, document_id = dc.get_doc_attributes(
        get_document_id(source_name))
    logger.debug(
        succeeded,
        'save_single_file_locally: save_single_file_locally successfully called get_doc_attributes',
        extra=d)

    target_dir = destination + org + "/" + docket_id + "/" + document_id + "/"

    # Step 3: make sure the target directory exists.
    logger.debug(
        calling,
        'save_single_file_locally: save_single_file_locally calling create_new_dir',
        extra=d)
    create_new_dir(target_dir)
    logger.debug(
        succeeded,
        'save_single_file_locally: save_single_file_locally successfully called create_new_dir',
        extra=d)

    # Step 4: copy the file into place.
    logger.debug(
        calling,
        'save_single_file_locally: save_single_file_locally calling copy',
        extra=d)
    target_file = target_dir + '/' + source_name
    shutil.copy(cur_path, target_file)
    logger.debug(
        succeeded,
        'save_single_file_locally: save_single_file_locally successfully called copy',
        extra=d)
Example #7
0
def save_single_file_locally(current_path, destination):
    """
    Copy the file at ``current_path`` into the tree under ``destination``.

    The target directory is ``destination`` followed by the three values
    returned by ``dc.get_doc_attributes`` for the file's document id, each
    terminated by '/'.  It is created with
    ``create_new_directory_for_path`` before the file is copied into it
    under its own name.

    :param current_path: location of the file to be saved
    :param destination: root directory to save under; no separator is
        inserted after it
    :return: None
    """
    file_name = get_file_name(current_path)
    org, docket_id, document_id = dc.get_doc_attributes(
        get_document_id(file_name))

    target_dir = destination + org + '/' + docket_id + '/' + document_id + '/'
    create_new_directory_for_path(target_dir)

    target_file = target_dir + '/' + file_name
    shutil.copy(current_path, target_file)
Example #8
0
def search_for_document_in_directory(document_id, directory_path=PATH):
    """
    Look for a document's json file inside the directory structure.

    The document directory is ``directory_path`` followed by the three
    values returned by ``dc.get_doc_attributes`` joined with "/"; the file
    looked for inside it is ``doc.<third value>.json``.

    :param document_id: the document id being searched
    :param directory_path: the directory path being searched in; no
        separator is inserted after it
    :return: the document directory if the json file exists,
             else an empty string
    """
    orgs, dock_id, doc_id = dc.get_doc_attributes(document_id)

    document_dir = directory_path + orgs + "/" + dock_id + "/" + doc_id
    json_file = document_dir + "/" + "doc." + doc_id + ".json"

    if not os.path.isfile(json_file):
        return ""
    return document_dir
Example #9
0
def search_for_document_in_directory(document_id,
                                     directory_path=HOME_REGULATION_PATH):
    """
    Look for a document's json file inside the directory structure.

    The document directory is ``directory_path`` followed by the three
    values returned by ``dc.get_doc_attributes`` joined with '/'; the file
    looked for inside it is ``doc.<third value>.json``.

    :param document_id: the document id being searched
    :param directory_path: the directory path being searched in; no
        separator is inserted after it
    :return: the document directory if the json file exists,
             else an empty string
    """
    organisations, dock_id, doc_id = dc.get_doc_attributes(document_id)

    document_dir = directory_path + organisations + '/' + dock_id + '/' + doc_id
    json_file = document_dir + '/' + 'doc.' + doc_id + '.json'

    return document_dir if os.path.isfile(json_file) else ''
Example #10
0
def test_get_doc_attributes_other_special():
    """An id with a single-letter segment splits into org, docket, document."""
    expected = ("FDA", "FDA-2018-N-0073", "FDA-2018-N-0073-0002")
    org, docket, document = dc.get_doc_attributes('FDA-2018-N-0073-0002')
    assert (org, docket, document) == expected
Example #11
0
def test_get_doc_attributes_special():
    """An underscore-style FRDOC id splits into org, docket, document."""
    expected = ("AHRQ_FRDOC", "AHRQ_FRDOC_0001", "AHRQ_FRDOC_0001-0001")
    org, docket, document = dc.get_doc_attributes('AHRQ_FRDOC_0001-0001')
    assert (org, docket, document) == expected
Example #12
0
def test_get_doc_attributes_multiple_agencies():
    """An id naming two agencies yields the org "abcd-mesd"."""
    expected = (
        "abcd-mesd",
        "mesd-abcd-2018-234234",
        "mesd-abcd-2018-234234-0001",
    )
    org, docket, document = dc.get_doc_attributes('mesd-abcd-2018-234234-0001')
    assert (org, docket, document) == expected
Example #13
0
def test_get_doc_attributes():
    """A plain single-agency id splits into org, docket, document."""
    expected = ("mesd", "mesd-2018-234234", "mesd-2018-234234-0001")
    org, docket, document = dc.get_doc_attributes('mesd-2018-234234-0001')
    assert (org, docket, document) == expected