Beispiel #1
0
def rename_and_handle_fileexists(old_filepath, new_filepath):
    """Move ``old_filepath`` to ``new_filepath``, resolving collisions by hash.

    If a file already exists at the destination and its hash matches the
    source, the source is simply deleted (the move is a no-op duplicate).
    If the hashes differ, a ``FileExistsError`` is raised so no data is lost.

    NOTE: the original relied on ``os.rename`` raising ``FileExistsError``,
    which only happens on Windows — on POSIX the destination is silently
    overwritten. Checking explicitly makes the behaviour consistent across
    platforms.

    Raises:
        FileExistsError: destination exists and its hash differs from the
            source file's hash.
    """
    if os.path.exists(new_filepath):
        print("    File already exists at {}".format(new_filepath))
        if hash_file(old_filepath) == hash_file(new_filepath):
            # Identical content already at the destination; drop the source.
            os.remove(old_filepath)
        else:
            raise FileExistsError(
                "File already exists and the hash does not match")
    else:
        os.rename(old_filepath, new_filepath)
Beispiel #2
0
def file_already_in_index(indexed_filepath, to_be_indexed_filepath, filehash):
    """Verify an already-indexed file, then delete the duplicate candidate.

    Confirms the file recorded in the index still exists at
    ``indexed_filepath`` and still hashes to ``filehash``; if both checks
    pass, removes ``to_be_indexed_filepath`` as a redundant duplicate.

    Raises:
        FileNotFoundError: the indexed file is missing from its declared
            location.
        AssertionError: the file on disk does not hash to ``filehash``
            (the index is stale or the file was modified).
    """
    try:
        new_hash = hash_file(indexed_filepath)
    except FileNotFoundError:
        raise FileNotFoundError(
            "Indexed logfile can't be found in its declared location.")

    # An explicit raise instead of `assert`: assert statements are stripped
    # under `python -O`, which would silently skip this integrity check.
    # AssertionError is kept so existing callers' except clauses still work.
    if new_hash != filehash:
        raise AssertionError(
            "The located file doesn't agree with the index hash.")

    print('Already exists in index')
    os.remove(to_be_indexed_filepath)
Beispiel #3
0
def index_logfiles(centre_map, machine_map, logfile_data_directory):
    """Hash and index every ``.trf`` logfile waiting in ``to_be_indexed``.

    Loads ``index.json``, connects to every centre's Mosaiq SQL server,
    hashes the pending logfiles in chunks, hands files whose hash is
    already indexed to ``file_already_in_index`` (which verifies and
    removes the duplicate), and passes the rest to
    ``file_ready_to_be_indexed`` for matching and filing.

    Parameters:
        centre_map: dict mapping centre name -> per-centre config; each
            entry must contain a 'mosaiq_sql_server' key.
        machine_map: passed through to ``file_ready_to_be_indexed``.
        logfile_data_directory: root directory holding ``index.json`` and
            the sorting subdirectories.
    """
    data_directory = logfile_data_directory

    def _subpath(name):
        # Every working path lives directly under the data directory.
        return os.path.abspath(os.path.join(data_directory, name))

    index_filepath = _subpath('index.json')
    to_be_indexed_directory = _subpath('to_be_indexed')
    indexed_directory = _subpath('indexed')
    no_mosaiq_record_found = _subpath('no_mosaiq_record_found')
    unknown_error_in_logfile = _subpath('unknown_error_in_logfile')
    no_field_label_in_logfile = _subpath('no_field_label_in_logfile')

    centre_server_map = {
        centre: centre_lookup['mosaiq_sql_server']
        for centre, centre_lookup in centre_map.items()
    }

    # Only the server values are needed here; the centre keys are unused.
    sql_server_and_ports = [
        "{}".format(details['mosaiq_sql_server'])
        for details in centre_map.values()
    ]

    with open(index_filepath, 'r') as json_data_file:
        index = json.load(json_data_file)

    indexset = set(index.keys())

    print('\nConnecting to Mosaiq SQL servers...')
    with multi_mosaiq_connect(sql_server_and_ports) as cursors:

        print('Globbing index directory...')
        to_be_indexed = glob(os.path.join(to_be_indexed_directory, '**/*.trf'),
                             recursive=True)

        # Hash in fixed-size chunks so progress is visible on large backlogs.
        chunk_size = 50
        number_to_be_indexed = len(to_be_indexed)
        to_be_indexed_chunked = [
            to_be_indexed[i:i + chunk_size]
            for i in range(0, number_to_be_indexed, chunk_size)
        ]

        for i, a_to_be_indexed_chunk in enumerate(to_be_indexed_chunked):
            print('\nHashing a chunk of logfiles ({}/{})'.format(
                i + 1, len(to_be_indexed_chunked)))
            hashlist = [
                hash_file(filename, dot_feedback=True)
                for filename in a_to_be_indexed_chunk
            ]

            print(' ')

            to_be_indexed_dict = dict(zip(hashlist, a_to_be_indexed_chunk))
            hashset = set(hashlist)

            # Hashes already present in the index: verify the stored copy
            # and delete the redundant candidate file.
            for filehash in hashset.intersection(indexset):
                file_already_in_index(
                    os.path.join(indexed_directory,
                                 index[filehash]['filepath']),
                    to_be_indexed_dict[filehash], filehash)

            # Everything else is matched against Mosaiq and filed.
            file_ready_to_be_indexed(
                cursors, list(hashset.difference(indexset)),
                to_be_indexed_dict, unknown_error_in_logfile,
                no_mosaiq_record_found, no_field_label_in_logfile,
                indexed_directory, index_filepath, index, machine_map,
                centre_map, centre_server_map)
    print('Complete')