Example #1
import os

def rename_and_handle_fileexists(old_filepath, new_filepath):
    try:
        os.rename(old_filepath, new_filepath)
    except FileExistsError:
        # Note: os.rename raises FileExistsError for an existing destination
        # on Windows only; on POSIX the destination is silently replaced.
        files_hashes_match = hash_file(old_filepath) == hash_file(new_filepath)
        print("    File already exists at {}".format(new_filepath))
        if files_hashes_match:
            # Identical contents already live at the destination, so the
            # source copy is redundant and can be dropped.
            os.remove(old_filepath)
        else:
            raise FileExistsError(
                "File already exists and the hash does not match")
Example #2
import os
import pathlib

def rename_and_handle_fileexists(old_filepath, new_filepath):
    # Create the destination directory tree first, so the move cannot fail
    # with FileNotFoundError on a missing parent directory.
    pathlib.Path(new_filepath).parent.mkdir(parents=True, exist_ok=True)

    try:
        os.rename(old_filepath, new_filepath)
    except FileExistsError:
        files_hashes_match = hash_file(old_filepath) == hash_file(new_filepath)
        print("    File already exists at {}".format(new_filepath))
        if files_hashes_match:
            os.remove(old_filepath)
        else:
            raise FileExistsError(
                "File already exists and the hash does not match")
Example #3
import os

def file_already_in_index(indexed_filepath, to_be_indexed_filepath, filehash):
    try:
        new_hash = hash_file(indexed_filepath)
    except FileNotFoundError:
        raise FileNotFoundError(
            "Indexed logfile can't be found in its declared location.")

    # A direct comparison replaces the original try/assert/except pattern:
    # assert statements are stripped under python -O, which would silently
    # skip this integrity check.
    if new_hash != filehash:
        raise AssertionError(
            "The located file doesn't agree with the index hash.")

    print("Already exists in index")
    os.remove(to_be_indexed_filepath)
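A hypothetical invocation, mirroring how Example #4 calls this function; both paths and the hash are invented:

file_already_in_index(
    "indexed/machine_01/2019-01-01.trf",  # location recorded in the index
    "to_be_indexed/2019-01-01.trf",       # pending duplicate to discard
    "9f2c...",                            # hash key stored in index.json
)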
Example #4
import json
import os
from glob import glob

# hash_file (sketched above), file_already_in_index (Example #3),
# multi_mosaiq_connect and file_ready_to_be_indexed are assumed to be
# defined elsewhere in the same codebase.

def index_logfiles(centre_map, machine_map, logfile_data_directory):
    data_directory = logfile_data_directory
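    # Resolve the index file and the sorting directories for the logfiles.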
    index_filepath = os.path.abspath(os.path.join(data_directory,
                                                  "index.json"))
    to_be_indexed_directory = os.path.abspath(
        os.path.join(data_directory, "to_be_indexed"))
    indexed_directory = os.path.abspath(os.path.join(data_directory,
                                                     "indexed"))
    no_mosaiq_record_found = os.path.abspath(
        os.path.join(data_directory, "no_mosaiq_record_found"))
    unknown_error_in_logfile = os.path.abspath(
        os.path.join(data_directory, "unknown_error_in_logfile"))
    no_field_label_in_logfile = os.path.abspath(
        os.path.join(data_directory, "no_field_label_in_logfile"))
    centre_details = centre_map

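    # Map each centre to its Mosaiq SQL server for use downstream.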
    centre_server_map = {
        centre: centre_lookup["mosaiq_sql_server"]
        for centre, centre_lookup in centre_map.items()
    }

    # One connection string per centre, passed to multi_mosaiq_connect below.
    sql_server_and_ports = [
        str(details["mosaiq_sql_server"])
        for _, details in centre_details.items()
    ]

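    # Load the existing hash -> metadata index; its keys are the known hashes.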
    with open(index_filepath, "r") as json_data_file:
        index = json.load(json_data_file)

    indexset = set(index.keys())

    print("\nConnecting to Mosaiq SQL servers...")
    with multi_mosaiq_connect(sql_server_and_ports) as cursors:

        print("Globbing index directory...")
        to_be_indexed = glob(os.path.join(to_be_indexed_directory, "**/*.trf"),
                             recursive=True)

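        # Hash in chunks of 50 files so progress can be reported as we go.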
        chunk_size = 50
        number_to_be_indexed = len(to_be_indexed)
        to_be_indexed_chunked = [
            to_be_indexed[i:i + chunk_size]
            for i in range(0, number_to_be_indexed, chunk_size)
        ]

        for i, a_to_be_indexed_chunk in enumerate(to_be_indexed_chunked):
            print("\nHashing a chunk of logfiles ({}/{})".format(
                i + 1, len(to_be_indexed_chunked)))
            hashlist = [
                hash_file(filename, dot_feedback=True)
                for filename in a_to_be_indexed_chunk
            ]

            print(" ")

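            # Map each hash back to its source path within this chunk.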
            to_be_indexed_dict = dict(zip(hashlist, a_to_be_indexed_chunk))

            hashset = set(hashlist)

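            # Hashes already in the index are duplicates: verify the indexed
            # copy and remove the pending file (see Example #3).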
            for filehash in list(hashset.intersection(indexset)):
                file_already_in_index(
                    os.path.join(indexed_directory,
                                 index[filehash]["filepath"]),
                    to_be_indexed_dict[filehash],
                    filehash,
                )

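            # Hashes not yet in the index are new logfiles; hand them to the
            # indexing step along with the open Mosaiq cursors.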
            file_ready_to_be_indexed(
                cursors,
                list(hashset.difference(indexset)),
                to_be_indexed_dict,
                unknown_error_in_logfile,
                no_mosaiq_record_found,
                no_field_label_in_logfile,
                indexed_directory,
                index_filepath,
                index,
                machine_map,
                centre_details,
                centre_server_map,
            )
    print("Complete")