コード例 #1
0
def test_crosswalk(input_format, output_format=constants.FORMAT_METAJSON, all_in_one_file=True):
    """Run a crosswalk over the sample files of one format and write the result.

    Input files are looked up under ``<cwd>/data/<input_format>`` and the
    converted items are written under ``<cwd>/data/result``.

    Args:
        input_format: Format identifier of the input files; also used as the
            name of the input sub-directory and in the output names.
        output_format: Format identifier to convert to. Defaults to
            ``constants.FORMAT_METAJSON``.
        all_in_one_file: When true, everything is written to a single file
            named ``result_<input>_<output>.<extension>``; otherwise the items
            are written into a directory named ``result_<input>_<output>``.

    Returns:
        None. Nothing is converted or written when no relevant input file
        is found.
    """
    logging.info("*** Test crosswalk : {0}".format(input_format))

    data_root = os.path.join(os.getcwd(), "data")
    source_dir = os.path.join(data_root, input_format)
    extension = io_service.guess_file_extension_from_format(output_format)

    source_files = io_service.get_relevant_file_list_by_format(source_dir, input_format)
    if not source_files:
        # Nothing to convert for this format.
        return

    # NOTE(review): the meaning of the positional "test", "" and False
    # arguments is defined by crosswalks_service, not visible here.
    converted = crosswalks_service.parse_and_convert_file_list(
        source_files, input_format, output_format, "test", "", False, all_in_one_file
    )

    col_id = "test_" + input_format + "_to_" + output_format
    col_title = "Test " + input_format + " to " + output_format
    # Shared stem for both the single-file name and the directory name.
    result_stem = "result_" + input_format + "_" + output_format

    if not all_in_one_file:
        target_dir = os.path.join(data_root, "result", result_stem)
        io_service.write_items(col_id, col_title, converted, target_dir, output_format)
    else:
        target_file = os.path.join(data_root, "result", result_stem + "." + extension)
        io_service.write_items_in_one_file(col_id, col_title, converted, target_file, output_format)
コード例 #2
0
    input_format = constants.FORMAT_UNIMARC
    output_dir_path = os.path.join("data", "num", "output")
    if not os.path.exists(output_dir_path):
        os.mkdir(output_dir_path)
    error_file_name = "".join(["validation-", corpus, ".txt"])
    error_file_path = os.path.join(output_dir_path, error_file_name)
    #logging.debug("error_file_path: {}".format(error_file_path))

    # conf corpus
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace("Clean and conf corpus", date_begin, date_clean, None)

    # import
    input_file_paths = io_service.get_relevant_file_list_by_format(
        input_dir_path, input_format)
    results = corpus_service.import_metadata_files(corpus, input_file_paths,
                                                   input_format, source,
                                                   rec_id_prefix, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Validate
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate corpus", date_import, date_validate, None)

    # Export mods
    corpus_service.export_corpus(corpus, output_dir_path,
                                 constants.FORMAT_MODS, False, True)
    date_export_mods = datetime.datetime.now()
コード例 #3
0
ファイル: num_prep.py プロジェクト: dinosv/reference_manager
    input_format = constants.FORMAT_UNIMARC
    output_dir_path = os.path.join("data", "num", "output")
    if not os.path.exists(output_dir_path):
        os.mkdir(output_dir_path)
    error_file_name = "".join(["validation-", corpus, ".txt"])
    error_file_path = os.path.join(output_dir_path, error_file_name)
    #logging.debug("error_file_path: {}".format(error_file_path))

    # conf corpus
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace("Clean and conf corpus", date_begin, date_clean, None)

    # import
    input_file_paths = io_service.get_relevant_file_list_by_format(input_dir_path, input_format)
    results = corpus_service.import_metadata_files(corpus, input_file_paths, input_format, source, rec_id_prefix, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Validate
    corpus_service.validate_corpus(corpus, error_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate corpus", date_import, date_validate, None)

    # Export mods
    corpus_service.export_corpus(corpus, output_dir_path, constants.FORMAT_MODS, False, True)
    date_export_mods = datetime.datetime.now()
    chrono.chrono_trace("Export corpus mods", date_validate, date_export_mods, None)

    # Export oai_dc