Example #1
0
def import_metadatas(args):
    """Import a metadata file into a corpus from the attributes of *args*.

    Reads ``corpus``, ``input_format``, ``input_file_path``, ``source`` and
    ``rec_id_prefix`` from *args*, logs each value at INFO level, and then
    forwards them to ``corpus_service.import_metadata_file``.

    A falsy ``args.corpus`` is replaced by the module-level
    ``default_corpus``.
    """
    # Fall back to the default corpus when none was supplied.
    target_corpus = args.corpus or default_corpus
    logging.info("corpus: {}".format(target_corpus))

    record_format = args.input_format
    logging.info("input_format: {}".format(record_format))

    file_path = args.input_file_path
    logging.info("input_file_path: {}".format(file_path))

    origin = args.source
    logging.info("source: {}".format(origin))

    id_prefix = args.rec_id_prefix
    logging.info("rec_id_prefix: {}".format(id_prefix))

    # NOTE(review): the meaning of the trailing ``True, None`` positional
    # arguments is defined by corpus_service and is not visible from here.
    corpus_service.import_metadata_file(
        target_corpus, file_path, record_format, origin, id_prefix, True, None
    )
Example #2
0
def import_metadatas(args):
    """Log the import settings carried by *args* and run the import.

    The attributes ``corpus``, ``input_format``, ``input_file_path``,
    ``source`` and ``rec_id_prefix`` are read from *args* one at a time and
    each is logged at INFO level right after it is read. When
    ``args.corpus`` is falsy, the module-level ``default_corpus`` is used
    in its place. The collected values are then passed positionally to
    ``corpus_service.import_metadata_file``.
    """
    requested = args.corpus
    selected_corpus = requested if requested else default_corpus
    logging.info("corpus: {}".format(selected_corpus))

    metadata_format = args.input_format
    logging.info("input_format: {}".format(metadata_format))

    metadata_path = args.input_file_path
    logging.info("input_file_path: {}".format(metadata_path))

    provenance = args.source
    logging.info("source: {}".format(provenance))

    prefix = args.rec_id_prefix
    logging.info("rec_id_prefix: {}".format(prefix))

    # Positional order expected by the service call; the last two values
    # are fixed by this function (their semantics live in corpus_service).
    call_arguments = (
        selected_corpus,
        metadata_path,
        metadata_format,
        provenance,
        prefix,
        True,
        None,
    )
    corpus_service.import_metadata_file(*call_arguments)

if __name__ == "__main__":
    # Script entry point: reset and configure the "perio" corpus, import a
    # UNIMARC file into it, validate the imported documents into a CSV
    # file, and trace the duration of each of the three stages.
    date_begin = datetime.datetime.now()

    # Run configuration.
    corpus = "perio"
    source = "Sciences Po | la bibliothèque"
    rec_id_prefix = ""
    input_format = constants.FORMAT_UNIMARC
    input_file_path = os.path.join("data", "unimarc", "periouni.mrc")
    csv_file_name = "validation-" + corpus + ".csv"
    csv_file_path = os.path.join("data", "result", csv_file_name)

    # Stage 1: clean the corpus, then configure it.
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace("Clean and conf corpus", date_begin, date_clean, None)

    # Stage 2: import the metadata file into the corpus.
    corpus_service.import_metadata_file(
        corpus, input_file_path, input_format, source, rec_id_prefix, True, None
    )
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Stage 3: fetch the corpus documents and validate them.
    documents = repository_service.get_documents(corpus)
    validate_perios(documents, csv_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace("Validate perio", date_import, date_validate, None)
Example #4
0
if __name__ == "__main__":
    # Timestamp taken before any work so the first trace covers the
    # clean-and-configure stage.
    date_begin = datetime.datetime.now()

    # Parameters of this run: target corpus, record provenance, input file
    # and its format, and the path handed to the validation step.
    corpus = "perio"
    rec_id_prefix = ""
    source = "Sciences Po | la bibliothèque"
    input_format = constants.FORMAT_UNIMARC
    input_file_path = os.path.join("data", "unimarc", "periouni.mrc")
    csv_file_name = "".join(("validation-", corpus, ".csv"))
    csv_file_path = os.path.join("data", "result", csv_file_name)

    # Clean the corpus and apply the "aime" configuration to it.
    corpus_service.clean_corpus(corpus)
    corpus_service.conf_corpus(corpus, "aime")
    date_clean = datetime.datetime.now()
    chrono.chrono_trace(
        "Clean and conf corpus", date_begin, date_clean, None
    )

    # Import the metadata file.
    corpus_service.import_metadata_file(
        corpus,
        input_file_path,
        input_format,
        source,
        rec_id_prefix,
        True,
        None,
    )
    date_import = datetime.datetime.now()
    chrono.chrono_trace("Import corpus", date_clean, date_import, None)

    # Validate the documents now stored for the corpus.
    documents = repository_service.get_documents(corpus)
    validate_perios(documents, csv_file_path)
    date_validate = datetime.datetime.now()
    chrono.chrono_trace(
        "Validate perio", date_import, date_validate, None
    )