def migrate_isis_parser(sargs):
    parser = argparse.ArgumentParser(description="ISIS database migration tool")
    subparsers = parser.add_subparsers(title="Commands", metavar="", dest="command")

    extract_parser = subparsers.add_parser("extract", help="Extract MST files to JSON")
    extract_parser.add_argument(
        "mst_file_path", metavar="file", help="Path to MST file that will be extracted"
    )
    extract_parser.add_argument("--output", required=True, help="The output file path")

    import_parser = subparsers.add_parser(
        "import",
        parents=[mongodb_parser(sargs)],
        help="Process JSON files then import into Kernel database",
    )
    import_parser.add_argument(
        "import_file",
        metavar="file",
        help="JSON file path that contains mst extraction result, e.g: collection-title.json",
    )
    import_parser.add_argument(
        "--type",
        help="Type of JSON file that will load into Kernel database",
        choices=["journal", "issue", "documents-bundles-link"],
        required=True,
    )

    link_parser = subparsers.add_parser(
        "link",
        help="Generate JSON file of journals' ids and their issues linked by ISSN",
    )
    link_parser.add_argument(
        "issues",
        help="JSON file path that contains mst extraction result, e.g: ~/json/collection-issues.json",
    )
    link_parser.add_argument("--output", required=True, help="The output file path")

    args = parser.parse_args(sargs)

    if args.command == "extract":
        extract_isis.create_output_dir(args.output)
        extract_isis.run(args.mst_file_path, args.output)
    elif args.command == "import":
        mongo = ds_adapters.MongoDB(uri=args.uri, dbname=args.db)
        Session = ds_adapters.Session.partial(mongo)

        if args.type == "journal":
            pipeline.import_journals(args.import_file, session=Session())
        elif args.type == "issue":
            pipeline.import_issues(args.import_file, session=Session())
        elif args.type == "documents-bundles-link":
            pipeline.import_documents_bundles_link_with_journal(
                args.import_file, session=Session()
            )
    elif args.command == "link":
        pipeline.link_documents_bundles_with_journals(args.issues, args.output)
    else:
        parser.print_help()
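
For quick reference, the parser above can be driven directly with an argv-style list. A minimal sketch; the file paths are placeholders, and the --uri/--db option names are assumptions inferred from the args.uri/args.db reads above (they would be registered by mongodb_parser):

# Hypothetical invocations; paths are placeholders.
migrate_isis_parser(["extract", "collection.mst", "--output", "/tmp/collection.json"])

# Assumes mongodb_parser registers --uri and --db (read above as args.uri/args.db).
migrate_isis_parser([
    "import", "collection-title.json", "--type", "journal",
    "--uri", "mongodb://localhost:27017", "--db", "kernel",
])
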
Example #2
def link_documents_bundles_with_journals(issue_path: str, output_path: str):
    """Busca pelo relacionamento entre periódicos e fascículos a partir
    de arquivos JSON extraídos de uma base MST. O resultado é escrito
    em um arquivo JSON contendo um objeto (dict) com identificadores de
    periócios como chaves e arrays de ids das issues que compõe o
    periódico"""

    journals_bundles = {}
    extract_isis.create_output_dir(output_path)
    issues_as_json = reading.read_json_file(issue_path)
    issues = conversion.conversion_issues_to_xylose(issues_as_json)
    issues = filter_issues(issues)

    for issue in issues:
        journal_id = issue.data["issue"]["v35"][0]["_"]
        journals_bundles.setdefault(journal_id, [])
        _issue_id = issue_to_kernel(issue)["_id"]

        exist_item = any(
            d["id"] == _issue_id for d in journals_bundles[journal_id]
        )

        if not exist_item:
            _creation_date = parse_date(issue.publication_date)

            _supplement = ""
            if issue.type == "supplement":
                _supplement = "0"

                if issue.supplement_volume:
                    _supplement = issue.supplement_volume
                elif issue.supplement_number:
                    _supplement = issue.supplement_number

            journals_bundles[journal_id].append({
                "id": _issue_id,
                "order": issue.order,
                "number": issue.number,
                "volume": issue.volume,
                "year": str(date_to_datetime(_creation_date).year),
                "supplement": _supplement,
            })

    with open(output_path, "w") as output:
        output.write(json.dumps(journals_bundles, indent=4, sort_keys=True))
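
A minimal usage sketch for the function above; the paths are placeholders, and the commented output shape follows from the entries appended in the loop:

# Hypothetical call; the input is an issues extraction produced by the
# "extract" command, and the output path's parent directory is created on demand.
link_documents_bundles_with_journals(
    "/data/json/collection-issues.json", "/tmp/journals_bundles.json"
)
# Illustrative output shape:
# {
#     "<journal id (field v35)>": [
#         {"id": "...", "order": "...", "number": "...", "volume": "...",
#          "year": "1998", "supplement": ""}
#     ]
# }
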
Example #3
def link_documents_bundles_with_journals(
    journal_path: str, issue_path: str, output_path: str
):
    """Busca pelo relacionamento entre periódicos e fascículos a partir
    de arquivos JSON extraídos de uma base MST. O resultado é escrito
    em um arquivo JSON contendo um objeto (dict) com identificadores de
    periócios como chaves e arrays de ids das issues que compõe o
    periódico"""

    journals_bundles = {}
    extract_isis.create_output_dir(output_path)

    journals_as_json = reading.read_json_file(journal_path)
    issues_as_json = reading.read_json_file(issue_path)
    journals = conversion.conversion_journals_to_kernel(journals_as_json)
    issues = conversion.conversion_issues_to_xylose(issues_as_json)
    issues = filter_issues(issues)

    for journal in journals:
        journals_bundles[journal["id"]] = find_documents_bundles(journal, issues)

    with open(output_path, "w") as output:
        output.write(json.dumps(journals_bundles, indent=4, sort_keys=True))
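
find_documents_bundles is not shown on this page. A plausible sketch, assuming (from the v35 lookup in Example #2 and the "linked by ISSN" help text) that it selects the issues whose ISSN matches the journal's id; the entry format returned here is likewise an assumption:

def find_documents_bundles(journal, issues):
    # Hypothetical sketch: pair the journal with every issue whose ISSN
    # (ISIS field v35) equals the journal's id.
    return [
        issue_to_kernel(issue)
        for issue in issues
        if issue.data["issue"]["v35"][0]["_"] == journal["id"]
    ]
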
Example #4
def test_should_not_try_to_create_an_existing_dir(
    self, path_exists_mock, makedirs_mock
):
    path_exists_mock.return_value = True
    extract_isis.create_output_dir("/random/dir/file.json")
    makedirs_mock.assert_not_called()
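
This test method (like the one in the next example) receives path_exists_mock and makedirs_mock as arguments, which implies mock.patch decorators in the original suite. A sketch of the assumed surrounding context; the class name and patch targets are placeholders, and mock arguments are injected bottom-up:

import unittest
from unittest import mock

class TestCreateOutputDir(unittest.TestCase):  # hypothetical class name
    @mock.patch("extract_isis.os.makedirs")      # -> makedirs_mock (outer decorator)
    @mock.patch("extract_isis.os.path.exists")   # -> path_exists_mock (inner decorator)
    def test_should_not_try_to_create_an_existing_dir(
        self, path_exists_mock, makedirs_mock
    ):
        ...
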
Example #5
def test_should_create_an_output_dir(self, path_exists_mock, makedirs_mock):
    path_exists_mock.return_value = False
    extract_isis.create_output_dir("/random/dir/file.json")
    makedirs_mock.assert_called_once_with("/random/dir")
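
Together, these two tests pin down the behaviour of extract_isis.create_output_dir: derive the parent directory of the given file path and create it only if it does not already exist. A minimal reconstruction consistent with both tests:

import os

def create_output_dir(output_path):
    # Create the parent directory of output_path unless it already exists;
    # e.g. "/random/dir/file.json" -> makedirs("/random/dir").
    output_dir = os.path.dirname(output_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)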