Example #1
0
def link_documents_bundles_with_journals(issue_path: str, output_path: str):
    """Map each journal to the issues (documents bundles) that belong to it.

    Issues are read from the JSON file at ``issue_path``, converted with
    ``conversion.conversion_issues_to_xylose`` and passed through
    ``filter_issues``. The resulting mapping is written to ``output_path``
    as a JSON object whose keys are journal identifiers and whose values are
    lists of objects with the keys ``id``, ``order``, ``number``, ``volume``,
    ``year`` and ``supplement``.

    Args:
        issue_path: path of the JSON file holding the extracted issues.
        output_path: path of the JSON file to write; its directory is
            prepared via ``extract_isis.create_output_dir``.
    """

    journals_bundles = {}
    extract_isis.create_output_dir(output_path)
    issues_as_json = reading.read_json_file(issue_path)
    issues = conversion.conversion_issues_to_xylose(issues_as_json)
    issues = filter_issues(issues)

    for issue in issues:
        # The journal identifier is taken from the first occurrence of the
        # issue record's "v35" field.
        journal_id = issue.data["issue"]["v35"][0]["_"]
        bundles = journals_bundles.setdefault(journal_id, [])
        _issue_id = issue_to_kernel(issue)["_id"]

        # An issue id is registered at most once per journal.
        if any(bundle["id"] == _issue_id for bundle in bundles):
            continue

        _creation_date = parse_date(issue.publication_date)

        _supplement = ""
        # Compare by value: ``is`` tests object identity, which is not
        # guaranteed to hold for two equal strings.
        if issue.type == "supplement":
            # "0" marks a supplement that carries no explicit label; the
            # volume supplement takes precedence over the number supplement.
            _supplement = "0"

            if issue.supplement_volume:
                _supplement = issue.supplement_volume
            elif issue.supplement_number:
                _supplement = issue.supplement_number

        bundles.append({
            "id": _issue_id,
            "order": issue.order,
            "number": issue.number,
            "volume": issue.volume,
            "year": str(date_to_datetime(_creation_date).year),
            "supplement": _supplement,
        })

    with open(output_path, "w") as output:
        output.write(json.dumps(journals_bundles, indent=4, sort_keys=True))
Example #2
0
def import_issues(json_file: str, session: Session):
    """Load issues from a JSON file and register them as documents bundles.

    The file content is converted with
    ``conversion.conversion_issues_to_xylose``, filtered by ``filter_issues``
    and turned into manifests by ``conversion.conversion_issues_to_kernel``.
    Each manifest is wrapped in a ``DocumentsBundle`` and handed to
    ``add_bundle``; an ``AlreadyExists`` error is logged at info level and
    the import moves on to the next manifest.
    """

    raw_issues = reading.read_json_file(json_file)
    xylose_issues = filter_issues(
        conversion.conversion_issues_to_xylose(raw_issues)
    )

    for bundle_manifest in conversion.conversion_issues_to_kernel(xylose_issues):
        bundle = DocumentsBundle(manifest=bundle_manifest)
        try:
            add_bundle(session, bundle)
        except AlreadyExists as exc:
            # A bundle that is already stored is reported, not treated as fatal.
            logger.info(exc)
def import_issues(json_file: str, session: Session):
    """Load issues from a JSON file into the session's documents bundles.

    The file content is converted with
    ``conversion.conversion_issues_to_xylose``, filtered by ``filter_issues``
    and converted again by ``conversion.conversion_issues_to_kernel``. Each
    result is wrapped in a ``ManifestDomainAdapter``, added to
    ``session.documents_bundles`` and followed by a change record in
    ``session.changes``. An ``AlreadyExists`` error is logged at info level
    and the import moves on to the next issue.
    """

    raw_issues = reading.read_json_file(json_file)
    xylose_issues = conversion.conversion_issues_to_xylose(raw_issues)
    kept_issues = filter_issues(xylose_issues)

    for kernel_issue in conversion.conversion_issues_to_kernel(kept_issues):
        bundle_manifest = ManifestDomainAdapter(manifest=kernel_issue)

        try:
            session.documents_bundles.add(bundle_manifest)
            # The change record is only written when the bundle was added.
            session.changes.add(
                {
                    "timestamp": utcnow(),
                    "entity": "DocumentsBundle",
                    "id": bundle_manifest.id(),
                }
            )
        except AlreadyExists as exc:
            logger.info(str(exc))
Example #4
0
def link_documents_bundles_with_journals(
    journal_path: str, issue_path: str, output_path: str
):
    """Write a JSON file relating each journal to its documents bundles.

    Journals are read from ``journal_path`` and converted with
    ``conversion.conversion_journals_to_kernel``; issues are read from
    ``issue_path``, converted with ``conversion.conversion_issues_to_xylose``
    and filtered by ``filter_issues``. For every journal, the value returned
    by ``find_documents_bundles(journal, issues)`` is stored under the
    journal's ``"id"``. The mapping is dumped to ``output_path`` with sorted
    keys and a 4-space indent.
    """

    extract_isis.create_output_dir(output_path)

    raw_journals = reading.read_json_file(journal_path)
    raw_issues = reading.read_json_file(issue_path)
    kernel_journals = conversion.conversion_journals_to_kernel(raw_journals)
    kept_issues = filter_issues(conversion.conversion_issues_to_xylose(raw_issues))

    bundles_by_journal = {}
    for kernel_journal in kernel_journals:
        linked_bundles = find_documents_bundles(kernel_journal, kept_issues)
        bundles_by_journal[kernel_journal["id"]] = linked_bundles

    with open(output_path, "w") as output:
        serialized = json.dumps(bundles_by_journal, indent=4, sort_keys=True)
        output.write(serialized)
Example #5
0
 def test_filter_should_remove_pressreleases_and_ahead_issues(self):
     """Filtering the fixture issues must leave no issue behind."""
     xylose_issues = conversion.conversion_issues_to_xylose(self.issues_json)
     remaining = pipeline.filter_issues(xylose_issues)
     self.assertEqual(0, len(remaining))