Example #1
    def run_releases(self, release_stream: Sequence[str]) -> None:
        """
        Iterates over the stream of releases, which are expected to be grouped
        (sorted) by work_ident.

        Collects releases with the same work_ident into a batch and processes
        each batch as a single work.

        TODO: what is the right API here? stream iterator? how should
        parallelism work?
        """
        batch = []
        batch_work_id = None
        for line in release_stream:
            if not line:
                continue
            release = entity_from_json(line, ReleaseEntity)
            if release.work_id == batch_work_id:
                batch.append(release)
                continue
            if batch:
                ib = self.process_release_list(batch)
                print(ib.json(exclude_none=True, sort_keys=True))
                batch_work_id = None
            batch = [
                release,
            ]
            batch_work_id = release.work_id

        if batch:
            ib = self.process_release_list(batch)
            print(ib.json(exclude_none=True, sort_keys=True))
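
A minimal driver sketch for run_releases(); the WorkPipeline class name, its
constructor arguments, and the file path are assumptions for illustration, and
only the requirement that input lines be sorted by work_ident comes from the
docstring above.

# Hypothetical driver (class name, constructor, and path are placeholders):
# feeds run_releases() one JSON line per release, pre-sorted by work_ident so
# that all releases of a work arrive contiguously.
def main() -> None:
    worker = WorkPipeline()  # real constructor arguments omitted
    with open("releases.sorted_by_work.json", "r") as f:
        worker.run_releases([line.strip() for line in f])

if __name__ == "__main__":
    main()
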
Example #2
def test_es_biblio_from_release() -> None:

    with open("tests/files/release_hsmo6p4smrganpb3fndaj2lon4.json", "r") as f:
        release = entity_from_json(f.read(), ReleaseEntity)

    obj = es_biblio_from_release(release)
    d = json.loads(obj.json())

    assert (obj.release_ident == release.ident == d["release_ident"] ==
            "hsmo6p4smrganpb3fndaj2lon4")
Example #3
def test_es_release_from_release() -> None:

    with open("tests/files/release_hsmo6p4smrganpb3fndaj2lon4.json", "r") as f:
        release = entity_from_json(f.read(), ReleaseEntity)

    obj = es_release_from_release(release)
    d = json.loads(obj.json())

    assert (obj.ident == release.ident == d["ident"] ==
            "hsmo6p4smrganpb3fndaj2lon4")
    assert obj.doi_registrar == "crossref"
    assert obj.doi_prefix == "10.7717"
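
The DOI assertions follow the general DOI structure: the prefix is the
registrant code before the first slash, while the registrar (Crossref here) is
recorded separately in the release metadata. A small illustration of the
prefix rule with a made-up DOI; es_release_from_release may derive the value
differently internally.

# Illustration of the DOI prefix rule only (the example DOI is made up).
def doi_prefix(doi: str) -> str:
    return doi.split("/", 1)[0]

assert doi_prefix("10.7717/example.123") == "10.7717"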
Example #4
    @staticmethod
    def from_json(obj: Dict[Any, Any]) -> "IntermediateBundle":
        return IntermediateBundle(
            doc_type=DocType(obj.get("doc_type")),
            releases=[
                entity_from_json(json.dumps(re), ReleaseEntity)
                for re in obj.get("releases", [])
            ],
            biblio_release_ident=obj.get("biblio_release_ident"),
            crossref=obj.get("crossref"),
            grobid_fulltext=obj.get("grobid_fulltext"),
            pdftotext_fulltext=obj.get("pdftotext_fulltext"),
            pdf_meta=obj.get("pdf_meta"),
            sim_fulltext=obj.get("sim_fulltext"),
            html_fulltext=obj.get("html_fulltext"),
        )
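
A short usage sketch for from_json(), reading bundles from a JSON-lines stream
on stdin; the stdin source and the printed fields are assumptions, only the
dict shape that from_json() reads is taken from the code above.

# Usage sketch (stdin source and printed fields are assumptions): parse each
# JSON line into a dict and rebuild an IntermediateBundle from it.
import json
import sys

for line in sys.stdin:
    if not line.strip():
        continue
    bundle = IntermediateBundle.from_json(json.loads(line))
    print(bundle.doc_type, len(bundle.releases))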
Example #5
    def process_batch(self, batch: List[dict]) -> None:

        bulk_actions = []
        for obj in batch:
            bundle = IntermediateBundle(
                doc_type=DocType(obj["doc_type"]),
                releases=[
                    entity_from_json(json.dumps(re), ReleaseEntity)
                    for re in obj["releases"]
                ],
                biblio_release_ident=obj.get("biblio_release_ident"),
                grobid_fulltext=obj.get("grobid_fulltext"),
                pdftotext_fulltext=obj.get("pdftotext_fulltext"),
                pdf_meta=obj.get("pdf_meta"),
                html_fulltext=obj.get("html_fulltext"),
                sim_fulltext=obj.get("sim_fulltext"),
            )
            es_doc = transform_heavy(bundle)
            if not es_doc:
                continue
            bulk_actions.append({
                "_index": self.es_index,
                "_op_type": "index",
                "_id": es_doc.key,
                "_source": es_doc.json(exclude_none=True, sort_keys=True),
            })
            self.counts["docs-indexed"] += 1

        if not bulk_actions:
            return

        elasticsearch.helpers.bulk(self.es_client, bulk_actions, timeout="30s")
        self.counts["batches-indexed"] += 1
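
A minimal sketch of driving process_batch() from a JSON-lines stream in
fixed-size chunks; the batch size, the stdin source, and the index_from_stdin
helper name are assumptions, only the List[dict] input shape comes from the
signature above.

# Hypothetical driver: chunks JSON lines from stdin into fixed-size batches
# for process_batch(). Batch size and input source are assumptions.
import json
import sys
from typing import List

def index_from_stdin(worker, batch_size: int = 50) -> None:
    batch: List[dict] = []
    for line in sys.stdin:
        if not line.strip():
            continue
        batch.append(json.loads(line))
        if len(batch) >= batch_size:
            worker.process_batch(batch)
            batch = []
    if batch:
        worker.process_batch(batch)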