Esempio n. 1
0
def export_dataset(dataset: Dataset, database: Database):
    """Dump the contents of the dataset to the output directory."""
    context = Context(dataset)
    context.bind()
    loader = database.view(dataset, export_assembler)
    exporters = [Exporter(context, loader) for Exporter in EXPORTERS]
    for entity in loader:
        for exporter in exporters:
            exporter.feed(entity)

    for exporter in exporters:
        exporter.finish()

    # Make sure the exported resources are visible in the database
    db.session.commit()

    # Export list of data issues from crawl stage
    issues_path = context.get_resource_path("issues.json")
    context.log.info("Writing dataset issues list", path=issues_path)
    with open(issues_path, "w", encoding=settings.ENCODING) as fh:
        data = {"issues": Issue.query(dataset=dataset).all()}
        write_json(data, fh)

    # Export full metadata
    index_path = context.get_resource_path("index.json")
    context.log.info("Writing dataset index", path=index_path)
    with open(index_path, "w", encoding=settings.ENCODING) as fh:
        meta = dataset.to_index()
        write_json(meta, fh)

    context.close()
Esempio n. 2
0
 def __init__(self, context: Context, loader: Loader[Dataset, Entity]):
     self.context = context
     self.dataset = context.dataset
     self.resource_name = f"{self.NAME}.{self.EXTENSION}"
     self.path = context.get_resource_path(self.resource_name)
     self.path.parent.mkdir(exist_ok=True, parents=True)
     self.loader = loader
Esempio n. 3
0
def export_dataset(dataset: Dataset, database: Database):
    """Dump the contents of the dataset to the output directory."""
    try:
        context = Context(dataset)
        loader = database.view(dataset, assemble)
        if dataset.type != External.TYPE:
            export_data(context, loader)

        # Export list of data issues from crawl stage
        issues_path = context.get_resource_path("issues.json")
        context.log.info("Writing dataset issues list", path=issues_path)
        with open(issues_path, "wb") as fh:
            with engine.begin() as conn:
                data = {"issues": list(all_issues(conn, dataset))}
            write_json(data, fh)

        # Export full metadata
        index_path = context.get_resource_path("index.json")
        context.log.info("Writing dataset index", path=index_path)
        with open(index_path, "wb") as fh:
            meta = dataset_to_index(dataset)
            write_json(meta, fh)
    finally:
        context.close()