Esempio n. 1
0
def get_all_concepts(db: PartitionedDatabase) -> List[JsonDict]:
    """
    Return every model in the database converted with `to_concept_dict`.

    The models and their property counts are both read inside a single
    transaction. Each model is then passed to `to_concept_dict` together
    with the property count looked up by the model's id.
    """
    with db.transaction() as tx:
        all_models = db.get_models_tx(tx)
        model_ids = [entry.id for entry in all_models]
        counts_by_id = db.get_property_counts_tx(tx, model_ids)

        concepts = []
        for entry in all_models:
            concepts.append(to_concept_dict(entry, counts_by_id[entry.id]))
        return concepts
Esempio n. 2
0
def publish_schema(
    db: PartitionedDatabase,
    tx: Transaction,
    config: PublishConfig,
    s3,
    file_manifests: List[FileManifest],
    proxy_relationship_names: List[RelationshipName],
) -> Tuple[ExportGraphSchema, FileManifest]:
    """
    Export the schema of the partitioned database into a `GraphSchema`
    instance.

    The schema is assembled from the models, their properties and their
    relationships as read through `db` inside transaction `tx`, serialized
    to JSON and uploaded with `s3.put_object`.

    Args:
        db: Database the models, properties and relationships are read from.
        tx: Transaction handed to every `db.*_tx` read.
        config: Supplies `s3_bucket` and `s3_publish_key` for the upload.
        s3: Client object exposing `put_object` (presumably a boto3 S3
            client - confirm against the caller).
        file_manifests: When non-empty, the special package proxy model is
            appended to the exported models.
        proxy_relationship_names: Each name is exported as a relationship
            with no backing relationship object and empty `from_` / `to`.

    Returns:
        A tuple of the exported schema and the manifest describing the
        uploaded schema file.

    Raises:
        AssertionError: If an exported model has the same name as the
            package proxy model.
        KeyError: If a relationship refers to a model id that is not among
            the models returned by `db.get_models_tx`.
    """
    schema_models: List[ExportModel] = []
    schema_relationships: List[ExportModelRelationship] = []

    log.info("Exporting graph schema")

    models: List[Model] = db.get_models_tx(tx)
    model_index: Dict[UUID, Model] = {m.id: m for m in models}

    for m in models:
        log.info(f"Building schema for model '{m.name}'")
        properties: List[ModelProperty] = db.get_properties_tx(tx, m)
        linked_properties: List[ModelRelationship] = list(
            db.get_outgoing_model_relationships_tx(
                tx, from_model=m, one_to_many=False
            )
        )

        # Relationships without an index sort last. The check is an explicit
        # `is None` so that an index of 0 keeps its position instead of
        # being treated as missing.
        ordered_linked_properties = sorted(
            linked_properties,
            key=lambda rel: sys.maxsize if rel.index is None else rel.index,
        )

        publish_properties: List[ExportProperty] = [
            ExportProperty.model_property(
                name=p.name,
                display_name=p.display_name,
                description=p.description,
                data_type=p.data_type,
            )
            for p in properties
        ] + [
            ExportProperty.linked_property(
                name=r.name,
                display_name=r.display_name,
                description=r.description,
                data_type=LinkedModelDataType(
                    to=model_index[r.to].name,
                    file=str(OutputFile.csv_for_model(m.name)),
                ),
            )
            for r in ordered_linked_properties
        ]

        schema_models.append(
            ExportModel(
                model=m,
                name=m.name,
                display_name=m.display_name,
                description=m.description,
                properties=publish_properties,
            )
        )

    # If any packages exist in this dataset, add a special-cased "File" model
    if file_manifests:
        log.info("Building schema for proxy package model")
        proxy_package_model = ExportModel.package_proxy()

        # TODO: gracefully handle this case to avoid overwriting "files.csv"
        # Raised explicitly (rather than via `assert`) so the guard is not
        # stripped under `python -O`; the exception type is unchanged.
        clashing_model = next(
            (
                exported
                for exported in schema_models
                if exported.name == proxy_package_model.name
            ),
            None,
        )
        if clashing_model is not None:
            raise AssertionError(
                f"Cannot export package proxy schema model with name '{proxy_package_model.name}' - "
                f"a model '{clashing_model.name}' already exists. See https://app.clickup.com/t/102ndc for issue"
            )
        schema_models.append(proxy_package_model)

    relationships = db.get_outgoing_model_relationships_tx(tx, one_to_many=True)

    for r in relationships:
        log.info(f"Building schema for relationship '{r.name}'")
        schema_relationships.append(
            ExportModelRelationship(
                relationship=r,
                name=r.name,
                from_=model_index[r.from_].name,
                to=model_index[r.to].name,
            )
        )

    # Proxy relationships have no backing relationship object and are
    # exported with empty endpoints.
    for p in proxy_relationship_names:
        log.info(f"Building schema for proxy relationship '{p}'")
        schema_relationships.append(
            ExportModelRelationship(relationship=None, name=p, from_="", to="")
        )

    schema = ExportGraphSchema(
        models=schema_models, relationships=schema_relationships
    )

    # Write "schema.json" to S3
    # ======================================================================
    schema_output_file = OutputFile.json_for_schema().with_prefix(
        os.path.join(config.s3_publish_key, METADATA)
    )
    s3.put_object(
        Bucket=config.s3_bucket,
        Key=str(schema_output_file),
        Body=schema.to_json(camel_case=True, pretty_print=True, drop_nulls=True),
        RequestPayer="requester",
    )

    # The size is looked up from the uploaded object, while the manifest is
    # built from the file re-prefixed with METADATA only.
    # NOTE(review): presumably manifest paths are relative to the publish
    # key - confirm against `OutputFile.with_prefix`.
    schema_manifest = schema_output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, schema_output_file)
    )

    return schema, schema_manifest