Esempio n. 1
0
    def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]:
        user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}"
        try:
            user_info_response = self.session.get(user_info_url)
            user_info_response.raise_for_status()
            user_details = user_info_response.json()
        except HTTPError as http_error:
            self.report.report_failure(
                key=f"metabase-user-{creator_id}",
                reason=f"Unable to retrieve User info. "
                f"Reason: {str(http_error)}",
            )
            return None

        owner_urn = builder.make_user_urn(user_details.get("email", ""))
        if owner_urn is not None:
            ownership: OwnershipClass = OwnershipClass(owners=[
                OwnerClass(
                    owner=owner_urn,
                    type=OwnershipTypeClass.DATAOWNER,
                )
            ])
            return ownership

        return None
Esempio n. 2
0
 def _get_owners_aspect(self, node: DBTNode) -> OwnershipClass:
     """Return an ownership aspect listing ``node.owner`` as the data owner."""
     node_owner = OwnerClass(
         owner=f"urn:li:corpuser:{node.owner}",
         type=OwnershipTypeClass.DATAOWNER,
     )
     return OwnershipClass(owners=[node_owner])
Esempio n. 3
0
 def transform_one(
         self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
     """Empty the owner list of the event's ownership aspect.

     The aspect is obtained through ``builder.get_or_add_aspect`` with an
     empty ``OwnershipClass`` as the default; the same event is returned.
     """
     default_ownership = OwnershipClass(owners=[])
     aspect = builder.get_or_add_aspect(mce, default_ownership)
     # Reset the list even when the returned aspect already carried owners.
     aspect.owners = []
     return mce
Esempio n. 4
0
 def transform_one(
         self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
     """Empty the owner list of dataset events; pass other events through.

     Only events whose proposed snapshot is a ``DatasetSnapshotClass`` are
     touched. The same event object is returned in every case.
     """
     if isinstance(mce.proposedSnapshot, DatasetSnapshotClass):
         default_ownership = OwnershipClass(owners=[])
         aspect = builder.get_or_add_aspect(mce, default_ownership)
         # Reset the list even when the returned aspect already had owners.
         aspect.owners = []
     return mce
Esempio n. 5
0
def create_metadata_work_unit(timestamp):
    """Build the work unit for the ``datalake_grilled.Barbeque`` Glue table.

    The snapshot carries four aspects, in this order: status, ownership,
    dataset properties and schema metadata. ``timestamp`` is used for every
    audit stamp in the result.
    """
    status = Status(removed=False)

    susan = OwnerClass(
        owner="urn:li:corpuser:Susan", type=OwnershipTypeClass.DATAOWNER
    )
    ownership = OwnershipClass(
        owners=[susan],
        lastModified=AuditStampClass(
            time=timestamp, actor="urn:li:corpuser:datahub"
        ),
    )

    properties = DatasetPropertiesClass(
        description="Grilled Food",
        customProperties={},
        uri=None,
        tags=[],
    )

    size_field = SchemaField(
        fieldPath="Size",
        nativeDataType="int",
        type=SchemaFieldDataType(type=NumberTypeClass()),
        description="Maximum attendees permitted",
        nullable=True,
        recursive=False,
    )
    schema = SchemaMetadata(
        schemaName="datalake_grilled.Barbeque",
        version=0,
        fields=[size_field],
        platform="urn:li:dataPlatform:glue",
        created=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        lastModified=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        hash="",
        platformSchema=MySqlDDL(tableSchema=""),
    )

    snapshot = DatasetSnapshot(
        urn="urn:li:dataset:(urn:li:dataPlatform:glue,datalake_grilled.Barbeque,PROD)",
        aspects=[status, ownership, properties, schema],
    )
    return MetadataWorkUnit(
        id="glue-datalake_grilled.Barbeque",
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
Esempio n. 6
0
    def _generate_base_aspects(
        self,
        node: DBTNode,
        additional_custom_props_filtered: Dict[str, str],
        mce_platform: str,
        meta_aspects: Dict[str, Any],
    ) -> List[Any]:
        """
        Collect the aspects shared by dbt nodes and platform nodes.

        The result always starts with the dataset properties and a
        non-removed status. Ownership, global tags, the meta-mapped term
        aspect and schema metadata follow, each only when it applies.
        """

        # Typed as List[Any] to avoid spelling out a large union of aspect types.
        aspects: List[Any] = [
            self._create_dataset_properties_aspect(
                node, additional_custom_props_filtered),
            StatusClass(removed=False),
        ]

        # Owners from meta properties are aggregated with the node's own owners.
        owners = self._aggregate_owners(
            node, meta_aspects.get(Constants.ADD_OWNER_OPERATION))
        if owners:
            aspects.append(OwnershipClass(owners=owners))

        # Tags are aggregated the same way.
        tags = self._aggregate_tags(
            node, meta_aspects.get(Constants.ADD_TAG_OPERATION))
        if tags:
            aspects.append(
                mce_builder.make_global_tag_aspect_with_tag_list(tags))

        # The term aspect is only forwarded when meta mapping is enabled.
        term_aspect = meta_aspects.get(Constants.ADD_TERM_OPERATION)
        if term_aspect and self.config.enable_meta_mapping:
            aspects.append(term_aspect)

        # Schema metadata is always computed. It is always attached for a dbt
        # node; for any other platform the load_schemas flag decides.
        schema_metadata = get_schema_metadata(self.report, node, mce_platform)
        if mce_platform == DBT_PLATFORM or self.config.load_schemas:
            aspects.append(schema_metadata)

        return aspects
Esempio n. 7
0
 def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
     """Wrap a known tag definition in a change event owned by ``datahub``.

     ``tag_urn`` must be a key of ``LookerUtil.tag_definitions``; this is
     enforced with an assertion.
     """
     assert tag_urn in LookerUtil.tag_definitions
     datahub_owner = OwnerClass(
         owner="urn:li:corpuser:datahub",
         type=OwnershipTypeClass.DATAOWNER,
     )
     tag_snapshot = TagSnapshotClass(
         urn=tag_urn,
         aspects=[
             OwnershipClass(owners=[datahub_owner]),
             LookerUtil.tag_definitions[tag_urn],
         ],
     )
     return MetadataChangeEvent(proposedSnapshot=tag_snapshot)
Esempio n. 8
0
 def get_owner() -> Optional[OwnershipClass]:
     """Return ownership built from ``table["Owner"]``, or ``None`` if unset."""
     owner_name = table.get("Owner")
     if not owner_name:
         return None
     table_owner = OwnerClass(
         owner=f"urn:li:corpuser:{owner_name}",
         type=OwnershipTypeClass.DATAOWNER,
     )
     return OwnershipClass(owners=[table_owner])
Esempio n. 9
0
 def get_owner() -> OwnershipClass:
     """Return ownership built from ``table["Owner"]``.

     The owner list is empty when the table has no (or an empty) owner.
     """
     owner_name = table.get("Owner")
     owners = ([
         OwnerClass(
             owner=f"urn:li:corpuser:{owner_name}",
             type=OwnershipTypeClass.DATAOWNER,
         )
     ] if owner_name else [])
     return OwnershipClass(owners=owners)
Esempio n. 10
0
    def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
        """Return an ownership aspect naming ``user`` as data owner.

        Returns ``None`` when ``user`` is ``None``.
        """
        if user is None:
            return None

        data_owner = OwnerClass(
            owner=builder.make_user_urn(user),
            type=OwnershipTypeClass.DATAOWNER,
        )
        return OwnershipClass(owners=[data_owner])
Esempio n. 11
0
    def transform_aspect(  # type: ignore
            self, entity_urn: str, aspect_name: str,
            aspect: Optional[OwnershipClass]) -> Optional[OwnershipClass]:
        """Append the configured owners to an ownership aspect.

        Args:
            entity_urn: URN of the entity being transformed (not used here).
            aspect_name: Name of the aspect being transformed (not used here).
            aspect: The existing ownership aspect, or ``None`` if there is none.

        Returns:
            The ownership aspect with ``self.owners`` appended. A new
            ``OwnershipClass`` is created when ``aspect`` is falsy. When there
            are no owners to add, ``aspect`` is returned unchanged.
        """
        owners_to_add = self.owners
        assert aspect is None or isinstance(aspect, OwnershipClass)

        if not owners_to_add:
            # Previously this path fell through to `return ownership` with the
            # name never bound, raising UnboundLocalError.
            return aspect

        ownership = aspect if aspect else OwnershipClass(owners=[], )
        ownership.owners.extend(owners_to_add)
        return ownership
    def transform_one(
            self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
        """Append configured owners to a dataset event's ownership aspect.

        Events whose snapshot is not a ``DatasetSnapshotClass`` are returned
        as-is, as are dataset events for which the config yields no owners.
        """
        snapshot = mce.proposedSnapshot
        if isinstance(snapshot, DatasetSnapshotClass):
            extra_owners = self.config.get_owners_to_add(snapshot)
            if extra_owners:
                aspect = builder.get_or_add_aspect(
                    mce, OwnershipClass(owners=[]))
                aspect.owners.extend(extra_owners)

        return mce
Esempio n. 13
0
    def get_group_wu(
        self, group_details: "DescribeModelPackageGroupOutputTypeDef"
    ) -> MetadataWorkUnit:
        """
        Build the work unit describing one model package group.

        Also records the group's ARN -> name mapping in
        ``self.group_arn_to_name``.
        """

        # These keys are surfaced elsewhere (name in the URN, creation time as
        # createdAt), so they are left out of the custom properties.
        redundant_fields = {"ModelPackageGroupName", "CreationTime"}

        group_arn = group_details["ModelPackageGroupArn"]
        group_name = group_details["ModelPackageGroupName"]
        self.group_arn_to_name[group_arn] = group_name

        # The creating user profile, when present, becomes the data owner.
        creator = group_details.get("CreatedBy", {}).get("UserProfileName")
        owners = []
        if creator is not None:
            owners.append(
                OwnerClass(
                    owner=f"urn:li:corpuser:{creator}",
                    type=OwnershipTypeClass.DATAOWNER,
                ))

        group_urn = builder.make_ml_model_group_urn("sagemaker", group_name,
                                                    self.env)

        # createdAt is in milliseconds; falls back to "now" if no CreationTime.
        created = group_details.get("CreationTime", datetime.now())
        properties = MLModelGroupPropertiesClass(
            createdAt=int(created.timestamp() * 1000),
            description=group_details.get("ModelPackageGroupDescription"),
            customProperties={
                key: str(value)
                for key, value in group_details.items()
                if key not in redundant_fields
            },
        )

        group_snapshot = MLModelGroupSnapshot(
            urn=group_urn,
            aspects=[
                properties,
                OwnershipClass(owners),
                BrowsePathsClass(paths=[f"/sagemaker/{group_name}"]),
            ],
        )

        return MetadataWorkUnit(
            id=group_name,
            mce=MetadataChangeEvent(proposedSnapshot=group_snapshot),
        )
Esempio n. 14
0
 def get_ownership(
         self,
         looker_dashboard: LookerDashboard) -> Optional[OwnershipClass]:
     """Return an ownership aspect for the dashboard's owner.

     Returns ``None`` when the dashboard has no owner or the owner yields
     no URN.
     """
     dashboard_owner = looker_dashboard.owner
     if dashboard_owner is None:
         return None

     owner_urn = dashboard_owner._get_urn(
         self.source_config.strip_user_ids_from_email)
     if owner_urn is None:
         return None

     data_owner = OwnerClass(
         owner=owner_urn,
         type=OwnershipTypeClass.DATAOWNER,
     )
     return OwnershipClass(owners=[data_owner])
Esempio n. 15
0
def add_owner_to_entity_wu(entity_type: str, entity_urn: str,
                           owner_urn: str) -> Iterable[MetadataWorkUnit]:
    """Yield one work unit that upserts ``owner_urn`` as data owner.

    The work unit wraps an ``ownership`` aspect proposal for ``entity_urn``
    and is identified as ``"{owner_urn}-to-{entity_urn}"``.
    """
    data_owner = OwnerClass(
        owner=owner_urn,
        type=OwnershipTypeClass.DATAOWNER,
    )
    proposal = MetadataChangeProposalWrapper(
        entityType=entity_type,
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=f"{entity_urn}",
        aspectName="ownership",
        aspect=OwnershipClass(owners=[data_owner]),
    )
    yield MetadataWorkUnit(id=f"{owner_urn}-to-{entity_urn}", mcp=proposal)
Esempio n. 16
0
    def _make_dashboard_and_chart_mces(
        self, looker_dashboard: LookerDashboard
    ) -> List[MetadataChangeEvent]:
        """Build change events for a dashboard and each of its elements.

        Returns the chart events (one per dashboard element, in order)
        followed by a single dashboard event carrying info, ownership and
        status aspects.
        """
        actor = self.source_config.actor
        sys_time = get_sys_time()

        chart_mces = list(
            map(self._make_chart_mce, looker_dashboard.dashboard_elements)
        )

        dashboard_urn = f"urn:li:dashboard:({self.source_config.platform_name},{looker_dashboard.get_urn_dashboard_id()})"
        dashboard_snapshot = DashboardSnapshot(
            urn=dashboard_urn,
            aspects=[],
        )

        # A missing description is stored as an empty string.
        description = looker_dashboard.description
        if description is None:
            description = ""

        dashboard_info = DashboardInfoClass(
            description=description,
            title=looker_dashboard.title,
            charts=[chart.proposedSnapshot.urn for chart in chart_mces],
            lastModified=ChangeAuditStamps(
                created=AuditStamp(time=sys_time, actor=actor),
                lastModified=AuditStamp(time=sys_time, actor=actor),
            ),
            dashboardUrl=looker_dashboard.url(self.source_config.base_url),
        )

        # The configured actor is recorded as the dashboard's data owner.
        ownership = OwnershipClass(
            owners=[OwnerClass(owner=actor, type=OwnershipTypeClass.DATAOWNER)],
            lastModified=AuditStampClass(
                time=sys_time, actor=self.source_config.actor
            ),
        )

        dashboard_snapshot.aspects.extend(
            [
                dashboard_info,
                ownership,
                Status(removed=looker_dashboard.is_deleted),
            ]
        )

        dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
        return [*chart_mces, dashboard_mce]
Esempio n. 17
0
 def generate_ownership_aspect(self):
     """Return a one-element list holding the ownership aspect.

     Every entry of ``self.owners`` (none, if it is empty or ``None``)
     becomes a ``DEVELOPER`` owner with a ``SERVICE`` source. The audit
     stamp uses time 0 and the orchestrator as actor.
     """
     developer_owners = []
     for owner in (self.owners or []):
         developer_owners.append(
             OwnerClass(
                 owner=builder.make_user_urn(owner),
                 type=OwnershipTypeClass.DEVELOPER,
                 source=OwnershipSourceClass(
                     type=OwnershipSourceTypeClass.SERVICE,
                     # url=dag.filepath,
                 ),
             ))

     audit_stamp = AuditStampClass(
         time=0,
         actor=builder.make_user_urn(self.orchestrator),
     )
     return [OwnershipClass(owners=developer_owners,
                            lastModified=audit_stamp)]
Esempio n. 18
0
 def get_owner(time: int) -> OwnershipClass:
     """Return ownership built from ``table["Owner"]``, stamped with ``time``.

     The owner list is empty when the table has no (or an empty) owner; the
     audit stamp is attributed to the ``datahub`` user.
     """
     owner_name = table.get("Owner")
     owners = ([
         OwnerClass(
             owner=f"urn:li:corpuser:{owner_name}",
             type=OwnershipTypeClass.DATAOWNER,
         )
     ] if owner_name else [])
     audit_stamp = AuditStampClass(
         time=time,
         actor="urn:li:corpuser:datahub",
     )
     return OwnershipClass(owners=owners, lastModified=audit_stamp)
Esempio n. 19
0
def create_ownership_aspect_mce(
        directive: Directive) -> MetadataChangeEventClass:
    """Build a dataset change event that carries only an ownership aspect.

    Every name in ``directive.owners`` is cleaned, converted to a URN and
    listed as a data owner of ``directive.table``. The audit stamp uses the
    current time in milliseconds and the ``datahub`` user.
    """
    dataset_urn = dataset_name_to_urn(directive.table)
    data_owners = [
        OwnerClass(
            owner=owner_name_to_urn(clean_owner_name(owner)),
            type=OwnershipTypeClass.DATAOWNER,
        ) for owner in directive.owners
    ]
    audit_stamp = AuditStampClass(
        time=int(time.time() * 1000),
        actor="urn:li:corpuser:datahub",
    )
    snapshot = DatasetSnapshotClass(
        urn=dataset_urn,
        aspects=[
            OwnershipClass(owners=data_owners, lastModified=audit_stamp),
        ],
    )
    return MetadataChangeEventClass(proposedSnapshot=snapshot)
Esempio n. 20
0
def make_ownership_aspect_from_urn_list(
    owner_urns: List[str],
    source_type: Optional[Union[str,
                                OwnershipSourceTypeClass]]) -> OwnershipClass:
    """Build an ownership aspect listing every URN as a data owner.

    Args:
        owner_urns: Owner URNs; each must start with ``urn:li:corpuser:`` or
            ``urn:li:corpGroup:`` (checked with an assertion).
        source_type: Optional ownership source type. When truthy it is
            wrapped in an ``OwnershipSourceClass`` attached to every owner.

    Returns:
        An ``OwnershipClass`` with one ``DATAOWNER`` entry per URN.

    Raises:
        AssertionError: If any URN has neither of the accepted prefixes.
    """
    for owner_urn in owner_urns:
        # The original check was garbled into invalid syntax; str.startswith
        # accepts a tuple, so both prefixes are tested in one call.
        assert owner_urn.startswith(
            ("urn:li:corpuser:", "urn:li:corpGroup:"))

    ownership_source_type: Union[None, OwnershipSourceClass] = None
    if source_type:
        ownership_source_type = OwnershipSourceClass(type=source_type)

    owners_list = [
        OwnerClass(
            owner=owner_urn,
            type=OwnershipTypeClass.DATAOWNER,
            source=ownership_source_type,
        ) for owner_urn in owner_urns
    ]
    return OwnershipClass(owners=owners_list, )
Esempio n. 21
0
    def transform_one(
            self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
        """Append configured owners to a dataset event's ownership aspect.

        Non-dataset events, and dataset events with no owners to add, are
        returned untouched. The default aspect passed to
        ``get_or_add_aspect`` is stamped with the current system time and
        the configured default actor.
        """
        snapshot = mce.proposedSnapshot
        if not isinstance(snapshot, DatasetSnapshotClass):
            return mce

        extra_owners = self.config.get_owners_to_add(snapshot)
        if not extra_owners:
            return mce

        default_ownership = OwnershipClass(
            owners=[],
            lastModified=AuditStampClass(
                time=builder.get_sys_time(),
                actor=self.config.default_actor,
            ),
        )
        aspect = builder.get_or_add_aspect(mce, default_ownership)
        aspect.owners.extend(extra_owners)
        return mce
Esempio n. 22
0
    def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass:
        """Append configured owners to a dataset event's ownership aspect.

        The snapshot must be a ``DatasetSnapshotClass`` (asserted). With
        ``Semantics.PATCH`` the ownership aspect is afterwards replaced by
        the result of ``AddDatasetOwnership.get_ownership_to_set``, which
        requires ``self.ctx.graph`` to be set (asserted).
        """
        assert isinstance(mce.proposedSnapshot, DatasetSnapshotClass)

        extra_owners = self.config.get_owners_to_add(mce.proposedSnapshot)
        if not extra_owners:
            return mce

        ownership = builder.get_or_add_aspect(mce, OwnershipClass(owners=[]))
        ownership.owners.extend(extra_owners)

        if self.config.semantics == Semantics.PATCH:
            assert self.ctx.graph
            patched = AddDatasetOwnership.get_ownership_to_set(
                self.ctx.graph, mce.proposedSnapshot.urn, ownership
            )
            builder.set_aspect(mce, aspect=patched, aspect_type=OwnershipClass)

        return mce
Esempio n. 23
0
    OwnershipTypeClass,
)

# Module logger; INFO-level logging is enabled for the whole script.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Inputs -> owner, ownership_type, dataset
# The owner to add ("jdoe"), the kind of ownership, and the target dataset.
owner_to_add = make_user_urn("jdoe")
ownership_type = OwnershipTypeClass.DATAOWNER
dataset_urn = make_dataset_urn(platform="hive",
                               name="realestate_db.sales",
                               env="PROD")

# Some objects to help with conditional pathways later:
# the single owner entry, and an ownership aspect containing only that entry.
owner_class_to_add = OwnerClass(owner=owner_to_add, type=ownership_type)
ownership_to_add = OwnershipClass(owners=[owner_class_to_add])

# First we get the current owners from the server at gms_endpoint.
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))

# Annotated Optional: the code below checks for a falsy result before use.
current_owners: Optional[OwnershipClass] = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="ownership",
    aspect_type=OwnershipClass,
)

# Starts False; presumably flipped further down when an update is required
# (that code lies beyond this fragment).
need_write = False
if current_owners:
    if (owner_to_add, ownership_type) not in [(x.owner, x.type)
                                              for x in current_owners.owners]:
Esempio n. 24
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield one dataset work unit per node loaded from the dbt files.

        Each snapshot always carries dataset properties. Ownership, global
        tags, upstream lineage and schema metadata are added when the node
        or config provides them. Every work unit is reported to
        ``self.report`` before it is yielded.
        """
        (
            nodes,
            manifest_schema,
            manifest_version,
            catalog_schema,
            catalog_version,
        ) = loadManifestAndCatalog(
            self.config.manifest_path,
            self.config.catalog_path,
            self.config.sources_path,
            self.config.load_schemas,
            self.config.use_identifiers,
            self.config.tag_prefix,
            self.config.target_platform,
            self.config.env,
            self.config.node_type_pattern,
            self.report,
        )

        # Manifest/catalog details are attached to every node as custom
        # properties; entries whose value is None are dropped.
        version_props = {
            name: value
            for name, value in (
                ("manifest_schema", manifest_schema),
                ("manifest_version", manifest_version),
                ("catalog_schema", catalog_schema),
                ("catalog_version", catalog_version),
            )
            if value is not None
        }

        for node in nodes:
            # Show both texts when the comment and the dbt description
            # differ; otherwise use whichever one is set (or None).
            comment, model_description = node.comment, node.description
            if comment and model_description and comment != model_description:
                description = f"{self.config.target_platform} comment: {comment}\n\ndbt model description: {model_description}"
            else:
                description = comment or model_description or None

            aspects = [
                DatasetPropertiesClass(
                    description=description,
                    customProperties={
                        **get_custom_properties(node),
                        **version_props,
                    },
                    tags=node.tags,
                )
            ]

            if node.owner:
                node_owner = OwnerClass(
                    owner=f"urn:li:corpuser:{node.owner}",
                    type=OwnershipTypeClass.DATAOWNER,
                )
                aspects.append(OwnershipClass(owners=[node_owner]))

            if node.tags:
                tag_associations = [
                    TagAssociationClass(f"urn:li:tag:{tag}")
                    for tag in node.tags
                ]
                aspects.append(GlobalTagsClass(tags=tag_associations))

            upstreams = get_upstream_lineage(node.upstream_urns)
            if upstreams is not None:
                aspects.append(upstreams)

            if self.config.load_schemas:
                aspects.append(
                    get_schema_metadata(self.report, node,
                                        self.config.target_platform))

            dataset_snapshot = DatasetSnapshot(urn=node.datahub_urn,
                                               aspects=aspects)
            work_unit = MetadataWorkUnit(
                id=dataset_snapshot.urn,
                mce=MetadataChangeEvent(proposedSnapshot=dataset_snapshot),
            )
            self.report.report_workunit(work_unit)

            yield work_unit
Esempio n. 25
0
    def __to_datahub_dashboard(
        self,
        dashboard: PowerBiAPI.Dashboard,
        chart_mcps: List[MetadataChangeProposalWrapper],
        user_mcps: List[MetadataChangeProposalWrapper],
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map a PowerBi dashboard to the Datahub proposals describing it.

        Returns five proposals for the dashboard URN, in this order: browse
        paths, dashboard info, status, dashboard key and ownership.
        """

        dashboard_urn = builder.make_dashboard_urn(self.__config.platform_name,
                                                   dashboard.get_urn_part())

        chart_urn_list: List[str] = self.to_urn_set(chart_mcps)
        user_urn_list: List[str] = self.to_urn_set(user_mcps)

        def dashboard_mcp(aspect_name, aspect):
            # Every proposal below targets the same dashboard entity.
            return self.new_mcp(
                entity_type=Constant.DASHBOARD,
                entity_urn=dashboard_urn,
                aspect_name=aspect_name,
                aspect=aspect,
            )

        # DashboardInfo: the display name doubles as title and description.
        info_mcp = dashboard_mcp(
            Constant.DASHBOARD_INFO,
            DashboardInfoClass(
                description=dashboard.displayName or "",
                title=dashboard.displayName or "",
                charts=chart_urn_list,
                lastModified=ChangeAuditStamps(),
                dashboardUrl=dashboard.webUrl,
                customProperties={
                    "chartCount": str(len(dashboard.tiles)),
                    "workspaceName": dashboard.workspace_name,
                    "workspaceId": dashboard.id,
                },
            ),
        )

        # Status: the dashboard is marked as not removed.
        removed_status_mcp = dashboard_mcp(Constant.STATUS,
                                           StatusClass(removed=False))

        # Dashboard key
        dashboard_key_mcp = dashboard_mcp(
            Constant.DASHBOARD_KEY,
            DashboardKeyClass(
                dashboardTool=self.__config.platform_name,
                dashboardId=Constant.DASHBOARD_ID.format(dashboard.id),
            ),
        )

        # Ownership: every non-None user URN is listed as a consumer.
        consumers = [
            OwnerClass(owner=user_urn, type=OwnershipTypeClass.CONSUMER)
            for user_urn in user_urn_list if user_urn is not None
        ]
        owner_mcp = dashboard_mcp(Constant.OWNERSHIP,
                                  OwnershipClass(owners=consumers))

        # Browse paths: dashboards are filed under their workspace id.
        browse_path_mcp = dashboard_mcp(
            Constant.BROWSERPATH,
            BrowsePathsClass(
                paths=["/powerbi/{}".format(self.__config.workspace_id)]),
        )

        return [
            browse_path_mcp,
            info_mcp,
            removed_status_mcp,
            dashboard_key_mcp,
            owner_mcp,
        ]
Esempio n. 26
0
    def process_dataflow_node(
        self,
        node: Dict[str, Any],
        flow_urn: str,
        new_dataset_ids: List[str],
        new_dataset_mces: List[MetadataChangeEvent],
        s3_formats: typing.DefaultDict[str, Set[Union[str, None]]],
    ) -> Dict[str, Any]:
        """Assign a URN to one dataflow node and return the enriched node.

        ``DataSource``/``DataSink`` nodes get a dataset URN (Glue table or S3
        path); any other node type gets a data job URN under ``flow_urn``.
        For S3 datasets a snapshot event and an id are also appended to
        ``new_dataset_mces`` and ``new_dataset_ids``.

        Returns:
            A copy of ``node`` with ``urn`` set and empty ``inputDatajobs``,
            ``inputDatasets`` and ``outputDatasets`` lists.

        Raises:
            ValueError: For a dataset node that is neither a Glue table nor
                an S3 connection.
        """

        node_type = node["NodeType"]

        if node_type not in ["DataSource", "DataSink"]:
            # Anything that is not a dataset represents a transformation.
            node_urn = mce_builder.make_data_job_urn_with_flow(
                flow_urn, job_id=f'{node["NodeType"]}-{node["Id"]}')

        else:
            # Argument values are JSON-encoded.
            node_args = {}
            for arg in node["Args"]:
                node_args[arg["Name"]] = json.loads(arg["Value"])

            if "database" in node_args and "table_name" in node_args:
                # Glue table: no snapshot is emitted here, since the table
                # is already covered when ingesting Glue tables.
                full_table_name = f"{node_args['database']}.{node_args['table_name']}"
                node_urn = f"urn:li:dataset:(urn:li:dataPlatform:glue,{full_table_name},{self.env})"

            elif node_args.get("connection_type") == "s3":
                # Drop the first five characters (the "s3://" prefix) and at
                # most one trailing slash.
                s3_name = node_args["connection_options"]["path"][5:]
                if s3_name.endswith("/"):
                    s3_name = s3_name[:-1]

                # The format is appended only when several formats exist for
                # the same path.
                if len(s3_formats[s3_name]) > 1:
                    dataset_key = f"{s3_name}_{node_args.get('format')}"
                else:
                    dataset_key = s3_name
                node_urn = f"urn:li:dataset:(urn:li:dataPlatform:s3,{dataset_key},{self.env})"

                dataset_snapshot = DatasetSnapshot(
                    urn=node_urn,
                    aspects=[],
                )
                dataset_snapshot.aspects.extend([
                    Status(removed=False),
                    OwnershipClass(
                        owners=[],
                        lastModified=AuditStampClass(
                            time=mce_builder.get_sys_time(),
                            actor="urn:li:corpuser:datahub",
                        ),
                    ),
                    DatasetPropertiesClass(
                        customProperties={
                            name: str(value)
                            for name, value in node_args.items()
                        },
                        tags=[],
                    ),
                ])

                new_dataset_mces.append(
                    MetadataChangeEvent(proposedSnapshot=dataset_snapshot))
                new_dataset_ids.append(f"{node['NodeType']}-{node['Id']}")

            else:
                raise ValueError(
                    f"Unrecognized Glue data object type: {node_args}")

        enriched = dict(node)
        enriched.update({
            "urn": node_urn,
            # to be filled in after traversing edges
            "inputDatajobs": [],
            "inputDatasets": [],
            "outputDatasets": [],
        })
        return enriched