Ejemplo n.º 1
0
    def get_datajob_wu(self, node: Dict[str, Any],
                       job: Dict[str, Any]) -> MetadataWorkUnit:
        """
        Build the DataJob workunit for a single node of a Glue job.

        Parameters
        ----------
            node:
                Node from process_dataflow_graph()
            job:
                Job object from get_all_jobs()

        Returns
        -------
            A MetadataWorkUnit wrapping an MCE whose snapshot holds the node's
            info and input/output aspects. The workunit id is built from the
            job name and the node id.
        """
        node_urn = node["urn"]
        display_name = f"{job['Name']}:{node['NodeType']}-{node['Id']}"

        # Node arguments go in first so the two fixed keys below take
        # precedence if an argument happens to use the same name.
        properties = {arg["Name"]: arg["Value"] for arg in node["Args"]}
        properties["transformType"] = node["NodeType"]
        properties["nodeId"] = node["Id"]

        info_aspect = DataJobInfoClass(
            name=display_name,
            type="GLUE",
            customProperties=properties,
        )
        io_aspect = DataJobInputOutputClass(
            inputDatasets=node["inputDatasets"],
            outputDatasets=node["outputDatasets"],
            inputDatajobs=node["inputDatajobs"],
        )

        snapshot = DataJobSnapshotClass(
            urn=node_urn,
            aspects=[info_aspect, io_aspect],
        )
        event = MetadataChangeEventClass(proposedSnapshot=snapshot)

        workunit_id = f'{job["Name"]}-{node["Id"]}'
        return MetadataWorkUnit(id=workunit_id, mce=event)
Ejemplo n.º 2
0
    def construct_job_workunits(
        self,
        job_urn: str,
        job_name: str,
        external_url: str,
        job_type: str,
        description: Optional[str],
        job_properties: Optional[Dict[str, str]] = None,
        inlets: Optional[List[str]] = None,
        outlets: Optional[List[str]] = None,
        inputJobs: Optional[List[str]] = None,
        status: Optional[str] = None,
    ) -> Iterable[MetadataWorkUnit]:
        """
        Yield the workunits describing one data job.

        Two workunits are produced, in this order: one carrying the
        ``dataJobInfo`` aspect and one carrying the ``dataJobInputOutput``
        aspect. Each workunit is passed to ``self.report.report_workunit``
        right before it is yielded.

        Parameters
        ----------
            job_urn:
                URN of the data job both aspects are attached to.
            job_name:
                Job name; also used as part of each workunit id.
            external_url:
                Value for the info aspect's ``externalUrl``.
            job_type:
                Value for the info aspect's ``type``.
            description:
                Optional description for the info aspect.
            job_properties:
                Optional custom properties; entries whose value is None are
                dropped before being sent.
            inlets:
                Input dataset URNs. Omitted or None means no inputs.
            outlets:
                Output dataset URNs. Omitted or None means no outputs.
            inputJobs:
                Upstream data job URNs. Omitted or None means none.
            status:
                Optional status for the info aspect.
        """
        if job_properties:
            job_properties = {k: v for k, v in job_properties.items() if v is not None}

        mcp = MetadataChangeProposalWrapper(
            entityType="dataJob",
            entityUrn=job_urn,
            changeType=ChangeTypeClass.UPSERT,
            aspectName="dataJobInfo",
            aspect=DataJobInfoClass(
                name=job_name,
                type=job_type,
                description=description,
                customProperties=job_properties,
                externalUrl=external_url,
                status=status,
            ),
        )

        wu = MetadataWorkUnit(
            id=f"{NIFI}.{job_name}.{mcp.aspectName}",
            mcp=mcp,
        )
        self.report.report_workunit(wu)
        yield wu

        # Sort into fresh lists rather than in place: the emitted aspect stays
        # deterministic, but neither the caller's lists nor any shared default
        # object is mutated as a side effect of this call.
        sorted_inlets = sorted(inlets or [])
        sorted_outlets = sorted(outlets or [])
        sorted_input_jobs = sorted(inputJobs or [])

        mcp = MetadataChangeProposalWrapper(
            entityType="dataJob",
            entityUrn=job_urn,
            changeType=ChangeTypeClass.UPSERT,
            aspectName="dataJobInputOutput",
            aspect=DataJobInputOutputClass(
                inputDatasets=sorted_inlets,
                outputDatasets=sorted_outlets,
                inputDatajobs=sorted_input_jobs,
            ),
        )

        wu = MetadataWorkUnit(
            id=f"{NIFI}.{job_name}.{mcp.aspectName}",
            mcp=mcp,
        )
        self.report.report_workunit(wu)
        yield wu
Ejemplo n.º 3
0
    def create_common_job_snapshot(
        self,
        job: Dict[str, Any],
        job_type: JobType,
        job_url: Optional[str] = None,
    ) -> Tuple[DataJobSnapshotClass, str, str]:
        """
        Build the DataJob snapshot shared by every SageMaker job type.

        The name, ARN and status are read from ``job`` using the keys that
        ``job_type_to_info`` lists for ``job_type``. A status that has no
        entry in the type's status map is reported as a warning and recorded
        as ``JobStatusClass.UNKNOWN``.

        Parameters
        ----------
            job:
                Job description; every entry is copied (stringified) into the
                snapshot's custom properties.
            job_type:
                Kind of job, used to pick the lookup keys and status map.
            job_url:
                Optional external URL stored on the info aspect.

        Returns
        -------
            Tuple of (job snapshot, job name, job ARN).
        """

        info = job_type_to_info[job_type]

        name = job[info.describe_name_key]
        arn = job[info.describe_arn_key]
        raw_status = job[info.describe_status_key]

        mapped_status = info.status_map.get(raw_status)
        if mapped_status is None:
            # Unmapped status: fall back to UNKNOWN and surface it in the report.
            mapped_status = JobStatusClass.UNKNOWN
            self.report.report_warning(
                name,
                f"Unknown status for {name} ({arn}): {sagemaker_status}"
                if False
                else f"Unknown status for {name} ({arn}): {raw_status}",
            )

        job_urn = make_sagemaker_job_urn(job_type.value, name, arn, self.env)

        # Stringify the whole job description, then stamp the job type on top
        # so it overrides any same-named key coming from the description.
        properties = {key: str(value) for key, value in job.items()}
        properties["jobType"] = job_type.value

        info_aspect = DataJobInfoClass(
            name=name,
            type="SAGEMAKER",
            status=mapped_status,
            externalUrl=job_url,
            customProperties=properties,
        )
        browse_aspect = BrowsePathsClass(paths=[f"/{job_type.value}/{name}"])

        job_snapshot = DataJobSnapshotClass(
            urn=job_urn,
            aspects=[info_aspect, browse_aspect],
        )
        return job_snapshot, name, arn
Ejemplo n.º 4
0
    def get_datajob_wu(
        self, node: Dict[str, Any], job: Dict[str, Any]
    ) -> MetadataWorkUnit:
        """
        Build the DataJob workunit for a single node of a Glue job.

        Parameters
        ----------
            node:
                Node from process_dataflow_graph()
            job:
                Job object from get_all_jobs()

        Returns
        -------
            A MetadataWorkUnit wrapping an MCE whose snapshot holds the node's
            info and input/output aspects. The workunit id is built from the
            job name and the node id.
        """

        region = self.source_config.aws_region

        node_urn = node["urn"]
        display_name = f"{job['Name']}:{node['NodeType']}-{node['Id']}"

        # there's no way to view an individual job node by link, so just show the graph
        graph_url = f"https://{region}.console.aws.amazon.com/gluestudio/home?region={region}#/editor/job/{job['Name']}/graph"

        # Node arguments go in first so the two fixed keys below take
        # precedence if an argument happens to use the same name.
        properties = {arg["Name"]: arg["Value"] for arg in node["Args"]}
        properties["transformType"] = node["NodeType"]
        properties["nodeId"] = node["Id"]

        info_aspect = DataJobInfoClass(
            name=display_name,
            type="GLUE",
            externalUrl=graph_url,
            customProperties=properties,
        )
        io_aspect = DataJobInputOutputClass(
            inputDatasets=node["inputDatasets"],
            outputDatasets=node["outputDatasets"],
            inputDatajobs=node["inputDatajobs"],
        )

        snapshot = DataJobSnapshotClass(
            urn=node_urn,
            aspects=[info_aspect, io_aspect],
        )
        event = MetadataChangeEventClass(proposedSnapshot=snapshot)

        workunit_id = f'{job["Name"]}-{node["Id"]}'
        return MetadataWorkUnit(id=workunit_id, mce=event)
Ejemplo n.º 5
0
    def generate_mce(self) -> MetadataChangeEventClass:
        """
        Build a MetadataChangeEvent holding this job's DataJob snapshot.

        The snapshot carries, in order: the job info aspect, the input/output
        aspect, then whatever ``generate_ownership_aspect()`` and
        ``generate_tags_aspect()`` produce.
        """
        urn = str(self.urn)

        # The job id doubles as the display name when no name was set.
        display_name = self.name
        if display_name is None:
            display_name = self.id

        info_aspect = DataJobInfoClass(
            name=display_name,
            type=AzkabanJobTypeClass.COMMAND,
            description=self.description,
            customProperties=self.properties,
            externalUrl=self.url,
        )
        io_aspect = DataJobInputOutputClass(
            inputDatasets=[str(inlet) for inlet in self.inlets],
            outputDatasets=[str(outlet) for outlet in self.outlets],
            inputDatajobs=[str(upstream) for upstream in self.upstream_urns],
        )

        aspects = [info_aspect, io_aspect]
        aspects.extend(self.generate_ownership_aspect())
        aspects.extend(self.generate_tags_aspect())

        snapshot = DataJobSnapshotClass(urn=urn, aspects=aspects)
        return MetadataChangeEventClass(proposedSnapshot=snapshot)
Ejemplo n.º 6
0
    def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
        """
        Yield the metadata change proposals describing this job.

        Emitted in order: one ``dataJobInfo`` proposal, everything produced
        by ``generate_data_input_output_mcp()``, one ``ownership`` proposal
        per aspect from ``generate_ownership_aspect()``, and one
        ``globalTags`` proposal per aspect from ``generate_tags_aspect()``.
        All proposals wrapped here are UPSERTs against this job's URN.
        """

        def as_proposal(aspect_name, aspect):
            # Wrap a single aspect as an UPSERT proposal for this job.
            return MetadataChangeProposalWrapper(
                entityType="datajob",
                entityUrn=str(self.urn),
                aspectName=aspect_name,
                aspect=aspect,
                changeType=ChangeTypeClass.UPSERT,
            )

        info_aspect = DataJobInfoClass(
            # The job id doubles as the display name when no name was set.
            name=self.name if self.name is not None else self.id,
            type=AzkabanJobTypeClass.COMMAND,
            description=self.description,
            customProperties=self.properties,
            externalUrl=self.url,
        )
        yield as_proposal("dataJobInfo", info_aspect)

        yield from self.generate_data_input_output_mcp()

        for ownership_aspect in self.generate_ownership_aspect():
            yield as_proposal("ownership", ownership_aspect)

        for tags_aspect in self.generate_tags_aspect():
            yield as_proposal("globalTags", tags_aspect)