Example #1
0
    def __init__(self, config: SimpleDatasetTermsConfig, ctx: PipelineContext):
        """Configure the transformer to attach a fixed set of glossary terms.

        Each URN in ``config.term_urns`` is wrapped once in a
        ``GlossaryTermAssociationClass``. The parent initializer then receives
        an ``AddDatasetTermsConfig`` whose ``get_terms_to_add`` callback ignores
        its argument and always returns that same list.
        """
        associations = []
        for term_urn in config.term_urns:
            associations.append(GlossaryTermAssociationClass(urn=term_urn))

        def fixed_terms(_):
            # The argument is deliberately unused: the terms do not depend on it.
            return associations

        super().__init__(AddDatasetTermsConfig(get_terms_to_add=fixed_terms), ctx)
 def __init__(self, config: PatternDatasetTermsConfig,
              ctx: PipelineContext):
     """Configure the transformer to pick glossary terms by URN pattern.

     ``config.term_pattern`` is read once here. The callback handed to the
     parent initializer passes its argument's ``urn`` to
     ``term_pattern.value(...)`` and wraps every URN that call yields in a
     ``GlossaryTermAssociationClass``.
     """
     pattern = config.term_pattern

     def terms_for(entity):
         # Evaluated on every call, so the result follows the entity's URN.
         matched_urns = pattern.value(entity.urn)
         return [
             GlossaryTermAssociationClass(urn=matched)
             for matched in matched_urns
         ]

     super().__init__(AddDatasetTermsConfig(get_terms_to_add=terms_for), ctx)
Example #3
0
def make_glossary_terms_aspect_from_urn_list(
        term_urns: List[str]) -> GlossaryTerms:
    """Build a ``GlossaryTerms`` aspect associating every URN in *term_urns*.

    Args:
        term_urns: Glossary-term URNs. Each must start with
            ``urn:li:glossaryTerm:``. An empty list yields an aspect that
            carries no term associations.

    Returns:
        A ``GlossaryTerms`` built from one ``GlossaryTermAssociationClass``
        per URN plus an ``AuditStampClass`` whose ``time`` is
        ``int(time.time() * 1000)`` and whose actor is
        ``urn:li:corpuser:datahub``.

    Raises:
        AssertionError: If any entry lacks the glossary-term URN prefix.
    """
    required_prefix = "urn:li:glossaryTerm:"
    for term_urn in term_urns:
        if not term_urn.startswith(required_prefix):
            # Raised explicitly rather than via ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is kept as
            # AssertionError so existing callers see the same failure.
            raise AssertionError(
                f"{term_urn!r} is not a glossary term URN "
                f"(expected prefix {required_prefix!r})")

    # Validation runs over the whole list before anything is constructed.
    glossary_terms = GlossaryTerms(
        [GlossaryTermAssociationClass(term_urn) for term_urn in term_urns],
        AuditStampClass(
            time=int(time.time() * 1000),
            actor="urn:li:corpuser:datahub",
        ),
    )
    return glossary_terms
    ChangeTypeClass,
    GlossaryTermAssociationClass,
    GlossaryTermsClass,
)

# Module-level logger; basicConfig enables INFO-level output for this script.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Build the REST emitter for the GMS server at gms_endpoint.
# NOTE(review): nothing in this section reads the dataset's current terms; the aspect below is built from scratch.
gms_endpoint = "http://localhost:8080"
rest_emitter = DatahubRestEmitter(gms_server=gms_endpoint)

# URN of the dataset the term will be attached to.
dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD")

# URN of the glossary term, then the association object that the terms aspect holds.
term_to_add = make_term_urn("Classification.HighlyConfidential")
term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add)
# An audit stamp with time=0, meaning we do not know when these terms were added to this dataset.
# Replace the time value with int(time.time() * 1000) to record the moment this script runs instead.
unknown_audit_stamp = AuditStampClass(time=0, actor="urn:li:corpuser:ingestion")

# Create a brand-new terms aspect containing only the association built above.
terms_aspect = GlossaryTermsClass(
    terms=[term_association_to_add],
    auditStamp=unknown_audit_stamp,
)

event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityType="dataset",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=dataset_urn,
    aspectName="glossaryTerms",
                nativeDataType=
                "VARCHAR(100)",  # use this to provide the type of the field in the source system's vernacular
                jsonPath="",  # Unused field, can omit
                nullable=True,
                description=
                "This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
                recursive=False,  # Unused field, can omit
                # It is rare to attach tags to fields as part of the technical schema unless you are purely reflecting state that exists in the source system.
                # For an editable (in UI) version of this, use the editableSchemaMetadata aspect
                globalTags=GlobalTagsClass(
                    tags=[TagAssociationClass(tag=make_tag_urn("location"))]),
                # It is rare to attach glossary terms to fields as part of the technical schema unless you are purely reflecting state that exists in the source system.
                # For an editable (in UI) version of this, use the editableSchemaMetadata aspect
                glossaryTerms=GlossaryTermsClass(
                    terms=[
                        GlossaryTermAssociationClass(
                            urn=make_term_urn("Classification.PII"))
                    ],
                    auditStamp=
                    AuditStampClass(  # represents the time when this term was attached to this field?
                        time=
                        0,  # time in milliseconds, leave as 0 if no time of association is known
                        actor=
                        "urn:li:corpuser:ingestion",  # if this is a system provided tag, use a bot user id like ingestion
                    ),
                ),
            )
        ],
    ),
)

# Create rest emitter