def __init__(self, config: SimpleDatasetTermsConfig, ctx: PipelineContext):
    """Configure the transformer to attach one fixed list of glossary terms.

    Each URN in ``config.term_urns`` is wrapped in a
    ``GlossaryTermAssociationClass`` once, up front. The parent initializer
    then receives an ``AddDatasetTermsConfig`` whose ``get_terms_to_add``
    callback ignores its argument and returns that prebuilt list.
    """
    fixed_associations = [
        GlossaryTermAssociationClass(urn=term_urn) for term_urn in config.term_urns
    ]

    def _same_terms_for_everything(_unused):
        # The very same list object is handed back on every call.
        return fixed_associations

    super().__init__(
        AddDatasetTermsConfig(get_terms_to_add=_same_terms_for_everything),
        ctx,
    )
def __init__(self, config: PatternDatasetTermsConfig, ctx: PipelineContext):
    """Configure the transformer to pick glossary terms from the entity URN.

    The parent initializer receives an ``AddDatasetTermsConfig`` whose
    ``get_terms_to_add`` callback passes the ``urn`` attribute of its
    argument to ``config.term_pattern.value(...)`` and wraps every URN that
    comes back in a ``GlossaryTermAssociationClass``.
    """
    pattern = config.term_pattern

    def _terms_for(entity):
        # Evaluated on every call, so the result depends on entity.urn.
        matched_urns = pattern.value(entity.urn)
        return [GlossaryTermAssociationClass(urn=matched) for matched in matched_urns]

    super().__init__(AddDatasetTermsConfig(get_terms_to_add=_terms_for), ctx)
def make_glossary_terms_aspect_from_urn_list(term_urns: List[str]) -> GlossaryTerms:
    """Build a glossary-terms aspect associating every URN in ``term_urns``.

    Args:
        term_urns: Glossary term URNs. Each one must start with
            ``urn:li:glossaryTerm:``.

    Returns:
        A ``GlossaryTerms`` aspect holding one ``GlossaryTermAssociationClass``
        per input URN, in input order, together with an audit stamp whose
        time is the current wall-clock time in epoch milliseconds and whose
        actor is ``urn:li:corpuser:datahub``.

    Raises:
        ValueError: If any URN does not start with the glossary-term prefix.
    """
    required_prefix = "urn:li:glossaryTerm:"

    # Validate with an explicit raise rather than `assert`: assertions are
    # stripped under `python -O`, which would let malformed URNs through.
    # All URNs are checked before anything is constructed.
    for term_urn in term_urns:
        if not term_urn.startswith(required_prefix):
            raise ValueError(
                f"Invalid glossary term URN {term_urn!r}: "
                f"expected it to start with {required_prefix!r}"
            )

    return GlossaryTerms(
        [GlossaryTermAssociationClass(term_urn) for term_urn in term_urns],
        AuditStampClass(
            time=int(time.time() * 1000),  # epoch milliseconds
            actor="urn:li:corpuser:datahub",
        ),
    )
ChangeTypeClass, GlossaryTermAssociationClass, GlossaryTermsClass, ) log = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) # First we get the current terms gms_endpoint = "http://localhost:8080" rest_emitter = DatahubRestEmitter(gms_server=gms_endpoint) dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") term_to_add = make_term_urn("Classification.HighlyConfidential") term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add) # an audit stamp that basically says we have no idea when these terms were added to this dataset # change the time value to (time.time() * 1000) if you want to specify the current time of running this code as the time of the application unknown_audit_stamp = AuditStampClass(time=0, actor="urn:li:corpuser:ingestion") # create a brand new terms aspect terms_aspect = GlossaryTermsClass( terms=[term_association_to_add], auditStamp=unknown_audit_stamp, ) event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( entityType="dataset", changeType=ChangeTypeClass.UPSERT, entityUrn=dataset_urn, aspectName="glossaryTerms",
nativeDataType= "VARCHAR(100)", # use this to provide the type of the field in the source system's vernacular jsonPath="", # Unused field, can omit nullable=True, description= "This is the zipcode of the address. Specified using extended form and limited to addresses in the United States", recursive=False, # Unused field, can omit # It is rare to attach tags to fields as part of the technical schema unless you are purely reflecting state that exists in the source system. # For an editable (in UI) version of this, use the editableSchemaMetadata aspect globalTags=GlobalTagsClass( tags=[TagAssociationClass(tag=make_tag_urn("location"))]), # It is rare to attach glossary terms to fields as part of the technical schema unless you are purely reflecting state that exists in the source system. # For an editable (in UI) version of this, use the editableSchemaMetadata aspect glossaryTerms=GlossaryTermsClass( terms=[ GlossaryTermAssociationClass( urn=make_term_urn("Classification.PII")) ], auditStamp= AuditStampClass( # represents the time when this term was attached to this field? time= 0, # time in milliseconds, leave as 0 if no time of association is known actor= "urn:li:corpuser:ingestion", # if this is a system provided tag, use a bot user id like ingestion ), ), ) ], ), ) # Create rest emitter