ownership_type = OwnershipTypeClass.DATAOWNER
dataset_urn = make_dataset_urn(platform="hive",
                               name="realestate_db.sales",
                               env="PROD")

# Some objects to help with conditional pathways later
owner_class_to_add = OwnerClass(owner=owner_to_add, type=ownership_type)
ownership_to_add = OwnershipClass(owners=[owner_class_to_add])

# First we get the current owners
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))

current_owners: Optional[OwnershipClass] = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="ownership",
    aspect_type=OwnershipClass,
)

need_write = False
if current_owners:
    if (owner_to_add, ownership_type) not in [(x.owner, x.type)
                                              for x in current_owners.owners]:
        # owners exist, but this owner is not present in the current owners
        current_owners.owners.append(owner_class_to_add)
        need_write = True
else:
    # create a brand new ownership aspect
    current_owners = ownership_to_add
    need_write = True
Esempio n. 2
0

# Inputs -> the column, dataset and the tag to set
column = "address.zipcode"
dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD")
tag_to_add = make_tag_urn("location")


# First we get the current editable schema metadata
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))


current_editable_schema_metadata = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="editableSchemaMetadata",
    aspect_type=EditableSchemaMetadataClass,
)


# Some pre-built objects to help all the conditional pathways
tag_association_to_add = TagAssociationClass(tag=tag_to_add)
tags_aspect_to_set = GlobalTagsClass(tags=[tag_association_to_add])
field_info_to_set = EditableSchemaFieldInfoClass(
    fieldPath=column, globalTags=tags_aspect_to_set
)


need_write = False
field_match = False
if current_editable_schema_metadata:
)

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# First we get the current terms
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))

dataset_urn = make_dataset_urn(platform="hive",
                               name="realestate_db.sales",
                               env="PROD")

current_terms: Optional[GlossaryTermsClass] = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="glossaryTerms",
    aspect_type=GlossaryTermsClass,
)

term_to_add = make_term_urn("Classification.HighlyConfidential")
term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add)
# an audit stamp that basically says we have no idea when these terms were added to this dataset
# change the time value to (time.time() * 1000) if you want to specify the current time of running this code as the time
unknown_audit_stamp = AuditStampClass(time=0,
                                      actor="urn:li:corpuser:ingestion")
need_write = False
if current_terms:
    if term_to_add not in [x.urn for x in current_terms.terms]:
        # terms exist, but this term is not present in the current terms
        current_terms.terms.append(term_association_to_add)
        need_write = True
now = int(time.time() * 1000)  # milliseconds since epoch
current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion")
institutional_memory_element = InstitutionalMemoryMetadataClass(
    url=link_to_add,
    description=link_description,
    createStamp=current_timestamp,
)


# First we get the current owners
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(config=DatahubClientConfig(server=gms_endpoint))

current_editable_properties = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="editableDatasetProperties",
    aspect_type=EditableDatasetPropertiesClass,
)

need_write = False
if current_editable_properties:
    if documentation_to_add != current_editable_properties.description:
        current_editable_properties.description = documentation_to_add
        need_write = True
else:
    # create a brand new editable dataset properties aspect
    current_editable_properties = EditableDatasetPropertiesClass(
        created=current_timestamp, description=documentation_to_add
    )
    need_write = True
Esempio n. 5
0
    TagAssociationClass,
)

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


# First we get the current tags
gms_endpoint = "http://localhost:8080"
graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))

dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD")

current_tags: Optional[GlobalTagsClass] = graph.get_aspect_v2(
    entity_urn=dataset_urn,
    aspect="globalTags",
    aspect_type=GlobalTagsClass,
)

tag_to_add = make_tag_urn("purchase")
tag_association_to_add = TagAssociationClass(tag=tag_to_add)

need_write = False
if current_tags:
    if tag_to_add not in [x.tag for x in current_tags.tags]:
        # tags exist, but this tag is not present in the current tags
        current_tags.tags.append(TagAssociationClass(tag_to_add))
        need_write = True
else:
    # create a brand new tags aspect
    current_tags = GlobalTagsClass(tags=[tag_association_to_add])