コード例 #1
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield DataHub work units for Azure AD groups and users.

        The phases run in a fixed order, each one gated by config flags:

        1. groups (``ingest_groups``): one work unit per mapped group
           snapshot, batch by batch;
        2. group memberships (``ingest_group_membership``): nothing is
           yielded here; the phase fills a user-URN -> ``GroupMembershipClass``
           mapping and records every mappable member in
           ``self.azure_ad_groups_users``;
        3. users: either only the members collected in phase 2
           (``ingest_groups_users`` with ``ingest_users`` off) or every batch
           returned by ``_get_azure_ad_users`` (``ingest_users``).

        Groups or users whose URN cannot be derived are reported through
        ``self.report.report_failure`` and skipped.
        """
        # Phase 1: the groups.
        if self.config.ingest_groups:
            for group_batch in self._get_azure_ad_groups():
                logger.info("Processing another groups batch...")
                for group_snapshot in self._map_azure_ad_groups(group_batch):
                    event = MetadataChangeEvent(
                        proposedSnapshot=group_snapshot)
                    work_unit = MetadataWorkUnit(id=group_snapshot.urn,
                                                 mce=event)
                    self.report.report_workunit(work_unit)
                    yield work_unit

        # Phase 2: the groups' memberships.
        # NOTE(review): selected_azure_ad_groups is presumably filled while
        # the groups are fetched in phase 1 -- confirm against the helper.
        memberships_by_user_urn: Dict[str, GroupMembershipClass] = {}
        if (self.config.ingest_group_membership
                and len(self.selected_azure_ad_groups) > 0):
            for group in self.selected_azure_ad_groups:
                group_urn = self._map_azure_ad_group_to_urn(group)
                if not group_urn:
                    self.report.report_failure(
                        "azure_ad_group_mapping",
                        "Failed to extract DataHub Group Name from Azure AD Group named {}. Skipping...".format(
                            group.get("displayName")),
                    )
                    continue

                for member_batch in self._get_azure_ad_group_users(group):
                    # A falsy batch means the group has no members; iterating
                    # the empty tuple simply moves on to the next batch.
                    for member in member_batch or ():
                        user_urn = self._map_azure_ad_user_to_urn(member)
                        if not user_urn:
                            self.report.report_failure(
                                "azure_ad_user_mapping",
                                "Failed to extract DataHub Username from Azure ADUser {}. Skipping...".format(
                                    member.get("displayName")),
                            )
                            continue

                        self.azure_ad_groups_users.append(member)

                        # Extend the user's existing membership aspect, or
                        # start a new one seeded with this group.
                        membership = memberships_by_user_urn.get(user_urn)
                        if membership is None:
                            memberships_by_user_urn[user_urn] = (
                                GroupMembershipClass(groups=[group_urn]))
                        else:
                            membership.groups.append(group_urn)

        # Phase 3a: only the users that belong to the groups found above.
        only_group_members = (self.config.ingest_groups_users
                              and self.config.ingest_group_membership
                              and not self.config.ingest_users)
        if only_group_members:
            member_snapshots = self._map_azure_ad_users(
                self.azure_ad_groups_users)
            yield from self.ingest_ad_users(member_snapshots,
                                            memberships_by_user_urn)

        # Phase 3b: every user, batch by batch.
        if not self.config.ingest_users:
            return
        for user_batch in self._get_azure_ad_users():
            user_snapshots = self._map_azure_ad_users(user_batch)
            yield from self.ingest_ad_users(
                user_snapshots,
                memberships_by_user_urn,
            )
コード例 #2
0
ファイル: azure_ad.py プロジェクト: shirshanka/datahub
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield DataHub work units for Azure AD groups and users.

        The phases run in a fixed order, each one gated by config flags:

        1. groups (``ingest_groups``): one work unit per mapped group
           snapshot, batch by batch;
        2. group memberships (``ingest_group_membership``): nothing is
           yielded; ``_add_group_members_to_group_membership`` is called for
           every selected group whose URN can be derived, filling a
           user-URN -> ``GroupMembershipClass`` mapping;
        3. users: either the members in ``self.azure_ad_groups_users``
           (``ingest_groups_users`` with ``ingest_users`` off) or every batch
           returned by ``_get_azure_ad_users`` (``ingest_users``).

        Groups whose URN cannot be derived are reported through
        ``self.report.report_failure`` and skipped.
        """
        # Phase 1: the groups.
        if self.config.ingest_groups:
            for group_batch in self._get_azure_ad_groups():
                logger.info("Processing another groups batch...")
                for group_snapshot in self._map_azure_ad_groups(group_batch):
                    event = MetadataChangeEvent(
                        proposedSnapshot=group_snapshot)
                    work_unit = MetadataWorkUnit(id=group_snapshot.urn,
                                                 mce=event)
                    self.report.report_workunit(work_unit)
                    yield work_unit

        # Phase 2: the groups' memberships. Missing users start out with an
        # empty membership aspect on first access.
        memberships_by_user_urn: Dict[str, GroupMembershipClass] = defaultdict(
            lambda: GroupMembershipClass(groups=[]))
        if (self.config.ingest_group_membership
                and len(self.selected_azure_ad_groups) > 0):
            for azure_ad_group in self.selected_azure_ad_groups:
                group_urn = self._map_azure_ad_group_to_urn(azure_ad_group)
                if group_urn:
                    # Azure AD allows nested groups while DataHub does not;
                    # the helper is expected to flatten them -- TODO confirm.
                    self._add_group_members_to_group_membership(
                        group_urn,
                        azure_ad_group,
                        memberships_by_user_urn,
                    )
                else:
                    error_str = f"Failed to extract DataHub Group Name from Azure AD Group named {azure_ad_group.get('displayName')}. Skipping..."
                    self.report.report_failure("azure_ad_group_mapping",
                                               error_str)

        # Phase 3a: only the users that belong to the groups found above.
        only_group_members = (self.config.ingest_groups_users
                              and self.config.ingest_group_membership
                              and not self.config.ingest_users)
        if only_group_members:
            member_snapshots = self._map_azure_ad_users(
                self.azure_ad_groups_users)
            yield from self.ingest_ad_users(member_snapshots,
                                            memberships_by_user_urn)

        # Phase 3b: every user, batch by batch.
        if not self.config.ingest_users:
            return
        for user_batch in self._get_azure_ad_users():
            user_snapshots = self._map_azure_ad_users(user_batch)
            yield from self.ingest_ad_users(
                user_snapshots,
                memberships_by_user_urn,
            )
コード例 #3
0
ファイル: okta.py プロジェクト: taufiqibrahim/datahub
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield DataHub work units for Okta groups and users.

        Runs three steps, each gated by a config flag:

        1. ``ingest_groups``: fetch all Okta groups and yield one work unit
           per mapped group snapshot.
        2. ``ingest_group_membership``: for every group fetched in step 1,
           collect its users into a user-URN -> ``GroupMembershipClass``
           mapping. Skipped when step 1 did not run, because there are no
           groups to inspect.
        3. ``ingest_users``: yield one work unit per filtered, mapped user
           snapshot, attaching the membership aspect from step 2 when one
           exists for that user.

        Groups or users whose URN cannot be derived are logged, reported
        through ``self.report.report_failure`` and skipped.
        """
        # Step 1: Produce MetadataWorkUnits for CorpGroups.
        # Bound up front so that step 2 can test it even when group ingestion
        # is disabled; otherwise the name would be unbound and step 2 would
        # raise UnboundLocalError.
        okta_groups = None
        if self.config.ingest_groups:
            okta_groups = list(self._get_okta_groups())
            datahub_corp_group_snapshots = self._map_okta_groups(okta_groups)
            for datahub_corp_group_snapshot in datahub_corp_group_snapshots:
                mce = MetadataChangeEvent(
                    proposedSnapshot=datahub_corp_group_snapshot)
                wu = MetadataWorkUnit(id=datahub_corp_group_snapshot.urn,
                                      mce=mce)
                self.report.report_workunit(wu)
                yield wu

        # Step 2: Populate GroupMembership Aspects for CorpUsers
        datahub_corp_user_urn_to_group_membership: Dict[
            str, GroupMembershipClass] = {}
        if self.config.ingest_group_membership and okta_groups is not None:

            # Fetch membership for each group.
            for okta_group in okta_groups:
                datahub_corp_group_urn = self._map_okta_group_profile_to_urn(
                    okta_group.profile)
                if datahub_corp_group_urn is None:
                    error_str = f"Failed to extract DataHub Group Name from Okta Group: Invalid regex pattern provided or missing profile attribute for group named {okta_group.profile.name}. Skipping..."
                    logger.error(error_str)
                    self.report.report_failure("okta_group_mapping", error_str)
                    continue

                # Extract and map users for each group.
                okta_group_users = self._get_okta_group_users(okta_group)
                for okta_user in okta_group_users:
                    datahub_corp_user_urn = self._map_okta_user_profile_to_urn(
                        okta_user.profile)
                    if datahub_corp_user_urn is None:
                        error_str = f"Failed to extract DataHub Username from Okta User: Invalid regex pattern provided or missing profile attribute for User with login {okta_user.profile.login}. Skipping..."
                        logger.error(error_str)
                        self.report.report_failure("okta_user_mapping",
                                                   error_str)
                        continue

                    # Either update or create the GroupMembership aspect for this group member.
                    # TODO: Production of the GroupMembership aspect will overwrite the existing
                    # group membership for the DataHub user.
                    group_membership = (
                        datahub_corp_user_urn_to_group_membership.get(
                            datahub_corp_user_urn))
                    if group_membership is None:
                        datahub_corp_user_urn_to_group_membership[
                            datahub_corp_user_urn] = GroupMembershipClass(
                                groups=[datahub_corp_group_urn])
                    else:
                        group_membership.groups.append(datahub_corp_group_urn)

        # Step 3: Produce MetadataWorkUnits for CorpUsers.
        if self.config.ingest_users:
            okta_users = self._get_okta_users()
            filtered_okta_users = filter(self._filter_okta_user, okta_users)
            datahub_corp_user_snapshots = self._map_okta_users(
                filtered_okta_users)
            for datahub_corp_user_snapshot in datahub_corp_user_snapshots:

                # Add GroupMembership aspect populated in Step 2 if applicable.
                # A single lookup replaces the membership test plus assert,
                # which would be stripped when Python runs with -O.
                datahub_group_membership = (
                    datahub_corp_user_urn_to_group_membership.get(
                        datahub_corp_user_snapshot.urn))
                if datahub_group_membership is not None:
                    datahub_corp_user_snapshot.aspects.append(
                        datahub_group_membership)
                mce = MetadataChangeEvent(
                    proposedSnapshot=datahub_corp_user_snapshot)
                wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn,
                                      mce=mce)
                self.report.report_workunit(wu)
                yield wu
コード例 #4
0
ファイル: azure_ad.py プロジェクト: erley/datahub
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield DataHub work units for Azure AD groups and users.

        Runs three steps, each gated by a config flag:

        1. ``ingest_groups``: yield one work unit per mapped group snapshot.
        2. ``ingest_group_membership``: for every group seen in step 1,
           collect its users into a user-URN -> ``GroupMembershipClass``
           mapping. Skipped when step 1 produced no groups (including when
           it did not run).
        3. ``ingest_users``: yield one work unit per mapped user snapshot,
           attaching the membership aspect from step 2 when one exists.

        The ``_get_azure_ad_*`` helpers are consumed as iterators of batches
        (assumed to be one batch per API page -- confirm against the helpers).
        Every batch is processed; taking only the first one with ``next()``
        would drop later pages and would raise ``RuntimeError`` from inside
        this generator if a helper yielded nothing.

        Groups or users whose URN cannot be derived are reported through
        ``self.report.report_failure`` and skipped.
        """
        # Create MetadataWorkUnits for CorpGroups
        # Accumulates the groups of every batch. Bound up front so the
        # membership step can test it even when group ingestion is disabled.
        azure_ad_groups = []
        if self.config.ingest_groups:
            for azure_ad_groups_batch in self._get_azure_ad_groups():
                if not azure_ad_groups_batch:
                    continue
                azure_ad_groups.extend(azure_ad_groups_batch)
                datahub_corp_group_snapshots = self._map_azure_ad_groups(
                    azure_ad_groups_batch
                )
                for datahub_corp_group_snapshot in datahub_corp_group_snapshots:
                    mce = MetadataChangeEvent(
                        proposedSnapshot=datahub_corp_group_snapshot
                    )
                    wu = MetadataWorkUnit(
                        id=datahub_corp_group_snapshot.urn, mce=mce
                    )
                    self.report.report_workunit(wu)
                    yield wu

        # Populate GroupMembership Aspects for CorpUsers
        datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {}
        if self.config.ingest_group_membership and azure_ad_groups:
            # Fetch membership for each group
            for azure_ad_group in azure_ad_groups:
                datahub_corp_group_urn = self._map_azure_ad_group_to_urn(azure_ad_group)
                if not datahub_corp_group_urn:
                    error_str = "Failed to extract DataHub Group Name from Azure AD Group named {}. Skipping...".format(
                        azure_ad_group.get("displayName")
                    )
                    self.report.report_failure("azure_ad_group_mapping", error_str)
                    continue
                # Extract and map users for each group, one batch at a time
                for azure_ad_group_users in self._get_azure_ad_group_users(
                    azure_ad_group
                ):
                    # if this batch doesn't contain any members, continue
                    if not azure_ad_group_users:
                        continue
                    for azure_ad_user in azure_ad_group_users:
                        datahub_corp_user_urn = self._map_azure_ad_user_to_urn(
                            azure_ad_user
                        )
                        if not datahub_corp_user_urn:
                            error_str = "Failed to extract DataHub Username from Azure ADUser {}. Skipping...".format(
                                azure_ad_user.get("displayName")
                            )
                            self.report.report_failure(
                                "azure_ad_user_mapping", error_str
                            )
                            continue

                        # update/create the GroupMembership aspect for this group member.
                        group_membership = (
                            datahub_corp_user_urn_to_group_membership.get(
                                datahub_corp_user_urn
                            )
                        )
                        if group_membership is None:
                            datahub_corp_user_urn_to_group_membership[
                                datahub_corp_user_urn
                            ] = GroupMembershipClass(groups=[datahub_corp_group_urn])
                        else:
                            group_membership.groups.append(datahub_corp_group_urn)

        # Create MetadataWorkUnits for CorpUsers
        if self.config.ingest_users:
            for azure_ad_users in self._get_azure_ad_users():
                datahub_corp_user_snapshots = self._map_azure_ad_users(azure_ad_users)
                for datahub_corp_user_snapshot in datahub_corp_user_snapshots:
                    # Add GroupMembership if applicable. A single lookup
                    # replaces the membership test plus assert, which would
                    # be stripped when Python runs with -O.
                    datahub_group_membership = (
                        datahub_corp_user_urn_to_group_membership.get(
                            datahub_corp_user_snapshot.urn
                        )
                    )
                    if datahub_group_membership is not None:
                        datahub_corp_user_snapshot.aspects.append(
                            datahub_group_membership
                        )
                    mce = MetadataChangeEvent(
                        proposedSnapshot=datahub_corp_user_snapshot
                    )
                    wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn, mce=mce)
                    self.report.report_workunit(wu)
                    yield wu