def test_make_column_schema_for_jaql_formula_should_process_all_parts(
            self, mock_make_column_schema_for_jaql):
        """Verify that a JAQL formula is turned into a ``formula`` column.

        ``mock_make_column_schema_for_jaql`` is set up to return an empty
        ``ColumnSchema``; the test then checks the name, type, description
        and number of subcolumns of the column built for a two-part formula.
        """
        formula_context = {
            '[OrderDateYears]': {
                'dim': '[Orders.OrderDate (Calendar)]',
                'level': 'years',
            },
            '[CountOrderID]': {
                'dim': '[Orders.OrderID]',
                'agg': 'count',
            },
        }
        jaql_metadata = {
            'formula': 'AVG([OrderDateYears], [CountOrderID])',
            'context': formula_context,
            'title': 'AVG test',
        }
        mock_make_column_schema_for_jaql.return_value = \
            datacatalog.ColumnSchema()

        make_formula_column = self.__factory \
            ._DataCatalogEntryFactory__make_column_schema_for_jaql_formula
        formula_column = make_formula_column(jaql_metadata)

        self.assertEqual('formula', formula_column.column)
        self.assertEqual('array', formula_column.type)
        self.assertEqual('The AVG test formula', formula_column.description)
        # One subcolumn is expected per bracketed part of the formula.
        self.assertEqual(2, len(formula_column.subcolumns))
    def test_make_column_schema_for_jaql_filter_by_should_process_all_fields(
            self, mock_make_column_schema_for_jaql):
        """Verify that a nested ``filter.by`` yields a ``filterBy`` column.

        ``mock_make_column_schema_for_jaql`` is set up to return an empty
        ``ColumnSchema``; the test checks the name, type, description and
        number of subcolumns of the column built by the factory.
        """
        nested_filter_by = {
            'dim': '[TableB.ColumnB]',
            'datatype': 'numeric',
            'agg': 'sum',
            'title': 'Test Table and Column B',
        }
        jaql_metadata = {
            'dim': '[TableA.ColumnA]',
            'datatype': 'text',
            'title': 'Test Table and Column A',
            'filter': {
                'by': nested_filter_by,
            },
        }
        mock_make_column_schema_for_jaql.return_value = \
            datacatalog.ColumnSchema()

        make_filter_by_column = self.__factory \
            ._DataCatalogEntryFactory__make_column_schema_for_jaql_filter_by
        filter_by_column = make_filter_by_column(jaql_metadata)

        self.assertEqual('filterBy', filter_by_column.column)
        self.assertEqual('array', filter_by_column.type)
        self.assertEqual('The Test Table and Column A nested filter',
                         filter_by_column.description)
        self.assertEqual(1, len(filter_by_column.subcolumns))
    def __make_filters_column_for_widget(
            cls, widget_metadata: Dict[str, Any]) -> Optional[ColumnSchema]:
        """Build the ``filters`` column of a widget from its filters panel.

        Args:
            widget_metadata: widget dict; its ``metadata.panels`` list is
                searched for the first panel whose ``name`` equals
                ``constants.WIDGET_FILTERS_PANEL_NAME``.

        Returns:
            An ``array`` column with one subcolumn per item of that panel, or
            ``None`` when the widget has no panels, the panel is absent or
            has no items, or no subcolumn was produced.
        """
        panels = (widget_metadata.get('metadata') or {}).get('panels')
        if not panels:
            return None

        # Only the first panel carrying the filters name is considered.
        filters = None
        for panel in panels:
            if panel.get('name') == constants.WIDGET_FILTERS_PANEL_NAME:
                filters = panel.get('items')
                break
        if not filters:
            return None

        filters_column = datacatalog.ColumnSchema()
        filters_column.column = constants.ENTRY_COLUMN_FILTERS
        filters_column.type = 'array'
        filters_column.description = 'The Widget filters'

        for widget_filter in filters:
            jaql_column = cls.__make_column_schema_for_jaql(
                widget_filter.get('jaql'))
            filters_column.subcolumns.append(jaql_column)

        if filters_column.subcolumns:
            return filters_column
        return None
    def make_entry_for_tables(self, table, table_container_name):
        """Build a Data Catalog entry, and its id, from a table dict.

        The ``name`` and ``columns`` keys of ``table`` are required; ``desc``,
        ``create_time`` and ``update_time`` are read when present. Source
        system timestamps are only set when both converted values are truthy.

         :param table: dict describing the table and its columns.
         :param table_container_name: prefix of the generated entry id.
         :return: entry_id, entry
        """

        def description_of(item):
            # Descriptions flagged by pd.isna are stored as an empty string.
            text = item.get('desc')
            return '' if pd.isna(text) else text

        table_name = table['name']
        entry_id = self._format_id('{}__{}'.format(table_container_name,
                                                   table_name))

        entry = datacatalog.Entry()
        entry.user_specified_type = \
            self.__metadata_definition['table_def']['type']
        entry.user_specified_system = self.__entry_group_id
        entry.display_name = self._format_display_name(table_name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)
        entry.description = description_of(table)
        entry.linked_resource = '//{}//{}'.format(
            self.__metadata_host_server, self._format_id(table_name))

        create_seconds, update_seconds = \
            DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
                table.get('create_time'),
                table.get('update_time'))
        if create_seconds and update_seconds:
            created_at = timestamp_pb2.Timestamp()
            created_at.FromSeconds(create_seconds)
            entry.source_system_timestamps.create_time = created_at

            updated_at = timestamp_pb2.Timestamp()
            updated_at.FromSeconds(update_seconds)
            entry.source_system_timestamps.update_time = updated_at

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=self._format_id(table_column['name']),
                description=description_of(table_column),
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    table_column['type']))
            for table_column in table['columns']
        ])

        return entry_id, entry
    def make_entry_for_table(self, table_metadata, database_name):
        """Build a Data Catalog entry, and its id, for a Hive table.

        :param table_metadata: table object exposing ``name``,
            ``create_time`` and ``table_storages``.
        :param database_name: prefix of the generated entry id.
        :return: entry_id, entry
        """
        # Force lowercase since hive is case insensitive
        entry_id = '{}__{}'.format(database_name,
                                   table_metadata.name).lower()

        entry = datacatalog.Entry()
        entry.user_specified_type = 'table'
        entry.user_specified_system = 'hive'
        entry.display_name = table_metadata.name
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # For now we are using the first table_storage relationship,
        # with table partitions we might have to deal
        # with more than one record
        first_storage = table_metadata.table_storages[0]
        entry.linked_resource = '//{}//{}'.format(
            self.__metadata_host_server, first_storage.location)

        created_at = timestamp_pb2.Timestamp()
        created_at.FromSeconds(table_metadata.create_time)
        entry.source_system_timestamps.create_time = created_at

        update_seconds = \
            DataCatalogEntryFactory. \
            __extract_update_time_from_table_metadata(table_metadata)
        if update_seconds is None:
            # No update time available: fall back to the creation time.
            updated_at = created_at
        else:
            updated_at = timestamp_pb2.Timestamp()
            updated_at.FromSeconds(update_seconds)
        entry.source_system_timestamps.update_time = updated_at

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=storage_column.name,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    storage_column.type),
                description=storage_column.comment)
            for storage_column in first_storage.columns
        ])

        return entry_id, entry
    def test_make_schema_for_widget_make_filters_column(
            self, mock_make_filters_column_for_widget):
        """Verify the widget schema starts with the filters column.

        ``mock_make_filters_column_for_widget`` returns a prepared column;
        the test checks it was called once with the widget metadata and that
        its result is the first column of the produced schema.
        """
        widget_metadata = {'metadata': {'panels': [{}]}}

        filters_column = datacatalog.ColumnSchema()
        filters_column.column = 'test'
        mock_make_filters_column_for_widget.return_value = filters_column

        make_schema = self.__factory\
            ._DataCatalogEntryFactory__make_schema_for_widget
        widget_schema = make_schema(widget_metadata)

        mock_make_filters_column_for_widget.assert_called_once_with(
            widget_metadata)
        self.assertEqual(filters_column, widget_schema.columns[0])
コード例 #7
0
    def make_entry_for_table(self, table_metadata, database_name):
        """Build a Data Catalog entry, and its id, for a Hive table.

        The entry id, display name and linked resource are produced through
        the ``__make_entry_id_for_table``, ``_format_display_name`` and
        ``_format_linked_resource`` helpers.

        :param table_metadata: table object exposing ``name``,
            ``create_time`` and ``table_storages``.
        :param database_name: database name passed to the entry id helper.
        :return: entry_id, entry
        """
        entry_id = self.__make_entry_id_for_table(database_name,
                                                  table_metadata)

        entry = datacatalog.Entry()
        entry.user_specified_type = 'table'
        entry.user_specified_system = 'hive'
        entry.display_name = self._format_display_name(table_metadata.name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # Only the first storage record of the table is used.
        first_storage = table_metadata.table_storages[0]
        raw_linked_resource = '//{}//{}'.format(self.__metadata_host_server,
                                                first_storage.location)
        entry.linked_resource = \
            self._format_linked_resource(raw_linked_resource)

        created_at = timestamp_pb2.Timestamp()
        created_at.FromSeconds(table_metadata.create_time)
        entry.source_system_timestamps.create_time = created_at

        update_seconds = \
            DataCatalogEntryFactory. \
            __extract_update_time_from_table_metadata(table_metadata)
        if update_seconds is None:
            # No update time available: fall back to the creation time.
            updated_at = created_at
        else:
            updated_at = timestamp_pb2.Timestamp()
            updated_at.FromSeconds(update_seconds)
        entry.source_system_timestamps.update_time = updated_at

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=storage_column.name,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    storage_column.type),
                description=storage_column.comment)
            for storage_column in first_storage.columns
        ])

        return entry_id, entry
    def test_make_column_schema_for_jaql_should_set_all_available_fields(
            self, mock_make_column_schema_for_jaql_formula,
            mock_make_column_schema_for_jaql_filter_by):
        """Verify the name and type set on a JAQL column.

        Both nested-column mocks return a prepared ``ColumnSchema``; the test
        also checks that each of them was called once with the metadata.
        """
        jaql_metadata = {'datatype': 'datetime', 'title': 'TEST'}

        formula_stub = datacatalog.ColumnSchema()
        formula_stub.column = 'formula'
        mock_make_column_schema_for_jaql_formula.return_value = formula_stub

        filter_by_stub = datacatalog.ColumnSchema()
        filter_by_stub.column = 'filterBy'
        mock_make_column_schema_for_jaql_filter_by.return_value = \
            filter_by_stub

        make_jaql_column = self.__factory\
            ._DataCatalogEntryFactory__make_column_schema_for_jaql
        jaql_column = make_jaql_column(jaql_metadata)

        self.assertEqual('TEST', jaql_column.column)
        self.assertEqual('datetime', jaql_column.type)

        for nested_mock in (mock_make_column_schema_for_jaql_formula,
                            mock_make_column_schema_for_jaql_filter_by):
            nested_mock.assert_called_once_with(jaql_metadata)
    def test_make_schema_for_dashboard_should_make_filters_column(
            self, mock_make_column_schema_for_jaql):
        """Verify a dashboard with one filter yields a ``filters`` column.

        ``mock_make_column_schema_for_jaql`` returns an empty
        ``ColumnSchema``; the test checks it received the filter's JAQL dict
        and that its result became the first subcolumn.
        """
        filter_jaql = {'datatype': 'datetime', 'title': 'TEST'}
        dashboard_metadata = {'filters': [{'jaql': filter_jaql}]}

        jaql_column = datacatalog.ColumnSchema()
        mock_make_column_schema_for_jaql.return_value = jaql_column

        make_schema = \
            self.__factory._DataCatalogEntryFactory__make_schema_for_dashboard
        dashboard_schema = make_schema(dashboard_metadata)

        filters_column = dashboard_schema.columns[0]
        self.assertEqual('filters', filters_column.column)
        mock_make_column_schema_for_jaql.assert_called_once_with(filter_jaql)
        self.assertEqual(jaql_column, filters_column.subcolumns[0])
    def __make_column_schema_for_jaql_filter_by(
            cls, jaql_metadata: Dict[str, Any]) -> Optional[ColumnSchema]:
        """Build the nested-filter column for a JAQL dict.

        Args:
            jaql_metadata: JAQL dict; the value stored under
                ``constants.JAQL_FILTER_FIELD_NAME`` is inspected for a
                ``constants.JAQL_FILTER_BY_FIELD_NAME`` entry.

        Returns:
            An ``array`` column holding one subcolumn built from the
            filter-by dict, or ``None`` when the filter or its filter-by
            entry is missing or empty.
        """
        jaql_filter = jaql_metadata.get(constants.JAQL_FILTER_FIELD_NAME)
        nested_filter_by = (jaql_filter or {}).get(
            constants.JAQL_FILTER_BY_FIELD_NAME)
        if not nested_filter_by:
            return None

        filter_by_column = datacatalog.ColumnSchema()
        filter_by_column.column = constants.ENTRY_COLUMN_FILTER_BY
        filter_by_column.type = 'array'
        filter_by_column.description = \
            f'The {jaql_metadata.get("title")} nested filter'

        # The filter-by dict is itself a JAQL dict, so it is built the same
        # way as any other JAQL column.
        nested_column = cls.__make_column_schema_for_jaql(nested_filter_by)
        filter_by_column.subcolumns.append(nested_column)

        return filter_by_column
    def __make_column_schema_for_jaql_formula(
            cls, jaql_metadata: Dict[str, Any]) -> Optional[ColumnSchema]:
        """Build the ``formula`` column for a JAQL dict.

        Args:
            jaql_metadata: JAQL dict; the formula text and its context are
                read from the ``constants.JAQL_FORMULA_FIELD_NAME`` and
                ``constants.JAQL_CONTEXT_FIELD_NAME`` keys.

        Returns:
            An ``array`` column with one subcolumn per bracketed part found
            in the formula, or ``None`` when the formula or the context is
            missing or empty.
        """
        formula = jaql_metadata.get(constants.JAQL_FORMULA_FIELD_NAME)
        context = jaql_metadata.get(constants.JAQL_CONTEXT_FIELD_NAME)
        if not formula or not context:
            return None

        formula_column = datacatalog.ColumnSchema()
        formula_column.column = constants.ENTRY_COLUMN_FORMULA
        formula_column.type = 'array'
        formula_column.description = \
            f'The {jaql_metadata.get("title")} formula'

        # Each "[name]" token of the formula is looked up in the context
        # under its bracketed form and built as a regular JAQL column.
        part_columns = [
            cls.__make_column_schema_for_jaql(context.get(f'[{part}]'))
            for part in re.findall(r'\[(.*?)]', formula)
        ]
        for part_column in part_columns:
            formula_column.subcolumns.append(part_column)

        return formula_column
コード例 #12
0
 def __create_schema(cls, entry, columns):
     """Extend ``entry.schema.columns`` with the given column dicts.

     A column is added only when it has a truthy ``data`` dict and both a
     data type (as resolved by the attribute normalizer) and a ``name``
     attribute; every other column is skipped.

     :param entry: entry whose schema columns are extended in place.
     :param columns: iterable of column dicts; may be empty or ``None``.
     """
     entry_columns = []
     for column in columns or []:
         column_data = column.get('data')
         if not column_data:
             continue

         attributes = column_data.get('attributes')
         data_type = (attr_normalizer.DataCatalogAttributeNormalizer
                      .get_column_data_type(attributes))
         raw_name = attributes.get('name')
         comment = attributes.get('comment')
         if not (data_type and raw_name):
             continue

         formatted_name = (attr_normalizer.DataCatalogAttributeNormalizer
                           .format_name(raw_name))
         entry_columns.append(
             datacatalog.ColumnSchema(column=formatted_name,
                                      description=comment,
                                      type=data_type))
     entry.schema.columns.extend(entry_columns)
    def __make_column_schema_for_jaql(
            cls, jaql_metadata: Dict[str, Any]) -> ColumnSchema:
        """Build a column from a JAQL dict.

        The column name is the formatted ``title``; the type is taken from
        ``datatype``, then ``type``, and defaults to ``'unknown'``. Formula
        and nested-filter subcolumns are appended, in that order, when the
        corresponding builders return a truthy column.

        Args:
            jaql_metadata: the JAQL dict to convert.

        Returns:
            The populated ``ColumnSchema``.
        """
        strings_helper = \
            sisense_connector_strings_helper.SisenseConnectorStringsHelper

        jaql_column = datacatalog.ColumnSchema()
        jaql_column.column = strings_helper.format_column_name(
            jaql_metadata.get('title'))
        jaql_column.type = (jaql_metadata.get('datatype')
                            or jaql_metadata.get('type')
                            or 'unknown')

        subcolumn_builders = (
            cls.__make_column_schema_for_jaql_formula,
            cls.__make_column_schema_for_jaql_filter_by,
        )
        for build_subcolumn in subcolumn_builders:
            subcolumn = build_subcolumn(jaql_metadata)
            if subcolumn:
                jaql_column.subcolumns.append(subcolumn)

        return jaql_column
    def __make_schema_for_dashboard(
            cls, dashboard_metadata: Dict[str, Any]) -> Optional[Schema]:
        """Build the schema of a dashboard from its filters.

        Args:
            dashboard_metadata: dashboard dict; its filters are read from the
                ``constants.DASHBOARD_FILTERS_FIELD_NAME`` key, and each
                filter's ``jaql`` dict is converted into a subcolumn.

        Returns:
            A ``Schema`` holding a single ``array`` column with one
            subcolumn per dashboard filter, or ``None`` when the dashboard
            has no filters.
        """
        # Read the filters once, through the shared constant, so the
        # emptiness guard and the loop below can never look at different
        # keys.
        dashboard_filters = dashboard_metadata.get(
            constants.DASHBOARD_FILTERS_FIELD_NAME)
        if not dashboard_filters:
            return None

        filters_column = datacatalog.ColumnSchema()
        filters_column.column = constants.ENTRY_COLUMN_FILTERS
        filters_column.type = 'array'
        filters_column.description = 'The Dashboard filters'

        for dashboard_filter in dashboard_filters:
            filters_column.subcolumns.append(
                cls.__make_column_schema_for_jaql(
                    dashboard_filter.get('jaql')))

        schema = datacatalog.Schema()
        schema.columns.append(filters_column)

        return schema
    def test_make_filters_column_for_widget_should_return_column(
            self, mock_make_column_schema_for_jaql):
        """Verify a widget filters panel yields a column with subcolumns.

        ``mock_make_column_schema_for_jaql`` returns an empty
        ``ColumnSchema``; the test checks it received the panel item's JAQL
        dict and that its result became the first subcolumn.
        """
        filter_jaql = {'datatype': 'datetime', 'title': 'TEST'}
        filters_panel = {
            'name': 'filters',
            'items': [{
                'jaql': filter_jaql
            }]
        }
        widget_metadata = {'metadata': {'panels': [filters_panel]}}

        jaql_column = datacatalog.ColumnSchema()
        mock_make_column_schema_for_jaql.return_value = jaql_column

        make_filters_column = self.__factory\
            ._DataCatalogEntryFactory__make_filters_column_for_widget
        filters_column = make_filters_column(widget_metadata)

        mock_make_column_schema_for_jaql.assert_called_once_with(filter_jaql)
        self.assertEqual(jaql_column, filters_column.subcolumns[0])
    def __make_fields_column_for_widget(
            cls, widget_metadata: Dict[str, Any]) -> Optional[ColumnSchema]:
        """Build the ``fields`` column of a widget from its non-filter panels.

        Args:
            widget_metadata: widget dict; every panel in ``metadata.panels``
                whose ``name`` differs from
                ``constants.WIDGET_FILTERS_PANEL_NAME`` contributes its
                items.

        Returns:
            An ``array`` column with one subcolumn per panel item, or
            ``None`` when the widget has no panels or no subcolumn was
            produced.
        """
        if not (widget_metadata.get('metadata')
                and widget_metadata['metadata'].get('panels')):
            return None

        fields_column = datacatalog.ColumnSchema()
        fields_column.column = constants.ENTRY_COLUMN_FIELDS
        fields_column.type = 'array'
        fields_column.description = 'The Widget fields'

        panels = widget_metadata['metadata']['panels']
        field_panels = [
            panel for panel in panels
            if panel.get('name') != constants.WIDGET_FILTERS_PANEL_NAME
        ]
        for panel in field_panels:
            # A panel may have no ``items`` key (or a null value); treat it
            # as empty instead of failing on iteration over ``None``.
            for item in panel.get('items') or []:
                fields_column.subcolumns.append(
                    cls.__make_column_schema_for_jaql(item.get('jaql')))

        return fields_column if fields_column.subcolumns else None
    def make_entry_for_tables(self, table, table_container_name):
        """Build a Data Catalog entry, and its id, from a table dict.

        The ``name`` and ``columns`` keys of ``table`` are required; the
        table type, ``desc``, ``create_time`` and ``update_time`` are read
        when present. Source system timestamps are only set when both
        converted values are truthy.

         :param table: dict describing the table and its columns.
         :param table_container_name: prefix of the generated entry id, also
             used in the linked resource.
         :return: entry_id, entry
        """

        def description_of(item):
            # Descriptions flagged by pd.isna are stored as an empty string.
            text = item.get('desc')
            return '' if pd.isna(text) else text

        table_name = table['name']
        entry_id = self._format_id('{}__{}'.format(table_container_name,
                                                   table_name))

        entry = datacatalog.Entry()

        # some RDBMS' store views and tables definitions in the same
        # system table, and the name is not user friendly, so we only
        # keep it if it's a VIEW type.
        source_type = table.get(constants.TABLE_TYPE_KEY)
        is_view = bool(source_type) and \
            source_type.lower() == constants.VIEW_TYPE_VALUE
        if is_view:
            entry.user_specified_type = source_type.lower()
        else:
            entry.user_specified_type = \
                self.__metadata_definition['table_def']['type']

        entry.user_specified_system = self.__entry_group_id
        entry.display_name = self._format_display_name(table_name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)
        entry.description = description_of(table)
        entry.linked_resource = '{}/{}/{}'.format(
            self.__entry_resource_url_prefix, table_container_name,
            self._format_id(table_name))

        create_seconds, update_seconds = \
            DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
                table.get('create_time'),
                table.get('update_time'))
        if create_seconds and update_seconds:
            created_at = timestamp_pb2.Timestamp()
            created_at.FromSeconds(create_seconds)
            entry.source_system_timestamps.create_time = created_at

            updated_at = timestamp_pb2.Timestamp()
            updated_at.FromSeconds(update_seconds)
            entry.source_system_timestamps.update_time = updated_at

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=self._format_id(table_column['name']),
                description=description_of(table_column),
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    table_column['type']))
            for table_column in table['columns']
        ])

        return entry_id, entry
コード例 #18
0
 def create_column_schema(cls, name, column_type, description, mode=None):
     """Return a ``ColumnSchema`` populated with the given values.

     :param name: value for the ``column`` field.
     :param column_type: value for the ``type`` field.
     :param description: value for the ``description`` field.
     :param mode: value for the ``mode`` field; defaults to ``None``.
     """
     schema_fields = {
         'column': name,
         'type': column_type,
         'description': description,
         'mode': mode,
     }
     return datacatalog.ColumnSchema(**schema_fields)