def test_create_records_programmatic_description(self) -> None:
    """Check the two records produced by a schema with a description source.

    The first serialized record is compared against the schema row and the
    second against the description row whose key embeds the source ('bar').
    """
    expected_schema_row = {
        'rk': 'db://cluster.schema',
        'name': 'schema_name',
        'cluster_rk': 'db://cluster',
    }
    expected_prog_desc_row = {
        'rk': 'db://cluster.schema/_bar_description',
        'description_source': 'bar',
        'description': 'foo',
        'schema_rk': 'db://cluster.schema',
    }

    schema = SchemaModel(
        schema_key='db://cluster.schema',
        schema='schema_name',
        description='foo',
        description_source='bar',
    )

    # Fetch and serialize both records, in emission order, before asserting.
    first_row = mysql_serializer.serialize_record(schema.create_next_record())
    second_row = mysql_serializer.serialize_record(schema.create_next_record())

    self.assertDictEqual(first_row, expected_schema_row)
    self.assertDictEqual(second_row, expected_prog_desc_row)
def test_dashboard_table_records(self) -> None:
    """Check that a DashboardTable with two table ids emits two link rows.

    Each serialized row is expected to pair the dashboard key with one table
    key; a third call to ``create_next_record`` is expected to return None.
    """
    dashboard_rk = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
    first_expected = {
        'dashboard_rk': dashboard_rk,
        'table_rk': 'hive://gold.schema/table1',
    }
    second_expected = {
        'dashboard_rk': dashboard_rk,
        'table_rk': 'hive://gold.schema/table2',
    }

    dashboard_table = DashboardTable(
        table_ids=['hive://gold.schema/table1', 'hive://gold.schema/table2'],
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id',
    )

    # Both records are pulled and serialized before any assertion runs.
    first_record = dashboard_table.create_next_record()
    first_row = mysql_serializer.serialize_record(first_record)
    second_record = dashboard_table.create_next_record()
    second_row = mysql_serializer.serialize_record(second_record)

    assert first_record is not None
    self.assertDictEqual(first_expected, first_row)
    assert second_record is not None
    self.assertDictEqual(second_expected, second_row)

    self.assertIsNone(dashboard_table.create_next_record())
def test_dashboard_usage_user_records(self) -> None:
    """Check the user row and the usage row emitted by DashboardUsage.

    With ``should_create_user_node=True`` the first record is compared to a
    user row and the second to a usage row; a third call must return None.
    """
    expected_user_row = {
        'rk': '*****@*****.**',
        'email': '*****@*****.**',
    }
    expected_usage_row = {
        'user_rk': '*****@*****.**',
        'dashboard_rk': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
        'read_count': 123,
    }

    dashboard_usage = DashboardUsage(
        dashboard_group_id='dashboard_group_id',
        dashboard_id='dashboard_id',
        email='*****@*****.**',
        view_count=123,
        cluster='cluster_id',
        product='product_id',
        should_create_user_node=True,
    )

    user_record = dashboard_usage.create_next_record()
    user_row = mysql_serializer.serialize_record(user_record)
    assert user_record is not None
    self.assertDictEqual(expected_user_row, user_row)

    usage_record = dashboard_usage.create_next_record()
    usage_row = mysql_serializer.serialize_record(usage_record)
    assert usage_record is not None
    self.assertDictEqual(expected_usage_row, usage_row)

    self.assertIsNone(dashboard_usage.create_next_record())
def test_create_records(self) -> None:
    """Check the schema row and description row produced by ``self.schema``."""
    expected_schema_row = {
        'rk': 'db://cluster.schema',
        'name': 'schema_name',
        'cluster_rk': 'db://cluster',
    }
    expected_description_row = {
        'rk': 'db://cluster.schema/_description',
        'description_source': 'description',
        'description': 'foo',
        'schema_rk': 'db://cluster.schema',
    }

    # Records are consumed in order: schema first, then its description.
    schema_row = mysql_serializer.serialize_record(self.schema.create_next_record())
    description_row = mysql_serializer.serialize_record(self.schema.create_next_record())

    self.assertDictEqual(schema_row, expected_schema_row)
    self.assertDictEqual(description_row, expected_description_row)
def test_table_serializable(self) -> None:
    """Serialize every record of a Movie with two actors and compare rows.

    The expected sequence is the movie row followed, per actor, by the actor
    row and the movie/actor link row.
    """
    expected = [
        {'rk': 'movie://Top Gun', 'name': 'Top Gun'},
        {'rk': 'actor://Tom Cruise', 'name': 'Tom Cruise'},
        {'movie_rk': 'movie://Top Gun', 'actor_rk': 'actor://Tom Cruise'},
        {'rk': 'actor://Meg Ryan', 'name': 'Meg Ryan'},
        {'movie_rk': 'movie://Top Gun', 'actor_rk': 'actor://Meg Ryan'},
    ]

    cast = [Actor('Tom Cruise'), Actor('Meg Ryan')]
    movie = Movie('Top Gun', cast)

    actual = []
    # Drain the movie until next_record() hands back a falsy value.
    while True:
        row_record = movie.next_record()
        if not row_record:
            break
        serialized = mysql_serializer.serialize_record(row_record)
        actual.append(serialized)

    self.assertEqual(expected, actual)
def test_dashboard_record_without_tags(self) -> None:
    """Compare all serialized records of ``self.dashboard_metadata2``.

    The expected rows are cluster, dashboard group, dashboard and dashboard
    description; no tag rows are expected.
    """
    expected_records_without_tags = [
        {
            'rk': '_dashboard://gold',
            'name': 'gold',
        },
        {
            'rk': '_dashboard://gold.Product - Atmoskop',
            'name': 'Product - Atmoskop',
            'cluster_rk': '_dashboard://gold',
        },
        {
            'rk': '_dashboard://gold.Product - Atmoskop/Atmoskop',
            'name': 'Atmoskop',
            'dashboard_group_rk': '_dashboard://gold.Product - Atmoskop',
        },
        {
            'rk': '_dashboard://gold.Product - Atmoskop/Atmoskop/_description',
            'description': 'Atmoskop dashboard description',
            'dashboard_rk': '_dashboard://gold.Product - Atmoskop/Atmoskop',
        },
    ]

    actual = []
    # Drain the metadata object until it yields a falsy record.
    while True:
        record = self.dashboard_metadata2.next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected_records_without_tags, actual)
def load(self, csv_serializable: TableSerializable) -> None:
    """
    Write the records of a TableSerializable out as CSV rows.

    For each record pulled from ``csv_serializable``:
    1. serialize it into a dict (keys act as the header, values as the row)
    2. obtain a writer via ``self._get_writer``, keyed by the record's
       table name together with ``self._make_key`` of the row dict
    3. write the row with that writer

    The loop ends when ``next_record()`` returns a falsy value.

    :param csv_serializable: object whose records are drained via next_record()
    :return: None
    """
    while True:
        record = csv_serializable.next_record()
        if not record:
            break

        row = mysql_serializer.serialize_record(record)
        # The writer key pairs the table name with a key derived from the row.
        writer_key = (record.__tablename__, self._make_key(row))
        suffix = '{}_{}'.format(*writer_key)
        writer = self._get_writer(row,
                                  self._record_file_mapping,
                                  writer_key,
                                  self._record_dir,
                                  suffix)
        writer.writerow(row)
def test_mysql_serialize(self) -> None:
    """Serialize all records of a single-reader TableColumnUsage.

    The expected output is one user row followed by one usage row.
    """
    expected_user = {'rk': '*****@*****.**', 'email': '*****@*****.**'}
    expected_usage = {
        'table_rk': 'db://gold.scm/foo',
        'user_rk': '*****@*****.**',
        'read_count': 1,
    }
    expected = [expected_user, expected_usage]

    reader = ColumnReader(database='db',
                          cluster='gold',
                          schema='scm',
                          table='foo',
                          column='*',
                          user_email='*****@*****.**')
    table_col_usage = TableColumnUsage(col_readers=[reader])

    actual = []
    # Drain the usage object until next_record() returns a falsy value.
    while True:
        record = table_col_usage.next_record()
        if not record:
            break
        serialized = mysql_serializer.serialize_record(record)
        actual.append(serialized)

    self.assertEqual(expected, actual)
def test_create_records(self) -> None:
    """Compare serialized records of ``self.table_owner`` for two owners.

    For each owner the expected rows are a user row followed by a
    table/user link row.
    """
    expected = []
    for owner in (owner1, owner2):
        expected.append({
            'rk': User.USER_NODE_KEY_FORMAT.format(email=owner),
            'email': owner,
        })
        expected.append({
            'table_rk': TABLE_KEY,
            'user_rk': owner,
        })

    actual = []
    # Drain the owner object until it returns a falsy record.
    while True:
        record = self.table_owner.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(actual, expected)
def test_create_records(self) -> None:
    """Compare serialized records of ``self.application``.

    The expected output is an application row followed by an
    application/table link row referencing the same application key.
    """
    application_rk = 'application://gold.airflow/event_test/hive.default.test_table'
    expected = [
        {
            'rk': application_rk,
            'application_url': 'airflow_host.net/admin/airflow/tree?dag_id=event_test',
            'id': 'event_test/hive.default.test_table',
            'name': 'Airflow',
            'description': 'Airflow with id event_test/hive.default.test_table',
        },
        {
            'rk': 'hive://gold.default/test_table',
            'application_rk': application_rk,
        },
    ]

    actual = []
    # Drain the application object until it returns a falsy record.
    while True:
        record = self.application.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected, actual)
def test_mysql_serialize(self) -> None:
    """Serialize all records of a single-reader TableColumnUsage.

    The expected user row here carries the full set of user columns
    (empty strings, ``is_active`` True, ``updated_at`` 0) and is followed
    by one usage row.
    """
    expected_user = {
        'rk': '*****@*****.**',
        'first_name': '',
        'last_name': '',
        'full_name': '',
        'employee_type': '',
        'is_active': True,
        'updated_at': 0,
        'slack_id': '',
        'github_username': '',
        'team_name': '',
        'email': '*****@*****.**',
        'role_name': '',
    }
    expected_usage = {
        'table_rk': 'db://gold.scm/foo',
        'user_rk': '*****@*****.**',
        'read_count': 1,
    }
    expected = [expected_user, expected_usage]

    reader = ColumnReader(database='db',
                          cluster='gold',
                          schema='scm',
                          table='foo',
                          column='*',
                          user_email='*****@*****.**')
    table_col_usage = TableColumnUsage(col_readers=[reader])

    actual = []
    # Drain the usage object until next_record() returns a falsy value.
    while True:
        record = table_col_usage.next_record()
        if not record:
            break
        serialized = mysql_serializer.serialize_record(record)
        actual.append(serialized)

    self.assertEqual(expected, actual)
def test_dashboard_record_full_example(self) -> None:
    """Compare all serialized records of ``self.full_dashboard_metadata``.

    Expected rows, in order: cluster, dashboard group, group description,
    dashboard, dashboard description, then a tag row and a dashboard/tag
    link row for each of the two tags.
    """
    expected_records = [
        {
            'rk': '_dashboard://gold',
            'name': 'gold',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz',
            'name': 'Product - Jobs.cz',
            'cluster_rk': '_dashboard://gold',
            'dashboard_group_url': 'https://foo.bar/dashboard_group/foo',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz/_description',
            'description': 'foo dashboard group description',
            'dashboard_group_rk': '_dashboard://gold.Product - Jobs.cz',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz/Agent',
            'name': 'Agent',
            'dashboard_group_rk': '_dashboard://gold.Product - Jobs.cz',
            'created_timestamp': 123456789,
            'dashboard_url': 'https://foo.bar/dashboard_group/foo/dashboard/bar',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz/Agent/_description',
            'description': 'Agent dashboard description',
            'dashboard_rk': '_dashboard://gold.Product - Jobs.cz/Agent',
        },
        {
            'rk': 'test_tag',
            'tag_type': 'dashboard',
        },
        {
            'dashboard_rk': '_dashboard://gold.Product - Jobs.cz/Agent',
            'tag_rk': 'test_tag',
        },
        {
            'rk': 'tag2',
            'tag_type': 'dashboard',
        },
        {
            'dashboard_rk': '_dashboard://gold.Product - Jobs.cz/Agent',
            'tag_rk': 'tag2',
        },
    ]

    actual = []
    # Drain the metadata object until it yields a falsy record.
    while True:
        record = self.full_dashboard_metadata.next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected_records, actual)
def test_create_records_no_description(self) -> None:
    """Check that a schema built without a description emits one record.

    The first record is compared to the schema row; the next call to
    ``create_next_record`` is expected to return None.
    """
    expected_schema_row = {
        'rk': 'db://cluster.schema',
        'name': 'schema_name',
        'cluster_rk': 'db://cluster',
    }

    schema = SchemaModel(schema_key='db://cluster.schema',
                         schema='schema_name')

    only_row = mysql_serializer.serialize_record(schema.create_next_record())
    self.assertDictEqual(only_row, expected_schema_row)

    self.assertIsNone(schema.create_next_record())
def test_usage_record(self) -> None:
    """Compare serialized records of ``self.usage`` to ``self.expected_records``."""
    serialized_rows = []
    # Drain the usage object until it returns a falsy record.
    while True:
        record = self.usage.create_next_record()
        if not record:
            break
        serialized_rows.append(mysql_serializer.serialize_record(record))

    self.assertEqual(serialized_rows, self.expected_records)
def test_serialize_mysql(self) -> None:
    """Compare serialized records of ``self.table_metadata`` to EXPECTED_RECORDS_MYSQL."""
    serialized_rows = []
    # Drain the table metadata until next_record() returns a falsy value.
    while True:
        record = self.table_metadata.next_record()
        if not record:
            break
        serialized_rows.append(mysql_serializer.serialize_record(record))

    self.assertEqual(EXPECTED_RECORDS_MYSQL, serialized_rows)
def test_dashboard_usage_user_records(self) -> None:
    """Check the user row and the usage row emitted by DashboardUsage.

    The expected user row here carries the full set of user columns; it is
    followed by a usage row, after which ``create_next_record`` must
    return None.
    """
    expected_user_row = {
        'rk': '*****@*****.**',
        'email': '*****@*****.**',
        'is_active': True,
        'first_name': '',
        'last_name': '',
        'full_name': '',
        'github_username': '',
        'team_name': '',
        'employee_type': '',
        'slack_id': '',
        'role_name': '',
        'updated_at': 0,
    }
    expected_usage_row = {
        'user_rk': '*****@*****.**',
        'dashboard_rk': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
        'read_count': 123,
    }

    dashboard_usage = DashboardUsage(
        dashboard_group_id='dashboard_group_id',
        dashboard_id='dashboard_id',
        email='*****@*****.**',
        view_count=123,
        cluster='cluster_id',
        product='product_id',
        should_create_user_node=True,
    )

    user_record = dashboard_usage.create_next_record()
    user_row = mysql_serializer.serialize_record(user_record)
    assert user_record is not None
    self.assertDictEqual(expected_user_row, user_row)

    usage_record = dashboard_usage.create_next_record()
    usage_row = mysql_serializer.serialize_record(usage_record)
    assert usage_record is not None
    self.assertDictEqual(expected_usage_row, usage_row)

    self.assertIsNone(dashboard_usage.create_next_record())
def test_create_records(self) -> None:
    """Check the single row emitted by DashboardChart in two configurations.

    First a chart with name and type set, then a chart built without them;
    in the second case the expected row has no 'name' or 'type' keys.
    """
    chart_rk = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
    query_rk = '_dashboard://gold.dg_id/d_id/query/q_id'
    expected_full = {
        'rk': chart_rk,
        'id': 'c_id',
        'query_rk': query_rk,
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }
    expected_minimal = {
        'rk': chart_rk,
        'id': 'c_id',
        'query_rk': query_rk,
        'url': 'http://gold.foo.bar/',
    }

    # Case 1: every optional chart attribute supplied.
    full_chart = DashboardChart(dashboard_group_id='dg_id',
                                dashboard_id='d_id',
                                query_id='q_id',
                                chart_id='c_id',
                                chart_name='c_name',
                                chart_type='bar',
                                chart_url='http://gold.foo/chart')
    full_record = full_chart.create_next_record()
    full_row = mysql_serializer.serialize_record(full_record)
    assert full_record is not None
    self.assertDictEqual(expected_full, full_row)
    self.assertIsNone(full_chart.create_next_record())

    # Case 2: chart name and type omitted.
    minimal_chart = DashboardChart(dashboard_group_id='dg_id',
                                   dashboard_id='d_id',
                                   query_id='q_id',
                                   chart_id='c_id',
                                   chart_url='http://gold.foo.bar/')
    minimal_record = minimal_chart.create_next_record()
    minimal_row = mysql_serializer.serialize_record(minimal_record)
    assert minimal_record is not None
    self.assertDictEqual(expected_minimal, minimal_row)
def test_create_records(self) -> None:
    """For every entry in ``self.test_cases``, compare serialized records.

    Each case supplies an ``application`` to drain and the
    ``expected_records`` its serialized output must equal.
    """
    for case in self.test_cases:
        expected = case.expected_records

        actual = []
        # Drain this case's application until it returns a falsy record.
        while True:
            record = case.application.create_next_record()
            if not record:
                break
            actual.append(mysql_serializer.serialize_record(record))

        self.assertEqual(expected, actual)
def test_dashboard_owner_record(self) -> None:
    """Check the single owner link row emitted by ``self.dashboard_owner``."""
    expected_row = {
        'user_rk': '*****@*****.**',
        'dashboard_rk': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
    }

    owner_record = self.dashboard_owner.create_next_record()
    owner_row = mysql_serializer.serialize_record(owner_record)

    assert owner_record is not None
    self.assertDictEqual(expected_row, owner_row)

    # No further record is expected after the first one.
    self.assertIsNone(self.dashboard_owner.create_next_record())
def test_create_records(self) -> None:
    """Compare serialized records of ``self.es_last_updated`` to one timestamp row."""
    expected = [
        {
            'rk': 'amundsen_updated_timestamp',
            'latest_timestamp': 100,
        },
    ]

    actual = []
    # Drain the object until it returns a falsy record.
    while True:
        record = self.es_last_updated.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected, actual)
def test_dashboard_timestamp_records(self) -> None:
    """Check the single timestamp row emitted by ``self.dashboard_last_modified``."""
    dashboard_rk = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
    expected_row = {
        'rk': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/_last_modified_timestamp',
        'timestamp': 123456789,
        'name': 'last_updated_timestamp',
        'dashboard_rk': dashboard_rk,
    }

    timestamp_record = self.dashboard_last_modified.create_next_record()
    timestamp_row = mysql_serializer.serialize_record(timestamp_record)

    assert timestamp_record is not None
    self.assertDictEqual(timestamp_row, expected_row)

    # No further record is expected after the first one.
    self.assertIsNone(self.dashboard_last_modified.create_next_record())
def test_create_records(self) -> None:
    """Check the single query row emitted by ``self.dashboard_query``."""
    expected_row = {
        'rk': '_dashboard://gold.dg_id/d_id/query/q_id',
        'name': 'q_name',
        'id': 'q_id',
        'dashboard_rk': '_dashboard://gold.dg_id/d_id',
        'url': 'http://foo.bar/query/baz',
        'query_text': 'SELECT * FROM foo.bar',
    }

    query_record = self.dashboard_query.create_next_record()
    query_row = mysql_serializer.serialize_record(query_record)

    assert query_record is not None
    self.assertDictEqual(expected_row, query_row)

    # No further record is expected after the first one.
    self.assertIsNone(self.dashboard_query.create_next_record())
def test_create_records(self) -> None:
    """Compare serialized records of ``self.table_source`` to one source row.

    The expected row is assembled from the table source's own key getters
    and attributes before any record is pulled.
    """
    source = self.table_source
    expected = [
        {
            'rk': source.get_source_model_key(),
            'source': source.source,
            'source_type': source.source_type,
            'table_rk': source.get_metadata_model_key(),
        },
    ]

    actual = []
    # Drain the table source until it returns a falsy record.
    while True:
        record = self.table_source.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected, actual)
def test_create_records(self) -> None:
    """Compare serialized records of ``self.watermark`` to one watermark row."""
    expected = [
        {
            'rk': self.start_key,
            'partition_key': 'ds',
            'partition_value': '2017-09-18/feature_id=9',
            'create_time': '2017-09-18T00:00:00',
            'table_rk': self.end_key,
        },
    ]

    actual = []
    # Drain the watermark until it returns a falsy record.
    while True:
        record = self.watermark.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(actual, expected)
def test_create_records(self) -> None:
    """Compare serialized records of ``self.tableLastUpdated`` to one timestamp row."""
    expected = [
        {
            'rk': 'hive://gold.default/test_table/timestamp',
            'last_updated_timestamp': 25195665,
            'timestamp': 25195665,
            'name': 'last_updated_timestamp',
            'table_rk': 'hive://gold.default/test_table',
        },
    ]

    actual = []
    # Drain the object until it returns a falsy record.
    while True:
        record = self.tableLastUpdated.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected, actual)
def test_create_record_additional_attr_mysql(self) -> None:
    """Check that a User built with extra attributes serializes email and role_name."""
    user_kwargs = dict(
        first_name='test_first',
        last_name='test_last',
        name='test_first test_last',
        email='*****@*****.**',
        github_username='******',
        team_name='test_team',
        employee_type='FTE',
        manager_email='*****@*****.**',
        slack_id='slack',
        is_active=True,
        updated_at=1,
        role_name='swe',
        enable_notify=True,
    )
    test_user = User(**user_kwargs)

    user_row = mysql_serializer.serialize_record(test_user.create_next_record())

    # Only these two columns are checked on the serialized row.
    self.assertEqual(user_row['email'], '*****@*****.**')
    self.assertEqual(user_row['role_name'], 'swe')
def test_create_records(self) -> None:
    """Compare serialized records of ``self.table_stats`` to one column-stat row."""
    expected = [
        {
            'rk': 'hive://gold.base/test/col/avg/',
            'stat_val': '1',
            'stat_type': 'avg',
            'start_epoch': '1',
            'end_epoch': '2',
            'column_rk': 'hive://gold.base/test/col',
        },
    ]

    actual = []
    # Drain the stats object until it returns a falsy record.
    while True:
        record = self.table_stats.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(actual, expected)
def test_create_records(self) -> None:
    """Compare serialized records of ``self.badge_metada`` to one row per badge."""
    expected = [
        {
            'rk': BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            'category': badge.category,
        }
        for badge in (badge1, badge2)
    ]

    actual = []
    # Drain the badge metadata until it returns a falsy record.
    while True:
        record = self.badge_metada.create_next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected, actual)
def test_dashboard_record_no_description(self) -> None:
    """Compare all serialized records of ``self.dashboard_metadata3``.

    Expected rows, in order: cluster, dashboard group, dashboard, then a
    tag row and a dashboard/tag link row per tag; no description rows are
    expected.
    """
    expected_records_without_description = [
        {
            'rk': '_dashboard://gold',
            'name': 'gold',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz',
            'name': 'Product - Jobs.cz',
            'cluster_rk': '_dashboard://gold',
        },
        {
            'rk': '_dashboard://gold.Product - Jobs.cz/Dohazovac',
            'name': 'Dohazovac',
            'dashboard_group_rk': '_dashboard://gold.Product - Jobs.cz',
        },
        {
            'rk': 'test_tag',
            'tag_type': 'dashboard',
        },
        {
            'dashboard_rk': '_dashboard://gold.Product - Jobs.cz/Dohazovac',
            'tag_rk': 'test_tag',
        },
        {
            'rk': 'tag3',
            'tag_type': 'dashboard',
        },
        {
            'dashboard_rk': '_dashboard://gold.Product - Jobs.cz/Dohazovac',
            'tag_rk': 'tag3',
        },
    ]

    actual = []
    # Drain the metadata object until it yields a falsy record.
    while True:
        record = self.dashboard_metadata3.next_record()
        if not record:
            break
        actual.append(mysql_serializer.serialize_record(record))

    self.assertEqual(expected_records_without_description, actual)