def infer_dataset_data(s3_path, max_lines):
    """
    Build a guessed dataset definition from a sample file.

    The guesses returned by ``get_guesses`` drive the column list, the
    compression setting and the delimiter/escape/quote characters.

    :type s3_path: str
    :param s3_path: The full s3 path to a sample delimited dataset file.
    :type max_lines: int
    :param max_lines: The maximum number of lines to peek.
    :return: a ``Dataset`` whose data is named 'guessed_dataset'
    """
    guesses, has_header, compression, dialect = get_guesses(s3_path, max_lines)
    guessed_columns = columns_with_best_guess(guesses, has_header)
    guessed_location = guess_location(s3_path)

    # Delimiter, escape and quote characters are taken from the guessed dialect.
    guessed_format = DataFormat(
        file_format=FileFormat.TEXTFILE,
        row_format=RowFormat.DELIMITED,
        delimited_by=dialect.delimiter,
        escaped_by=dialect.escapechar,
        quoted_by=dialect.quotechar,
    )

    dataset_data = DatasetData(
        name='guessed_dataset',
        table_name='public',
        data_format=guessed_format,
        location=guessed_location,
        columns=guessed_columns,
        compression=compression,
        load_type=LoadType.INSERT,
    )
    return Dataset(data=dataset_data)
def test_crud(self):
    """Create, read, update and delete a dataset through the client."""
    cols = [
        Column('c1', DataType.VARCHAR, 50),
        Column('c2', DataType.BIGINT),
    ]
    fmt = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    # The same no-op action name doubles as dataset name and table name.
    noop_name = NoOpActionTypes.action_that_succeeds.name
    original = Dataset(
        data=DatasetData(noop_name, noop_name, 's3://bucket/prefix', fmt, cols, tags=['foo']))

    # create
    created = self.dart.save_dataset(original)
    self.assertEqual(created.data.to_dict(), original.data.to_dict())

    # read
    fetched = self.dart.get_dataset(created.id)
    self.assertEqual(created.to_dict(), fetched.to_dict())

    # update
    fetched.data.compression = Compression.GZIP
    updated = self.dart.save_dataset(fetched)
    self.assertEqual(updated.data.compression, Compression.GZIP)
    self.assertNotEqual(created.to_dict(), updated.to_dict())

    # delete: a subsequent lookup is expected to answer with a 404
    self.dart.delete_dataset(fetched.id)
    try:
        self.dart.get_dataset(fetched.id)
    except DartRequestException as err:
        self.assertEqual(err.response.status_code, 404)
    else:
        self.fail('dataset should have been missing after delete!')
def test_dataset_schema(self):
    """Validating against the dataset schema should change the serialized form."""
    cols = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    # Pass None explicitly for num_header_rows so validation has something to default.
    header_rows = None
    fmt = DataFormat(FileFormat.PARQUET, RowFormat.NONE, header_rows)
    dataset = Dataset(
        data=DatasetData('test-dataset', 'test_dataset_table', 's3://bucket/prefix', fmt, cols))

    before = dataset.to_dict()
    validated = default_and_validate(dataset, dataset_schema())
    after = validated.to_dict()

    # num_header_rows should have been defaulted to 0, making these unequal
    self.assertNotEqual(before, after)
def test_dataset_schema_invalid(self):
    """A dataset with no location must be rejected by schema validation."""
    columns = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    location = None
    ds = Dataset(data=DatasetData('test-dataset', 'test_dataset_table', location, df, columns))

    # Only the validation call sits inside the context manager, so an
    # exception raised while building the fixture cannot satisfy the test.
    with self.assertRaises(DartValidationException) as context:
        # should fail because location is required
        default_and_validate(ds, dataset_schema())

    self.assertIsInstance(context.exception, DartValidationException)
def test_impala_table_definition_step(self):
    """Generate the impala copy-to-table script and compare it with the expected file."""
    parquet_format = DataFormat(
        file_format=FileFormat.PARQUET,
        row_format=RowFormat.NONE,
    )
    weblog_columns = [
        Column('ip', 'STRING'),
        Column('user', 'STRING'),
        Column('requestDate', 'TIMESTAMP', date_pattern='dd/MMM/yyyy:HH:mm:ss Z'),
        Column('httpMethod', 'STRING'),
        Column('urlPath', 'STRING'),
        Column('queryString', 'STRING'),
        Column('httpVersion', 'STRING'),
        Column('statusCode', 'STRING'),
        Column('bytesSent', 'INT'),
        Column('referrer', 'STRING'),
        Column('userAgent', 'STRING'),
        Column('responseTime', 'BIGINT'),
        Column('hostname', 'STRING'),
        Column('userFingerprint', 'STRING'),
        Column('userId', 'STRING'),
        Column('sessionId', 'STRING'),
        Column('requestId', 'STRING'),
        Column('visitorId', 'STRING'),
        Column('vegSlice', 'STRING'),
        Column('fruitSlice', 'STRING'),
        Column('cacheHitMiss', 'STRING'),
    ]
    partition_columns = [
        Column('year', 'STRING'),
        Column('week', 'STRING'),
        Column('day', 'STRING'),
    ]
    ds = Dataset(data=DatasetData(
        name='weblogs_v01',
        table_name='weblogs_parquet',
        location='s3://wsm-log-servers/weblogs/www.retailmenot.com/ec2/',
        data_format=parquet_format,
        columns=weblog_columns,
        compression='GZIP',
        partitions=partition_columns,
    ))

    # Stage the SQL template in the scratch directory used by the step.
    call('mkdir -p /tmp/dart-emr-test/impala/')
    test_dir = os.path.dirname(os.path.abspath(__file__))
    template_src = test_dir + '/../../../engine/emr/steps/impala/copy_to_table.sql'
    shutil.copyfile(template_src, '/tmp/dart-emr-test/impala/copy_to_table.sql')

    # The same dataset is passed as both stage and target.
    impala_copy_to_table(ds, 'weblogs_stage', ds, 'weblogs_parquet', 's3://test',
                         '/tmp/dart-emr-test/', 'actionid123', 1, 1)

    expected_path = os.path.join(test_dir, 'copy_to_table_weblogs_parquet.sql')
    with open(expected_path) as expected_file:
        expected_contents = expected_file.read()
    with open('/tmp/dart-emr-test/impala/copy_to_table_weblogs_parquet.sql') as actual_file:
        actual_contents = actual_file.read()

    self.assertEqual(expected_contents, actual_contents)
def setUp(self):
    """Save the dataset, subscription, datastore, workflow and template actions used by the tests."""
    # :type dart: dart.client.python.dart_client.Dart
    client = Dart(host='localhost', port=5000)
    self.dart = client

    columns = [
        Column('c1', DataType.VARCHAR, 50),
        Column('c2', DataType.BIGINT),
    ]
    delimited_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)

    # Every S3 path below hangs off the bucket named by DART_TEST_BUCKET.
    impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'

    dataset_data = DatasetData(
        name='test-dataset',
        table_name='test_dataset_table',
        load_type=LoadType.INSERT,
        location=impala_root,
        data_format=delimited_format,
        columns=columns,
        tags=[],
    )
    self.dataset = self.dart.save_dataset(Dataset(data=dataset_data))

    # Subscription over the saved dataset, bounded by start/end keys and a regex filter.
    start_key = impala_root + '/impala'
    end_key = impala_root + '/install'
    key_regex = '.*\\.rpm'
    subscription = Subscription(data=SubscriptionData(
        'test-subscription', self.dataset.id, start_key, end_key, key_regex))
    self.subscription = self.dart.save_subscription(subscription)

    datastore_args = {'action_sleep_time_in_seconds': 0}
    datastore = Datastore(data=DatastoreData(
        'test-datastore', 'no_op_engine', args=datastore_args, state=DatastoreState.TEMPLATE))
    self.datastore = self.dart.save_datastore(datastore)

    workflow = Workflow(data=WorkflowData(
        'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE))
    self.workflow = self.dart.save_workflow(workflow, self.datastore.id)

    # Two template actions: a plain succeeding one and one that consumes the subscription.
    succeed_name = NoOpActionTypes.action_that_succeeds.name
    consume_name = NoOpActionTypes.consume_subscription.name
    consume_args = {'subscription_id': self.subscription.id}
    succeed_action = Action(data=ActionData(
        succeed_name, succeed_name, state=ActionState.TEMPLATE))
    consume_action = Action(data=ActionData(
        consume_name, consume_name, consume_args, state=ActionState.TEMPLATE))
    self.action0, self.action1 = self.dart.save_actions(
        [succeed_action, consume_action], workflow_id=self.workflow.id)
data=(DatasetData( name='beacon_native_app_parsed_gzipped_v03', table_name='beacon_native_app', location= 's3://example-bucket/prd/beacon/native_app/v3/dwh-delimited/gzipped', load_type=LoadType.INSERT, distribution_keys=['created'], sort_keys=['created', 'eventtype'], hive_compatible_partition_folders=True, data_format=DataFormat( FileFormat.TEXTFILE, RowFormat.DELIMITED, delimited_by='\t', quoted_by='"', escaped_by='\\', null_string='NULL', ), compression=Compression.GZIP, partitions=[Column('createdpartition', DataType.STRING)], columns=[ Column('logfileid', DataType.INT), Column('linenumber', DataType.INT), Column('created', DataType.TIMESTAMP, date_pattern="yyyy-MM-dd HH:mm:ss"), Column('remoteip', DataType.VARCHAR, 500), Column('useragent', DataType.VARCHAR, 2500), Column('eventtype', DataType.VARCHAR, 255), Column('appversion', DataType.VARCHAR, 255), Column('advertiserid', DataType.VARCHAR, 2048), Column('couponsonpage', DataType.INT), Column('coupons', DataType.VARCHAR, 10000), Column('channel', DataType.VARCHAR, 128), Column('geocouponcount', DataType.BIGINT), Column('geofence', DataType.VARCHAR, 255), Column('geofencetimespent', DataType.NUMERIC, precision=14, scale=5), Column('loginstatus', DataType.VARCHAR, 25), Column('products', DataType.VARCHAR, 2500), Column('session', DataType.VARCHAR, 5000), Column('systemname', DataType.VARCHAR, 2500), Column('systemversion', DataType.VARCHAR, 2500), Column('udid', DataType.VARCHAR, 128), Column('userqualifier', DataType.VARCHAR, 64), Column('url', DataType.VARCHAR, 5000), Column('user_uuid', DataType.VARCHAR, 64), Column('userid', DataType.INT), Column('searchtype', DataType.VARCHAR, 128), Column('searchlistterm', DataType.VARCHAR, 512), Column('searchterm', DataType.VARCHAR, 512), Column('emailuuid', DataType.VARCHAR, 128), Column('userfingerprint', DataType.VARCHAR, 64), Column('locationstatus', DataType.VARCHAR, 128), Column('pushnotificationstatus', DataType.VARCHAR, 128), Column('placement', 
DataType.VARCHAR, 1024), Column('loc', DataType.VARCHAR, 128), Column('ppoi0', DataType.VARCHAR, 128), Column('ppoi1', DataType.VARCHAR, 128), Column('ppoi2', DataType.VARCHAR, 128), Column('ppoi3', DataType.VARCHAR, 128), Column('ppoi4', DataType.VARCHAR, 128), Column('applaunchnotificationtype', DataType.VARCHAR, 128), Column('scenarioname', DataType.VARCHAR, 128), Column('behaviorname', DataType.VARCHAR, 128), Column('coupontype', DataType.VARCHAR, 128), Column('couponposition', DataType.VARCHAR, 128), Column('hasqsrcontent', DataType.VARCHAR, 128), Column('promptname', DataType.VARCHAR, 128), Column('locationpermissionchanage', DataType.VARCHAR, 128), Column('couponproblemtype', DataType.VARCHAR, 128), Column('storetitle', DataType.VARCHAR, 128), Column('mallname', DataType.VARCHAR, 128), Column('restaurantname', DataType.VARCHAR, 128), Column('milesaway', DataType.VARCHAR, 128), Column('menuitem', DataType.VARCHAR, 128), Column('toolname', DataType.VARCHAR, 128), Column('toolaction', DataType.VARCHAR, 128), Column('toolstep', DataType.VARCHAR, 128), Column('mallposition', DataType.VARCHAR, 128), Column('recommendstorename', DataType.VARCHAR, 128), Column('recommendstoreposition', DataType.VARCHAR, 128), Column('favoritestorename', DataType.VARCHAR, 128), Column('favoritestoreaction', DataType.VARCHAR, 128), Column('favoritestoreposition', DataType.VARCHAR, 128), Column('favoritesiteid', DataType.VARCHAR, 128), Column('receivername', DataType.VARCHAR, 128), Column('outclickbuttonprompt', DataType.VARCHAR, 128), Column('datasource', DataType.VARCHAR, 1024), Column('searchresultcount', DataType.VARCHAR, 128), Column('searchresultposition', DataType.VARCHAR, 128), Column('sharetype', DataType.VARCHAR, 128), Column('daysuntilexpiration', DataType.VARCHAR, 128), Column('firedate', DataType.VARCHAR, 128), Column('settingschangevalue', DataType.VARCHAR, 128), Column('settingschangetype', DataType.VARCHAR, 128), Column('settingschangelocation', DataType.VARCHAR, 128), 
Column('clickaction', DataType.VARCHAR, 128), Column('tnt', DataType.VARCHAR, 128), Column('previouspage', DataType.VARCHAR, 2500), Column('clickpage', DataType.VARCHAR, 2500), Column('launchreason', DataType.VARCHAR, 128), Column('taplyticsData', DataType.VARCHAR, 150), Column('appCampaign', DataType.VARCHAR, 50), Column('accountMethod', DataType.VARCHAR, 60), Column('appState', DataType.VARCHAR, 100), Column('btStatus', DataType.BOOLEAN), Column('btBeaconId', DataType.VARCHAR, 500), Column('btBeaconFactoryId', DataType.VARCHAR, 500), Column('btBeaconName', DataType.VARCHAR, 500), Column('btTimeSpent', DataType.VARCHAR, 50), Column('purchaseId', DataType.VARCHAR, 500), Column('transactionId', DataType.VARCHAR, 500), Column('outclickLink', DataType.VARCHAR, 1000), Column('outclickPage', DataType.VARCHAR, 300), Column('featuredCouponPosition', DataType.INT), Column('commentCount', DataType.INT), Column('mallCount', DataType.INT), Column('clickCount', DataType.INT), Column('merchantName', DataType.VARCHAR, 100), Column('merchantPosition', DataType.INT), Column('couponUuids', DataType.VARCHAR, 10000), Column('favoriteSiteUuid', DataType.VARCHAR, 50), Column('deepLinkType', DataType.VARCHAR, 40), Column('adUnitUuid', DataType.VARCHAR, 50), ], ))))
Dataset(data=(DatasetData( name='owen_eu_DW-3411_v1', table_name='owen_eu', location='s3://example-bucket/prd/inbound/overlord/eu-all-events', load_type=LoadType.MERGE, data_format=DataFormat( file_format=FileFormat.TEXTFILE, row_format=RowFormat.JSON, ), compression=Compression.GZIP, partitions=[ Column('year', DataType.STRING), Column('month', DataType.STRING), Column('day', DataType.STRING), ], columns=[ Column('host', DataType.STRING, path='metadata.host'), Column('referer', DataType.STRING, path='metadata.referer'), Column( 'eventName', DataType.STRING, path='owen.event.eventName'), Column('eventVersion', DataType.STRING, path='owen.event.eventVersion'), Column('eventPlatform', DataType.STRING, path='owen.event.eventPlatform'), Column('eventInstanceUuid', DataType.STRING, path='owen.event.eventInstanceUuid'), Column('eventCategory', DataType.STRING, path='owen.event.eventCategory'), Column('eventTimestamp', DataType.TIMESTAMP, path='owen.event.eventTimestamp', date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'"), Column('eventTarget', DataType.STRING, path='owen.event.eventTarget'), Column('eventAction', DataType.STRING, path='owen.event.eventAction'), Column('eventPlatformVersion', DataType.STRING, path='owen.event.eventPlatformVersion'), Column('osName', DataType.STRING, path='owen.context.osName'), Column('loggedInFlag', DataType.BOOLEAN, path='owen.context.loggedInFlag'), Column('custom', DataType.STRING, path='owen.context.custom'), Column('browserVersion', DataType.STRING, path='owen.context.browserVersion'), Column( 'referrer', DataType.STRING, path='owen.context.referrer'), Column('previousPageName', DataType.STRING, path='owen.context.previousPageName'), Column('screenHeight', DataType.STRING, path='owen.context.screenHeight'), Column('breadCrumb', DataType.STRING, path='owen.context.breadCrumb'), Column( 'pageName', DataType.STRING, path='owen.context.pageName'), Column('country', DataType.STRING, path='owen.context.country'), Column('propertyName', 
DataType.STRING, path='owen.context.propertyName'), Column('launchCount', DataType.STRING, path='owen.context.launchCount'), Column('viewInstanceUuid', DataType.STRING, path='owen.context.viewInstanceUuid'), Column('osVersion', DataType.STRING, path='owen.context.osVersion'), Column('connectionType', DataType.STRING, path='owen.context.connectionType'), Column('partialSearchTerm', DataType.STRING, path='owen.context.partialSearchTerm'), Column('carrier', DataType.STRING, path='owen.context.carrier'), Column('longitude', DataType.STRING, path='owen.context.longitude'), Column( 'productSectionPosition_0', DataType.STRING, path='owen.context.inventory[0].productSectionPosition'), Column('savedFlag_0', DataType.BOOLEAN, path='owen.context.inventory[0].savedFlag'), Column('position_0', DataType.STRING, path='owen.context.inventory[0].position'), Column('brand_0', DataType.STRING, path='owen.context.inventory[0].brand'), Column('affiliateNetwork_0', DataType.STRING, path='owen.context.inventory[0].affiliateNetwork'), Column('deepLinkUrl_0', DataType.STRING, path='owen.context.inventory[0].deepLinkUrl'), Column('conquestingFlag_0', DataType.BOOLEAN, path='owen.context.inventory[0].conquestingFlag'), Column('originalPrice_0', DataType.STRING, path='owen.context.inventory[0].originalPrice'), Column('adUnitUuid_0', DataType.STRING, path='owen.context.inventory[0].adUnitUuid'), Column('startDate_0', DataType.TIMESTAMP, path='owen.context.inventory[0].startDate', date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'"), Column('proximityUnit_0', DataType.STRING, path='owen.context.inventory[0].proximityUnit'), Column('commentsCount_0', DataType.STRING, path='owen.context.inventory[0].commentsCount'), Column('outRedirectUrl_0', DataType.STRING, path='owen.context.inventory[0].outRedirectUrl'), Column('productCardPosition_0', DataType.STRING, path='owen.context.inventory[0].productCardPosition'), Column('productSectionUuid_0', DataType.STRING, 
path='owen.context.inventory[0].productSectionUuid'), Column('lastVerifiedDate_0', DataType.TIMESTAMP, path='owen.context.inventory[0].lastVerifiedDate', date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'"), Column('productCardUuid_0', DataType.STRING, path='owen.context.inventory[0].productCardUuid'), Column('redemptionChannel_0', DataType.STRING, path='owen.context.inventory[0].redemptionChannel'), Column('noVotes_0', DataType.STRING, path='owen.context.inventory[0].noVotes'), Column('retailCategory_0', DataType.STRING, path='owen.context.inventory[0].retailCategory'), Column('couponRank_0', DataType.STRING, path='owen.context.inventory[0].couponRank'), Column('inventoryChannel_0', DataType.STRING, path='owen.context.inventory[0].inventoryChannel'), Column('yesVotes_0', DataType.STRING, path='owen.context.inventory[0].yesVotes'), Column('inventorySource_0', DataType.STRING, path='owen.context.inventory[0].inventorySource'), Column('inventoryName_0', DataType.STRING, path='owen.context.inventory[0].inventoryName'), Column('monetizableFlag_0', DataType.BOOLEAN, path='owen.context.inventory[0].monetizableFlag'), Column('recommendedFlag_0', DataType.BOOLEAN, path='owen.context.inventory[0].recommendedFlag'), Column('expirationDate_0', DataType.TIMESTAMP, path='owen.context.inventory[0].expirationDate', date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'"), Column('clickLocation_0', DataType.STRING, path='owen.context.inventory[0].clickLocation'), Column('finalPrice_0', DataType.STRING, path='owen.context.inventory[0].finalPrice'), Column('usedByCount_0', DataType.STRING, path='owen.context.inventory[0].usedByCount'), Column('proximity_0', DataType.STRING, path='owen.context.inventory[0].proximity'), Column('inventoryUuid_0', DataType.STRING, path='owen.context.inventory[0].inventoryUuid'), Column('siteUuid_0', DataType.STRING, path='owen.context.inventory[0].siteUuid'), Column('outclickUuid_0', DataType.STRING, path='owen.context.inventory[0].outclickUuid'), Column('adUnitType_0', 
DataType.STRING, path='owen.context.inventory[0].adUnitType'), Column('exclusivityFlag_0', DataType.BOOLEAN, path='owen.context.inventory[0].exclusivityFlag'), Column('inventoryType_0', DataType.STRING, path='owen.context.inventory[0].inventoryType'), Column('successPercentage_0', DataType.STRING, path='owen.context.inventory[0].successPercentage'), Column('claimUuid_0', DataType.STRING, path='owen.context.inventory[0].claimUuid'), Column('region', DataType.STRING, path='owen.context.region'), Column('session', DataType.STRING, path='owen.context.session'), Column('content', DataType.STRING, path='owen.context.marketing.content'), Column('marketingVendor', DataType.STRING, path='owen.context.marketing.vendor'), Column('campaign', DataType.STRING, path='owen.context.marketing.campaign'), Column('adGroup', DataType.STRING, path='owen.context.marketing.adGroup'), Column('campaignUuid', DataType.STRING, path='owen.context.marketing.campaignUuid'), Column('campaignSendCount', DataType.STRING, path='owen.context.marketing.campaignSendCount'), Column('source', DataType.STRING, path='owen.context.marketing.source'), Column('term', DataType.STRING, path='owen.context.marketing.term'), Column('channel', DataType.STRING, path='owen.context.marketing.channel'), Column('medium', DataType.STRING, path='owen.context.marketing.medium'), Column('cdRank', DataType.STRING, path='owen.context.marketing.cdRank'), Column('notificationUuid', DataType.STRING, path='owen.context.marketing.notificationUuid'), Column('inventoryCount', DataType.STRING, path='owen.context.inventoryCount'), Column('favoriteFlag', DataType.BOOLEAN, path='owen.context.favoriteFlag'), Column( 'pageType', DataType.STRING, path='owen.context.pageType'), Column('bluetoothBeaconType', DataType.STRING, path='owen.context.bluetoothBeaconType'), Column('variation_0', DataType.STRING, path='owen.context.experiment[0].variation'), Column('campaign_0', DataType.STRING, path='owen.context.experiment[0].campaign'), 
Column('locationEnabledFlag', DataType.BOOLEAN, path='owen.context.locationEnabledFlag'), Column('macAddress', DataType.STRING, path='owen.context.macAddress'), Column('browserFamily', DataType.STRING, path='owen.context.browserFamily'), Column('geofenceUuid', DataType.STRING, path='owen.context.geofenceUuid'), Column('mobileDeviceMake', DataType.STRING, path='owen.context.mobileDeviceMake'), Column('vendor_0', DataType.STRING, path='owen.context.vendor[0].vendor'), Column('vendorClickUuid_0', DataType.STRING, path='owen.context.vendor[0].vendorClickUuid'), Column('udid', DataType.STRING, path='owen.context.udid'), Column( 'latitude', DataType.STRING, path='owen.context.latitude'), Column('bluetoothEnabledFlag', DataType.BOOLEAN, path='owen.context.bluetoothEnabledFlag'), Column('environment', DataType.STRING, path='owen.context.environment'), Column('city', DataType.STRING, path='owen.context.city'), Column( 'userUuid', DataType.STRING, path='owen.context.userUuid'), Column('dma', DataType.STRING, path='owen.context.dma'), Column('testUuid', DataType.STRING, path='owen.context.test.testUuid'), Column('userAgent', DataType.STRING, path='owen.context.userAgent'), Column('previousViewInstanceUuid', DataType.STRING, path='owen.context.previousViewInstanceUuid'), Column( 'language', DataType.STRING, path='owen.context.language'), Column('deviceCategory', DataType.STRING, path='owen.context.deviceCategory'), Column('bluetoothBeaconId', DataType.STRING, path='owen.context.bluetoothBeaconId'), Column('screenWidth', DataType.STRING, path='owen.context.screenWidth'), Column('personalizationFlag', DataType.BOOLEAN, path='owen.context.personalizationFlag'), Column('appForegroundFlag', DataType.BOOLEAN, path='owen.context.appForegroundFlag'), Column('mobileDeviceModel', DataType.STRING, path='owen.context.mobileDeviceModel'), Column('userQualifier', DataType.STRING, path='owen.context.userQualifier'), Column('deviceFingerprint', DataType.STRING, 
path='owen.context.deviceFingerprint'), Column('ipAddress', DataType.STRING, path='owen.context.ipAddress'), Column( 'osFamily', DataType.STRING, path='owen.context.osFamily'), Column('advertiserUuid', DataType.STRING, path='owen.context.advertiserUuid'), Column('notificationEnabledFlag', DataType.BOOLEAN, path='owen.context.notificationEnabledFlag'), Column('inventory', DataType.STRING, path='owen.context.inventory'), Column('vendor', DataType.STRING, path='owen.context.vendor'), Column('experiment', DataType.STRING, path='owen.context.experiment'), ], ))))
Dataset(data=(DatasetData( name='beacon_native_app_parsed_v01', table_name='beacon_native_app', location='s3://example-bucket/nb.retailmenot.com/parsed_logs', load_type=LoadType.INSERT, data_format=DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED, delimited_by='\t', quoted_by='"', escaped_by='\\', null_string='NULL', num_header_rows=1), compression=Compression.NONE, partitions=[ Column('year', DataType.STRING), Column('week', DataType.STRING), ], columns=[ Column('logFileId', DataType.BIGINT), Column('lineNumber', DataType.INT), Column('created', DataType.TIMESTAMP, date_pattern="yyyy-MM-dd HH:mm:ss"), Column('remoteip', DataType.STRING), Column('useragent', DataType.STRING), Column('eventType', DataType.STRING), Column('appVersion', DataType.STRING), Column('advertiserID', DataType.STRING), Column('couponsOnPage', DataType.INT), Column('coupons', DataType.STRING), Column('channel', DataType.STRING), Column('geoCouponCount', DataType.STRING), Column('geofence', DataType.STRING), Column('geofenceTimeSpent', DataType.STRING), Column('loginStatus', DataType.STRING), Column('products', DataType.STRING), Column('session', DataType.STRING), Column('systemName', DataType.STRING), Column('systemVersion', DataType.STRING), Column('udid', DataType.STRING), Column('userQualifier', DataType.STRING), Column('url', DataType.STRING), Column('user_uuid', DataType.STRING), Column('userId', DataType.STRING), Column('searchType', DataType.STRING), Column('searchListTerm', DataType.STRING), Column('searchTerm', DataType.STRING), Column('emailUUId', DataType.STRING), Column('userFingerprint', DataType.STRING), Column('locationStatus', DataType.STRING), Column('pushNotificationStatus', DataType.BOOLEAN), Column('placement', DataType.STRING), Column('loc', DataType.STRING), Column('ppoi0', DataType.STRING), Column('ppoi1', DataType.STRING), Column('ppoi2', DataType.STRING), Column('ppoi3', DataType.STRING), Column('ppoi4', DataType.STRING), Column('appLaunchNotificationType', 
DataType.STRING), Column('scenarioName', DataType.STRING), Column('behaviorName', DataType.STRING), Column('couponType', DataType.STRING), Column('couponPosition', DataType.STRING), Column('hasQSRContent', DataType.BOOLEAN), Column('promptName', DataType.STRING), Column('locationPermissionChanage', DataType.STRING), Column('couponProblemType', DataType.STRING), Column('storeTitle', DataType.STRING), Column('mallName', DataType.STRING), Column('restaurantName', DataType.STRING), Column('milesAway', 'float'), Column('menuItem', DataType.STRING), Column('toolName', DataType.STRING), Column('toolAction', DataType.STRING), Column('toolStep', DataType.STRING), Column('mallPosition', DataType.INT), Column('recommendStoreName', DataType.STRING), Column('recommendStorePosition', DataType.INT), Column('favoriteStoreName', DataType.STRING), Column('favoriteStoreAction', DataType.STRING), Column('favoriteStorePosition', DataType.INT), Column('favoriteSiteId', DataType.STRING), Column('receiverName', DataType.STRING), Column('outclickButtonPrompt', DataType.STRING), Column('dataSource', DataType.STRING), Column('searchResultCount', DataType.INT), Column('searchResultPosition', DataType.INT), Column('shareType', DataType.STRING), Column('daysUntilExpiration', DataType.INT), Column('fireDate', DataType.BIGINT), Column('settingsChangeValue', DataType.STRING), Column('settingsChangeType', DataType.STRING), Column('settingsChangeLocation', DataType.STRING), Column('clickAction', DataType.STRING), Column('tnt', DataType.STRING), Column('previousPage', DataType.STRING), Column('clickPage', DataType.STRING), Column('launchReason', DataType.STRING), Column('taplyticsData', DataType.STRING), Column('appCampaign', DataType.STRING), Column('accountMethod', DataType.STRING), Column('appState', DataType.STRING), Column('btStatus', DataType.BOOLEAN), Column('btBeaconId', DataType.STRING), Column('btBeaconFactoryId', DataType.STRING), Column('btBeaconName', DataType.STRING), 
Column('btTimeSpent', DataType.STRING), Column('purchaseId', DataType.STRING), Column('transactionId', DataType.STRING), Column('outclickLink', DataType.STRING), Column('outclickPage', DataType.STRING), Column('featuredCouponPosition', DataType.INT), Column('commentCount', DataType.INT), Column('mallCount', DataType.INT), Column('clickCount', DataType.INT), Column('merchantName', DataType.STRING), Column('merchantPosition', DataType.INT), ], ))))
def test_hive_table_definition_step(self):
    """Generate the hive copy-to-table script for a JSON dataset and compare it with the expected file."""
    json_format = DataFormat(
        file_format=FileFormat.TEXTFILE,
        row_format=RowFormat.JSON,
    )
    event_columns = [
        Column('host', 'STRING', path='metadata.host'),
        Column('pageName', 'STRING', path='owen.context.pageName'),
        Column('viewInstanceUuid', 'STRING', path='owen.context.viewInstanceUuid'),
        Column('previousPageName', 'STRING', path='owen.context.previousPageName'),
        Column('previousViewInstanceUuid', 'STRING', path='owen.context.previousViewInstanceUuid'),
        Column('session', 'STRING', path='owen.context.session'),
        Column('pageType', 'STRING', path='owen.context.pageType'),
        Column('propertyName', 'STRING', path='owen.context.propertyName'),
        Column('enviroment', 'STRING', path='owen.context.environment'),
        Column('appForegroundFlag', 'BOOLEAN', path='owen.context.appForegroundFlag'),
        Column('bluetoothEnabledFlag', 'BOOLEAN', path='owen.context.bluetoothEnabledFlag'),
        Column('favoriteFlag', 'BOOLEAN', path='owen.context.favoriteFlag'),
        Column('locationEnabledFlag', 'BOOLEAN', path='owen.context.locationEnabledFlag'),
        Column('loggedInFlag', 'BOOLEAN', path='owen.context.loggedInFlag'),
        Column('notificationEnabledFlag', 'BOOLEAN', path='owen.context.notificationEnabledFlag'),
        Column('personalizationFlag', 'BOOLEAN', path='owen.context.personalizationFlag'),
        Column('advertiserUuid', 'STRING', path='owen.context.advertiserUuid'),
        Column('udid', 'STRING', path='owen.context.udid'),
        Column('userQualifier', 'STRING', path='owen.context.userQualifier'),
        Column('userId', 'STRING', path='owen.context.custom.legacy.userId'),
        Column('userUuid', 'STRING', path='owen.context.userUuid'),
        Column('macAddress', 'STRING', path='owen.context.macAddress'),
        Column('ipAddress', 'STRING', path='owen.context.ipAddress'),
        Column('osVersion', 'STRING', path='owen.context.osVersion'),
        Column('osFamily', 'STRING', path='owen.context.osFamily'),
        Column('osName', 'STRING', path='owen.context.osName'),
        Column('browserFamily', 'STRING', path='owen.context.browserFamily'),
        Column('deviceCategory', 'STRING', path='owen.context.deviceCategory'),
        Column('deviceMake', 'STRING', path='owen.context.mobileDeviceMake'),
        Column('deviceModel', 'STRING', path='owen.context.mobileDeviceModel'),
        Column('connectionType', 'STRING', path='owen.context.connectionType'),
        Column('userAgent', 'STRING', path='owen.context.userAgent'),
        Column('geofenceId', 'STRING', path='owen.context.custom.legacy.geofenceId'),
        Column('eventTimestamp', 'TIMESTAMP', path='owen.event.eventTimestamp',
               date_pattern="yyyy-MM-dd'T'HH:mm:ssZ"),
        Column('eventInstanceUuid', 'STRING', path='owen.event.eventInstanceUuid'),
        Column('eventPlatformVersion', 'STRING', path='owen.event.eventPlatformVersion'),
        Column('eventVersion', 'STRING', path='owen.event.eventVersion'),
        Column('eventCategory', 'STRING', path='owen.event.eventCategory'),
        Column('eventName', 'STRING', path='owen.event.eventName'),
        Column('eventAction', 'STRING', path='owen.event.eventAction'),
        Column('eventPlatform', 'STRING', path='owen.event.eventPlatform'),
        Column('testUnixTimestampSecondsPattern', 'TIMESTAMP',
               path='some.fake.path.testUnixTimestampSecondsPattern',
               date_pattern='UNIX_TIMESTAMP_SECONDS'),
        Column('testUnixTimestampMillisPattern', 'TIMESTAMP',
               path='some.fake.path.testUnixTimestampMillisPattern',
               date_pattern='UNIX_TIMESTAMP_MILLIS'),
    ]
    partition_columns = [
        Column('year', 'STRING'),
        Column('week', 'STRING'),
        Column('day', 'STRING'),
    ]
    ds = Dataset(data=DatasetData(
        name='owen_eu_v01',
        table_name='owen_eu',
        location='s3://s3-rpt-uss-dat-warehouse/prd/inbound/overlord/eu-all-events',
        data_format=json_format,
        columns=event_columns,
        compression='GZIP',
        partitions=partition_columns,
    ))

    # Stage the HQL template in the scratch directory used by the step.
    call('mkdir -p /tmp/dart-emr-test/hive/')
    test_dir = os.path.dirname(os.path.abspath(__file__))
    template_src = test_dir + '/../../../engine/emr/steps/hive/copy_to_table.hql'
    shutil.copyfile(template_src, '/tmp/dart-emr-test/hive/copy_to_table.hql')

    action_id = 'actionid123'

    # Target: a copy of the dataset whose data format is swapped for RCFILE.
    target_dataset = Dataset.from_dict(ds.to_dict())
    # NOTE(review): this assignment lands on the format object that is replaced
    # on the very next line -- confirm whether it is still needed.
    target_dataset.data.data_format.num_header_rows = 0
    target_dataset.data.data_format = DataFormat(FileFormat.RCFILE, RowFormat.NONE)

    # Stage: a second copy in which every column is typed as STRING.
    stage_dataset = Dataset.from_dict(ds.to_dict())
    assert isinstance(stage_dataset, Dataset)
    for column in stage_dataset.data.columns:
        column.data_type = DataType.STRING

    hive_copy_to_table(stage_dataset, 'owen_eu_stage', target_dataset, 'owen_eu', 's3://test',
                       '/tmp/dart-emr-test/', action_id, None, 1, 1)

    expected_path = os.path.join(test_dir, 'copy_to_table_owen_eu.hql')
    with open(expected_path) as expected_file:
        expected_contents = expected_file.read()
    with open('/tmp/dart-emr-test/hive/copy_to_table_owen_eu.hql') as actual_file:
        actual_contents = actual_file.read()

    self.assertEqual(expected_contents, actual_contents)
# Pass the 'weblogs_v01' dataset definition (table 'weblogs') to
# dart.save_dataset and keep the returned object in `dataset`.
# The rows use RowFormat.REGEX: `regex_input` declares 21 named capture groups
# and the column list below declares 21 columns. The data is declared as
# BZ2-compressed and partitioned by `year` and `week`.
# NOTE(review): the last specifier in `regex_output` is `%21s`, while the
# preceding twenty use the positional `%N$s` form -- confirm whether `%21$s`
# was intended.
dataset = dart.save_dataset(Dataset(data=DatasetData( name='weblogs_v01', table_name='weblogs', location='s3://example-bucket/weblogs/www.retailmenot.com/ec2/', data_format=DataFormat( file_format=FileFormat.TEXTFILE, row_format=RowFormat.REGEX, regex_input="(?<ip>^(?:(?:unknown(?:,\\s)?|(?:\\d+\\.\\d+\\.\\d+\\.\\d+(?:,\\s)?))+)|\\S*)\\s+\\S+\\s+(?<userIdentifier>(?:[^\\[]+|\\$\\S+\\['\\S+'\\]|\\[username\\]))\\s*\\s+\\[(?<requestDate>[^\\]]+)\\]\\s+\"(?<httpMethod>(?:GET|HEAD|POST|PUT|DELETE|TRACE))\\s(?<urlPath>(?:[^ ?]+))(?:\\?(?<queryString>(?:[^ ]+)))?\\sHTTP/(?<httpVersion>(?:[\\d\\.]+))\"\\s+(?<statusCode>[0-9]+)\\s+(?<bytesSent>\\S+)\\s+\"(?<referrer>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userAgent>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+(?<responseTime>[-0-9]*)\\s+\"(?<hostName>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userFingerprint>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<sessionId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<requestId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<visitorId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<vegSlice>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<fruitSlice>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<cacheHitMiss>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s*\\Z", regex_output="%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s %16$s %17$s %18$s %19$s %20$s %21s", ), columns=[ Column('ip', DataType.STRING), Column('user', DataType.STRING), Column('requestDate', DataType.TIMESTAMP, date_pattern='dd/MMM/yyyy:HH:mm:ss Z'), Column('httpMethod', DataType.STRING), Column('urlPath', DataType.STRING), Column('queryString', DataType.STRING), Column('httpVersion', DataType.STRING), Column('statusCode', DataType.STRING), Column('bytesSent', DataType.INT), Column('referrer', DataType.STRING), Column('userAgent', DataType.STRING), Column('responseTime', DataType.BIGINT), Column('hostname', DataType.STRING), 
Column('userFingerprint', DataType.STRING), Column('userId', DataType.STRING), Column('sessionId', DataType.STRING), Column('requestId', DataType.STRING), Column('visitorId', DataType.STRING), Column('vegSlice', DataType.STRING), Column('fruitSlice', DataType.STRING), Column('cacheHitMiss', DataType.STRING), ], compression=Compression.BZ2, partitions=[ Column('year', DataType.STRING), Column('week', DataType.STRING), ], )))
def test_hive_table_definition_step(self):
    # Renders copy_to_table.hql for a regex-parsed weblogs dataset and checks
    # that the generated file equals the fixture stored next to this test.

    # NOTE(review): the last specifier of regex_output is `%21s` (no `$`),
    # unlike the other twenty. It is kept verbatim because the generated
    # HQL is compared with a stored fixture that presumably contains the
    # same text -- confirm before changing either side.
    weblogs_format = DataFormat(
        file_format=FileFormat.TEXTFILE,
        row_format=RowFormat.REGEX,
        regex_input="(?<ip>^(?:(?:unknown(?:,\\s)?|(?:\\d+\\.\\d+\\.\\d+\\.\\d+(?:,\\s)?))+)|\\S*)\\s+\\S+\\s+(?<userIdentifier>(?:[^\\[]+|\\$\\S+\\['\\S+'\\]|\\[username\\]))\\s*\\s+\\[(?<requestDate>[^\\]]+)\\]\\s+\"(?<httpMethod>(?:GET|HEAD|POST|PUT|DELETE|TRACE))\\s(?<urlPath>(?:[^ ?]+))(?:\\?(?<queryString>(?:[^ ]+)))?\\sHTTP/(?<httpVersion>(?:[\\d\\.]+))\"\\s+(?<statusCode>[0-9]+)\\s+(?<bytesSent>\\S+)\\s+\"(?<referrer>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userAgent>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+(?<responseTime>[-0-9]*)\\s+\"(?<hostName>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userFingerprint>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<userId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<sessionId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<requestId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<visitorId>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<vegSlice>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<fruitSlice>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s+\"(?<cacheHitMiss>(?:[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*))\"\\s*\\Z",
        regex_output="%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s %16$s %17$s %18$s %19$s %20$s %21s",
    )
    weblogs_columns = [
        Column('ip', 'STRING'),
        Column('user', 'STRING'),
        Column('requestDate', 'TIMESTAMP', date_pattern='dd/MMM/yyyy:HH:mm:ss Z'),
        Column('httpMethod', 'STRING'),
        Column('urlPath', 'STRING'),
        Column('queryString', 'STRING'),
        Column('httpVersion', 'STRING'),
        Column('statusCode', 'STRING'),
        Column('bytesSent', 'INT'),
        Column('referrer', 'STRING'),
        Column('userAgent', 'STRING'),
        Column('responseTime', 'BIGINT'),
        Column('hostname', 'STRING'),
        Column('userFingerprint', 'STRING'),
        Column('userId', 'STRING'),
        Column('sessionId', 'STRING'),
        Column('requestId', 'STRING'),
        Column('visitorId', 'STRING'),
        Column('vegSlice', 'STRING'),
        Column('fruitSlice', 'STRING'),
        Column('cacheHitMiss', 'STRING'),
    ]
    weblogs_partitions = [
        Column('year', 'STRING'),
        Column('week', 'STRING'),
        Column('day', 'STRING'),
    ]
    weblogs = Dataset(data=DatasetData(
        name='weblogs_v01',
        table_name='weblogs',
        location='s3://wsm-log-servers/weblogs/www.retailmenot.com/ec2/',
        data_format=weblogs_format,
        columns=weblogs_columns,
        compression='GZIP',
        partitions=weblogs_partitions,
    ))

    # Stage the HQL template in the scratch directory handed to the step.
    call('mkdir -p /tmp/dart-emr-test/hive/')
    test_dir = os.path.dirname(os.path.abspath(__file__))
    template_path = test_dir + '/../../../engine/emr/steps/hive/copy_to_table.hql'
    shutil.copyfile(template_path, '/tmp/dart-emr-test/hive/copy_to_table.hql')

    # The same dataset is passed as both the staging and the target definition.
    hive_copy_to_table(weblogs, 'weblogs_stage', weblogs, 'weblogs', 's3://test',
                       '/tmp/dart-emr-test/', 'actionid123', None, 1, 1)

    with open(os.path.join(test_dir, 'copy_to_table_weblogs.hql')) as fixture_file:
        expected_contents = fixture_file.read()
    with open('/tmp/dart-emr-test/hive/copy_to_table_weblogs.hql') as generated_file:
        actual_contents = generated_file.read()

    self.assertEqual(expected_contents, actual_contents)
Dataset(data=(DatasetData( name='owen_eu_DW-3213_v3', table_name='owen_eu', location='s3://example-bucket/prd/inbound/overlord/eu-all-events', load_type=LoadType.MERGE, data_format=DataFormat( file_format=FileFormat.TEXTFILE, row_format=RowFormat.JSON, ), compression=Compression.GZIP, partitions=[ Column('year', DataType.STRING), Column('month', DataType.STRING), Column('day', DataType.STRING), ], columns=[ Column('host', DataType.STRING, path='metadata.host'), Column('referer', DataType.STRING, path='metadata.referer'), Column('userAgent', DataType.STRING, path='owen.context.userAgent'), Column('ipAddress', DataType.STRING, path='owen.context.ipAddress'), Column('session', DataType.STRING, path='owen.context.session'), Column('propertyName', DataType.STRING, path='owen.context.propertyName'), Column( 'pageName', DataType.STRING, path='owen.context.pageName'), Column('previousPageName', DataType.STRING, path='owen.context.previousPageName'), Column('viewInstanceUuid', DataType.STRING, path='owen.context.viewInstanceUuid'), Column('previousViewInstanceUuid', DataType.STRING, path='owen.context.previousViewInstanceUuid'), Column( 'pageType', DataType.STRING, path='owen.context.pageType'), Column('udid', DataType.STRING, path='owen.context.udid'), Column('advertiserUuid', DataType.STRING, path='owen.context.advertiserUuid'), Column( 'osFamily', DataType.STRING, path='owen.context.osFamily'), Column( 'latitude', DataType.STRING, path='owen.context.latitude'), Column('longitude', DataType.STRING, path='owen.context.longitude'), Column('userId', DataType.STRING, path='owen.context.custom.legacy.userId'), Column('geofenceId', DataType.STRING, path='owen.context.custom.legacy.geofenceId'), Column( 'userUuid', DataType.STRING, path='owen.context.userUuid'), Column('offerId', DataType.STRING, path='owen.context.inventory[0].inventoryUuid'), Column('inventorySource', DataType.STRING, path='owen.context.inventory[0].inventorySource'), Column('expirationDate', DataType.STRING, 
path='owen.context.inventory[0].expirationDate'), Column('position', DataType.STRING, path='owen.context.inventory[0].position'), Column('offerType', DataType.STRING, path='owen.context.inventory[0].inventoryType'), Column('eventInstanceUuid', DataType.STRING, path='owen.event.eventInstanceUuid'), Column('eventTimestamp', DataType.TIMESTAMP, path='owen.event.eventTimestamp', date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'"), Column('eventPlatform', DataType.STRING, path='owen.event.eventPlatform'), Column('eventCategory', DataType.STRING, path='owen.event.eventCategory'), Column('eventAction', DataType.STRING, path='owen.event.eventAction'), Column( 'eventName', DataType.STRING, path='owen.event.eventName'), Column('eventTarget', DataType.STRING, path='owen.event.eventTarget'), Column('eventVersion', DataType.STRING, path='owen.event.eventVersion'), Column('userQualifier', DataType.STRING, path='owen.context.userQualifier'), Column('outclickUuid', DataType.STRING, path='owen.context.inventory[0].outclickUuid'), Column('inventoryName', DataType.STRING, path='owen.context.inventory[0].inventoryName'), Column('enviroment', DataType.STRING, path='owen.context.environment'), Column('loggedInFlag', DataType.STRING, path='owen.context.loggedInFlag'), Column('eventPlatformVersion', DataType.STRING, path='owen.event.eventPlatformVersion'), Column('appForegroundFlag', DataType.BOOLEAN, path='owen.context.appForegroundFlag'), Column('bluetoothEnabledFlag', DataType.BOOLEAN, path='owen.context.bluetoothEnabledFlag'), Column('favoriteFlag', DataType.BOOLEAN, path='owen.context.favoriteFlag'), Column('locationEnabledFlag', DataType.BOOLEAN, path='owen.context.locationEnabledFlag'), Column('notificationEnabledFlag', DataType.BOOLEAN, path='owen.context.notificationEnabledFlag'), Column('personalizationFlag', DataType.BOOLEAN, path='owen.context.personalizationFlag'), Column('macAddress', DataType.STRING, path='owen.context.macAddress'), Column('osVersion', DataType.STRING, 
path='owen.context.osVersion'), Column('osName', DataType.STRING, path='owen.context.osName'), Column('browserFamily', DataType.STRING, path='owen.context.browserFamily'), Column('deviceCategory', DataType.STRING, path='owen.context.deviceCategory'), Column('deviceMake', DataType.STRING, path='owen.context.mobileDeviceMake'), Column('deviceModel', DataType.STRING, path='owen.context.mobileDeviceModel'), Column('connectionType', DataType.STRING, path='owen.context.connectionType'), Column('browserVersion', DataType.STRING, path='owen.context.browserVersion'), Column('city', DataType.STRING, path='owen.context.city'), Column('country', DataType.STRING, path='owen.context.country'), Column('region', DataType.STRING, path='owen.context.region'), Column('partialSearchTerm', DataType.STRING, path='owen.context.partialSearchTerm'), Column('outclickURL', DataType.STRING, path='owen.context.inventory[0].outRedirectUrl'), Column('clickLocation', DataType.STRING, path='owen.context.inventory[0].clickLocation'), Column('inventoryChannel', DataType.STRING, path='owen.context.inventory[0].inventoryChannel'), Column('brand', DataType.STRING, path='owen.context.inventory[0].brand'), Column('commentsCount', DataType.INT, path='owen.context.inventory[0].commentsCount'), Column('legacyOfferId', DataType.STRING, path='owen.context.custom.legacy.offerIds.offerId'), Column('pageViewHash', DataType.STRING, path='owen.context.custom.legacy.pageViewHash'), Column('vIdInt', DataType.STRING, path='owen.context.custom.legacy.vIdInt'), Column('merchantId', DataType.STRING, path='owen.context.custom.legacy.merchantId'), Column('facebookConnect', DataType.STRING, path='owen.context.custom.facebookConnect'), Column('schemaKey', DataType.STRING, path='schema.key'), ], ))))
# Register the `beacon_native_app_v02` dataset: snappy-compressed parquet
# files laid out in hive-compatible partition folders and loaded by INSERT.
dataset = dart.save_dataset(Dataset(data=DatasetData(
    name='beacon_native_app_v02',
    table_name='beacon_native_app',
    location='s3://example-bucket/prd/beacon/native_app/v2/parquet/snappy',
    hive_compatible_partition_folders=True,
    load_type=LoadType.INSERT,
    data_format=DataFormat('parquet'),
    compression=Compression.SNAPPY,
    partitions=[Column('createdpartition', DataType.STRING)],
    columns=[
        Column('logFileId', DataType.BIGINT),
        Column('lineNumber', DataType.INT),
        Column('created', DataType.BIGINT),
        Column('remoteip', DataType.STRING),
        Column('useragent', DataType.STRING),
        Column('eventType', DataType.STRING),
        Column('appVersion', DataType.STRING),
        Column('advertiserID', DataType.STRING),
        Column('couponsOnPage', DataType.INT),
        Column('coupons', DataType.STRING),
        Column('channel', DataType.STRING),
        Column('geoCouponCount', DataType.STRING),
        Column('geofence', DataType.STRING),
        Column('geofenceTimeSpent', DataType.STRING),
        Column('loginStatus', DataType.STRING),
        Column('products', DataType.STRING),
        Column('session', DataType.STRING),
        Column('systemName', DataType.STRING),
        Column('systemVersion', DataType.STRING),
        Column('udid', DataType.STRING),
        Column('userQualifier', DataType.STRING),
        Column('url', DataType.STRING),
        Column('user_uuid', DataType.STRING),
        Column('userId', DataType.STRING),
        Column('searchType', DataType.STRING),
        Column('searchListTerm', DataType.STRING),
        Column('searchTerm', DataType.STRING),
        Column('emailUUId', DataType.STRING),
        Column('userFingerprint', DataType.STRING),
        Column('locationStatus', DataType.STRING),
        Column('pushNotificationStatus', DataType.BOOLEAN),
        Column('placement', DataType.STRING),
        Column('loc', DataType.STRING),
        Column('ppoi0', DataType.STRING),
        Column('ppoi1', DataType.STRING),
        Column('ppoi2', DataType.STRING),
        Column('ppoi3', DataType.STRING),
        Column('ppoi4', DataType.STRING),
        Column('appLaunchNotificationType', DataType.STRING),
        Column('scenarioName', DataType.STRING),
        Column('behaviorName', DataType.STRING),
        Column('couponType', DataType.STRING),
        Column('couponPosition', DataType.STRING),
        Column('hasQSRContent', DataType.BOOLEAN),
        Column('promptName', DataType.STRING),
        Column('locationPermissionChanage', DataType.STRING),
        Column('couponProblemType', DataType.STRING),
        Column('storeTitle', DataType.STRING),
        Column('mallName', DataType.STRING),
        Column('restaurantName', DataType.STRING),
        # The only column whose type is given as a plain string literal.
        Column('milesAway', 'float'),
        Column('menuItem', DataType.STRING),
        Column('toolName', DataType.STRING),
        Column('toolAction', DataType.STRING),
        Column('toolStep', DataType.STRING),
        Column('mallPosition', DataType.INT),
        Column('recommendStoreName', DataType.STRING),
        Column('recommendStorePosition', DataType.INT),
        Column('favoriteStoreName', DataType.STRING),
        Column('favoriteStoreAction', DataType.STRING),
        Column('favoriteStorePosition', DataType.INT),
        Column('favoriteSiteId', DataType.STRING),
        Column('receiverName', DataType.STRING),
        Column('outclickButtonPrompt', DataType.STRING),
        Column('dataSource', DataType.STRING),
        Column('searchResultCount', DataType.INT),
        Column('searchResultPosition', DataType.INT),
        Column('shareType', DataType.STRING),
        Column('daysUntilExpiration', DataType.INT),
        Column('fireDate', DataType.BIGINT),
        Column('settingsChangeValue', DataType.STRING),
        Column('settingsChangeType', DataType.STRING),
        Column('settingsChangeLocation', DataType.STRING),
        Column('clickAction', DataType.STRING),
        Column('tnt', DataType.STRING),
        Column('previousPage', DataType.STRING),
        Column('clickPage', DataType.STRING),
        Column('launchReason', DataType.STRING),
        Column('taplyticsData', DataType.STRING),
        Column('appCampaign', DataType.STRING),
        Column('accountMethod', DataType.STRING),
        Column('appState', DataType.STRING),
        Column('btStatus', DataType.BOOLEAN),
        Column('btBeaconId', DataType.STRING),
        Column('btBeaconFactoryId', DataType.STRING),
        Column('btBeaconName', DataType.STRING),
        Column('btTimeSpent', DataType.STRING),
        Column('purchaseId', DataType.STRING),
        Column('transactionId', DataType.STRING),
        Column('outclickLink', DataType.STRING),
        Column('outclickPage', DataType.STRING),
        Column('featuredCouponPosition', DataType.INT),
        Column('commentCount', DataType.INT),
        Column('mallCount', DataType.INT),
        Column('clickCount', DataType.INT),
        Column('merchantName', DataType.STRING),
        Column('merchantPosition', DataType.INT),
    ],
)))
def setUp(self):
    """Create the entities the tests in this class work with.

    Everything is saved through a Dart client pointed at localhost:5000 and
    the saved result is kept on ``self``: three datasets, two subscriptions,
    three datastores, three workflows, their actions, two events, two event
    triggers and two super triggers. Later entities reference the ids of
    earlier ones, so the creation order matters.
    """
    dart = Dart(host='localhost', port=5000)
    """ :type dart: dart.client.python.dart_client.Dart """
    self.dart = dart

    # --- datasets -------------------------------------------------------
    # Each location gets a random suffix. The id is substituted for the
    # `%s` placeholder; concatenating it instead would leave a literal
    # "%s" inside the S3 path.
    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset_data = DatasetData('test-dataset0', 'test_dataset_table0',
                               's3://test/dataset/0/%s' % random_id(), df, cs)
    self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    # Kept in a local because the copy action below writes to this location.
    dataset1_location = 's3://test/dataset/1/%s' % random_id()
    dataset_data = DatasetData('test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
    self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2',
                               's3://test/dataset/2/%s' % random_id(), df, cs)
    self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

    # --- subscriptions --------------------------------------------------
    s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
    self.subscription0 = self.dart.save_subscription(s)
    s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
    self.subscription2 = self.dart.save_subscription(s)

    # --- datastores (0 and 1 are templates, 2 is active) ----------------
    dst_args = {'action_sleep_time_in_seconds': 0}
    dst = Datastore(data=DatastoreData('test-datastore0', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.TEMPLATE))
    self.datastore0 = self.dart.save_datastore(dst)
    dst = Datastore(data=DatastoreData('test-datastore1', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.TEMPLATE))
    self.datastore1 = self.dart.save_datastore(dst)
    dst = Datastore(data=DatastoreData('test-datastore2-no-show', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.ACTIVE))
    self.datastore2 = self.dart.save_datastore(dst)

    # --- workflows, one per datastore -----------------------------------
    wf0 = Workflow(data=WorkflowData('test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
    self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
    wf1 = Workflow(data=WorkflowData('test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
    self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
    wf2 = Workflow(data=WorkflowData('test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
    self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

    # --- actions for workflow0 ------------------------------------------
    # a01 consumes subscription0; a03 copies to dataset1's location.
    a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
              'destination_s3_path': dataset1_location}
    a00 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a01 = Action(data=ActionData(NoOpActionTypes.consume_subscription.name,
                                 NoOpActionTypes.consume_subscription.name,
                                 {'subscription_id': self.subscription0.id},
                                 state=ActionState.TEMPLATE))
    a02 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a03 = Action(data=ActionData(NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 a_args,
                                 state=ActionState.TEMPLATE))
    a04 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    self.action00, self.action01, self.action02, self.action03, self.action04 = \
        self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

    # --- actions for workflow1 ------------------------------------------
    # Unlike action00..action04, the save_actions results below are stored
    # without unpacking.
    a10 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset1.id},
                                 state=ActionState.TEMPLATE))
    self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

    # --- actions for datastore2 / workflow2 -----------------------------
    a20 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.HAS_NEVER_RUN))
    a21 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset2.id},
                                 state=ActionState.TEMPLATE))
    # a20 is attached to the datastore itself, a21 to the workflow.
    self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
    self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

    # --- events and their triggers --------------------------------------
    self.event1 = self.dart.save_event(Event(data=EventData('test-event1', state=EventState.ACTIVE)))
    self.event2 = self.dart.save_event(Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

    tr_args = {'event_id': self.event1.id}
    tr = Trigger(data=TriggerData('test-event-trigger1', 'event', [self.workflow1.id], tr_args,
                                  TriggerState.ACTIVE))
    self.event_trigger1 = self.dart.save_trigger(tr)
    tr_args = {'event_id': self.event2.id}
    tr = Trigger(data=TriggerData('test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args,
                                  TriggerState.ACTIVE))
    self.event_trigger2 = self.dart.save_trigger(tr)

    # --- super triggers -------------------------------------------------
    # super_trigger1 lists event_trigger1 as its completed trigger and has
    # no workflows; super_trigger2 lists super_trigger1 and targets workflow1.
    st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
    self.super_trigger1 = self.dart.save_trigger(st)
    st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger2', 'super', [self.workflow1.id], st_args,
                                  TriggerState.ACTIVE))
    self.super_trigger2 = self.dart.save_trigger(st)
Dataset(data=(DatasetData( name='owen_outclick_us_v02', description= 'Owen outclick data, based on overlord schema version. Considered a replacement for outclick events.', table_name='outclick', location= 's3://example-bucket/prd/inbound/overlord/raw-firehose-02/rmn-outclicks', load_type=LoadType.MERGE, data_format=DataFormat( file_format=FileFormat.TEXTFILE, row_format=RowFormat.JSON, ), compression=Compression.GZIP, partitions=[ Column('year', DataType.STRING), Column('month', DataType.STRING), Column('day', DataType.STRING), ], primary_keys=['eventInstanceUuid'], merge_keys=['eventInstanceUuid'], sort_keys=[ 'eventTimestamp', 'eventInstanceUuid', 'derivedEventInstanceId' ], distribution_keys=['eventInstanceUuid'], batch_merge_sort_keys=['owenProcessed DESC'], columns=[ Column('advertiserUuid', DataType.VARCHAR, length=2048, path='owen.context.advertiserUuid'), Column('appBadgeCount', DataType.INT, path='owen.context.appBadgeCount'), Column('appForegroundFlag', DataType.BOOLEAN, path='owen.context.appForegroundFlag'), Column('bluetoothBeaconId', DataType.VARCHAR, length=50, path='owen.context.bluetoothBeaconId'), Column('bluetoothBeaconType', DataType.VARCHAR, length=25, path='owen.context.bluetoothBeaconType'), Column('bluetoothEnabledFlag', DataType.BOOLEAN, path='owen.context.bluetoothEnabledFlag'), Column('breadCrumb', DataType.VARCHAR, length=2048, path='owen.context.breadCrumb'), Column('browserFamily', DataType.VARCHAR, length=50, path='owen.context.browserFamily'), Column('browserVersion', DataType.VARCHAR, length=50, path='owen.context.browserVersion'), Column('carrier', DataType.VARCHAR, length=25, path='owen.context.carrier'), Column('city', DataType.VARCHAR, length=75, path='owen.context.city'), Column('connectionType', DataType.VARCHAR, length=25, path='owen.context.connectionType'), Column('country', DataType.VARCHAR, length=2, path='owen.context.country'), Column('custom', DataType.VARCHAR, path='owen.context.custom'), Column('deviceCategory', 
DataType.VARCHAR, length=2048, path='owen.context.deviceCategory'), Column('deviceFingerprint', DataType.VARCHAR, length=26, path='owen.context.deviceFingerprint'), Column('dma', DataType.INT, path='owen.context.dma'), Column('environment', DataType.VARCHAR, length=2048, path='owen.context.environment'), Column('experimentObject', DataType.VARCHAR, length=1024, path='owen.context.experiment'), Column('failureFlag', DataType.BOOLEAN, path='owen.context.failureFlag'), Column('failureReason', DataType.VARCHAR, length=2048, path='owen.context.failureReason'), Column('favoriteFlag', DataType.BOOLEAN, path='owen.context.favoriteFlag'), Column('featureFlags', DataType.VARCHAR, path='owen.context.featureFlags'), Column('geofenceUuid', DataType.VARCHAR, length=2048, path='owen.context.geofenceUuid'), Column('inventoryCount', DataType.INT, path='owen.context.inventoryCount'), Column('inventory_affiliateNetwork', DataType.VARCHAR, length=50, path='owen.context.inventory[0].affiliateNetwork'), Column('inventory_brand', DataType.VARCHAR, length=100, path='owen.context.inventory[0].brand'), Column('inventory_claimUuid', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].claimUuid'), Column('inventory_clickLocation', DataType.VARCHAR, length=100, path='owen.context.inventory[0].clickLocation'), Column('inventory_commentsCount', DataType.INT, path='owen.context.inventory[0].commentsCount'), Column('inventory_conquestingFlag', DataType.BOOLEAN, path='owen.context.inventory[0].conquestingFlag'), Column('inventory_couponRank', DataType.NUMERIC, precision=18, scale=4, path='owen.context.inventory[0].couponRank'), Column('inventory_deepLinkUrl', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].deepLinkUrl'), Column('inventory_deepLinkUrlScheme', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].deepLinkUrlScheme'), Column('inventory_exclusivityFlag', DataType.BOOLEAN, path='owen.context.inventory[0].exclusivityFlag'), 
Column('inventory_expirationDate', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].expirationDate'), Column('inventory_finalPrice', DataType.NUMERIC, precision=18, scale=4, path='owen.context.inventory[0].finalPrice'), Column('inventory_instoreType', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].instoreType'), Column('inventory_inventoryChannel', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].inventoryChannel'), Column('inventory_inventoryName', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].inventoryName'), Column('inventory_inventorySource', DataType.VARCHAR, length=50, path='owen.context.inventory[0].inventorySource'), Column('inventory_inventoryType', DataType.VARCHAR, length=25, path='owen.context.inventory[0].inventoryType'), Column('inventory_inventoryUuid', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].inventoryUuid'), Column('inventory_lastVerifiedDate', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].lastVerifiedDate'), Column('inventory_monetizableFlag', DataType.BOOLEAN, path='owen.context.inventory[0].monetizableFlag'), Column('inventory_noVotes', DataType.INT, path='owen.context.inventory[0].noVotes'), Column('inventory_onlineType', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].onlineType'), Column('inventory_originalPrice', DataType.NUMERIC, precision=18, scale=4, path='owen.context.inventory[0].originalPrice'), Column('inventory_outRedirectUrl', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].outRedirectUrl'), Column('inventory_outclickUuid', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].outclickUuid'), Column('inventory_parentInventoryUuid', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].parentInventoryUuid'), Column('inventory_personalizationFlag', DataType.BOOLEAN, path='owen.context.inventory[0].personalizationFlag'), Column('inventory_position', DataType.INT, 
path='owen.context.inventory[0].position'), Column('inventory_proximity', DataType.NUMERIC, precision=18, scale=4, path='owen.context.inventory[0].proximity'), Column('inventory_proximityUnit', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].proximityUnit'), Column('inventory_recommendedFlag', DataType.BOOLEAN, path='owen.context.inventory[0].recommendedFlag'), Column('inventory_redemptionChannel', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].redemptionChannel'), Column('inventory_retailCategory', DataType.VARCHAR, length=75, path='owen.context.inventory[0].retailCategory'), Column('inventory_savedFlag', DataType.BOOLEAN, path='owen.context.inventory[0].savedFlag'), Column('inventory_siteUuid', DataType.VARCHAR, length=26, path='owen.context.inventory[0].siteUuid'), Column('inventory_startDate', DataType.VARCHAR, length=2048, path='owen.context.inventory[0].startDate'), Column('inventory_successPercentage', DataType.NUMERIC, precision=18, scale=4, path='owen.context.inventory[0].successPercentage'), Column('inventory_usedByCount', DataType.INT, path='owen.context.inventory[0].usedByCount'), Column('inventory_yesVotes', DataType.INT, path='owen.context.inventory[0].yesVotes'), Column('ipAddress', DataType.VARCHAR, length=45, path='owen.context.ipAddress'), Column('language', DataType.VARCHAR, length=6, path='owen.context.language'), Column('latitude', DataType.NUMERIC, precision=18, scale=4, path='owen.context.latitude'), Column('locationEnabledFlag', DataType.BOOLEAN, path='owen.context.locationEnabledFlag'), Column('loggedInFlag', DataType.BOOLEAN, path='owen.context.loggedInFlag'), Column('longitude', DataType.NUMERIC, precision=18, scale=4, path='owen.context.longitude'), Column('macAddress', DataType.VARCHAR, length=2048, path='owen.context.macAddress'), Column('marketing_adGroup', DataType.VARCHAR, length=2048, path='owen.context.marketing.adGroup'), Column('marketing_campaign', DataType.VARCHAR, length=50, 
path='owen.context.marketing.campaign'), Column('marketing_campaignSendCount', DataType.INT, path='owen.context.marketing.campaignSendCount'), Column('marketing_campaignUuid', DataType.VARCHAR, length=2048, path='owen.context.marketing.campaignUuid'), Column('marketing_cdRank', DataType.INT, path='owen.context.marketing.cdRank'), Column('marketing_channel', DataType.VARCHAR, length=50, path='owen.context.marketing.channel'), Column('marketing_content', DataType.VARCHAR, length=2048, path='owen.context.marketing.content'), Column('marketing_medium', DataType.VARCHAR, length=50, path='owen.context.marketing.medium'), Column('marketing_notificationUuid', DataType.VARCHAR, length=2048, path='owen.context.marketing.notificationUuid'), Column('marketing_source', DataType.VARCHAR, length=100, path='owen.context.marketing.source'), Column('marketing_term', DataType.VARCHAR, length=2048, path='owen.context.marketing.term'), Column('marketing_vendor', DataType.VARCHAR, length=25, path='owen.context.marketing.vendor'), Column('mobileDeviceMake', DataType.VARCHAR, length=25, path='owen.context.mobileDeviceMake'), Column('mobileDeviceModel', DataType.VARCHAR, length=50, path='owen.context.mobileDeviceModel'), Column('notificationEnabledFlag', DataType.BOOLEAN, path='owen.context.notificationEnabledFlag'), Column('osFamily', DataType.VARCHAR, length=25, path='owen.context.osFamily'), Column('osName', DataType.VARCHAR, length=2048, path='owen.context.osName'), Column('osVersion', DataType.VARCHAR, length=2048, path='owen.context.osVersion'), Column('pageName', DataType.VARCHAR, length=2048, path='owen.context.pageName'), Column('pageType', DataType.VARCHAR, length=100, path='owen.context.pageType'), Column('partialSearchTerm', DataType.VARCHAR, length=2048, path='owen.context.partialSearchTerm'), Column('personalizationFlag', DataType.BOOLEAN, path='owen.context.personalizationFlag'), Column('previousPageName', DataType.VARCHAR, length=2048, path='owen.context.previousPageName'), 
Column('previousViewInstanceUuid', DataType.VARCHAR, length=2048, path='owen.context.previousViewInstanceUuid'), Column('promptName', DataType.VARCHAR, length=2048, path='owen.context.promptName'), Column('propertyName', DataType.VARCHAR, length=20, path='owen.context.propertyName'), Column('referrer', DataType.VARCHAR, length=2048, path='owen.context.referrer'), Column('region', DataType.VARCHAR, length=25, path='owen.context.region'), Column('screenHeight', DataType.INT, path='owen.context.screenHeight'), Column('screenWidth', DataType.INT, path='owen.context.screenWidth'), Column('session', DataType.VARCHAR, length=2048, path='owen.context.session'), Column('test_testUuid', DataType.VARCHAR, length=26, path='owen.context.test.testUuid'), Column('udid', DataType.VARCHAR, length=40, path='owen.context.udid'), Column('userAgent', DataType.VARCHAR, length=2048, path='owen.context.userAgent'), Column('userQualifier', DataType.VARCHAR, length=26, path='owen.context.userQualifier'), Column('userUuid', DataType.VARCHAR, length=2048, path='owen.context.userUuid'), Column('vendorObject', DataType.VARCHAR, length=512, path='owen.context.vendor'), Column('viewInstanceUuid', DataType.VARCHAR, length=128, path='owen.context.viewInstanceUuid'), Column('eventAction', DataType.VARCHAR, length=2048, path='owen.event.eventAction'), Column('eventCategory', DataType.VARCHAR, length=25, path='owen.event.eventCategory'), Column('eventInstanceUuid', DataType.VARCHAR, length=26, path='owen.event.eventInstanceUuid'), Column('eventName', DataType.VARCHAR, length=50, path='owen.event.eventName'), Column('eventPlatform', DataType.VARCHAR, length=25, path='owen.event.eventPlatform'), Column('eventPlatformVersion', DataType.VARCHAR, length=25, path='owen.event.eventPlatformVersion'), Column('eventTarget', DataType.VARCHAR, length=2048, path='owen.event.eventTarget'), Column('eventVersion', DataType.VARCHAR, length=25, path='owen.event.eventVersion'), Column('eventTimestamp', 
DataType.DATETIME, date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'", path='owen.event.eventTimestamp'), Column('derivedEventInstanceId', DataType.VARCHAR, length=64, path='metadata.derivedEventInstanceId'), Column('owenProcessed', DataType.DATETIME, date_pattern="yyyy-MM-dd'T'HH:mm:ss'Z'", path='metadata.analyticsTopologyFinishTime'), ], ))))
def _get_engineless_static_subgraphs_by_related_type(graph_entity_service):
    """
    Build the static sub-graph templates created here without reference to
    any engine, grouped by the entity type each template relates to.

    The returned dict has four keys, each mapping to a list of SubGraph:

    * ``None`` -- stand-alone 'dataset' and 'event' templates
    * ``EntityType.dataset`` -- a 'subscription' template
    * ``EntityType.event`` -- an 'event trigger' template
    * ``EntityType.subscription`` -- a 'subscription batch trigger' template

    Templates under a non-None key declare the related entity as their
    PARENT and point at it through ``Ref.parent()``.

    :param graph_entity_service: object providing
        ``to_entity_models_with_randomized_ids``, ``to_graph`` and
        ``to_entity_map``.

    :rtype: dict
    :return: mapping of related entity type (or None) to a list of SubGraph.
    """
    randomize_ids = graph_entity_service.to_entity_models_with_randomized_ids

    def build_sub_graph(name, description, related_type, related_is_a, models, icon):
        # The graph and the entity map are always derived from the same models.
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=graph_entity_service.to_graph(None, models),
            entity_map=graph_entity_service.to_entity_map(models),
            icon=icon,
        )

    # --- templates that stand alone (no related entity) ---
    blank_dataset = Dataset(
        id=Ref.dataset(1),
        data=DatasetData(None, None, None, None, None, columns=[], partitions=[]),
    )
    dataset_models = randomize_ids([blank_dataset])
    event_models = randomize_ids([Event(id=Ref.event(1), data=EventData('event'))])
    standalone_sub_graphs = [
        build_sub_graph('dataset', 'create a new dataset entity', None, None, dataset_models, '⬟'),
        build_sub_graph('event', 'create a new event entity', None, None, event_models, '★'),
    ]

    # --- subscription hanging off a dataset ---
    subscription = Subscription(
        id=Ref.subscription(1),
        data=SubscriptionData('subscription', Ref.parent()),
    )
    subscription_models = randomize_ids([subscription])
    dataset_sub_graphs = [
        build_sub_graph(
            'subscription', 'create a new subscription entity',
            EntityType.dataset, Relationship.PARENT, subscription_models, '⬢',
        ),
    ]

    # --- event trigger hanging off an event ---
    event_trigger_data = TriggerData(
        name='%s_trigger' % event_trigger.name,
        trigger_type_name=event_trigger.name,
        state=TriggerState.INACTIVE,
        workflow_ids=[],
        args={'event_id': Ref.parent()},
    )
    event_trigger_models = randomize_ids([Trigger(id=Ref.trigger(1), data=event_trigger_data)])
    event_sub_graphs = [
        build_sub_graph(
            'event trigger', 'create a new event trigger entity',
            EntityType.event, Relationship.PARENT, event_trigger_models, '▼',
        ),
    ]

    # --- subscription batch trigger hanging off a subscription ---
    batch_trigger_data = TriggerData(
        name='%s_trigger' % subscription_batch_trigger.name,
        trigger_type_name=subscription_batch_trigger.name,
        state=TriggerState.INACTIVE,
        workflow_ids=[],
        args={
            'subscription_id': Ref.parent(),
            'unconsumed_data_size_in_bytes': 1000000,
        },
    )
    batch_trigger_models = randomize_ids([Trigger(id=Ref.trigger(1), data=batch_trigger_data)])
    subscription_sub_graphs = [
        build_sub_graph(
            'subscription batch trigger', 'create a new subscription batch trigger entity',
            EntityType.subscription, Relationship.PARENT, batch_trigger_models, '▼',
        ),
    ]

    return {
        None: standalone_sub_graphs,
        EntityType.dataset: dataset_sub_graphs,
        EntityType.event: event_sub_graphs,
        EntityType.subscription: subscription_sub_graphs,
    }