def testHttpInputOk(self):
    """A minimal, valid HttpInput stage should pass every lint check."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "HttpInput", "url": "http://foo/data.csv",'
        ' "sinks": ["gs:/b2/o2"]}]}')
    ok = {'pass': True}
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'HttpInput': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'url': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'sinks': dict(ok),
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: dict(ok),
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: dict(ok),
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: dict(ok),
        'stages': {'inputs': [stage_checks]},
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testJunk(self):
    """Each malformed config string fails the syntax and IO-stage checks."""
    cases = (
        (None, 'PreTemplate: expected string or buffer'),
        ('', 'PreTemplate: No JSON object could be decoded'),
        ('"', 'PreTemplate: end is out of bounds'),
        ('"fish',
         'PreTemplate: %s' % self.getUnterminatedStringMessage('"fish')),
        ('fish', 'PreTemplate: No JSON object could be decoded'),
    )
    for bad_string, reason in cases:
        logging.info('testing: %r', bad_string)
        checked = linter.PipelineLinter(bad_string)
        expected = {
            linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
            linter.PipelineLinter.CHECK_SYNTAX_VALID: {
                'pass': False,
                'reason': reason,
            },
            linter.PipelineLinter.CHECK_REQ_IO_STAGES: {
                'pass': False,
                'reason': linter.PipelineLinter.MSG_MISSING_IO_STAGES,
            },
        }
        self.assertFalse(checked.results.valid, 'Config:%r ' % bad_string)
        self.assertSameStructure(expected, checked.results.results,
                                 'Config:%r' % bad_string)
def testGcsOutputOk(self):
    """A GcsOutput stage with object and sources should lint cleanly."""
    checked = linter.PipelineLinter(
        '{"outputs": [{"type": "GcsOutput", "object": "gs://b1/o1",'
        ' "sources": ["gs:/b2/o2"]}]}')
    ok = {'pass': True}
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'GcsOutput': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'sources': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'object': dict(ok),
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: dict(ok),
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: dict(ok),
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: dict(ok),
        'stages': {'outputs': [stage_checks]},
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testSimpleFailure(self):
    """A GcsInput missing object/objects fails that at-least-one check."""
    checked = linter.PipelineLinter('{"inputs": [{"type": "GcsInput"}]}')
    either_field = ['object', 'objects']
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'GcsInput': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % either_field: {
            'pass': False,
            'reason':
                linter.StageLinter.MSG_REQUIRE_AT_LEAST_ONE_FMT % either_field,
        },
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def RunPipeline(self, p):
    """Lint, build, and start pipeline `p`, then redirect to its status page.

    Responds with BadRequest (and starts nothing) when linting fails.
    """
    logging.info('Linting pipeline: %s', p.name)
    options_dict = appconfig.AppConfig.GetAppConfig().AsOptionsDict()
    self.expandOptionsDict(options_dict, self.GetAllArguments())
    logging.info('options_dict is:\n%r', options_dict)
    logging.info('input config is:\n%s', p.config)
    lint = linter.PipelineLinter(p.config, options_dict)
    if not lint.results.valid:
        self.BadRequest('Linting for pipeline [%s] FAILED.\n%r',
                        p.name, lint.results.results)
        return
    logging.info('Running pipeline: %s with config\n%s', p.name, lint.config)
    config = lint.config
    # Storage options decide where intermediate objects are written.
    storage = config.get('options', {}).get(appconfig.OPTIONS_STORAGE_KEY, {})
    bucket = storage[appconfig.OPTIONS_STORAGE_BUCKET_KEY]
    prefix = storage.get(appconfig.OPTIONS_STORAGE_PREFIX_KEY, '')
    pipe = runner.PipelineRunner().Build(
        config, gcs.Gcs.UrlCreator(bucket, prefix))
    pipe.max_attempts = 1
    pipe.start()
    p.running_pipeline_ids.append(pipe.pipeline_id)
    p.put()
    # show the status page using the default frontend module
    url = urlparse.urljoin(self.GetModuleUrl('default'),
                           '/_ah/pipeline/status?root=%s' % pipe.pipeline_id)
    self.redirect(str(url))
def post(self):
    """Find and lint a pipeline."""
    payload = json.loads(self.request.body)
    if payload and 'config' in payload:
        options = appconfig.AppConfig.GetAppConfig().AsOptionsDict()
        payload['lint'] = linter.PipelineLinter(
            payload['config'], options).results.results
        self.SendJson(payload)
    else:
        self.NotFound('Unable to find pipeline config in json request.')
def testS3InputOk(self):
    """A fully-specified S3Input stage should pass every lint check."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "S3Input", "object": "s3://b/o",'
        ' "s3Credentials": {"accessKey": "123", "accessSecret": "abc"},'
        ' "sinks": ["gs:/b2/o2"]}]}')
    stage_checks = {}
    for type_key in (u'S3Input', 's3Credentials'):
        stage_checks[linter.StageLinter.CHECK_TYPE_FMT % type_key] = {
            'pass': True
        }
    for field in ('object', 'sinks', 's3Credentials',
                  ['object', 'objects'],
                  's3Credentials.accessKey', 's3Credentials.accessSecret'):
        stage_checks[linter.StageLinter.CHECK_FIELD_EXISTS_FMT % field] = {
            'pass': True
        }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testMissingInputStageType(self):
    """An empty config has valid syntax but no required IO stages."""
    checked = linter.PipelineLinter('{}')
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {
            'pass': False,
            'reason': linter.PipelineLinter.MSG_MISSING_IO_STAGES,
        },
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testLintFullFiles(self):
    """Lints every checked-in .json example/testdata config.

    Walks the testdata and examples directories relative to this test file
    so the check stays in sync with whatever sample configs are committed,
    and expects each one to lint as valid.
    """
    directories = ['src/pipelines/testdata', 'static/examples']
    for directory in directories:
        directory = os.path.join(os.path.dirname(__file__), '../..',
                                 directory)
        filenames = [
            x for x in os.listdir(directory) if x.endswith('.json')
        ]
        logging.info('directory %s files %r', directory, filenames)
        for filename in filenames:
            logging.info('Linting %r from %r', filename,
                         os.path.basename(directory))
            # Use a context manager so the file handle is closed promptly
            # instead of leaking until garbage collection.
            with open(os.path.join(directory, filename)) as f:
                j = f.read()
            pl = linter.PipelineLinter(j)
            # Name the offending file and its lint results on failure.
            self.assertTrue(
                pl.results.valid,
                'Lint failed for %s: %r' % (filename, pl.results.results))
def testBadSection(self):
    """An unknown top-level key ("input") fails key and IO-stage checks."""
    checked = linter.PipelineLinter('{"input": [{"type": "UnknownInput"}]}')
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {
            'pass': False,
            'reason':
                linter.PipelineLinter.MSG_UNKNOWN_CONFIG_KEYS_FMT % u'input',
        },
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {
            'pass': False,
            'reason': linter.PipelineLinter.MSG_MISSING_IO_STAGES,
        },
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testDatastoreInputOk(self):
    """A DatastoreInput with gql, params, and sinks should lint cleanly."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "DatastoreInput", "gql": "SELECT *",'
        ' "params": {"projection": ["a", "b"]},'
        ' "sinks": ["gs:/b2/o2"]}]}')
    ok = {'pass': True}
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'DatastoreInput': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'sinks': dict(ok),
        linter.StageLinter.CHECK_TYPE_FMT % 'params': dict(ok),
        linter.StageLinter.CHECK_TYPE_FMT % 'params.projection': dict(ok),
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % ['gql', 'object']:
            dict(ok),
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: dict(ok),
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: dict(ok),
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: dict(ok),
        'stages': {'inputs': [stage_checks]},
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testBadStageType(self):
    """An unknown stage type fails with a module-not-found reason."""
    checked = linter.PipelineLinter('{"inputs": [{"type": "UnknownInput"}]}')
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'UnknownInput': {
            'pass': False,
            'reason': 'No module named unknowninput',
        },
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testMissingStageType(self):
    """A stage with no "type" key fails the type check with TYPE_NOT_FOUND."""
    checked = linter.PipelineLinter('{"inputs": [{}]}')
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % None: {
            'pass': False,
            'reason': linter.StageLinter.MSG_TYPE_NOT_FOUND,
        },
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testGcsInputNullSink(self):
    """A null entry in "sinks" fails the sinks field check."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
        ' "sinks": [null]}]}')
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'GcsInput': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'sinks': {
            'pass': False,
            'reason': linter.StageLinter.MSG_FIELD_INVALID_FMT % 'null',
        },
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'object': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % ['object', 'objects']: {
            'pass': True
        },
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testHttpInputBadShardSize(self):
    """A shardSize above the App Engine response limit fails that check."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "HttpInput", "url": "http://foo/data.csv",'
        ' "shardSize": 33554433, "sinks": ["gs:/b2/o2"]}]}')
    stage_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'HttpInput': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'url': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'sinks': {'pass': True},
        linter.StageLinter.CHECK_FIELD_EXISTS_FMT % 'shardSize': {
            'pass': False,
            'reason':
                "Invalid value: 'Size exceeds App Engine response limit.'",
        },
    }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {'inputs': [stage_checks]},
    }
    self.assertFalse(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testGcsCompositorOk(self):
    """A GcsInput -> GcsCompositor -> GcsOutput chain should lint cleanly."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
        ' "sinks": ["gs:/b2/o2"]}],'
        '"transforms": [{"type": "GcsCompositor", "sources": ["gs:/b2/o2"],'
        ' "sinks": ["gs:/b3/o3"], "contentType": "text/plain"}],'
        '"outputs": [{"type": "GcsOutput", "object": "gs://b3/o3",'
        ' "sources": ["gs:/b4/o4"]}]}')

    def passing(type_name, fields):
        # Builds the all-passing check dict for one stage.
        checks = {
            linter.StageLinter.CHECK_TYPE_FMT % type_name: {'pass': True}
        }
        for field in fields:
            checks[linter.StageLinter.CHECK_FIELD_EXISTS_FMT % field] = {
                'pass': True
            }
        return checks

    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {
            'inputs': [passing(u'GcsInput',
                               ['sinks', 'object', ['object', 'objects']])],
            'transforms': [passing(u'GcsCompositor',
                                   ['sources', 'sinks', 'contentType'])],
            'outputs': [passing(u'GcsOutput', ['sources', 'object'])],
        },
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testCsvMatchReplaceOk(self):
    """A GcsInput feeding a CsvMatchReplace transform should lint cleanly."""
    checked = linter.PipelineLinter(
        '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
        ' "sinks": ["gs:/b2/o2"]}],'
        ' "transforms": [{"type": "CsvMatchReplace",'
        ' "fieldDelimiter": ",", "columns": [{"wanted": true, '
        ' "type": "STRING", "name": "col1"}],'
        ' "sources": ["gs://bucket/foo.csv"],'
        ' "sinks": ["gs://bucket/results", "gs://bucket/badrows"]}]}')
    transform_checks = {}
    for type_key in (u'CsvMatchReplace', 'columns'):
        transform_checks[linter.StageLinter.CHECK_TYPE_FMT % type_key] = {
            'pass': True
        }
    for field in ('sources', 'sinks', 'fieldDelimiter', 'columns'):
        transform_checks[linter.StageLinter.CHECK_FIELD_EXISTS_FMT % field] = {
            'pass': True
        }
    input_checks = {
        linter.StageLinter.CHECK_TYPE_FMT % u'GcsInput': {'pass': True}
    }
    for field in ('sinks', 'object', ['object', 'objects']):
        input_checks[linter.StageLinter.CHECK_FIELD_EXISTS_FMT % field] = {
            'pass': True
        }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {
            'transforms': [transform_checks],
            'inputs': [input_checks],
        },
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)
def testBigQueryOutputOk(self):
    """A fully-specified BigQueryOutput stage should pass every lint check."""
    checked = linter.PipelineLinter(
        '{"outputs": [{"type": "BigQueryOutput",'
        ' "destinationTable": {"projectId": "123", "tableId": "abc",'
        ' "datasetId": "xyz"}, "schema": {"fields": [{"type": "STRING"}]},'
        ' "sources": ["gs:/b2/o2"]}]}')
    stage_checks = {}
    for type_key in (u'BigQueryOutput', 'destinationTable', 'schema',
                     'schema.fields'):
        stage_checks[linter.StageLinter.CHECK_TYPE_FMT % type_key] = {
            'pass': True
        }
    for field in ('sources', 'destinationTable', 'schema',
                  'destinationTable.projectId', 'destinationTable.tableId',
                  'destinationTable.datasetId', 'schema.fields'):
        stage_checks[linter.StageLinter.CHECK_FIELD_EXISTS_FMT % field] = {
            'pass': True
        }
    expected = {
        linter.PipelineLinter.CHECK_SYNTAX_VALID: {'pass': True},
        linter.PipelineLinter.CHECK_UNKNOWN_CONFIG_KEYS: {'pass': True},
        linter.PipelineLinter.CHECK_REQ_IO_STAGES: {'pass': True},
        'stages': {'outputs': [stage_checks]},
    }
    self.assertTrue(checked.results.valid)
    self.assertSameStructure(expected, checked.results.results)