def execute(self, context):
    """Launch the templated Dataflow job through the GCP Dataflow hook."""
    dataflow_hook = DataFlowHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        poll_sleep=self.poll_sleep,
    )
    dataflow_hook.start_template_dataflow(
        self.job_name,
        self.dataflow_default_options,
        self.parameters,
        self.template,
    )
def execute(self, context):
    """Execute the python dataflow job.

    Stages the job's ``.py`` file locally (downloading it from GCS when
    needed), merges the default and per-task options (per-task options
    win), converts option names from lowerCamelCase to snake_case, and
    hands the job off to the Dataflow hook.
    """
    bucket_helper = GoogleCloudBucketHelper(
        self.gcp_conn_id, self.delegate_to)
    # Dataflow needs a local file path, so stage the script from GCS.
    self.py_file = bucket_helper.google_cloud_to_local(self.py_file)
    hook = DataFlowHook(gcp_conn_id=self.gcp_conn_id,
                        delegate_to=self.delegate_to,
                        poll_sleep=self.poll_sleep)
    dataflow_options = self.dataflow_default_options.copy()
    dataflow_options.update(self.options)

    def camel_to_snake(name):
        # Convert argument names from lowerCamelCase to snake_case
        # (a named def instead of an assigned lambda, per PEP 8 E731).
        return re.sub(r'[A-Z]', lambda x: '_' + x.group(0).lower(), name)

    formatted_options = {camel_to_snake(key): value
                         for key, value in dataflow_options.items()}
    hook.start_python_dataflow(
        self.job_name, formatted_options,
        self.py_file, self.py_options,
        py_interpreter=self.py_interpreter)
def execute(self, context):
    """Execute the Java dataflow job.

    Depending on ``check_if_running``, either ignores duplicate jobs,
    skips submission when a job with the same name is already running,
    or waits for the running job to finish before submitting.  When a
    job is started, the jar is first staged locally from GCS if needed.
    """
    import time  # local import: only needed for the WaitForRun poll loop

    hook = DataFlowHook(gcp_conn_id=self.gcp_conn_id,
                        delegate_to=self.delegate_to,
                        poll_sleep=self.poll_sleep)
    dataflow_options = copy.copy(self.dataflow_default_options)
    dataflow_options.update(self.options)
    is_running = False
    if self.check_if_running != CheckJobRunning.IgnoreJob:
        is_running = hook.is_job_dataflow_running(self.job_name,
                                                  dataflow_options)
        while is_running and \
                self.check_if_running == CheckJobRunning.WaitForRun:
            # BUGFIX: the original re-polled in a tight busy loop;
            # sleep between checks to avoid hammering the Dataflow API.
            time.sleep(self.poll_sleep)
            is_running = hook.is_job_dataflow_running(self.job_name,
                                                      dataflow_options)
    if not is_running:
        bucket_helper = GoogleCloudBucketHelper(
            self.gcp_conn_id, self.delegate_to)
        # Dataflow needs a local jar path; stage it from GCS if needed.
        self.jar = bucket_helper.google_cloud_to_local(self.jar)
        hook.start_java_dataflow(self.job_name, dataflow_options,
                                 self.jar, self.job_class, True,
                                 self.multiple_jobs)
def setUp(self):
    """Build a DataFlowHook whose GCP base-class ``__init__`` is stubbed out."""
    patch_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
    with mock.patch(patch_target, new=mock_init):
        self.dataflow_hook = DataFlowHook(gcp_conn_id='test')