def setUp(self) -> None:
        """Recreate the local test volume and build the liminal apps.

        Deletes any existing local volume named ``self._VOLUME_NAME``,
        creates a new temporary directory, registers that directory as
        the volume's local path in ``self.liminal_config``, creates the
        volume, and finally builds the apps found under ``../liminal``
        relative to this file.
        """
        # Drop the volume first so a new one can be created below.
        volume_util.delete_local_volume(self._VOLUME_NAME)

        # TMPDIR is set before mkdtemp() is called -- presumably so the
        # directory ends up under /tmp; confirm against tempfile's
        # caching of its default directory.
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = tempfile.mkdtemp()

        # NOTE(review): the /var/folders -> /private/var/folders rewrite
        # looks like a macOS symlink workaround -- confirm.
        volume_path = self.temp_dir.replace("/var/folders",
                                            "/private/var/folders")
        local_volume = {
            'volume': self._VOLUME_NAME,
            'local': {'path': volume_path},
        }
        self.liminal_config = {'volumes': [local_volume]}
        volume_util.create_local_volumes(self.liminal_config, None)

        here = os.path.dirname(__file__)
        apps_dir = os.path.join(here, '../liminal')
        liminal_apps_builder.build_liminal_apps(apps_dir)
    def test_spark_on_k8s(self):
        """Execute a Spark task via the Kubernetes pod executor and check
        the JSON it writes.

        The test recreates the local volume backed by a new temporary
        directory, builds the app under ``../../apps/test_spark_app``,
        wires a ``SparkTask`` running ``wordcount.py`` into a DAG through
        ``KubernetesPodExecutor``, executes every task of the DAG, and
        compares the content of a ``.json`` file found in the ``outputs``
        sub-directory of the temporary directory with the expected lines.
        """
        # --- volume backed by a new temporary directory ---
        volume_util.delete_local_volume(self._VOLUME_NAME)
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = tempfile.mkdtemp()
        # NOTE(review): the /var/folders -> /private/var/folders rewrite
        # looks like a macOS symlink workaround -- confirm.
        volume_path = self.temp_dir.replace("/var/folders",
                                            "/private/var/folders")
        self.liminal_config = {
            'volumes': [
                {'volume': self._VOLUME_NAME, 'local': {'path': volume_path}},
            ],
        }
        volume_util.create_local_volumes(self.liminal_config, None)

        # --- build spark image ---
        spark_app_dir = os.path.join(os.path.dirname(__file__),
                                     '../../apps/test_spark_app')
        liminal_apps_builder.build_liminal_apps(spark_app_dir)

        # --- task definition: the volume is mounted at /mnt/vol1 and the
        # application is told to write to /mnt/vol1/outputs/ ---
        volume_mount = {
            'mount': 'mymount',
            'volume': self._VOLUME_NAME,
            'path': '/mnt/vol1',
        }
        spark_task_config = {
            'task': "my_spark_task",
            'image': "my_spark_image",
            'application_source': 'wordcount.py',
            'application_arguments': ['words.txt', '/mnt/vol1/outputs/'],
            'env_vars': {},
            'mounts': [volume_mount],
        }

        dag = dag_test_utils.create_dag()

        spark_task = SparkTask(
            task_id="my_spark_task",
            dag=dag,
            liminal_config=self.liminal_config,
            pipeline_config={'pipeline': 'my_pipeline'},
            task_config=spark_task_config,
            parent=None,
            trigger_rule='all_success',
        )

        pod_executor = KubernetesPodExecutor(
            task_id='k8s',
            liminal_config=self.liminal_config,
            executor_config={'executor': 'k8s', 'name': 'mypod'},
        )
        pod_executor.apply_task_to_dag(task=spark_task)

        # --- run every task registered on the DAG ---
        for dag_task in dag.tasks:
            print(f'Executing task {dag_task.task_id}')
            context = DummyDag('my_dag', dag_task.task_id).context
            dag_task.execute(context)

        # The trailing "\n" makes split() yield a final empty string, which
        # the actual output is expected to produce as well.
        expected_lines = (
            '{"word":"my","count":1}\n'
            '{"word":"first","count":1}\n'
            '{"word":"liminal","count":1}\n'
            '{"word":"spark","count":1}\n'
            '{"word":"task","count":1}\n'
        ).split("\n")

        # --- collect the produced JSON ---
        # Each matching file overwrites the previous read, so only the last
        # ".json" file listed ends up being compared.
        outputs_dir = os.path.join(self.temp_dir, 'outputs')
        output_text = ''
        for entry in os.listdir(outputs_dir):
            if not entry.endswith(".json"):
                continue
            with open(os.path.join(outputs_dir, entry)) as json_file:
                output_text = json_file.read()

        self.assertEqual(output_text.split("\n"), expected_lines)
Esempio n. 3
0
 def setUp(self) -> None:
     """Reset the local test volume and build the liminal apps.

     Deletes the local volume named ``self._VOLUME_NAME``, stores a newly
     created temporary directory in ``self.temp_dir``, and builds the apps
     found under ``../liminal`` relative to this file.
     """
     volume_util.delete_local_volume(self._VOLUME_NAME)
     self.temp_dir = tempfile.mkdtemp()

     here = os.path.dirname(__file__)
     apps_dir = os.path.join(here, '../liminal')
     liminal_apps_builder.build_liminal_apps(apps_dir)