Example #1
    def setUp(self):
        super(CliKubeflowEndToEndTest, self).setUp()

        # List of packages installed.
        self._pip_list = pip_utils.get_package_names()

        # Check if Kubeflow is installed before running E2E tests.
        if labels.KUBEFLOW_PACKAGE_NAME not in self._pip_list:
            sys.exit('Kubeflow not installed.')

        # Change the encoding for Click if Python 3 is configured to use ASCII
        # as the encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'testdata')
        self._testdata_dir_updated = self.tmp_dir
        fileio.makedirs(self._testdata_dir_updated)

        self._pipeline_name = ('cli-kubeflow-e2e-test-' +
                               test_utils.generate_random_id())
        absl.logging.info('Pipeline name is %s' % self._pipeline_name)
        self._pipeline_name_v2 = self._pipeline_name + '_v2'

        orig_pipeline_path = os.path.join(self._testdata_dir,
                                          'test_pipeline_kubeflow_1.py')
        self._pipeline_path = os.path.join(self._testdata_dir_updated,
                                           'test_pipeline_kubeflow_1.py')
        self._pipeline_path_v2 = os.path.join(self._testdata_dir_updated,
                                              'test_pipeline_kubeflow_2.py')

        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name)
        self.assertTrue(fileio.exists(self._pipeline_path))
        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path_v2,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name_v2)
        self.assertTrue(fileio.exists(self._pipeline_path_v2))

        # Endpoint URL
        self._endpoint = self._get_endpoint(
            subprocess.check_output(
                'kubectl describe configmap inverse-proxy-config -n kubeflow'.
                split()))
        absl.logging.info('ENDPOINT: ' + self._endpoint)

        self._pipeline_package_path = '{}.tar.gz'.format(self._pipeline_name)

        try:
            # Create a kfp client for cleanup after running commands.
            self._client = kfp.Client(host=self._endpoint)
        except kfp_server_api.rest.ApiException as err:
            absl.logging.info(err)
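
The _get_endpoint helper called above is not part of this example. A minimal sketch of what it could look like, assuming the inverse-proxy-config config map lists the KFP proxy hostname as a line ending in googleusercontent.com (both the parsing strategy and that suffix are assumptions):

    def _get_endpoint(self, config):
        # kubectl output arrives as bytes; look for the inverse-proxy hostname.
        for line in config.decode('utf-8').splitlines():
            line = line.strip()
            if line.endswith('googleusercontent.com'):
                return line
        raise ValueError('No KFP endpoint found in inverse-proxy-config.')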
Example #2
  def _prepare_airflow_with_mysql(self):
    # Start a dedicated MySQL container, point Airflow's SQLAlchemy connection
    # at it, and initialize the Airflow metadata database.
    self._mysql_container_name = 'airflow_' + test_utils.generate_random_id()
    db_port = airflow_test_utils.create_mysql_container(
        self._mysql_container_name)
    self.addCleanup(self._cleanup_mysql_container)
    self.enter_context(
        test_case_utils.override_env_var(
            'AIRFLOW__CORE__SQL_ALCHEMY_CONN',
            'mysql://[email protected]:%d/airflow' % db_port))
    # Do not load examples to make this a bit faster.
    self.enter_context(
        test_case_utils.override_env_var('AIRFLOW__CORE__LOAD_EXAMPLES',
                                         'False'))

    self._airflow_initdb()
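
The two helpers referenced here are not shown. Sketches consistent with the calls used in the other examples (airflow_test_utils.delete_mysql_container and the airflow initdb command), though the real bodies may differ:

  def _cleanup_mysql_container(self):
    # Remove the MySQL container started by _prepare_airflow_with_mysql.
    airflow_test_utils.delete_mysql_container(self._mysql_container_name)

  def _airflow_initdb(self):
    # Initialize the Airflow metadata database against the MySQL backend.
    subprocess.run(['airflow', 'initdb'], check=True)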
Example #3
    def setUp(self):
        super(CliKubeflowEndToEndTest, self).setUp()
        random.seed(datetime.datetime.now())

        # List of packages installed.
        self._pip_list = subprocess.check_output(
            ['pip', 'freeze', '--local']).decode('utf-8')

        # Check if Kubeflow is installed before running E2E tests.
        if labels.KUBEFLOW_PACKAGE_NAME not in self._pip_list:
            sys.exit('Kubeflow not installed.')

        # Change the encoding for Click if Python 3 is configured to use ASCII
        # as the encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'testdata')
        self._testdata_dir_updated = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        tf.io.gfile.makedirs(self._testdata_dir_updated)

        self._pipeline_name = ('cli-kubeflow-e2e-test-' +
                               test_utils.generate_random_id())
        absl.logging.info('Pipeline name is %s' % self._pipeline_name)
        self._pipeline_name_v2 = self._pipeline_name + '_v2'

        orig_pipeline_path = os.path.join(self._testdata_dir,
                                          'test_pipeline_kubeflow_1.py')
        self._pipeline_path = os.path.join(self._testdata_dir_updated,
                                           'test_pipeline_kubeflow_1.py')
        self._pipeline_path_v2 = os.path.join(self._testdata_dir_updated,
                                              'test_pipeline_kubeflow_2.py')

        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name)
        self.assertTrue(tf.io.gfile.exists(self._pipeline_path))
        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path_v2,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name_v2)
        self.assertTrue(tf.io.gfile.exists(self._pipeline_path_v2))

        # Endpoint URL
        self._endpoint = self._get_endpoint(
            subprocess.check_output(
                'kubectl describe configmap inverse-proxy-config -n kubeflow'.
                split()))
        absl.logging.info('ENDPOINT: ' + self._endpoint)

        # Change home directories
        self._olddir = os.getcwd()
        self._old_kubeflow_home = os.environ.get('KUBEFLOW_HOME')
        os.environ['KUBEFLOW_HOME'] = os.path.join(tempfile.mkdtemp(),
                                                   'CLI_Kubeflow_Pipelines')
        self._kubeflow_home = os.environ['KUBEFLOW_HOME']
        tf.io.gfile.makedirs(self._kubeflow_home)
        os.chdir(self._kubeflow_home)

        self._handler_pipeline_path = os.path.join(self._kubeflow_home,
                                                   self._pipeline_name)
        self._handler_pipeline_args_path = os.path.join(
            self._handler_pipeline_path, 'pipeline_args.json')
        self._pipeline_package_path = '{}.tar.gz'.format(self._pipeline_name)

        try:
            # Create a kfp client for cleanup after running commands.
            self._client = kfp.Client(host=self._endpoint)
        except kfp_server_api.rest.ApiException as err:
            absl.logging.info(err)
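
This setUp changes the working directory and KUBEFLOW_HOME but the matching restore is not shown. A hypothetical tearDown counterpart (the real test may clean up differently):

    def tearDown(self):
        super(CliKubeflowEndToEndTest, self).tearDown()
        # Undo the working-directory and KUBEFLOW_HOME changes made in setUp.
        os.chdir(self._olddir)
        if self._old_kubeflow_home:
            os.environ['KUBEFLOW_HOME'] = self._old_kubeflow_home
        else:
            os.environ.pop('KUBEFLOW_HOME', None)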
Example #4
    def setUp(self):
        super(AirflowEndToEndTest, self).setUp()
        # setup airflow_home in a temp directory, config and init db.
        self._airflow_home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', tempfile.mkdtemp()),
            self._testMethodName)
        self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
        os.environ['AIRFLOW_HOME'] = self._airflow_home
        self._old_home = os.environ.get('HOME')
        os.environ['HOME'] = self._airflow_home
        absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                          self._airflow_home)

        self._mysql_container_name = (
            'airflow_' + test_utils.generate_random_id())
        db_port = airflow_test_utils.create_mysql_container(
            self._mysql_container_name)
        self.addCleanup(airflow_test_utils.delete_mysql_container,
                        self._mysql_container_name)
        os.environ['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = (
            'mysql://[email protected]:%d/airflow' % db_port)

        # Set a couple of important environment variables. See
        # https://airflow.apache.org/howto/set-config.html for details.
        os.environ['AIRFLOW__CORE__DAGS_FOLDER'] = os.path.join(
            self._airflow_home, 'dags')
        os.environ['AIRFLOW__CORE__BASE_LOG_FOLDER'] = os.path.join(
            self._airflow_home, 'logs')
        # Do not load examples to make this a bit faster.
        os.environ['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False'
        # Following environment variables make scheduler process dags faster.
        os.environ['AIRFLOW__SCHEDULER__JOB_HEARTBEAT_SEC'] = '1'
        os.environ['AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC'] = '1'
        os.environ['AIRFLOW__SCHEDULER__RUN_DURATION'] = '-1'
        os.environ['AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL'] = '1'
        os.environ['AIRFLOW__SCHEDULER__PRINT_STATS_INTERVAL'] = '30'

        # Following fields are specific to the chicago_taxi_simple example.
        self._dag_id = 'chicago_taxi_simple'
        self._run_id = 'manual_run_id_1'
        # This execution date must be after the start_date in
        # chicago_taxi_simple but before the current date.
        self._execution_date = '2019-02-01T01:01:01'
        self._all_tasks = [
            'CsvExampleGen',
            'Evaluator',
            'ExampleValidator',
            'Pusher',
            'SchemaGen',
            'StatisticsGen',
            'Trainer',
            'Transform',
        ]
        # Copy dag file and data.
        chicago_taxi_pipeline_dir = os.path.dirname(__file__)
        simple_pipeline_file = os.path.join(chicago_taxi_pipeline_dir,
                                            'taxi_pipeline_simple.py')

        io_utils.copy_file(
            simple_pipeline_file,
            os.path.join(self._airflow_home, 'dags',
                         'taxi_pipeline_simple.py'))

        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = fileio.listdir(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                       'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert fileio.isdir(target_data_dir)
        content = fileio.listdir(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

        # Initialize database.
        subprocess.run(['airflow', 'initdb'], check=True)
        subprocess.run(['airflow', 'unpause', self._dag_id], check=True)
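
setUp unpauses the DAG but stops before a run is started. One way a test method could trigger a run, assuming the Airflow 1.x trigger_dag CLI and a made-up test name:

    def testTriggerDag(self):
        # Kick off one run of the prepared DAG at the fixed execution date.
        subprocess.run([
            'airflow', 'trigger_dag', '-r', self._run_id, '-e',
            self._execution_date, self._dag_id
        ], check=True)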
Example #5
    def setUp(self):
        super(CliAirflowEndToEndTest, self).setUp()

        # List of packages installed.
        self._pip_list = subprocess.check_output(
            ['pip', 'freeze', '--local']).decode('utf-8')

        # Check if Apache Airflow is installed before running E2E tests.
        if labels.AIRFLOW_PACKAGE_NAME not in self._pip_list:
            sys.exit('Apache Airflow not installed.')

        # Change the encoding for Click if Python 3 is configured to use ASCII
        # as the encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup airflow_home in a temp directory
        self._airflow_home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName, 'airflow')
        self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
        os.environ['AIRFLOW_HOME'] = self._airflow_home
        self._old_home = os.environ.get('HOME')
        os.environ['HOME'] = self._airflow_home
        absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                          self._airflow_home)

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        self._pipeline_name = 'chicago_taxi_simple'
        self._pipeline_path = os.path.join(self._testdata_dir,
                                           'test_pipeline_airflow_1.py')

        # Copy data.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = tf.io.gfile.listdir(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                       'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert tf.io.gfile.isdir(target_data_dir)
        content = tf.io.gfile.listdir(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

        self._mysql_container_name = (
            'airflow_' + test_utils.generate_random_id())
        db_port = airflow_test_utils.create_mysql_container(
            self._mysql_container_name)
        self.addCleanup(airflow_test_utils.delete_mysql_container,
                        self._mysql_container_name)
        os.environ['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = (
            'mysql://[email protected]:%d/airflow' % db_port)
        # Do not load examples to make this a bit faster.
        os.environ['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False'

        self._airflow_initdb()

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()
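
A usage sketch for the runner initialized above, invoking the TFX CLI to create the pipeline. The cli_group entry point and the exact flag names are assumptions, not confirmed by this example:

    def testPipelineCreate(self):
        # Hypothetical invocation; flag names assumed from the TFX CLI.
        result = self.runner.invoke(cli_group, [
            'pipeline', 'create', '--engine', 'airflow', '--pipeline_path',
            self._pipeline_path
        ])
        self.assertEqual(0, result.exit_code)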