Esempio n. 1
0
    def setUp(self):
        """Prepare a Kubeflow CLI E2E test environment.

        Skips (exits) when the Kubeflow package is not installed, copies the
        test pipeline into a scratch directory under two distinct pipeline
        names, resolves the KFP endpoint from the cluster's inverse-proxy
        configmap, and creates a kfp client used for post-test cleanup.
        """
        # Py3-style super(), consistent with the other setUp in this file.
        super().setUp()

        # List of packages installed.
        self._pip_list = pip_utils.get_package_names()

        # Check if Kubeflow is installed before running E2E tests.
        if labels.KUBEFLOW_PACKAGE_NAME not in self._pip_list:
            sys.exit('Kubeflow not installed.')

        # Change the encoding for Click since Python 3 is configured to use
        # ASCII as encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'testdata')
        self._testdata_dir_updated = self.tmp_dir
        fileio.makedirs(self._testdata_dir_updated)

        # Randomized pipeline name so concurrent test runs do not collide.
        self._pipeline_name = ('cli-kubeflow-e2e-test-' +
                               test_utils.generate_random_id())
        # Lazy %-style logging args: formatting is skipped when INFO is off.
        absl.logging.info('Pipeline name is %s', self._pipeline_name)
        self._pipeline_name_v2 = self._pipeline_name + '_v2'

        orig_pipeline_path = os.path.join(self._testdata_dir,
                                          'test_pipeline_kubeflow_1.py')
        self._pipeline_path = os.path.join(self._testdata_dir_updated,
                                           'test_pipeline_kubeflow_1.py')
        self._pipeline_path_v2 = os.path.join(self._testdata_dir_updated,
                                              'test_pipeline_kubeflow_2.py')

        # Materialize two copies of the test pipeline, each rewritten to carry
        # its own unique pipeline name.
        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name)
        self.assertTrue(fileio.exists(self._pipeline_path))
        test_utils.copy_and_change_pipeline_name(
            orig_pipeline_path, self._pipeline_path_v2,
            'chicago_taxi_pipeline_kubeflow', self._pipeline_name_v2)
        self.assertTrue(fileio.exists(self._pipeline_path_v2))

        # Endpoint URL, parsed from the inverse-proxy configmap in-cluster.
        self._endpoint = self._get_endpoint(
            subprocess.check_output(
                'kubectl describe configmap inverse-proxy-config -n kubeflow'.
                split()))
        absl.logging.info('ENDPOINT: %s', self._endpoint)

        self._pipeline_package_path = '{}.tar.gz'.format(self._pipeline_name)

        try:
            # Create a kfp client for cleanup after running commands.
            self._client = kfp.Client(host=self._endpoint)
        except kfp_server_api.rest.ApiException as err:
            # Best effort: log and continue; tests may still run without the
            # cleanup client.
            absl.logging.info(err)
Esempio n. 2
0
  def setUp(self):
    """Prepare an isolated Airflow home and test data for the CLI E2E test."""
    super().setUp()

    # Snapshot the set of installed pip packages.
    self._pip_list = pip_utils.get_package_names()

    # Bail out early when Apache Airflow is absent; these tests require it.
    if labels.AIRFLOW_PACKAGE_NAME not in self._pip_list:
      sys.exit('Apache Airflow not installed.')

    # Click needs a non-ASCII locale under Python 3; force UTF-8 if needed.
    if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
      os.environ['LANG'] = 'en_US.utf-8'

    # Point both AIRFLOW_HOME and HOME at a scratch directory so the test
    # never touches the real user environment.
    self._airflow_home = os.path.join(self.tmp_dir, 'airflow')
    for env_name in ('AIRFLOW_HOME', 'HOME'):
      self.enter_context(
          test_case_utils.override_env_var(env_name, self._airflow_home))

    absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                      self._airflow_home)

    # Locate the bundled testdata next to this module.
    self._testdata_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    self._pipeline_name = 'chicago_taxi_simple'
    self._pipeline_path = os.path.join(self._testdata_dir,
                                       'test_pipeline_airflow_1.py')

    # Stage the Chicago taxi sample data and user module into the scratch
    # Airflow home.
    examples_root = os.path.dirname(
        os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
    chicago_taxi_pipeline_dir = os.path.join(examples_root, 'examples',
                                             'chicago_taxi_pipeline')
    source_data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
    source_content = fileio.listdir(source_data_dir)
    assert source_content, 'content in {} is empty'.format(source_data_dir)
    target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                   'simple')
    io_utils.copy_dir(source_data_dir, target_data_dir)
    assert fileio.isdir(target_data_dir)
    copied_content = fileio.listdir(target_data_dir)
    assert copied_content, 'content in {} is {}'.format(target_data_dir,
                                                        copied_content)
    io_utils.copy_file(
        os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
        os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

    # CLI runner used by the individual test cases.
    self.runner = click_testing.CliRunner()
Esempio n. 3
0
def detect_handler(flags_dict: Dict[Text, Any]) -> base_handler.BaseHandler:
    """Detect handler from the environment.

    Details:
      When the engine flag is set to 'auto', this method first finds all the
      packages in the local environment. The environment is first checked for
      multiple orchestrators and if true the user must rerun the command with
      the required engine. If only one orchestrator is present, the engine is
      set to that.

    Args:
      flags_dict: A dictionary containing the flags of a command.

    Returns:
      Corresponding Handler object.
    """
    packages_list = pip_utils.get_package_names()
    # Evaluate each orchestrator's presence exactly once.
    airflow_installed = labels.AIRFLOW_PACKAGE_NAME in packages_list
    kubeflow_installed = labels.KUBEFLOW_PACKAGE_NAME in packages_list

    # Ambiguous environment: refuse to guess between two orchestrators.
    if airflow_installed and kubeflow_installed:
        sys.exit(
            'Multiple orchestrators found. Choose one using --engine flag.')
    if airflow_installed:
        click.echo('Detected Airflow.')
        click.echo(
            'Use --engine flag if you intend to use a different orchestrator.')
        flags_dict[labels.ENGINE_FLAG] = 'airflow'
        # Imported lazily so the CLI still works without Airflow installed.
        from tfx.tools.cli.handler import airflow_handler  # pylint: disable=g-import-not-at-top
        return airflow_handler.AirflowHandler(flags_dict)
    elif kubeflow_installed:
        click.echo('Detected Kubeflow.')
        click.echo(
            'Use --engine flag if you intend to use a different orchestrator.')
        flags_dict[labels.ENGINE_FLAG] = 'kubeflow'
        # Imported lazily so the CLI still works without Kubeflow installed.
        from tfx.tools.cli.handler import kubeflow_handler  # pylint: disable=g-import-not-at-top
        return kubeflow_handler.KubeflowHandler(flags_dict)
    else:
        # Neither orchestrator package found: fall back to the Beam engine.
        click.echo('Detected Beam.')
        click.echo(
            '[WARNING] Default engine will be changed to "local" in the near future.'
        )
        click.echo(
            'Use --engine flag if you intend to use a different orchestrator.')
        flags_dict[labels.ENGINE_FLAG] = 'beam'
        from tfx.tools.cli.handler import beam_handler  # pylint: disable=g-import-not-at-top
        return beam_handler.BeamHandler(flags_dict)
Esempio n. 4
0
    def setUp(self):
        """Set up an isolated Airflow environment for the CLI E2E tests.

        Redirects AIRFLOW_HOME/HOME to a scratch directory, stages the
        Chicago taxi sample data, starts a MySQL container for the Airflow
        metadata DB and initializes it via self._airflow_initdb().
        """
        super(CliAirflowEndToEndTest, self).setUp()

        # List of packages installed.
        self._pip_list = pip_utils.get_package_names()

        # Check if Apache Airflow is installed before running E2E tests.
        if labels.AIRFLOW_PACKAGE_NAME not in self._pip_list:
            sys.exit('Apache Airflow not installed.')

        # Change the encoding for Click since Python 3 is configured to use ASCII as
        # encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup airflow_home in a temp directory
        self._airflow_home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName, 'airflow')
        # Save the original env values; presumably restored in tearDown —
        # tearDown is not visible in this chunk, confirm before relying on it.
        self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
        os.environ['AIRFLOW_HOME'] = self._airflow_home
        self._old_home = os.environ.get('HOME')
        # HOME is redirected too so user-level config lookups stay inside
        # the scratch directory.
        os.environ['HOME'] = self._airflow_home
        absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                          self._airflow_home)

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        self._pipeline_name = 'chicago_taxi_simple'
        self._pipeline_path = os.path.join(self._testdata_dir,
                                           'test_pipeline_airflow_1.py')

        # Copy data.
        # Walks four directory levels up from this file, then into the
        # bundled Chicago taxi example.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = fileio.listdir(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                       'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert fileio.isdir(target_data_dir)
        content = fileio.listdir(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

        # Randomized container name so parallel test runs do not collide.
        self._mysql_container_name = 'airflow_' + test_utils.generate_random_id(
        )
        db_port = airflow_test_utils.create_mysql_container(
            self._mysql_container_name)
        # Registered as cleanup so the container is removed even when a later
        # step of setUp raises.
        self.addCleanup(self._cleanup_mysql_container)
        # NOTE(review): '[email protected]' below looks like a scraping/redaction
        # artifact replacing the real 'user@host' part of the MySQL URL —
        # confirm the intended connection string against upstream.
        os.environ['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = (
            'mysql://[email protected]:%d/airflow' % db_port)
        # Do not load examples to make this a bit faster.
        os.environ['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False'

        self._airflow_initdb()

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()
Esempio n. 5
0
 def test_get_package_names(self, mock_subprocess):
   # NOTE(review): mock_subprocess is injected by a mock.patch decorator that
   # is above this chunk and not visible here — confirm it patches the
   # subprocess call made by pip_utils.get_package_names.
   # The (mocked) pip listing should yield exactly these names, in any order.
   self.assertSameElements(pip_utils.get_package_names(),
                           ['absl-py', 'aiohttp', 'alembic'])
   # The package list must be fetched with a single subprocess invocation.
   mock_subprocess.assert_called_once()