Example #1
    def test_massage_uri(self):

        finish = HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing(
            'namenode')
        clear_sys_caches()

        try:
            assert_equal('', _massage_uri(''))

            assert_equal('namenode/data', _massage_uri('hdfs:///data'))

            assert_equal('hdfs://nn:11/data',
                         _massage_uri('hdfs://nn:11/data'))

            assert_equal('hdfs://logical/data',
                         _massage_uri('hdfs://logical/data'))

            assert_equal('namenode/data', _massage_uri('/data'))

            assert_equal('file:///data', _massage_uri('file:///data'))
        finally:
            finish()

        finish = HDFS_CLUSTERS['default'].FS_DEFAULTFS.set_for_testing(
            'hdfs://fs_defaultfs:8021')
        clear_sys_caches()

        try:
            assert_equal('', _massage_uri(''))

            assert_equal('hdfs://fs_defaultfs:8021/data',
                         _massage_uri('hdfs:///data'))

            assert_equal('hdfs://nn:11/data',
                         _massage_uri('hdfs://nn:11/data'))

            assert_equal('hdfs://logical/data',
                         _massage_uri('hdfs://logical/data'))

            assert_equal('hdfs://fs_defaultfs:8021/data',
                         _massage_uri('/data'))

            assert_equal('file:///data', _massage_uri('file:///data'))
        finally:
            finish()
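
Taken together, these assertions pin down the contract of _massage_uri: the empty string and fully qualified URIs pass through untouched, while authority-less hdfs:/// URIs and bare paths are prefixed with the logical name when one is configured, otherwise with FS_DEFAULTFS. A minimal sketch of that behavior, as a hypothetical helper (the real implementation in Hue may differ in detail):

from urllib.parse import urlparse

def _massage_uri_sketch(uri, logical_name, fs_defaultfs):
    # '' passes through untouched.
    if not uri:
        return uri
    prefix = logical_name or fs_defaultfs
    parsed = urlparse(uri)
    if uri.startswith('hdfs://') and not parsed.netloc:
        return prefix + parsed.path  # 'hdfs:///data' -> '<prefix>/data'
    if not parsed.scheme:
        return prefix + uri          # '/data' -> '<prefix>/data'
    return uri  # 'hdfs://nn:11/data', 'file:///data', etc. stay as-is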
Example #2
def shared_cluster():
    global _shared_cluster

    if _shared_cluster is None:
        if is_live_cluster():
            cluster = LiveHdfs()
        else:
            cluster = PseudoHdfs4()
            atexit.register(cluster.stop)

            cluster.start()

            fqdn = socket.getfqdn()
            webhdfs_url = "http://%s:%s/webhdfs/v1" % (
                fqdn,
                cluster.dfs_http_port,
            )

            closers = [
                hadoop.conf.HDFS_CLUSTERS['default'].FS_DEFAULTFS.
                set_for_testing(cluster.fs_default_name),
                hadoop.conf.HDFS_CLUSTERS['default'].WEBHDFS_URL.
                set_for_testing(webhdfs_url),
                hadoop.conf.YARN_CLUSTERS['default'].HOST.set_for_testing(
                    fqdn),
                hadoop.conf.YARN_CLUSTERS['default'].PORT.set_for_testing(
                    cluster._rm_port),
                hadoop.conf.YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.
                set_for_testing('http://%s:%s' % (
                    cluster._fqdn,
                    cluster._rm_webapp_port,
                )),
                hadoop.conf.YARN_CLUSTERS['default'].PROXY_API_URL.
                set_for_testing('http://%s:%s' % (
                    cluster._fqdn,
                    cluster._rm_webapp_port,
                )),
                hadoop.conf.YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.
                set_for_testing('%s:%s' % (
                    cluster._fqdn,
                    cluster._jh_web_port,
                )),
            ]

            old_caches = clear_sys_caches()

            def restore_config():
                restore_sys_caches(old_caches)
                for x in closers:
                    x()

            cluster.shutdown_hook = restore_config

        _shared_cluster = cluster

    return _shared_cluster
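
A hedged usage sketch: the first caller pays the cluster startup cost, later callers reuse the cached instance, and atexit handles teardown. The fs.exists call is an assumption about the cluster object's API, based on the WebHDFS setup above:

def test_fs_root_exists():
    cluster = shared_cluster()
    # No explicit teardown: atexit.register(cluster.stop) above handles it.
    assert cluster.fs.exists('/')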
Example #3
  def test_massage_uri(self):

    finish = HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode')
    clear_sys_caches()

    try:
      assert_equal('', _massage_uri(''))

      assert_equal('namenode/data', _massage_uri('hdfs:///data'))

      assert_equal('hdfs://nn:11/data', _massage_uri('hdfs://nn:11/data'))

      assert_equal('hdfs://logical/data', _massage_uri('hdfs://logical/data'))

      assert_equal('namenode/data', _massage_uri('/data'))

      assert_equal('file:///data', _massage_uri('file:///data'))
    finally:
      finish()

    finish = HDFS_CLUSTERS['default'].FS_DEFAULTFS.set_for_testing('hdfs://fs_defaultfs:8021')
    clear_sys_caches()

    try:
      assert_equal('', _massage_uri(''))

      assert_equal('hdfs://fs_defaultfs:8021/data', _massage_uri('hdfs:///data'))

      assert_equal('hdfs://nn:11/data', _massage_uri('hdfs://nn:11/data'))

      assert_equal('hdfs://logical/data', _massage_uri('hdfs://logical/data'))

      assert_equal('hdfs://fs_defaultfs:8021/data', _massage_uri('/data'))

      assert_equal('file:///data', _massage_uri('file:///data'))
    finally:
      finish()
Example #4
def test_config_validator_basic():
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({'default': {}}),
    conf.HDFS_CLUSTERS['default'].WEBHDFS_URL.set_for_testing('http://not.the.re:50070/'),
    conf.MR_CLUSTERS.set_for_testing({'default': {}}),
    conf.MR_CLUSTERS['default'].JT_THRIFT_PORT.set_for_testing(70000),
  )
  old_caches = clear_sys_caches()
  try:
    cli = make_logged_in_client()
    resp = cli.get('/desktop/debug/check_config')
    assert_true('hadoop.hdfs_clusters.default.webhdfs_url' in resp.content)
  finally:
    for old_conf in reset:
      old_conf()
    restore_sys_caches(old_caches)
Example #5
def test_config_validator_basic():
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({'default': {}}),
    conf.HDFS_CLUSTERS['default'].WEBHDFS_URL.set_for_testing('http://not.the.re:50070/'),
    conf.MR_CLUSTERS.set_for_testing({'default': {}}),
    conf.MR_CLUSTERS['default'].JT_THRIFT_PORT.set_for_testing(70000),
  )
  old_caches = clear_sys_caches()
  try:
    cli = make_logged_in_client()
    resp = cli.get('/desktop/debug/check_config')
    assert_true(b'hadoop.hdfs_clusters.default.webhdfs_url' in resp.content)
  finally:
    for old_conf in reset:
      old_conf()
    restore_sys_caches(old_caches)
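
Example #5 differs from Example #4 only in the b'...' literal: under Python 3, Django's resp.content is bytes, so a plain str needle would never match. A hypothetical helper that keeps the membership check version-agnostic:

def assert_in_content(needle, resp):
    # resp.content is bytes on Python 3; encode str needles before comparing.
    content = resp.content
    if isinstance(needle, str) and isinstance(content, bytes):
        needle = needle.encode('utf-8')
    assert needle in content, '%r not found in response' % (needle,)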
Example #6
def test_config_validator_more():
    # TODO: Setup DN to not load the plugin, which is a common user error.

    # We don't actually use the minicluster, but it sets up the correct
    # configuration that the test relies on.
    minicluster = pseudo_hdfs4.shared_cluster()
    cli = make_logged_in_client()

    old_caches = clear_sys_caches()
    try:
        resp = cli.get('/debug/check_config')

        assert_false('Failed to access filesystem root' in resp.content)
        assert_false('Failed to create' in resp.content)
        assert_false('Failed to chown' in resp.content)
        assert_false('Failed to delete' in resp.content)
    finally:
        restore_sys_caches(old_caches)
Example #7
def test_config_validator_more():
  # TODO: Setup DN to not load the plugin, which is a common user error.

  # We don't actually use the minicluster, but it sets up the correct
  # configuration that the test relies on.
  minicluster = pseudo_hdfs4.shared_cluster()
  cli = make_logged_in_client()

  old_caches = clear_sys_caches()
  try:
    resp = cli.get('/debug/check_config')

    assert_false('Failed to access filesystem root' in resp.content)
    assert_false('Failed to create' in resp.content)
    assert_false('Failed to chown' in resp.content)
    assert_false('Failed to delete' in resp.content)
  finally:
    restore_sys_caches(old_caches)
Example #8
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress',
        'security_enabled': False
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      clear_sys_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      clear_sys_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      clear_sys_caches()
      for reset in finish:
        reset()
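
The finish-list plus clear_sys_caches() / restore_sys_caches() dance recurs throughout these examples. A hypothetical context manager that bundles the pattern, assuming the same cache helpers the tests above import:

from contextlib import contextmanager

@contextmanager
def config_overrides(*finishers):
    # set_for_testing(...) has already run by the time the finishers arrive;
    # clear the cached config now, and guarantee full restoration on exit.
    old_caches = clear_sys_caches()
    try:
        yield
    finally:
        restore_sys_caches(old_caches)
        for finish in reversed(finishers):
            finish()

Usage would mirror the try/finally blocks above, e.g. `with config_overrides(MR_CLUSTERS.set_for_testing({'default': {}})): ...`.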
Example #9
def test_non_default_cluster():
    NON_DEFAULT_NAME = 'non_default'
    old_caches = clear_sys_caches()
    reset = (
        conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
        conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    )
    try:
        # This is indeed the only hdfs/mr cluster
        assert_equal(1, len(cluster.get_all_hdfs()))
        assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))

        cli = make_logged_in_client()
        # That we can get to a view without errors means that the middlewares work
        cli.get('/about')
    finally:
        for old_conf in reset:
            old_conf()
        restore_sys_caches(old_caches)
Example #10
def shared_cluster():
    global _shared_cluster

    if _shared_cluster is None:
        if is_live_cluster():
            cluster = LiveHdfs()
        else:
            cluster = PseudoHdfs4()
            atexit.register(cluster.stop)

            cluster.start()

            fqdn = socket.getfqdn()
            webhdfs_url = "http://%s:%s/webhdfs/v1" % (fqdn, cluster.dfs_http_port)

            closers = [
                hadoop.conf.HDFS_CLUSTERS["default"].FS_DEFAULTFS.set_for_testing(cluster.fs_default_name),
                hadoop.conf.HDFS_CLUSTERS["default"].WEBHDFS_URL.set_for_testing(webhdfs_url),
                hadoop.conf.YARN_CLUSTERS["default"].HOST.set_for_testing(fqdn),
                hadoop.conf.YARN_CLUSTERS["default"].PORT.set_for_testing(cluster._rm_port),
                hadoop.conf.YARN_CLUSTERS["default"].RESOURCE_MANAGER_API_URL.set_for_testing(
                    "http://%s:%s" % (cluster._fqdn, cluster._rm_webapp_port)
                ),
                hadoop.conf.YARN_CLUSTERS["default"].PROXY_API_URL.set_for_testing(
                    "http://%s:%s" % (cluster._fqdn, cluster._rm_webapp_port)
                ),
                hadoop.conf.YARN_CLUSTERS["default"].HISTORY_SERVER_API_URL.set_for_testing(
                    "%s:%s" % (cluster._fqdn, cluster._jh_web_port)
                ),
            ]

            old_caches = clear_sys_caches()

            def restore_config():
                restore_sys_caches(old_caches)
                for x in closers:
                    x()

            cluster.shutdown_hook = restore_config

        _shared_cluster = cluster

    return _shared_cluster
Example #11
def shared_cluster(conf=False):
    """
  Use a shared cluster that is initialized on demand,
  and that is torn down at process exit.

  If conf is True, then configuration is updated to
  reference the cluster, and relevant caches are cleared.

  Returns a lambda which must be called when you are
  done with the shared cluster.
  """
    cluster = shared_cluster_internal()
    closers = []
    if conf:
        closers.extend([
            hadoop.conf.HDFS_CLUSTERS["default"].NN_HOST.set_for_testing(
                "localhost"),
            hadoop.conf.HDFS_CLUSTERS["default"].NN_HDFS_PORT.set_for_testing(
                cluster.namenode_port),
            hadoop.conf.MR_CLUSTERS["default"].HOST.set_for_testing(
                "localhost"),
            hadoop.conf.MR_CLUSTERS["default"].JT_THRIFT_PORT.set_for_testing(
                cluster.jt.thrift_port),
        ])
        # Clear the caches
        # This is djanky (that's django for "janky").
        # Caches are tricky w.r.t. testing;
        # perhaps there are better patterns?
        old_caches = clear_sys_caches()

    def finish():
        if conf:
            restore_sys_caches(old_caches)
        for x in closers:
            x()

    # We don't run the cluster's real stop method,
    # because a shared cluster should be shut down at
    # exit.
    cluster.shutdown = finish
    return cluster
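
A hedged sketch of the intended call pattern: the caller invokes the shutdown attribute installed above, typically from a finally block, once the shared cluster is no longer needed:

cluster = shared_cluster(conf=True)
try:
    pass  # ... exercise the configured cluster here ...
finally:
    cluster.shutdown()  # runs finish(): restores caches, unwinds closers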
Example #12
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  old_caches = clear_sys_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
    conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
    restore_sys_caches(old_caches)
Example #13
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress'
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      clear_sys_caches()
      fs = cluster.get_hdfs()
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      clear_sys_caches()
      fs = cluster.get_hdfs()
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      clear_sys_caches()
      for reset in finish:
        reset()
Example #14
def shared_cluster(conf=False):
  """
  Use a shared cluster that is initialized on demand,
  and that is torn down at process exit.

  If conf is True, then configuration is updated to
  reference the cluster, and relevant caches are cleared.

  Returns the cluster; call the `shutdown` attribute installed
  below when you are done with the shared cluster.
  """
  cluster = shared_cluster_internal()
  closers = [ ]
  if conf:
    closers.extend([
      hadoop.conf.HDFS_CLUSTERS["default"].NN_HOST.set_for_testing("localhost"),
      hadoop.conf.HDFS_CLUSTERS["default"].NN_HDFS_PORT.set_for_testing(cluster.namenode_port),
      hadoop.conf.MR_CLUSTERS["default"].HOST.set_for_testing("localhost"),
      hadoop.conf.MR_CLUSTERS["default"].JT_THRIFT_PORT.set_for_testing(cluster.jt.thrift_port),
    ])
    # Clear the caches
    # This is djanky (that's django for "janky").
    # Caches are tricky w.r.t. testing;
    # perhaps there are better patterns?
    old_caches = clear_sys_caches()

  def finish():
    if conf:
      restore_sys_caches(old_caches)
    for x in closers:
      x()

  # We don't run the cluster's real stop method,
  # because a shared cluster should be shut down at
  # exit.
  cluster.shutdown = finish
  return cluster