Example #1
def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  status = 'down'
  try:
    status = str(get_oozie().get_oozie_status())
  except Exception:
    # Oozie may be unreachable; any failure leaves status as 'down'.
    pass
  if 'NORMAL' not in status:
    res.append((status, _('The Oozie server is not available')))


  class ConfigMock:
    def __init__(self, value): self.value = value
    def get(self): return self.value
    def get_fully_qualifying_key(self): return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Please run "Setup App" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
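
The ConfigMock trick above works because validate_path() evidently duck-types its first argument: anything with get() and get_fully_qualifying_key() passes for a Hue Config object, so a literal path can be validated without a real config entry. A minimal, self-contained sketch of the idea, where check_path() and FakeFs are hypothetical stand-ins for Hue's real validate_path() and cluster objects:

class ConfigMock:
  """Stands in for a Hue Config object so a literal path can be validated."""
  def __init__(self, value): self.value = value
  def get(self): return self.value
  def get_fully_qualifying_key(self): return self.value

class FakeFs:
  """Hypothetical filesystem double exposing the one method the sketch needs."""
  def __init__(self, paths): self.paths = set(paths)
  def exists(self, path): return path in self.paths

def check_path(conf_obj, fs, message):
  """Hypothetical validate_path-like helper: report (key, message) if missing."""
  if not fs.exists(conf_obj.get()):
    return [(conf_obj.get_fully_qualifying_key(), message)]
  return []

fs = FakeFs(['/user/oozie/share/lib'])
assert check_path(ConfigMock('/user/oozie/share/lib'), fs, 'missing') == []
assert check_path(ConfigMock('/no/such/dir'), fs, 'missing') == [('/no/such/dir', 'missing')]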
Example #2
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if cluster.security_enabled:
        current_user = cluster.user
        try:
          cluster.setuser(username)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile(dir=delegation_token_dir)
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
        finally:
          cluster.setuser(current_user)

    return delegation_token_files
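
A NamedTemporaryFile is deleted as soon as it is closed or garbage-collected, which is why this version returns the file objects themselves: the caller must keep them alive for as long as the subprocess needs the token files. Example #3 below switches to tempfile.mkstemp(), which hands back a plain path that persists until it is explicitly removed. A stdlib-only sketch of the mkstemp pattern (names here are illustrative):

import os
import tempfile

def write_token(token_bytes, token_dir):
  """Persist raw token bytes to a file that outlives this function."""
  fd, path = tempfile.mkstemp(dir=token_dir)
  try:
    os.write(fd, token_bytes)
  finally:
    os.close(fd)
  return path  # the caller must os.remove(path) when done

token_dir = tempfile.mkdtemp()
path = write_token(b'fake-token-bytes', token_dir)
with open(path, 'rb') as f:
  assert f.read() == b'fake-token-bytes'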
Example #3
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if cluster.security_enabled:
        current_user = cluster.user
        try:
          cluster.setuser(username)
          token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
          token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
          os.write(token_file_no, token)
          os.close(token_file_no)
          delegation_token_files.append(path)
        finally:
          cluster.setuser(current_user)

    return delegation_token_files
Example #4
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    class ConfigMock:
      def __init__(self, value): self.value = value
      def get(self): return self.value
      def get_fully_qualifying_key(self): return self.value

    for cluster in get_all_hdfs().values():
      res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                               message=_('Oozie Share Lib not installed in default location.')))

  return res
Example #5
def config_validator(user):
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from hadoop.cluster import get_all_hdfs

    res = []

    status = get_oozie_status()
    if 'NORMAL' not in status:
        res.append((status, _('The Oozie server is not available')))

    class ConfigMock:
        def __init__(self, value):
            self.value = value

        def get(self):
            return self.value

        def get_fully_qualifying_key(self):
            return self.value

    for cluster in get_all_hdfs().values():
        res.extend(
            validate_path(
                ConfigMock('/user/oozie/share/lib'),
                is_dir=True,
                fs=cluster,
                message=_(
                    'Oozie Share Lib not installed in default location.')))

    return res
Example #6
def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
#  from liboozie.oozie_api import get_oozie

  res = []

#  status = 'down'
#  try:
#    status = str(get_oozie().get_oozie_status())
#  except:
#    pass
#  if 'NORMAL' not in status:
#    res.append((status, _('The Oozie server is not available')))


  class ConfigMock:
    def __init__(self, value): self.value = value
    def get(self): return self.value
    def get_fully_qualifying_key(self): return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Run "Setup App" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
Example #7
def test_non_default_cluster():
    NON_DEFAULT_NAME = 'non_default'
    old_caches = clear_sys_caches()
    reset = (
        conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
        conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    )
    try:
        # This is indeed the only hdfs/mr cluster
        assert_equal(1, len(cluster.get_all_hdfs()))
        assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))

        cli = make_logged_in_client()
        # That we can get to a view without errors means that the middlewares work
        cli.get('/about')
    finally:
        for old_conf in reset:
            old_conf()
        restore_sys_caches(old_caches)
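
The reset tuple works because set_for_testing() hands back an undo callable, letting the finally block restore the original configuration no matter how the test exits. A self-contained sketch of that pattern (this Config class is hypothetical, not Hue's):

class Config:
  """Hypothetical config holder whose test setter returns an undo closure."""
  def __init__(self, value):
    self.value = value

  def set_for_testing(self, value):
    old, self.value = self.value, value
    def undo():
      self.value = old
    return undo

conf = Config('default')
reset = (conf.set_for_testing('override'),)
try:
  assert conf.value == 'override'
finally:
  for old_conf in reset:
    old_conf()
assert conf.value == 'default'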
Example #8
def _init_filesystems():
  """Initialize the module-scoped filesystem dictionary."""
  global _filesystems
  if _filesystems is not None:
    return
  _filesystems = {}

  if has_hadoop():
    # Load HDFSes
    _filesystems.update(get_all_hdfs())

  # Load local
  for identifier in conf.LOCAL_FILESYSTEMS.keys():
    local_fs = LocalSubFileSystem(
        conf.LOCAL_FILESYSTEMS[identifier].PATH.get())
    if identifier in _filesystems: 
      raise Exception(("Filesystem '%s' configured twice. First is " +
        "%s, second is local FS %s") % (identifier, _filesystems[identifier], local_fs))
    _filesystems[identifier] = local_fs
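
_init_filesystems() uses a lazy module-global: the None sentinel makes repeated calls cheap and idempotent. The same pattern in a self-contained sketch (expensive_load() is hypothetical):

_filesystems = None

def expensive_load():
  """Hypothetical stand-in for building the HDFS/local filesystem map."""
  return {'default': object()}

def init_filesystems():
  global _filesystems
  if _filesystems is not None:
    return
  _filesystems = expensive_load()

init_filesystems()
first = _filesystems
init_filesystems()
assert _filesystems is first  # the second call was a no-op

Note that the check-then-set is not protected by a lock, so two threads racing on the very first call could both run the load.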
Example #9
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
    conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
Example #10
def config_validator():
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from hadoop.cluster import get_all_hdfs

    res = []

    class ConfigMock:
        def __init__(self, value):
            self.value = value

        def get(self):
            return self.value

        def get_fully_qualifying_key(self):
            return self.value

    for cluster in get_all_hdfs().values():
        res.extend(
            validate_path(
                REMOTE_DEPLOYMENT_DIR,
                is_dir=True,
                fs=cluster,
                message=_(
                    'The deployment directory of Oozie workflows does not exist. '
                    'Run "Setup Examples" on the Oozie workflow page.')))
        res.extend(
            validate_path(
                ConfigMock('/user/oozie/share/lib'),
                is_dir=True,
                fs=cluster,
                message=_(
                    'Oozie Share Lib not installed in default location.')))

    return res
Example #11
def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs

  res = []

  class ConfigMock:
    def __init__(self, value): self.value = value
    def get(self): return self.value
    def get_fully_qualifying_key(self): return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Run "Setup Examples" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
Example #12
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    api = get_oozie(user)
    instrumentation = api.get_instrumentation()
    sharelib_url = [param['value'] for group in instrumentation['variables'] for param in group['data'] if param['name'] == 'sharelib.system.libpath']
    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock:
      def __init__(self, value): self.value = value
      def get(self): return self.value
      def get_fully_qualifying_key(self): return self.value

    for cluster in get_all_hdfs().values():
      res.extend(validate_path(ConfigMock(sharelib_url), is_dir=True, fs=cluster,
                               message=_('Oozie Share Lib not installed in default location.')))

  return res
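
Hdfs.urlsplit() appears to mirror the standard library's urlsplit(): index [2] of the result is the path component, which is why the code above reduces a full hdfs:// URL to a bare path before validating it. A stdlib-only illustration (the URL is made up):

from urllib.parse import urlsplit  # Python 3; Python 2 used urlparse.urlsplit

url = 'hdfs://namenode:8020/user/oozie/share/lib'
assert urlsplit(url)[2] == '/user/oozie/share/lib'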
Example #13
  def run_bin_hadoop_step(self, step):
    """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
    java_properties = {}
    java_properties["hue.suffix"] = "-via-hue"
    java_properties["user.name"] = self.plan.user
    java_prop_str = " ".join("-D%s=%s" % (k,v) for k, v in java_properties.iteritems())
    env = {      
      'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(), 
      'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
      'HADOOP_CLASSPATH': ':'.join([jobsub.conf.ASPECTPATH.get(),
                                    hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()]),
      'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
      'HUE_JOBSUB_USER': self.plan.user,
      'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
      'LANG': os.getenv('LANG', i18n.get_site_encoding()),
    }

    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()
    delegation_token_files = []
    merged_token_file = tempfile.NamedTemporaryFile()
    try:
      LOG.debug("all_clusters: %s" % (repr(all_clusters),))
      for cluster in all_clusters:
        if cluster.security_enabled:
          cluster.setuser(self.plan.user)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile()
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
  
      java_home = os.getenv('JAVA_HOME')
      if java_home:
        env["JAVA_HOME"] = java_home
      for k, v in env.iteritems():
        assert v is not None, "Environment key %s missing value." % k
  
      base_args = [ hadoop.conf.HADOOP_BIN.get() ]
      if hadoop.conf.HADOOP_CONF_DIR.get():
        base_args.append("--config")
        base_args.append(hadoop.conf.HADOOP_CONF_DIR.get())
  
      if delegation_token_files:
        args = list(base_args) # Make a copy of the base args.
        args += ['jar', hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file.name]
        args += [token_file.name for token_file in delegation_token_files]
        LOG.debug("merging credentials files with comand: '%s'" % (' '.join(args),))
        merge_pipe = subprocess.Popen(args, shell=False, close_fds=True)
        retcode = merge_pipe.wait()
        if 0 != retcode:
          raise Exception("bin/hadoop returned non-zero %d while trying to merge credentials" % (retcode,))
        env['HADOOP_TOKEN_FILE_LOCATION'] = merged_token_file.name
  
      args = list(base_args) # Make a copy of the base args.
      args += step.arguments
      LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
      LOG.info("Running: %s" % " ".join(args))
      self.pipe = subprocess.Popen(
        args,
        stdin=None,
        cwd=self.work_dir,
        stdout=self.stdout,
        stderr=self.stderr,
        shell=False,
        close_fds=True,
        env=env)
      retcode = self.pipe.wait()
      if 0 != retcode:
        raise Exception("bin/hadoop returned non-zero %d" % retcode)
      LOG.info("bin/hadoop returned %d" % retcode)
    finally:
      for token_file in delegation_token_files + [merged_token_file]:
        token_file.close()
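
This example merges every delegation token into a single file with a helper jar and points HADOOP_TOKEN_FILE_LOCATION at it, whereas Examples #17 and #18 simply join the individual token file names with commas. A trivial sketch of the comma-join form (paths are illustrative):

token_paths = ['/tmp/tok1', '/tmp/tok2']
env = {'HADOOP_TOKEN_FILE_LOCATION': ','.join(token_paths)}
assert env['HADOOP_TOKEN_FILE_LOCATION'] == '/tmp/tok1,/tmp/tok2'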
Example #14
def config_validator(user):
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from desktop.lib.fsmanager import get_filesystem
    from hadoop.cluster import get_all_hdfs
    from hadoop.fs.hadoopfs import Hdfs
    from liboozie.oozie_api import get_oozie

    res = []

    try:
        from oozie.conf import REMOTE_SAMPLE_DIR
    except Exception as e:
        LOG.warn('Config check failed because Oozie app not installed: %s' % e)
        return res

    if OOZIE_URL.get():
        status = get_oozie_status(user)
        if 'NORMAL' not in status:
            res.append((status, _('The Oozie server is not available')))
        fs = get_filesystem()
        NICE_NAME = 'Oozie'
        if fs.do_as_superuser(fs.exists, REMOTE_SAMPLE_DIR.get()):
            stats = fs.do_as_superuser(fs.stats, REMOTE_SAMPLE_DIR.get())
            mode = oct(stats.mode)
            # if neither group nor others have write permission
            group_has_write = int(mode[-2]) & 2
            others_has_write = int(mode[-1]) & 2

            if not group_has_write and not others_has_write:
                res.append(
                    (NICE_NAME,
                     "The permissions of workspace '%s' are too restrictive" %
                     REMOTE_SAMPLE_DIR.get()))

        api = get_oozie(user, api_version="v2")

        configuration = api.get_configuration()
        if 'org.apache.oozie.service.MetricsInstrumentationService' in [
                c.strip()
                for c in configuration.get('oozie.services.ext', '').split(',')
        ]:
            metrics = api.get_metrics()
            sharelib_url = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics[
                'gauges'] and [
                    metrics['gauges']['libs.sharelib.system.libpath']['value']
                ] or []
        else:
            instrumentation = api.get_instrumentation()
            sharelib_url = [
                param['value'] for group in instrumentation['variables']
                for param in group['data']
                if param['name'] == 'sharelib.system.libpath'
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _('Oozie Share Lib path is not available')))

        class ConfigMock(object):
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in list(get_all_hdfs().values()):
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_(
                        'Oozie Share Lib not installed in default location.')))

    return res
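
Two small idioms above are worth unpacking. The "cond and [value] or []" construction predates Python's conditional expression and is equivalent to "[value] if cond else []" whenever [value] is truthy, which a one-element list always is. And the permission check slices the octal string of the mode; the same test can be written with stat constants. Self-contained sketches of both:

import stat

# The pre-ternary and/or idiom, shown equivalent to a conditional expression.
metrics = {'gauges': {'libs.sharelib.system.libpath': {'value': '/user/oozie/share/lib'}}}
cond = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges']
old_style = cond and [metrics['gauges']['libs.sharelib.system.libpath']['value']] or []
new_style = [metrics['gauges']['libs.sharelib.system.libpath']['value']] if cond else []
assert old_style == new_style == ['/user/oozie/share/lib']

# Group/other write bits via stat constants instead of octal-string slicing.
mode = 0o40755  # a directory with rwxr-xr-x permissions
assert not (mode & stat.S_IWGRP) and not (mode & stat.S_IWOTH)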
Example #15
def _init_filesystems():
    """Initialize the module-scoped filesystem dictionary."""
    global _filesystems
    if _filesystems is not None:
        return
    _filesystems = get_all_hdfs()
Example #16
def config_validator(user):
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from hadoop.cluster import get_all_hdfs
    from hadoop.fs.hadoopfs import Hdfs
    from liboozie.oozie_api import get_oozie

    res = []

    if OOZIE_URL.get():
        status = get_oozie_status(user)
        if "NORMAL" not in status:
            res.append((status, _("The Oozie server is not available")))

        api = get_oozie(user, api_version="v2")

        configuration = api.get_configuration()
        if "org.apache.oozie.service.MetricsInstrumentationService" in [
            c.strip() for c in configuration.get("oozie.services.ext", "").split(",")
        ]:
            metrics = api.get_metrics()
            sharelib_url = (
                "gauges" in metrics
                and "libs.sharelib.system.libpath" in metrics["gauges"]
                and [metrics["gauges"]["libs.sharelib.system.libpath"]["value"]]
                or []
            )
        else:
            instrumentation = api.get_instrumentation()
            sharelib_url = [
                param["value"]
                for group in instrumentation["variables"]
                for param in group["data"]
                if param["name"] == "sharelib.system.libpath"
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _("Oozie Share Lib path is not available")))

        class ConfigMock:
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in get_all_hdfs().values():
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_("Oozie Share Lib not installed in default location."),
                )
            )

    return res
Example #17
    def run_bin_hadoop_step(self, step):
        """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
        java_properties = {}
        java_properties["hue.suffix"] = "-via-hue"
        java_properties["user.name"] = self.plan.user
        java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
        env = {
            "HADOOP_HOME": hadoop.conf.HADOOP_HOME.get(),
            "HADOOP_OPTS": "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
            "HADOOP_CLASSPATH": ":".join(
                [jobsub.conf.ASPECTPATH.get(), hadoop.conf.HADOOP_STATIC_GROUP_MAPPING_CLASSPATH.get()]
            ),
            "HUE_JOBTRACE_LOG": self.internal_file_name("jobs"),
            "HUE_JOBSUB_USER": self.plan.user,
            "HUE_JOBSUB_GROUPS": ",".join(self.plan.groups),
        }

        delegation_token_files = []
        all_clusters = []
        all_clusters += all_mrclusters().values()
        all_clusters += get_all_hdfs().values()
        LOG.info("all_clusters: %s" % (repr(all_clusters),))
        for cluster in all_clusters:
            if cluster.security_enabled:
                cluster.setuser(self.plan.user)
                token = cluster.get_delegation_token()
                token_file = tempfile.NamedTemporaryFile()
                token_file.write(token.delegationTokenBytes)
                token_file.flush()
                delegation_token_files.append(token_file)

        if delegation_token_files:
            env["HADOOP_TOKEN_FILE_LOCATION"] = ",".join([token_file.name for token_file in delegation_token_files])

        java_home = os.getenv("JAVA_HOME")
        if java_home:
            env["JAVA_HOME"] = java_home
        for k, v in env.iteritems():
            assert v is not None, "Environment key %s missing value." % k

        args = [hadoop.conf.HADOOP_BIN.get()]
        if hadoop.conf.HADOOP_CONF_DIR.get():
            args.append("--config")
            args.append(hadoop.conf.HADOOP_CONF_DIR.get())

        args += step.arguments
        LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
        LOG.info("Running: %s" % " ".join(args))
        self.pipe = subprocess.Popen(
            args,
            stdin=None,
            cwd=self.work_dir,
            stdout=self.stdout,
            stderr=self.stderr,
            shell=False,
            close_fds=True,
            env=env,
        )
        retcode = self.pipe.wait()
        if 0 != retcode:
            raise Exception("bin/hadoop returned non-zero %d" % retcode)
        LOG.info("bin/hadoop returned %d" % retcode)
        for token_file in delegation_token_files:
            token_file.close()
Example #18
    def run_bin_hadoop_step(self, step):
        """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
        java_properties = {}
        java_properties["hue.suffix"] = "-via-hue"
        java_properties["user.name"] = self.plan.user
        java_prop_str = " ".join("-D%s=%s" % (k, v)
                                 for k, v in java_properties.iteritems())
        env = {
            'HADOOP_HOME':
            hadoop.conf.HADOOP_HOME.get(),
            'HADOOP_OPTS':
            "-javaagent:%s %s" %
            (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
            'HADOOP_CLASSPATH':
            ':'.join([
                jobsub.conf.ASPECTPATH.get(),
                hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()
            ]),
            'HUE_JOBTRACE_LOG':
            self.internal_file_name("jobs"),
            'HUE_JOBSUB_USER':
            self.plan.user,
            'HUE_JOBSUB_GROUPS':
            ",".join(self.plan.groups),
        }

        delegation_token_files = []
        all_clusters = []
        all_clusters += all_mrclusters().values()
        all_clusters += get_all_hdfs().values()
        LOG.info("all_clusters: %s" % (repr(all_clusters), ))
        for cluster in all_clusters:
            if cluster.security_enabled:
                cluster.setuser(self.plan.user)
                token = cluster.get_delegation_token()
                token_file = tempfile.NamedTemporaryFile()
                token_file.write(token.delegationTokenBytes)
                token_file.flush()
                delegation_token_files.append(token_file)

        if delegation_token_files:
            env['HADOOP_TOKEN_FILE_LOCATION'] = ','.join(
                [token_file.name for token_file in delegation_token_files])

        java_home = os.getenv('JAVA_HOME')
        if java_home:
            env["JAVA_HOME"] = java_home
        for k, v in env.iteritems():
            assert v is not None, "Environment key %s missing value." % k

        args = [hadoop.conf.HADOOP_BIN.get()]
        if hadoop.conf.HADOOP_CONF_DIR.get():
            args.append("--config")
            args.append(hadoop.conf.HADOOP_CONF_DIR.get())

        args += step.arguments
        LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
        LOG.info("Running: %s" % " ".join(args))
        self.pipe = subprocess.Popen(args,
                                     stdin=None,
                                     cwd=self.work_dir,
                                     stdout=self.stdout,
                                     stderr=self.stderr,
                                     shell=False,
                                     close_fds=True,
                                     env=env)
        retcode = self.pipe.wait()
        if 0 != retcode:
            raise Exception("bin/hadoop returned non-zero %d" % retcode)
        LOG.info("bin/hadoop returned %d" % retcode)
        for token_file in delegation_token_files:
            token_file.close()
Example #19
                for param in group['data']
                if param['name'] == 'sharelib.system.libpath'
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _('Oozie Share Lib path is not available')))

        class ConfigMock:
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in get_all_hdfs().values():
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_(
                        'Oozie Share Lib not installed in default location.')))

    return res
Example #20
def _init_filesystems():
  """Initialize the module-scoped filesystem dictionary."""
  global _filesystems
  if _filesystems is not None:
    return
  _filesystems = get_all_hdfs()