Ejemplo n.º 1
0
def get_next_ha_yarncluster():
  """
  Return the next available YARN RM instance and cache its name.

  Every cluster in ``conf.YARN_CLUSTERS`` whose ``SUBMIT_TO`` flag is set is
  considered, in the order ``conf.YARN_CLUSTERS.keys()`` yields them. HA mode
  is assumed when at least two clusters have ``SUBMIT_TO`` enabled.

  - Without HA, the first cluster with ``SUBMIT_TO`` set is returned without
    being contacted.
  - With HA, each candidate is queried through ``rm.cluster()`` and the first
    one reporting ``haState == 'ACTIVE'`` is returned. Its name is stored in
    the module-level ``MR_NAME_CACHE`` and the ``API_CACHE`` attributes of
    ``resource_manager_api`` and ``mapreduce_api`` are reset to ``None``.

  Returns:
    A ``(config, rm)`` tuple holding the selected cluster configuration and a
    ``ResourceManagerApi`` bound to ``DEFAULT_USER``, or ``None`` when no
    cluster qualifies.
  """
  from hadoop.yarn import mapreduce_api
  from hadoop.yarn import resource_manager_api
  from hadoop.yarn.resource_manager_api import ResourceManagerApi
  global MR_NAME_CACHE

  # Two or more clusters flagged for submission means we have to probe for the active RM.
  has_ha = sum(conf.YARN_CLUSTERS[name].SUBMIT_TO.get() for name in conf.YARN_CLUSTERS.keys()) >= 2

  for name in conf.YARN_CLUSTERS.keys():
    config = conf.YARN_CLUSTERS[name]
    if config.SUBMIT_TO.get():
      rm = ResourceManagerApi(config.RESOURCE_MANAGER_API_URL.get(), config.SECURITY_ENABLED.get(), config.SSL_CERT_CA_VERIFY.get())
      rm.setuser(DEFAULT_USER)
      if has_ha:
        try:
          cluster_info = rm.cluster()
          if cluster_info['clusterInfo']['haState'] == 'ACTIVE':
            MR_NAME_CACHE = name
            LOG.warning('Picking RM HA: %s' % name)
            resource_manager_api.API_CACHE = None  # Reset cache
            mapreduce_api.API_CACHE = None
            return (config, rm)
          else:
            # Any haState other than 'ACTIVE' is skipped; keep looking.
            LOG.info('RM %s is not RUNNING, skipping it: %s' % (name, cluster_info))
        except resource_manager_api.YarnFailoverOccurred:
          LOG.info('RM %s has failed back to another server' % (name,))
        except Exception as ex:
          # Best effort: an unreachable RM must not prevent trying the remaining ones.
          LOG.exception('RM %s is not available, skipping it: %s' % (name, ex))
      else:
        return (config, rm)

  # No cluster has SUBMIT_TO set, or (in HA mode) none reported itself ACTIVE.
  return None
Ejemplo n.º 2
0
def get_next_ha_yarncluster():
  """
  Return the next available YARN RM instance and cache its name.

  Every cluster in ``conf.YARN_CLUSTERS`` whose ``SUBMIT_TO`` flag is set is
  considered, in the order ``conf.YARN_CLUSTERS.keys()`` yields them. HA mode
  is assumed when at least two clusters have ``SUBMIT_TO`` enabled.

  - Without HA, the first cluster with ``SUBMIT_TO`` set is returned without
    being contacted.
  - With HA, each candidate is queried through ``rm.cluster()`` and the first
    one reporting ``haState == 'ACTIVE'`` is returned. Its name is stored in
    the module-level ``MR_NAME_CACHE`` and the ``API_CACHE`` attributes of
    ``resource_manager_api`` and ``mapreduce_api`` are reset to ``None``.

  Returns:
    A ``(config, rm)`` tuple holding the selected cluster configuration and a
    ``ResourceManagerApi`` bound to ``DEFAULT_USER``, or ``None`` when no
    cluster qualifies.
  """
  from hadoop.yarn import mapreduce_api
  from hadoop.yarn import resource_manager_api
  from hadoop.yarn.resource_manager_api import ResourceManagerApi
  global MR_NAME_CACHE

  # Two or more clusters flagged for submission means we have to probe for the active RM.
  has_ha = sum(conf.YARN_CLUSTERS[name].SUBMIT_TO.get() for name in conf.YARN_CLUSTERS.keys()) >= 2

  for name in conf.YARN_CLUSTERS.keys():
    config = conf.YARN_CLUSTERS[name]
    if config.SUBMIT_TO.get():
      rm = ResourceManagerApi(config.RESOURCE_MANAGER_API_URL.get(), config.SECURITY_ENABLED.get(), config.SSL_CERT_CA_VERIFY.get())
      rm.setuser(DEFAULT_USER)
      if has_ha:
        try:
          cluster_info = rm.cluster()
          if cluster_info['clusterInfo']['haState'] == 'ACTIVE':
            MR_NAME_CACHE = name
            LOG.warning('Picking RM HA: %s' % name)
            resource_manager_api.API_CACHE = None  # Reset cache
            mapreduce_api.API_CACHE = None
            return (config, rm)
          else:
            # Any haState other than 'ACTIVE' is skipped; keep looking.
            LOG.info('RM %s is not RUNNING, skipping it: %s' % (name, cluster_info))
        except resource_manager_api.YarnFailoverOccurred:
          LOG.info('RM %s has failed back to another server' % (name,))
        except Exception as ex:
          # Best effort: an unreachable RM must not prevent trying the remaining ones.
          LOG.exception('RM %s is not available, skipping it: %s' % (name, ex))
      else:
        return (config, rm)

  # No cluster has SUBMIT_TO set, or (in HA mode) none reported itself ACTIVE.
  return None
Ejemplo n.º 3
0
def get_next_ha_yarncluster(current_user=None):
    """
    Select the YARN ResourceManager to use and remember which one was picked.

    Clusters from ``conf.YARN_CLUSTERS`` with ``SUBMIT_TO`` enabled are
    candidates. When fewer than two are enabled, the first candidate is
    returned without being contacted. Otherwise each candidate is queried via
    ``cluster()`` and the first one whose ``haState`` is ``'ACTIVE'`` wins: its
    name is written to the module-level ``MR_NAME_CACHE`` and, if that name
    differs from the previously cached one, ``from_failover`` is set to
    ``True`` on the returned API object.

    Args:
      current_user: user passed to ``setuser`` on the API object;
        ``DEFAULT_USER`` is used when this is ``None``.

    Returns:
      A ``(config, rm)`` tuple, or ``None`` when no candidate could be selected.
    """
    from hadoop.yarn.resource_manager_api import ResourceManagerApi
    global MR_NAME_CACHE

    cluster_names = list(conf.YARN_CLUSTERS.keys())
    submit_flags = [
        conf.YARN_CLUSTERS[cluster_name].SUBMIT_TO.get()
        for cluster_name in cluster_names
    ]
    has_ha = sum(submit_flags) >= 2

    for cluster_name in cluster_names:
        cluster_conf = conf.YARN_CLUSTERS[cluster_name]
        if not cluster_conf.SUBMIT_TO.get():
            continue

        rm_api = ResourceManagerApi(
            cluster_conf.RESOURCE_MANAGER_API_URL.get(),
            cluster_conf.SECURITY_ENABLED.get(),
            cluster_conf.SSL_CERT_CA_VERIFY.get(),
        )
        rm_api.setuser(DEFAULT_USER if current_user is None else current_user)

        # Single submit target: nothing to probe, hand it back right away.
        if not has_ha:
            return (cluster_conf, rm_api)

        try:
            cluster_info = rm_api.cluster()
            if cluster_info['clusterInfo']['haState'] != 'ACTIVE':
                LOG.info('RM %s is not RUNNING, skipping it: %s' %
                         (cluster_name, cluster_info))
                continue

            # The active RM differs from the cached one: flag the API object.
            if cluster_name != MR_NAME_CACHE:
                LOG.info('RM %s has failed back to %s server' %
                         (MR_NAME_CACHE, cluster_name))
                rm_api.from_failover = True
            MR_NAME_CACHE = cluster_name
            LOG.warning('Picking RM HA: %s' % cluster_name)
            return (cluster_conf, rm_api)
        except Exception as ex:
            # Log and move on so the remaining candidates still get tried.
            LOG.exception('RM %s is not available, skipping it: %s' %
                          (cluster_name, ex))

    return None