Ejemplo n.º 1
0
def get_next_ha_yarncluster():
  """
  Return the next available YARN RM instance and cache its name.

  Walks the clusters in ``conf.YARN_CLUSTERS`` whose ``SUBMIT_TO`` flag is set
  and builds a ``ResourceManagerApi`` client for each of them.

  * When the ``SUBMIT_TO`` values sum to less than two (no HA), the first
    flagged cluster is returned immediately without contacting the RM.
  * Otherwise (HA), each RM is asked for its cluster info and the first one
    whose ``clusterInfo.haState`` equals ``'ACTIVE'`` is picked: its name is
    stored in the module-level ``MR_NAME_CACHE`` and the ``_api_cache``
    attributes of the ``resource_manager_api`` and ``mapreduce_api`` modules
    are reset to ``None``.

  Returns:
    A ``(config, rm)`` tuple for the selected cluster, or ``None`` when no
    cluster is flagged for submission or no RM reports itself as ACTIVE.
  """
  from hadoop.yarn import mapreduce_api
  from hadoop.yarn import resource_manager_api
  from hadoop.yarn.resource_manager_api import ResourceManagerApi
  global MR_NAME_CACHE

  # Two or more clusters flagged for submission means an HA pair is configured.
  has_ha = sum(conf.YARN_CLUSTERS[name].SUBMIT_TO.get() for name in conf.YARN_CLUSTERS.keys()) >= 2

  for name in conf.YARN_CLUSTERS.keys():
    config = conf.YARN_CLUSTERS[name]
    if not config.SUBMIT_TO.get():
      continue

    rm = ResourceManagerApi(config.RESOURCE_MANAGER_API_URL.get(), config.SECURITY_ENABLED.get(), config.SSL_CERT_CA_VERIFY.get())
    if not has_ha:
      # Single RM configured: nothing to probe, hand it back as is.
      return (config, rm)

    try:
      cluster_info = rm.cluster()
      if cluster_info['clusterInfo']['haState'] == 'ACTIVE':
        MR_NAME_CACHE = name
        LOG.warning('Picking RM HA: %s' % name)
        resource_manager_api._api_cache = None # Reset cache
        mapreduce_api._api_cache = None
        return (config, rm)
      else:
        LOG.info('RM %s is not ACTIVE, skipping it: %s' % (name, cluster_info))
    except resource_manager_api.YarnFailoverOccurred:
      LOG.info('RM %s has failed back to another server' % (name,))
    except Exception as ex:
      # Best effort: any failure while probing this RM just moves on to the next one.
      LOG.exception('RM %s is not available, skipping it: %s' % (name, ex))

  # No usable RM was found.
  return None
Ejemplo n.º 2
0
def get_next_ha_yarncluster(current_user=None):
  """
  Return the next available YARN RM instance and cache its name.

  Walks the clusters in ``conf.YARN_CLUSTERS`` whose ``SUBMIT_TO`` flag is set,
  builds a ``ResourceManagerApi`` client for each of them and sets the user on
  that client.

  * When the ``SUBMIT_TO`` values sum to less than two (no HA), the first
    flagged cluster is returned immediately without contacting the RM.
  * Otherwise (HA), each RM is asked for its cluster info and the first one
    whose ``clusterInfo.haState`` equals ``'ACTIVE'`` is picked. If its name
    differs from the module-level ``MR_NAME_CACHE``, the client is marked with
    ``from_failover = True``. ``MR_NAME_CACHE`` is then updated to the picked
    name.

  Args:
    current_user: user passed to ``rm.setuser()``; ``DEFAULT_USER`` is used
      when this is ``None``.

  Returns:
    A ``(config, rm)`` tuple for the selected cluster, or ``None`` when no
    cluster is flagged for submission or no RM reports itself as ACTIVE.
  """
  from hadoop.yarn.resource_manager_api import ResourceManagerApi
  global MR_NAME_CACHE

  # Two or more clusters flagged for submission means an HA pair is configured.
  has_ha = sum(conf.YARN_CLUSTERS[name].SUBMIT_TO.get() for name in conf.YARN_CLUSTERS.keys()) >= 2

  for name in conf.YARN_CLUSTERS.keys():
    config = conf.YARN_CLUSTERS[name]
    if not config.SUBMIT_TO.get():
      continue

    rm = ResourceManagerApi(config.RESOURCE_MANAGER_API_URL.get(), config.SECURITY_ENABLED.get(), config.SSL_CERT_CA_VERIFY.get())
    rm.setuser(DEFAULT_USER if current_user is None else current_user)
    if not has_ha:
      # Single RM configured: nothing to probe, hand it back as is.
      return (config, rm)

    try:
      cluster_info = rm.cluster()
      if cluster_info['clusterInfo']['haState'] == 'ACTIVE':
        if name != MR_NAME_CACHE:
          # NOTE(review): this also triggers on the very first pick if
          # MR_NAME_CACHE starts out unset -- confirm that is intended.
          LOG.info('RM %s has failed back to %s server' % (MR_NAME_CACHE, name))
          rm.from_failover = True
        MR_NAME_CACHE = name
        LOG.warning('Picking RM HA: %s' % name)
        return (config, rm)
      else:
        LOG.info('RM %s is not ACTIVE, skipping it: %s' % (name, cluster_info))
    except Exception as ex:
      # Best effort: any failure while probing this RM just moves on to the next one.
      LOG.exception('RM %s is not available, skipping it: %s' % (name, ex))

  # No usable RM was found.
  return None