Exemple #1
0
 def __init__(self, user):
   self.user = user
   self.resource_manager_api = None
   self.mapreduce_api = None
   self.history_server_api = None
   self.spark_history_server_api = None
   if list(YARN_CLUSTERS.keys()):
     self.resource_manager_api = resource_manager_api.get_resource_manager(user.username)
     self.mapreduce_api = mapreduce_api.get_mapreduce_api(user.username)
     self.history_server_api = history_server_api.get_history_server_api(user.username)
     self.spark_history_server_api = spark_history_server_api.get_history_server_api()  # Spark HS does not support setuser
Exemple #2
0
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress',
        'security_enabled': False
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      cluster.clear_caches()
      for reset in finish:
        reset()
Exemple #3
0
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress',
        'security_enabled': False
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      cluster.clear_caches()
      for reset in finish:
        reset()
Exemple #4
0
  def get_task_log(self, offset=0):
    logs = []
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']
    # Get MR task logs

    # Don't hack up the urls if they've been migrated to the job history server.
    for cluster in YARN_CLUSTERS.get().itervalues():
      if log_link.startswith(cluster.HISTORY_SERVER_API_URL.get()):
        break
    else:
      if self.assignedContainerId:
        log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
      if hasattr(self, 'nodeHttpAddress'):
        log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

    for name in ('stdout', 'stderr', 'syslog'):
      link = '/%s/' % name
      params = {}
      if int(offset) >= 0:
        params['start'] = offset

      try:
        log_link = re.sub('job_[^/]+', self.id, log_link)
        root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
        response = root.get(link, params=params)
        log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
      except Exception, e:
        log = _('Failed to retrieve log: %s' % e)
        try:
          debug_info = '\nLog Link: %s' % log_link
          debug_info += '\nHTML Response: %s' % response
          LOGGER.error(debug_info)
        except:
          LOG.exception('failed to build debug info')

      logs.append(log)