def __init__(self, user):
    """Store the user and, when YARN clusters are configured, build the API clients.

    All four client attributes are first set to ``None``. They are replaced
    with real clients only if ``YARN_CLUSTERS`` reports at least one key.

    Args:
        user: Object whose ``username`` attribute is handed to the resource
            manager, MapReduce and history server client factories.
    """
    self.user = user

    # Default state: no client available.
    self.resource_manager_api = self.mapreduce_api = None
    self.history_server_api = self.spark_history_server_api = None

    configured_clusters = list(YARN_CLUSTERS.keys())
    if not configured_clusters:
        return

    username = user.username
    self.resource_manager_api = resource_manager_api.get_resource_manager(username)
    self.mapreduce_api = mapreduce_api.get_mapreduce_api(username)
    self.history_server_api = history_server_api.get_history_server_api(username)
    # Spark HS does not support setuser
    self.spark_history_server_api = spark_history_server_api.get_history_server_api()
def test_update_properties(self):
    """Check how ``Submission._update_properties`` rewrites cluster addresses.

    Three scenarios are exercised in turn, each with the same input
    properties:

    1. With a ``MockFs`` and no job tracker, the properties are expected to
       stay unchanged.
    2. With the filesystem and MR cluster obtained from ``cluster``,
       ``nameNode`` is expected to become ``fs.fs_defaultfs`` while
       ``jobTracker`` stays ``'jtaddress'``.
    3. After logical names are configured for the default HDFS and MR
       clusters, ``nameNode`` and ``jobTracker`` are expected to become
       those logical names.

    Every configuration override is undone in the ``finally`` clause.
    """
    finish = []

    try:
        # Install the overrides inside the ``try`` so that a failure part-way
        # through setup still resets the overrides that were already applied
        # instead of leaking them into later tests.
        finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
        finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))

        properties = {
            'user.name': 'hue',
            'test.1': 'http://localhost/test?test1=test&test2=test',
            'nameNode': 'hdfs://curacao:8020',
            'jobTracker': 'jtaddress',
            'security_enabled': False
        }

        # Scenario 1: mock filesystem, no job tracker.
        final_properties = properties.copy()
        submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)

        # Scenario 2: filesystem and job tracker taken from the cluster module.
        cluster.clear_caches()
        fs = cluster.get_hdfs()
        jt = cluster.get_next_ha_mrcluster()[1]
        final_properties = properties.copy()
        final_properties.update({
            'jobTracker': 'jtaddress',
            'nameNode': fs.fs_defaultfs
        })
        submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)

        # Scenario 3: logical names configured for the default clusters.
        finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
        finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
        # Clear the caches so fs/jt are fetched again after the overrides above.
        cluster.clear_caches()
        fs = cluster.get_hdfs()
        jt = cluster.get_next_ha_mrcluster()[1]
        final_properties = properties.copy()
        final_properties.update({
            'jobTracker': 'jobtracker',
            'nameNode': 'namenode'
        })
        submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)
    finally:
        cluster.clear_caches()
        for reset in finish:
            reset()
def get_task_log(self, offset=0):
    """Fetch the stdout, stderr and syslog pages for this task attempt.

    The log URL comes from the last entry of the owning job's
    ``job_attempts['jobAttempt']`` list. Unless that URL starts with the
    history server API URL of one of the configured YARN clusters, the
    container id and the part of the node HTTP address before the first
    ``':'`` are swapped for this attempt's own values. Each of the three log
    pages is then requested and the text of the first node matching
    ``/html/body/table/tbody/tr/td[2]`` is appended to ``logs``; on any
    failure a "Failed to retrieve log" message is appended instead.

    Args:
        offset: Sent as the ``start`` query parameter, but only when
            ``int(offset) >= 0``.

    NOTE(review): no ``return`` statement is visible in this view of the
    function; ``logs`` is presumably returned by code that follows --
    confirm against the full file.
    """
    logs = []
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']
    # Get MR task logs

    # Don't hack up the urls if they've been migrated to the job history server.
    for cluster in YARN_CLUSTERS.get().itervalues():
        if log_link.startswith(cluster.HISTORY_SERVER_API_URL.get()):
            break
    else:
        # for/else: reached only when the loop finished without ``break``,
        # i.e. no cluster's history server URL prefixes the link.
        if self.assignedContainerId:
            log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
        if hasattr(self, 'nodeHttpAddress'):
            log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

    for name in ('stdout', 'stderr', 'syslog'):
        link = '/%s/' % name
        params = {}
        if int(offset) >= 0:
            params['start'] = offset

        try:
            # Every ``job_...`` path segment in the link is replaced by self.id.
            log_link = re.sub('job_[^/]+', self.id, log_link)
            # urlsplit(...)[2] is the path component of the link.
            root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
            response = root.get(link, params=params)
            log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
        except Exception, e:
            # NOTE(review): the error is interpolated before the string is
            # passed to ``_``; if ``_`` is a gettext-style lookup (presumably),
            # the formatted text is what gets looked up -- confirm intent.
            log = _('Failed to retrieve log: %s' % e)
            try:
                debug_info = '\nLog Link: %s' % log_link
                # ``response`` is unbound here if the failure happened before
                # root.get() returned on the first iteration (and holds the
                # previous iteration's value on later ones); the bare except
                # below absorbs the resulting NameError.
                debug_info += '\nHTML Response: %s' % response
                LOGGER.error(debug_info)
            except:
                # NOTE(review): both ``LOGGER`` and ``LOG`` are used in this
                # function -- confirm that both names exist in the module.
                LOG.exception('failed to build debug info')

        logs.append(log)