  def submit_job(self, jar, job_args):
    """Asks the JobTracker agent to start a job; True if it was submitted."""
    if self.state == CluserState.READY:
      util.checked_do(cfg.hadoop_jobtracker, '/job/start',
                      {'jar': jar, 'args': json.dumps(job_args)})
      return True
    else:
      return False
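# Hedged usage sketch (not from the original module): `cluster` is assumed to
# be an instance of this class and 'wordcount.jar' an example jar already
# staged on the JobTracker. submit_job only fires when the cluster is READY.
if cluster.submit_job('wordcount.jar', ['-input', '/in', '-output', '/out']):
  print('job submitted to the JobTracker')
else:
  print('cluster not READY; job was not submitted')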
  def launch_jt(self):
    """Create and monitor the instance running the Jobtracker.

    This also blocks and waits for the NameNode, then starts the JobTracker and
    starts the agent that monitors Hadoop.
    """
    # Keep this on the spawn_scheduler because it is high priority.
    if not self.spawn_instance(cfg.hadoop_jobtracker,
                               'hadoop/jobtracker_snitch.py'):
      self.update_state('cluster', CluserState.BROKEN)
      return

    if not self.monitor_instance(cfg.hadoop_jobtracker,
                                 InstanceState.SNITCH_READY):
      self.update_state('cluster', CluserState.BROKEN)
      return
    with self.cv:
      while self.instances[cfg.hadoop_namenode] != InstanceState.HADOOP_READY:
        self.cv.wait()
    util.checked_do(cfg.hadoop_jobtracker, '/start', {})
    self.update_state(cfg.hadoop_jobtracker, InstanceState.HADOOP_READY)
    # Fork off and start our Java Hadoop monitor
    util.bg_exec(
        ['java', '-cp', 'hadoop-tools.jar', 'com.google.HadoopMonitor'],
        '/home/hadoop/monitor_log'
    )
    with self.cv:
      self.cv.notify_all()
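# Self-contained sketch of the Condition handshake launch_jt leans on: one
# thread blocks until a shared flag flips, another flips it and notifies.
# Local names only; this is an illustration, not project code.
import threading

_state = {'namenode': 'PENDING'}
_cv = threading.Condition()

def _wait_for_namenode():
  # Mirrors the `while ... cv.wait()` loop in launch_jt above.
  with _cv:
    while _state['namenode'] != 'HADOOP_READY':
      _cv.wait()

def _mark_namenode_ready():
  with _cv:
    _state['namenode'] = 'HADOOP_READY'
    _cv.notify_all()

_t = threading.Thread(target=_wait_for_namenode)
_t.start()
_mark_namenode_ready()
_t.join()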
  def start_slave(self, name):
    """Starts Hadoop on a slave and updates the cluster state."""
    assert self.masters_up()
    util.checked_do(name, '/start', {})
    with self.cv:
      self.update_state(name, InstanceState.HADOOP_READY)
      self.live_slaves += 1
      if self.live_slaves >= cfg.needed_slaves:
        self.update_state('cluster', CluserState.READY)
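# Hedged usage sketch (`cluster` and the slave names are assumptions, not from
# the original config): bring up each slave once the masters report ready; the
# cluster flips to READY inside start_slave when enough slaves have checked in.
for i in range(cfg.needed_slaves):
  cluster.start_slave('hadoop-slave-%d' % i)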
  def transfer(self, src, dst):
    # Returns None if the cluster isn't ready; otherwise the operation record
    # that callers can poll for completion.
    if self.state != CluserState.READY:
      return None
    op = self.new_op()
    self.operations[op]['src'] = src
    self.operations[op]['dst'] = dst
    util.checked_do(cfg.hadoop_namenode, '/transfer',
                    {'src': src, 'dst': dst, 'operation': op})
    return self.operations[op]
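# Hedged usage sketch (the source and destination paths are examples): start a
# copy into HDFS and keep the returned operation record for status checks.
op_record = cluster.transfer('gs://example-bucket/input.txt', '/data/input.txt')
if op_record is None:
  print('cluster not READY; transfer was not started')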
  def clean_hdfs(self, path):
    """Recursively deletes files from an HDFS path."""
    util.checked_do(cfg.hadoop_namenode, '/clean', {'path': path})
    return True
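# Hedged usage sketch (the path is an example): clear stale job output before
# resubmitting, since MapReduce refuses to write into an existing output dir.
cluster.clean_hdfs('/data/out')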
def send_coordinator(cmd, data, verify=False):
  """Sends cmd plus the shared secret to the coordinator.

  With verify=True the request goes through util.checked_do; otherwise it is
  sent directly to the coordinator's agent via util.talk_to_agent.
  """
  data['secret'] = cfg.secret
  if verify:
    return util.checked_do(cfg.coordinator, cmd, data=data)
  else:
    return util.talk_to_agent(util.name_to_ip(cfg.coordinator), cmd, data=data)
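# Hedged usage sketch (the '/status' command and empty payload are assumptions,
# not endpoints documented here): query the coordinator and keep the reply.
status = send_coordinator('/status', {}, verify=True)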