예제 #1
0
  def start(self):
    """Run and maintain hodring commands.

    Steps, in order:
      1. If the config has a 'download-addr' entry, start the local HTTP
         server.
      2. Start the base service via hodBaseService.start().
      3. Determine the ringmaster XML-RPC address: taken from the
         'ringmaster-xrs-addr' config entry when present, otherwise looked
         up in the service registry (one lookup plus up to 3000 retries,
         0.2 seconds apart).
      4. Call __download_package() when 'download-addr' is configured.
      5. Poll the ringmaster until it returns a non-empty command list,
         wrap each entry in a CommandDesc, store the list under
         self._cfg['commanddesc'] and run the hadoop commands.
      6. Send the filledInKeyVals of all running commands to the
         ringmaster, if there are any.

    Raises:
      Exception: if no ringmaster XML-RPC address could be obtained.
        Any other error raised by the calls made here propagates unchanged
        (original type and traceback); it is no longer re-wrapped in a
        plain Exception, which used to discard that information.
    """

    if 'download-addr' in self._cfg:
      self._http = threadedHTTPServer('', self._cfg['http-port-range'])
      self.log.info("Starting http server...")
      # NOTE(review): execution is expected to continue past this call, so
      # threadedHTTPServer.serve_forever() is presumably non-blocking --
      # confirm against its implementation.
      self._http.serve_forever()
      self.log.debug("http://%s:%d" % (self._http.server_address[0],
                   self._http.server_address[1]))

    hodBaseService.start(self)

    ringXRAddress = None
    if 'ringmaster-xrs-addr' in self._cfg:
      ringmasterAddr = self._cfg['ringmaster-xrs-addr']
      ringXRAddress = "http://%s:%s/" % (ringmasterAddr[0],
                        ringmasterAddr[1])
      self.log.debug("Ringmaster at %s" % ringXRAddress)

    self.log.debug("Creating service registry XML-RPC client.")
    serviceClient = hodXRClient(to_http_url(
                                self._cfg['svcrgy-addr']))
    if ringXRAddress is None:
      self.log.info("Did not get ringmaster XML-RPC address. Fetching information from service registry.")

      # One immediate lookup followed by at most maxRetries further lookups,
      # sleeping 0.2 seconds before each retry.
      maxRetries = 3000
      attempt = 0
      while True:
        ringList = serviceClient.getServiceInfo(self._cfg['userid'],
            self._cfg['service-id'], 'ringmaster', 'hod')

        if attempt == 0:
          # Only the first answer is dumped to the debug log.
          self.log.debug(pprint.pformat(ringList))

        if len(ringList) and isinstance(ringList, list):
          ringXRAddress = ringList[0]['xrs']

        if ringXRAddress is not None or attempt >= maxRetries:
          break

        attempt = attempt + 1
        time.sleep(.2)

    if ringXRAddress is None:
      raise Exception("Could not get ringmaster XML-RPC server address.")

    self.log.debug("Creating ringmaster XML-RPC client.")
    ringClient = hodXRClient(ringXRAddress)

    # Identifier sent to the ringmaster: "<hostname>_<pid>".
    nodeId = self.hostname + "_" + str(os.getpid())

    if 'download-addr' in self._cfg:
      self.__download_package(ringClient)
    else:
      self.log.debug("Did not find a download address.")

    hadoopStartupTime = 2
    retryCount = 0

    cmdlist = ringClient.getCommand(nodeId)

    # Keep asking until the ringmaster hands out a non-empty command list.
    # The wait grows by one second per retry plus a random component.
    while cmdlist == []:
      baseDelay = retryCount + self._cfg['cmd-retry-initial-time']
      if retryCount == 0:
        # Only the very first wait additionally includes hadoopStartupTime.
        baseDelay = baseDelay + hadoopStartupTime
      sleepTime = baseDelay + random.uniform(0, self._cfg['cmd-retry-interval'])
      self.log.debug("Did not get command list. Waiting for %s seconds." % (sleepTime))
      time.sleep(sleepTime)
      retryCount = retryCount + 1
      cmdlist = ringClient.getCommand(nodeId)

    self.log.debug(pformat(cmdlist))
    cmdDescs = [CommandDesc(cmds['dict'], self.log) for cmds in cmdlist]

    self._cfg['commanddesc'] = cmdDescs

    self.log.info("Running hadoop commands...")

    self.__run_hadoop_commands(False)

    masterParams = []
    for _, cmd in self.__running.iteritems():
      masterParams.extend(cmd.filledInKeyVals)

    self.log.debug("printing getparams")
    self.log.debug(pformat(nodeId))
    self.log.debug(pformat(masterParams))
    # when this is on a required host, the ringMaster already has our masterParams
    if masterParams:
      ringClient.addMasterParams(nodeId, masterParams)
예제 #2
0
    def __init__(self, cfg, log, **kwds):
        """Start the node pool and the ringmaster services.

        In order, the constructor:
          * stores ``cfg`` and ``log`` and resets the idle/stop/exit state;
          * calls ``__initialize_signal_handlers()``;
          * obtains the node pool from ``NodePoolUtil.getNodePool`` and its
            service ID;
          * when ``cfg["ringmaster"]`` has a ``hadoop-tar-ball`` entry, copies
            the tarball into the current working directory (after
            ``cd_to_tempdir()``) and records its base name;
          * builds the HDFS and Map/Reduce service objects and stores them in
            ``self.serviceDict`` keyed by their ``getName()``;
          * starts the ringmaster XML-RPC service and, in download mode, an
            HTTP server whose address is registered as a tar source;
          * registers with the service registry when
            ``cfg["ringmaster"]["register"]`` is true;
          * fills in the ``cfg["hodring"]`` entries and initializes the job
            tracker monitor.

        Args:
            cfg: configuration mapping. Sections read here: "ringmaster",
                "hodring", "servicedesc", "nodepooldesc" and, for external
                services, "gridservice-hdfs" / "gridservice-mapred".
            log: logger used for all diagnostics.
            **kwds: accepted but not used by this constructor.

        Raises:
            Exception: if the copied tarball cannot be found in the working
                directory, or if the Hadoop major/minor version cannot be
                determined. Any error raised while building the service
                objects is logged and re-raised unchanged.
        """
        self.download = False
        self.httpServer = None
        self.cfg = cfg
        self.log = log
        self.__hostname = local_fqdn()
        self.workDirs = None

        # ref to the idle job tracker object.
        self.__jtMonitor = None
        self.__idlenessDetected = False
        self.__stopInProgress = False
        self.__isStopped = False  # to let main exit
        self.__exitCode = 0  # exit code with which the ringmaster main method should return

        self.workers_per_ring = self.cfg["ringmaster"]["workers_per_ring"]

        self.__initialize_signal_handlers()

        npd = self.cfg["nodepooldesc"]
        self.np = NodePoolUtil.getNodePool(npd, cfg, log)

        self.log.debug("Getting service ID.")

        self.serviceId = self.np.getServiceId()

        self.log.debug("Got service ID: %s" % self.serviceId)

        # Download mode is enabled purely by the presence of a tarball entry.
        self.tarSrcLoc = None
        if self.cfg["ringmaster"].has_key("hadoop-tar-ball"):
            self.download = True
            self.tarSrcLoc = self.cfg["ringmaster"]["hadoop-tar-ball"]

        self.cd_to_tempdir()

        if self.download:
            self.__copy_tarball(os.getcwd())
            self.basename = self.__find_tarball_in_dir(os.getcwd())
            if self.basename is None:
                raise Exception("Did not find tarball copied from %s in %s." % (self.tarSrcLoc, os.getcwd()))

        self.serviceAddr = to_http_url(self.cfg["ringmaster"]["svcrgy-addr"])

        self.log.debug("Service registry @ %s" % self.serviceAddr)

        self.serviceClient = hodXRClient(self.serviceAddr)
        self.serviceDict = {}
        try:
            sdl = self.cfg["servicedesc"]

            workDirs = self.getWorkDirs(cfg)

            hdfsDesc = sdl["hdfs"]

            # Determine hadoop Version
            hadoopVers = hadoopVersion(self.__getHadoopDir(), self.cfg["hodring"]["java-home"], self.log)

            if hadoopVers["major"] is None or hadoopVers["minor"] is None:
                raise Exception(
                    "Could not retrive the version of Hadoop."
                    + " Check the Hadoop installation or the value of the hodring.java-home variable."
                )
            # Every service object below is given the minor version only.
            minorVersion = int(hadoopVers["minor"])

            if hdfsDesc.isExternal():
                hdfs = HdfsExternal(hdfsDesc, workDirs, version=minorVersion)
                hdfs.setMasterParams(self.cfg["gridservice-hdfs"])
            else:
                hdfs = Hdfs(hdfsDesc, workDirs, 0, version=minorVersion, workers_per_ring=self.workers_per_ring)

            self.serviceDict[hdfs.getName()] = hdfs

            mrDesc = sdl["mapred"]
            if mrDesc.isExternal():
                mr = MapReduceExternal(mrDesc, workDirs, version=minorVersion)
                mr.setMasterParams(self.cfg["gridservice-mapred"])
            else:
                mr = MapReduce(mrDesc, workDirs, 1, version=minorVersion, workers_per_ring=self.workers_per_ring)

            self.serviceDict[mr.getName()] = mr
        except:
            # Catch-all is deliberate: log whatever went wrong, then re-raise
            # it untouched so the caller still sees the original error.
            self.log.critical(
                "Exception in creating Hdfs and Map/Reduce descriptor objects: %s." % get_exception_error_string()
            )
            self.log.debug(get_exception_string())
            raise

        # should not be starting these in a constructor
        ringMasterServer.startService(self.serviceDict, cfg, self.np, log, self)

        self.rpcserver = ringMasterServer.getAddress()

        self.httpAddress = None
        self.tarAddress = None
        hostname = socket.gethostname()
        if self.download:
            self.httpServer = threadedHTTPServer(hostname, self.cfg["ringmaster"]["http-port-range"])

            # NOTE(review): the constructor continues after this call, so
            # threadedHTTPServer.serve_forever() is presumably non-blocking --
            # confirm against its implementation.
            self.httpServer.serve_forever()
            self.httpAddress = "http://%s:%d/" % (self.httpServer.server_address[0], self.httpServer.server_address[1])
            self.tarAddress = "%s%s" % (self.httpAddress, self.basename)

            ringMasterServer.instance.logMasterSources.registerTarSource(hostname, self.tarAddress)
        else:
            self.log.debug("Download not set.")

        self.log.debug(
            "%s %s %s %s %s" % (self.cfg["ringmaster"]["userid"], self.serviceId, self.__hostname, "ringmaster", "hod")
        )

        if self.cfg["ringmaster"]["register"]:
            # The HTTP address is advertised only when an HTTP server exists.
            serviceInfo = {"xrs": self.rpcserver}
            if self.httpAddress:
                serviceInfo["http"] = self.httpAddress
            self.serviceClient.registerService(
                self.cfg["ringmaster"]["userid"],
                self.serviceId,
                self.__hostname,
                "ringmaster",
                "hod",
                serviceInfo,
            )
            # Logged only when registration actually took place.
            self.log.debug("Registered with serivce registry: %s." % self.serviceAddr)

        hodRingWorkDir = os.path.join(cfg["hodring"]["temp-dir"], "hodring" + "_" + getpass.getuser())

        self.cfg["hodring"]["hodring"] = [hodRingWorkDir]
        self.cfg["hodring"]["svcrgy-addr"] = self.cfg["ringmaster"]["svcrgy-addr"]
        self.cfg["hodring"]["service-id"] = self.np.getServiceId()

        self.cfg["hodring"]["ringmaster-xrs-addr"] = self.__url_to_addr(self.rpcserver)

        if self.tarSrcLoc is not None:
            cfg["hodring"]["download-addr"] = self.tarAddress

        self.__init_job_tracker_monitor(ringMasterServer.instance.logMasterSources)
예제 #3
0
    def __init__(self, cfg, log, **kwds):
        """Start the node pool and the ringmaster services.

        In order, the constructor:
          * stores ``cfg`` and ``log`` and resets the idle/stop/exit state;
          * calls ``__initialize_signal_handlers()``;
          * obtains the node pool from ``NodePoolUtil.getNodePool`` and its
            service ID;
          * when ``cfg['ringmaster']`` has a ``hadoop-tar-ball`` entry,
            copies the tarball into the current working directory (after
            ``cd_to_tempdir()``) and records its base name;
          * builds the HDFS and Map/Reduce service objects and stores them
            in ``self.serviceDict`` keyed by their ``getName()``;
          * starts the ringmaster XML-RPC service and, in download mode, an
            HTTP server whose address is registered as a tar source;
          * registers with the service registry when
            ``cfg['ringmaster']['register']`` is true;
          * fills in the ``cfg['hodring']`` entries and initializes the job
            tracker monitor.

        Args:
            cfg: configuration mapping. Sections read here: 'ringmaster',
                'hodring', 'servicedesc', 'nodepooldesc' and, for external
                services, 'gridservice-hdfs' / 'gridservice-mapred'.
            log: logger used for all diagnostics.
            **kwds: accepted but not used by this constructor.

        Raises:
            Exception: if the copied tarball cannot be found in the working
                directory, or if the Hadoop major/minor version cannot be
                determined. Any error raised while building the service
                objects is logged and re-raised unchanged.
        """
        self.download = False
        self.httpServer = None
        self.cfg = cfg
        self.log = log
        self.__hostname = local_fqdn()
        self.workDirs = None

        # ref to the idle job tracker object.
        self.__jtMonitor = None
        self.__idlenessDetected = False
        self.__stopInProgress = False
        self.__isStopped = False  # to let main exit
        self.__exitCode = 0  # exit code with which the ringmaster main method should return

        self.workers_per_ring = self.cfg['ringmaster']['workers_per_ring']

        self.__initialize_signal_handlers()

        npd = self.cfg['nodepooldesc']
        self.np = NodePoolUtil.getNodePool(npd, cfg, log)

        self.log.debug("Getting service ID.")

        self.serviceId = self.np.getServiceId()

        self.log.debug("Got service ID: %s" % self.serviceId)

        # Download mode is enabled purely by the presence of a tarball entry.
        self.tarSrcLoc = None
        if self.cfg['ringmaster'].has_key('hadoop-tar-ball'):
            self.download = True
            self.tarSrcLoc = self.cfg['ringmaster']['hadoop-tar-ball']

        self.cd_to_tempdir()

        if self.download:
            self.__copy_tarball(os.getcwd())
            self.basename = self.__find_tarball_in_dir(os.getcwd())
            if self.basename is None:
                raise Exception('Did not find tarball copied from %s in %s.' %
                                (self.tarSrcLoc, os.getcwd()))

        self.serviceAddr = to_http_url(self.cfg['ringmaster']['svcrgy-addr'])

        self.log.debug("Service registry @ %s" % self.serviceAddr)

        self.serviceClient = hodXRClient(self.serviceAddr)
        self.serviceDict = {}
        try:
            sdl = self.cfg['servicedesc']

            workDirs = self.getWorkDirs(cfg)

            hdfsDesc = sdl['hdfs']

            # Determine hadoop Version
            hadoopVers = hadoopVersion(self.__getHadoopDir(),
                                       self.cfg['hodring']['java-home'],
                                       self.log)

            if hadoopVers['major'] is None or hadoopVers['minor'] is None:
                raise Exception(
                    'Could not retrive the version of Hadoop.' +
                    ' Check the Hadoop installation or the value of the hodring.java-home variable.'
                )
            # Every service object below is given the minor version only.
            minorVersion = int(hadoopVers['minor'])

            if hdfsDesc.isExternal():
                hdfs = HdfsExternal(hdfsDesc, workDirs, version=minorVersion)
                hdfs.setMasterParams(self.cfg['gridservice-hdfs'])
            else:
                hdfs = Hdfs(hdfsDesc,
                            workDirs,
                            0,
                            version=minorVersion,
                            workers_per_ring=self.workers_per_ring)

            self.serviceDict[hdfs.getName()] = hdfs

            mrDesc = sdl['mapred']
            if mrDesc.isExternal():
                mr = MapReduceExternal(mrDesc, workDirs, version=minorVersion)
                mr.setMasterParams(self.cfg['gridservice-mapred'])
            else:
                mr = MapReduce(mrDesc,
                               workDirs,
                               1,
                               version=minorVersion,
                               workers_per_ring=self.workers_per_ring)

            self.serviceDict[mr.getName()] = mr
        except:
            # Catch-all is deliberate: log whatever went wrong, then re-raise
            # it untouched so the caller still sees the original error.
            self.log.critical(
                "Exception in creating Hdfs and Map/Reduce descriptor objects: %s."
                % get_exception_error_string())
            self.log.debug(get_exception_string())
            raise

        # should not be starting these in a constructor
        ringMasterServer.startService(self.serviceDict, cfg, self.np, log,
                                      self)

        self.rpcserver = ringMasterServer.getAddress()

        self.httpAddress = None
        self.tarAddress = None
        hostname = socket.gethostname()
        if self.download:
            self.httpServer = threadedHTTPServer(
                hostname, self.cfg['ringmaster']['http-port-range'])

            # NOTE(review): the constructor continues after this call, so
            # threadedHTTPServer.serve_forever() is presumably non-blocking --
            # confirm against its implementation.
            self.httpServer.serve_forever()
            self.httpAddress = "http://%s:%d/" % (
                self.httpServer.server_address[0],
                self.httpServer.server_address[1])
            self.tarAddress = "%s%s" % (self.httpAddress, self.basename)

            ringMasterServer.instance.logMasterSources.registerTarSource(
                hostname, self.tarAddress)
        else:
            self.log.debug("Download not set.")

        self.log.debug("%s %s %s %s %s" %
                       (self.cfg['ringmaster']['userid'], self.serviceId,
                        self.__hostname, 'ringmaster', 'hod'))

        if self.cfg['ringmaster']['register']:
            # The HTTP address is advertised only when an HTTP server exists.
            serviceInfo = {'xrs': self.rpcserver}
            if self.httpAddress:
                serviceInfo['http'] = self.httpAddress
            self.serviceClient.registerService(
                self.cfg['ringmaster']['userid'], self.serviceId,
                self.__hostname, 'ringmaster', 'hod', serviceInfo)
            # Logged only when registration actually took place.
            self.log.debug("Registered with serivce registry: %s." %
                           self.serviceAddr)

        hodRingWorkDir = os.path.join(cfg['hodring']['temp-dir'],
                                      'hodring' + '_' + getpass.getuser())

        self.cfg['hodring']['hodring'] = [
            hodRingWorkDir,
        ]
        self.cfg['hodring']['svcrgy-addr'] = self.cfg['ringmaster'][
            'svcrgy-addr']
        self.cfg['hodring']['service-id'] = self.np.getServiceId()

        self.cfg['hodring']['ringmaster-xrs-addr'] = self.__url_to_addr(
            self.rpcserver)

        if self.tarSrcLoc is not None:
            cfg['hodring']['download-addr'] = self.tarAddress

        self.__init_job_tracker_monitor(
            ringMasterServer.instance.logMasterSources)
예제 #4
0
    def start(self):
        """Run and maintain hodring commands.

        Steps, in order:
          1. If the config has a 'download-addr' entry, start the local HTTP
             server.
          2. Start the base service via hodBaseService.start().
          3. Determine the ringmaster XML-RPC address: taken from the
             'ringmaster-xrs-addr' config entry when present, otherwise
             looked up in the service registry (one lookup plus up to 3000
             retries, 0.2 seconds apart).
          4. Call __download_package() when 'download-addr' is configured.
          5. Poll the ringmaster until it returns a non-empty command list,
             wrap each entry in a CommandDesc, store the list under
             self._cfg['commanddesc'] and run the hadoop commands.
          6. Send the filledInKeyVals of all running commands to the
             ringmaster, if there are any.

        Raises:
            Exception: if no ringmaster XML-RPC address could be obtained.
                Any other error raised by the calls made here propagates
                unchanged (original type and traceback); it is no longer
                re-wrapped in a plain Exception, which used to discard that
                information.
        """

        if 'download-addr' in self._cfg:
            self._http = threadedHTTPServer('',
                                            self._cfg['http-port-range'])
            self.log.info("Starting http server...")
            # NOTE(review): execution is expected to continue past this call,
            # so threadedHTTPServer.serve_forever() is presumably
            # non-blocking -- confirm against its implementation.
            self._http.serve_forever()
            self.log.debug("http://%s:%d" % (self._http.server_address[0],
                                             self._http.server_address[1]))

        hodBaseService.start(self)

        ringXRAddress = None
        if 'ringmaster-xrs-addr' in self._cfg:
            ringmasterAddr = self._cfg['ringmaster-xrs-addr']
            ringXRAddress = "http://%s:%s/" % (ringmasterAddr[0],
                                               ringmasterAddr[1])
            self.log.debug("Ringmaster at %s" % ringXRAddress)

        self.log.debug("Creating service registry XML-RPC client.")
        serviceClient = hodXRClient(to_http_url(self._cfg['svcrgy-addr']))
        if ringXRAddress is None:
            self.log.info(
                "Did not get ringmaster XML-RPC address. Fetching information from service registry."
            )

            # One immediate lookup followed by at most maxRetries further
            # lookups, sleeping 0.2 seconds before each retry.
            maxRetries = 3000
            attempt = 0
            while True:
                ringList = serviceClient.getServiceInfo(
                    self._cfg['userid'], self._cfg['service-id'],
                    'ringmaster', 'hod')

                if attempt == 0:
                    # Only the first answer is dumped to the debug log.
                    self.log.debug(pprint.pformat(ringList))

                if len(ringList) and isinstance(ringList, list):
                    ringXRAddress = ringList[0]['xrs']

                if ringXRAddress is not None or attempt >= maxRetries:
                    break

                attempt = attempt + 1
                time.sleep(.2)

        if ringXRAddress is None:
            raise Exception(
                "Could not get ringmaster XML-RPC server address.")

        self.log.debug("Creating ringmaster XML-RPC client.")
        ringClient = hodXRClient(ringXRAddress)

        # Identifier sent to the ringmaster: "<hostname>_<pid>".
        nodeId = self.hostname + "_" + str(os.getpid())

        if 'download-addr' in self._cfg:
            self.__download_package(ringClient)
        else:
            self.log.debug("Did not find a download address.")

        hadoopStartupTime = 2
        retryCount = 0

        cmdlist = ringClient.getCommand(nodeId)

        # Keep asking until the ringmaster hands out a non-empty command
        # list. The wait grows by one second per retry plus a random
        # component.
        while cmdlist == []:
            baseDelay = retryCount + self._cfg['cmd-retry-initial-time']
            if retryCount == 0:
                # Only the very first wait additionally includes
                # hadoopStartupTime.
                baseDelay = baseDelay + hadoopStartupTime
            sleepTime = baseDelay + random.uniform(
                0, self._cfg['cmd-retry-interval'])
            self.log.debug(
                "Did not get command list. Waiting for %s seconds." %
                (sleepTime))
            time.sleep(sleepTime)
            retryCount = retryCount + 1
            cmdlist = ringClient.getCommand(nodeId)

        self.log.debug(pformat(cmdlist))
        cmdDescs = [CommandDesc(cmds['dict'], self.log) for cmds in cmdlist]

        self._cfg['commanddesc'] = cmdDescs

        self.log.info("Running hadoop commands...")

        self.__run_hadoop_commands(False)

        masterParams = []
        for _, cmd in self.__running.iteritems():
            masterParams.extend(cmd.filledInKeyVals)

        self.log.debug("printing getparams")
        self.log.debug(pformat(nodeId))
        self.log.debug(pformat(masterParams))
        # when this is on a required host, the ringMaster already has our masterParams
        if masterParams:
            ringClient.addMasterParams(nodeId, masterParams)