Esempio n. 1
0
    def __init__(self):
        """Initialise the base object, the worker registry and the logger."""
        ObjBase.__init__(self)

        # Registry of the workers created by this service: {name: workerObj}
        self.workers = {}

        # The logger name carries the node name returned by getnodename().
        logger_name = 'WorkerService_%s' % getnodename()
        self.logger = get_logger(logger_name)
Esempio n. 2
0
    def create(self, config):
        """Parse a JSON configuration and spawn a crawler worker for it.

        `config` is a JSON-encoded string that is decoded with cjson. JSON
        was chosen so the configuration can be stored as text in the
        database and passed easily between the server and its clients
        (web, console).

        Returns ``(True, worker_name)`` after the worker process has been
        started, or ``(False, 'Could not create new worker')`` when no
        crawler object is available.
        """
        self.logger.debug('Creating new worker')

        # Objects shared between this service and the spawned worker.
        shared_report = Report()
        shared_status = Status()

        settings = cjson.decode(config)
        options = settings.get('options')

        # The name is rewritten as name_timestamp_nodename so that a
        # job/worker is easy to tell apart from the others when many nodes
        # are running. (Domo is a distributed job system in which a job is a
        # crawling process; remote nodes are controlled from a single client
        # through Pyro.)
        base_name = options.get('name')[0]
        stamp = datetime.today().strftime('%Y%m%d%H%M%S')
        worker_name = '%s_%s_%s' % (base_name, stamp, getnodename())
        options['name'] = [worker_name]

        crawler = Crawler(settings, report=shared_report, status=shared_status)

        if crawler is None:
            self.logger.error('Could not create new worker')
            return (False, 'Could not create new worker')

        # Spawn the crawler as a separate process; the process library lets
        # spawned processes be controlled much like threads.
        worker = Process(target=crawler.run, name=worker_name)

        # Attach the shared objects so they can be reached through the worker.
        worker.report = shared_report
        worker.status = shared_status

        self.workers[worker_name] = worker
        current_status = worker.status.get()
        self.logger.info('Created new worker: %s with status %s' % (worker_name,
                                                                    current_status))

        # Get the worker ready for commands.
        worker.start()

        return (True, '%s' % worker_name)
Esempio n. 3
0
 def exit(self):
     """Ask every active worker to stop and wait for it to finish.

     Each worker for which ``self.checkworker(name)`` is truthy has its
     shared status set to 'killed' and is then joined.

     Returns a ``(ok, message)`` tuple: ``(True, 'Node <nodename> shutdown')``
     on success, or ``(False, <traceback text>)`` if an exception was raised
     while shutting the workers down.
     """
     self.logger.info('Node shutting down.. Terminating workers..')
     try:
         # Iterate over a snapshot so the loop is unaffected if the registry
         # changes while the workers are being shut down.
         for name, worker in list(self.workers.items()):
             # hack for enabling the daemoned mode
             if not self.checkworker(name):
                 continue
             worker.status.set('killed')
             worker.join()
             self.logger.info('Worker terminated: %s' % name)
     except Exception:
         # Format the traceback once so the logged text and the returned
         # text are guaranteed to match.
         error_text = traceback.format_exc()
         self.logger.error(error_text)
         return (False, error_text)
     # The message is formatted with '%'; a comma here would return a
     # 3-tuple holding the raw template instead of (ok, message).
     return (True, 'Node %s shutdown' % getnodename())
Esempio n. 4
0
def serverprocess():
    """Serve a WorkerService through a Pyro daemon until interrupted.

    The service is connected to the daemon under the fixed name
    'jobservice'. Nameserver registration (as
    '<domain>.<hostname>.jobservice') and the publisher announcement of the
    new node are currently disabled. On KeyboardInterrupt the service is
    told to shut down all of its workers.
    """
    pyro_config = Pyro.config
    pyro_config.PYRO_DETAILED_TRACEBACK = True
    pyro_config.PYRO_TRACELEVEL = 0
    pyro_config.PYRO_USER_TRACELEVEL = 4
    pyro_config.PYRO_MULTITHREADED = 0  # necessary for pyprocessing
    Pyro.core.initServer()

    # Only referenced by the (disabled) nameserver registration.
    hostname = getnodename()

    daemon = Pyro.core.Daemon()
    service = WorkerService()
    daemon.connect(service, 'jobservice')

    try:
        while True:
            # NOTE(review): checkworker() is called on every pass and its
            # return value -- not the method itself -- is what is handed to
            # handleRequests as `callback`. Confirm this is intended.
            check_result = service.checkworker()
            daemon.handleRequests(timeout=60, callback=check_result)
    except KeyboardInterrupt:
        # tell service to shut down all workers
        service.exit()
Esempio n. 5
0
def serverprocess():
    """Serve a WorkerService through a Pyro daemon until interrupted.

    The service is connected to the daemon under the fixed name
    'jobservice'. Nameserver registration (as
    '<domain>.<hostname>.jobservice') and the publisher announcement of the
    new node are currently disabled. On KeyboardInterrupt the service is
    told to shut down all of its workers.
    """
    pyro_config = Pyro.config
    pyro_config.PYRO_DETAILED_TRACEBACK = True
    pyro_config.PYRO_TRACELEVEL = 0
    pyro_config.PYRO_USER_TRACELEVEL = 4
    pyro_config.PYRO_MULTITHREADED = 0  # necessary for pyprocessing
    Pyro.core.initServer()

    # Only referenced by the (disabled) nameserver registration.
    hostname = getnodename()

    daemon = Pyro.core.Daemon()
    service = WorkerService()
    daemon.connect(service, 'jobservice')

    try:
        while True:
            # NOTE(review): checkworker() is called on every pass and its
            # return value -- not the method itself -- is what is handed to
            # handleRequests as `callback`. Confirm this is intended.
            check_result = service.checkworker()
            daemon.handleRequests(timeout=60, callback=check_result)
    except KeyboardInterrupt:
        # tell service to shut down all workers
        service.exit()