Example #1
0
    def create(self, config):
        """Parse a JSON job configuration and spawn a crawler worker process.

        ``config`` is a JSON-encoded string (decoded with ``cjson``). Its
        ``options.name`` entry is expected to be a list whose first element
        is the base job name; it is rewritten in place to
        ``<name>_<YYYYmmddHHMMSS>_<nodename>`` so the worker can be told
        apart from workers on other nodes.

        Returns a ``(success, message)`` tuple: ``(True, <worker name>)``
        when the worker process was started, otherwise
        ``(False, 'Could not create new worker')``.
        """
        self.logger.debug('Creating new worker')

        # Everything up to the Process object can fail on bad input (invalid
        # JSON, missing 'options'/'name', crawler construction errors).  The
        # original only guarded with ``crawler is not None``, which is always
        # true for a constructor call, so the failure result was unreachable.
        try:
            report = Report()
            status = Status()

            # Configuration is stored as JSON text so it can live in the
            # database and be passed between server and clients.
            config = cjson.decode(config)

            # Rewrite the name as name_timestamp_nodename to separate this
            # job/worker from others when many nodes are running.
            options = config.get('options')
            base_name = options.get('name')[0]
            version = datetime.today().strftime('%Y%m%d%H%M%S')
            name = '%s_%s_%s' % (base_name, version, getnodename())
            options['name'] = [name]

            crawler = Crawler(config, report=report, status=status)

            # The crawler runs in its own process, controlled through the
            # Process handle kept in self.workers.
            worker = Process(target=crawler.run, name=name)

            # attach shared objects to worker
            worker.report = report
            worker.status = status
        except Exception as error:
            self.logger.error('Worker creation failed: %s' % error)
            self.logger.error('Could not create new worker')
            return (False, 'Could not create new worker')

        self.workers.update({name: worker})
        self.logger.info('Created new worker: %s with status %s' % (name,
                                                                    worker.status.get()))

        # get worker ready for commands
        try:
            worker.start()
        except Exception as error:
            # Do not leave a never-started process in the registry.
            # NOTE(review): assumes self.workers is dict-like (supports pop).
            self.workers.pop(name, None)
            self.logger.error('Worker start failed: %s' % error)
            self.logger.error('Could not create new worker')
            return (False, 'Could not create new worker')

        return (True, '%s' % name)