def create(self, config):
    """Parse a JSON job configuration and spawn a crawler worker process.

    The configuration is decoded with ``cjson``, the job name found at
    ``options.name[0]`` is rewritten to ``<name>_<timestamp>_<nodename>``
    and stored back as a single-element list, and a ``Crawler`` built from
    that configuration is run in a separate ``Process``. The process is
    registered in ``self.workers`` under the rewritten name and started.

    Args:
        config: JSON text. It must decode to a mapping with an ``options``
            mapping whose ``name`` entry is a non-empty sequence.

    Returns:
        ``(True, name)`` with the rewritten worker name on success, or
        ``(False, 'Could not create new worker')`` when the configuration
        is invalid or no crawler could be built.
    """
    failure = 'Could not create new worker'
    self.logger.debug('Creating new worker')

    # Configuration is stored as JSON: it can be kept as text in the
    # database and passed easily between server and clients (web, console).
    # Invalid input is reported through the (False, message) return instead
    # of escaping as a raw decoding or lookup exception.
    try:
        config = cjson.decode(config)
    except (cjson.DecodeError, TypeError) as err:
        self.logger.error('%s: %s' % (failure, err))
        return (False, failure)

    try:
        options = config['options']
        base_name = options['name'][0]
    except (KeyError, IndexError, TypeError) as err:
        self.logger.error('%s: invalid options.name in config (%r)'
                          % (failure, err))
        return (False, failure)

    # The name is rewritten as name_timestamp_nodename so this job/worker is
    # easy to tell apart from others when many nodes are running.
    version = datetime.today().strftime('%Y%m%d%H%M%S')
    name = '%s_%s_%s' % (base_name, version, getnodename())
    options['name'] = [name]

    # Shared objects handed to the crawler and attached to the worker below.
    report = Report()
    status = Status()

    crawler = Crawler(config, report=report, status=status)
    # NOTE(review): Crawler is defined outside this block; the None guard is
    # kept from the original in case it can yield None -- confirm.
    if crawler is None:
        self.logger.error(failure)
        return (False, failure)

    # Spawn the crawler in its own process; the multiprocessing library lets
    # spawned processes be controlled much like threads.
    worker = Process(target=crawler.run, name=name)

    # attach shared objects to worker
    worker.report = report
    worker.status = status
    self.workers.update({name: worker})
    self.logger.info('Created new worker: %s with status %s'
                     % (name, worker.status.get()))

    # get worker ready for commands
    worker.start()
    return (True, name)