def run(self, args):
    """Create a new project skeleton in the current working directory.

    ``args.project[0]`` is used as the project name. A directory with that
    name is created under ``os.getcwd()`` (if missing) and two files are
    generated into it from the templates found in
    ``<cola dir>/templates``: ``<name>.yaml`` from ``project.yaml.tmpl`` and
    ``__init__.py`` from ``project.py.tmpl``. Each template is passed
    through ``self._replace_variable`` with ``{'name': project_name}``.

    If the project name contains whitespace, an error is logged and nothing
    is created.
    """
    project_name = args.project[0]
    # Validate first and bail out: previously the error was only logged and
    # the project was created anyway. Any whitespace (not just ' ') is
    # rejected, matching the error message.
    if any(ch.isspace() for ch in project_name):
        self.logger.error('project name cannot contain whitespace')
        return

    ctx = Context(is_client=True, local_mode=True)

    project_dir = os.path.join(os.getcwd(), project_name)
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    template_dir = os.path.join(ctx.get_cola_dir(), 'templates')
    for filename, src_filename in (('%s.yaml' % project_name, 'project.yaml.tmpl'),
                                   ('__init__.py', 'project.py.tmpl')):
        # Render the template before opening the destination for writing so
        # that a failure here does not leave a truncated output file behind.
        with open(os.path.join(template_dir, src_filename)) as temp_fp:
            content = self._replace_variable(temp_fp.read(), {'name': project_name})
        with open(os.path.join(project_dir, filename), 'w') as fp:
            fp.write(content)

    self.logger.info('create project: %s' % project_name)
def run(self, args):
    """Start a worker bound to ``args.start`` and attached to ``args.master``.

    Both options must be given; otherwise an error is logged and nothing is
    started.
    """
    option_missing = args.start is None or args.master is None
    if option_missing:
        self.logger.error('unknown command options')
        return

    worker_ctx = Context(master_addr=args.master, ip=args.start)
    worker_ctx.start_worker()
    self.logger.info('start worker at: %s' % worker_ctx.worker_addr)
def testMasterWorker(self):
    """Run ``self.job_name`` on a master and a worker started from one Context.

    The master is started on 127.0.0.1 with ``self.working_dir``; a worker
    is started from the same context, the job is submitted with
    ``wait_for_workers=True``, and the test polls until the master reports
    no running jobs.
    """
    ctx = Context(is_master=True, master_addr='127.0.0.1',
                  working_dir=self.working_dir)
    master = ctx.start_master()
    try:
        ctx.start_worker()
        master.run_job(self.job_name, wait_for_workers=True)
        while master.has_running_jobs():
            time.sleep(5)
    finally:
        # Always shut the master down, even when starting the worker or
        # running the job raises, so a failure does not leave it running.
        master.shutdown()
def run(self, args):
    """Handle the job command against the master at ``args.master``.

    Exactly one of the following is performed, checked in this order:

    * ``args.list is True`` -- log every job known to the master (id,
      ``info['name']``, ``info['status']``), or 'no jobs exist'.
    * ``args.kill`` set -- resolve it with ``self._get_matched_job_name``
      and, if a job id is found, kill that job.
    * ``args.upload`` set -- import the job description at that path to
      obtain its ``uniq_name``, copy the job directory into the system temp
      directory, zip it (passing ``type_filters=('pyc', )`` to
      ``ZipHandler.compress``), and send the archive to the master. When
      ``args.run == 'U'`` the uploaded job is then submitted via
      ``client_call(..., 'run_job', job_id, True)``.

    Failures to locate or import the upload path are logged and the method
    returns without uploading.
    """
    master_addr = args.master
    ctx = Context(is_client=True, master_addr=master_addr)

    if args.list is True:
        jobs = ctx.list_jobs()
        self.logger.info('list jobs at master: %s' % ctx.master_addr)
        # items() rather than iteritems(): works on Python 2 and 3 dicts.
        for job_id, info in jobs.items():
            self.logger.info(
                '====> job id: %s, job description: %s, status: %s' %
                (job_id, info['name'], info['status']))
        if not jobs:
            self.logger.info('no jobs exist')
    elif args.kill is not None:
        job_id = self._get_matched_job_name(ctx, args.kill)
        if job_id is not None:
            ctx.kill_job(job_id)
            self.logger.info('killed job: %s' % job_id)
    elif args.upload is not None:
        path = os.path.abspath(args.upload)
        if not os.path.exists(path):
            self.logger.error('upload path does not exist')
            return

        try:
            job_id = import_job_desc(path).uniq_name
        except Exception as e:
            self.logger.exception(e)
            self.logger.error('uploading job description failed')
            return

        new_upload_dir = os.path.join(tempfile.gettempdir(), job_id)
        temp_filename = os.path.join(tempfile.gettempdir(), job_id + '.zip')
        # Drop any stale staging directory left behind by an earlier run.
        if os.path.exists(new_upload_dir):
            shutil.rmtree(new_upload_dir)
        try:
            # Staging and compression are inside the try so that a failure in
            # either step still removes the temporary artifacts.
            shutil.copytree(path, new_upload_dir)
            ZipHandler.compress(temp_filename, new_upload_dir,
                                type_filters=('pyc', ))
            FileTransportClient(ctx.master_addr, temp_filename).send_file()
        finally:
            # Best-effort cleanup: the archive may not exist if compression
            # failed, and cleanup errors must not mask the real exception.
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
            shutil.rmtree(new_upload_dir, ignore_errors=True)
        self.logger.info('upload job <id: %s> finished' % job_id)

        if args.run == 'U':
            client_call(ctx.master_addr, 'run_job', job_id, True)
            self.logger.info('submit job <id: %s> to the cluster' % job_id)
def run(self, args):
    """Handle the job command against the master at ``args.master``.

    Exactly one of the following is performed, checked in this order:

    * ``args.list is True`` -- log every job known to the master (id,
      ``info['name']``, ``info['status']``), or 'no jobs exist'.
    * ``args.kill`` set -- resolve it with ``self._get_matched_job_name``
      and, if a job id is found, kill that job.
    * ``args.upload`` set -- import the job description at that path to
      obtain its ``uniq_name``, copy the job directory into the system temp
      directory, zip it (passing ``type_filters=('pyc', )`` to
      ``ZipHandler.compress``), and send the archive to the master. When
      ``args.run == 'U'`` the uploaded job is then submitted via
      ``client_call(..., 'run_job', job_id, True)``.

    Failures to locate or import the upload path are logged and the method
    returns without uploading.
    """
    master_addr = args.master
    ctx = Context(is_client=True, master_addr=master_addr)

    if args.list is True:
        jobs = ctx.list_jobs()
        self.logger.info('list jobs at master: %s' % ctx.master_addr)
        # items() rather than iteritems(): works on Python 2 and 3 dicts.
        for job_id, info in jobs.items():
            self.logger.info(
                '====> job id: %s, job description: %s, status: %s' %
                (job_id, info['name'], info['status']))
        if not jobs:
            self.logger.info('no jobs exist')
    elif args.kill is not None:
        job_id = self._get_matched_job_name(ctx, args.kill)
        if job_id is not None:
            ctx.kill_job(job_id)
            self.logger.info('killed job: %s' % job_id)
    elif args.upload is not None:
        path = os.path.abspath(args.upload)
        if not os.path.exists(path):
            self.logger.error('upload path does not exist')
            return

        try:
            job_id = import_job_desc(path).uniq_name
        except Exception as e:
            self.logger.exception(e)
            self.logger.error('uploading job description failed')
            return

        new_upload_dir = os.path.join(tempfile.gettempdir(), job_id)
        temp_filename = os.path.join(tempfile.gettempdir(), job_id + '.zip')
        # Drop any stale staging directory left behind by an earlier run.
        if os.path.exists(new_upload_dir):
            shutil.rmtree(new_upload_dir)
        try:
            # Staging and compression are inside the try so that a failure in
            # either step still removes the temporary artifacts.
            shutil.copytree(path, new_upload_dir)
            ZipHandler.compress(temp_filename, new_upload_dir,
                                type_filters=('pyc', ))
            FileTransportClient(ctx.master_addr, temp_filename).send_file()
        finally:
            # Best-effort cleanup: the archive may not exist if compression
            # failed, and cleanup errors must not mask the real exception.
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
            shutil.rmtree(new_upload_dir, ignore_errors=True)
        self.logger.info('upload job <id: %s> finished' % job_id)

        if args.run == 'U':
            client_call(ctx.master_addr, 'run_job', job_id, True)
            self.logger.info('submit job <id: %s> to the cluster' % job_id)
def run(self, args):
    """Dispatch the master command: start, kill, or list workers.

    The options are checked in the order ``args.start``, ``args.kill``,
    ``args.list``; the first one that is not ``None`` supplies the master
    address and selects the action. If none is set, an error is logged.
    """
    if args.start is not None:
        master_ctx = Context(is_master=True, master_addr=args.start, working_dir=args.working)
        master_ctx.start_master()
        self.logger.info("start master at: %s" % master_ctx.master_addr)
        return

    if args.kill is not None:
        client_ctx = Context(is_client=True, master_addr=args.kill)
        client_ctx.kill_master()
        self.logger.info("kill master at: %s" % client_ctx.master_addr)
        return

    if args.list is None:
        self.logger.error("unknown command options")
        return

    client_ctx = Context(is_client=True, master_addr=args.list)
    self.logger.info("list workers at master: %s" % client_ctx.master_addr)
    for name, state in client_ctx.list_workers():
        self.logger.info("====> worker: %s, status: %s" % (name, state))
def run(self, args):
    """Create a new project skeleton in the current working directory.

    ``args.project[0]`` is used as the project name. A directory with that
    name is created under ``os.getcwd()`` (if missing) and two files are
    generated into it from the templates found in
    ``<cola dir>/templates``: ``<name>.yaml`` from ``project.yaml.tmpl`` and
    ``__init__.py`` from ``project.py.tmpl``. Each template is passed
    through ``self._replace_variable`` with ``{"name": project_name}``.

    If the project name contains whitespace, an error is logged and nothing
    is created.
    """
    project_name = args.project[0]
    # Validate first and bail out: previously the error was only logged and
    # the project was created anyway. Any whitespace (not just " ") is
    # rejected, matching the error message.
    if any(ch.isspace() for ch in project_name):
        self.logger.error("project name cannot contain whitespace")
        return

    ctx = Context(is_client=True, local_mode=True)

    project_dir = os.path.join(os.getcwd(), project_name)
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    template_dir = os.path.join(ctx.get_cola_dir(), "templates")
    for filename, src_filename in (
        ("%s.yaml" % project_name, "project.yaml.tmpl"),
        ("__init__.py", "project.py.tmpl"),
    ):
        # Render the template before opening the destination for writing so
        # that a failure here does not leave a truncated output file behind.
        with open(os.path.join(template_dir, src_filename)) as temp_fp:
            content = self._replace_variable(temp_fp.read(), {"name": project_name})
        with open(os.path.join(project_dir, filename), "w") as fp:
            fp.write(content)

    self.logger.info("create project: %s" % project_name)
# NOTE(review): the four statements below are the tail of a function whose
# ``def`` line is outside this view; they are kept unchanged.
    username = str(kw['username'])
    passwd = str(kw['password'])
    loginer = WeiboLogin(opener, username, passwd)
    return loginer.login()

# URL routing table handed to the job description. Each Url is built from a
# regex, a short name and a parser class (presumably the parser applied to
# matching URLs -- confirm against cola's Url/UrlPatterns).
url_patterns = UrlPatterns(
    Url(r'http://weibo.com/aj/mblog/mbloglist.*', 'micro_blog', MicroBlogParser),
    Url(r'http://weibo.com/aj/.+/big.*', 'forward_comment_like', ForwardCommentLikeParser),
    Url(r'http://weibo.com/\d+/info', 'user_info', UserInfoParser),
    Url(r'http://weibo.com/\d+/follow.*', 'follows', UserFriendParser),
    Url(r'http://weibo.com/\d+/fans.*', 'fans', UserFriendParser))

def get_job_desc():
    """Return the JobDescription for the 'sina weibo crawler' job.

    It is built from the module-level ``url_patterns``, ``MechanizeOpener``,
    ``user_config`` and ``starts``, with ``WeiboUserBundle`` as the unit
    class and ``login_hook`` as the login hook.
    """
    return JobDescription('sina weibo crawler', url_patterns, MechanizeOpener, user_config, starts, unit_cls=WeiboUserBundle, login_hook=login_hook)

# When executed as a script, run the job found in this file's directory using
# a local-mode Context.
if __name__ == "__main__":
    from cola.context import Context
    ctx = Context(local_mode=True)
    ctx.run_job(os.path.dirname(os.path.abspath(__file__)))
def run(self, args):
    """Dispatch the master command: start, kill, or list workers.

    The options are checked in the order ``args.start``, ``args.kill``,
    ``args.list``; the first one that is not ``None`` supplies the master
    address and selects the action. If none is set, an error is logged.
    """
    if args.start is not None:
        master_ctx = Context(is_master=True, master_addr=args.start)
        master_ctx.start_master()
        self.logger.info('start master at: %s' % master_ctx.master_addr)
        return

    if args.kill is not None:
        client_ctx = Context(is_client=True, master_addr=args.kill)
        client_ctx.kill_master()
        self.logger.info('kill master at: %s' % client_ctx.master_addr)
        return

    if args.list is None:
        self.logger.error('unknown command options')
        return

    client_ctx = Context(is_client=True, master_addr=args.list)
    self.logger.info('list workers at master: %s' % client_ctx.master_addr)
    for name, state in client_ctx.list_workers():
        self.logger.info('====> worker: %s, status: %s' % (name, state))
from cola.job import JobDescription

from login import WeiboLogin
from parsers import MicroBlogParser, ForwardCommentLikeParser,\
    UserInfoParser, UserFriendParser
from conf import starts, user_config, instances
from bundle import WeiboUserBundle


def login_hook(opener, **kw):
    """Log in to Weibo through ``opener`` and return the login result.

    The ``username`` and ``password`` keyword arguments are required; both
    are converted with ``str`` before being handed to ``WeiboLogin``.
    """
    credentials = [str(kw[field]) for field in ('username', 'password')]
    return WeiboLogin(opener, *credentials).login()


# URL routing table: one Url entry per page type the crawler handles.
url_patterns = UrlPatterns(
    Url(r'http://weibo.com/aj/mblog/mbloglist.*',
        'micro_blog', MicroBlogParser),
    Url(r'http://weibo.com/aj/.+/big.*',
        'forward_comment_like', ForwardCommentLikeParser),
    Url(r'http://weibo.com/\d+/info',
        'user_info', UserInfoParser),
    Url(r'http://weibo.com/\d+/follow.*',
        'follows', UserFriendParser),
    Url(r'http://weibo.com/\d+/fans.*',
        'fans', UserFriendParser),
)


def get_job_desc():
    """Build the JobDescription for the 'sina weibo crawler' job."""
    extras = dict(unit_cls=WeiboUserBundle, login_hook=login_hook)
    return JobDescription('sina weibo crawler', url_patterns,
                          MechanizeOpener, user_config, starts, **extras)


# Script entry point: run the job in this file's directory in local mode.
if __name__ == "__main__":
    from cola.context import Context
    ctx = Context(local_mode=True)
    ctx.run_job(os.path.dirname(os.path.abspath(__file__)))