def run(manifest, debug=False, pause_on_error=False, dry_run=False):
    """Run the bootstrapping process and roll back if it is interrupted.

    :param Manifest manifest: The manifest to run the bootstrapping process for
    :param bool debug: Whether to turn debugging mode on
    :param bool pause_on_error: Whether to wait for the user to press Enter
                                before the rollback starts
    :param bool dry_run: Don't actually run the tasks (forwarded to both the
                         main and the rollback tasklist)
    :return: The bootstrap information object created for this run
    :raises: The exception that interrupted the run, re-raised after rollback
    """
    import logging
    log = logging.getLogger(__name__)

    from tasklist import load_tasks
    from tasklist import TaskList
    log.info('Generating tasklist')
    # 'resolve_tasks' is the name of the function to call on the provider and plugins
    tasklist = TaskList(load_tasks('resolve_tasks', manifest))

    # This object is handed to every task, in the normal run and in the rollback
    from bootstrapinfo import BootstrapInformation
    bootstrap_info = BootstrapInformation(manifest=manifest, debug=debug)

    def counter_task(taskset, task, counter):
        """Schedule `counter` for rollback when `task` ran but `counter` did not

        :param set taskset: The taskset to add the rollback task to
        :param Task task: The task to look for in the completed tasks list
        :param Task counter: The task to add to the rollback tasklist
        """
        if task not in tasklist.tasks_completed:
            return
        if counter in tasklist.tasks_completed:
            return
        taskset.add(counter)

    try:
        # Run everything the tasklist has gathered
        tasklist.run(info=bootstrap_info, dry_run=dry_run)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as error:
        # Log the failure (Ctrl-C included) before undoing anything
        log.exception(error)
        if pause_on_error:
            # Gives the user a chance to inspect the volume before it is rolled back
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        # The provider and plugins decide what goes into the rollback; the
        # arguments after the first two are passed straight through to them.
        completed = tasklist.tasks_completed
        rollback_tasklist = TaskList(
            load_tasks('resolve_rollback_tasks', manifest, completed, counter_task))

        rollback_tasklist.run(info=bootstrap_info, dry_run=dry_run)
        log.info('Successfully completed rollback')
        # Let the caller see the original failure
        raise
    return bootstrap_info
def run(args):
    """Run the bootstrapping process and roll back if it is interrupted.

    Args:
        args: Parsed commandline arguments; the attributes read here are
            ``manifest``, ``debug``, ``dry_run`` and ``pause_on_error``
    """
    from manifest import Manifest
    manifest = Manifest(args.manifest)

    from tasklist import TaskList
    tasklist = TaskList()
    # 'resolve_tasks' is the name of the function to call on the provider and plugins
    tasklist.load('resolve_tasks', manifest)

    # This object is handed to every task, in the normal run and in the rollback
    from bootstrapinfo import BootstrapInformation
    bootstrap_info = BootstrapInformation(manifest=manifest, debug=args.debug)

    try:
        # Run everything the tasklist has gathered
        tasklist.run(info=bootstrap_info, dry_run=args.dry_run)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as error:
        # Log the failure (Ctrl-C included) before undoing anything
        log.exception(error)
        if args.pause_on_error:
            # Gives the user a chance to inspect the volume before it is rolled back
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        # A separate tasklist collects the tasks needed for the rollback
        rollback_tasklist = TaskList()

        def counter_task(task, counter):
            """Schedule `counter` for rollback when `task` ran but `counter` did not

            Args:
                task (Task): The task to look for in the completed tasks list
                counter (Task): The task to add to the rollback tasklist
            """
            if task not in tasklist.tasks_completed:
                return
            if counter in tasklist.tasks_completed:
                return
            rollback_tasklist.tasks.add(counter)

        # The provider and plugins decide what goes into the rollback; the
        # arguments after the first two are passed straight through to them.
        rollback_tasklist.load('resolve_rollback_tasks', manifest, counter_task)

        rollback_tasklist.run(info=bootstrap_info, dry_run=args.dry_run)
        log.info('Successfully completed rollback')
def run(opts):
    """Runs the bootstrapping process and rolls back if it is interrupted

    :param dict opts: Dictionary of options from the commandline; the keys
                      read here are 'MANIFEST', '--debug', '--dry-run' and
                      '--pause-on-error'
    :raises: The exception that interrupted the run, re-raised after rollback
    """
    # Load the manifest
    from manifest import Manifest
    manifest = Manifest(opts['MANIFEST'])

    # Get the tasklist
    from tasklist import load_tasks
    from tasklist import TaskList
    # 'resolve_tasks' is the name of the function to call on the provider and plugins
    tasks = load_tasks('resolve_tasks', manifest)
    tasklist = TaskList(tasks)

    # Create the bootstrap information object that'll be used throughout the bootstrapping process
    from bootstrapinfo import BootstrapInformation
    bootstrap_info = BootstrapInformation(manifest=manifest, debug=opts['--debug'])

    try:
        # Run all the tasks the tasklist has gathered
        tasklist.run(info=bootstrap_info, dry_run=opts['--dry-run'])
        # We're done! :-)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as e:
        # When an error occurs (Ctrl-C included), log it and begin rollback
        log.exception(e)
        if opts['--pause-on-error']:
            # The --pause-on-error is useful when the user wants to inspect the volume before rollback
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        # Create a useful little function for the provider and plugins to use,
        # when figuring out what tasks should be added to the rollback list.
        def counter_task(taskset, task, counter):
            """counter_task() adds the third argument to the rollback tasklist
            if the second argument is present in the list of completed tasks
            and the third argument is not

            :param set taskset: The taskset to add the rollback task to
            :param Task task: The task to look for in the completed tasks list
            :param Task counter: The task to add to the rollback tasklist
            """
            if task in tasklist.tasks_completed and counter not in tasklist.tasks_completed:
                taskset.add(counter)

        # Ask the provider and plugins for tasks they'd like to add to the rollback tasklist
        # Any additional arguments beyond the first two are passed directly to the provider and plugins
        rollback_tasks = load_tasks('resolve_rollback_tasks', manifest, tasklist.tasks_completed, counter_task)
        rollback_tasklist = TaskList(rollback_tasks)

        # Run the rollback tasklist
        rollback_tasklist.run(info=bootstrap_info, dry_run=opts['--dry-run'])
        log.info('Successfully completed rollback')
        # A bare raise keeps the traceback of the original failure; `raise e`
        # would make it start at this line on Python 2.
        raise
def __init__(self):
    """Create the backing TaskList and build the legend string."""
    self.tasklist = TaskList()
    # Each label follows the Fore/Style/Back codes that style it; the three
    # trailing entries reset foreground, style and background.
    legend_parts = [
        '\nLegend: Not Due ',
        Fore.CYAN, Style.BRIGHT, 'Upcoming ',
        Fore.BLUE, Style.BRIGHT, 'Due ',
        Fore.RED, Style.BRIGHT, 'Overdue ',
        Fore.WHITE, Style.BRIGHT, Back.WHITE, 'Completed',
        Fore.RESET, Style.NORMAL, Back.RESET,
    ]
    self.legend = ''.join(legend_parts)
def test_simple_path(sentence1):
    """Walk a single chain of nodes through the task list, one at a time.

    After every ``next()`` the list must be empty again, the returned node
    must report the expected ``next_pos``, and a final ``next()`` on the
    exhausted list must return ``None``.
    """
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)
    assert len(tasklist) == 1

    current = tasklist.next()
    assert len(tasklist) == 0
    assert current.next_pos == 0

    # (first three Node arguments..., next_pos expected after taking the node)
    steps = (
        ('..', 0, '-.--.', 2),
        ('-', 2, '.--.', 3),
        ('.--', 3, '.', 6),
        ('.', 6, '', 7),
    )
    for first_arg, second_arg, third_arg, expected_next_pos in steps:
        # Each new node hangs off the node that was taken in the previous step
        tasklist.add(Node(first_arg, second_arg, third_arg, parent=current))
        current = tasklist.next()
        assert len(tasklist) == 0
        assert current.next_pos == expected_next_pos

    exhausted = tasklist.next()
    assert exhausted is None
def test_task_order(sentence1):
    """Add six sibling nodes out of order and check the first two handed out.

    The list must hold seven nodes (root plus six), and the first two
    ``next()`` calls must return nodes with ``next_pos`` 7 and then 6.
    """
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)

    # Deliberately unsorted insertion order
    for pos in (6, 5, 2, 3, 4, 1):
        tasklist.add(Node(str(pos), pos, '', parent=root))
    assert len(tasklist) == 7

    # (expected next_pos of the returned node, expected length afterwards)
    for expected_next_pos, expected_len in ((7, 6), (6, 5)):
        taken = tasklist.next()
        assert taken.next_pos == expected_next_pos
        assert len(tasklist) == expected_len
def test_take_2(sentence1):
    """Check ``next(size=2)`` both when one and when two nodes are queued."""
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)
    assert len(tasklist) == 1

    # Only the root is queued, so asking for two yields a single node
    batch = tasklist.next(size=2)
    assert len(batch) == 1
    first = batch[0]
    assert len(tasklist) == 0
    assert first.next_pos == 0

    # Queue a child and a grandchild, then ask for two again
    child = Node('..', 0, '-.--.', parent=first)
    tasklist.add(child)
    tasklist.add(Node('-', 2, '.--.', parent=child))

    batch = tasklist.next(size=2)
    assert len(batch) == 2
def loadxml(self, xml, actions, orders):
    """Build a TaskList from the three arguments and store it on ``self.TASKS``.

    All three arguments are passed to ``TaskList`` positionally and unchanged.
    """
    task_list = TaskList(xml, actions, orders)
    self.TASKS = task_list
# This is the main program entry point. # Build up the workers and queues, and then kick everything off. from tasklist import TaskList import queue import threading import page_summary import argparse tasklist = TaskList() output_queue = queue.Queue() def output_worker(): # where the results get printed while True: p = output_queue.get() try: print(p.toString(args["showThreadNames"])) finally: output_queue.task_done( ) #mark the url printed, even if somthing went wrong. def page_worker(): # where the pages get summarised while True: url = tasklist.get() try: p = page_summary.page_summary(url, lambda x: tasklist.put(x)) output_queue.put(p) finally: tasklist.task_done(
from tasklist import TaskList
from lxml import etree
import time
import socket

# Module-level setup for the task server: constants, the shared task list,
# the listening socket and a headless Chrome WebDriver.
# NOTE(review): `webdriver` is used below but is not imported in the visible
# lines - presumably `from selenium import webdriver`; confirm.

__BIND_ADDR__ = "0.0.0.0"  # Listen on every local network interface
__BIND_PORT__ = 8888  # Listen on port 8888
__LIST_URL__ = "http://finance.sina.com.cn/china/"  # URL of the article list page
__COUNT_URL__ = 50  # Only collect the URLs of 50 articles
__XPATH__URL__ = "//div[@class='feed-card-item']/h2/a/@href"
__XPATH_NEXT__ = "//span[@class='pagebox_next']/a"

# Initialise a task list
task_list = TaskList(timeout=30)

# Initialise a socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((__BIND_ADDR__, int(__BIND_PORT__)))
sock.listen(50)

# Initialise a Selenium WebDriver
chrome_options = webdriver.ChromeOptions()  # Get the Chrome WebDriver options object
prefs = {"profile.managed_default_content_settings.images": 2}  # Do not load images, to speed things up
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument("--headless")  # Run without a GUI
chrome_options.add_argument("--disable-gpu")  # Disable GPU-accelerated rendering
driver = webdriver.Chrome(chrome_options=chrome_options)  # Create the Chrome WebDriver
driver.set_page_load_timeout(10)  # Page-load timeout of 10 seconds (the comment used to say 15s)
""" Controller for our app, does the routes for us and calls the methods of the TaskList. """ from flask import Flask, render_template from tasklist import TaskList app = Flask(__name__) task_list = TaskList() @app.route('/') def index(): view_data = { 'title': 'Task List', 'message': 'Welcome to Task List! To see the requests available, try help.' } return render_template('message.html', view_data=view_data) #return view_data['message'] @app.route('/help/') def help(): return 'Requests available: show-tasks, create-task, remove-task' @app.route('/show-tasks/') def get_tasks():
def test_first_task(sentence1):
    """The first node taken from a fresh list still has the whole sentence as ``remaining``."""
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    pending = TaskList(root)
    first = pending.next()
    assert first.remaining == sentence1
def main():
    """Scrape article URLs from the list page and hand them out to clients.

    The function binds a TCP socket on 0.0.0.0:9992, downloads ``main_url``,
    collects the ``href`` of the first ``<a>`` inside each item's ``<h2>``
    and stores the URLs in a ``TaskList``. It then serves clients until the
    task list reports that it is empty. Messages are GBK-encoded,
    comma-separated text:

    * ``get,<client_id>`` - reply with the URL returned by ``get_task()``
    * ``done,<client_id>,<url>`` - call ``done_task(url)`` and reply ``ok``

    Anything else (unknown command, too few fields) is ignored and the
    connection is closed. Both the listening socket and every client
    connection are closed even when an exception is raised.
    """
    addr = "0.0.0.0"
    port = 9992
    main_url = "http://money.163.com/special/00252C1E/gjcj.html"

    task_list = TaskList(timeout=30)

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((addr, port))
        sock.listen(50)

        print("正在从网页中解析URL链接...")

        def gethtmltext(url, code="gbk"):
            """Return the page text decoded as `code`, or "" if the request fails."""
            try:
                r = requests.get(url)
                r.raise_for_status()
                r.encoding = code
                return r.text
            except requests.exceptions.RequestException:
                # Covers connection errors and the HTTPError that
                # raise_for_status() raises for 4xx/5xx answers.
                return ""

        html = gethtmltext(main_url)
        try:
            if html == "":
                print("---html error1!---")
            soup = BeautifulSoup(html, 'html.parser')
            url_info = soup.find_all('div', attrs={'class': 'list_item clearfix'})
            news_url = []
            for item in url_info:
                try:
                    heading = item.find(name='h2')
                    url = heading.find(name='a').attrs['href']
                except (AttributeError, KeyError):
                    # No <h2>, no <a> inside it, or no href: skip this item
                    continue
                news_url.append(url)
                print(url)
            task_list.put_tasks(news_url)
        except Exception:
            # Best effort: keep the server running, but no longer swallow
            # KeyboardInterrupt / SystemExit as the bare except did.
            print("---url error2!---")

        print("等待client中.......")
        while not task_list.is_empty():
            # Accept a TCP connection; a separate name keeps the bind address intact
            conn, client_addr = sock.accept()
            try:
                print('Connected by\n', client_addr, conn)
                data = conn.recv(1024).decode("gbk")
                fields = data.split(',')
                command = fields[0]
                if command == "get" and len(fields) >= 2:
                    client_id = fields[1]
                    # NOTE(review): assumes get_task() always returns a str - confirm
                    task_url = task_list.get_task()
                    print("向client {0} 分配 {1}".format(client_id, task_url))
                    conn.sendall(task_url.encode("gbk"))
                elif command == "done" and len(fields) >= 3:
                    client_id = fields[1]
                    client_url = fields[2]
                    print("client {0}' 完成爬取 {1}".format(client_id, client_url))
                    task_list.done_task(client_url)
                    conn.sendall("ok".encode("gbk"))
            except socket.timeout:
                print("Timeout!")
            except UnicodeDecodeError:
                # Bytes that are not valid GBK come from a misbehaving client;
                # drop the request instead of taking the server down.
                print("Malformed request!")
            finally:
                # Always release the client connection
                conn.close()

        print("====任务完成====")
    finally:
        sock.close()