コード例 #1
0
def run(manifest, debug=False, pause_on_error=False, dry_run=False):
    """Bootstrap according to a manifest, rolling back if anything goes wrong.

    The tasks resolved for the manifest are run in order. When one of them
    raises (or the user interrupts the run), the error is logged, a rollback
    tasklist is resolved and run, and the original exception is re-raised.

    :param Manifest manifest: The manifest to run the bootstrapping process for
    :param bool debug: Whether to turn debugging mode on
    :param bool pause_on_error: Whether to pause on error, before rollback
    :param bool dry_run: Don't actually run the tasks
    :return: The bootstrap information object created for this run
    :raises: Whatever interrupted the tasklist, once the rollback has run
    """
    import logging

    log = logging.getLogger(__name__)

    # Resolve the tasks to run.
    # 'resolve_tasks' is the name of the function called on the provider and plugins.
    from tasklist import load_tasks, TaskList
    log.info('Generating tasklist')
    main_tasklist = TaskList(load_tasks('resolve_tasks', manifest))

    # The information object is shared by the normal run and by a possible rollback
    from bootstrapinfo import BootstrapInformation
    info = BootstrapInformation(manifest=manifest, debug=debug)

    try:
        main_tasklist.run(info=info, dry_run=dry_run)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as error:
        # Log what went wrong, then undo what has been done so far
        log.exception(error)
        if pause_on_error:
            # Gives the user a chance to inspect the volume before it is rolled back
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        def counter_task(taskset, task, counter):
            """Schedule ``counter`` for rollback when ``task`` has completed

            Helper handed to the provider and plugins. Nothing is added when
            ``counter`` itself is already among the completed tasks.

            :param set taskset: The taskset to add the rollback task to
            :param Task task: The task to look for in the completed tasks list
            :param Task counter: The task to add to the rollback tasklist
            """
            if task not in main_tasklist.tasks_completed:
                return
            if counter in main_tasklist.tasks_completed:
                return
            taskset.add(counter)

        # Arguments after the first two are handed straight to the provider and plugins
        rollback_tasklist = TaskList(
            load_tasks('resolve_rollback_tasks', manifest,
                       main_tasklist.tasks_completed, counter_task))

        rollback_tasklist.run(info=info, dry_run=dry_run)
        log.info('Successfully completed rollback')
        raise
    return info
コード例 #2
0
def run(args):
    """Run the bootstrapping process for the manifest named on the commandline.

    When a task raises (or the user interrupts the run), the error is logged
    and a rollback tasklist is gathered and run. The error is not re-raised
    afterwards.

    Args:
        args: Parsed commandline arguments. Read by attribute: ``manifest``,
            ``debug``, ``dry_run`` and ``pause_on_error``.
    """
    # NOTE(review): `log` is not defined in this function; presumably a
    # module-level logger - confirm.
    from manifest import Manifest
    manifest = Manifest(args.manifest)

    # Gather the tasks; 'resolve_tasks' is the name of the function
    # to call on the provider and plugins
    from tasklist import TaskList
    main_tasklist = TaskList()
    main_tasklist.load('resolve_tasks', manifest)

    # One information object is shared by the run and by a possible rollback
    from bootstrapinfo import BootstrapInformation
    info = BootstrapInformation(manifest=manifest, debug=args.debug)

    try:
        main_tasklist.run(info=info, dry_run=args.dry_run)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as error:
        # Log the failure, then undo what has been done so far
        log.exception(error)
        if args.pause_on_error:
            # Lets the user inspect the volume before it is rolled back
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        # The rollback tasks are collected in a tasklist of their own
        rollback_tasklist = TaskList()

        def counter_task(task, counter):
            """Schedule ``counter`` for rollback when ``task`` has completed.

            Helper handed to the provider and plugins. Nothing is added when
            ``counter`` itself is already among the completed tasks.

            Args:
                task (Task): The task to look for in the completed tasks list
                counter (Task): The task to add to the rollback tasklist
            """
            if task not in main_tasklist.tasks_completed:
                return
            if counter in main_tasklist.tasks_completed:
                return
            rollback_tasklist.tasks.add(counter)

        # Arguments after the first two are handed straight to the provider and plugins
        rollback_tasklist.load('resolve_rollback_tasks', manifest, counter_task)

        rollback_tasklist.run(info=info, dry_run=args.dry_run)
        log.info('Successfully completed rollback')
コード例 #3
0
def run(opts):
    """Runs the bootstrapping process

    If a task fails, or the run is interrupted, the error is logged, the
    rollback tasks are run and the original exception is re-raised.

    :param dict opts: Dictionary of options from the commandline. The keys read
        are ``MANIFEST``, ``--debug``, ``--dry-run`` and ``--pause-on-error``.
    :raises: The exception that interrupted bootstrapping, after the rollback
    """
    # NOTE(review): `log` is not defined in this function; presumably a
    # module-level logger - confirm.

    # Load the manifest
    from manifest import Manifest
    manifest = Manifest(opts['MANIFEST'])

    # Get the tasklist
    # 'resolve_tasks' is the name of the function to call on the provider and plugins
    from tasklist import load_tasks
    from tasklist import TaskList
    tasks = load_tasks('resolve_tasks', manifest)
    tasklist = TaskList(tasks)

    # Create the bootstrap information object that'll be used throughout the bootstrapping process
    from bootstrapinfo import BootstrapInformation
    bootstrap_info = BootstrapInformation(manifest=manifest,
                                          debug=opts['--debug'])

    try:
        # Run all the tasks the tasklist has gathered
        tasklist.run(info=bootstrap_info, dry_run=opts['--dry-run'])
        # We're done! :-)
        log.info('Successfully completed bootstrapping')
    except (Exception, KeyboardInterrupt) as e:
        # When an error occurs, log it and begin rollback
        log.exception(e)
        if opts['--pause-on-error']:
            # The --pause-on-error is useful when the user wants to inspect the volume before rollback
            raw_input('Press Enter to commence rollback')
        log.error('Rolling back')

        # Create a useful little function for the provider and plugins to use,
        # when figuring out what tasks should be added to the rollback list.
        def counter_task(taskset, task, counter):
            """counter_task() adds the third argument to the rollback tasklist
            if the second argument is present in the list of completed tasks
            (and the third argument has not itself been completed)

            :param set taskset: The taskset to add the rollback task to
            :param Task task: The task to look for in the completed tasks list
            :param Task counter: The task to add to the rollback tasklist
            """
            if task in tasklist.tasks_completed and counter not in tasklist.tasks_completed:
                taskset.add(counter)

        # Ask the provider and plugins for tasks they'd like to add to the rollback tasklist
        # Any additional arguments beyond the first two are passed directly to the provider and plugins
        rollback_tasks = load_tasks('resolve_rollback_tasks', manifest,
                                    tasklist.tasks_completed, counter_task)
        rollback_tasklist = TaskList(rollback_tasks)

        # Run the rollback tasklist
        rollback_tasklist.run(info=bootstrap_info, dry_run=opts['--dry-run'])
        log.info('Successfully completed rollback')
        # Bare raise: re-raise the exception being handled with its original
        # traceback, instead of restarting the traceback at this line.
        raise
コード例 #4
0
    def __init__(self):
        """Create the underlying TaskList and pre-build the legend line."""
        self.tasklist = TaskList()

        # Each label is preceded by the Fore/Style/Back codes that apply to it,
        # and the trailing codes reset them again.
        # NOTE(review): Fore/Style/Back are presumably colorama's - confirm.
        legend_pieces = (
            '\nLegend: Not Due  ',
            Fore.CYAN, Style.BRIGHT, 'Upcoming  ',
            Fore.BLUE, Style.BRIGHT, 'Due  ',
            Fore.RED, Style.BRIGHT, 'Overdue  ',
            Fore.WHITE, Style.BRIGHT, Back.WHITE, 'Completed',
            Fore.RESET, Style.NORMAL, Back.RESET,
        )
        self.legend = ''.join(legend_pieces)
コード例 #5
0
def test_simple_path(sentence1):
    """Walk a single chain of nodes through the TaskList, one node at a time.

    After every ``next()`` the list must be empty again, the node handed back
    must report the expected ``next_pos``, and a final ``next()`` on the
    exhausted list must return ``None``.
    """
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)
    assert len(tasklist) == 1

    current = tasklist.next()
    assert len(tasklist) == 0
    assert current.next_pos == 0

    # (first arg, second arg, third arg, expected next_pos) for each child node;
    # every node is created as a child of the node that was taken just before.
    steps = (
        ('..', 0, '-.--.', 2),
        ('-', 2, '.--.', 3),
        ('.--', 3, '.', 6),
        ('.', 6, '', 7),
    )
    for text, pos, remaining, expected_next_pos in steps:
        tasklist.add(Node(text, pos, remaining, parent=current))
        current = tasklist.next()
        assert len(tasklist) == 0
        assert current.next_pos == expected_next_pos

    assert tasklist.next() is None
コード例 #6
0
def test_task_order(sentence1):
    """Check that nodes added out of order come back highest ``next_pos`` first."""
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)

    # Six children of the root, deliberately added in a shuffled order
    for pos in (6, 5, 2, 3, 4, 1):
        tasklist.add(Node(str(pos), pos, '', parent=root))

    assert len(tasklist) == 7

    # The first two nodes handed out, and the list size left after each
    for expected_next_pos, expected_len in ((7, 6), (6, 5)):
        taken = tasklist.next()
        assert taken.next_pos == expected_next_pos
        assert len(tasklist) == expected_len
コード例 #7
0
def test_take_2(sentence1):
    """Check ``next(size=2)``: it returns what is available, up to two nodes."""
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    tasklist = TaskList(root)
    assert len(tasklist) == 1

    # Only the root is queued, so asking for two yields a single node
    first_batch = tasklist.next(size=2)
    assert len(first_batch) == 1
    taken_root = first_batch[0]
    assert len(tasklist) == 0
    assert taken_root.next_pos == 0

    # Queue two more nodes; the second one is a child of the first
    child = Node('..', 0, '-.--.', parent=taken_root)
    tasklist.add(child)
    grandchild = Node('-', 2, '.--.', parent=child)
    tasklist.add(grandchild)

    second_batch = tasklist.next(size=2)
    assert len(second_batch) == 2
コード例 #8
0
 def loadxml(self, xml, actions, orders):
     """Build a TaskList from the three arguments and keep it as ``self.TASKS``."""
     task_list = TaskList(xml, actions, orders)
     self.TASKS = task_list
コード例 #9
0
# This is the main program entry point.
#  Build up the workers and queues, and then kick everything off.

from tasklist import TaskList
import queue
import threading
import page_summary
import argparse

# Shared queue of URLs still to be summarised; page_worker() takes URLs from it
# and hands it to page_summary as the callback for newly found URLs.
tasklist = TaskList()
# Finished page summaries waiting to be printed by output_worker().
output_queue = queue.Queue()


def output_worker():
    """Print every page summary that arrives on ``output_queue``, forever.

    Each item is rendered with its ``toString`` method and printed.
    NOTE(review): ``args`` is not defined in the visible part of the file;
    presumably the parsed command-line options - confirm.
    """
    while True:
        summary = output_queue.get()
        try:
            rendered = summary.toString(args["showThreadNames"])
            print(rendered)
        finally:
            # Mark the item as handled even if something went wrong while
            # printing it.
            output_queue.task_done()


def page_worker():  # where the pages get summarised
    while True:
        url = tasklist.get()
        try:
            p = page_summary.page_summary(url, lambda x: tasklist.put(x))
            output_queue.put(p)
        finally:
            tasklist.task_done(
コード例 #10
0
from tasklist import TaskList
from lxml import etree
import time
import socket

__BIND_ADDR__ = "0.0.0.0"  # Listen on every network interface of this host
__BIND_PORT__ = 8888  # Listen on port 8888

__LIST_URL__ = "http://finance.sina.com.cn/china/"  # URL of the article list page
__COUNT_URL__ = 50  # Only collect the URLs of 50 articles

# XPath expressions; judging by their names, presumably the article links on
# the list page and the "next page" link - confirm against where they are used.
__XPATH__URL__ = "//div[@class='feed-card-item']/h2/a/@href"
__XPATH_NEXT__ = "//span[@class='pagebox_next']/a"

# Initialise a task list
task_list = TaskList(timeout=30)

# Initialise a listening TCP socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((__BIND_ADDR__, int(__BIND_PORT__)))
sock.listen(50)

# Initialise a Selenium WebDriver
# NOTE(review): `webdriver` is not among the imports visible above - confirm
# that it is imported somewhere before this point.
chrome_options = webdriver.ChromeOptions()  # Chrome WebDriver options object
prefs = {"profile.managed_default_content_settings.images": 2}  # Do not load images, to speed things up
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument("--headless")  # Run without a GUI
chrome_options.add_argument("--disable-gpu")  # Disable GPU rendering acceleration
driver = webdriver.Chrome(chrome_options=chrome_options)  # Create the Chrome WebDriver
driver.set_page_load_timeout(10)  # Page-load timeout: 10 seconds
コード例 #11
0
"""
Controller for our app, does the routes for us and calls the methods of the TaskList.
"""

from flask import Flask, render_template

from tasklist import TaskList

# The Flask application object; the @app.route decorators below register on it.
app = Flask(__name__)
# The single, module-level TaskList instance created for this app.
task_list = TaskList()


@app.route('/')
def index():
    """Render the landing page: a title plus a short welcome message."""
    title = 'Task List'
    message = 'Welcome to Task List!  To see the requests available, try help.'
    return render_template(
        'message.html',
        view_data={'title': title, 'message': message},
    )


@app.route('/help/')
def help():
    """Return a plain-text list of the requests this app offers."""
    available_requests = 'Requests available: show-tasks, create-task, remove-task'
    return available_requests


@app.route('/show-tasks/')
def get_tasks():
コード例 #12
0
def test_first_task(sentence1):
    """The first node handed out must still carry the whole sentence as ``remaining``."""
    root = Node('', 0, sentence1, state=NodeState.ROOT)
    first_taken = TaskList(root).next()
    assert first_taken.remaining == sentence1
コード例 #13
0
def _fetch_html(url, code="gbk"):
    """Return the body of ``url`` decoded as ``code``, or "" if the request fails."""
    try:
        r = requests.get(url)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.exceptions.RequestException:
        # Covers connection errors as well as the HTTP errors raised by
        # raise_for_status(); the caller treats "" as "no page".
        return ""


def _extract_news_urls(html):
    """Return the article links found in the list page ``html``, printing each one."""
    soup = BeautifulSoup(html, 'html.parser')
    url_info = soup.find_all('div', attrs={'class': 'list_item clearfix'})
    news_url = []
    for item in url_info:
        try:
            heading = item.find(name='h2')
            url = heading.find(name='a').attrs['href']
            news_url.append(url)
            print(url)
        except Exception:
            # Best effort: skip entries that lack an <h2><a href=...> link.
            continue
    return news_url


def _serve_request(conn, task_list):
    """Answer one client request received on ``conn``.

    Requests are GBK-encoded, comma-separated strings:
    ``get,<client_id>`` is answered with a task URL, and
    ``done,<client_id>,<url>`` marks the URL as done and is answered with "ok".
    Any other command is ignored.
    """
    fields = conn.recv(1024).decode("gbk").split(',')
    command = fields[0]
    if command == "get":
        client_id = fields[1]
        task_url = task_list.get_task()

        print("向client {0} 分配 {1}".format(client_id, task_url))
        conn.send(task_url.encode("gbk"))
    elif command == "done":
        client_id = fields[1]
        client_url = fields[2]
        print("client {0}' 完成爬取 {1}".format(client_id, client_url))
        task_list.done_task(client_url)
        conn.send("ok".encode("gbk"))


def main():
    """Hand out news-article URLs to crawler clients over TCP.

    Collects the article links from the list page, puts them in a TaskList,
    then serves one client request per connection until the TaskList reports
    that it is empty. The listening socket and every client connection are
    closed on all exit paths, including errors.
    """
    addr = "0.0.0.0"
    port = 9992

    main_url = "http://money.163.com/special/00252C1E/gjcj.html"

    task_list = TaskList(timeout=30)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((addr, port))
        sock.listen(50)

        print("正在从网页中解析URL链接...")

        html = _fetch_html(main_url)
        try:
            if html == "":
                print("---html error1!---")
            task_list.put_tasks(_extract_news_urls(html))
        except Exception:
            print("---url error2!---")

        print("等待client中.......")
        while True:
            if task_list.is_empty():
                print("====任务完成====")
                break

            # Accept a TCP connection: a new socket plus the peer address
            conn, client_addr = sock.accept()
            print('Connected by\n', client_addr, conn)  # Show who connected
            try:
                _serve_request(conn, task_list)
            except socket.timeout:
                print("Timeout!")
            finally:
                # Always release the connection, even if the request was bad
                conn.close()
    finally:
        sock.close()