コード例 #1
0
ファイル: worker.py プロジェクト: luwenjin/spider_farm
#coding:utf-8
import re
from copy import deepcopy

import requests
import pymongo
from bson import ObjectId, Binary
from pymongo.errors import OperationFailure
import thread
import time

from base import MessageQueue, to_zip, to_zip64, logging

log = logging.getLogger('spider')


class SpiderWorker(MessageQueue):
    def __init__(self, ident):
        """Initialise the base class with ``ident`` and attach a crawl handler.

        :param ident: identifier forwarded unchanged to the parent
            constructor.
        """
        super(SpiderWorker, self).__init__(ident)
        # The handler receives this worker instance as its only argument.
        handler = CrawlHandler(self)
        self.crawl_handler = handler

    def on_receive_json(self, caller, request, request_id):
        cmd = request.get('cmd')
        log.debug('receive cmd: %s', request)

        if cmd == 'crawl':
            result = self.crawl_handler.process(request)
        elif cmd == 'who':
            result = {'role': 'worker'}
        else:
            log.warning('invalid cmd: %s - %s', cmd, request)
コード例 #2
0
ファイル: server.py プロジェクト: luwenjin/spider_farm
#coding:utf-8
from copy import deepcopy
import random
import time
from datetime import datetime

from base import MessageQueue, logging

log = logging.getLogger('server')

class User(object):
    """Record holding a name, a role and a ``waiting_reply`` flag.

    A new instance starts with role ``'user'`` and ``waiting_reply`` set to
    ``False``.
    """

    def __init__(self, name):
        """Store ``name`` and set the default role and flag."""
        self.name = name
        self.role = 'user'
        self.waiting_reply = False

    def waiting(self, flag=None):
        """Read or write the ``waiting_reply`` flag.

        Called without an argument (or with ``None``), returns the current
        flag. Called with any other value, stores its truthiness as the new
        flag and returns ``None``.
        """
        if flag is not None:
            # Setter path: normalise whatever was passed to a real bool.
            self.waiting_reply = bool(flag)
            return None
        return self.waiting_reply

    def __repr__(self):
        """Return a debug string showing name, role and waiting state."""
        fields = (self.name, self.role, self.waiting_reply)
        return '<User:%s(%s) waiting_reply=%s>' % fields


class Request(object):
    def __init__(self, request_id, cmd, params, source=None, ttl=600.0):
        self.request_id = request_id
        self.cmd = cmd
        self.params = params
コード例 #3
0
ファイル: client.py プロジェクト: luwenjin/spider_farm
#coding:utf-8
import thread
import time

from base import MessageQueue, logging

log = logging.getLogger('client')


class SpiderClient(MessageQueue):
    """Message-queue client that sends request messages to ``'server'``."""

    def send_request(self, cmd, **kwargs):
        """Send a request message carrying ``cmd`` and extra fields.

        The keyword arguments form the message body; the ``"type"`` key is
        always set to ``'request'`` and ``'cmd'`` to ``cmd``, overriding any
        ``type`` passed by the caller.
        """
        payload = dict(kwargs, type='request', cmd=cmd)
        self.send_json('server', payload)

    def on_receive_json(self, ident, obj, mid64):
        """Log an incoming message at debug level; nothing else is done."""
        log.debug(
            'on_receive_json <- [%s]: %s %s',
            ident,
            repr(mid64),
            obj,
        )


if __name__ == '__main__':
    # Script entry point: connect a client, run its loop on a separate
    # thread, and keep the main thread alive with a periodic message.
    host, port = 'getf5.com', 44444

    client = SpiderClient('client')
    client.connect(host, port)

    thread.start_new_thread(client.loop, ())

    # Without this loop the main thread would fall off the end of the script.
    while True:
        print('sleep')
        time.sleep(10)