예제 #1
0
    def gotResult(self, data, task, ttype):
        '''
            Handle the payload produced for a finished task.

            There are two kinds of task:
            1. shop info: the shop's product list has to be crawled
            2. product info: the product's basic information has to be
               crawled

            data  -- JSON text; a falsy value is only logged.
            task  -- pickled task object (unpickled here on the 'extract'
                     path to read its tbody).
            ttype -- task type; 'extract' fans out follow-up tasks, any
                     other value hands the decoded data to save_items.
        '''
        # TODO refactor this
        if not data:
            log.debug('Got an invalid task: %s when taking task: %s' % (task, ttype))
            return

        if ttype != 'extract':
            save_items(json.loads(data))
            return

        # 'extract' payload: a JSON pair (total page count, JSON list of hrefs).
        total_page, hrefs = json.loads(data)
        total_page = int(total_page)
        hrefs = json.loads(hrefs)

        # NOTE(review): the return value of check_duplicate is not used and
        # every href is queued below -- confirm whether the de-duplicated
        # ids were meant to be queued instead.
        check_duplicate(self.redis, hrefs)

        # One new task per href goes onto 'task_queue'.
        for href in hrefs:
            new_id = self.new_task_id()
            log.info(href)
            queued = BaseTask(new_id, {'task': href})
            self.redis.push_list_data('task_queue', cPickle.dumps(queued))

        # NOTE(review): cPickle.loads must only ever see trusted input.
        origin = cPickle.loads(task)
        current = origin.tbody.get('page', 1)
        if current != 1 or current >= total_page:
            return

        # Only the first page schedules the remaining pages (2..total_page);
        # the task body is reused and pickled once per page.
        body = origin.tbody
        for index in xrange(current, total_page):
            new_id = self.new_task_id()
            body['page'] = index + 1
            queued = BaseTask(new_id, body)
            self.redis.push_list_data('extract_queue', cPickle.dumps(queued))
예제 #2
0
    def register(
        self,
        servicename,
        version_major,
        version_minor,
        nodename,
        client,
    ):
        ''' Register the client to the controller.

        servicename   -- service the node belongs to; must be a key of
                         self.versions.
        version_major,
        version_minor -- client version, compared as a tuple against the
                         (lowest, highest) pair stored for the service.
        nodename      -- stored in the client record under 'name'.
        client        -- remote reference; its peer host is recorded and
                         unregister() is hooked to its disconnect.

        Returns ('succeed', clientid) on success, or (error message, 0)
        when the service is unknown or the version is out of range.
        '''
        clientid = self.newClientId()

        # A servicename that is not in self.versions marks an invalid node.
        if servicename not in self.versions:
            log.info("Added client: %s %s Failed. No such servicename" %
                     (str(clientid), servicename))
            return (servicename + ' is not in a known service', 0)

        version = self.versions[servicename]
        client_version = (version_major, version_minor)

        if client_version < version[0]:
            return (
                'version %s is below %s please update the client' % (
                    repr(client_version),
                    repr(version[0]),
                ),
                0,
            )
        if client_version > version[1]:
            return (
                'version %s is above %s please update the controller' % (
                    repr(client_version),
                    repr(version[1]),
                ),
                0,
            )

        self.clients[clientid] = {
            'client': client,
            'id': clientid,
            'servicename': servicename,
            'name': nodename,
            'users': [],
            # Request-id sets: clientPull/clientReturn call .add() and
            # .discard() on these, so they must exist and be sets.
            'pulling': set(),
            'processing': set(),
            'pushing': set(),
            'ip': client.broker.transport.getPeer().host,
            'last_call': time.time(),
        }

        # Drop the record again as soon as the connection goes away.
        client.notifyOnDisconnect(lambda c: self.unregister(clientid))
        log.info("Added client: %s %s" % (str(clientid), servicename))
        return ('succeed', clientid)
예제 #3
0
    def register(
        self,
        servicename,
        version_major,
        version_minor,
        nodename,
        client,
    ):
        ''' Register the client to the controller.

        servicename   -- service the node belongs to; must be a key of
                         self.versions.
        version_major,
        version_minor -- client version, compared as a tuple against the
                         (lowest, highest) pair stored for the service.
        nodename      -- stored in the client record under 'name'.
        client        -- remote reference; its peer host is recorded and
                         unregister() is hooked to its disconnect.

        Returns ('succeed', clientid) on success, or (error message, 0)
        when the service is unknown or the version is out of range.
        '''
        clientid = self.newClientId()

        # A servicename that is not in self.versions marks an invalid node.
        if servicename not in self.versions:
            log.info("Added client: %s %s Failed. No such servicename" % (str(clientid), servicename))
            return (servicename+' is not in a known service', 0)

        version = self.versions[servicename]
        client_version = (version_major, version_minor)

        if client_version < version[0]:
            return (
                'version %s is below %s please update the client' % (
                    repr(client_version),
                    repr(version[0]),
                ),
                0,
            )
        if client_version > version[1]:
            return (
                'version %s is above %s please update the controller' % (
                    repr(client_version),
                    repr(version[1]),
                ),
                0,
            )

        self.clients[clientid] = {
            'client': client,
            'id': clientid,
            'servicename': servicename,
            'name': nodename,
            'users': [],
            # Request-id sets: clientPull/clientReturn call .add() and
            # .discard() on these, so they must exist and be sets.
            'pulling': set(),
            'processing': set(),
            'pushing': set(),
            'ip': client.broker.transport.getPeer().host,
            'last_call': time.time(),
        }

        # Drop the record again as soon as the connection goes away.
        client.notifyOnDisconnect(lambda c: self.unregister(clientid))
        log.info("Added client: %s %s" % (str(clientid), servicename))
        return ('succeed', clientid)
예제 #4
0
    def clientPull(self, clientid):
        ''' Give the client the next request queued for its service.

        If a request is already waiting in the service's push queue it is
        taken immediately; otherwise a pull record is queued and this
        generator waits on its Deferred until a request is delivered.
        Either way the request is marked as processing for the client and
        a processing timeout is scheduled.

        This is a generator using `yield` / `returnValue`; presumably it
        runs under Twisted's inlineCallbacks (decorator not visible here).

        clientid -- id of a registered client (KeyError if unknown).

        Returns, via returnValue, the tuple
        (request id, method, args, kwargs) for the client to run.
        '''
        client = self.clients[clientid]
        # The push/pull queues are keyed by service name; take it from the
        # client record (`name` was previously never defined here).
        name = client['servicename']
        push_queue = self.push_queue[name]

        # if there are no push_queue
        if not push_queue:
            # generate a new pull record and add into pull_queue
            defer = Deferred()
            pullid = self.newRequestId()
            pull = {
                'id': pullid,
                'servicename': name,
                'defer': defer,
                'clientid': clientid,
            }

            client['pulling'].add(pullid)
            self.pull_requests[pullid] = pull
            self.pull_queue[name].append(pullid)
            # Suspended here until the pull's Deferred fires with a request.
            request = yield defer

            del self.pull_requests[pullid]
            requestid = request['id']
            client['processing'].add(requestid)
        else:
            # get a request from push_queue and add into processing queue
            requestid = push_queue.popleft()
            client['processing'].add(requestid)
            request = self.push_requests[requestid]

        # Schedule clientProcTimeout in case the client never answers.
        self.processing_timeout[requestid] = reactor.callLater(
            self.client_request_timeout,
            self.clientProcTimeout,
            requestid,
            clientid,
        )
        log.info("Sent To: clientid %s, requestid %s." % (
            clientid,
            request['id'],
        ))
        # return requestid, method, args, kwargs to client and
        # client run it.
        returnValue((
            request['id'],
            request['method'],
            request['args'],
            request['kwargs'],
        ))
예제 #5
0
    def clientPull(self, clientid):
        ''' Give the client the next request queued for its service.

        If a request is already waiting in the service's push queue it is
        taken immediately; otherwise a pull record is queued and this
        generator waits on its Deferred until a request is delivered.
        Either way the request is marked as processing for the client and
        a processing timeout is scheduled.

        This is a generator using `yield` / `returnValue`; presumably it
        runs under Twisted's inlineCallbacks (decorator not visible here).

        clientid -- id of a registered client (KeyError if unknown).

        Returns, via returnValue, the tuple
        (request id, method, args, kwargs) for the client to run.
        '''
        client = self.clients[clientid]
        # The push/pull queues are keyed by service name; take it from the
        # client record (`name` was previously never defined here).
        name = client['servicename']
        push_queue = self.push_queue[name]

        # if there are no push_queue
        if not push_queue:
            # generate a new pull record and add into pull_queue
            defer = Deferred()
            pullid = self.newRequestId()
            pull = {
                'id': pullid,
                'servicename': name,
                'defer': defer,
                'clientid': clientid,
            }

            client['pulling'].add(pullid)
            self.pull_requests[pullid] = pull
            self.pull_queue[name].append(pullid)
            # Suspended here until the pull's Deferred fires with a request.
            request = yield defer

            del self.pull_requests[pullid]
            requestid = request['id']
            client['processing'].add(requestid)
        else:
            # get a request from push_queue and add into processing queue
            requestid = push_queue.popleft()
            client['processing'].add(requestid)
            request = self.push_requests[requestid]

        # Schedule clientProcTimeout in case the client never answers.
        self.processing_timeout[requestid] = reactor.callLater(
            self.client_request_timeout,
            self.clientProcTimeout,
            requestid,
            clientid,
        )
        log.info("Sent To: clientid %s, requestid %s." % (
            clientid,
            request['id'],
        ))
        # return requestid, method, args, kwargs to client and
        # client run it.
        returnValue((
            request['id'],
            request['method'],
            request['args'],
            request['kwargs'],
        ))
예제 #6
0
    def clientReturn(self, clientid, requestid, result):
        ''' Accept the result a client produced for a request.

        clientid  -- id of the client that processed the request
                     (KeyError if it is not registered).
        requestid -- id of the processed request.
        result    -- mapping holding either 'result' (the return value)
                     or 'error' (a mapping with 'type', 'value' and
                     'traceback').

        The request is removed from the client's processing set, its
        processing timeout is cancelled and, when the request is a known
        push request, its Deferred is fired with the value or with a
        pb.RemoteError failure.
        '''
        log.info("Returned: clientid: %s, requestid: %s" % (
            clientid,
            requestid,
        ))

        # remove this request from the client's processing set
        client = self.clients[clientid]
        client['processing'].discard(requestid)

        # Cancel the pending processing timeout. A missing entry means the
        # request was already finished. The entry is removed before
        # cancel() runs so a failing cancel cannot leave it behind.
        try:
            timeout = self.processing_timeout.pop(requestid)
        except KeyError:
            pass
        else:
            timeout.cancel()

        if requestid in self.push_requests:
            push = self.push_requests[requestid]
            if 'error' not in result:
                push['defer'].callback(result['result'])
            else:
                error = result['error']
                push['defer'].errback(
                    failure.Failure(
                        pb.RemoteError(
                            error['type'],
                            error['value'],
                            error['traceback'],
                        )))

            servicename = push['servicename']

            # Best effort: the request may no longer be queued (ValueError)
            # or the service may have no queue (KeyError).
            try:
                self.push_queue[servicename].remove(requestid)
            except (KeyError, ValueError):
                pass

            if push['clientid'] is not None:
                # Best effort: the pushing client may be gone already or
                # its record may carry no 'pushing' set.
                try:
                    self.clients[push['clientid']]['pushing'].discard(
                        requestid)
                except KeyError:
                    pass
예제 #7
0
    def clientReturn(self, clientid, requestid, result):
        ''' Accept the result a client produced for a request.

        clientid  -- id of the client that processed the request
                     (KeyError if it is not registered).
        requestid -- id of the processed request.
        result    -- mapping holding either 'result' (the return value)
                     or 'error' (a mapping with 'type', 'value' and
                     'traceback').

        The request is removed from the client's processing set, its
        processing timeout is cancelled and, when the request is a known
        push request, its Deferred is fired with the value or with a
        pb.RemoteError failure.
        '''
        log.info("Returned: clientid: %s, requestid: %s" % (
            clientid,
            requestid,
        ))

        # remove this request from the client's processing set
        client = self.clients[clientid]
        client['processing'].discard(requestid)

        # Cancel the pending processing timeout. A missing entry means the
        # request was already finished. The entry is removed before
        # cancel() runs so a failing cancel cannot leave it behind.
        try:
            timeout = self.processing_timeout.pop(requestid)
        except KeyError:
            pass
        else:
            timeout.cancel()

        if requestid in self.push_requests:
            push = self.push_requests[requestid]
            if 'error' not in result:
                push['defer'].callback(result['result'])
            else:
                error = result['error']
                push['defer'].errback(failure.Failure(
                    pb.RemoteError(
                        error['type'],
                        error['value'],
                        error['traceback'],
                )))

            servicename = push['servicename']

            # Best effort: the request may no longer be queued (ValueError)
            # or the service may have no queue (KeyError).
            try:
                self.push_queue[servicename].remove(requestid)
            except (KeyError, ValueError):
                pass

            if push['clientid'] is not None:
                # Best effort: the pushing client may be gone already or
                # its record may carry no 'pushing' set.
                try:
                    self.clients[push['clientid']]['pushing'].discard(requestid)
                except KeyError:
                    pass
예제 #8
0
    def gotResult(self, data, task, ttype):
        '''
            Handle the payload produced for a finished task.

            There are two kinds of task:
            1. shop info: the shop's product list has to be crawled
            2. product info: the product's basic information has to be
               crawled

            data  -- JSON text; a falsy value is only logged.
            task  -- pickled task object (unpickled here on the 'extract'
                     path to read its tbody).
            ttype -- task type; 'extract' fans out follow-up tasks, any
                     other value hands the decoded data to save_items.
        '''
        # TODO refactor this
        if not data:
            log.debug('Got an invalid task: %s when taking task: %s' %
                      (task, ttype))
            return

        if ttype != 'extract':
            save_items(json.loads(data))
            return

        # 'extract' payload: a JSON pair (total page count, JSON list of hrefs).
        total_page, hrefs = json.loads(data)
        total_page = int(total_page)
        hrefs = json.loads(hrefs)

        # NOTE(review): the return value of check_duplicate is not used and
        # every href is queued below -- confirm whether the de-duplicated
        # ids were meant to be queued instead.
        check_duplicate(self.redis, hrefs)

        # One new task per href goes onto 'task_queue'.
        for href in hrefs:
            new_id = self.new_task_id()
            log.info(href)
            queued = BaseTask(new_id, {'task': href})
            self.redis.push_list_data('task_queue', cPickle.dumps(queued))

        # NOTE(review): cPickle.loads must only ever see trusted input.
        origin = cPickle.loads(task)
        current = origin.tbody.get('page', 1)
        if current != 1 or current >= total_page:
            return

        # Only the first page schedules the remaining pages (2..total_page);
        # the task body is reused and pickled once per page.
        body = origin.tbody
        for index in xrange(current, total_page):
            new_id = self.new_task_id()
            body['page'] = index + 1
            queued = BaseTask(new_id, body)
            self.redis.push_list_data('extract_queue', cPickle.dumps(queued))
예제 #9
0
    def unregister(self, clientid):
        ''' Remove the client record stored under clientid.

        Returns True when a record was removed and False when clientid is
        not registered.
        '''
        # NOTE: cleanup of the client's in-flight work used to sit here as
        # commented-out code and is still disabled. It walked the client's
        # 'pulling', 'processing' and 'pushing' sets: pulling/pushing ids
        # were taken out of pull_queue/push_queue and out of
        # pull_requests/push_requests, and processing ids were re-queued
        # through addRequest(servicename, requestid).
        try:
            del self.clients[clientid]
        except KeyError:
            return False

        log.info("Removed client: " + str(clientid))
        return True
예제 #10
0
    def unregister(self, clientid):
        ''' Remove the client record stored under clientid.

        Returns True when a record was removed and False when clientid is
        not registered.
        '''
        # NOTE: cleanup of the client's in-flight work used to sit here as
        # commented-out code and is still disabled. It walked the client's
        # 'pulling', 'processing' and 'pushing' sets: pulling/pushing ids
        # were taken out of pull_queue/push_queue and out of
        # pull_requests/push_requests, and processing ids were re-queued
        # through addRequest(servicename, requestid).
        try:
            del self.clients[clientid]
        except KeyError:
            return False

        log.info("Removed client: " + str(clientid))
        return True
예제 #11
0
    def searchLoop(self, agent):
        ''' Keep pulling 'extract' requests and running them with agent.

        Each round asks the controller for the next request, runs
        self.search(agent, task) and sends the JSON-encoded result back
        with 'sendResult' (None is sent when the search failed).

        The loop ends when agent.remove is set (the agent is then removed
        from self.agent_pool) or after an InfiniteLoginError, which is
        additionally reported to the controller with 'fail'.

        This is a generator yielding the awaited calls; presumably it runs
        under Twisted's inlineCallbacks (decorator not visible here).
        '''
        while True:
            if agent.remove:
                self.agent_pool.removeAgent(agent)
                break

            reqid, task = yield self.callController('nextRequest', 'extract')
            log.info(repr(task))

            result = None
            stop_after_send = False
            try:
                result = yield self.search(agent, task)
                log.debug('Got data %s' % repr(result))
            except InfiniteLoginError:
                log.exception()
                yield self.callController("fail", task=task)
                stop_after_send = True
            except Exception:
                # Not a bare except: GeneratorExit, KeyboardInterrupt and
                # SystemExit must propagate out of this generator instead
                # of being logged and followed by another yield.
                log.exception()

            # Not yielded: the result is sent without waiting for it.
            self.callController('sendResult', reqid, task, json.dumps(result))
            if stop_after_send:
                break
예제 #12
0
    def searchLoop(self, agent):
        ''' Keep pulling 'data' requests and running them with agent.

        Each round asks the controller for the next request, runs
        self.search(agent, task) and sends the JSON-encoded result back
        with 'sendResult' (None is sent when the search failed).

        The loop ends when agent.remove is set (the agent is then removed
        from self.agent_pool) or after an InfiniteLoginError, which is
        additionally reported to the controller with 'fail'.

        This is a generator yielding the awaited calls; presumably it runs
        under Twisted's inlineCallbacks (decorator not visible here).
        '''
        while True:
            if agent.remove:
                self.agent_pool.removeAgent(agent)
                break

            reqid, task = yield self.callController('nextRequest', 'data')
            log.info('Got Task %s with reqid: %s' % (repr(task), reqid))

            result = None
            stop_after_send = False
            try:
                result = yield self.search(agent, task)
                log.debug('Got data %s' % repr(result))
            except InfiniteLoginError:
                log.exception()
                yield self.callController("fail", task=task)
                stop_after_send = True
            except Exception:
                # Not a bare except: GeneratorExit, KeyboardInterrupt and
                # SystemExit must propagate out of this generator instead
                # of being logged and followed by another yield.
                log.exception()

            # Not yielded: the result is sent without waiting for it.
            self.callController('sendResult', reqid, task, json.dumps(result))
            if stop_after_send:
                break
예제 #13
0
 def clientFail(self, *args, **kwargs):
     ''' Log that a client failed.

     Reads the optional keyword arguments 'clientid' and 'reason'
     (default ''); positional arguments are accepted but not used.
     '''
     failed_id = kwargs.get('clientid')
     why = kwargs.get('reason', '')
     log.info("%s Client Failed, reason: %s" % (failed_id, why))
예제 #14
0
 def clientFail(self, *args, **kwargs):
     ''' Log that a client failed.

     Reads the optional keyword arguments 'clientid' and 'reason'
     (default ''); positional arguments are accepted but not used.
     '''
     failed_id = kwargs.get('clientid')
     why = kwargs.get('reason', '')
     log.info("%s Client Failed, reason: %s" % (failed_id, why))