Example #1
0
    def __init__(self, reactor, queue_service, conf):
        """Keep the injected collaborators and build the helpers from conf.

        Args:
            reactor: stored as ``self.reactor`` and also handed to
                ``ArticleServer``.
            queue_service: stored as ``self.queue_service``.
            conf: configuration object passed to both ``HubExtractor`` and
                ``ArticleServer``.
        """
        # Collaborators supplied by the caller.
        self.reactor = reactor
        self.queue_service = queue_service

        # An empty logger name resolves to the root logger.
        self.logger = logging.getLogger("")

        # Helpers built from the configuration (extractor first, as before).
        self.hub_extractor = HubExtractor(conf)
        # FIXME delete article server
        self.article_server = ArticleServer(reactor, conf)
Example #2
0
class HubServer(object):
    """Pull hub-page tasks from a queue, download them and forward new URLs.

    For every task taken from ``queue_service`` the task URL is downloaded
    through ``reactor.download_and_process``; the page body is run through
    ``HubExtractor.extract`` and each URL it returns is handed to
    ``ArticleServer.process_task``.
    """

    def __init__(self, reactor, queue_service, conf):
        """Store the collaborators and build the extractor / article server.

        Args:
            reactor: object providing ``download_and_process``; also passed
                to ``ArticleServer``.
            queue_service: object whose ``get(n)`` returns a
                ``(task, msg)`` pair.
            conf: configuration passed to ``HubExtractor`` and
                ``ArticleServer``.
        """
        # An empty logger name resolves to the root logger.
        self.logger = logging.getLogger("")
        self.reactor = reactor
        self.queue_service = queue_service
        self.hub_extractor = HubExtractor(conf)

        # FIXME delete article server
        self.article_server = ArticleServer(reactor, conf)

    def process_request(self, response, url):
        """Log the response status and raw headers at DEBUG level.

        Nothing is read from ``response`` unless DEBUG logging is enabled,
        so the (potentially large) header dump costs nothing otherwise and
        can be switched on purely through logging configuration.

        Args:
            response: object exposing ``code``, ``phrase`` and
                ``headers.getAllRawHeaders()``.
            url: URL the response belongs to.
        """
        if not self.logger.isEnabledFor(logging.DEBUG):
            return
        self.logger.debug(
            "http response, url:%s, code:%s, phrase:%s, headers:%s",
            url, response.code, response.phrase,
            pformat(list(response.headers.getAllRawHeaders())))

    def process_body(self, body, url):
        """Extract URLs from a downloaded page and forward each one.

        Args:
            body: downloaded page content; only the first 100 items are
                logged.
            url: URL the body was downloaded from.
        """
        self.logger.info("page body, url:%s, body:%s", url, body[:100])

        # NOTE(review): presumably the URLs not yet known to the system (the
        # result used to be called ``not_exist``) -- confirm with HubExtractor.
        new_urls = self.hub_extractor.extract(body, url)
        if not new_urls:
            # Nothing extracted (empty or None): nothing to forward.
            return

        self.logger.info("new urls, hub:%s, urls:%s", url, new_urls)
        for new_url in new_urls:
            self.article_server.process_task(new_url)

    def process_error(self, failure, url):
        """Log a failed download.

        Args:
            failure: object exposing ``getErrorMessage()``.
            url: URL whose download failed.
        """
        self.logger.error("download error, url:%s, msg:%s",
                          url, failure.getErrorMessage())

    def process_task(self, url):
        """Schedule the download of ``url`` with this server's callbacks.

        Args:
            url: hub page URL, as text or as an already-encoded byte string.
        """
        # Encode text only; calling ``encode`` on a byte string either fails
        # (Python 3) or triggers an implicit ASCII decode (Python 2).
        if not isinstance(url, bytes):
            url = url.encode('utf-8')

        # Each callback is described as (callable, extra args, extra kwargs).
        # NOTE(review): presumably the reactor appends these extras after the
        # result it produces -- confirm against download_and_process.
        request_process = (self.process_request, (url,), {})
        body_process = (self.process_body, (url,), {})
        error_process = (self.process_error, (url,), {})

        self.reactor.download_and_process(
            url, None, request_process, body_process, error_process,
            redirect=True)

    def start(self):
        """Poll the queue forever and process every task that arrives.

        Any exception raised while fetching or scheduling a task is logged
        with its traceback and the loop continues.

        Raises:
            SystemExit: with status 0 when interrupted by KeyboardInterrupt.
        """
        while True:
            try:
                # NOTE(review): 10 looks like a timeout in seconds -- confirm
                # against the queue service.
                task, _msg = self.queue_service.get(10)
                if task:
                    self.process_task(task.url)
                else:
                    self.logger.info("queue empty")
            except KeyboardInterrupt:
                sys.exit(0)
            except Exception:
                # Keep the worker alive, but do not lose the traceback.
                self.logger.exception("task processing failed")