def process(self, params):
        """Route a Sohu-family URL to the matching comment handler.

        URLs whose host is not on the whitelist below are reported through
        ``Logger.log`` with ``constant.ERRORCODE_SITE_NOGET_SITE`` and skipped.
        Accepted URLs are dispatched in this order:

        * ``www.sohu.com/a/<digits>_<digits>`` -> ``self.process_new_a``
        * other ``www``/``news`` Sohu pages and ``p.weather.com.cn``
          -> ``ChangyanComments(self).process``
        * everything else on the whitelist -> ``self.process_video``

        :param params: object exposing the page address as ``originalurl``.
        :return: None
        """
        url = params.originalurl

        # Whitelist of sites/URLs that this handler supports.  Raw strings keep
        # the regex escapes intact; the dot in ``star\.news`` is escaped so it
        # only matches a literal '.'.  ``pic\.\w+`` also covers pic.book.
        patterns = (
            r'^http[s]{0,1}://(news|fashion|women|mil|health|cul|travel|history|learning|book|star\.news|sports|(music\.)?yule|baobao|chihe|it|business|mgame)\.sohu\.com/.*',
            r'^http[s]{0,1}://pic\.\w+\.sohu\.com/.*',
            r'^http[s]{0,1}://gongyi\.sohu\.com/.*',
            r'^http[s]{0,1}://tv\.sohu\.com/.*',
            r'^http[s]{0,1}://my\.tv\.sohu\.com/.*',
            r'^http[s]{0,1}://www\.sohu\.com.*',
            r'^http[s]{0,1}://p\.weather\.com\.cn.*',
        )

        # Filter out sites/URLs that are out of scope.
        if not any(self.r.search(pattern, url) for pattern in patterns):
            Logger.log(url, constant.ERRORCODE_SITE_NOGET_SITE)
            return

        if self.r.search(r'https?://www\.sohu\.com/a/\d+_\d+', url):
            self.process_new_a(params)
        elif self.r.search(
                r'^http[s]{0,1}://(www|news)\.sohu\.com.*|^http[s]{0,1}://p\.weather\.com\.cn.*',
                url):
            ChangyanComments(self).process(params)
        else:
            self.process_video(params)
Ejemplo n.º 2
0
 def process(self, params):
     """Dispatch comment collection for a URL, then handle click counts.

     ``sports.le.com`` pages are handed to ``ChangyanComments``; every other
     URL goes through ``self.process_ptv``.  Afterwards, independent of which
     comment handler ran, the click step is chosen from ``params.step``:
     ``None`` calls ``self.getclick`` and ``self.STEP_PALY`` calls
     ``self.setclick``.  Any other step does nothing further.

     :param params: object exposing ``originalurl`` and ``step``.
     :return: None
     """
     if self.r.search(r'http://sports\.le\.com/.*', params.originalurl):
         ChangyanComments(self).process(params)
     else:
         self.process_ptv(params)

     # Identity check: None is a singleton and should not go through __eq__.
     if params.step is None:
         self.getclick(params)
     elif params.step == self.STEP_PALY:
         self.setclick(params)
class GfanComments(SiteComments):
    """Comment dispatcher that splits URLs between a forum and a news handler.

    URLs whose first host label is ``bbs`` are processed by a
    ``CommenComments`` instance; all other URLs are processed by a
    ``ChangyanComments`` instance.  Both handlers are created lazily.
    """
    STEP_1 = None

    ##############################################################################################
    # @functions:__init__
    # @param: none
    # @return:none
    # @author:Hedian
    # @date:2016/12/15
    # @note:Constructor of GfanComments; initialises the internal handler slots
    ##############################################################################################
    def __init__(self):
        SiteComments.__init__(self)
        # Handlers are created on demand by createobject().
        self.bbs = None
        self.news = None

    def createobject(self):
        """Create the ``bbs`` and ``news`` handlers if they do not exist yet."""
        if self.bbs is None:
            self.bbs = CommenComments(self)
        if self.news is None:
            self.news = ChangyanComments(self)

    ##############################################################################################
    # @functions:process
    # @params:arguments passed in by the common module (target url, original url,
    #         current step, custom parameters)
    # @return:
    # @author:Hedian
    # @date:2016/12/15
    # @note:
    ##############################################################################################
    def process(self, params):
        Logger.getlogging().info(params.originalurl)
        self.createobject()

        # First host label of the URL, e.g. 'bbs' for http://bbs.<domain>/...
        # NOTE(review): parse() is assumed to return an indexable sequence of
        # captures that is empty (or None) when nothing matches - confirm.
        matches = self.r.parse(r'^http://(\w+)\.?', params.originalurl)
        # An unmatched URL (e.g. https) falls through to the non-forum branch
        # instead of failing on the [0] lookup.
        field = matches[0] if matches else None
        Logger.getlogging().debug(field)

        if field == 'bbs':
            # Gfan forum
            self.bbs.process(params)
        else:
            # Any other Gfan page
            self.news.process(params)
 def process(self, params):
     """Collect comments for a 18183.com page based on its subdomain.

     The ``bbs`` subdomain is handled by ``CommenComments``; every other
     subdomain goes through ``ChangyanComments.getcomments``.  Pages on the
     ``chanye`` subdomain additionally get their publish time reset via
     ``self.setpubtime``.

     :param params: object exposing ``url`` and ``originalurl``.
     :return: None
     """
     Logger.getlogging().info(params.url)
     # The original pattern ended in ``com*`` (an accidental quantifier on
     # 'm'); matching the literal ``.com`` is what identifies the site.
     # NOTE(review): parse() is assumed to return an indexable sequence of
     # captures that is empty (or None) when nothing matches - confirm.
     matches = self.r.parse(r'^http://(\w+)\.18183\.com', params.originalurl)
     # Unmatched URLs fall through to the non-forum branch instead of
     # failing on the [0] lookup.
     field = matches[0] if matches else None
     if field == 'bbs':
         CommenComments.getinstance(self).process(params)
     else:
         ChangyanComments(self).getcomments(params, '', 3, 2)
     # Reset the publish time for some pages.
     if field == 'chanye':
         self.setpubtime(params)
Ejemplo n.º 5
0
    def process(self, params):
        """Dispatch a ptbus.com URL to the forum or the news comment handler.

        ``bbs.ptbus.com`` pages are processed by ``self.ptbusBbs``; any other
        ``*.ptbus.com`` page is processed by ``ChangyanComments``.  URLs that
        match neither pattern are ignored.

        :param params: object exposing the page address as ``originalurl``.
        :return: None
        """
        # Initialise the internal handler objects.
        self.createobject()

        # Forum comments.
        if self.r.match(r'http://bbs\.ptbus\.com/.*', params.originalurl):
            self.ptbusBbs.process(params)
        # News comments: non-bbs pages use the Changyan comment module.
        elif self.r.match(r'http://.+\.ptbus\.com/.*', params.originalurl):
            ChangyanComments(self).process(params)
Ejemplo n.º 6
0
 def __init__(self):
     """Register the donews.com URL pattern and its comment implementation."""
     WebSite.__init__(self)
     # Any http subdomain of donews.com.
     self.pattern = r'^http://\w+\.donews\.com.*'
     comment_impl = ChangyanComments()
     self.setcommentimpl(comment_impl)
Ejemplo n.º 7
0
 def __init__(self):
     """Register the 52tian.net URL pattern and its comment implementation."""
     WebSite.__init__(self)
     # Only the www host of 52tian.net is covered.
     self.pattern = r'^http://www\.52tian\.net/.*'
     comment_impl = ChangyanComments()
     self.setcommentimpl(comment_impl)
     self.sets2queryimpl(tian52S2Query())
Ejemplo n.º 8
0
 def __init__(self):
     """Set up the 'kumi' site: URL pattern, comment and S2 query handlers."""
     WebSite.__init__(self)
     # Site name, then the pattern covering any subdomain of kumi.cn.
     self.name, self.pattern = 'kumi', r'^http://.*\.kumi\.cn/.*'
     comment_impl = ChangyanComments()
     self.setcommentimpl(comment_impl)
     query_impl = kumiS2Query()
     self.sets2queryimpl(query_impl)
Ejemplo n.º 9
0
 def __init__(self):
     """Set up the 'sootoo' site: URL pattern and comment implementation."""
     WebSite.__init__(self)
     self.name = 'sootoo'
     # Raw string, like the sibling site constructors, so the regex escapes
     # are not treated as (invalid) string escapes.
     self.pattern = r'^http://www\.sootoo\.com/.*'
     self.setcommentimpl(ChangyanComments())
Ejemplo n.º 10
0
 def __init__(self):
     """Register the edu-gov.cn URL pattern and its comment implementation."""
     WebSite.__init__(self)
     # Any http subdomain of edu-gov.cn.
     self.pattern = r'^http://\w+\.edu-gov\.cn.*'
     comment_impl = ChangyanComments()
     self.setcommentimpl(comment_impl)
 def createobject(self):
     """Create the ``bbs`` and ``news`` handlers the first time they are needed.

     A handler that is already set is left untouched.
     """
     # ``bbs`` slot: backed by CommenComments.
     if self.bbs is None:
         bbs_handler = CommenComments(self)
         self.bbs = bbs_handler

     # ``news`` slot: backed by ChangyanComments.
     if self.news is None:
         news_handler = ChangyanComments(self)
         self.news = news_handler
Ejemplo n.º 12
0
 def createobject(self):
     """Return the ``changyan`` handler, creating it on first use."""
     if self.changyan is not None:
         # Already created: reuse it.
         return self.changyan
     self.changyan = ChangyanComments(self)
     return self.changyan
Ejemplo n.º 13
0
 def __init__(self):
     """Set up the 'qudong' site: URL pattern and comment implementation."""
     WebSite.__init__(self)
     # Site name, then the pattern covering http/https subdomains of qudong.com.
     self.name, self.pattern = 'qudong', r'^http[s]{0,1}://.*\.qudong\.com'
     comment_impl = ChangyanComments()
     self.setcommentimpl(comment_impl)