def process(self, params):
    """Route a gamersky.com page to the matching comment handler.

    The sub-domain is captured from ``params.originalurl``. ``bbs`` pages are
    handed to the ``CommenComments`` instance obtained via ``getinstance``;
    every other sub-domain goes to ``self.processNews``.
    """
    subdomain = self.r.parse(r'^http://(\w+)\.gamersky\.com/.*',
                             params.originalurl)[0]
    if subdomain != 'bbs':
        # News pages
        self.processNews(params)
        return
    # Forum pages: use the common comment fetcher
    CommenComments.getinstance(self).process(params)
 def process(self, params):
     """Route an 18183.com page to the matching comment handler.

     ``bbs`` pages go to the ``CommenComments`` instance; every other
     sub-domain is fetched through ``ChangyanComments.getcomments``. For the
     ``chanye`` sub-domain ``self.setpubtime`` is called afterwards.
     """
     Logger.getlogging().info(params.url)
     # Raw string: \w and \. are regex escapes, not string escapes.
     # NOTE(review): the trailing ``com*`` also matches ".co" - confirm intent.
     field = self.r.parse(r'^http://(\w+)\.18183\.com*',
                          params.originalurl)[0]
     if field == 'bbs':
         CommenComments.getinstance(self).process(params)
     else:
         ChangyanComments(self).getcomments(params, '', 3, 2)
     # Re-set the publish time for some pages
     if field == 'chanye':
         self.setpubtime(params)
Ejemplo n.º 3
0
class Uuu9Comments(SiteComments):
    """Comment dispatcher for uuu9.com: ``moba`` (forum) pages vs. all others."""

    # Raw string so that \. \w \d reach the regex engine untouched.
    # NOTE(review): the dot in ".html" is unescaped - confirm intent.
    BBS_URL_REG = r'^http://moba\.uuu9\.com/\w+-\d+-(\d+)-\d+.html'
    PAGE_SIZE = 10
    BBS_TITLE = ''
    STEP_1 = None
    STEP_2 = 2
    STEP_3 = 3

    # author: Hedian, 2016/11/30
    def __init__(self):
        """Initialise the base class; the sub-handlers start out unset."""
        SiteComments.__init__(self)
        self.bbs = None
        self.news = None

    def createobject(self):
        """Create the forum and news handlers if they do not exist yet."""
        if self.bbs is None:
            self.bbs = CommenComments(self)
        if self.news is None:
            self.news = NewsComments(self)

    # author: Hedian, 2016/11/30
    def process(self, params):
        """Entry point: dispatch ``params`` to the forum or news handler.

        The first sub-domain label of ``params.url`` is stored in
        ``params.customized['field']``. The sub-domain of
        ``params.originalurl`` then selects the handler: ``moba`` goes to
        ``self.bbs``, anything else to ``self.news``.

        :param params: arguments passed in by the common module (target url,
            original url, current step, customized dict)
        """
        field = self.r.parse(r'^http://(\w+)\.?', params.url)[0]
        params.customized['field'] = field
        Logger.getlogging().debug(field)

        self.createobject()

        # NOTE(review): the trailing ``com*`` also matches ".co" - confirm intent.
        site_field = self.r.parse(r'^http://(\w+)\.uuu9\.com*',
                                  params.originalurl)[0]
        # Forum
        if site_field == 'moba':
            self.bbs.process(params)
        else:
            self.news.process(params)
Ejemplo n.º 4
0
 def __init__(self):
     """Set the laohu.com URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     url_pattern = r'^http://\w+\.laohu\.com/.*'
     self.pattern = url_pattern
     # CommenComments is used here; LaohuComments_all was a former alternative.
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
     query_impl = LaohuS2Query()
     self.sets2queryimpl(query_impl)
Ejemplo n.º 5
0
 def __init__(self):
     """Set the zymk name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = 'zymk'
     self.name = site_name
     url_pattern = r'^http://.*\.zymk\.cn\/.*'
     self.pattern = url_pattern
     self.setcommentimpl(CommenComments())
     # The S2 query is built on the forum search URL.
     search_url = 'http://bbs.zymk.cn/search.php?mod=forum'
     self.sets2queryimpl(BBSS2PostQuery(search_url))
Ejemplo n.º 6
0
 def __init__(self):
     """Set the tgbus name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = 'tgbus'
     self.name = site_name
     # Accepts http and https on any tgbus.com sub-domain.
     url_pattern = r'http[s]{0,1}://.*\.tgbus\.com.*'
     self.pattern = url_pattern
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
     query_impl = TGbusS2Query()
     self.sets2queryimpl(query_impl)
Ejemplo n.º 7
0
 def __init__(self):
     """Set the acg7 name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = 'acg7'
     self.name = site_name
     url_pattern = r'^http://www\.7acg\.com\/*'
     self.pattern = url_pattern
     self.setcommentimpl(CommenComments())
     # The S2 query is built on the forum search URL.
     search_url = 'http://www.7acg.com/search.php?mod=forum'
     self.sets2queryimpl(BBSS2PostQuery(search_url))
Ejemplo n.º 8
0
 def __init__(self):
     """Set the gao7 name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = 'gao7'
     self.name = site_name
     url_pattern = r'^http[s]{0,1}://.*\.gao7\.com/.*'
     self.pattern = url_pattern
     self.setcommentimpl(CommenComments())
     # The S2 query is built on the forum search URL.
     search_url = 'http://bbs.gao7.com/search.php?mod=forum'
     self.sets2queryimpl(BBSS2PostQuery(search_url))
Ejemplo n.º 9
0
 def __init__(self):
     """Set the 52pk name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = '52pk'
     self.name = site_name
     url_pattern = r'http[s]{0,1}://.*\.52pk\.com.*'
     self.pattern = url_pattern
     # CommenComments is used here; Pk52Comments was a former alternative.
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
     query_impl = PK52S2Query()
     self.sets2queryimpl(query_impl)
Ejemplo n.º 10
0
class PtbusComments(SiteComments):
    """Comment dispatcher for ptbus.com: forum pages vs. all other sub-domains."""

    def __init__(self):
        """Initialise the base class; the sub-handlers start out unset."""
        SiteComments.__init__(self)
        self.ptbusNews = None
        self.ptbusBbs = None

    def createobject(self):
        """Create the news and forum handlers if they do not exist yet."""
        if self.ptbusNews is None:
            self.ptbusNews = PtbusNewsComments(self)
        if self.ptbusBbs is None:
            self.ptbusBbs = CommenComments(self)

    def process(self, params):
        """Dispatch ``params`` according to ``params.originalurl``.

        ``bbs.ptbus.com`` pages go to ``self.ptbusBbs``; any other
        ``*.ptbus.com`` page goes to ``ChangyanComments``. URLs matching
        neither pattern are ignored.
        """
        # Make sure the sub-handlers exist
        self.createobject()

        # Forum comments
        if self.r.match(r'http://bbs\.ptbus\.com/.*', params.originalurl):
            self.ptbusBbs.process(params)
        # News comments
        elif self.r.match(r'http://.+\.ptbus\.com/.*', params.originalurl):
            # Non-forum pages were switched to the Changyan module, so
            # self.ptbusNews is not called here any more.
            ChangyanComments(self).process(params)
class GfanComments(SiteComments):
    """Comment dispatcher for gfan: ``bbs`` (forum) pages vs. everything else."""

    STEP_1 = None

    # author: Hedian, 2016/12/15
    def __init__(self):
        """Initialise the base class; the sub-handlers start out unset."""
        SiteComments.__init__(self)
        self.bbs = None
        self.news = None

    def createobject(self):
        """Create the forum and Changyan handlers if they do not exist yet."""
        if self.bbs is None:
            self.bbs = CommenComments(self)
        if self.news is None:
            self.news = ChangyanComments(self)

    # author: Hedian, 2016/12/15
    def process(self, params):
        """Dispatch ``params`` by the first sub-domain label of its original URL.

        :param params: arguments passed in by the common module (target url,
            original url, current step, customized dict)
        """
        Logger.getlogging().info(params.originalurl)
        self.createobject()

        # Raw string: \w and \. are regex escapes, not string escapes.
        field = self.r.parse(r'^http://(\w+)\.?', params.originalurl)[0]
        Logger.getlogging().debug(field)

        if field == 'bbs':
            # gfan forum
            self.bbs.process(params)
        else:
            # every other gfan page
            self.news.process(params)
class dm78Comments(SiteComments):
    """Comment dispatcher for 78dm.net: forum pages vs. news pages."""

    def __init__(self):
        """Initialise the base class; the sub-handlers start out unset."""
        SiteComments.__init__(self)
        self.dm78News = None
        self.dm78Bbs = None

    def createobject(self):
        """Create the news and forum handlers if they do not exist yet."""
        if self.dm78News is None:
            self.dm78News = dm78NewsComments(self)
        if self.dm78Bbs is None:
            self.dm78Bbs = CommenComments(self)

    def process(self, params):
        """Send forum URLs to ``self.dm78Bbs`` and everything else to ``self.dm78News``."""
        # Make sure the sub-handlers exist
        self.createobject()

        # Forum comments
        # NOTE(review): the dot in "forum.php" is unescaped - confirm intent.
        if self.r.match(r'http://bbs\.78dm\.net/forum.php.*',
                        params.originalurl):
            self.dm78Bbs.process(params)
        # News comments
        else:
            self.dm78News.process(params)
Ejemplo n.º 13
0
class NarutomComments(SiteComments):
    """Comment dispatcher for narutom.com: forum pages vs. video pages."""

    def __init__(self):
        """Initialise the base class; the sub-handlers start out unset."""
        SiteComments.__init__(self)
        self.narutomVideo = None
        self.narutomBbs = None

    def createobject(self):
        """Create the video and forum handlers if they do not exist yet."""
        if self.narutomVideo is None:
            self.narutomVideo = NarutomVideoComments(self)
        if self.narutomBbs is None:
            self.narutomBbs = CommenComments(self)

    def process(self, params):
        """Send forum URLs to ``self.narutomBbs`` and everything else to ``self.narutomVideo``."""
        # Make sure the sub-handlers exist
        self.createobject()

        # Forum comments
        if self.r.match(r'http://bbs\.narutom\.com\/*', params.originalurl):
            self.narutomBbs.process(params)
        # Video comments
        else:
            self.narutomVideo.process(params)
Ejemplo n.º 14
0
 def process(self, params):
     """Fetch comments for onlylady forum pages.

     Only URLs matching the ``bbs.onlylady.com`` pattern are handled; they
     are delegated to ``CommenComments``. Any exception raised while doing
     so is printed via ``traceback.print_exc`` and not re-raised.
     """
     Logger.getlogging().info(params.url)
     try:
         # Make sure the sub-handlers exist
         self.createobject()
         # Forum comments
         # NOTE(review): the dots in this pattern are unescaped - confirm intent.
         if self.r.match('http://bbs.onlylady.com/.*', params.originalurl):
             # Forum comments use the common implementation instead of
             # self.onlyladyBbs.
             CommenComments(self).process(params)
     # "except Exception, e" is Python-2-only syntax and ``e`` was unused;
     # this form is valid on both Python 2 and Python 3.
     except Exception:
         traceback.print_exc()
Ejemplo n.º 15
0
    def process(self, params):
        """Dispatch a Tencent page to the handler for its sub-site and step.

        Sub-sites recognised from ``params.originalurl``:

        1. Tencent news and everything not listed below (fallback)
        2. Tencent video, https://v.qq.com/
        3. Tencent comics, http://ac.qq.com/Comic/
        4. QQ reading, http://ebook.qq.com/
        5. Yunqi book city, http://yunqi.qq.com/

        ``bbs.book.qq.com`` is delegated to ``CommenComments`` and
        ``p.weather.com.cn`` to ``SohuComments``. When ``params.step`` matches
        none of the step constants, nothing is called.
        """
        url = params.originalurl
        matches = self.r.search
        # Each ``handlers`` triple names the methods for the default,
        # first-comment-page and next-comment-page steps, in that order;
        # ``None`` means that step is ignored for the sub-site.
        if matches(r'^http[s]{0,1}://ac\.qq\.com/Comic/.*', url):
            handlers = ('step1_ac', 'step2_ac', 'step3_ac')
        elif matches(r'^http[s]{0,1}://v\.qq\.com/.*', url):
            handlers = ('get_url_id', 'step1', 'step2')
        elif matches(r'^http[s]{0,1}://bbs\.book\.qq\.com/.*', url):
            CommenComments(self).process(params)
            return
        elif matches(r'^http[s]{0,1}://ebook\.qq\.com/.*', url):
            handlers = ('step1_ebook', 'step2_ebook', 'step3_ebook')
        elif matches(r'^http[s]{0,1}://yunqi\.qq\.com/.*', url):
            handlers = ('step1_yunqi', 'step2_yunqi', 'step3_yunqi')
        elif matches(r'^http[s]{0,1}://p\.weather\.com\.cn.*', url):
            SohuComments(self).process(params)
            return
        else:
            # The fallback has no first-page step.
            handlers = ('step1', None, 'step2')

        steps = (self.STEP_DEFAULT_VALUE,
                 self.STEP_COMMENT_FIRST_PAGE,
                 self.STEP_COMMENT_NEXT_PAGE)
        # The first matching step wins, as in an if/elif ladder.
        for step, handler in zip(steps, handlers):
            if handler is not None and params.step == step:
                getattr(self, handler)(params)
                break
 def step2bbs(self, params):
     """Handle the first forum page: collect its comments and queue the other pages.

     For standard thread URLs the page count is read from the pager
     ``<span>`` in ``params.content``, the current page's comments are
     collected via ``CommenComments.getpagecomments``, and one URL per
     remaining page is stored with step ``LaohuComments.STEP_3_BBS``.
     All other URLs go to ``CommenComments.getpagecomments2``.

     If the pager cannot be found or parsed, the method returns without
     collecting anything. The original guarded this with an unreachable
     ``lastpg is None`` check.
     """
     Logger.getlogging().info("LaohuComments.STEP_2_BBS")

     if not self.r.parse(r'^http://bbs\.laohu\.com\/\w+-(\d+)-\d+-\d+\.html',
                         params.originalurl):
         # Special URLs
         CommenComments.getpagecomments2(self, params)
         return

     field = params.customized['field']
     # Read the total page count from the pager text on the page.
     pager = self.r.parse('<span title=".*">(.*?)</span>', params.content)
     if not pager:
         return
     try:
         lastpg = int(pager[0].split('/')[1].split(' ')[1])
     except (IndexError, ValueError):
         # Pager text is not in the expected "... / N ..." shape.
         return

     # Page number of the current comment page
     pg = self.r.parse(self.BBS_URL_REG, params.url)[0]

     # Collect the comments on the current page
     params.customized['lastpg'] = lastpg
     CommenComments.getpagecomments(self, params, self.BBS_URL_REG)

     # A single page needs no further requests
     if lastpg == 1:
         return

     # Expand to every other page so that all comments are fetched
     urlArr = params.originalurl.split('-')
     if len(urlArr) != 4:
         return
     current_page = int(pg)
     for page in range(1, lastpg + 1):
         if page == current_page:
             continue
         commentUrl = '-'.join([urlArr[0], urlArr[1], str(page), urlArr[3]])
         Logger.getlogging().debug(commentUrl)
         self.storeurl(commentUrl, params.originalurl, LaohuComments.STEP_3_BBS,
                       {'field': field, 'lastpg': lastpg})
 def process(self, params):
     """Dispatch a duowan.com page by sub-domain and step.

     ``bbs`` pages go to ``CommenComments``. Otherwise ``params.step``
     selects ``step1`` (step is ``None``), ``step_tu``, ``step2`` or
     ``step3``. Exceptions are reported through ``Logger.printexception``
     and not re-raised.
     """
     try:
         # Raw string: \w and \. are regex escapes, not string escapes.
         # NOTE(review): the trailing ``com*`` also matches ".co" - confirm intent.
         field = self.r.parse(r'^http://(\w+)\.duowan\.com*', params.originalurl)[0]
         if field == 'bbs':
             CommenComments(self).process(params)
             return
         if params.step is None:
             self.step1(params)
         elif params.step == DuowanComments.STEP_2_TU:
             self.step_tu(params)
         elif params.step == DuowanComments.STEP_2:
             self.step2(params)
         elif params.step == DuowanComments.STEP_3:
             self.step3(params)
     # ``except Exception`` rather than a bare ``except`` so that
     # SystemExit / KeyboardInterrupt are not swallowed.
     except Exception:
         Logger.printexception()
    def process(self, params):
        """Dispatch a dmzj.com page to the forum or news comment handler.

        The routing key is the first path segment for ``www.dmzj.com`` URLs
        and the sub-domain otherwise. ``bbs`` goes to ``CommenComments``;
        everything else goes to ``DmzjNewscomments``.
        """
        url = params.originalurl
        # 1. Derive the routing key from the original URL
        path_pattern = r'^http[s]{0,1}://www\.dmzj\.com\/(\w+)'
        if self.r.match(path_pattern, url):
            field = self.r.parse(path_pattern, url)[0]
        else:
            # NOTE(review): the dots around "dmzj" are unescaped - confirm intent.
            field = self.r.parse(r'^http[s]{0,1}://(\w+).dmzj.com/.*', url)[0]
        if not field:
            # NOTE(review): processing continues after logging - confirm intent.
            Logger.log(url, constant.ERRORCODE_SITE_NOGET_TEMPLATE)

        if field == 'bbs':
            CommenComments(self).process(params)
        else:
            # news / manhua / xs / info and any other key share one handler
            DmzjNewscomments(self).process(params)
Ejemplo n.º 19
0
 def process(self, params):
     """Dispatch a 17k.com page.

     ``www.17k.com`` URLs go to ``self.process_book`` and ``bbs.17k.com``
     URLs go to ``CommenComments``. The two checks are independent, and
     URLs matching neither are ignored.
     """
     # Raw strings: \. is a regex escape, not a string escape.
     if self.r.search(r'http[s]{0,1}://www\.17k\.com.*', params.originalurl):
         self.process_book(params)
     if self.r.search(r'http[s]{0,1}://bbs\.17k\.com.*', params.originalurl):
         CommenComments(self).process(params)
Ejemplo n.º 20
0
 def createobject(self):
     """Create the video and forum comment handlers unless they already exist."""
     video_missing = self.narutomVideo is None
     if video_missing:
         self.narutomVideo = NarutomVideoComments(self)
     bbs_missing = self.narutomBbs is None
     if bbs_missing:
         self.narutomBbs = CommenComments(self)
Ejemplo n.º 21
0
 def createobject(self):
     """Create the news and forum comment handlers unless they already exist."""
     news_missing = self.ptbusNews is None
     if news_missing:
         self.ptbusNews = PtbusNewsComments(self)
     bbs_missing = self.ptbusBbs is None
     if bbs_missing:
         self.ptbusBbs = CommenComments(self)
Ejemplo n.º 22
0
 def __init__(self):
     """Set the lady8844 name and URL pattern and install the comment implementation."""
     WebSite.__init__(self)
     site_name = 'lady8844'
     self.name = site_name
     url_pattern = r'^http[s]{0,1}://.*lady8844\.com.*'
     self.pattern = url_pattern
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
Ejemplo n.º 23
0
 def createobject(self):
     """Create the forum and news comment handlers unless they already exist."""
     bbs_missing = self.bbs is None
     if bbs_missing:
         self.bbs = CommenComments(self)
     news_missing = self.news is None
     if news_missing:
         self.news = NewsComments(self)
Ejemplo n.º 24
0
 def createobject(self):
     """Create the news and forum comment handlers unless they already exist."""
     news_missing = self.u17News is None
     if news_missing:
         self.u17News = U17NewsComments(self)
     bbs_missing = self.u17Bbs is None
     if bbs_missing:
         self.u17Bbs = CommenComments(self)
 def step3bbs(self, params):
     """Log the step and collect this page's comments via ``CommenComments.getpagecomments``."""
     log = Logger.getlogging()
     log.info("LaohuComments.STEP_3_BBS")
     fetch_page_comments = CommenComments.getpagecomments
     fetch_page_comments(self, params, self.BBS_URL_REG)
Ejemplo n.º 26
0
 def __init__(self):
     """Set the gxdmw name and URL pattern and install the comment implementation."""
     WebSite.__init__(self)
     site_name = 'gxdmw'
     self.name = site_name
     url_pattern = r'^http[s]{0,1}://www\.gxdmw\.com/.*'
     self.pattern = url_pattern
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
Ejemplo n.º 27
0
 def __init__(self):
     """Set the Angeeks name and URL pattern and install the comment / S2 query implementations."""
     WebSite.__init__(self)
     site_name = 'Angeeks'
     self.name = site_name
     url_pattern = r'^http[s]{0,1}://.*\.angeeks\.com/.*'
     self.pattern = url_pattern
     comment_impl = CommenComments()
     self.setcommentimpl(comment_impl)
     query_impl = AngeeksS2Query()
     self.sets2queryimpl(query_impl)
Ejemplo n.º 28
0
 def process(self, params):
     """Send ``bbs.17173.com`` URLs to ``CommenComments`` and all others to ``self.process_news``."""
     # Raw string: \. is a regex escape, not a string escape.
     if self.r.search(r'https?://bbs\.17173\.com/.*', params.originalurl):
         CommenComments(self).process(params)
     else:
         self.process_news(params)
 def createobject(self):
     """Create the news and forum comment handlers unless they already exist."""
     news_missing = self.dm78News is None
     if news_missing:
         self.dm78News = dm78NewsComments(self)
     bbs_missing = self.dm78Bbs is None
     if bbs_missing:
         self.dm78Bbs = CommenComments(self)