def process(self, params):
    """Dispatch a gamersky.com URL to the forum or the news comment handler.

    The first value returned by ``self.r.parse`` for the gamersky URL pattern
    (its only capture group is the host prefix) selects the handler:
    ``bbs`` goes to the shared ``CommenComments`` instance, anything else to
    ``self.processNews``.

    :param params: request parameters; ``originalurl`` is read here and the
        object is forwarded unchanged to the selected handler.
    """
    matches = self.r.parse(r'^http://(\w+)\.gamersky\.com/.*', params.originalurl)
    subdomain = matches[0]
    if subdomain == 'bbs':
        # Forum URL: use the shared comment implementation.
        CommenComments.getinstance(self).process(params)
        return
    # Any other sub-domain is treated as a news URL.
    self.processNews(params)
def process(self, params):
    """Dispatch an 18183.com URL to the forum or the Changyan comment handler.

    ``bbs`` hosts are handled by the shared ``CommenComments`` instance.  All
    other hosts go through ``ChangyanComments.getcomments``; for the
    ``chanye`` host the publish time is additionally reset through
    ``self.setpubtime``.

    :param params: request parameters; ``url`` is logged, ``originalurl``
        selects the handler.
    """
    log = Logger.getlogging()
    log.info(params.url)
    subdomain = self.r.parse('^http://(\w+)\.18183\.com*', params.originalurl)[0]
    if subdomain == 'bbs':
        CommenComments.getinstance(self).process(params)
        return
    ChangyanComments(self).getcomments(params, '', 3, 2)
    if subdomain == 'chanye':
        # Some pages need their publish time set again.
        self.setpubtime(params)
class Uuu9Comments(SiteComments):
    """Comment collector for uuu9.com that routes to a forum or news handler.

    The two sub-handlers are created lazily by ``createobject`` and reused on
    later calls.
    """

    # Forum thread URL pattern for moba.uuu9.com with one numeric capture
    # group (presumably the page number -- TODO confirm against its users).
    BBS_URL_REG = '^http://moba\.uuu9\.com/\w+-\d+-(\d+)-\d+.html'
    PAGE_SIZE = 10
    BBS_TITLE = ''
    # Step markers; they are not referenced inside this class body.
    STEP_1 = None
    STEP_2 = 2
    STEP_3 = 3

    def __init__(self):
        """Initialise the base class and leave both sub-handlers unset."""
        SiteComments.__init__(self)
        self.bbs = self.news = None

    def createobject(self):
        """Create the forum and news handlers that do not exist yet."""
        if self.bbs is None:
            self.bbs = CommenComments(self)
        if self.news is None:
            self.news = NewsComments(self)

    def process(self, params):
        """Entry point: record the URL's host prefix and dispatch by site.

        The host prefix of ``params.url`` is stored in
        ``params.customized['field']`` and logged.  The host prefix of
        ``params.originalurl`` then selects the handler: ``moba`` (forum) goes
        to ``self.bbs``, everything else to ``self.news``.

        :param params: request parameters (``url``, ``originalurl`` and
            ``customized`` are used here).
        """
        url_prefix = self.r.parse('^http://(\w+)\.?', params.url)[0]
        params.customized['field'] = url_prefix
        Logger.getlogging().debug(url_prefix)
        self.createobject()
        site = self.r.parse('^http://(\w+)\.uuu9\.com*', params.originalurl)[0]
        # 'moba' is the forum host; all other hosts use the news handler.
        handler = self.bbs if site == 'moba' else self.news
        handler.process(params)
def __init__(self):
    """Set up the laohu.com site: URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries are
    handled by ``LaohuS2Query``.
    """
    WebSite.__init__(self)
    self.pattern = r'^http://\w+\.laohu\.com/.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    query_impl = LaohuS2Query()
    self.sets2queryimpl(query_impl)
def __init__(self):
    """Set up the zymk site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries go
    through ``BBSS2PostQuery`` built with the bbs.zymk.cn forum search URL.
    """
    WebSite.__init__(self)
    self.name = 'zymk'
    self.pattern = r'^http://.*\.zymk\.cn\/.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    search_url = 'http://bbs.zymk.cn/search.php?mod=forum'
    self.sets2queryimpl(BBSS2PostQuery(search_url))
def __init__(self):
    """Set up the tgbus site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries are
    handled by ``TGbusS2Query``.
    """
    WebSite.__init__(self)
    self.name = 'tgbus'
    # Accepts http and https URLs containing a *.tgbus.com host.
    self.pattern = r'http[s]{0,1}://.*\.tgbus\.com.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    query_impl = TGbusS2Query()
    self.sets2queryimpl(query_impl)
def __init__(self):
    """Set up the acg7 site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries go
    through ``BBSS2PostQuery`` built with the www.7acg.com forum search URL.
    """
    WebSite.__init__(self)
    self.name = 'acg7'
    self.pattern = r'^http://www\.7acg\.com\/*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    search_url = 'http://www.7acg.com/search.php?mod=forum'
    self.sets2queryimpl(BBSS2PostQuery(search_url))
def __init__(self):
    """Set up the gao7 site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries go
    through ``BBSS2PostQuery`` built with the bbs.gao7.com forum search URL.
    """
    WebSite.__init__(self)
    self.name = 'gao7'
    self.pattern = r'^http[s]{0,1}://.*\.gao7\.com/.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    search_url = 'http://bbs.gao7.com/search.php?mod=forum'
    self.sets2queryimpl(BBSS2PostQuery(search_url))
def __init__(self):
    """Set up the 52pk site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries are
    handled by ``PK52S2Query``.
    """
    WebSite.__init__(self)
    self.name = '52pk'
    # Accepts http and https URLs containing a *.52pk.com host.
    self.pattern = r'http[s]{0,1}://.*\.52pk\.com.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    query_impl = PK52S2Query()
    self.sets2queryimpl(query_impl)
class PtbusComments(SiteComments):
    """Route ptbus.com comment collection to the forum or Changyan handler.

    Sub-handler objects are created lazily by ``createobject`` the first time
    ``process`` runs.
    """

    def __init__(self):
        """Initialise the base class and leave both sub-handlers unset."""
        SiteComments.__init__(self)
        self.ptbusNews = self.ptbusBbs = None

    def createobject(self):
        """Create the news and forum handlers that do not exist yet."""
        if self.ptbusNews is None:
            self.ptbusNews = PtbusNewsComments(self)
        if self.ptbusBbs is None:
            self.ptbusBbs = CommenComments(self)

    def process(self, params):
        """Dispatch on ``params.originalurl``.

        bbs.ptbus.com URLs go to the forum handler.  Any other ``*.ptbus.com``
        URL is processed by a fresh ``ChangyanComments`` object (non-forum
        pages use Changyan comments; ``self.ptbusNews`` is created but not
        called here).  URLs matching neither pattern are ignored.

        :param params: request parameters forwarded to the chosen handler.
        """
        # Make sure the sub-handler objects exist.
        self.createobject()
        if self.r.match('http://bbs\.ptbus\.com/.*', params.originalurl):
            self.ptbusBbs.process(params)
            return
        if self.r.match('http://.+\.ptbus\.com/.*', params.originalurl):
            ChangyanComments(self).process(params)
class GfanComments(SiteComments):
    """Route gfan comment collection to the forum or Changyan handler.

    Sub-handler objects are created lazily by ``createobject``.
    """

    STEP_1 = None

    def __init__(self):
        """Initialise the base class and leave both sub-handlers unset."""
        SiteComments.__init__(self)
        self.bbs = self.news = None

    def createobject(self):
        """Create the forum and news handlers that do not exist yet."""
        if self.bbs is None:
            self.bbs = CommenComments(self)
        if self.news is None:
            self.news = ChangyanComments(self)

    def process(self, params):
        """Log the original URL and dispatch on its host prefix.

        A ``bbs`` prefix selects the forum handler; every other prefix is
        handled by the Changyan-based news handler.

        :param params: request parameters forwarded to the chosen handler.
        """
        Logger.getlogging().info(params.originalurl)
        self.createobject()
        prefix = self.r.parse('^http://(\w+)\.?', params.originalurl)[0]
        Logger.getlogging().debug(prefix)
        # 'bbs' is the gfan forum; everything else is other gfan content.
        handler = self.bbs if prefix == 'bbs' else self.news
        handler.process(params)
class dm78Comments(SiteComments):
    """Route 78dm.net comment collection to the forum or news handler.

    Sub-handler objects are created lazily by ``createobject``.
    """

    def __init__(self):
        """Initialise the base class and leave both sub-handlers unset."""
        SiteComments.__init__(self)
        self.dm78News = self.dm78Bbs = None

    def createobject(self):
        """Create the news and forum handlers that do not exist yet."""
        if self.dm78News is None:
            self.dm78News = dm78NewsComments(self)
        if self.dm78Bbs is None:
            self.dm78Bbs = CommenComments(self)

    def process(self, params):
        """Dispatch on ``params.originalurl``.

        URLs matching the bbs.78dm.net ``forum.php`` pattern go to the forum
        handler; every other URL goes to the news handler.

        :param params: request parameters forwarded to the chosen handler.
        """
        # Make sure the sub-handler objects exist.
        self.createobject()
        if self.r.match('http://bbs\.78dm\.net/forum.php.*', params.originalurl):
            handler = self.dm78Bbs
        else:
            handler = self.dm78News
        handler.process(params)
class NarutomComments(SiteComments):
    """Route narutom.com comment collection to the forum or video handler.

    Sub-handler objects are created lazily by ``createobject``.
    """

    def __init__(self):
        """Initialise the base class and leave both sub-handlers unset."""
        SiteComments.__init__(self)
        self.narutomVideo = self.narutomBbs = None

    def createobject(self):
        """Create the video and forum handlers that do not exist yet."""
        if self.narutomVideo is None:
            self.narutomVideo = NarutomVideoComments(self)
        if self.narutomBbs is None:
            self.narutomBbs = CommenComments(self)

    def process(self, params):
        """Dispatch on ``params.originalurl``.

        URLs matching the bbs.narutom.com pattern go to the forum handler;
        every other URL goes to the video handler.

        :param params: request parameters forwarded to the chosen handler.
        """
        # Make sure the sub-handler objects exist.
        self.createobject()
        if self.r.match('http://bbs\.narutom\.com\/*', params.originalurl):
            handler = self.narutomBbs
        else:
            handler = self.narutomVideo
        handler.process(params)
def process(self, params):
    """Collect comments for onlylady forum URLs with the shared handler.

    The URL is logged, the sub-handler objects are initialised, and if
    ``params.originalurl`` matches the bbs.onlylady.com pattern a
    ``CommenComments`` object processes it.  Other URLs are ignored.

    Any exception raised along the way is printed with
    ``traceback.print_exc`` and not propagated (best-effort processing).

    :param params: request parameters forwarded to ``CommenComments``.
    """
    Logger.getlogging().info(params.url)
    try:
        # Make sure the sub-handler objects exist.
        self.createobject()
        # Forum comments are collected through the shared implementation.
        if self.r.match('http://bbs.onlylady.com/.*', params.originalurl):
            CommenComments(self).process(params)
    except Exception:
        # 'except Exception:' is valid on both Python 2 and Python 3; the
        # exception object itself is not needed because print_exc reads it
        # from the interpreter state.
        traceback.print_exc()
def process(self, params):
    """Route a URL to the matching comment-collection steps.

    URL families are tested in this order (first match wins):

    1. ac.qq.com/Comic (Tencent comics): step1_ac / step2_ac / step3_ac
    2. v.qq.com (Tencent video): get_url_id / step1 / step2
    3. bbs.book.qq.com: shared ``CommenComments`` handler
    4. ebook.qq.com (QQ reading): step1_ebook / step2_ebook / step3_ebook
    5. yunqi.qq.com (Yunqi book store): step1_yunqi / step2_yunqi / step3_yunqi
    6. p.weather.com.cn: ``SohuComments`` handler
    7. anything else (Tencent news and others): step1 / step2

    Within a family, ``params.step`` is compared against the STEP_* constants
    in the listed order; an unrecognised step does nothing.

    :param params: request parameters (``originalurl`` and ``step`` are read).
    """
    default_step = 'STEP_DEFAULT_VALUE'
    first_page = 'STEP_COMMENT_FIRST_PAGE'
    next_page = 'STEP_COMMENT_NEXT_PAGE'

    def url_is(pattern):
        return self.r.search(pattern, params.originalurl)

    def run_step(*stages):
        # Each stage is (STEP_* attribute name, handler method name); the first
        # stage whose constant equals params.step is run.  Attributes are
        # looked up lazily, only when their stage is reached.
        for step_attr, method_name in stages:
            if params.step == getattr(self, step_attr):
                getattr(self, method_name)(params)
                return

    if url_is('^http[s]{0,1}://ac\.qq\.com/Comic/.*'):
        run_step((default_step, 'step1_ac'),
                 (first_page, 'step2_ac'),
                 (next_page, 'step3_ac'))
    elif url_is('^http[s]{0,1}://v\.qq\.com/.*'):
        run_step((default_step, 'get_url_id'),
                 (first_page, 'step1'),
                 (next_page, 'step2'))
    elif url_is('^http[s]{0,1}://bbs\.book\.qq\.com/.*'):
        CommenComments(self).process(params)
    elif url_is('^http[s]{0,1}://ebook\.qq\.com/.*'):
        run_step((default_step, 'step1_ebook'),
                 (first_page, 'step2_ebook'),
                 (next_page, 'step3_ebook'))
    elif url_is('^http[s]{0,1}://yunqi\.qq\.com/.*'):
        run_step((default_step, 'step1_yunqi'),
                 (first_page, 'step2_yunqi'),
                 (next_page, 'step3_yunqi'))
    elif url_is('^http[s]{0,1}://p\.weather\.com\.cn.*'):
        SohuComments(self).process(params)
    else:
        # Default family has no first-page stage.
        run_step((default_step, 'step1'),
                 (next_page, 'step2'))
def step2bbs(self, params):
    """Collect the current forum page and queue the remaining comment pages.

    For standard bbs.laohu.com thread URLs the total page count is read from
    the page content, the comments of the current page are collected, and
    one URL per remaining page is stored with step ``STEP_3_BBS``.  All other
    URLs are passed to ``CommenComments.getpagecomments2``.

    If the page count cannot be read from the content the method logs a debug
    message and returns without collecting anything; the original
    ``lastpg is None`` guard was meant to do this but could never fire
    because ``int()`` never returns ``None``.

    :param params: request parameters; ``originalurl``, ``url``, ``content``
        and ``customized`` are used.
    """
    Logger.getlogging().info("LaohuComments.STEP_2_BBS")
    if not self.r.parse('^http://bbs\.laohu\.com\/\w+-(\d+)-\d+-\d+\.html', params.originalurl):
        # Special (non-standard) URL.
        CommenComments.getpagecomments2(self, params)
        return

    # Standard thread URL (S1).
    field = params.customized['field']
    # The page total is the second space-separated token after the first '/'
    # of the first matching <span> text (exact page markup not shown here).
    try:
        lastpg = int(self.r.parse('<span title=".*">(.*?)</span>', params.content)[0].split('/')[1].split(' ')[1])
    except (IndexError, ValueError, TypeError):
        Logger.getlogging().debug('LaohuComments.STEP_2_BBS: page count not found')
        return

    # Page number of the page currently being processed.
    pg = self.r.parse(self.BBS_URL_REG, params.url)[0]
    # Collect the comments of the current page.
    params.customized['lastpg'] = lastpg
    CommenComments.getpagecomments(self, params, self.BBS_URL_REG)
    # A single page needs no follow-up requests.
    if lastpg == 1:
        return

    # Expand the S1 URL into one URL per remaining page.
    urlArr = params.originalurl.split('-')
    if len(urlArr) != 4:
        return
    current = int(pg)
    for page in range(1, lastpg + 1):
        if page == current:
            continue
        commentUrl = '-'.join((urlArr[0], urlArr[1], str(page), urlArr[3]))
        Logger.getlogging().debug(commentUrl)
        self.storeurl(commentUrl, params.originalurl, LaohuComments.STEP_3_BBS,
                      {'field': field, 'lastpg': lastpg})
def process(self, params):
    """Dispatch a duowan.com URL to the forum handler or the step methods.

    ``bbs`` hosts are processed by a ``CommenComments`` object.  For all
    other hosts ``params.step`` selects the step: ``None`` runs ``step1``,
    ``STEP_2_TU`` runs ``step_tu``, ``STEP_2`` runs ``step2`` and ``STEP_3``
    runs ``step3``.

    Errors are reported through ``Logger.printexception`` and not propagated.
    Only ``Exception`` subclasses are caught, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed as they were by the bare
    ``except:``.

    :param params: request parameters (``originalurl`` and ``step`` are read).
    """
    try:
        field = self.r.parse('^http://(\w+)\.duowan\.com*', params.originalurl)[0]
        if field == 'bbs':
            CommenComments(self).process(params)
            return
        if params.step is None:
            self.step1(params)
        elif params.step == DuowanComments.STEP_2_TU:
            self.step_tu(params)
        elif params.step == DuowanComments.STEP_2:
            # Get the comment total and build every comment URL.
            self.step2(params)
        elif params.step == DuowanComments.STEP_3:
            self.step3(params)
    except Exception:
        Logger.printexception()
def process(self, params):
    """Dispatch a dmzj.com URL to the forum or the news comment handler.

    The routing key is the first path segment for www.dmzj.com URLs and the
    host prefix otherwise.  ``bbs`` is handled by ``CommenComments``; every
    other key is handled by ``DmzjNewscomments``.

    :param params: request parameters forwarded to the chosen handler.
    """
    www_pattern = '^http[s]{0,1}://www\.dmzj\.com\/(\w+)'
    # 1. Work out which part of the original URL identifies the section.
    if self.r.match(www_pattern, params.originalurl):
        key_pattern = www_pattern
    else:
        key_pattern = '^http[s]{0,1}://(\w+).dmzj.com/.*'
    field = self.r.parse(key_pattern, params.originalurl)[0]
    if not field:
        # NOTE(review): processing continues after logging -- confirm intended.
        Logger.log(params.originalurl, constant.ERRORCODE_SITE_NOGET_TEMPLATE)
    # 2. Forum URLs use the shared handler, everything else the news handler.
    handler = CommenComments(self) if field == 'bbs' else DmzjNewscomments(self)
    handler.process(params)
def process(self, params):
    """Dispatch a 17k.com URL to the book or the forum comment handler.

    The two checks are independent: www.17k.com URLs run
    ``self.process_book`` and bbs.17k.com URLs run ``CommenComments``.

    :param params: request parameters forwarded to the chosen handler(s).
    """
    is_book_url = self.r.search('http[s]{0,1}://www\.17k\.com.*', params.originalurl)
    if is_book_url:
        self.process_book(params)
    # Re-read originalurl here, exactly as the two separate checks did.
    is_forum_url = self.r.search('http[s]{0,1}://bbs\.17k\.com.*', params.originalurl)
    if is_forum_url:
        CommenComments(self).process(params)
def createobject(self):
    """Create the video and forum sub-handlers that have not been built yet.

    An attribute is only assigned while it is still ``None``, so repeated
    calls keep the objects created earlier.
    """
    lazy_handlers = (
        ('narutomVideo', NarutomVideoComments),
        ('narutomBbs', CommenComments),
    )
    for attr_name, handler_cls in lazy_handlers:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, handler_cls(self))
def createobject(self):
    """Create the news and forum sub-handlers that have not been built yet.

    An attribute is only assigned while it is still ``None``, so repeated
    calls keep the objects created earlier.
    """
    lazy_handlers = (
        ('ptbusNews', PtbusNewsComments),
        ('ptbusBbs', CommenComments),
    )
    for attr_name, handler_cls in lazy_handlers:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, handler_cls(self))
def __init__(self):
    """Set up the lady8844 site: name, URL pattern and comment handler.

    Comments use the shared ``CommenComments`` implementation.
    """
    WebSite.__init__(self)
    self.name = 'lady8844'
    self.pattern = r'^http[s]{0,1}://.*lady8844\.com.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
def createobject(self):
    """Create the forum and news sub-handlers that have not been built yet.

    An attribute is only assigned while it is still ``None``, so repeated
    calls keep the objects created earlier.
    """
    lazy_handlers = (
        ('bbs', CommenComments),
        ('news', NewsComments),
    )
    for attr_name, handler_cls in lazy_handlers:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, handler_cls(self))
def createobject(self):
    """Create the news and forum sub-handlers that have not been built yet.

    An attribute is only assigned while it is still ``None``, so repeated
    calls keep the objects created earlier.
    """
    lazy_handlers = (
        ('u17News', U17NewsComments),
        ('u17Bbs', CommenComments),
    )
    for attr_name, handler_cls in lazy_handlers:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, handler_cls(self))
def step3bbs(self, params):
    """Collect the comments of one forum page (step 3 of the BBS flow).

    Logs the step name and passes the work to
    ``CommenComments.getpagecomments`` with this class's ``BBS_URL_REG``.

    :param params: request parameters for the page being processed.
    """
    log = Logger.getlogging()
    log.info("LaohuComments.STEP_3_BBS")
    url_pattern = self.BBS_URL_REG
    CommenComments.getpagecomments(self, params, url_pattern)
def __init__(self):
    """Set up the gxdmw site: name, URL pattern and comment handler.

    Comments use the shared ``CommenComments`` implementation.
    """
    WebSite.__init__(self)
    self.name = 'gxdmw'
    self.pattern = r'^http[s]{0,1}://www\.gxdmw\.com/.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
def __init__(self):
    """Set up the Angeeks site: name, URL pattern, comment and S2 query handlers.

    Comments use the shared ``CommenComments`` implementation; S2 queries are
    handled by ``AngeeksS2Query``.
    """
    WebSite.__init__(self)
    self.name = 'Angeeks'
    self.pattern = r'^http[s]{0,1}://.*\.angeeks\.com/.*'
    comment_impl = CommenComments()
    self.setcommentimpl(comment_impl)
    query_impl = AngeeksS2Query()
    self.sets2queryimpl(query_impl)
def process(self, params):
    """Dispatch a 17173.com URL to the forum or the news comment handler.

    bbs.17173.com URLs are processed by ``CommenComments``; every other URL
    goes to ``self.process_news``.

    :param params: request parameters forwarded to the chosen handler.
    """
    is_forum_url = self.r.search('https?://bbs\.17173\.com/.*', params.originalurl)
    if not is_forum_url:
        self.process_news(params)
        return
    CommenComments(self).process(params)
def createobject(self):
    """Create the news and forum sub-handlers that have not been built yet.

    An attribute is only assigned while it is still ``None``, so repeated
    calls keep the objects created earlier.
    """
    lazy_handlers = (
        ('dm78News', dm78NewsComments),
        ('dm78Bbs', CommenComments),
    )
    for attr_name, handler_cls in lazy_handlers:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, handler_cls(self))