def __init__(self, parent=None):
    """Set paging sizes, client ids, URL templates and step markers.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)

    # Paging and client identification values.
    self.page_size, self.tv_page_size = 20, 30
    self.client_id, self.tv_client_id = 'cyqemw6s1', 'cyqyBluaj'
    self.group_mark = '9000'
    if parent:
        self.website = parent.website

    # changyan.sohu.com templates with positional placeholders.
    self.COMMENTS_SOURCE_URL = (
        'http://changyan.sohu.com/api/3/topic/liteload'
        '?client_id={0}&topic_source_id={1}&page_size={2}')
    self.TV_COMMENTS_SOURCE_URL = (
        'http://changyan.sohu.com/api/2/topic/load'
        '?client_id={0}&topic_url={1}&topic_source_id={2}&page_size={3}')
    self.COMMENTS_URL = (
        'http://changyan.sohu.com/api/2/topic/comments'
        '?client_id={0}&topic_id={1}&page_no={2}&page_size={3}')

    # apiv2.sohu.com templates with named placeholders.
    self.NEW_NEWS_COMMONURL = (
        'http://apiv2.sohu.com/api/topic/load'
        '?page_size={page_size}&topic_source_id={cmt_id}&page_no={page}'
        '&media_id={media_id}&topic_category_id={topic_category_id}')
    self.NEW_NEWS_COMMONURL2 = (
        'http://apiv2.sohu.com/api/comment/list'
        '?page_size={page_size}&topic_id={topic_id}&page_no={page}')

    # Step markers.
    (self.STEP_DEFAULT_VALUE,
     self.STEP_COMMENT_FIRST_PAGE,
     self.STEP_COMMENT_NEXT_PAGE) = None, 1, 2
    self.STEP_TVCLICK, self.STEP_MYTVCLICK = 'tvclick', 'mytvclick'
    self.STEP_NEWS_A2, self.STEP_NEWS_A3 = 'step2_new_a', 'step3_new_a'

    # Count endpoints keyed by video id ({vid}).
    self.TVCLICKURL = (
        'http://count.vrs.sohu.com/count/queryext.action?vids={vid}')
    self.MYTVCLICKURL = (
        'http://vstat.my.tv.sohu.com/dostat.do'
        '?method=getVideoPlayCount&v={vid}')
def __init__(self):
    """Set the page size, the comment URL template and the step markers."""
    SiteComments.__init__(self)
    self.page_size = 5
    # Positional placeholders: {0}=sid, {1}=cid, {2}=start, {3}=length.
    self.COMMENTS_URL = (
        'http://www.syqnr.com/api/'
        '?action=com&do=comment&sid={0}&cid={1}&start={2}&length={3}')
    (self.STEP_DEFAULT_VALUE,
     self.STEP_COMMENT_FIRST_PAGE,
     self.STEP_COMMENT_NEXT_PAGE) = None, 1, 2
def __init__(self,parent=None):
    """Create the regex helper and optionally copy the parent's website.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    self.r = RegexUtility()
    # Storage helpers are intentionally left disabled:
    # self.basicstorage = BaseInfoStorage()
    # self.commentstorage = CommentsStorage()
    if not parent:
        return
    self.website = parent.website
def __init__(self):
    """Set the article URL pattern, comment URL template and step markers."""
    SiteComments.__init__(self)
    # Raw string: '\w', '\d' and '\.' are regex escapes, not Python string
    # escapes.  A non-raw literal triggers an invalid-escape warning on
    # modern Python; the resulting pattern text is unchanged.
    # NOTE(review): the dots in the host name are unescaped, so they match
    # any character if this is used as a regex - confirm that is acceptable.
    self.reBaseUrl = r'^http://www.appgame.com/\w+/\d+\.html'
    # Named placeholders: page, name, url.
    self.commentUrl = (
        'http://comment.appgame.com/api/comment.php'
        '?cmtx_page={page}&page_name={name}&page_id={url}')
    # Stored as a float; presumably so page-count division is not
    # truncated under Python 2 - verify against the callers.
    self.page_size = 10.0
    # Step markers.
    self.STEP_COUNT = None
    self.STEP_PAGES = 1
    self.STEP_CMTS = 2
def __init__(self):
    """Set the reply/player URL templates, page size and step markers."""
    SiteComments.__init__(self)
    # Named placeholders: page and videoId.
    self.pageUrl = (
        'http://api.bilibili.com/x/v2/reply'
        '?pn={page}&type=1&oid={videoId}&sort=0')
    # Named placeholders: cid and aid.
    self.playurl = (
        'https://interface.bilibili.com/player'
        '?id=cid:{cid}&aid={aid}')
    self.pageSize = 20.0
    # Step markers.
    self.STEP_COUNT, self.STEP_PAGES, self.STEP_CMTS = None, 1, 2
    self.STEP_PLAY = 'play'
def __init__(self):
    """Set paging defaults, the comment URL template and step markers."""
    SiteComments.__init__(self)
    self.page_size, self.pageno = 20, 1
    # Positional placeholders: {0}=pageNo, {1}=displayCount, {2}=newsId.
    self.COMMENTS_URL = (
        'http://www.wandafilm.com/wanda/news.do'
        '?m=getAllComment&pageNo={0}&displayCount={1}&newsId={2}')
    (self.STEP_DEFAULT_VALUE,
     self.STEP_COMMENT_FIRST_PAGE,
     self.STEP_COMMENT_NEXT_PAGE) = None, 1, 2
def __init__(self):
    """Set the page size, the comment URL template and step markers."""
    SiteComments.__init__(self)
    self.page_size = 30
    # Earlier endpoints, kept disabled for reference:
    # self.COMMENTS_SOURCE_URL = 'http://www.yidianzixun.com/api/q/?path=contents/comments&version={0}&docid={1}&count={2}'
    # self.COMMENTS_URL = 'http://www.yidianzixun.com/api/q/?path=contents/comments&version={0}&docid={1}&last_comment_id={3}&count={2}'
    # Positional placeholders: {0}=docid, {1}=count, {2}=last_comment_id.
    self.COMMENTS_URL = (
        'http://www.yidianzixun.com/home/q/getcomments'
        '?docid={0}&count={1}&last_comment_id={2}&appid=web_yidian')
    (self.STEP_DEFAULT_VALUE,
     self.STEP_COMMENT_FIRST_PAGE,
     self.STEP_COMMENT_NEXT_PAGE) = None, 1, 2
def __init__(self):
    """Set the page size, URL templates, step markers and id patterns."""
    SiteComments.__init__(self)
    self.page_size = 10
    # Named placeholders: item, artId, page.
    self.COMMENTS_SOURCE_URL = 'http://bbs.tianya.cn/post-{item}-{artId}-{page}.shtml'
    # Named placeholders: item, artId, replyId, page.
    # Every query argument is introduced by '&' and named 'params.<field>'.
    # A '¶' character in this URL is a mis-decoded '&para' sequence that
    # drops both the separator and the start of the parameter name.
    self.COMMENTS_CHILD_URL = (
        'http://bbs.tianya.cn/api?method=bbs.api.getCommentList'
        '&params.item={item}&params.articleId={artId}'
        '&params.replyId={replyId}&params.pageNum={page}')
    # Step markers.
    self.STEP_DEFAULT_VALUE = None
    self.STEP_COMMENT_FIRST_PAGE = 1
    self.STEP_COMMENT_CHILD_PAGE = 2
    # Raw strings keep '\s' as a regex escape instead of an invalid Python
    # string escape; the pattern text is identical to the previous value.
    # Each pattern captures the double-quoted value after 'ext2:' / 'ext1:'.
    self.ITEM_FORMAT = r'ext2\s*:\s*"(.+?)"'
    self.ARTID_FORMAT = r'ext1\s*:\s*"(.+?)"'
def __init__(self, parent = None):
    """Set page sizes, URL templates and the CSS selector/key tables.

    Args:
        parent: optional owner object; when not ``None``, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    self.page_size, self.page_size2, self.page_size3 = 10, 20, 15

    # Named placeholders: website, area, url_id, page.
    self.COMMENTS_URL = 'http://{website}/{area}-{url_id}-{page}-1.html'
    self.FORUM_URL = (
        'http://{website}/forum.php'
        '?mod={area}&tid={url_id}&page={page}')

    # Selector variants; every variant shares the same 'cmtnumCss' value.
    comment_count_css = '.hm > .xi1'
    self.cmt_page_numCSS = {
        'pageCss': '#pgt > .pgt > .pg > a',
        'cmtnumCss': comment_count_css,
    }
    self.cmt_page_numCSS2 = {
        'pageCss': '#nav_hd > .la_fy > .pg > a',
        'cmtnumCss': comment_count_css,
    }
    self.cmt_page_numCSS3 = {
        'pageCss': '#pgt > .pg > a',
        'cmtnumCss': comment_count_css,
    }
    self.cmt_page_numCSS4 = {
        'pageCss': '.pg > a',
        'cmtnumCss': comment_count_css,
    }

    # Key fragments used to locate subject, post table, time and content.
    self.commentCsskey = {
        'subject_idkey': 'thread_subject',
        'table_idkey': 'pid',
        'time_idkey': 'authorposton',
        'content_idkey': '(postmessage)|(locked)',
        'table_summarykey': 'pid',
    }

    if parent is None:
        return
    self.website = parent.website
def __init__(self):
    """Set the club/news comment URL templates and their page sizes."""
    SiteComments.__init__(self)

    # Paging URL templates for club comments.
    self.club_counturl = (
        'http://st01.club.china.com/data/thread/{path}_threadpage.js')
    self.club_commonurl = (
        'http://st01.club.china.com/data/thread/{path}_{page}_re.js')

    # Paging URL templates for comments on the other channels, e.g. news,
    # sports, lady.
    first_page_url = (
        'http://pl.china.com/CommentInfoAction.do'
        '?processID=listNewsComment&order=desc'
        '&newsobjectid={objectid}&channelcode={channel}'
        '&pageindex={pageno}&typeobjectid={type}'
        '&clienttype={clienttype}&key=N_F_P_{key}')
    self.new_firsturl = first_page_url
    # Compared with the first comment page, the URL of every later page
    # carries one extra parameter, lastCommentId: the id of the last
    # comment on the first page.
    self.new_commonurl = first_page_url + '&lastCommentId={lastcmtid}'

    self.club_pagesize, self.news_pagesize = 100.0, 3.0
def __init__(self):
    """Set page sizes, URL templates, step markers and crawl limits."""
    SiteComments.__init__(self)
    self.page_size, self.page_size_yunqi = 50, 10

    # Positional placeholders ({0}, {1}, {2}).
    self.COMMENTS_URL = (
        'http://coral.qq.com/article/{0}/comment'
        '?commentid={1}&reqnum={2}')
    # Earlier variant, kept disabled for reference:
    # self.AC_COMMENTS_URL = 'http://ac.qq.com/Community/topicList?targetId={0}&page={1}&type=0&_={2}'
    self.AC_COMMENTS_URL = (
        'http://ac.qq.com/Community/topicList'
        '?targetId={0}&page={1}')
    # Named placeholders: site, bid, page.
    self.EBOOK_COMMENTS_URL = (
        'http://ebook.qq.com/{site}/getComment.html'
        '?bid={bid}&pageIndex={page}')
    # Unlike the templates above, this one uses %-style placeholders.
    self.YUNQI_COMMENT_URL = (
        'http://yunqi.qq.com/bk/gdyq/%s-b.html?hot=0&p=%d')

    (self.STEP_DEFAULT_VALUE,
     self.STEP_COMMENT_FIRST_PAGE,
     self.STEP_COMMENT_NEXT_PAGE) = None, 1, 2
    self.hasnext = True

    # Date derived from the configured "last days" value.
    configured_days = int(SpiderConfigure.getinstance().getlastdays())
    self.cmtlastdays = TimeUtility.getuniformdatebefore(configured_days)
    self.comment_maxnum = 5000
def __init__(self, parent=None):
    """Initialise the base class and optionally copy the parent's website.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    if not parent:
        return
    self.website = parent.website
def __init__(self, parent=None):
    """Set the page size and optionally copy the parent's website.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    self.page_size = 20
    if not parent:
        return
    self.website = parent.website
def __init__(self):
    """Create the regex helper and the two storage helpers."""
    SiteComments.__init__(self)
    # Helper objects, created in this order.
    self.r = RegexUtility()
    self.basicstorage = BaseInfoStorage()
    self.commentstorage = CommentsStorage()
def __init__(self):
    """Run the ``SiteComments`` initialiser; no extra state is added."""
    SiteComments.__init__(self)
def __init__(self, parent):
    """Set the page size, copy the parent's website and set ``pid``.

    Args:
        parent: owner object; must expose a ``website`` attribute.
    """
    SiteComments.__init__(self)
    self.page_size = 20
    self.website = parent.website
    self.pid = 1
def __init__(self):
    """Initialise the base class and leave ``changyan`` unset (``None``)."""
    SiteComments.__init__(self)
    # Starts as None; nothing in this initialiser assigns a real value.
    self.changyan = None
def __init__(self):
    """Set the page size and create the HTTP helper."""
    SiteComments.__init__(self)
    self.page_size = 20
    self.h = HttpUtility()
def __init__(self):
    """Initialise the base class and set the page size to 30."""
    SiteComments.__init__(self)
    self.page_size = 30
def __init__(self, parent=None):
    """Set the page size and the orkey pattern; optionally copy the website.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    self.page_size = 10
    # Raw, double-quoted literal: keeps '\?' as a regex escape instead of an
    # invalid Python string escape and needs no escaped single quotes.  The
    # resulting pattern text is identical to the previous value.
    # The attribute name keeps its existing spelling ('patttern') because
    # code outside this method may reference it.
    # NOTE(review): the dot in 'get.php' is unescaped and matches any
    # character if this is used as a regex - confirm that is acceptable.
    self.orkey_patttern = r"'get.php\?orkey=(.*)'"
    if parent:
        self.website = parent.website
def __init__(self):
    """Initialise the base class and set the offset/limit paging values."""
    SiteComments.__init__(self)
    self.offset, self.limit = 0, 30
def __init__(self):
    """Initialise the base class; ``book``, ``pub`` and ``bbs`` start unset."""
    SiteComments.__init__(self)
    # All three slots begin as None (assigned left to right).
    self.book = self.pub = self.bbs = None
def __init__(self, parent):
    """Initialise the base class and copy the parent's website.

    Args:
        parent: owner object; must expose a ``website`` attribute.
    """
    SiteComments.__init__(self)
    self.website = parent.website
def __init__(self):
    """Initialise the base class; ``bbs`` and ``news`` start unset."""
    SiteComments.__init__(self)
    # Both slots begin as None (assigned left to right).
    self.bbs = self.news = None
def __init__(self):
    """Initialise the base class and create the regex helper."""
    SiteComments.__init__(self)
    self.r = RegexUtility()
def __init__(self):
    """Create the regex helper and set the fixed client id."""
    SiteComments.__init__(self)
    self.r = RegexUtility()
    self.client_id = 'cytaCBUri'
def __init__(self):
    """Initialise the base class; ``u17News`` and ``u17Bbs`` start unset."""
    # This URL is used to identify the class that receives the returned S2
    # query results; using the main-site URL is recommended.
    SiteComments.__init__(self)
    # Both slots begin as None (assigned left to right).
    self.u17News = self.u17Bbs = None
def __init__(self,parent=None):
    """Create the regex helper and optionally copy the parent's website.

    Args:
        parent: optional owner object; when truthy, its ``website``
            attribute is copied onto this instance.
    """
    SiteComments.__init__(self)
    self.r = RegexUtility()
    if not parent:
        return
    self.website = parent.website
def __init__(self, parent):
    """Copy the parent's website and set the per-page count.

    Args:
        parent: owner object; must expose a ``website`` attribute.
    """
    SiteComments.__init__(self)
    self.website = parent.website
    self.per_page = 100
def __init__(self, parent):
    """Mirror ``maxpages`` into ``pagelimit`` and copy the parent's website.

    Args:
        parent: owner object; must expose a ``website`` attribute.
    """
    SiteComments.__init__(self)
    # ``maxpages`` is not set in this method; presumably it is provided by
    # SiteComments - verify against the base class.
    self.pagelimit = self.maxpages
    self.website = parent.website