def __init__(self, parent=None):
        """Initialise the base class and set paging sizes, client ids, URL templates and step markers.

        parent: optional object; when truthy, its ``website`` attribute is
            copied onto this instance.
        """
        SiteComments.__init__(self)

        # Paging sizes and client ids, assigned pairwise (regular / tv variant).
        self.page_size, self.tv_page_size = 20, 30
        self.client_id, self.tv_client_id = 'cyqemw6s1', 'cyqyBluaj'
        self.group_mark = '9000'
        if parent:
            self.website = parent.website

        # URL templates on changyan.sohu.com (positional placeholders).
        self.COMMENTS_SOURCE_URL = 'http://changyan.sohu.com/api/3/topic/liteload?client_id={0}&topic_source_id={1}&page_size={2}'
        self.TV_COMMENTS_SOURCE_URL = 'http://changyan.sohu.com/api/2/topic/load?client_id={0}&topic_url={1}&topic_source_id={2}&page_size={3}'
        self.COMMENTS_URL = 'http://changyan.sohu.com/api/2/topic/comments?client_id={0}&topic_id={1}&page_no={2}&page_size={3}'

        # URL templates on apiv2.sohu.com (named placeholders).
        self.NEW_NEWS_COMMONURL = 'http://apiv2.sohu.com/api/topic/load?page_size={page_size}&topic_source_id={cmt_id}&page_no={page}&media_id={media_id}&topic_category_id={topic_category_id}'
        self.NEW_NEWS_COMMONURL2 = 'http://apiv2.sohu.com/api/comment/list?page_size={page_size}&topic_id={topic_id}&page_no={page}'

        # Step markers.
        self.STEP_DEFAULT_VALUE = None
        self.STEP_COMMENT_FIRST_PAGE, self.STEP_COMMENT_NEXT_PAGE = 1, 2
        self.STEP_TVCLICK, self.STEP_MYTVCLICK = 'tvclick', 'mytvclick'
        self.STEP_NEWS_A2, self.STEP_NEWS_A3 = 'step2_new_a', 'step3_new_a'

        # URL templates taking a {vid} placeholder.
        self.TVCLICKURL = 'http://count.vrs.sohu.com/count/queryext.action?vids={vid}'
        self.MYTVCLICKURL = 'http://vstat.my.tv.sohu.com/dostat.do?method=getVideoPlayCount&v={vid}'
 def __init__(self):
     """Initialise the base class, then set the step markers, page size and comment URL template."""
     SiteComments.__init__(self)

     # Step markers.
     self.STEP_DEFAULT_VALUE = None
     self.STEP_COMMENT_FIRST_PAGE, self.STEP_COMMENT_NEXT_PAGE = 1, 2

     self.page_size = 5
     # {0}..{3} fill the sid, cid, start and length query parameters.
     self.COMMENTS_URL = 'http://www.syqnr.com/api/?action=com&do=comment&sid={0}&cid={1}&start={2}&length={3}'
 def __init__(self, parent=None):
     """Initialise the base class and create a RegexUtility helper.

     parent: optional object; when truthy, its ``website`` attribute is
         copied onto this instance.
     """
     SiteComments.__init__(self)
     self.r = RegexUtility()
     # NOTE: creation of BaseInfoStorage / CommentsStorage instances is disabled:
     # self.basicstorage = BaseInfoStorage()
     # self.commentstorage = CommentsStorage()
     if not parent:
         return
     self.website = parent.website
 def __init__(self):
     """Initialise the base class and set the URL pattern, comment URL template, page size and step markers."""
     SiteComments.__init__(self)
     # Raw string: the pattern contains regex escapes (\w, \d, \.) that are
     # invalid escape sequences in a normal string literal. The resulting
     # string value is unchanged.
     self.reBaseUrl = r'^http://www.appgame.com/\w+/\d+\.html'
     # Named placeholders: {page}, {name}, {url}.
     self.commentUrl = 'http://comment.appgame.com/api/comment.php?cmtx_page={page}&page_name={name}&page_id={url}'
     # Stored as a float; presumably so page-count division is not truncated - confirm against callers.
     self.page_size = 10.0
     # Step markers.
     self.STEP_COUNT = None
     self.STEP_PAGES = 1
     self.STEP_CMTS = 2
Ejemplo n.º 5
0
 def __init__(self):
     """Initialise the base class, then set the step markers, page size and URL templates."""
     SiteComments.__init__(self)

     # Step markers.
     self.STEP_COUNT = None
     self.STEP_PAGES, self.STEP_CMTS = 1, 2
     self.STEP_PLAY = 'play'

     # Stored as a float; presumably so page-count division is not truncated - confirm against callers.
     self.pageSize = 20.0

     # Reply URL template: {page} and {videoId} placeholders.
     self.pageUrl = 'http://api.bilibili.com/x/v2/reply?pn={page}&type=1&oid={videoId}&sort=0'
     # Player URL template: {cid} and {aid} placeholders.
     self.playurl = 'https://interface.bilibili.com/player?id=cid:{cid}&aid={aid}'
    def __init__(self):
        """Initialise the base class, then set paging defaults, the comment URL template and step markers."""
        SiteComments.__init__(self)

        # Paging defaults.
        self.page_size, self.pageno = 20, 1

        # {0}, {1}, {2} fill pageNo, displayCount and newsId respectively.
        self.COMMENTS_URL = 'http://www.wandafilm.com/wanda/news.do?m=getAllComment&pageNo={0}&displayCount={1}&newsId={2}'

        # Step markers.
        self.STEP_DEFAULT_VALUE = None
        self.STEP_COMMENT_FIRST_PAGE, self.STEP_COMMENT_NEXT_PAGE = 1, 2
Ejemplo n.º 7
0
    def __init__(self):
        """Initialise the base class, then set the step markers, page size and comment URL template."""
        SiteComments.__init__(self)

        # Step markers.
        self.STEP_DEFAULT_VALUE = None
        self.STEP_COMMENT_FIRST_PAGE, self.STEP_COMMENT_NEXT_PAGE = 1, 2

        self.page_size = 30

        # Earlier URL templates, kept for reference:
        # self.COMMENTS_SOURCE_URL = 'http://www.yidianzixun.com/api/q/?path=contents/comments&version={0}&docid={1}&count={2}'
        # self.COMMENTS_URL = 'http://www.yidianzixun.com/api/q/?path=contents/comments&version={0}&docid={1}&last_comment_id={3}&count={2}'
        # Current template: {0}=docid, {1}=count, {2}=last_comment_id.
        self.COMMENTS_URL = 'http://www.yidianzixun.com/home/q/getcomments?docid={0}&count={1}&last_comment_id={2}&appid=web_yidian'
    def __init__(self):
        """Initialise the base class and set the page size, URL templates, step markers and extraction patterns."""
        SiteComments.__init__(self)
        self.page_size = 10
        # Post page template; placeholders: {item}, {artId}, {page}.
        self.COMMENTS_SOURCE_URL = 'http://bbs.tianya.cn/post-{item}-{artId}-{page}.shtml'
        # Child-comment API template; placeholders: {item}, {artId}, {replyId}, {page}.
        self.COMMENTS_CHILD_URL = 'http://bbs.tianya.cn/api?method=bbs.api.getCommentList&params.item={item}&params.articleId={artId}&params.replyId={replyId}&params.pageNum={page}'
        # Step markers.
        self.STEP_DEFAULT_VALUE = None
        self.STEP_COMMENT_FIRST_PAGE = 1
        self.STEP_COMMENT_CHILD_PAGE = 2

        # Raw strings: \s is a regex escape, not a valid string escape. The
        # resulting pattern text is identical to the previous non-raw literals
        # (whose \" collapsed to a plain double quote).
        self.ITEM_FORMAT = r'ext2\s*:\s*"(.+?)"'
        self.ARTID_FORMAT = r'ext1\s*:\s*"(.+?)"'
Ejemplo n.º 9
0
 def __init__(self, parent=None):
     """Initialise the base class and set page sizes, URL templates and CSS selector tables.

     parent: optional object; when it is not None, its ``website`` attribute
         is copied onto this instance.
     """
     SiteComments.__init__(self)

     # Three page-size variants.
     self.page_size, self.page_size2, self.page_size3 = 10, 20, 15

     # URL templates; placeholders: {website}, {area}, {url_id}, {page}.
     self.COMMENTS_URL = 'http://{website}/{area}-{url_id}-{page}-1.html'
     self.FORUM_URL = 'http://{website}/forum.php?mod={area}&tid={url_id}&page={page}'

     # Four selector tables sharing the same comment-count selector and
     # differing only in the pager selector. Each table is its own dict.
     count_selector = '.hm > .xi1'
     self.cmt_page_numCSS = {
         'pageCss': '#pgt > .pgt > .pg > a',
         'cmtnumCss': count_selector,
     }
     self.cmt_page_numCSS2 = {
         'pageCss': '#nav_hd > .la_fy > .pg > a',
         'cmtnumCss': count_selector,
     }
     self.cmt_page_numCSS3 = {
         'pageCss': '#pgt > .pg > a',
         'cmtnumCss': count_selector,
     }
     self.cmt_page_numCSS4 = {
         'pageCss': '.pg > a',
         'cmtnumCss': count_selector,
     }

     self.commentCsskey = {
         'subject_idkey': 'thread_subject',
         'table_idkey': 'pid',
         'time_idkey': 'authorposton',
         'content_idkey': '(postmessage)|(locked)',
         'table_summarykey': 'pid',
     }

     if parent is not None:
         self.website = parent.website
Ejemplo n.º 10
0
    def __init__(self):
        """Initialise the base class and set the club/news URL templates and page sizes."""
        SiteComments.__init__(self)

        # Paging URL templates for club comments.
        self.club_counturl = 'http://st01.club.china.com/data/thread/{path}_threadpage.js'
        self.club_commonurl = 'http://st01.club.china.com/data/thread/{path}_{page}_re.js'

        # Paging URL template for comments on other channels such as news,
        # sports and lady.
        first_page_url = (
            'http://pl.china.com/CommentInfoAction.do?processID=listNewsComment&order=desc'
            '&newsobjectid={objectid}&channelcode={channel}&pageindex={pageno}&typeobjectid={type}'
            '&clienttype={clienttype}&key=N_F_P_{key}'
        )
        self.new_firsturl = first_page_url
        # Compared with the first page, later comment pages take one extra
        # parameter, lastCommentId: the id of the last comment on the first page.
        self.new_commonurl = first_page_url + '&lastCommentId={lastcmtid}'

        # Page sizes, stored as floats.
        self.club_pagesize, self.news_pagesize = 100.0, 3.0
Ejemplo n.º 11
0
 def __init__(self):
     """Initialise the base class and set page sizes, URL templates, step markers and crawl limits."""
     SiteComments.__init__(self)

     # Page sizes (general / yunqi).
     self.page_size, self.page_size_yunqi = 50, 10

     # URL templates.
     self.COMMENTS_URL = 'http://coral.qq.com/article/{0}/comment?commentid={1}&reqnum={2}'
     # self.AC_COMMENTS_URL = 'http://ac.qq.com/Community/topicList?targetId={0}&page={1}&type=0&_={2}'
     self.AC_COMMENTS_URL = 'http://ac.qq.com/Community/topicList?targetId={0}&page={1}'
     self.EBOOK_COMMENTS_URL = 'http://ebook.qq.com/{site}/getComment.html?bid={bid}&pageIndex={page}'
     # This template uses %-style placeholders (%s, %d) rather than str.format fields.
     self.YUNQI_COMMENT_URL = 'http://yunqi.qq.com/bk/gdyq/%s-b.html?hot=0&p=%d'

     # Step markers.
     self.STEP_DEFAULT_VALUE = None
     self.STEP_COMMENT_FIRST_PAGE, self.STEP_COMMENT_NEXT_PAGE = 1, 2

     self.hasnext = True

     # Configured "last days" value, converted to int and passed to
     # TimeUtility.getuniformdatebefore.
     last_days = int(SpiderConfigure.getinstance().getlastdays())
     self.cmtlastdays = TimeUtility.getuniformdatebefore(last_days)

     self.comment_maxnum = 5000
Ejemplo n.º 12
0
 def __init__(self, parent=None):
     """Initialise the base class and, when ``parent`` is truthy, copy its ``website`` attribute."""
     SiteComments.__init__(self)
     if not parent:
         return
     self.website = parent.website
Ejemplo n.º 13
0
 def __init__(self, parent=None):
     """Initialise the base class and set the page size.

     parent: optional object; when truthy, its ``website`` attribute is
         copied onto this instance.
     """
     SiteComments.__init__(self)
     self.page_size = 20
     if not parent:
         return
     self.website = parent.website
Ejemplo n.º 14
0
 def __init__(self):
     """Initialise the base class and create the regex and storage helper objects."""
     SiteComments.__init__(self)
     # Helper instances, created in this order.
     self.r = RegexUtility()
     self.basicstorage = BaseInfoStorage()
     self.commentstorage = CommentsStorage()
Ejemplo n.º 15
0
 def __init__(self):
     """Initialise the SiteComments base class; no additional state is set here."""
     SiteComments.__init__(self)
 def __init__(self, parent):
     """Initialise the base class and set the page size, website and pid.

     parent: required object whose ``website`` attribute is copied onto
         this instance (there is no None check).
     """
     SiteComments.__init__(self)
     self.page_size = 20
     # The right-hand side is evaluated first, then website and pid are
     # assigned left to right.
     self.website, self.pid = parent.website, 1
Ejemplo n.º 17
0
 def __init__(self):
     """Initialise the base class and set ``changyan`` to None."""
     SiteComments.__init__(self)
     # Starts as None; presumably assigned later by other code - verify.
     self.changyan = None
Ejemplo n.º 18
0
 def __init__(self):
     """Initialise the base class, set the page size and create an HttpUtility helper."""
     SiteComments.__init__(self)
     self.page_size = 20
     # HttpUtility instance kept on the object as ``h``.
     self.h = HttpUtility()
Ejemplo n.º 19
0
 def __init__(self):
     """Initialise the base class and set the page size to 30."""
     SiteComments.__init__(self)
     self.page_size = 30
Ejemplo n.º 20
0
 def __init__(self, parent=None):
     """Initialise the base class and set the page size and orkey pattern.

     parent: optional object; when truthy, its ``website`` attribute is
         copied onto this instance.
     """
     SiteComments.__init__(self)
     self.page_size = 10
     # Raw string: \? is a regex escape, not a valid string escape. The
     # pattern text, including the surrounding single quotes, is unchanged.
     # The attribute name (with its triple "t") is kept because other code
     # may read it.
     self.orkey_patttern = r"'get.php\?orkey=(.*)'"
     if parent:
         self.website = parent.website
Ejemplo n.º 21
0
 def __init__(self):
     """Initialise the base class and set ``offset`` to 0 and ``limit`` to 30."""
     SiteComments.__init__(self)
     self.offset, self.limit = 0, 30
Ejemplo n.º 22
0
 def __init__(self):
     """Initialise the base class and set ``book``, ``pub`` and ``bbs`` to None."""
     SiteComments.__init__(self)
     # Chained assignment binds the targets left to right: book, pub, bbs.
     self.book = self.pub = self.bbs = None
 def __init__(self, parent):
     """Initialise the base class and copy ``website`` from ``parent``.

     parent: required object with a ``website`` attribute (there is no None check).
     """
     SiteComments.__init__(self)
     self.website = parent.website
Ejemplo n.º 24
0
 def __init__(self):
     """Initialise the base class and set ``bbs`` and ``news`` to None."""
     SiteComments.__init__(self)
     # Chained assignment binds the targets left to right: bbs, news.
     self.bbs = self.news = None
Ejemplo n.º 25
0
 def __init__(self):
     """Initialise the base class and create a RegexUtility helper stored as ``r``."""
     SiteComments.__init__(self)
     self.r = RegexUtility()
Ejemplo n.º 26
0
 def __init__(self):
     """Initialise the base class, create a RegexUtility helper and set the client id."""
     SiteComments.__init__(self)
     self.r = RegexUtility()
     # Fixed client id string; which service it belongs to is not visible here.
     self.client_id = 'cytaCBUri'
Ejemplo n.º 27
0
 def __init__(self):
     """Initialise the base class and set ``u17News`` and ``u17Bbs`` to None."""
     # Use this URL to identify the class that the S2 query results are passed
     # back to; the main-site URL is recommended.
     SiteComments.__init__(self)
     # Chained assignment binds the targets left to right: u17News, u17Bbs.
     self.u17News = self.u17Bbs = None
Ejemplo n.º 28
0
 def __init__(self, parent=None):
     """Initialise the base class and create a RegexUtility helper.

     parent: optional object; when truthy, its ``website`` attribute is
         copied onto this instance.
     """
     SiteComments.__init__(self)
     self.r = RegexUtility()
     if not parent:
         return
     self.website = parent.website
 def __init__(self, parent):
     """Initialise the base class and set the website and per-page count.

     parent: required object whose ``website`` attribute is copied onto
         this instance (there is no None check).
     """
     SiteComments.__init__(self)
     # The right-hand side is evaluated first, then website and per_page are
     # assigned left to right.
     self.website, self.per_page = parent.website, 100
Ejemplo n.º 30
0
 def __init__(self, parent):
     """Initialise the base class and set the page limit and website.

     parent: required object whose ``website`` attribute is copied onto
         this instance (there is no None check).
     """
     SiteComments.__init__(self)
     # maxpages is not assigned in this method; presumably it is provided by
     # SiteComments - verify.
     page_cap = self.maxpages
     self.pagelimit = page_cap
     self.website = parent.website