def reply_backchina_forum(post_url, src):
    """Reply to a thread on the backchina forum.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: account and reply data; 'content' is always read and
                'subject' is used when present (the dict is also handed
                to the session and login helpers)
    @type src: dict

    @return: (success flag, accumulated log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    site_root = utils.get_host(post_url)
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_backchina(session, src):
        log.error(' Login Error')
        return (False, str(log))
    log.info(' Login OK')

    # Step 2: load the thread page and pick out the quick-reply form
    page = session.get(post_url)
    reply_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'fastpostform'})

    # Step 3: fill in the form fields and submit
    fields = utils.get_datadic(reply_form)
    if 'subject' in src:
        fields['subject'] = src['subject']
    fields['message'] = src['content']
    page = session.post(site_root + reply_form['action'], data=fields)

    # The reply counts as posted only if its text shows up in the response
    if src['content'] not in page.content:
        log.error('Reply Error: Reply Error, please try again !')
        return (False, str(log))
    log.info('Reply OK')
    return (True, str(log))
def _comment(self, params):
    """Dispatch a reply request to the matching ``reply_*`` handler.

    @param params: request arguments; every value is a sequence whose
                   first item is used. 'url' and 'content' are required;
                   'account', 'password', 'title', 'proxy_type',
                   'proxy_ip' and 'proxy_port' are optional.
    @type params: dict

    @return: (handler result, log text); (False, log text) when no
             handler matches the URL or the handler raises
    @rtype: tuple
    """
    # Keep only the first item of every value.
    # NOTE: this rewrites the caller's dict in place.
    for key in params:
        params[key] = params[key][0]

    logger = utils.RAPLogger(params['url'])

    # Pick the handler whose pattern matches the URL, for example:
    # 'dwnews\.com/news': ('dwnews_news', 'OUT'),
    for pattern, handler in config.dispatch_rule.items():
        if re.search(pattern, params['url']):
            real_reply = getattr(handlers, 'reply_' + handler[0])
            break
    else:
        logger.error('No reply handler')
        return (False, str(logger))

    # Prepare arguments.
    src = {
        'content': params['content'],
        'TTL': config.max_try,
    }
    if 'account' in params:
        src['username'] = params['account']
    if 'password' in params:
        src['password'] = params['password']

    # Proxy settings are best-effort: missing or malformed pieces mean
    # "no proxy" rather than a failed request.
    try:
        proxy_type = params['proxy_type']
        src['proxies'] = {
            proxy_type: proxy_type + '://' + params['proxy_ip'] +
            ':' + params['proxy_port']
        }
    except (KeyError, TypeError, ValueError):
        src['proxies'] = ''

    src['subject'] = params['title'] if 'title' in params else ''

    # Real reply. Handlers signal failure by raising, so this is the
    # boundary where errors are turned into a (False, log) result;
    # KeyboardInterrupt / SystemExit are deliberately not swallowed.
    try:
        r, log = real_reply(params['url'], src)
        return (r, str(logger) + log)
    except Exception:
        logger.exception('Reply Error')
        return (False, str(logger))
def post_powerapple_forum(post_url, src):
    """Create a new thread on the powerapple forum.

    @param post_url: board URL carrying a ``fid=<digits>`` argument, e.g.
                     http://bbs.powerapple.com/forum.php?mod=forumdisplay&fid=50
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read here
    @type src: dict

    @return: (URL of the response page, or '' on failure; log text)
    @rtype: (str, str)
    """
    log = utils.RAPLogger(post_url)
    site_root = utils.get_host(post_url)
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_powerapple(session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    # Step 2: open the new-thread editor of the board and find its form
    board_id = re.findall(r'fid=(\d+)', post_url)[0]
    editor_url = ('http://bbs.powerapple.com/forum.php'
                  '?mod=post&action=newthread&fid=' + board_id)
    page = session.get(editor_url)
    post_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'postform'})

    # Step 3: fill in and submit
    # NOTE(review): get_datadic and print_to_file are called unqualified
    # here -- confirm both names are imported into this module.
    fields = get_datadic(post_form)
    fields['subject'] = src['subject']
    fields['message'] = src['content']
    fields['typeid'] = '138'  # fixed thread-type id sent with every post
    page = session.post(site_root + post_form['action'], data=fields)

    # Dump the response body through the file helper
    print_to_file(page.content)

    # The thread counts as created only if its subject is in the response
    if src['subject'] not in page.content:
        log.error('Reply Error: please try again !')
        return ('', str(log))
    log.info('Post OK')
    return (page.url, str(log))
def post_wailaike_forum(post_url, src):
    """Create a new thread on the wailaike forum.

    @param post_url: group URL carrying a ``gid=`` argument, e.g.
                     http://www.wailaike.net/group_post?gid=1
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read
                here as UTF-8 byte strings
    @type src: dict

    @return: (URL of the new thread, log text); ('', log text) when the
             login fails. A failed post surfaces as an exception because
             the thread link cannot be found in the response.
    @rtype: (str, str)
    """
    log = utils.RAPLogger(post_url)
    utils.get_host(post_url)  # result is not used by this function
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_wailaike(session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    group_id = re.findall(r'gid=(\d*)', post_url)[0]
    newpost_url = 'http://www.wailaike.net/newpost?gid=' + group_id

    # Fetch the editor page and collect its form fields
    page = session.get(newpost_url)
    editor_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'editor'})
    fields = utils.get_datadic(editor_form)
    fields['title'] = src['subject'].decode('utf8').encode(CHARSET)
    fields['rstbody'] = src['content'].decode('utf8').encode(CHARSET)

    # The form also needs the timestamp served by time.php
    page = session.get('http://www.wailaike.net/time.php')
    fields['time'] = re.findall('"time":"(.*?)"', page.content)[0]

    # Submit the new thread
    page = session.post(newpost_url, data=fields)

    # Characters such as '(' and ')' in the title must be escaped before
    # the title is embedded in the pattern below.
    escaped_title = re.escape(src['subject'])
    link_pattern = (r'<h3 class="titles-txt"><a href=\"(.*?)\" target="_blank">'
                    + escaped_title + '</a></h3>')
    # No match raises IndexError here, which is how a failed post surfaces
    thread_path = re.findall(link_pattern, page.content)[0]
    log.info(' Post OK')
    return ("http://www.wailaike.net" + thread_path, str(log))
def thumb_up_sohu(post_url, src):
    """Send a comment "action" request to the changyan.sohu.com API.

    @param post_url: page URL; sent as Referer, and its fifth
                     '/'-separated piece is sent as ``client_id``
    @type post_url: str

    @param src: must hold src['extra']['comment_id'] and
                src['extra']['topic_id']
    @type src: dict

    @return: (True, log text) -- the response is logged but not inspected
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    extra = src['extra']
    query = {
        'callback': 'fn',
        'action_type': 1,
        'comment_id': extra['comment_id'],
        'client_id': post_url.split('/')[4],
        'topic_id': extra['topic_id'],
        '_': int(time.time() * 1000),  # millisecond timestamp
    }
    reply = session.get('http://changyan.sohu.com/api/2/comment/action',
                        headers={'Referer': post_url},
                        params=query)
    log.info(reply.content)
    return (True, str(log))
def thumb_up_qq(post_url, src):
    """Send a thumb-up request to the endpoint given in src['extra'].

    @param post_url: page URL; sent as Referer, and its last
                     '/'-separated piece is sent as ``targetid``
    @type post_url: str

    @param src: must hold src['extra']['target_url']
    @type src: dict

    @return: (False, log text) when the response contains
             'Operation too frequent', otherwise (True, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    endpoint = src['extra']['target_url']
    query = {
        'targetid': post_url.split('/')[-1],
        'callback': 'ding',
    }
    reply = session.get(endpoint, headers={'Referer': post_url}, params=query)
    log.info(reply.content)

    throttled = 'Operation too frequent' in reply.content
    if throttled:
        log.info('Operation too frequent')
    return (not throttled, str(log))
def reply_kdnet(post_url, src):
    """Reply to a thread on the kdnet community (club.kdnet.net).

    Credentials are sent together with the reply form, so no separate
    login step is performed.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: 'username', 'password' and 'content' are read here as
                UTF-8 byte strings
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    def to_site_charset(text):
        # UTF-8 bytes -> site charset, dropping unencodable characters
        return text.decode('utf8').encode(CHARSET, 'ignore')

    page = session.get(post_url)
    final_url = page.url  # URL after any redirect

    # The reply form lives in the first iframe of the thread page
    iframe_src = re.findall('<iframe src="(.*?)"', page.content)[0]
    page = session.get(iframe_src.decode(CHARSET))
    reply_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 't_form'})

    # Everything after 'boardid=' up to the last digit of the URL is
    # appended to the submit address
    board_part = re.findall(r'boardid=(.*\d)', final_url)[0]

    fields = utils.get_datadic(reply_form)
    fields['UserName'] = to_site_charset(src['username'])
    fields['password'] = to_site_charset(src['password'])
    fields['body'] = to_site_charset(src['content'])

    submit_url = ('http://upfile1.kdnet.net/do_lu_shuiyin.asp?'
                  'action=sre&method=fastreply&BoardID=') + board_part
    page = session.post(submit_url, data=fields)
    print(page.content.decode(CHARSET))

    # The success marker must appear in the response
    if u'成功回复'.encode(CHARSET, 'ignore') in page.content:
        log.info(' Reply OK')
        return (True, str(log))
    log.error(' Reply Error')
    return (False, str(log))
def post_kdnet(post_url, src):
    """Create a new thread on the kdnet community (club.kdnet.net).

    @param post_url: board URL carrying a ``boardid=`` argument, e.g.
                     http://club.kdnet.net/list.asp?boardid=2
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read
                here as UTF-8 byte strings
    @type src: dict

    @return: (URL of the new thread, or '' on failure; log text)
    @rtype: (str, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    def to_site_charset(text):
        # UTF-8 bytes -> site charset, dropping unencodable characters
        return text.decode('utf8').encode(CHARSET, 'ignore')

    # Step 1: log in
    if not login_kdnet(session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    # The board id is part of both the editor and the submit address
    board_id = re.findall(r'boardid=(\d*)', post_url)[0]
    page = session.get(
        'http://upfile1.kdnet.net/textareaeditor/post_ubb.asp?action=new&boardid='
        + board_id)
    post_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'Dvform'})

    fields = utils.get_datadic(post_form)
    fields['topic'] = to_site_charset(src['subject'])
    fields['body'] = to_site_charset(src['content'])
    fields['font1'] = u'[原创]'.encode(CHARSET, 'ignore')

    page = session.post(
        'http://upfile1.kdnet.net/SavePost_ubb.asp?Action=snew&boardid='
        + board_id,
        data=fields)

    # The success marker must appear in the response
    if u'发帖成功'.encode(CHARSET, 'ignore') not in page.content:
        log.error(' Post Error')
        return ('', str(log))
    log.info(' Post OK')

    # The response script carries the address of the new thread
    thread_url = re.findall(r'var url="(.*?)"', page.content)[0]
    print(thread_url)
    return (thread_url, str(log))
def upload_head_backchina_forum(src):
    """Upload an avatar image for a backchina account.

    @param src: 'username' (used for the log name) and 'head' (path of
                the image file to upload) are read here; the dict is
                also handed to the session and login helpers
    @type src: dict

    @return: (avatar image URL scraped from the settings page, or '' on
             failure; log text)
    @rtype: (str, str)
    """
    logger = utils.RAPLogger('Upload head 51_forum=>' + src['username'])
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_backchina(sess, src):
        logger.error('Login Error')
        return ('', str(logger))
    logger.info('Login OK')

    # The avatar settings page embeds the tokens the uploader expects
    resp = sess.get('http://www.backchina.com/home.php?mod=spacecp&ac=avatar')
    input_token = urllib.unquote(re.findall(r'input=(.*?)&', resp.content)[0])
    agent = re.findall(r'agent=(.*?)&', resp.content)[0]
    head_url = re.findall(r'<td><img src="(.*?)"', resp.content)[0]
    print('input: ' + input_token)
    print('agent: ' + agent)

    # Read the image and close the file promptly; the same upper-case
    # hex dump is sent for all three avatar fields.
    with open(src['head'], 'rb') as image_file:
        avatar_hex = binascii.hexlify(image_file.read()).upper()

    params = {
        'm': 'user',
        'inajax': '1',
        'a': 'rectavatar',
        'appid': '14',
        'input': input_token,
        'agent': agent,
        'avatartype': 'virtual'
    }
    payload = {
        'avatar1': avatar_hex,
        'avatar2': avatar_hex,
        'avatar3': avatar_hex,
        'urlReaderTS': str(time.time() * 1000)
    }
    resp = sess.post('http://backchina-member.com/ucenter/index.php',
                     data=payload, params=params)
    print(resp.content)

    if 'success="1"' in resp.content:
        logger.info('uploadavatar OK')
        return (head_url, str(logger))
    logger.info('uploadavatar Error')
    return ('', str(logger))
def post_backchina_forum(post_url, src):
    """Create a new thread on the backchina forum.

    @param post_url: board URL containing ``forum/<digits>/``, e.g.
                     http://www.backchina.com/forum/37/index-1.html
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read here
    @type src: dict

    @return: (URL of the response page, or '' on failure; log text)
    @rtype: (str, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_backchina(session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    board_id = re.findall(r'forum/(\d+)/', post_url)[0]
    editor_url = ('http://www.backchina.com/forum.php'
                  '?mod=post&action=newthread&fid=' + board_id)

    # Load the editor and collect the fields of its form
    page = session.get(editor_url)
    post_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'postform'})
    fields = utils.get_datadic(post_form)
    fields['subject'] = src['subject']
    fields['message'] = src['content']

    # Submit the new thread
    page = session.post(editor_url + '&extra=&topicsubmit=yes', data=fields)

    # The thread counts as created only if its subject is in the response
    if src['subject'] in page.content:
        log.info(' Post OK')
        return (page.url, str(log))
    log.error(' Post Error')
    return ('', str(log))
def post_creaders_forum(post_url, src):
    """Create a new thread on the creaders forum.

    Credentials are sent together with the post form, so no separate
    login step is performed.

    @param post_url: board URL, e.g. http://bbs.creaders.net/life/
    @type post_url: str

    @param src: 'username', 'password', 'subject' and 'content' are
                read here as UTF-8 byte strings
    @type src: dict

    @return: (URL of the new thread, log text). A failed post surfaces
             as an exception because the thread link cannot be found.
    @rtype: (unicode, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    def to_site_charset(text):
        # UTF-8 bytes -> site charset, dropping unencodable characters
        return text.decode('utf8').encode(CHARSET, 'ignore')

    # Everything up to and including the last '/' of the board URL
    board_root = post_url.rpartition('/')[0] + '/'
    # Response is not used; the request is still issued before posting
    session.get(board_root + 'post.php?')

    user_name = to_site_charset(src['username'])
    user_password = to_site_charset(src['password'])
    title = to_site_charset(src['subject'])
    body = to_site_charset(src['content'])
    fields = {
        'user_name2': user_name,
        'user_password2': user_password,
        'captcha': '',
        'trd_subject': title,
        'trd_content': body
    }

    # Submit the new thread
    page = session.post(board_root + 'post.php', data=fields)
    html = page.content.decode(CHARSET, 'ignore')

    # Characters such as '(' and ')' in the title must be escaped before
    # the title is embedded in the pattern below.
    escaped_title = re.escape(src['subject'].decode('utf8'))
    link_pattern = r"<a href=\'(.*?)\' class=\'thread_title\'>" + escaped_title
    # No match raises IndexError here, which is how a failed post surfaces
    thread_href = re.findall(link_pattern, html)[0]

    log.info('Post OK')
    return (post_url + thread_href, str(log))
def login_sohu(sess, post_url, src):
    """Log a session in to sohu passport.

    @param sess: session object used for the requests
    @type sess: Session

    @param post_url: URL used only to name the logger
    @type post_url: str

    @param src: 'username' and 'password' are read here
    @type src: dict

    @return: True when the login response contains 'success'
    @rtype: bool
    """
    log = utils.RAPLogger(post_url)

    # The password travels as an MD5 hex digest, 's' as a millisecond value
    query = {
        'userid': src['username'],
        'password': md5(src['password']).hexdigest(),
        'appid': '1019',
        'persistentcookie': '1',
        's': int(time.time()) * 1000,
        'b': '7',
        'w': '1366',
        'pwdtype': '1',
        'v': '26'
    }
    request_headers = {
        'Host': 'passport.sohu.com',
        'Referer': 'http://i.sohu.com/login/logon.do',
        'User-Agent': config.user_agent
    }
    reply = sess.get('https://passport.sohu.com/sso/login.jsp',
                     params=query, headers=request_headers)
    log.info(' info: ' + reply.content)

    logged_in = 'success' in reply.content
    if logged_in:
        # Follow-up request issued after a successful passport login
        reply = sess.get('http://uis.i.sohu.com/api/passport/slogin.do')
        log.info(reply.content)
    return logged_in
def reply_enewstree_forum(post_url, src):
    """Reply to a thread on the enewstree forum.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: account and reply data; 'content' is read here as a
                UTF-8 byte string
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in. A failure reports False, like every other failure
    # path of this function.
    if not login_enewstree(post_url, sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Load the thread page and pick out the quick-reply form
    resp = sess.get(post_url)
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'fastpostform'})

    # Build the reply fields. The message is a plain byte string in the
    # site charset (a stray trailing comma used to make it a 1-tuple).
    payload = utils.get_datadic(form)
    payload['message'] = src['content'].decode('utf8').encode(CHARSET, 'ignore')

    resp = sess.post('http://enewstree.com/discuz/' + form['action'],
                     data=payload,
                     headers={'Referer': post_url})

    # The reply counts as posted only if its text shows up in the response
    if src['content'].decode('utf8') not in resp.content.decode(CHARSET, 'ignore'):
        logger.error(' Reply Error')
        return (False, str(logger))
    logger.info(' Reply OK')
    return (True, str(logger))
def post_1dpw_forum(post_url, src):
    """Create a new thread on the 1dpw forum.

    @param post_url: board URL, e.g. http://bbs.1dpw.com/forum-71-1.html
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read here
    @type src: dict

    @return: (URL of the new thread, or '' on failure; log text)
    @rtype: (str, str)
    """
    site_root = utils.get_host(post_url)
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_1dpw(post_url, session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    # The board page carries a quick-post form
    page = session.get(post_url)
    quick_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'fastpostform'})

    fields = utils.get_datadic(quick_form)
    fields['subject'] = src['subject']
    fields['message'] = src['content']
    fields['posttime'] = int(time.time())

    # Submit the form with the extra inajax flag
    submit_url = site_root + quick_form['action'] + '&inajax=1'
    page = session.post(submit_url, data=fields)

    # The success marker must appear in the response
    if '主题已发布' not in page.content:
        log.error(' Post Error')
        return ('', str(log))
    log.info(' Post OK')

    # The success handler call in the response names the new thread
    thread_path = re.findall(r"succeedhandle_fastnewpost\(\'(.*?)\'",
                             page.content)[0]
    thread_url = site_root + thread_path
    print(thread_url)
    return (thread_url, str(log))
def thumb_up_dwnews(post_url, src=None):
    """Send a like / dislike for a dwnews blog post.

    @param post_url: post URL; the first run of digits in it is sent
                     as the post id
    @type post_url: str

    @param src: must hold src['extra']['like']; the strings 'false' and
                'False' select a dislike ('nolikes'), anything else a
                like. Defaults to a fresh empty dict.
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    # A fresh dict per call: a ``{}`` default would be one shared object
    # reused across every call that omits ``src``.
    if src is None:
        src = {}

    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    post_id = re.findall(r'(\d+)', post_url)[0]
    like_type = 'nolikes' if src['extra']['like'] in ('false', 'False') else 'likes'
    resp = sess.get('http://blog.dwnews.com/index.php',
                    params={
                        'r': 'club/clubzan',
                        'id': post_id,
                        'type': like_type,
                        'callback': '?',
                    })
    logger.info(resp.content)

    if 'success' not in resp.content:
        logger.error('Thumb Up Error')
        return (False, str(logger))
    logger.info('Thumb Up OK')
    return (True, str(logger))
def reply_unitedtimes(post_url, src):
    """Post a comment on a unitedtimes.com.au article.

    No login step is performed.

    @param post_url: URL of the article page
    @type post_url: str

    @param src: 'content' is read here
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    # The comment form lives in the first iframe of the article page
    page = session.get(post_url)
    iframe_src = re.findall('<iframe src="(.*?)"', page.content)[0]
    page = session.get(iframe_src.decode(CHARSET))
    comment_form = BeautifulSoup(page.content).find(
        'form', attrs={'method': 'post'})

    fields = utils.get_datadic(comment_form)
    fields['content'] = src['content']

    # Submit to the form's own action address
    page = session.post(comment_form['action'], data=fields)

    # The success marker must appear in the response
    if '操作成功' in page.content:
        log.info(' Reply OK')
        return (True, str(log))
    log.error(' Reply Error')
    return (False, str(log))
def reply_creaders_news(post_url, src):
    """Post a comment on a creaders news article.

    @param post_url: URL of the article page
    @type post_url: str

    @param src: 'username', 'password' and 'content' are read here
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    site_root = utils.get_host(post_url)
    session = utils.RAPSession(src)

    # The article page carries the two ids the comment endpoint expects
    article = session.get(post_url)
    news_id = re.findall(r'news_id=(\d+)', article.content)[0]
    r_nid = re.findall(r'r_nid=(\d+)', article.content)[0]

    fields = {
        'news_id': news_id,
        'r_nid': r_nid,
        'username': src['username'],
        'password': src['password'],
        'replyid': 0,
        # The page itself is gb2312, but `saytext` appears to accept
        # utf8 only, so the content is sent as-is.
        'saytext': src['content'],
    }
    outcome = session.post(site_root + '/headline/postcomment.php',
                           data=fields)

    # The success marker must appear in the response
    if u'评论成功'.encode('gb2312') in outcome.content:
        log.info('Reply OK')
        return (True, str(log))
    log.error('Reply Error')
    return (False, str(log))
def reply_creaders_forum(post_url, src):
    """Reply to a thread on the creaders forum.

    Credentials are sent together with the reply form, so no separate
    login step is performed.

    @param post_url: URL of the thread; the reply is posted back to it
    @type post_url: str

    @param src: 'username', 'password' and 'content' are read here as
                UTF-8 byte strings
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    def to_site_charset(text):
        # UTF-8 bytes -> site charset (strict: unencodable text raises)
        return text.decode('utf8').encode(CHARSET)

    # Load the thread page first; its body is not used afterwards
    session.get(post_url)

    user_name = to_site_charset(src['username'])
    user_password = to_site_charset(src['password'])
    body = to_site_charset(src['content'])

    # The reply text doubles as the reply subject
    fields = {
        'user_name2': user_name,
        'user_password2': user_password,
        'captcha': '',
        'btrd_subject': body,
        'btrd_content': body
    }
    page = session.post(post_url, data=fields)

    # The reply counts as posted only if its text shows up in the response
    if body not in page.content:
        log.error('Reply Error')
        return (False, str(log))
    log.info('Reply OK')
    return (True, str(log))
def reply_penchinese_blog(post_url, src):
    """Post a comment on a penchinese blog entry.

    No login step is performed.

    @param post_url: URL of the blog entry
    @type post_url: str

    @param src: 'username' fills the author field, 'password' fills the
                email field and 'content' is the comment text
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    # Collect the fields of the comment form on the entry page
    page = session.get(post_url)
    comment_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'commentform'})
    fields = utils.get_datadic(comment_form)
    fields['author'] = src['username']
    fields['email'] = src['password']
    fields['comment'] = src['content']

    submit_url = comment_form['action']
    page = session.post(submit_url, data=fields)
    print(page.url)

    # Ending up on the submit address itself is treated as a failure
    if page.url != submit_url:
        log.info(' Reply OK')
        return (True, str(log))
    log.error(' Reply Error')
    return (False, str(log))
def reply_1dpw_forum(post_url, src):
    """Reply to a thread on the 1dpw forum.

    The original notes mention a 10-character figure for replies.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: account and reply data; 'content' is read here
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    site_root = utils.get_host(post_url)
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_1dpw(post_url, session, src):
        log.error(' Login Error')
        return (False, str(log))
    log.info(' Login OK')

    # The thread page carries a quick-reply form
    page = session.get(post_url)
    quick_form = BeautifulSoup(page.content).find(
        'form', attrs={'id': 'fastpostform'})

    fields = utils.get_datadic(quick_form)
    fields['message'] = src['content']
    fields['posttime'] = int(time.time())

    # Submit the form with the extra inajax flag
    submit_url = site_root + quick_form['action'] + '&inajax=1'
    page = session.post(submit_url, data=fields)

    # NOTE(review): success is keyed on the word 'Database' appearing in
    # the response -- confirm this marker is really the success signal.
    if 'Database' in page.content:
        log.info(' Reply OK')
        return (True, str(log))
    log.error(' Reply Error')
    return (False, str(log))
def _post(self, params):
    """Dispatch a new-thread request to the matching ``post_*`` handler.

    @param params: request arguments; every value is a sequence whose
                   first item is used. 'website', 'title', 'article',
                   'account' and 'password' are required; 'proxy_type',
                   'proxy_ip' and 'proxy_port' are optional.
    @type params: dict

    @return: (handler result, log text); (False, log text) when the
             website has no rule, ('', log text) when the handler raises
    @rtype: tuple
    """
    # Keep only the first item of every value.
    # NOTE: this rewrites the caller's dict in place.
    for key in params:
        params[key] = params[key][0]

    logger = utils.RAPLogger(params['website'])

    if params['website'] not in config.website_rule:
        logger.error('No post handler')
        return (False, str(logger))

    # A rule holds the handler suffix first and the target URL second
    rule = config.website_rule[params['website']]
    real_post = getattr(handlers, 'post_' + rule[0])

    # Prepare arguments.
    src = {
        'subject': params['title'],
        'content': params['article'],
        'username': params['account'],
        'password': params['password'],
        'TTL': config.max_try,
    }

    # Proxy settings are best-effort: missing or malformed pieces mean
    # "no proxy" rather than a failed request.
    try:
        proxy_type = params['proxy_type']
        src['proxies'] = {
            proxy_type: proxy_type + '://' + params['proxy_ip'] +
            ':' + params['proxy_port']
        }
    except (KeyError, TypeError, ValueError):
        src['proxies'] = ''

    # Real post. Handlers signal failure by raising, so this is the
    # boundary where errors are turned into an ('', log) result;
    # KeyboardInterrupt / SystemExit are deliberately not swallowed.
    try:
        url, log = real_post(rule[1], src)
        return (url, str(logger) + log)
    except Exception:
        logger.exception('Post Error')
        return ('', str(logger))
def _praise(self, params):
    """Dispatch a thumb-up request to the matching ``thumb_up_*`` handler.

    @param params: request arguments; every value is a sequence whose
                   first item is used. 'url' is required; 'extra',
                   'account', 'password', 'proxy_type', 'proxy_ip' and
                   'proxy_port' are optional.
    @type params: dict

    @return: (handler result, log text); (False, log text) when no
             handler matches the URL or the handler raises
    @rtype: tuple
    """
    # Keep only the first item of every value.
    # NOTE: this rewrites the caller's dict in place.
    for key in params:
        params[key] = params[key][0]

    logger = utils.RAPLogger(params['url'])

    # Pick the handler whose pattern matches the URL
    for pattern, handler in config.praise_rule.items():
        if re.search(pattern, params['url']):
            real_thumb_up = getattr(handlers, 'thumb_up_' + handler)
            break
    else:
        logger.error('No praise handler')
        # Same (flag, log) shape as every other return of this method
        return (False, str(logger))

    src = {'TTL': config.max_try}
    if 'extra' in params:
        # SECURITY NOTE(review): eval() runs request-supplied text as
        # Python code. If 'extra' is always a plain literal,
        # ast.literal_eval is the safe replacement -- confirm with the
        # callers before switching.
        src['extra'] = eval(params['extra'])
    if 'account' in params:
        src['username'] = params['account']
    if 'password' in params:
        src['password'] = params['password']

    # Proxy settings are best-effort: missing or malformed pieces mean
    # "no proxy" rather than a failed request.
    try:
        proxy_type = params['proxy_type']
        src['proxies'] = {
            proxy_type: proxy_type + '://' + params['proxy_ip'] +
            ':' + params['proxy_port']
        }
    except (KeyError, TypeError, ValueError):
        src['proxies'] = ''

    # Real thumb up. This is the boundary where handler errors are
    # turned into a (False, log) result; KeyboardInterrupt / SystemExit
    # are deliberately not swallowed.
    try:
        r, log = real_thumb_up(params['url'], src)
        return (r, str(logger) + log)
    except Exception:
        logger.exception('Thumb Up Error')
        return (False, str(logger))
def reply_powerapple_forum(post_url, src):
    """Reply to a thread on the powerapple forum.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: account and reply data; 'content' is always read and
                'subject' is used when present
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    host = utils.get_host(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_powerapple(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page and pick out the quick-reply form
    resp = sess.get(post_url)
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'fastpostform'})

    # Step 3: fill in and submit
    # NOTE(review): get_datadic is called unqualified here -- confirm the
    # name is imported into this module.
    payload = get_datadic(form)
    if 'subject' in src:
        payload['subject'] = src['subject']
    payload['message'] = src['content']
    resp = sess.post(host + form['action'], data=payload)

    # The raw response body is all the check below needs, so the
    # response is no longer parsed into an unused document tree.
    if src['content'] not in resp.content:
        logger.error('Reply Error: please try again !')
        return (False, str(logger))
    logger.info('Reply OK')
    return (True, str(logger))
def get_account_info_dwnews_blog(src):
    """Collect profile details of a dwnews blog account.

    @param src: 'username' and 'password' are read here
    @type src: dict

    @return: (account info, log text); the info dict is empty when the
             login fails. Score, class and login count are always ''.
    @rtype: (dict, str)
    """
    log = utils.RAPLogger(src['username'])
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_dwnews(session, src):
        log.error(' Login Error')
        return ({}, str(log))
    log.info(' Login OK')

    profile = session.get('http://blog.dwnews.com/myinfo.html')
    html = profile.content

    def first_match(pattern):
        # First capture of `pattern` in the profile page
        return re.findall(pattern, html)[0]

    avatar = BeautifulSoup(html).select('div.portrait img')[0]['src']
    registered = first_match(r'注册时间:(.*?)</div>')
    last_login = first_match(r'上次登录时间:(.*?) </div>')
    posts = first_match(r'文章<span>\((\d+)\)</span>')
    replies = first_match(r'评论<span>\((\d+)\)</span>')

    info = {}
    # Identity
    info['username'] = src['username']
    info['password'] = src['password']
    info['head_image'] = avatar
    # Score and class are not collected for this site
    info['account_score'] = ''
    info['account_class'] = ''
    # Dates; the login count is not collected for this site
    info['time_register'] = registered
    info['time_last_login'] = last_login
    info['login_count'] = ''
    # Activity counters
    info['count_post'] = posts
    info['count_reply'] = replies

    log.info('Get account info OK')
    return (info, str(log))
def post_dwnews_blog(post_url, src):
    """Publish a new article on the dwnews blog.

    @param post_url: board URL, e.g. http://blog.dwnews.com/ (used only
                     to name the logger)
    @type post_url: str

    @param src: account and post data; 'subject' and 'content' are read here
    @type src: dict

    @return: (URL of the new article, or '' on failure; log text)
    @rtype: (str, str)
    """
    log = utils.RAPLogger(post_url)
    session = utils.RAPSession(src)

    def rejected(body):
        # Record the offending response and report a failed post
        log.info(body)
        log.error(' Post Error')
        return ('', str(log))

    # Step 1: log in
    if not login_dwnews(session, src):
        log.error(' Login Error')
        return ('', str(log))
    log.info(' Login OK')

    # Step 2: the member check returns the CSRF token (backslashes removed)
    reply = session.get(
        'http://blog.dwnews.com/index.php?r=user/checkMember&callback=?')
    token = re.findall(r'"CsrfToken":"(.*?)"', reply.content)[0]
    token = token.replace('\\', '')
    log.info('csrf_token: ' + token)

    # Step 3: submit the article
    article = {
        'title': src['subject'],
        'content': '<p>' + src['content'] + '</p>',
        'tag': '时事',
        'catid': '5',
        'facebook': '0',
        'twitter': '0',
        'linkedin': '0',
        'google': '0',
        'CsrfToken': token
    }
    ajax_headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Referer': 'http://blog.dwnews.com/mytopic',
        'X-Requested-With': 'XMLHttpRequest',
    }
    reply = session.post('http://blog.dwnews.com/index.php?r=club/post',
                         data=article, headers=ajax_headers)
    if 'success' not in reply.content:
        return rejected(reply.content)

    # Step 4: the makehtml call must answer with 'ok'
    reply = session.get(
        'http://blog.dwnews.com/index.php?r=club/makehtml&catid=5')
    if 'ok' not in reply.content:
        return rejected(reply.content)
    log.info(' Post OK')

    # Step 5: take the first absolute link in the topic list
    reply = session.get('http://blog.dwnews.com/mytopic')
    links = BeautifulSoup(reply.content).select(
        'div.loadMore li a[href^="http"]')
    return (links[0]['href'], str(log))
def get_account_info_eulam_forum(src):
    """Collect profile details of an eulam forum account.

    @param src: 'username' and 'password' are read here
    @type src: dict

    @return: (account info, log text); the info dict is empty when the
             login fails
    @rtype: (dict, str)
    """
    logger = utils.RAPLogger(src['username'])
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_eulam_forum(sess, src):
        logger.error(' Login Error')
        return ({}, str(logger))
    logger.info(' Login OK')

    # The profile page is addressed by user name. The expression was
    # garbled in the source; plain concatenation restores it.
    resp = sess.get('http://bbs.eulam.com/user/Profile.asp?UserName='
                    + src['username'])

    # Fixed image used as the avatar for every account
    head_image = 'http://bbs.eulam.com/css/css1/images/photobg.gif'

    # Re-encode the page as UTF-8 so the patterns below can match
    html = resp.content.decode(CHARSET, 'ignore').encode('utf8')
    account_score = re.findall(r'<li>用户魅力:(\d+)</li>', html)[0]
    account_class = re.findall(r'<li>用户角色:(.*?)</li>', html)[0]
    time_register = re.findall(r'<li>注册时间:(.*?)</li>', html)[0]
    # The current local year is prepended to the last-login value
    year = time.strftime('%Y', time.localtime(time.time()))
    time_last_login = year + '-' + re.findall(r'<li>上次登录:(.*?)</li>', html)[0]
    login_count = re.findall(r'<li>登录次数:(\d+)</li>', html)[0]
    count_post = re.findall(r'<li>发表原贴:(\d+)</li>', html)[0]
    count_reply = re.findall(r'<li>发表回贴:(\d+)</li>', html)[0]

    account_info = {
        # Identity
        'username': src['username'],
        'password': src['password'],
        'head_image': head_image,
        # Score and class
        'account_score': account_score,
        'account_class': account_class,
        # Dates and login count
        'time_register': time_register,
        'time_last_login': time_last_login,
        'login_count': login_count,
        # Activity counters
        'count_post': count_post,
        'count_reply': count_reply
    }
    logger.info('Get account info OK')
    return (account_info, str(logger))
def get_account_info_sina_club(src):
    """Collect profile details of a sina club account.

    @param src: 'username' and 'password' are read here
    @type src: dict

    @return: (account info, log text); the info dict is empty when the
             login fails. The login count is always ''.
    @rtype: (dict, str)
    """
    log = utils.RAPLogger(src['username'])
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_sina(session, src):
        log.error(' Login Error')
        return ({}, str(log))
    log.info(' Login OK')

    profile = session.get('http://club.mil.news.sina.com.cn/memcp.php')
    avatar = BeautifulSoup(profile.content).select('div.avatar img')[0]['src']

    # Re-encode the page as UTF-8 so the patterns below can match
    page_utf8 = profile.content.decode(CHARSET).encode('utf8')

    def first_match(pattern):
        # First capture of `pattern` in the re-encoded profile page
        return re.findall(pattern, page_utf8)[0]

    score = first_match(r'<li>积分: (\d+)</li>')
    user_group = first_match(
        r'<label>用户组:</label> <font color="green">(.*?)</font>')
    registered = first_match(r'<label>注册日期:</label>(.*?)</li>')
    last_visit = first_match(r'<label>上次访问:</label>(.*?)</li>')
    posts = first_match(r'<li>帖子: (\d+)')
    # The figure from the second counter line is reported as reply count
    replies = first_match(r'<li>精华: (\d+)')

    info = {}
    # Identity
    info['username'] = src['username']
    info['password'] = src['password']
    info['head_image'] = avatar
    # Score and class
    info['account_score'] = score
    info['account_class'] = user_group
    # Dates; the login count is not collected for this site
    info['time_register'] = registered
    info['time_last_login'] = last_visit
    info['login_count'] = ''
    # Activity counters
    info['count_post'] = posts
    info['count_reply'] = replies

    log.info('Get account info OK')
    return (info, str(log))
def reply_sina_club(post_url, src):
    """Reply to a thread on the sina forums, retrying with a captcha.

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: account and reply data; 'content' (UTF-8 byte string)
                and 'TTL' (maximum number of captcha retries) are read here
    @type src: dict

    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_sina(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread and collect the reply form
    resp = sess.get(post_url)
    host = utils.get_host(post_url)
    # The submit address is taken from the raw HTML of the form tag
    reply_url = re.findall(r'id=\"postform\" action=\"(.*?)\"', resp.content)[0]
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'postform'})
    payload = utils.get_datadic(form)
    payload['message'] = src['content'].decode('utf8').encode(CHARSET)

    # The address comes from raw HTML, so '&' is still written as the
    # entity '&amp;'. (The previous replace('&', '&') changed nothing.)
    reply_url = reply_url.replace('&amp;', '&')

    post_headers = {
        'Origin': host,
        'Referer': post_url
    }
    resp = sess.post(reply_url, data=payload, headers=post_headers)

    # A response without 'postform' is treated as a rejected reply (a
    # wrong or missing captcha is one possible cause); retry with a
    # freshly cracked captcha, at most src['TTL'] times.
    post_times = 0
    while 'postform' not in resp.content and post_times < src['TTL']:
        post_times += 1
        logger.info(' reply need captcha')
        # Fetch the captcha image
        captcha = sess.get(host + 'seccode.php',
                           headers={
                               'Accept': config.accept_image,
                               'Referer': reply_url
                           })
        # Turn the image into text and add it to the form fields
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' seccode:' + seccode)
        payload['seccodeverify'] = seccode.decode(CHARSET)
        resp = sess.post(reply_url, data=payload, headers=post_headers)

    if 'postform' not in resp.content:
        logger.error(' Reply Error')
        return (False, str(logger))
    logger.info(' Reply OK')
    return (True, str(logger))
def post_sina_blog(post_url, src):
    """Publish a new article on the sina blog, retrying with a captcha.

    @param post_url: board address, e.g. blog.sina.com.cn (used only to
                     name the logger)
    @type post_url: str

    @param src: account and post data; 'subject', 'content' and 'TTL'
                (maximum login attempts, and separately maximum captcha
                retries) are read here. The dict is not modified.
    @type src: dict

    @return: (URL of the new article, or '' on failure; log text)
    @rtype: (str, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in, trying at most src['TTL'] times
    is_login = False
    attempts = 0
    while not is_login and attempts < src['TTL']:
        attempts += 1
        is_login = login_sina(sess, src)
    if not is_login:
        logger.error(' Login Error')
        # '' like every other failure path (this one used to return False)
        return ('', str(logger))
    logger.info(' Login OK')

    # Step 2: load the editor and collect its form fields
    resp = sess.get(
        'http://control.blog.sina.com.cn/admin/article/article_add.php')
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'editorForm'})
    payload = utils.get_datadic(form)
    payload['blog_title'] = src['subject']
    payload['blog_body'] = src['content']
    payload['conlen'] = 9
    payload['x_cms_flag'] = 0
    payload['x_rank'] = ''

    # Step 3: submit. Code B06001 is treated as success and B06013 as
    # "captcha required". The retry budget is a local counter, so the
    # caller's src['TTL'] is left untouched and a negative TTL cannot
    # loop forever.
    retries_left = src['TTL']
    while True:
        resp = sess.post(form['action'], data=payload)
        # Log the raw body first so it is kept even if it is not valid JSON
        logger.info(resp.content)
        result = json.loads(resp.content)

        if result['code'] == u'B06001':
            url = 'http://blog.sina.com.cn/s/blog_' + result['data'] + '.html'
            logger.info(' Post OK')
            return (url, str(logger))

        if result['code'] != u'B06013' or retries_left <= 0:
            break
        retries_left -= 1

        # Fetch and crack a captcha, then resubmit with it
        captcha = sess.get(
            'http://interface.blog.sina.com.cn/riaapi/checkwd_image.php?r=0.8578676988836378',
            headers={'Accept': config.accept_image})
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' captcha:' + seccode)
        payload['checkword'] = seccode

    logger.error(' Post Error')
    return ('', str(logger))
def get_account_info_wailaike_forum(src):
    """Collect profile details of a wailaike forum account.

    Only the post count is actually collected; every other profile
    field is returned as ''.

    @param src: 'username' and 'password' are read here
    @type src: dict

    @return: (account info, log text); the info dict is empty when the
             login fails
    @rtype: (dict, str)
    """
    log = utils.RAPLogger(src['username'])
    session = utils.RAPSession(src)

    # Step 1: log in
    if not login_wailaike(session, src):
        log.error(' Login Error')
        return ({}, str(log))
    log.info(' Login OK')

    # The front page links to the user's own page; take its numeric id
    front_page = session.get('http://www.wailaike.net/index.php')
    user_id = re.findall(r'/user_page\?i=(\d+)', front_page.content)[0]
    print(user_id)

    # Count the post entries listed on the user's page
    user_page = session.get('http://www.wailaike.net/user_page?i=' + user_id)
    post_total = len(re.findall(r'index-brief-main', user_page.content))
    print(post_total)

    info = {}
    # Identity
    info['username'] = src['username']
    info['password'] = src['password']
    info['head_image'] = ''
    # Score and class are not collected for this site
    info['account_score'] = ''
    info['account_class'] = ''
    # Dates and login count are not collected for this site
    info['time_register'] = ''
    info['time_last_login'] = ''
    info['login_count'] = ''
    # Activity counters; replies are not collected for this site
    info['count_post'] = post_total
    info['count_reply'] = ''

    log.info('Get account info OK')
    return (info, str(log))