def home_timeline(self, count=20):
    '''Get home timeline

    Request the ``feed.get`` API (type 10, first page) and wrap every
    returned entry in ``self.Message``.

    @param count: number of statuses
    @return: a ``snstype.MessageList``. It is empty when the request
             raises, and it skips every entry whose parsing raises.
    '''
    api_params = dict(method="feed.get", type=10, page=1, count=count)
    try:
        jsonlist = self.renren_request(api_params)
    except Exception as e:
        # Best effort: a failed request is logged and treated as "no data".
        logger.warning("catch expection: %s", e)
        jsonlist = []

    statuslist = snstype.MessageList()
    for j in jsonlist:
        try:
            statuslist.append(self.Message(
                j,
                platform=self.jsonconf['platform'],
                channel=self.jsonconf['channel_name']))
        except Exception as e:
            # One malformed entry must not discard the rest of the page.
            logger.warning("catch expection '%s' in parsing '%s'", e, j)

    # Subscript access, like the two lookups above; attribute access
    # (``jsonconf.channel_name``) fails when jsonconf is a plain dict.
    logger.info("Read %d statuses from '%s'",
                len(statuslist), self.jsonconf['channel_name'])
    return statuslist
def home_timeline(self, count=None, channel=None):
    """
    Route to home_timeline method of snsapi.

    :param count: Forwarded unchanged to ``_home_timeline``.
    :param channel: The channel name. Use None to read all channels
    :return: A ``MessageList`` with everything that was read. Expired
             or unknown channels contribute nothing and are logged.
    """
    status_list = snstype.MessageList()

    if not channel:
        # No name given: walk every channel that offers home_timeline
        # and has not expired.
        for c in self.itervalues():
            if self.__check_method(c, 'home_timeline') and not c.is_expired():
                status_list.extend(self._home_timeline(count, c))
    elif channel not in self:
        logger.warning("channel '%s' is not in pocket. Do nothing.", channel)
    elif self[channel].is_expired():
        logger.warning("channel '%s' is expired. Do nothing.", channel)
    else:
        status_list.extend(self._home_timeline(count, self[channel]))

    logger.info("Read %d statuses", len(status_list))
    return status_list
def home_timeline(self, count=20):
    '''
    Collect up to ``count`` messages by reading timeline pages in order.

    Pages are requested through ``self._get_weibo(page)`` starting at
    page 1 until enough messages have been gathered.

    :param count: Maximum number of messages to return.
    :return: A ``snstype.MessageList`` holding at most ``count``
             messages. It holds fewer when the pages run out first.
    '''
    all_weibo = snstype.MessageList()
    page = 1
    while len(all_weibo) < count:
        weibos = self._get_weibo(page)
        if not weibos:
            # An empty page adds nothing, so without this guard the loop
            # would keep requesting further pages forever.
            break
        # The remaining quota is positive here, so a plain slice caps the
        # page at exactly what is still needed.
        all_weibo += weibos[:count - len(all_weibo)]
        page += 1
    return all_weibo
def home_timeline(self, count=20):
    '''
    Ask Instagram for the ``users/self/feed`` resource.

    :param count: Forwarded to ``instagram_request`` as ``count``.
    :return: An empty ``snstype.MessageList`` when the request raises.
    '''
    request_args = dict(resource="users/self/feed", method="get", count=count)
    try:
        jsonlist = self.instagram_request(**request_args)
    except Exception as err:
        # Any failure is logged and reported as "nothing read".
        logger.warning("InstagramAPIError, %s", err)
        return snstype.MessageList()
def _get_weibo(self, page = 1):
    '''
    Fetch one page of the weibo.cn timeline and turn it into messages.

    The page is requested with the stored ``gsid`` token, parsed with
    lxml, and every element of class ``c`` whose id starts with ``M_``
    is converted into a dict, then wrapped in ``self.Message``.

    :param page: Page number placed in the ``page`` query parameter.
    :return: A ``snstype.MessageList`` with one message per parsed entry.
    '''
    #FIXME: repost and comment counts should be obtained by analysing the DOM rather than with regular expressions (to avoid clashing with the content)
    #FIXME: for a reposted weibo, the information about the original weibo is insufficient
    req = urllib2.Request('http://weibo.cn/?gsid=' + self.token['gsid'] + '&page=%d' % (page))
    req = self._process_req(req)
    m = urllib2.urlopen(req, timeout = 10).read()
    h = lxml.html.fromstring(m)
    weibos = []
    for i in h.find_class('c'):
        # Only elements whose id starts with 'M_' are treated as statuses;
        # the part after the prefix becomes the message id.
        if i.get('id') and i.get('id')[0:2] == 'M_':
            weibo = None
            if i.find_class('cmt'): # reposted weibo
                weibo = {
                        'uid' : self._get_uid_by_pageurl(i.find_class('nk')[0].attrib['href'], self.jsonconf['uidtype']),
                        'author' : i.find_class('nk')[0].text,
                        'id': i.get('id')[2:],
                        # Keeps only the first space-separated token of the 'ct' text.
                        'time': i.find_class('ct')[0].text.encode('utf-8').strip(' ').split(' ')[0].decode('utf-8'),
                        # Filled in below from the repost reason.
                        'text' : None,
                        'orig' : {
                            'text': i.find_class('ctt')[0].text_content(),
                            'author': re.search(u'转发了\xa0(.*)\xa0的微博', i.find_class('cmt')[0].text_content()).group(1),
                            'comments_count' : 0,
                            'reposts_count' : 0
                            },
                        'comments_count' : 0,
                        'reposts_count' : 0
                        }
                # The parent of the last 'cmt' element carries the repost
                # reason followed by the counters of the repost itself.
                parent = i.find_class('cmt')[-1].getparent()
                retweet_reason = re.sub(r'转发理由:(.*)赞\[[0-9]*\] 转发\[[0-9]*\] 评论\[[0-9]*\] 收藏.*$', r'\1', parent.text_content().encode('utf-8'))
                weibo['text'] = retweet_reason.decode('utf-8')
                # Group 2 is the bracketed number after 转发 (repost),
                # group 3 the one after 评论 (comment).
                zf = re.search(r'赞\[([0-9]*)\] 转发\[([0-9]*)\] 评论\[([0-9]*)\]', parent.text_content().encode('utf-8'))
                if zf:
                    weibo['comments_count'] = int(zf.group(3))
                    weibo['reposts_count'] = int(zf.group(2))
                # Same layout, but for the counters prefixed with 原文 (original post).
                zf = re.search(r'赞\[([0-9]*)\] 原文转发\[([0-9]*)\] 原文评论\[([0-9]*)\]', i.text_content().encode('utf-8'))
                if zf:
                    weibo['orig']['comments_count'] = int(zf.group(3))
                    weibo['orig']['reposts_count'] = int(zf.group(2))
            else:
                # Plain status: no 'orig' entry, and the counter keys only
                # exist when the regular expression below matches.
                weibo = {'author' : i.find_class('nk')[0].text,
                        'uid' : self._get_uid_by_pageurl(i.find_class('nk')[0].attrib['href'], self.jsonconf['uidtype']),
                        # Drops the first character of the 'ctt' text.
                        'text': i.find_class('ctt')[0].text_content()[1:],
                        'id': i.get('id')[2:],
                        'time': i.find_class('ct')[0].text.encode('utf-8').strip(' ').split(' ')[0].decode('utf-8')
                        }
                zf = re.search(r'赞\[([0-9]*)\] 转发\[([0-9]*)\] 评论\[([0-9]*)\]', i.text_content().encode('utf-8'))
                if zf:
                    weibo['comments_count'] = int(zf.group(3))
                    weibo['reposts_count'] = int(zf.group(2))
            weibos.append(weibo)
    # Wrap the raw dicts in the channel's Message type.
    statuslist = snstype.MessageList()
    for i in weibos:
        statuslist.append(self.Message(i, platform = self.jsonconf['platform'], channel = self.jsonconf['channel_name']))
    return statuslist
def _get_user_status_list(self, count, userid, username):
    '''
    Request the first page of one user's statuses via ``status/list``.

    :param count: Sent as ``pageSize``.
    :param userid: Sent as ``ownerId``.
    :param username: Not used by the request itself.
    :return: An empty ``snstype.MessageList`` when ``RenrenAPIError``
             is raised.
    '''
    # NOTE(review): ``pageNumberint`` looks like it may be a misspelt
    # ``pageNumber`` -- confirm against the Renren API before changing.
    request_args = dict(
        method="status/list",
        pageNumberint=1,
        pageSize=count,
        ownerId=userid,
    )
    try:
        jsonlist = self.renren_request(**request_args)
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def _get_user_status_list(self, count, userid, username):
    '''
    Request the first page of one user's statuses via ``status.gets``.

    :param count: Sent as ``count``.
    :param userid: Sent as ``uid``.
    :param username: Not used by the request itself.
    :return: An empty ``snstype.MessageList`` when ``RenrenAPIError``
             is raised.
    '''
    request_args = dict(
        method="status.gets",
        page=1,
        count=count,
        uid=userid,
    )
    try:
        jsonlist = self.renren_request(**request_args)
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def home_timeline(self, count=20):
    '''
    Gather statuses of every user listed under ``friend_list``.

    Each ``friend_list`` entry of the channel configuration must carry
    a ``userid`` and a ``username``. Up to ``count`` statuses are
    requested per entry through ``_get_user_status_list``.

    :param count: Number of statuses requested for each configured user.
    :return: A ``snstype.MessageList`` with the results concatenated in
             configuration order.
    '''
    statuslist = snstype.MessageList()
    for friend in self.jsonconf['friend_list']:
        statuses = self._get_user_status_list(
            count, friend['userid'], friend['username'])
        statuslist.extend(statuses)
    logger.info("Read %d statuses from '%s'",
                len(statuslist), self.jsonconf['channel_name'])
    return statuslist
class RenrenShare(RenrenBase):
    '''
    Renren channel whose timeline is built from ``feed.get`` entries of
    types 21, 32, 33, 50, 51 and 52, wrapped in ``RenrenShareMessage``.
    '''

    Message = RenrenShareMessage

    def __init__(self, channel=None):
        super(RenrenShare, self).__init__(channel)
        # The platform name is simply the class name.
        self.platform = self.__class__.__name__

    @staticmethod
    def new_channel(full=False):
        '''
        Build a channel configuration template for this platform.

        :param full: Passed through to ``RenrenBase.new_channel``.
        :return: The base template with ``platform`` set to
                 ``'RenrenShare'``.
        '''
        c = RenrenBase.new_channel(full)
        c['platform'] = 'RenrenShare'
        return c

    @require_authed
    def home_timeline(self, count=20):
        '''
        Get timeline of Renren statuses

        :param count:
            Number of statuses

        :return:
            At most ``count`` statuses (can be less). The list is empty
            when the request raises ``RenrenAPIError``. When parsing an
            entry raises, the entries parsed so far are returned.
        '''
        api_params = dict(method="feed.get",
                          type="21,32,33,50,51,52",
                          page=1, count=count)
        try:
            jsonlist = self.renren_request(api_params)
        except RenrenAPIError as e:
            logger.warning("RenrenAPIError, %s", e)
            return snstype.MessageList()

        statuslist = snstype.MessageList()
        try:
            for j in jsonlist:
                statuslist.append(self.Message(
                    j,
                    platform=self.jsonconf['platform'],
                    channel=self.jsonconf['channel_name']))
        except Exception as e:
            logger.warning("Catch expection: %s", e)

        # Without this the method fell off the end and handed None to
        # callers, contradicting the documented return value.
        return statuslist
def home_timeline(self, count=20, **kwargs):
    '''
    Request the first page of the Renren feed via ``feed.get``.

    :param count: Number of feed entries to request.
    :param kwargs: An optional ``type`` entry replaces the default list
                   of feed type codes.
    :return: An empty ``snstype.MessageList`` when ``RenrenAPIError``
             is raised.
    '''
    #FIXME: automatic paging for count > 100
    default_types = '10,11,20,21,22,23,30,31,32,33,34,35,36,40,41,50,51,52,53,54,55'
    ttype = kwargs.get('type', default_types)
    try:
        jsonlist = self.renren_request(
            method="feed.get",
            page=1,
            count=count,
            type=ttype,
        )
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def home_timeline(self, count=20):
    '''
    Read the newest stored messages from the sqlite database.

    Rows are taken from the ``home_timeline`` table ordered by ``time``
    descending, and each ``pickled_object`` column is converted back to
    an object with ``str2obj``.

    :param count: Maximum number of rows to read (formatted with ``%d``).
    :return: A ``snstype.MessageList`` with the restored objects. It is
             empty, or partially filled, when reading raises; the error
             is logged rather than propagated.
    '''
    ret = snstype.MessageList()
    logger.debug("acquiring lock")
    self.dblock.acquire()
    try:
        conn = sqlite3.connect(self.sqlitefile)
        try:
            c = conn.cursor()
            c.execute(
                "SELECT pickled_object FROM home_timeline ORDER BY time DESC LIMIT 0, %d"
                % (count, ))
            p = c.fetchall()
            logger.info("%d messages read from database", len(p))
            # NOTE(review): str2obj presumably unpickles its argument --
            # only safe while the database content is trusted.
            for i in p:
                ret.append(str2obj(str(i[0])))
        finally:
            # Close the connection on every path instead of leaking it.
            conn.close()
    except Exception as e:
        logger.warning("Error while reading database: %s", e)
    finally:
        # The lock used to stay held after the first call, so any later
        # acquire() on it would block forever.
        self.dblock.release()
    return ret
def home_timeline(self, count=20, channel=None):
    """
    Route to home_timeline method of snsapi.

    :param count: Forwarded to each channel's ``home_timeline``.
    :param channel: The channel name. Use None to read all channels
    :return: A ``MessageList`` with the statuses of the selected
             channel, or of every channel offering ``home_timeline``.
    """
    status_list = snstype.MessageList()
    if not channel:
        for ch in self.itervalues():
            # Skip channels that do not provide a home_timeline method.
            if not self.__check_method(ch, 'home_timeline'):
                continue
            status_list.extend(ch.home_timeline(count))
    else:
        status_list.extend(self[channel].home_timeline(count))

    logger.info("Read %d statuses", len(status_list))
    return status_list
class RenrenBlog(RenrenBase):
    '''
    Renren channel whose timeline is built from ``feed.get`` entries of
    types 20 and 22, wrapped in ``RenrenBlogMessage``.
    '''

    Message = RenrenBlogMessage

    def __init__(self, channel=None):
        super(RenrenBlog, self).__init__(channel)
        # The platform name is simply the class name.
        self.platform = self.__class__.__name__

    @staticmethod
    def new_channel(full=False):
        '''
        Build a channel configuration template for this platform.

        :param full: Passed through to ``RenrenBase.new_channel``.
        :return: The base template with ``platform`` set to
                 ``'RenrenBlog'``.
        '''
        c = RenrenBase.new_channel(full)
        c['platform'] = 'RenrenBlog'
        return c

    @require_authed
    def home_timeline(self, count=20):
        '''
        Get blog timeline

        :param count:
            Number of blogs

        :return:
            A ``snstype.MessageList``. It is empty when the request
            raises ``RenrenAPIError``. When parsing an entry raises,
            the entries parsed so far are returned.
        '''
        api_params = {
            'method': 'feed.get',
            'type': '20,22',
            'page': 1,
            'count': count
        }
        try:
            jsonlist = self.renren_request(api_params)
            logger.debug("Get %d elements in response", len(jsonlist))
        except RenrenAPIError as e:
            logger.warning("RenrenAPIError, %s", e)
            return snstype.MessageList()

        statuslist = snstype.MessageList()
        try:
            for j in jsonlist:
                statuslist.append(
                    self.Message(j,
                                 platform=self.jsonconf['platform'],
                                 channel=self.jsonconf['channel_name']))
        except Exception as e:
            logger.warning("Catch expection: %s", e)

        # Without this the method fell off the end and handed None to
        # callers even though the list had been built.
        return statuslist
class RenrenStatusDirect(RenrenFeed):
    '''
    Renren channel that reads statuses of individual users through
    ``status.gets`` and wraps them in ``RenrenStatusDirectMessage``.
    '''

    Message = RenrenStatusDirectMessage

    def __init__(self, channel=None):
        super(RenrenStatusDirect, self).__init__(channel)

    @staticmethod
    def new_channel(full=False):
        '''
        Build a channel configuration template for this platform.

        :param full: Passed through to ``RenrenFeed.new_channel``.
        :return: The base template with ``platform`` set to
                 ``'RenrenStatusDirect'`` and a one-entry placeholder
                 ``friend_list``.
        '''
        c = RenrenFeed.new_channel(full)
        c['platform'] = 'RenrenStatusDirect'
        c['friend_list'] = [
            {
                "username": "******",
                "userid": "ID"
            }
        ]
        return c

    @require_authed
    def update(self, text):
        '''Post ``text`` by delegating to ``RenrenFeed._update_status``.'''
        return RenrenFeed._update_status(self, text)

    def _get_user_status_list(self, count, userid, username):
        '''
        Read the first page of one user's statuses.

        :param count: Number of statuses to request.
        :param userid: Sent to the API as ``uid``.
        :param username: Stored under ``name`` in every returned entry
                         before it is wrapped in ``self.Message``.
        :return: A ``snstype.MessageList``. It is empty when the request
                 raises ``RenrenAPIError``, and it skips every entry
                 whose parsing raises.
        '''
        try:
            jsonlist = self.renren_request(
                method="status.gets",
                page=1,
                count=count,
                uid=userid,
            )
        except RenrenAPIError as e:
            logger.warning("RenrenAPIError, %s", e)
            return snstype.MessageList()

        statuslist = snstype.MessageList()
        for j in jsonlist:
            try:
                j['name'] = username
                statuslist.append(self.Message(
                    j,
                    platform=self.jsonconf['platform'],
                    channel=self.jsonconf['channel_name']
                ))
            except Exception as e:
                # One malformed entry must not discard the others.
                logger.warning("Catch exception: %s", e)

        # Callers pass the result to extend(); returning None on the
        # success path would raise TypeError there.
        return statuslist
def home_timeline(self, count=20, **kwargs):
    '''
    Request the first page of the Renren feed via ``feed/list``.

    :param count: Number of feed entries to request.
    :param kwargs: An optional ``type`` entry replaces the default
                   ``'ALL'``.
    :return: An empty ``snstype.MessageList`` when ``RenrenAPIError``
             is raised.
    '''
    # FIXME: automatic paging for count > 100
    # BUG: It seems that ttype has no influence
    #      on the returned value of renren_request()
    ttype = kwargs.get('type', 'ALL')
    try:
        jsonlist = self.renren_request(
            method="feed/list",
            page=1,
            count=count,
            type=ttype,
        )
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def home_timeline(self, count=20):
    '''
    Get timeline of Renren statuses

    The first page of ``feed.get`` is requested for feed types
    21, 32, 33, 50, 51 and 52.

    :param count:
        Number of statuses

    :return:
        At most ``count`` statuses (can be less). An empty
        ``snstype.MessageList`` when ``RenrenAPIError`` is raised.
    '''
    api_params = {
        'method': "feed.get",
        'type': "21,32,33,50,51,52",
        'page': 1,
        'count': count,
    }
    try:
        jsonlist = self.renren_request(api_params)
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def home_timeline(self, count=20):
    '''Get home timeline

    * function : get statuses of yours and your friends'
    * parameter count: number of statuses
    * return : a ``snstype.MessageList`` of the parsed statuses, or
      ``[]`` when the response cannot be parsed. Errors raised by
      ``tencent_request`` itself are not caught here.
    '''
    jsonobj = self.tencent_request("statuses/home_timeline", reqnum=count)
    #logger.debug("returned: %s", jsonobj)
    statuslist = snstype.MessageList()
    try:
        for j in jsonobj['data']['info']:
            statuslist.append(
                self.Message(j,
                             platform=self.jsonconf['platform'],
                             channel=self.jsonconf['channel_name']))
    except Exception as e:
        # Parsing is all-or-nothing here: anything parsed so far is
        # dropped, as before.
        logger.warning("Catch exception: %s", e)
        return []
    # The success path used to fall off the end and return None.
    return statuslist
def home_timeline(self, count=20):
    '''
    Get blog timeline

    The first page of ``feed.get`` is requested for feed types 20
    and 22.

    :param count:
        Number of blogs

    :return:
        An empty ``snstype.MessageList`` when ``RenrenAPIError`` is
        raised.
    '''
    api_params = dict(method='feed.get', type='20,22', page=1, count=count)
    try:
        jsonlist = self.renren_request(api_params)
        logger.debug("Get %d elements in response", len(jsonlist))
    except RenrenAPIError as err:
        logger.warning("RenrenAPIError, %s", err)
        return snstype.MessageList()
def home_timeline(self, count=20):
    '''Get home timeline

    :param count:
        number of statuses

    :return:
        A ``snstype.MessageList`` of the parsed statuses, or ``[]``
        when the response carries an ``error`` key. When the request or
        the parsing raises, the error is logged and whatever was parsed
        so far is returned.
    '''
    statuslist = snstype.MessageList()
    try:
        jsonobj = self.weibo_request('statuses/home_timeline',
                                     'GET',
                                     {'count': count})
        if "error" in jsonobj:
            logger.warning("error json object returned: %s", jsonobj)
            return []
        for j in jsonobj['statuses']:
            statuslist.append(self.Message(
                j,
                platform=self.jsonconf['platform'],
                channel=self.jsonconf['channel_name']))
    except Exception as e:
        logger.warning("Catch exception: %s", e)
    # The list was built but never handed back; callers received None.
    return statuslist
def home_timeline_for_test(self, count=20):
    '''
    Call ``self.dummy()`` with the usual timeline error handling.

    :param count: Accepted for signature compatibility; not used in the
                  call to ``dummy``.
    :return: An empty ``snstype.MessageList`` when ``dummy`` raises.
    '''
    try:
        jsonlist = self.dummy()
    except Exception as err:
        logger.warning("DoubanAPIError, %s", err)
        return snstype.MessageList()
return self._instagram_request_v1(method, resource, kwargs) else: raise InstagramAPIError(response['meta']["error_message"]) return response @require_authed def home_timeline(self, count=20): try: jsonlist = self.instagram_request(resource="users/self/feed", method="get", count=count) except Exception, e: logger.warning("InstagramAPIError, %s", e) return snstype.MessageList() statuslist = snstype.MessageList() for j in jsonlist["data"]: try: statuslist.append( self.Message(j, platform=self.jsonconf['platform'], channel=self.jsonconf['channel_name'])) except Exception, e: logger.warning("Catch exception: %s", e) logger.info("Read %d statuses from '%s'", len(statuslist), self.jsonconf['channel_name']) return statuslist def update(self, text): logger.warning("Instagram does not support update()!")
class SinaWeiboWapStatus(SNSBase):
    '''
    Channel that works against the weibo.cn pages: it logs in (or
    reuses a configured ``gsid``), then reads statuses by parsing the
    returned HTML.
    '''

    Message = SinaWeiboWapStatusMessage

    def __init__(self, channel=None):
        super(SinaWeiboWapStatus, self).__init__(channel)
        # Only the two login modes handled by auth() are accepted.
        assert channel['auth_by'] in ['userpass', 'gsid']
        self.platform = self.__class__.__name__
        self.Message.platform = self.platform

    @staticmethod
    def new_channel(full=False):
        '''
        Build a channel configuration template for this platform.

        :param full: Passed through to ``SNSBase.new_channel``.
        :return: The base template extended with ``platform``,
                 ``uidtype``, ``auth_by`` and an ``auth_info`` skeleton
                 for username/password login.
        '''
        c = SNSBase.new_channel(full)
        c['platform'] = 'SinaWeiboWapStatus'
        c['uidtype'] = 'path'
        c['auth_by'] = 'userpass'
        c['auth_info'] = {
            'save_token_file': "(default)",
            'login_username': '',
            'login_password': ''
        }
        return c

    def _process_req(self, req):
        '''Attach a fixed desktop-browser User-Agent header and return ``req``.'''
        req.add_header(
            'User-Agent',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19'
        )
        return req

    def _get_weibo_homepage(self, token=None):
        '''
        Download the weibo.cn home page.

        :param token: Optional token dict; its ``gsid`` takes precedence
                      over ``self.token``. An empty gsid is used when
                      neither provides one.
        :return: The raw response body.
        '''
        if token:
            gsid = token['gsid']
        elif self.token and 'gsid' in self.token:
            gsid = self.token['gsid']
        else:
            gsid = ''
        req = urllib2.Request('http://weibo.cn/?gsid=' + gsid)
        req = self._process_req(req)
        m = urllib2.urlopen(req, timeout=10).read()
        return m

    def auth(self):
        '''
        Obtain a ``gsid`` token and check that it works.

        A saved token is reused when available. Otherwise the gsid is
        taken from the configuration (``auth_by == 'gsid'``) or obtained
        by submitting the login form (``auth_by == 'userpass'``), which
        may interactively ask for a captcha.

        :return: The result of ``is_authed()``; ``False`` for any other
                 ``auth_by`` value.
        '''
        if self.get_saved_token():
            return self.is_authed()
        if self.jsonconf['auth_by'] == 'gsid':
            self.token['gsid'] = self.jsonconf['gsid']
        elif self.jsonconf['auth_by'] == 'userpass':
            show_verification = False
            verification_code = ''
            # Fetch the login page first: the form field names and hidden
            # values needed for the POST are extracted from it.
            req = urllib2.Request(
                'http://login.weibo.cn/login/?vt=4&revalid=2&ns=1&pt=1')
            req = self._process_req(req)
            response = urllib2.urlopen(req, timeout=10)
            p = response.read()
            while True:
                # Each attempt reuses the ``rand`` value, the numbered
                # password field name and the ``vk`` value found in the
                # most recently received page ``p``.
                req = urllib2.Request(
                    'http://login.weibo.cn/login/?rand=' +
                    (re.search("rand=([0-9]*)", p).group(1)) +
                    '&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4&revalid=2&ns=1'
                )
                data = {
                    'mobile': self.auth_info['login_username'],
                    'password_%s' % (re.search('name="password_([0-9]*)"', p).group(1)):
                    self.auth_info['login_password'],
                    'backURL': 'http%3A%2F%2Fweibo.cn',
                    'backTitle': '手机新浪网',
                    'tryCount': '',
                    'vk': re.search('name="vk" value="([^"]*)"', p).group(1),
                    'submit': '登录'
                }
                if show_verification:
                    # A captcha was requested by the previous response:
                    # send the typed code with the matching ``capId``.
                    data['code'] = verification_code
                    data['capId'] = re.search('name="capId" value="([^"]*)"',
                                              p).group(1)
                    show_verification = False
                req = self._process_req(req)
                data = urllib.urlencode(data)
                response = urllib2.urlopen(req, data, timeout=10)
                p = response.read()
                final_url = response.geturl()
                if 'newlogin' in final_url:
                    # Success: the final URL carries the gsid in ``g=``.
                    final_gsid = re.search('g=([^&]*)', final_url).group(1)
                    self.token = {'gsid': final_gsid}
                    break
                elif '验证码' in p:
                    # The page mentions a verification code.
                    err_msg = re.search('class="me">([^>]*)<', p).group(1)
                    if '请输入图片中的字符' in p:
                        # The page asks for the characters in the picture:
                        # show the captcha image and prompt for the code,
                        # then loop to retry.
                        captcha_url = re.search(r'"([^"]*captcha[^"]*)',
                                                p).group(1)
                        show_verification = True
                        import Image
                        import StringIO
                        ss = urllib2.urlopen(captcha_url, timeout=10).read()
                        sss = StringIO.StringIO(ss)
                        img = Image.open(sss)
                        img.show()
                        verification_code = raw_input(err_msg)
                else:
                    # Any other response: log the page's message and stop.
                    err_msg = re.search('class="me">([^>]*)<', p).group(1)
                    logger.warning(err_msg)
                    break
        else:
            return False
        res = self.is_authed()
        if res:
            self.save_token()
        return res

    def _is_authed(self, token=None):
        '''
        ``is_authed`` is an ``SNSBase`` general method. It
        invokes platform specific ``expire_after`` to
        determine whether this platform is authed.

        Rename this method.

        Here the check is whether the home page fetched with ``token``
        contains the submit button of the posting form.
        '''
        return '<input type="submit" value="发布" />' in self._get_weibo_homepage(
            token)

    def expire_after(self, token=None):
        '''
        Return -1 when ``_is_authed(token)`` succeeds, otherwise 0.

        NOTE(review): presumably -1 means "no known expiry" and 0 means
        "already expired" -- confirm against ``SNSBase``.
        '''
        if self._is_authed(token):
            return -1
        else:
            return 0

    def _get_uid_by_pageurl(self, url, type='num'):
        '''
        Derive a user identifier from a profile URL.

        :param url: Profile URL; a leading ``http://weibo.cn`` is removed.
        :param type: ``'num'`` returns the digits after ``/u/`` or, when
                     the URL has none, the digits before ``/info`` found
                     in the downloaded page. ``'path'`` returns the URL
                     part between the first ``/`` and the ``?``. Any
                     other value falls through and returns ``None``.
        '''
        if url[0:len('http://weibo.cn')] == 'http://weibo.cn':
            url = url[len('http://weibo.cn'):]
        if type == 'num':
            if re.search('\/u\/[0-9]*', url):
                return re.search('\/u\/([0-9]*)', url).group(1)
            # No numeric id in the URL: load the page and look it up there.
            req = urllib2.Request('http://weibo.cn' + url)
            req = self._process_req(req)
            m = urllib2.urlopen(req, timeout=10).read()
            return re.search(r'\/([0-9]*)\/info', m).group(1)
        elif type == 'path':
            return re.search(r'\/([^?]*)\?', url).group(1)

    def _get_weibo(self, page=1):
        '''
        Fetch one page of the weibo.cn timeline and turn it into messages.

        Every element of class ``c`` whose id starts with ``M_`` is
        converted into a dict and wrapped in ``self.Message``. An entry
        that raises while being parsed is logged and skipped.

        :param page: Page number placed in the ``page`` query parameter.
        :return: A ``snstype.MessageList`` with one message per parsed entry.
        '''
        #FIXME: repost and comment counts should be obtained by analysing the DOM rather than with regular expressions (to avoid clashing with the content)
        #FIXME: for a reposted weibo, the information about the original weibo is insufficient
        req = urllib2.Request('http://weibo.cn/?gsid=' + self.token['gsid'] +
                              '&page=%d' % (page))
        req = self._process_req(req)
        m = urllib2.urlopen(req, timeout=10).read()
        h = lxml.html.fromstring(m)
        weibos = []
        for i in h.find_class('c'):
            try:
                if i.get('id') and i.get('id')[0:2] == 'M_':
                    weibo = None
                    if i.find_class('cmt'):  # reposted weibo
                        weibo = {
                            'uid':
                            self._get_uid_by_pageurl(
                                i.find_class('nk')[0].attrib['href'],
                                self.jsonconf['uidtype']),
                            'author':
                            i.find_class('nk')[0].text,
                            'id':
                            i.get('id')[2:],
                            # Keeps only the first space-separated token
                            # of the 'ct' text.
                            'time':
                            i.find_class('ct')[0].text.encode('utf-8').strip(
                                ' ').split(' ')[0].decode('utf-8'),
                            # Filled in below from the repost reason.
                            'text':
                            None,
                            'orig': {
                                'text':
                                unicode(i.find_class('ctt')[0].text_content()),
                                'author':
                                re.search(
                                    u'转发了\xa0(.*)\xa0的微博',
                                    i.find_class('cmt')
                                    [0].text_content()).group(1),
                                'comments_count':
                                0,
                                'reposts_count':
                                0
                            },
                            'comments_count':
                            0,
                            'reposts_count':
                            0
                        }
                        # The parent of the last 'cmt' element carries the
                        # repost reason and the repost's own counters.
                        parent = i.find_class('cmt')[-1].getparent()
                        retweet_reason = re.sub(
                            r'转发理由:(.*)赞\[[0-9]*\] 转发\[[0-9]*\] 评论\[[0-9]*\] 收藏.*$',
                            r'\1', parent.text_content().encode('utf-8'))
                        weibo['text'] = retweet_reason.decode('utf-8')
                        # Group 2 is the number after 转发 (repost), group 3
                        # the one after 评论 (comment).
                        zf = re.search(
                            r'赞\[([0-9]*)\] 转发\[([0-9]*)\] 评论\[([0-9]*)\]',
                            parent.text_content().encode('utf-8'))
                        if zf:
                            weibo['comments_count'] = int(zf.group(3))
                            weibo['reposts_count'] = int(zf.group(2))
                        # Same layout for the counters prefixed with 原文
                        # (original post).
                        zf = re.search(
                            r'赞\[([0-9]*)\] 原文转发\[([0-9]*)\] 原文评论\[([0-9]*)\]',
                            i.text_content().encode('utf-8'))
                        if zf:
                            weibo['orig']['comments_count'] = int(zf.group(3))
                            weibo['orig']['reposts_count'] = int(zf.group(2))
                    else:
                        # Plain status: the counter keys only exist when
                        # the regular expression below matches.
                        weibo = {
                            'author':
                            i.find_class('nk')[0].text,
                            'uid':
                            self._get_uid_by_pageurl(
                                i.find_class('nk')[0].attrib['href'],
                                self.jsonconf['uidtype']),
                            # Drops the first character of the 'ctt' text.
                            'text':
                            i.find_class('ctt')[0].text_content()[1:],
                            'id':
                            i.get('id')[2:],
                            'time':
                            i.find_class('ct')[0].text.encode('utf-8').strip(
                                ' ').split(' ')[0].decode('utf-8')
                        }
                        zf = re.search(
                            r'赞\[([0-9]*)\] 转发\[([0-9]*)\] 评论\[([0-9]*)\]',
                            i.text_content().encode('utf-8'))
                        if zf:
                            weibo['comments_count'] = int(zf.group(3))
                            weibo['reposts_count'] = int(zf.group(2))
                    if i.find_class('ib'):
                        #FIXME: Still not able to process a collections of pictures
                        # Only the first 'ib' image is kept; its thumbnail
                        # path segment is swapped for the original-size one.
                        weibo['attachment_img'] = i.find_class('ib')[0].get(
                            'src').replace('wap128', 'woriginal')
                    weibos.append(weibo)
            except Exception, e:
                # A single unparsable entry is logged and skipped.
                logger.warning("Catch exception: %s" % (str(e)))
        # Wrap the raw dicts in the channel's Message type.
        statuslist = snstype.MessageList()
        for i in weibos:
            statuslist.append(
                self.Message(i,
                             platform=self.jsonconf['platform'],
                             channel=self.jsonconf['channel_name']))
        return statuslist
def home_timeline(self, count=20):
    '''
    Request the miniblog home timeline from the Douban client.

    :param count: Passed positionally to
                  ``self.client.miniblog.home_timeline``.
    :return: An empty ``snstype.MessageList`` when the request raises.
    '''
    miniblog = self.client.miniblog
    try:
        jsonlist = miniblog.home_timeline(count)
    except Exception as err:
        logger.warning("DoubanAPIError, %s", err)
        return snstype.MessageList()