コード例 #1
0
ファイル: weibo.py プロジェクト: mlnotes/weibo
	def get_followings(self, userid):
		"""Crawl every page of the listing linked from a user's profile.

		Fetches http://weibo.cn/<userid>, takes the second child element of
		the ``div.tip2`` block (presumably the "following" link with its
		count -- confirm against the live markup) and passes each listing
		page URL to ``get_users_by_url``. The page count assumes ten
		entries per page.

		Before a page is fetched its URL is written with
		``self.db.hset(self.pages, userid, url)``; ``self.db.save()`` is
		called once after all pages have been attempted. A page that still
		fails after ten attempts is skipped silently.

		Args:
			userid: identifier appended to http://weibo.cn/ to build the
				profile URL.

		Returns:
			An empty list when no usable link is found (``div.tip2``
			missing, fewer than two children, no digits in the link text,
			or no ``href``); otherwise ``None``.
		"""
		url = "http://weibo.cn/%s" % (userid)
		soup = bs(grasp.get_content(url))
		tip = soup.find('div', class_='tip2')
		if tip is None:
			return []

		children = tip.findChildren()
		if len(children) < 2:
			return []
		link = children[1]

		# .string is None when the tag holds more than one child node, so
		# fall back to '' instead of handing None to re.search.
		count = re.search(r"\d+", link.string or '')
		href = link.attrs.get('href')
		if count is None or not href:
			return []

		# Ceiling division: a partially filled last page still counts.
		pages = (int(count.group()) + 9) // 10

		furl = urljoin('http://weibo.cn/', href)
		for i in range(1, pages + 1):
			url = furl + '?page=%d' % i
			self.db.hset(self.pages, userid, url)
			print(url)

			# Retry up to 10 times, pausing 5 seconds after each failure.
			for j in range(10):
				if self.get_users_by_url(userid, url):
					break
				print(url)
				time.sleep(5)
			# Pause between pages (presumably to throttle requests).
			time.sleep(3)
		self.db.save()
コード例 #2
0
ファイル: weibo.py プロジェクト: mlnotes/weibo
	def get_users_by_url(self, userid, url):
		"""Record every user linked from one listing page.

		Downloads ``url`` and, for each ``<table>`` in the page, takes the
		first ``<a>`` element, drops the leading "/" from the path of its
		``href`` and passes the result to
		``self.add_following(userid, ...)``. Tables without a link, links
		without an ``href`` and hrefs with an empty path are skipped.

		Args:
			userid: forwarded unchanged as the first argument of
				``add_following``.
			url: page to download with ``grasp.get_content``.

		Returns:
			False when the download raises, True once the page has been
			processed.
		"""
		try:
			s = grasp.get_content(url)
		except Exception:
			# Deliberately broad so any fetch error is reported as a
			# failure the caller can retry; unlike a bare ``except`` this
			# lets KeyboardInterrupt and SystemExit propagate.
			return False

		soup = bs(s)
		for t in soup('table'):
			link = t.find('a')
			if link is None:
				continue
			u = link.attrs.get('href')
			if not u:
				continue
			# Path without its leading "/" is used as the followed user's id.
			following = urlsplit(u).path[1:].strip()
			if following:
				self.add_following(userid, following)
		return True