class katsomo():
	"""Small HTTP client for the mobile Katsomo site (m.katsomo.fi).

	Each instance owns an ``LWPCookieJar``, a cookie-aware ``urllib2`` opener
	and the user-agent string that ``getPage`` attaches to every request.
	"""

	# Endpoints on the mobile site. 'programs' and 'videolink' end in an open
	# query parameter, so callers presumably append an id to them.
	URL = {
		'login' : 'http://m.katsomo.fi/katsomo/login',
		'programdir' : 'http://m.katsomo.fi/katsomo/programs',
		'programs' : 'http://m.katsomo.fi/katsomo/?treeId=',
		'videolink' : 'http://m.katsomo.fi/?progId='
	}
	# Class-level flag; not read by any of the methods visible in this class.
	clearCache=False

	def __init__(self, username="", password="", cookie_file=""):
		"""Create the cookie jar, the URL opener and the user agent.

		Args:
			username: Account name. An empty string selects anonymous use:
				``noLogin`` is set to True and saved cookies are not loaded.
			password: Account password; stored only when a username is given.
			cookie_file: File name handed to ``LWPCookieJar`` for cookie storage.
		"""
		self.cj = LWPCookieJar(cookie_file)
		self.noLogin = (username == "")
		if not self.noLogin:
			self.username = username
			self.password = password
			try:
				# Pick up cookies saved by an earlier session, including
				# session cookies that would normally be discarded.
				self.cj.revert(ignore_discard = True)
			except IOError:
				# Best effort: without a readable cookie file, start empty.
				pass
		# NOTE(review): 'hq' presumably asks the site for high-quality
		# streams - confirm against the site's behaviour.
		self.cj.set_cookie(self.makeCookie('hq','1'))
		self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
		# iPhone Safari user agent; presumably needed so the mobile site serves
		# its mobile pages - TODO confirm.
		self.user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'

	def makeCookie(self, name, value):
		"""Return a ``Cookie`` called ``name`` holding ``value``.

		The cookie is bound to the domain "katsomo.fi" and the path "/", is not
		marked secure, has no expiry time and is not flagged for discarding.
		"""
		return Cookie(
			version=0,
			name=name,
			value=value,
			port=None,
			port_specified=False,
			domain="katsomo.fi",
			domain_specified=True,
			domain_initial_dot=False,
			path="/",
			path_specified=True,
			secure=False,
			expires=None,
			discard=False,
			comment=None,
			comment_url=None,
			rest=None
		)

	def getPage(self, url, postvars=None, header_data=None):
		"""Request ``url`` through the cookie-aware opener and return the body.

		Args:
			url: Address to request.
			postvars: Optional mapping of form fields; it is URL-encoded into
				the request body. Defaults to no fields.
			header_data: Optional mapping of extra request headers.

		Returns:
			The response body exactly as returned by the response's ``read()``.

		Raises:
			NetworkError: If opening the request raises ``HTTPError`` or
				``URLError``.
		"""
		# None sentinels instead of ``{}`` defaults, so no dict object is
		# shared between calls.
		if postvars is None:
			postvars = {}
		if header_data is None:
			header_data = {}
		# A body is always supplied (an empty string when there are no fields),
		# as before; urllib2 sends a POST whenever the data argument is given.
		req = urllib2.Request(url, urllib.urlencode(postvars), header_data)
		req.add_header('user-agent', self.user_agent)
		try:
			# Hand the body back to the caller; it used to be read and dropped.
			return self.opener.open(req).read()
		except HTTPError as error:
			# HTTPError is handled first because it is a subclass of URLError.
			raise NetworkError('HTTPError: %s' % error)
		except URLError as error:
			raise NetworkError('URLError: %s' % error)
# Example #2
# 0
class zhihuCrawler(object):
	"""Crawler state for www.zhihu.com: login settings plus a file-backed cookie jar.

	Creating an instance also installs a cookie-aware urllib2 opener as the
	process-wide default opener.
	"""
	
	def __init__(self, login_url, login_info, header):
		"""Store the login settings and wire up cookie handling.

		The three arguments are stored unchanged on the instance; nothing in
		the code visible here uses them further.

		login_url  -- presumably the address of the login endpoint (confirm
		              against the login code).
		login_info -- presumably the credentials / form fields to submit
		              (confirm against the login code).
		header     -- presumably the HTTP headers to send with requests
		              (confirm against the login code).
		"""
		self.base_url = 'http://www.zhihu.com'
		self.login_url = login_url
		self.login_info = login_info
		self.header = header
		# Cookies live in 'cookies.jar', a path relative to the working directory.
		self.cookies = LWPCookieJar('cookies.jar')

		try:
			# Reload cookies saved by an earlier run.
			self.cookies.revert()
		#	print open(cookies.filename).read()
		except Exception,e:
			# Any failure to load is treated as a first run; the jar stays empty.
			print 'First time to login, setting up cookies...'

		# Install the opener globally so requests made through urllib2's
		# default opener carry these cookies.
		opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookies))
		urllib2.install_opener(opener)