Example #1
0
	def profile(self, renrenId, no_use=None):
		"""Download and parse the profile page for ``renrenId``.

		Usage: profile('234234') --> (record, timecost)

		``record`` is whatever the selected parser returns (the original
		docstring describes it as a dict) and ``timecost`` is the elapsed
		wall-clock time passed through ``format_time``.
		Returns ``(None, 'timout')`` when the download yields ``None``.
		``no_use`` is accepted but never read.
		"""
		started_at = time.time()
		page = self._download(urls['profile_detail'].format(renrenId))
		if page is None:
			return None, 'timout'

		# The detailed layout is recognised by its marker appearing within
		# the first 30 characters of the page; anything else is treated as
		# the mini layout.
		if '<div class="col-left">' in page[:30]:
			layout, parser = 'profile_detail', parse.profile_detail
		else:
			# TODO: check whether account safety
			layout, parser = 'profile_mini', parse.profile_mini

		record = parser(itemReg[layout].findall(page))
		elapsed = time.time() - started_at
		return record, format_time(elapsed)
Example #2
0
	def profile(self, renrenId, no_use=None):
		"""Download and parse the profile page for ``renrenId``.

		Usage: profile('234234') --> (record, timecost)

		``record`` is whatever the selected parser returns (the original
		docstring describes it as a dict) and ``timecost`` is the elapsed
		wall-clock time passed through ``format_time``.
		Returns ``(None, 'timout')`` when the download yields ``None``.
		``no_use`` is accepted but never read.
		"""
		started_at = time.time()
		page = self._download(urls['profile_detail'].format(renrenId))
		if page is None:
			return None, 'timout'

		# The detailed layout is recognised by its marker appearing within
		# the first 30 characters of the page; anything else is treated as
		# the mini layout.
		if '<div class="col-left">' in page[:30]:
			layout, parser = 'profile_detail', parse.profile_detail
		else:
			# TODO: check whether account safety
			layout, parser = 'profile_mini', parse.profile_mini

		record = parser(itemReg[layout].findall(page))
		elapsed = time.time() - started_at
		return record, format_time(elapsed)
Example #3
0
	def test_profile_mini(self):
		"""Check ``parse.profile_mini`` against hand-written ``<ul>`` snippets.

		``contents`` maps each input handed to ``parse.profile_mini`` to the
		value it is expected to return. The inputs cover two markup variants
		(``information-ul`` and ``user-info clearfix``), each with and
		without padding around the text, plus an empty list and ``None``.
		"""
		# NOTE: the leading tabs on continuation lines and the escaped
		# backslash sequences inside the triple-quoted keys are part of the
		# test data; do not re-indent or "clean up" those literals.
		contents={
			# information-ul markup: all items, padded with whitespace and literal backslash-n / backslash-t text
			"""<ul class="information-ul" id="information-ul" onclick href='http:'">\\n\n\t\\t\
			<li class="school"> \n\\n\t\\t<span>\n就读于西北大学\n</span>\t\\t</li>\n\t\
			<li class="birthday">\n\\n<span class="link">\t男生\n\\n</span>\\n\n<span> ,2月13日\\n</span>\t\\t</li> \
			<li class="hometown">\n\\n来自内蒙古\n\\n<a stats="info_info">\n延安市\n</a>\n\\n</li>\n\\n\
			<li class="address">\\n现居\\n山南地区 </li> </ul>"""
			:{'school':'就读于西北大学',
			'gender':'男生 ',
			'birthday':'2月13日',
			'hometown':'来自内蒙古 延安市',
			'address':'现居 山南地区'},
			# information-ul markup: all items, no padding inside the tags
			"""<ul class="information-ul" id="information-ul" onclick href='http:'">\
			<li class="school"><span>就读于西北大学</span></li>
			<li class="birthday"><span class="link">男生</span><span>,2月13日</span></li>
			<li class="hometown">来自内蒙古<a stats="info_info">延安市</a></li>
			<li class="address">现居山南地区</li></ul>"""
			:{'hometown':'来自内蒙古延安市','school':'就读于西北大学','birthday':'2月13日','gender':'男生','address':'现居山南地区'},
			# user-info markup: all items, padded with whitespace and literal backslash-n / backslash-t text
				"""<ul class="user-info clearfix">\
				<li class="gender">\n\t\\t<span class="link">\\n男生\t</span></li>\t\\t\n\\n\
				<li class="hometown">\n\t\\n来自\\n<span>\\n\n山东\n\\t</span>\n\\n <a href="">烟台市\t\\t\n\\n</a></li>\
				<li class="school">\n\\n在\t\\t<span class="link">\t\\tFachhochschule Aachen\t\\t</span>\n\\t读书\\t</li></ul>"""
				:{'gender': '男生', 'school': '在 Fachhochschule Aachen 读书', 'hometown': '来自 山东 烟台市'},
				# user-info markup: all items, no padding inside the tags
				"""<ul class="user-info clearfix"><li class="gender">\
				<span class="link">男生</span></li>\
				<li class="hometown">来自<span>山东</span><a href="">烟台市</a></li>\
				<li class="school">在<span class="link">Fachhochschule Aachen</span>读书</li></ul>"""
				:{'gender':'男生', 'school':'在Fachhochschule Aachen读书','hometown':'来自山东烟台市'},
				# a list with no items is expected to give {}, and None is expected to give None
				"""<ul class="user-info clearfix"></ul>""":{},None:None}
		for content,expt in contents.items():
			# assertEqual replaces the deprecated assertEquals alias, which
			# no longer exists on unittest.TestCase from Python 3.12 on.
			self.assertEqual(parse.profile_mini(content),expt)
Example #4
0
	def test_profile_mini(self):
		"""Check ``parse.profile_mini`` against hand-written ``<ul>`` snippets.

		``contents`` maps each input handed to ``parse.profile_mini`` to the
		value it is expected to return. The inputs cover two markup variants
		(``information-ul`` and ``user-info clearfix``), each with and
		without padding around the text, plus an empty list and ``None``.
		"""
		# NOTE: the leading tabs on continuation lines and the escaped
		# backslash sequences inside the triple-quoted keys are part of the
		# test data; do not re-indent or "clean up" those literals.
		contents={
			# information-ul markup: all items, padded with whitespace and literal backslash-n / backslash-t text
			"""<ul class="information-ul" id="information-ul" onclick href='http:'">\\n\n\t\\t\
			<li class="school"> \n\\n\t\\t<span>\n就读于西北大学\n</span>\t\\t</li>\n\t\
			<li class="birthday">\n\\n<span class="link">\t男生\n\\n</span>\\n\n<span> ,2月13日\\n</span>\t\\t</li> \
			<li class="hometown">\n\\n来自内蒙古\n\\n<a stats="info_info">\n延安市\n</a>\n\\n</li>\n\\n\
			<li class="address">\\n现居\\n山南地区 </li> </ul>"""
			:{'school':'就读于西北大学',
			'gender':'男生 ',
			'birthday':'2月13日',
			'hometown':'来自内蒙古 延安市',
			'address':'现居 山南地区'},
			# information-ul markup: all items, no padding inside the tags
			"""<ul class="information-ul" id="information-ul" onclick href='http:'">\
			<li class="school"><span>就读于西北大学</span></li>
			<li class="birthday"><span class="link">男生</span><span>,2月13日</span></li>
			<li class="hometown">来自内蒙古<a stats="info_info">延安市</a></li>
			<li class="address">现居山南地区</li></ul>"""
			:{'hometown':'来自内蒙古延安市','school':'就读于西北大学','birthday':'2月13日','gender':'男生','address':'现居山南地区'},
			# user-info markup: all items, padded with whitespace and literal backslash-n / backslash-t text
				"""<ul class="user-info clearfix">\
				<li class="gender">\n\t\\t<span class="link">\\n男生\t</span></li>\t\\t\n\\n\
				<li class="hometown">\n\t\\n来自\\n<span>\\n\n山东\n\\t</span>\n\\n <a href="">烟台市\t\\t\n\\n</a></li>\
				<li class="school">\n\\n在\t\\t<span class="link">\t\\tFachhochschule Aachen\t\\t</span>\n\\t读书\\t</li></ul>"""
				:{'gender': '男生', 'school': '在 Fachhochschule Aachen 读书', 'hometown': '来自 山东 烟台市'},
				# user-info markup: all items, no padding inside the tags
				"""<ul class="user-info clearfix"><li class="gender">\
				<span class="link">男生</span></li>\
				<li class="hometown">来自<span>山东</span><a href="">烟台市</a></li>\
				<li class="school">在<span class="link">Fachhochschule Aachen</span>读书</li></ul>"""
				:{'gender':'男生', 'school':'在Fachhochschule Aachen读书','hometown':'来自山东烟台市'},
				# a list with no items is expected to give {}, and None is expected to give None
				"""<ul class="user-info clearfix"></ul>""":{},None:None}
		for content,expt in contents.items():
			# assertEqual replaces the deprecated assertEquals alias, which
			# no longer exists on unittest.TestCase from Python 3.12 on.
			self.assertEqual(parse.profile_mini(content),expt)