def profile(self, renrenId, no_use=None):
    """Download one account's profile page and parse it.

    Example: profile('234234') --> (record, timecost)

    The page is fetched with ``self._download`` using the
    ``'profile_detail'`` URL template. If the download yields ``None`` the
    result is ``(None, 'timout')``. Otherwise the page is parsed with
    ``parse.profile_detail`` when the detail marker is found near the start
    of the page, and with ``parse.profile_mini`` in every other case.

    renrenId -- account id substituted into the URL template.
    no_use   -- accepted but never read by this method.

    Returns ``(record, timecost)``: the parser's result (presumably a dict,
    as in the example above -- confirm against ``parse``) and the elapsed
    time passed through ``format_time``.
    """
    started_at = time.time()
    detail_style = 'profile_detail'
    page = self._download(urls[detail_style].format(renrenId))

    # Guard clause: nothing was downloaded, report the error pair.
    if page is None:
        return None, 'timout'

    # The detail layout is recognised by a marker within the first 30
    # characters of the page.
    has_detail_marker = page[0:30].find('<div class="col-left">') > -1
    if has_detail_marker:
        style = detail_style
        parser = parse.profile_detail
    else:
        # TODO: check whether account safety
        style = 'profile_mini'
        parser = parse.profile_mini

    record = parser(itemReg[style].findall(page))
    elapsed = time.time() - started_at
    return record, format_time(elapsed)
def test_profile_mini(self):
    """Check ``parse.profile_mini`` against hand-written HTML fixtures.

    Each case pairs an input (an HTML ``<ul>`` snippet, or ``None``) with
    the exact value ``parse.profile_mini`` is expected to return for it.
    The fixtures cover two list layouts ("information-ul" and "user-info"),
    each with and without extra whitespace, plus an empty list and ``None``.
    """
    # NOTE(review): the fixture strings are kept character-for-character,
    # including the backslash-space sequences, which look like former
    # backslash-newline continuations -- confirm against the upstream file
    # before reformatting them.
    cases = (
        # full items with space
        (
            """<ul class="information-ul" id="information-ul" onclick href='http:'">\\n\n\t\\t\ <li class="school"> \n\\n\t\\t<span>\n就读于西北大学\n</span>\t\\t</li>\n\t\ <li class="birthday">\n\\n<span class="link">\t男生\n\\n</span>\\n\n<span> ,2月13日\\n</span>\t\\t</li> \ <li class="hometown">\n\\n来自内蒙古\n\\n<a stats="info_info">\n延安市\n</a>\n\\n</li>\n\\n\ <li class="address">\\n现居\\n山南地区 </li> </ul>""",
            {'school':'就读于西北大学', 'gender':'男生 ', 'birthday':'2月13日', 'hometown':'来自内蒙古 延安市', 'address':'现居 山南地区'},
        ),
        # full items with no space
        (
            """<ul class="information-ul" id="information-ul" onclick href='http:'">\ <li class="school"><span>就读于西北大学</span></li> <li class="birthday"><span class="link">男生</span><span>,2月13日</span></li> <li class="hometown">来自内蒙古<a stats="info_info">延安市</a></li> <li class="address">现居山南地区</li></ul>""",
            {'hometown':'来自内蒙古延安市','school':'就读于西北大学','birthday':'2月13日','gender':'男生','address':'现居山南地区'},
        ),
        # full items with space. basic
        (
            """<ul class="user-info clearfix">\ <li class="gender">\n\t\\t<span class="link">\\n男生\t</span></li>\t\\t\n\\n\ <li class="hometown">\n\t\\n来自\\n<span>\\n\n山东\n\\t</span>\n\\n <a href="">烟台市\t\\t\n\\n</a></li>\ <li class="school">\n\\n在\t\\t<span class="link">\t\\tFachhochschule Aachen\t\\t</span>\n\\t读书\\t</li></ul>""",
            {'gender': '男生', 'school': '在 Fachhochschule Aachen 读书', 'hometown': '来自 山东 烟台市'},
        ),
        # full items without space
        (
            """<ul class="user-info clearfix"><li class="gender">\ <span class="link">男生</span></li>\ <li class="hometown">来自<span>山东</span><a href="">烟台市</a></li>\ <li class="school">在<span class="link">Fachhochschule Aachen</span>读书</li></ul>""",
            {'gender':'男生', 'school':'在Fachhochschule Aachen读书','hometown':'来自山东烟台市'},
        ),
        # no items or None
        ("""<ul class="user-info clearfix"></ul>""", {}),
        (None, None),
    )
    for content, expt in cases:
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(parse.profile_mini(content), expt)