def getLocation(phoneNumber):
    """Query the lookup service for *phoneNumber* and return one response field.

    The request URL is ``serviceUrl`` with the number appended as the ``m``
    query parameter. The response body is cut on every ``<br/><br/>`` (and on
    any literal ``||`` it already contains); the second piece is returned.
    Presumably that piece is the location text -- confirm against the
    service's actual response format.

    Raises:
        IndexError: if the response yields fewer than two pieces.
    """
    grabber = HtmlGraber()
    query_url = serviceUrl + "?m=" + phoneNumber
    # Normalise the HTML separator to "||" first so one split handles both.
    segments = grabber.doGrab(query_url).replace("<br/><br/>", "||").split("||")
    return segments[1]
# Download every image linked from the board's index pages, starting at
# `homeurl` and stepping to the "previous" page TOTALPAGES times.
homeurl = "https://bbs.sjtu.edu.cn/bbstdoc,board,PPPerson.html"
pages = getHomePage(homeurl)
prePage = getPrePage(homeurl)

# Resolve the date once: the directory is created for this value only.
dateNow = getCurrentDate()
makeDirWhenNeeded(savePathRoot, dateNow)

# One writer is enough for the whole run.
# NOTE(review): assumes FileUtil keeps no per-page state -- confirm.
fileUtil = FileUtil()

for i in range(TOTALPAGES):
    for pageUrl in pages:
        images = grabImageUrls(pageUrl)
        for image in images:
            # image[0] is used as the file name, image[1] as the URL to fetch.
            fileName = image[0]
            # Reuse dateNow so the file lands in the directory created above,
            # even if getCurrentDate() would return a different value by now.
            filePath = savePathRoot + dateNow + "\\" + fileName
            content = htmlGraber.doGrab(image[1])
            fileUtil.binaryWrite(filePath, content)
            # Single-argument parenthesised print behaves the same on Python 2.
            print(fileName + " saved!")
    # Step back one index page for the next round.
    pages = getHomePage(prePage)
    prePage = getPrePage(prePage)
# Fetch up to `maxCount` photo pages, counting down from `firstIndex`, and
# save each photo found under <homeSavePath>renren\<index>.jpg.
maxCount = 50
firstIndex = 249317678 - 50
fileUtil = FileUtil()
homeSavePath = "E:\\temp\\"


def grabImageUrl(picUrl):
    """Return the ``src`` of the first ``<img id="photo">`` on the page.

    The page at *picUrl* is fetched through ``htmlGraber`` and parsed with
    BeautifulSoup.

    Returns:
        The ``src`` attribute value, or ``""`` when the page has no matching
        ``<img>`` or the matching tag carries no ``src`` attribute.
    """
    htmlContent = htmlGraber.doGrab(picUrl)
    soup = BeautifulSoup(htmlContent)
    imgurls = soup.findAll('img', id="photo")
    if not imgurls:
        return ""
    # .get() keeps a src-less tag on the same "not found" path instead of
    # raising KeyError and aborting the caller's loop.
    return imgurls[0].get("src", "")


fileUtil.makeDirWhenNeeded(homeSavePath, "renren")
for index in range(maxCount):
    picId = firstIndex - index
    picUrl = PicUrlHead + str(picId)
    # Single-argument parenthesised print behaves the same on Python 2.
    print(picUrl + "\n")
    imgurl = grabImageUrl(picUrl)
    if imgurl:
        imgContent = htmlGraber.doGrab(imgurl)
        # Files are numbered by loop position, not by picture id.
        fileUtil.binaryWrite(homeSavePath + "renren\\" + str(index) + ".jpg", imgContent)