def panel(url):
    """List the category blocks of the site index as directory entries.

    url -- despite the name, this arrives as a numeric string: it selects
           which "blok-liste" <div> to walk (see div[int(url)] below), and
           is then reassigned to each item's href inside the loop.
    """
    # NOTE(review): `web` is not defined in this view — presumably a
    # module-level variable holding the page address; confirm it is not
    # meant to be the `url` parameter.
    link = araclar.get_url(web)
    soup = BS(link.decode('utf-8', 'ignore'))
    div = soup.findAll("div", {"class": "blok-liste"})
    # Finds the series home pages (translated from Turkish:
    # "dizi anasayfalari bulur").
    for li in div[int(url)].findAll('li'):
        url = li.a['href']          # reuse of the parameter name: now the item link
        name = li.a.text
        name = name.encode("utf-8")
        # Registers a directory entry pointing at the kategoriler() handler.
        araclar.addDir(fileName, name, "kategoriler(url)", url, "YOK")
def run(self): #resp = urllib2.urlopen(self.url) #print self.url, resp.getcode() req = urllib2.Request(url=self.url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string
def geturladdress(keywords, type, filename_number, filename=None):
    """Collect {result title: result domain} from paged search results.

    keywords, type -- forwarded verbatim to getresponse().
    number         -- a decimal string; results are fetched in pages of 100,
                      so int(number) // 100 pages are requested.
    filename       -- destination handed to writetofile() with the mapping.

    Note: `type` shadows the builtin; kept to preserve the call interface.
    """
    urltitle = {}
    # int() replaces the deprecated string.atoi() — identical for decimal
    # strings; if number < 100 no pages are fetched and an empty mapping
    # is written (original behavior, preserved).
    pagecount = int(filename_number) // 100
    for idpage in range(pagecount):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        for result in soup.findAll('li', {'class': 'g'}):
            title_a = result.find('a')
            if not title_a:  # guard clause replaces continue/else nesting
                continue
            title = html_unescape(''.join(title_a.findAll(text=True)))
            urltitle[title] = getdomain(title_a['href'])
    writetofile(filename, urltitle)
def geturladdress(keywords, type, number, filename):
    """Collect {result title: result domain} from paged search results.

    NOTE(review): this is a byte-for-byte duplicate of the geturladdress
    defined earlier in this file; as the later definition it is the one
    that wins at import time — consider deleting one copy.

    keywords, type -- forwarded verbatim to getresponse().
    number         -- a decimal string; results are fetched in pages of 100.
    filename       -- destination handed to writetofile() with the mapping.

    Note: `type` shadows the builtin; kept to preserve the call interface.
    """
    urltitle = {}
    # int() replaces the deprecated string.atoi() — identical for decimal
    # strings; if number < 100 no pages are fetched and an empty mapping
    # is written (original behavior, preserved).
    pagecount = int(number) // 100
    for idpage in range(pagecount):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        for result in soup.findAll('li', {'class': 'g'}):
            title_a = result.find('a')
            if not title_a:  # guard clause replaces continue/else nesting
                continue
            title = html_unescape(''.join(title_a.findAll(text=True)))
            urltitle[title] = getdomain(title_a['href'])
    writetofile(filename, urltitle)
import re,urllib,urllib2 from BeautifulSoup import BeautifulSOAP url= 'http://www.xiami.com/artist/top/id/1234' headers = {'User-Agent':"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0"} req = urllib2.Request(url=url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string #songlist = re.findall(pattern,string) #songlist = re.findall(pattern,content) #for song in songlist: # print song # print "end"