# Assumed imports for these snippets (not shown in the original):
# from urllib.request import Request, urlopen
# from bs4 import BeautifulSoup as soup1
def get_player_static(self, url, pozisyon):
    # Fetch the player's performance page ("profil" -> "leistungsdaten")
    # and collect the totals row (<tfoot>) of the stats table.
    b = []
    stats_url = self.base_url[:-1] + url.replace('profil', 'leistungsdaten')
    print(stats_url)
    client = Request(stats_url, headers={"User-Agent": "Mozilla/5.0"})
    page = urlopen(client).read()
    soup = soup1(page, 'html.parser')
    tfoots = soup.find_all('tfoot')
    if tfoots:
        # Only the first <tfoot> (the totals row) is parsed.
        c = str(tfoots[0].find_all('td')).split('</td>')
        for j in range(2, len(c) - 1):
            # Keep the text after the first '>', i.e. strip the opening tag.
            b.append(c[j][c[j].index('>') + 1:])

    print(pozisyon)
    if pozisyon == "Kaleci":  # "Kaleci" = goalkeeper (Turkish)
        # Goalkeeper tables are missing one column; pad it in the middle.
        b.insert(2, "-")
    else:
        # Outfield tables lack the two goalkeeper-specific columns.
        b.append("-")
        b.append("-")
    print(len(b))
    b.clear()  # Note: the collected stats are discarded; nothing is returned.
def get_player(self, url, takim):  # takes a squad-page URL and a team name
    a = 0
    pozisyon = []
    client = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    page = urlopen(client).read()
    soup = soup1(page, 'html.parser')
    for tr in soup.findAll('tbody'):
        ozellik = ""
        # Only the second <tbody> (a == 1) holds the squad table.
        if a == 1:
            for i in tr.find_all('td'):
                b = i.get_text()
                if "€" in b:
                    # The market-value cell ends a player row: split the
                    # accumulated cell texts and normalise the row.
                    ozellik = ozellik + b + "+"
                    c = ozellik.split("+")
                    # c[5] holds "<birth date> (<age>)"; pull the two apart.
                    yas = c[5].split("(")[1][:-1]
                    dogum_tarihi = c[5].split("(")[0]
                    c.pop(1)
                    c.insert(4, dogum_tarihi)
                    c.insert(5, yas)
                    c.pop(6)
                    self.register(c, takim)
                    pozisyon.append(c[3])  # remember the position for later
                    ozellik = ""
                else:
                    ozellik = ozellik + b + "+"
            sayac = 0
            p_sayac = 0
            # Each player appears twice as a "profil" link, so take every
            # second one and fetch that player's stats page.
            for i in tr.find_all('a'):
                if "profil" in i.get('href'):
                    if sayac % 2 == 0:
                        self.get_player_static(i.get('href'), pozisyon[p_sayac])
                        p_sayac = p_sayac + 1
                    sayac = sayac + 1
        a = a + 1
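# Illustration of the birth-date/age split above (the cell format is an
# assumption based on how c[5] is sliced; the value is hypothetical):
#   c[5] = "Oct 31, 2002 (18)"
#   dogum_tarihi = c[5].split("(")[0]       -> "Oct 31, 2002 "
#   yas          = c[5].split("(")[1][:-1]  -> "18"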
def get_team(self):
    # Walk each league overview page and collect squad ("/kader/verein/")
    # links into the matching per-league team list.
    for i in self.my_url:
        url = self.base_url + i
        client = Request(url, headers={"User-Agent": "Mozilla/5.0"})
        page = urlopen(client).read()
        soup = soup1(page, 'html.parser')

        for link in soup.find_all('a'):
            if "/kader/verein/" in str(link.get("href")):
                league = i.split("/")[0]
                if league == "premier-league":
                    if link.get('href') not in self.en_team:
                        self.en_team.append(link.get('href'))
                elif league == "super-lig":
                    if link.get('href') not in self.tr_team:
                        self.tr_team.append(link.get('href'))
                elif league == "laliga":
                    if link.get('href') not in self.isp_team:
                        self.isp_team.append(link.get('href'))
                elif league == "serie-a":
                    if link.get('href') not in self.ital_team:
                        self.ital_team.append(link.get('href'))
                else:
                    print("hatalı")  # "hatalı" = invalid (unknown league)
Example #4
def make_soup(url):
    # Fetch a URL and return it parsed as a BeautifulSoup tree.
    thepage = urlopen(url)
    soupdata = soup1(thepage, 'html.parser')
    return soupdata
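A quick usage sketch (the URL is a placeholder):

page_soup = make_soup("https://example.com")
print(page_soup.title)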
Example #5
csvfile.write(headers)  # csvfile and headers are defined earlier in the script

dircount = 1
url = "https://www.amazon.com/amp/mobiles/1715147/buy"
# Change executable_path to your own chromedriver download path.
browser = webdriver.Chrome(executable_path='/Users/shrinidhipg/Downloads/chromedriver')
# Read product URLs from stdin until EOF ends the loop.
while True:
    try:
        url = input()
    except EOFError:
        break
# (Original author's note: after uncommenting the loop above, indent all lines below one level.)
    browser.get(url)

    html = browser.page_source
    soup = soup1(html, "lxml")
    # Replace <br> tags with periods so line breaks survive .text extraction.
    for brk in soup.findAll('br'):
        brk.replace_with('.')
    description = ""
    for detail in soup.findAll("p", {"class": "pdp-product-description-content"}):
        print(detail.text)
        description = description + detail.text + "."
    description = "\"" + description + "\""  # wrap in quotes for the CSV line
    try:
        print(soup.find("h1", {"class": "pdp-title"}).text)
        title = "\"" + soup.find("h1", {"class": "pdp-title"}).text + "\""
    except AttributeError:  # find() returned None: page has no title element
        title = "none"
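The hand-rolled quoting ("\"" + ... + "\"") breaks if a scraped field itself contains a double quote. A sketch of the same row written with Python's csv module, which escapes fields correctly; the file name and column layout are assumptions:

import csv

with open('products.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([title, description])  # one product per row, safely quoted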