コード例 #1
0
ファイル: scrap_atoz.py プロジェクト: noba3/KoTos
def scrap_itemorvideoid_atoz(url, filename):
    """Scrape a Sky Go A-Z result table and cache the rows with pickle.

    The page at ``url`` is downloaded, the ``<tbody class="ResultRows">``
    table is walked row by row, and one list per row is collected.  The
    collected list is pickled to ``filename`` and returned.

    Each row list starts as the text of every ``<td>`` except the last one.
    For every ``<a href=...>`` in the row, the link, the mode string derived
    from the link, and the image taken from the scraped metadata are then
    inserted at indexes 1, 2 and 3 (so with several links in one row the
    values of the last link end up in those positions).

    Args:
        url: Absolute URL of the A-Z listing page.
        filename: Path of the pickle cache file to write.

    Returns:
        The list of row lists.  An empty list is returned, and no cache file
        is written, when the page contains no result rows.
    """
    sky_main = "http://go.sky.com/"
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    result_body = soup.find("tbody", "ResultRows")
    rows = result_body.findAll("tr") if result_body is not None else []
    if not rows:
        # Nothing to cache; writing an empty cache would make the empty
        # listing stick on the next run.
        return []

    # Count the rows actually iterated so the percentage ends at exactly 100.
    total = len(rows)
    atoz_database = []

    pDialog = xbmcgui.DialogProgress()
    pDialog.create('Skygo V2', ('Caching %s items' % total), "Will load instantly next time")
    try:
        for index, row in enumerate(rows, 1):
            # Drop non-ASCII characters instead of letting str() raise on
            # them, matching how grid titles are handled.
            cells = [cell.text.encode('ascii', 'ignore') for cell in row.findAll("td")]
            if cells:
                # The last column is not part of the cached record.
                entry = cells[:-1]
                for link in row.findAll('a', href=True):
                    item_url = str(link['href'])
                    # The mode is the text between "ent/" (searched from
                    # offset 30) and the following "Id", inclusive.
                    start_mode = item_url.find("ent/", 30) + 4
                    end_mode = item_url.find("Id", start_mode) + 2
                    next_mode = str(item_url[start_mode:end_mode])
                    full_item_url = sky_main + item_url
                    if next_mode == "seriesId":
                        meta_info = meta.scrap_meta_series(full_item_url)
                    else:
                        meta_info = meta.scrap_meta_episode(full_item_url)
                    try:
                        img = meta_info[1]
                    except (IndexError, KeyError, TypeError):
                        # Metadata without an image: fall back to no image.
                        img = ""
                    entry.insert(1, item_url)
                    entry.insert(2, next_mode)
                    entry.insert(3, img)
                atoz_database.append(entry)
            pDialog.update(int(index * 100.0 / total))
    finally:
        pDialog.close()

    with open(filename, "wb") as cache_file:
        pickle.dump(atoz_database, cache_file)
    return atoz_database
コード例 #2
0
def grid_scraper(menu_url, filename):
    """Scrape a Sky Go grid page and cache its items with pickle.

    The page at ``http://go.sky.com/`` + ``menu_url`` is downloaded and every
    ``<div>`` inside ``<div class="listRows ATI_listRows">`` is inspected.  A
    div yields an item when it contains both an ``<a class="teaserImageLnk">``
    link and a ``<span class="clickAttractor">`` followed by a sibling node
    (used as the title).  For each item the metadata page is scraped to
    obtain an image.  The resulting list is pickled to ``filename``.

    Args:
        menu_url: Path of the grid page, relative to ``http://go.sky.com/``.
        filename: Path of the pickle cache file to write.

    Returns:
        A list of ``[title, item_url, img, nextMode]`` lists.  An empty list
        is returned, and no cache file is written, when no item is found.
    """
    sky_main = "http://go.sky.com/"
    full_url = sky_main + menu_url
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
    req = urllib2.Request(full_url, None, headers)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    gridItems = soup.find("div", "listRows ATI_listRows")
    if gridItems is None:
        return []

    # Pass 1: collect (title, link) pairs.  Both names are reset for every
    # div so a div without its own link/title cannot reuse the values of the
    # previous one (or raise NameError on the very first div).
    found = []
    for block in gridItems.findAll("div"):
        item_url = None
        title = None
        for link in block.findAll('a', "teaserImageLnk", href=True):
            item_url = str(link['href'])
        # The original passed ['title'] as the positional ``recursive``
        # argument, where it only acted as a truthy value (the default).
        for marker in block.findAll('span', "clickAttractor"):
            sibling = marker.next_sibling
            if sibling is not None:
                title = str(sibling.encode('ascii', 'ignore'))
        if item_url is None or title is None:
            continue
        found.append((title, item_url))

    if not found:
        # Nothing to cache; writing an empty cache would make the empty
        # listing stick on the next run.
        return []

    # Pass 2: fetch metadata for each item, reporting real progress.
    total = len(found)
    grid_database = []

    pDialog = xbmcgui.DialogProgress()
    pDialog.create('Skygo V2', ('Caching %s items' % total), "Will load instantly next time")
    try:
        for index, (title, item_url) in enumerate(found, 1):
            # The mode is the text between "tent/" (searched from offset 15)
            # and the following "Id", inclusive.
            start_mode = item_url.find("tent/", 15) + 5
            end_mode = item_url.find("Id", start_mode) + 2
            next_mode = str(item_url[start_mode:end_mode])
            full_item_url = sky_main + item_url
            if next_mode == "seriesId":
                meta_info = meta.scrap_meta_series(full_item_url)
            else:
                meta_info = meta.scrap_meta_episode(full_item_url)
            try:
                img = meta_info[1]
            except (IndexError, KeyError, TypeError):
                # Metadata without an image: fall back to no image.
                img = ""
            grid_database.append([title, item_url, img, next_mode])
            pDialog.update(int(index * 100.0 / total))
    finally:
        pDialog.close()

    with open(filename, "wb") as cache_file:
        pickle.dump(grid_database, cache_file)
    return grid_database