def scrap_itemorvideoid_atoz(url, filename):
    """Scrape an A-Z listing page and cache the parsed rows with pickle.

    Fetches *url*, walks every ``<tr>`` inside the ``<tbody>`` whose class is
    ``ResultRows`` and builds one list per row: the text of the row's
    ``<td>`` cells (without the last cell), with the link target, the link
    mode (for example ``"seriesId"``) and the image reported by ``meta``
    spliced in at positions 1, 2 and 3.  A progress dialog is shown while
    the rows are processed.

    Args:
        url: Absolute URL of the listing page to download.
        filename: Path of the pickle file the resulting list is written to.

    Returns:
        The list of row lists.  When the page contains no ``ResultRows``
        table an empty list is returned and nothing is written to
        *filename*, so the next call retries instead of reusing an empty
        cache.

    Errors raised by ``urllib2`` while downloading are not caught here.
    """
    sky_main = "http://go.sky.com/"
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    soup = BeautifulSoup(html)

    atoz_database = []
    result_rows = soup.find("tbody", "ResultRows")
    if result_rows is None:
        # Unexpected page layout: nothing to parse and nothing worth caching.
        return atoz_database

    # len() of the tag is halved to estimate the row count -- presumably
    # because whitespace text nodes sit between the <tr> tags; TODO confirm.
    noItems = int(len(result_rows) / 2.00)
    # Guard against a table with fewer than two children (noItems == 0).
    percent_per_loop = 100.00 / noItems if noItems else 100.00
    progress = 1

    pDialog = xbmcgui.DialogProgress()
    pDialog.create('Skygo V2', ('Caching %s items' % noItems), "Will load instantly next time")
    try:
        # NOTE(review): the original file lost its indentation.  The nesting
        # below (link data spliced in inside the anchor loop, one entry
        # appended per row) is reconstructed from the data flow -- confirm.
        for row in result_rows.findAll("tr"):
            # Encode with 'ignore' so non-ASCII titles do not raise
            # UnicodeEncodeError under Python 2.
            cells = [cell.text.encode('ascii', 'ignore')
                     for cell in row.findAll("td")]
            # The trailing cell is dropped (presumably a non-data column).
            cells.pop()

            for menu in row.findAll('a', href=True):
                item_url = str(menu['href'])
                # The mode is the path segment that follows "ent/" (searched
                # from offset 30) up to and including "Id".
                startMode = item_url.find("ent/", 30) + 4
                endMode = item_url.find("Id", startMode) + 2
                nextMode = str(item_url[startMode:endMode])
                fullURL = sky_main + item_url
                if nextMode == "seriesId":
                    meta_info = meta.scrap_meta_series(fullURL)
                else:
                    meta_info = meta.scrap_meta_episode(fullURL)
                try:
                    img = meta_info[1]
                except (IndexError, KeyError, TypeError):
                    # No image available; keep the row rather than abort
                    # the whole scrape.
                    img = ""
                # Same effect as insert(1, ...), insert(2, ...), insert(3, ...).
                cells[1:1] = [item_url, nextMode, img]

            atoz_database.append(cells)
            progress = progress + 1
            # The estimate above can undershoot, so cap the value at 100.
            pDialog.update(min(100, int(progress * percent_per_loop)))
    finally:
        pDialog.close()

    with open(filename, "wb") as cache_file:
        pickle.dump(atoz_database, cache_file)
    return atoz_database
def grid_scraper(menu_url, filename):
    """Scrape a grid-style menu page and cache the entries with pickle.

    Downloads ``http://go.sky.com/`` + *menu_url*, looks inside the ``<div>``
    whose class is ``listRows ATI_listRows`` and, for every
    ``<a class="teaserImageLnk" href=...>`` found in its ``<div>``
    descendants, records ``[title, item_url, img, nextMode]``.  A progress
    dialog is shown while the entries are processed.

    Args:
        menu_url: Path of the menu page, relative to ``http://go.sky.com/``.
        filename: Path of the pickle file the resulting list is written to.

    Returns:
        The list of ``[title, item_url, img, nextMode]`` entries.  When the
        page contains no grid container an empty list is returned and
        nothing is written to *filename*, so the next call retries instead
        of reusing an empty cache.

    Errors raised by ``urllib2`` while downloading are not caught here.
    """
    sky_main = "http://go.sky.com/"
    full_url = sky_main + menu_url
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
    req = urllib2.Request(full_url, None, headers)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    soup = BeautifulSoup(html)

    grid_database = []
    gridItems = soup.find("div", "listRows ATI_listRows")
    if gridItems is None:
        # Unexpected page layout: nothing to parse and nothing worth caching.
        return grid_database

    # len() of the tag is halved to estimate the item count -- presumably
    # because whitespace text nodes sit between the item tags; TODO confirm.
    noItems = int(len(gridItems) / 2.00)
    # Guard against a container with fewer than two children (noItems == 0).
    percent_per_loop = 100.00 / noItems if noItems else 100.00
    progress = 1

    pDialog = xbmcgui.DialogProgress()
    pDialog.create('Skygo V2', ('Caching %s items' % noItems), "Will load instantly next time")
    try:
        # NOTE(review): the original file lost its indentation.  The nesting
        # below (one entry and one progress tick per matching anchor) is
        # reconstructed from the data flow -- confirm.
        for block in gridItems.findAll("div"):
            for menu in block.findAll('a', "teaserImageLnk", href=True):
                item_url = str(menu['href'])
                # NOTE(review): the third positional argument appears to land
                # on findAll's `recursive` parameter; a non-empty list is
                # truthy, so the search stays recursive -- confirm.
                for t in block.findAll('span', "clickAttractor" , ['title']):
                    # The title is the node following the span; non-ASCII
                    # characters are dropped.
                    title = str(t.next_sibling.encode('ascii', 'ignore'))
                # The mode is the path segment that follows "tent/" (searched
                # from offset 15) up to and including "Id".
                startMode = item_url.find("tent/", 15) + 5
                endMode = item_url.find("Id", startMode) + 2
                nextMode = str(item_url[startMode:endMode])
                fullURL = sky_main + item_url
                if nextMode == "seriesId":
                    meta_info = meta.scrap_meta_series(fullURL)
                else:
                    meta_info = meta.scrap_meta_episode(fullURL)
                try:
                    img = meta_info[1]
                except (IndexError, KeyError, TypeError):
                    # No image available; keep the entry with an empty image.
                    img = ""
                grid_database.append([title, item_url, img, nextMode])
                progress = progress + 1
                # The estimate above can undershoot, so cap the value at 100.
                pDialog.update(min(100, int(progress * percent_per_loop)))
    finally:
        pDialog.close()

    with open(filename, "wb") as cache_file:
        pickle.dump(grid_database, cache_file)
    return grid_database