def getLatestEpisodes():
    """Scrape recent showsgo.com listing pages and store unseen episodes.

    Downloads listing pages 10 down to 2 and then the front page, in that
    order (presumably oldest first so rows are inserted chronologically --
    confirm). For every ``<li>`` of the first ``<ul>`` on each page:

    * the ``div.cover`` anchor supplies the episode title, the stream URL
      and the cover image URL;
    * the ``div.postcontent`` anchor supplies the show name, taken as the
      text before ``' Full Episodes'`` in its ``title`` attribute.

    A ``TvShows`` row is fetched or created for the show name, and an
    ``Episode`` row is created unless one with the same ``show_link``
    already exists.

    List items missing an episode title or a show name are skipped, as are
    pages without a ``<ul>``. Network errors from ``urllib2`` propagate to
    the caller.
    """
    addresses = ["http://showsgo.com/page/%d" % i for i in range(10, 1, -1)]
    addresses.append("http://showsgo.com/")

    # Download everything first; the handle is closed even if read() fails.
    showsgoHtmls = []
    for address in addresses:
        showsgoFile = urllib2.urlopen(address)
        try:
            showsgoHtmls.append(showsgoFile.read())
        finally:
            showsgoFile.close()

    for page in showsgoHtmls:
        soup = BeautifulSoup(page)
        listing = soup.ul
        if listing is None:
            # Page has no list at all (e.g. error page); nothing to import.
            continue

        for links in listing.find_all('li'):
            show = None
            title = None
            stream = None
            image = None

            for cover in links.find_all('div', {'class': 'cover'}):
                anchor = cover.a
                if anchor is None:
                    continue
                title = anchor.get('title')
                stream = anchor.get('href')
                img = anchor.find('img')
                image = img.get('src') if img is not None else None

            for post in links.find_all('div', {'class': 'postcontent'}):
                anchor = post.a
                if anchor is None or anchor.get('title') is None:
                    continue
                show = anchor.get('title').split(' Full Episodes')[0]

            # Without both an episode title and a show name there is
            # nothing meaningful to store for this list item.
            if title is None or show is None:
                continue

            # picture_link is only applied when the show row is created.
            p, _created = TvShows.objects.get_or_create(
                show_name=show, defaults={'picture_link': image})

            if not Episode.objects.filter(show_link=stream).exists():
                Episode(show=p, show_link=stream, season_episode=title).save()
def getLatestEpisodes(listOfShows):
    """Collect episode links for the given shows from showsgo.com.

    Downloads the front page and listing pages 2 through 10, then, for
    each key of ``listOfShows``, appends the ``href`` of every
    ``div.cover`` anchor whose ``title`` attribute contains that key
    (case-sensitive substring match) to ``listOfShows[key]``. Links are
    appended in page order, then document order.

    After all pages have been scanned for a show, a ``TvShows`` record is
    saved with ``show_name`` set to the key and ``show_link`` set to that
    show's accumulated list.
    NOTE(review): the original indentation was lost; saving once per show
    (rather than once per matching link) is assumed -- confirm.

    Args:
        listOfShows: mapping of show name to a list of links. The lists
            are extended in place.

    Returns:
        The same ``listOfShows`` mapping that was passed in.

    Network errors from ``urllib2`` propagate to the caller. Pages without
    a ``<ul>`` and cover anchors without a ``title`` are ignored.
    """
    addresses = ["http://showsgo.com/"]
    addresses.extend("http://showsgo.com/page/%d" % i for i in range(2, 11))

    # The handle is closed even if read() fails.
    showsgoHtmls = []
    for address in addresses:
        showsgoFile = urllib2.urlopen(address)
        try:
            showsgoHtmls.append(showsgoFile.read())
        finally:
            showsgoFile.close()

    # Parse each page exactly once and keep only the (title, href) pairs,
    # instead of re-parsing every page for every show.
    covers = []
    for page in showsgoHtmls:
        listing = BeautifulSoup(page).ul
        if listing is None:
            continue
        for links in listing.find_all('li'):
            for cover in links.find_all('div', {'class': 'cover'}):
                anchor = cover.a
                if anchor is None:
                    continue
                title = anchor.get('title')
                if title is None:
                    # No title to match a show name against.
                    continue
                covers.append((title, anchor.get('href')))

    # Snapshot the keys so the iteration is independent of the mapping.
    for show in list(listOfShows):
        found = listOfShows[show]
        found.extend(href for title, href in covers if show in title)
        p = TvShows(show_name=show, show_link=found)
        p.save()

    return listOfShows