def __loadSeasons(self, soup, media, url):
    """Create placeholder season and episode records for a TV show.

    Fetches the show's "episodes" page, determines the highest episode
    number listed for each season, and makes sure a ``Media`` row (with its
    ``MediaInfo``) exists for every season and for every episode from 1 up
    to that maximum. No ``SourceMedia`` is created for these placeholders,
    so that the source can be added by someone else later.

    Args:
        soup: Parsed page of the show itself. Not read here; the episodes
            page is downloaded and parsed separately.
        media: The TV show ``Media``; it becomes the parent of each season
            and its name prefixes the season names.
        url: URL of the show page. "episodes" is appended to it (after a
            trailing slash is ensured).
    """
    if not url.endswith('/'):
        url += '/'
    html = mediakey.getUrl(url + "episodes")

    if settings.IMDB_DEBUG:
        # Keep a copy of the raw page for offline inspection.
        tmpFile = self.debugDir + self.getSourceBusinessKey(url) + "_eps.html"
        with open(tmpFile, 'w') as debugFile:
            debugFile.write(html)

    episodesSoup = BeautifulSoup(html)

    # Find the max episode number for each season.
    # Both numbers are "[0-9]+": the former "[0-9+]" matched exactly one
    # character, so seasons 10 and above were never recognised.
    pattern = re.compile(r"Season ([0-9]+), Episode ([0-9]+)", re.IGNORECASE)
    seasonEps = {}
    for tag in episodesSoup.findAll("h3", text=pattern):
        match = pattern.search(str(tag))
        if match is None:
            # Defensive: the string form of the node did not match.
            continue
        seasonNumber = int(match.group(1))
        episodeNumber = int(match.group(2))
        # Track the maximum, including the first heading seen for a season
        # (previously the first one was recorded as 0 and thereby lost).
        seasonEps[seasonNumber] = max(seasonEps.get(seasonNumber, 0),
                                      episodeNumber)

    # Create dummy eps - we are not going to add SourceMedia,
    # so that the source can be added by someone else.
    seasonType = MediaType.objects.get(name='TV Show Seasons')
    episodeType = MediaType.objects.get(name='TV Show Episodes')

    for seasonIndex in sorted(seasonEps):
        seasonKey = mediakey.generateTvSeriesSeasonKey(media, seasonIndex)
        try:
            # We don't have any information about the season at this point
            # (except the number of episodes, handled below), so an existing
            # db season is used as-is without modifying/updating it.
            season = Media.objects.get(type=seasonType, businessKey=seasonKey)
        except Media.DoesNotExist:
            season = Media(name=media.name + " season " + str(seasonIndex),
                           type=seasonType,
                           businessKey=seasonKey,
                           parent=media,
                           order=seasonIndex)
            season.mediainfo = MediaInfo(author=None, date=None, location=None)
            # The season must be saved before its MediaInfo can point at it.
            season.save()
            season.mediainfo.media = season
            season.mediainfo.save()

        for epix in range(1, seasonEps[seasonIndex] + 1):
            episodeKey = mediakey.generateTvSeriesEpisodeKey(season, epix)
            try:
                # We have no episode info at this point, so the only thing
                # the lookup tells us is that we don't have to save it.
                Media.objects.get(type=episodeType, businessKey=episodeKey)
            except Media.DoesNotExist:
                episode = Media(name="Episode " + str(epix),
                                type=episodeType,
                                businessKey=episodeKey,
                                parent=season,
                                order=epix)
                episode.mediainfo = MediaInfo(author=None, date=None,
                                              location=None)
                episode.save()
                episode.mediainfo.media = episode
                episode.mediainfo.save()
def __loadTvShowEpisode(self, soup, media, url):
    """Attach a TV episode to its show and season, then save or merge it.

    This is troublesome because the whole TV show must be known first, then
    the season, and only then the episode. The method:

    1. reads the episode info (parent reference, season, episode number)
       via ``__getTvShowEpisodeInfo``;
    2. finds the show through its ``SourceMedia``, or loads and stores the
       show (plus a new ``SourceMedia``) when it is not known yet;
    3. finds or creates the season under that show;
    4. fills in parent, order, business key, directors and duration on
       ``media``;
    5. merges into an existing db episode with the same type and business
       key, or saves ``media`` as a new row.

    Args:
        soup: Parsed episode page.
        media: The episode ``Media`` being loaded; it is modified in place.
        url: URL of the episode page.

    Returns:
        The stored episode: the pre-existing db object (after updating it)
        when one was found, otherwise ``media``.

    Raises:
        WebLoadError: if no parent title key can be extracted, the parent
            show cannot be loaded, or the season media type is missing.
    """
    epInfo = self.__getTvShowEpisodeInfo(soup, url)

    # The parent reference must contain a title key ("tt" plus digits).
    parentMatch = re.search("tt[0-9]+", epInfo['parent'] or "", re.IGNORECASE)
    if parentMatch is None:
        # Previously this surfaced as an AttributeError on None.
        raise WebLoadError("Cannot find parent show key for url: " + url)
    parentMediaKey = parentMatch.group(0)

    try:
        sm_show = SourceMedia.objects.get(businessKey=parentMediaKey,
                                          source=self.getSource())
        m_show = sm_show.media
    except SourceMedia.DoesNotExist:
        # We should load the tv show first (slow).
        parentUrl = self.getUrl(parentMediaKey)
        m_show = self.loadMedia(parentUrl)
        if not m_show or not m_show.mediainfo:
            raise WebLoadError("[09] Cannot parse url: " + parentUrl)
        m_show.save()
        m_show.mediainfo.media = m_show
        m_show.mediainfo.save()
        # Also add source media!
        sm_show = SourceMedia(source=self.getSource())
        sm_show.businessKey = parentMediaKey
        sm_show.media = m_show
        sm_show.url = parentUrl
        sm_show.save()

    # Now find the season (or add it).
    seasonIndex = epInfo['season']
    seasonKey = mediakey.generateTvSeriesSeasonKey(m_show, seasonIndex)
    try:
        seasonType = MediaType.objects.get(name="TV Show Seasons")
    except MediaType.DoesNotExist:
        raise WebLoadError("Fatal error - media type not found")
    try:
        m_season = Media.objects.get(parent=m_show, businessKey=seasonKey)
    except Media.DoesNotExist:
        # Create a new season object.
        m_season = Media(parent=m_show,
                         type=seasonType,
                         name=unicode(m_show.name + " season " + str(seasonIndex)),
                         businessKey=seasonKey,
                         order=seasonIndex)
        # Avoid unicode(None), which would store the literal text "None".
        showAuthor = m_show.mediainfo.author
        if showAuthor is not None:
            showAuthor = unicode(showAuthor)
        m_season.mediainfo = MediaInfo(author=showAuthor, date=None)
        m_season.save()
        m_season.mediainfo.media = m_season
        m_season.mediainfo.save()

    # Load the last bits of info for our media.
    media.parent = m_season
    media.order = epInfo['episode']
    media.businessKey = mediakey.generateTvSeriesEpisodeKey(m_season, media.order)
    # (We don't need directors for the business key of a child item.)
    self.__loadDirectors(soup, media, url)
    self.__loadDuration(soup, media, url)

    # Now we can compare with the database object, if it exists.
    try:
        dbMedia = Media.objects.get(type=media.type,
                                    businessKey=media.businessKey)
    except Media.DoesNotExist:
        # No worries - this is a new episode.
        media.save()
        media.mediainfo.media = media
        media.mediainfo.save()
        return media

    # Update items only if necessary.
    # A name of the form "episode <n>" is treated as a placeholder and is
    # replaced by the freshly loaded name.
    if dbMedia.name.lower() == "episode " + str(media.order):
        dbMedia.name = unicode(media.name)

    # NOTE(review): assumes `duration` is an attribute of Media set by
    # __loadDuration - confirm. Unset values are skipped instead of being
    # passed to int(), which raised TypeError.
    newDuration = media.duration
    if newDuration and (not dbMedia.duration
                        or int(newDuration) > int(dbMedia.duration)):
        dbMedia.duration = newDuration

    # Fill in a missing author, but never with the text "None".
    newAuthor = media.mediainfo.author
    if not dbMedia.mediainfo.author and newAuthor:
        dbMedia.mediainfo.author = unicode(newAuthor)

    if not dbMedia.mediainfo.date:
        dbMedia.mediainfo.date = media.mediainfo.date

    dbMedia.save()
    dbMedia.mediainfo.save()
    return dbMedia