예제 #1
0
 def __loadSeasons(self, soup, media, url):
     """Create placeholder season and episode Media rows for a TV show.

     Downloads the show's "episodes" page, determines the highest episode
     number listed for each season, and makes sure that a season Media
     (child of ``media``) and one episode Media per episode number exist in
     the database. Rows that already exist are left unmodified. No
     SourceMedia is created for the placeholders, so that the source can be
     added by someone else later.

     Args:
         soup: Parsed page of the show. Not read here; the episode list is
             downloaded and parsed separately.
         media: The TV show Media object that becomes the parent of every
             season.
         url: URL of the show page; "episodes" is appended to it (after a
             trailing slash is ensured).
     """
     # NOTE: an earlier approach counted the "episodes#season-N" links on the
     # show page and stored that count as the duration (measured in
     # seasons); it is currently disabled.
     if url[-1] != '/':
         url += '/'
     html = mediakey.getUrl(url + "episodes")

     if settings.IMDB_DEBUG:
         # Keep a copy of the downloaded page for offline inspection.
         tmpFile = self.debugDir + self.getSourceBusinessKey(url) + "_eps.html"
         with open(tmpFile, 'w') as f:
             f.write(html)

     epSoup = BeautifulSoup(html)

     # Find the max number of eps for each season.
     # Both numbers may have several digits ("Season 12, Episode 3").
     pattern = re.compile("Season ([0-9]+), Episode ([0-9]+)", re.IGNORECASE)
     seasonEps = {}
     for tag in epSoup.findAll("h3", text=pattern):
         match = pattern.search(str(tag))
         if match is None:
             continue
         seasonNo = int(match.group(1))
         episodeNo = int(match.group(2))
         # Count the very first heading of a season as well, otherwise a
         # one-episode season (or a descending listing) is undercounted.
         seasonEps[seasonNo] = max(seasonEps.get(seasonNo, 0), episodeNo)

     # Create dummy eps - we are not going to add SourceMedia
     # So that the source can be added by someone else
     seasonType = MediaType.objects.get(name='TV Show Seasons')
     episodeType = MediaType.objects.get(name='TV Show Episodes')

     for seasonIndex in seasonEps:
         seasonKey = mediakey.generateTvSeriesSeasonKey(media, seasonIndex)
         try:
             # We don't have any information about the season at this point
             # (except the number of episodes, handled below), so an
             # existing db row is used as-is without updating it.
             season = Media.objects.get(type=seasonType, businessKey=seasonKey)
         except Media.DoesNotExist:  # no worries, create it
             season = Media(name=media.name + " season " + str(seasonIndex),
                            type=seasonType, businessKey=seasonKey,
                            parent=media, order=seasonIndex)
             season.mediainfo = MediaInfo(author=None, date=None, location=None)
             season.save()
             # The season must be saved before its mediainfo can point at it.
             season.mediainfo.media = season
             season.mediainfo.save()

         for epix in range(1, seasonEps[seasonIndex] + 1):
             episodeKey = mediakey.generateTvSeriesEpisodeKey(season, epix)
             try:
                 # We have no episode info at this point, so the only thing
                 # the lookup tells us is that we don't have to save it.
                 Media.objects.get(type=episodeType, businessKey=episodeKey)
             except Media.DoesNotExist:  # no worries, create it
                 newEpisode = Media(name="Episode " + str(epix), type=episodeType,
                                    businessKey=episodeKey,
                                    parent=season, order=epix)
                 newEpisode.mediainfo = MediaInfo(author=None, date=None,
                                                  location=None)
                 newEpisode.save()
                 newEpisode.mediainfo.media = newEpisode
                 newEpisode.mediainfo.save()
예제 #2
0
 def __loadTvShowEpisode(self, soup, media, url):
     """Attach an episode to its show and season, then save or merge it.

     The parent show is looked up through its SourceMedia (and loaded and
     saved, together with a new SourceMedia, when it is unknown). The season
     is looked up under that show and created when missing. ``media`` then
     receives its parent, order and business key, and is either saved as a
     new row or used to fill in gaps of the row that already exists.

     Args:
         soup: Parsed page of the episode.
         media: The episode Media being loaded; ``media.mediainfo`` must be
             set because it is read and saved here.
         url: URL of the episode page.

     Returns:
         The Media row already stored for this episode (updated where it
         lacked information), or ``media`` itself after saving it.

     Raises:
         WebLoadError: If the parent show key cannot be extracted, the
             parent show cannot be loaded, or the season media type is
             missing.
     """
     # This is very troublesome
     # We must first load the whole tv show, then the season, and only then the episode
     # for now, let's just see if we can find the parent and give up if we can't
     epInfo = self.__getTvShowEpisodeInfo(soup, url)

     keyMatch = re.search("tt[0-9]+", epInfo['parent'], re.IGNORECASE)
     if keyMatch is None:
         # Fail with the loader's own error type instead of an
         # AttributeError on None.
         raise WebLoadError("Cannot find parent show key for url: " + url)
     parentMediaKey = keyMatch.group(0)

     try:
         sm_show = SourceMedia.objects.get(businessKey=parentMediaKey,
                                           source=self.getSource())
         m_show = sm_show.media
     except SourceMedia.DoesNotExist:
         # We should load the tv show first, (slow)
         parentUrl = self.getUrl(parentMediaKey)
         m_show = self.loadMedia(parentUrl)
         if not m_show or not m_show.mediainfo:
             raise WebLoadError("[09] Cannot parse url: "+parentUrl)
         m_show.save()
         m_show.mediainfo.media = m_show
         m_show.mediainfo.save()

         # also add source media!
         sm_show = SourceMedia(source=self.getSource())
         sm_show.businessKey = parentMediaKey
         sm_show.media = m_show
         sm_show.url = parentUrl
         sm_show.save()

     # Now find the season (or add it)
     seasonIndex = epInfo['season']
     seasonKey = mediakey.generateTvSeriesSeasonKey(m_show, seasonIndex)
     try:
         seasonType = MediaType.objects.get(name="TV Show Seasons")
     except MediaType.DoesNotExist:
         raise WebLoadError("Fatal error - media type not found")

     try:
         m_season = Media.objects.get(parent=m_show, businessKey=seasonKey)
     except Media.DoesNotExist:  # Create a new season object
         m_season = Media(parent=m_show, type=seasonType,
                          name=unicode(m_show.name+" season "+str(seasonIndex)),
                          businessKey=seasonKey, order=seasonIndex)
         # Inherit the show's author, but keep a missing author as None
         # rather than storing the text "None".
         showAuthor = m_show.mediainfo.author
         if showAuthor is not None:
             showAuthor = unicode(showAuthor)
         m_season.mediainfo = MediaInfo(author=showAuthor, date=None)
         m_season.save()
         m_season.mediainfo.media = m_season
         m_season.mediainfo.save()

     # Load the last bits of info for our media
     media.parent = m_season
     media.order = epInfo['episode']
     media.businessKey = mediakey.generateTvSeriesEpisodeKey(m_season, media.order)

     # (We don't need directors for the business key of a child item)
     self.__loadDirectors(soup, media, url)
     self.__loadDuration(soup, media, url)

     # now we can compare with the database object, if it exists
     try:
         dbMedia = Media.objects.get(type=media.type, businessKey=media.businessKey)
     except Media.DoesNotExist:  # No worries
         media.save()
         media.mediainfo.media = media
         media.mediainfo.save()
         return media

     # Update items only if necessary.
     # A stored name of the form "episode <n>" is a placeholder; replace it
     # with the name of the freshly loaded media.
     if dbMedia.name.lower() == "episode "+str(media.order):
         dbMedia.name = unicode(media.name)
     # NOTE(review): assumes both durations are convertible with int() -
     # confirm that __loadDuration always sets one.
     if int(media.duration) > int(dbMedia.duration):
         dbMedia.duration = media.duration

     # Fill in a missing author, but never overwrite it with the text "None".
     newAuthor = media.mediainfo.author
     if not dbMedia.mediainfo.author and newAuthor is not None:
         dbMedia.mediainfo.author = unicode(newAuthor)
     if not dbMedia.mediainfo.date:
         dbMedia.mediainfo.date = media.mediainfo.date

     dbMedia.save()
     dbMedia.mediainfo.save()
     return dbMedia