def importFromRatingList(self, user, summary_url):
    self.last_import_count = 0
    allRatings = {}
    offset = 0
    hasRatings = True
    while hasRatings:
        url = summary_url + "&o=" + str(offset)
        self.last_url = url  # for debug
        html = mediakey.getUrl(url)
        if settings.IMDB_DEBUG:
            tmpFile = self.debugDir + "IMDb_list_" + str(user.id) + "_" + str(offset) + ".html"
            f = open(tmpFile, 'w')
            f.write(html)
            f.close()
        soup = BeautifulSoup(html)
        ratings = self.__buildRatingMap(soup)
        oldSize = len(allRatings)
        allRatings.update(ratings)
        newSize = len(allRatings)
        # If the same ratings are added over and over, the size does not increase.
        # If no ratings are found (e.g. on an empty page), the size does not increase either.
        hasRatings = oldSize < newSize
        print "Page increment: " + str(newSize - oldSize) + " (" + str(len(allRatings)) + ")"
        offset += newSize - oldSize  # == number of ratings on this page

    # Now we must import each rating
    errors = []
    for key, (rating, status, date) in allRatings.items():
        try:
            self.last_import_count += 1
            self.__importRating(user, key, rating, status, date)
        except WebLoadError, e:
            errors.append(str(key) + " - " + str(e.value))
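
# A minimal, self-contained sketch of the pagination technique used in
# importFromRatingList above: keep fetching pages and merging the results into
# a dict, and stop as soon as a page contributes no new keys.  "fetchPage" is
# a hypothetical stand-in for the getUrl/__buildRatingMap pair, not part of
# this module.
def _paginate_until_no_growth(fetchPage):
    items = {}
    offset = 0
    while True:
        before = len(items)
        items.update(fetchPage(offset))  # fetchPage(offset) -> dict of items on that page
        added = len(items) - before
        if added == 0:
            # Either an empty page or only duplicates: we are done
            break
        offset += added  # advance by the number of genuinely new items
    return items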
def __loadSeasons(self, soup, media, url):
    #seasonTags = soup.findAll("a", {"href": re.compile("episodes#season-[0-9]+")})
    #media.mediainfo.duration = len(seasonTags)  # length measured in seasons
    if url[-1] != '/':
        url += '/'
    html = mediakey.getUrl(url + "episodes")
    if settings.IMDB_DEBUG:
        tmpFile = self.debugDir + self.getSourceBusinessKey(url) + "_eps.html"
        f = open(tmpFile, 'w')
        f.write(html)
        f.close()
    soup = BeautifulSoup(html)

    # Find the highest episode number for each season
    pattern = re.compile("Season ([0-9]+), Episode ([0-9]+)", re.IGNORECASE)
    tags = soup.findAll("h3", text=pattern)
    seasonEps = {}
    for tag in tags:
        match = pattern.search(str(tag))
        season = int(match.group(1))
        episode = int(match.group(2))
        if season not in seasonEps or episode > seasonEps[season]:
            seasonEps[season] = episode
        #print tag

    # Create dummy seasons and episodes - we are not going to add SourceMedia,
    # so that the source can be added by someone else
    seasonType = MediaType.objects.get(name='TV Show Seasons')
    episodeType = MediaType.objects.get(name='TV Show Episodes')
    for seasonIndex in seasonEps:
        season = Media(name=media.name + " season " + str(seasonIndex),
                       type=seasonType,
                       businessKey=mediakey.generateTvSeriesSeasonKey(media, seasonIndex),
                       parent=media,
                       order=seasonIndex)
        season.mediainfo = MediaInfo(author=None, date=None, location=None)
        try:
            dbSeason = Media.objects.get(type=seasonType, businessKey=season.businessKey)
            # We don't have any information about the season at this point
            # (except the number of episodes, which we will handle later),
            # so we just use the existing db media without modifying/updating it
            season = dbSeason
        except Media.DoesNotExist:
            # New season: save it
            season.save()
            season.mediainfo.media = season
            season.mediainfo.save()

        episodeCount = seasonEps[seasonIndex]
        for epix in range(1, episodeCount + 1):
            episode = Media(name="Episode " + str(epix),
                            type=episodeType,
                            businessKey=mediakey.generateTvSeriesEpisodeKey(season, epix),
                            parent=season,
                            order=epix)
            # Check if the episode exists so we can update it if necessary
            try:
                dbEpisode = Media.objects.get(type=episodeType, businessKey=episode.businessKey)
                # We have no episode info at this point, so the only thing we find out
                # by looking up the episode in the db is that we don't have to save it
            except Media.DoesNotExist:
                # New episode: save it
                episode.mediainfo = MediaInfo(author=None, date=None, location=None)
                episode.save()
                episode.mediainfo.media = episode
                episode.mediainfo.save()
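
# Self-contained sketch of the "highest episode number per season" bookkeeping
# performed in __loadSeasons, run on plain strings instead of the <h3> tags
# scraped from the IMDb episodes page.  The helper name and the sample input
# are illustrative only.
import re

def _max_episode_per_season(headers):
    pattern = re.compile("Season ([0-9]+), Episode ([0-9]+)", re.IGNORECASE)
    result = {}
    for text in headers:
        match = pattern.search(text)
        if match is None:
            continue
        season = int(match.group(1))
        episode = int(match.group(2))
        if season not in result or episode > result[season]:
            result[season] = episode
    return result

# Example:
#   _max_episode_per_season(["Season 1, Episode 2", "Season 1, Episode 1",
#                            "Season 2, Episode 5"]) == {1: 2, 2: 5}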
def loadMedia(self, url):
    try:
        html = mediakey.getUrl(url)
    except URLError, e:
        raise WebLoadError("Http error getting " + url + " - " + str(e))