def update_episode(parsed_episode, episode, podcast):
    """ updates "episode" with the data from "parsed_episode" """

    # TODO: check if there have been any changes, to avoid unnecessary updates
    episode.guid = to_maxlength(Episode, 'guid',
                                parsed_episode.guid or episode.guid)
    episode.description = parsed_episode.description or episode.description
    episode.subtitle = parsed_episode.subtitle or episode.subtitle
    episode.content = parsed_episode.content or \
        parsed_episode.description or episode.content
    episode.link = to_maxlength(Episode, 'link',
                                parsed_episode.link or episode.link)
    episode.released = datetime.utcfromtimestamp(parsed_episode.released) \
        if parsed_episode.released else episode.released
    episode.author = to_maxlength(Episode, 'author',
                                  parsed_episode.author or episode.author)
    episode.duration = parsed_episode.duration or episode.duration
    episode.filesize = parsed_episode.files[0].filesize
    episode.language = parsed_episode.language or episode.language or \
        podcast.language
    episode.mimetypes = ','.join(list(set(filter(
        None, [f.mimetype for f in parsed_episode.files]))))
    episode.flattr_url = to_maxlength(Episode, 'flattr_url',
                                      parsed_episode.flattr or
                                      episode.flattr_url)
    episode.license = parsed_episode.license or episode.license

    episode.title = to_maxlength(Episode, 'title',
                                 parsed_episode.title or
                                 episode.title or
                                 file_basename_no_extension(episode.url))

    episode.last_update = datetime.utcnow()
    episode.save()

    parsed_urls = list(chain.from_iterable(
        f.urls for f in parsed_episode.files))
    episode.add_missing_urls(parsed_urls)
def get_or_create_for_url(self, url, defaults=None):

    if not url:
        raise ValueError('The URL must not be empty')

    # TODO: where to specify how uuid is created?
    import uuid

    # copy to avoid mutating a caller-supplied (or shared default) dict
    defaults = dict(defaults or {})
    defaults.update({
        'id': uuid.uuid1(),
    })

    url = utils.to_maxlength(URL, 'url', url)

    try:
        # try to fetch the podcast
        return Podcast.objects.get(urls__url=url,
                                   urls__scope='',
                                   )

    except Podcast.DoesNotExist:
        # podcast did not exist, try to create it
        try:
            with transaction.atomic():
                podcast = Podcast.objects.create(**defaults)
                url = URL.objects.create(url=url,
                                         order=0,
                                         scope='',
                                         content_object=podcast,
                                         )
                return podcast

        # URL could not be created, so it was created since the first get
        except IntegrityError:
            return Podcast.objects.get(urls__url=url,
                                       urls__scope='',
                                       )
def add_tag(request, podcast):
    tag_str = request.GET.get('tag', '')
    if not tag_str:
        return HttpResponseBadRequest()

    user = request.user

    tags = tag_str.split(',')
    tags = map(str.strip, tags)
    tags = map(str.lower, tags)
    tags = list(filter(None, tags))

    # look up the content type once and reuse it for all tags
    content_type = ContentType.objects.get_for_model(podcast)

    for tag in tags:
        # trim to maximum length
        tag = to_maxlength(Tag, 'tag', tag)

        Tag.objects.get_or_create(
            tag=tag,
            source=Tag.USER,
            user=user,
            content_type=content_type,
            object_id=podcast.id,
        )

    if request.GET.get('next', '') == 'mytags':
        return HttpResponseRedirect('/tags/')

    return HttpResponseRedirect(get_podcast_link_target(podcast))
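# Hedged sketch: the tag-normalisation pipeline from add_tag above, shown in
# isolation. The helper name and input string are hypothetical.
def _normalize_tags(tag_str):
    tags = tag_str.split(',')
    tags = map(str.strip, tags)
    tags = map(str.lower, tags)
    return list(filter(None, tags))

assert _normalize_tags(' Python, TECH,, news ') == ['python', 'tech', 'news']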
def get_or_create_for_url(self, podcast, url, defaults={}):
    """Create an Episode for a given URL

    This is the only place where new episodes are created"""

    if not url:
        raise ValueError("The URL must not be empty")

    # TODO: where to specify how uuid is created?
    import uuid

    url = utils.to_maxlength(URL, "url", url)

    try:
        # try to fetch the episode
        return Episode.objects.get(urls__url=url, urls__scope=podcast.as_scope)
    except Episode.DoesNotExist:
        # episode did not exist, try to create it
        try:
            with transaction.atomic():
                episode = Episode.objects.create(
                    podcast=podcast, id=uuid.uuid1(), **defaults
                )

                url = URL.objects.create(
                    url=url, order=0, scope=episode.scope, content_object=episode
                )

                # Keep episode_count up to date here; it is not
                # recalculated when updating the podcast because counting
                # episodes can be very slow for podcasts with many episodes
                Podcast.objects.filter(pk=podcast.pk).update(
                    episode_count=F("episode_count") + 1
                )

                return episode

        # URL could not be created, so it was created since the first get
        except IntegrityError:
            return Episode.objects.get(urls__url=url, urls__scope=podcast.as_scope)
def get_or_create_for_url(self, podcast, url, defaults={}):
    """ Create an Episode for a given URL

    This is the only place where new episodes are created """

    if not url:
        raise ValueError('The URL must not be empty')

    # TODO: where to specify how uuid is created?
    import uuid

    url = utils.to_maxlength(URL, 'url', url)

    try:
        url = URL.objects.get(url=url, scope=podcast.as_scope)
        created = False
        episode = url.content_object

        if episode is None:
            with transaction.atomic():
                episode = Episode.objects.create(podcast=podcast,
                                                 id=uuid.uuid1(),
                                                 **defaults)
                url.content_object = episode
                url.save()
                created = True

        return GetCreateResult(episode, created)

    except URL.DoesNotExist:
        # episode did not exist, try to create it
        try:
            with transaction.atomic():
                episode = Episode.objects.create(podcast=podcast,
                                                 id=uuid.uuid1(),
                                                 **defaults)

                url = URL.objects.create(url=url,
                                         order=0,
                                         scope=episode.scope,
                                         content_object=episode,
                                         )

                # Keep episode_count up to date here; it is not
                # recalculated when updating the podcast because counting
                # episodes can be very slow for podcasts with many episodes
                Podcast.objects.filter(pk=podcast.pk)\
                               .update(episode_count=F('episode_count')+1)

                return GetCreateResult(episode, True)

        # URL could not be created, so it was created since the first get
        except IntegrityError:
            episode = Episode.objects.get(urls__url=url,
                                          urls__scope=podcast.as_scope,
                                          )
            return GetCreateResult(episode, False)
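# Hedged usage sketch for the episode lookup above. It assumes the manager is
# installed as Episode.objects and that GetCreateResult unpacks to an
# (object, created) pair, as the return statements suggest; the podcast
# instance and media URL are hypothetical.
episode, created = Episode.objects.get_or_create_for_url(
    podcast, 'http://example.com/episode1.mp3')
if created:
    logger.info('created episode %s for %s', episode, podcast)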
def _perform_subscribe(podcast, user, clients, timestamp, ref_url):
    """ Subscribes to a podcast on multiple clients

    Yields the clients on which a subscription was added, i.e. not those
    where the subscription already existed. """
    for client in clients:
        subscription, created = Subscription.objects.get_or_create(
            user=user,
            client=client,
            podcast=podcast,
            defaults={
                'ref_url': to_maxlength(Subscription, 'ref_url', ref_url),
                'created': timestamp,
                'modified': timestamp,
            },
        )

        if not created:
            continue

        logger.info('{user} subscribed to {podcast} on {client}'.format(
            user=user, podcast=podcast, client=client))

        HistoryEntry.objects.create(
            timestamp=timestamp,
            podcast=podcast,
            user=user,
            client=client,
            action=HistoryEntry.SUBSCRIBE,
        )

        yield client
def get_or_create_for_url(self, url, defaults=None):

    if not url:
        raise ValueError("The URL must not be empty")

    # TODO: where to specify how uuid is created?
    import uuid

    # copy to avoid mutating a caller-supplied (or shared default) dict
    defaults = dict(defaults or {})
    defaults.update({"id": uuid.uuid1()})

    url = utils.to_maxlength(URL, "url", url)

    try:
        # try to fetch the podcast
        podcast = Podcast.objects.get(urls__url=url, urls__scope="")
        return GetCreateResult(podcast, False)

    except Podcast.DoesNotExist:
        # podcast did not exist, try to create it
        try:
            with transaction.atomic():
                podcast = Podcast.objects.create(**defaults)

                url = URL.objects.create(
                    url=url, order=0, scope="", content_object=podcast
                )

                return GetCreateResult(podcast, True)

        # URL could not be created, so it was created since the first get
        except IntegrityError:
            podcast = Podcast.objects.get(urls__url=url, urls__scope="")
            return GetCreateResult(podcast, False)
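# Hedged usage sketch, assuming the manager above is installed as
# Podcast.objects and that GetCreateResult unpacks to (object, created);
# the feed URL is hypothetical.
podcast, created = Podcast.objects.get_or_create_for_url(
    "http://example.com/feed.xml"
)
if created:
    logger.info("new podcast %s", podcast)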
def _perform_subscribe(podcast, user, clients, timestamp, ref_url):
    """ Subscribes to a podcast on multiple clients

    Yields the clients on which a subscription was added, i.e. not those
    where the subscription already existed. """
    # local imports, presumably to avoid circular imports; hoisted out of
    # the loop, where they were needlessly repeated per client
    from mygpo.subscriptions.models import Subscription
    from mygpo.history.models import HistoryEntry

    for client in clients:
        subscription, created = Subscription.objects.get_or_create(
            user=user,
            client=client,
            podcast=podcast,
            defaults={
                'ref_url': to_maxlength(Subscription, 'ref_url', ref_url),
                'created': timestamp,
                'modified': timestamp,
            }
        )

        if not created:
            continue

        logger.info('{user} subscribed to {podcast} on {client}'.format(
            user=user, podcast=podcast, client=client))

        HistoryEntry.objects.create(
            timestamp=timestamp,
            podcast=podcast,
            user=user,
            client=client,
            action=HistoryEntry.SUBSCRIBE,
        )

        yield client
def update_episode(parsed_episode, episode, podcast):
    """ updates "episode" with the data from "parsed_episode" """

    # TODO: check if there have been any changes, to avoid unnecessary updates
    episode.guid = to_maxlength(Episode, 'guid',
                                parsed_episode.get('guid') or episode.guid)
    episode.description = parsed_episode.get('description') or \
        episode.description
    episode.subtitle = parsed_episode.get('subtitle') or episode.subtitle
    episode.content = parsed_episode.get('content') or \
        parsed_episode.get('description') or episode.content
    episode.link = to_maxlength(Episode, 'link',
                                parsed_episode.get('link') or episode.link)
    episode.released = datetime.utcfromtimestamp(
        parsed_episode.get('released')) if parsed_episode.get('released') \
        else episode.released
    episode.author = to_maxlength(
        Episode, 'author', parsed_episode.get('author') or episode.author)
    episode.duration = parsed_episode.get('duration') or episode.duration
    episode.filesize = parsed_episode['files'][0]['filesize']
    episode.language = parsed_episode.get('language') or \
        episode.language or podcast.language
    episode.mimetypes = ','.join(list(set(filter(
        None, [f['mimetype'] for f in parsed_episode.get('files', [])]))))
    episode.flattr_url = to_maxlength(
        Episode, 'flattr_url',
        parsed_episode.get('flattr') or episode.flattr_url)
    episode.license = parsed_episode.get('license') or episode.license

    episode.title = to_maxlength(
        Episode, 'title',
        parsed_episode.get('title') or episode.title or
        file_basename_no_extension(episode.url))

    episode.last_update = datetime.utcnow()
    episode.save()

    parsed_urls = list(chain.from_iterable(
        f.get('urls', []) for f in parsed_episode.get('files', [])))
    episode.add_missing_urls(parsed_urls)
def set_slugs(self, slugs):
    """Update the object's slugs to the given list

    'slugs' should be a list of strings. Slugs that do not exist are
    created. Existing slugs that are not in the 'slugs' list are
    deleted."""
    slugs = [utils.to_maxlength(Slug, "slug", slug) for slug in slugs]

    existing = {s.slug: s for s in self.slugs.all()}
    utils.set_ordered_entries(self, slugs, existing, Slug, "slug", "content_object")
def set_urls(self, urls):
    """Update the object's URLs to the given list

    'urls' should be a list of strings. URLs that do not exist are
    created. Existing URLs that are not in the 'urls' list are
    deleted."""
    urls = [utils.to_maxlength(URL, "url", url) for url in urls]

    existing = {u.url: u for u in self.urls.all()}
    utils.set_ordered_entries(self, urls, existing, URL, "url", "content_object")
def set_urls(self, urls):
    """ Update the object's URLs to the given list

    'urls' should be a list of strings. URLs that do not exist are
    created. Existing URLs that are not in the 'urls' list are deleted.
    """
    urls = [utils.to_maxlength(URL, 'url', url) for url in urls]

    existing = {u.url: u for u in self.urls.all()}
    utils.set_ordered_entries(self, urls, existing, URL, 'url',
                              'content_object')
def set_slugs(self, slugs):
    """ Update the object's slugs to the given list

    'slugs' should be a list of strings. Slugs that do not exist are
    created. Existing slugs that are not in the 'slugs' list are deleted.
    """
    slugs = [utils.to_maxlength(Slug, 'slug', slug) for slug in slugs]

    existing = {s.slug: s for s in self.slugs.all()}
    utils.set_ordered_entries(self, slugs, existing, Slug, 'slug',
                              'content_object')
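# Hedged usage sketch for set_slugs / set_urls above: both replace the
# object's full ordered list in one call, creating missing entries and
# deleting ones that were dropped. The podcast instance and values are
# hypothetical.
podcast.set_slugs(['linux-podcast', 'linux-podcast-old'])
podcast.set_urls(['http://example.com/feed.xml',
                  'http://example.com/feed.rss'])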
def _perform_subscribe(podcast, user, clients, timestamp, ref_url):
    """ Subscribes to a podcast on multiple clients

    Yields the clients on which a subscription was added, i.e. not those
    where the subscription already existed. """
    # local imports, presumably to avoid circular imports; hoisted out of
    # the loop
    from mygpo.subscriptions.models import Subscription
    from mygpo.history.models import HistoryEntry

    for client in clients:
        try:
            with transaction.atomic():
                subscription = Subscription.objects.create(
                    user=user,
                    client=client,
                    podcast=podcast,
                    ref_url=to_maxlength(Subscription, 'ref_url', ref_url),
                    created=timestamp,
                    modified=timestamp,
                )
        except IntegrityError as ie:
            msg = str(ie)
            if 'Key (user_id, client_id, podcast_id)' in msg:
                # Subscription already exists -- skip
                continue
            else:
                # unknown error
                raise

        logger.info(
            '{user} subscribed to {podcast} on {client}'.format(
                user=user, podcast=podcast, client=client
            )
        )

        HistoryEntry.objects.create(
            timestamp=timestamp,
            podcast=podcast,
            user=user,
            client=client,
            action=HistoryEntry.SUBSCRIBE,
        )

        yield client
def add_slug(self, slug):
    """ Adds a (non-canonical) slug """

    if not slug:
        raise ValueError("'%s' is not a valid slug" % slug)

    existing_slugs = self.slugs.all()

    # cut slug to the maximum allowed length
    slug = utils.to_maxlength(Slug, "slug", slug)

    # check if slug already exists
    if slug in [s.slug for s in existing_slugs]:
        return

    max_order = max([-1] + [s.order for s in existing_slugs])
    next_order = max_order + 1
    Slug.objects.create(scope=self.scope,
                        slug=slug,
                        content_object=self,
                        order=next_order)
def _perform_subscribe(podcast, user, clients, timestamp, ref_url):
    """ Subscribes to a podcast on multiple clients

    Yields the clients on which a subscription was added, i.e. not those
    where the subscription already existed. """
    # local imports, presumably to avoid circular imports; hoisted out of
    # the loop
    from mygpo.subscriptions.models import Subscription
    from mygpo.history.models import HistoryEntry

    for client in clients:
        try:
            with transaction.atomic():
                subscription = Subscription.objects.create(
                    user=user,
                    client=client,
                    podcast=podcast,
                    ref_url=to_maxlength(Subscription, 'ref_url', ref_url),
                    created=timestamp,
                    modified=timestamp,
                )
        except IntegrityError as ie:
            msg = str(ie)
            if 'Key (user_id, client_id, podcast_id)' in msg:
                # Subscription already exists -- skip
                continue
            else:
                # unknown error
                raise

        logger.info('{user} subscribed to {podcast} on {client}'.format(
            user=user, podcast=podcast, client=client))

        HistoryEntry.objects.create(
            timestamp=timestamp,
            podcast=podcast,
            user=user,
            client=client,
            action=HistoryEntry.SUBSCRIBE,
        )

        yield client
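# Hedged sketch: _perform_subscribe is a generator, so nothing is written to
# the database until it is consumed. A caller would typically drain it; the
# user, podcast and clients queryset below are hypothetical (client_set is an
# assumed reverse relation from user to clients).
timestamp = datetime.utcnow()
subscribed_clients = list(
    _perform_subscribe(podcast, user, user.client_set.all(), timestamp,
                       'http://example.com/feed.xml'))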
def add_slug(self, slug):
    """Adds a (non-canonical) slug"""

    if not slug:
        raise ValueError("'%s' is not a valid slug" % slug)

    existing_slugs = self.slugs.all()

    # cut slug to the maximum allowed length
    slug = utils.to_maxlength(Slug, "slug", slug)

    # check if slug already exists
    if slug in [s.slug for s in existing_slugs]:
        return

    max_order = max([-1] + [s.order for s in existing_slugs])
    next_order = max_order + 1
    Slug.objects.create(
        scope=self.scope, slug=slug, content_object=self, order=next_order
    )
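# Hedged illustration of the ordering rule in add_slug above: the new slug
# gets order max(existing) + 1, and the seed value -1 ensures an object's
# first slug gets order 0.
existing_orders = []  # no slugs yet
assert max([-1] + existing_orders) + 1 == 0
existing_orders = [0, 1, 2]
assert max([-1] + existing_orders) + 1 == 3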
def _update_podcast(podcast, parsed, episodes, max_episode_order):
    """ updates a podcast according to new parser results """

    # we need that later to decide if we can "bump" a category
    prev_latest_episode_timestamp = podcast.latest_episode_timestamp

    podcast.title = parsed.get('title') or podcast.title
    podcast.description = parsed.get('description') or podcast.description
    podcast.subtitle = parsed.get('subtitle') or podcast.subtitle
    podcast.link = parsed.get('link') or podcast.link
    podcast.logo_url = parsed.get('logo') or podcast.logo_url
    podcast.author = to_maxlength(Podcast, 'author',
                                  parsed.get('author') or podcast.author)
    podcast.language = to_maxlength(Podcast, 'language',
                                    parsed.get('language') or
                                    podcast.language)
    podcast.content_types = ','.join(parsed.get('content_types')) or \
        podcast.content_types
    # podcast.tags['feed'] = parsed.tags or podcast.tags.get('feed', [])
    podcast.common_episode_title = to_maxlength(
        Podcast,
        'common_episode_title',
        parsed.get('common_title') or podcast.common_episode_title)
    podcast.new_location = parsed.get('new_location') or podcast.new_location
    podcast.flattr_url = to_maxlength(Podcast, 'flattr_url',
                                      parsed.get('flattr') or
                                      podcast.flattr_url)
    podcast.hub = parsed.get('hub') or podcast.hub
    podcast.license = parsed.get('license') or podcast.license
    podcast.max_episode_order = max_episode_order

    podcast.add_missing_urls(parsed.get('urls', []))

    if podcast.new_location:
        try:
            new_podcast = Podcast.objects.get(urls__url=podcast.new_location)
            if new_podcast != podcast:
                _mark_outdated(podcast, 'redirected to different podcast')
                return
        except Podcast.DoesNotExist:
            podcast.set_url(podcast.new_location)

    # latest episode timestamp
    episodes = Episode.objects.filter(podcast=podcast,
                                      released__isnull=False)\
                              .order_by('released')

    podcast.update_interval = get_update_interval(episodes)

    latest_episode = episodes.last()
    if latest_episode:
        podcast.latest_episode_timestamp = latest_episode.released

    # podcast.episode_count is not updated here on purpose. It is, instead,
    # continuously updated when creating new episodes in
    # EpisodeManager.get_or_create_for_url

    _update_categories(podcast, prev_latest_episode_timestamp)

    # try to download the logo and reset logo_url to None on http errors
    found = _save_podcast_logo(podcast.logo_url)
    if not found:
        podcast.logo_url = None

    # The podcast is always saved (not just when there are changes) because
    # we need to record the last update
    logger.info('Saving podcast.')
    podcast.last_update = datetime.utcnow()
    podcast.save()

    try:
        subscribe_at_hub(podcast)
    except SubscriptionError as se:
        logger.warning('subscribing to hub failed: %s', str(se))

    if not podcast.slug:
        slug = PodcastSlug(podcast).get_slug()
        if slug:
            podcast.add_slug(slug)

    assign_missing_episode_slugs(podcast)
    update_related_podcasts.delay(podcast)
def _update_podcast(self, podcast, parsed, episode_updater, update_result):
    """ updates a podcast according to new parser results """

    # we need that later to decide if we can "bump" a category
    prev_latest_episode_timestamp = podcast.latest_episode_timestamp

    # will later be used to see whether the index is outdated
    old_index_fields = get_index_fields(podcast)

    podcast.title = parsed.get('title') or podcast.title
    podcast.description = parsed.get('description') or podcast.description
    podcast.subtitle = parsed.get('subtitle') or podcast.subtitle
    podcast.link = parsed.get('link') or podcast.link
    podcast.logo_url = parsed.get('logo') or podcast.logo_url
    podcast.author = to_maxlength(Podcast, 'author',
                                  parsed.get('author') or podcast.author)
    podcast.language = to_maxlength(
        Podcast, 'language', parsed.get('language') or podcast.language)
    podcast.content_types = (','.join(parsed.get('content_types')) or
                             podcast.content_types)
    # podcast.tags['feed'] = parsed.tags or podcast.tags.get('feed', [])
    podcast.common_episode_title = to_maxlength(
        Podcast,
        'common_episode_title',
        parsed.get('common_title') or podcast.common_episode_title,
    )
    podcast.new_location = parsed.get('new_location') or podcast.new_location
    podcast.flattr_url = to_maxlength(
        Podcast, 'flattr_url', parsed.get('flattr') or podcast.flattr_url)
    podcast.hub = parsed.get('hub') or podcast.hub
    podcast.license = parsed.get('license') or podcast.license
    podcast.max_episode_order = episode_updater.max_episode_order

    podcast.add_missing_urls(parsed.get('urls', []))

    if podcast.new_location:
        try:
            new_podcast = Podcast.objects.get(urls__url=podcast.new_location)
            if new_podcast != podcast:
                self._mark_outdated(podcast,
                                    'redirected to different podcast',
                                    episode_updater)
                return
        except Podcast.DoesNotExist:
            podcast.set_url(podcast.new_location)

    # latest episode timestamp
    episodes = Episode.objects.filter(
        podcast=podcast, released__isnull=False).order_by('released')

    # Determine update interval

    # Update interval is based on intervals between episodes
    podcast.update_interval = episode_updater.get_update_interval(episodes)

    # factor is increased / decreased depending on whether the latest
    # update has returned episodes
    if update_result.episodes_added == 0:  # no episodes, incr factor
        newfactor = podcast.update_interval_factor * 1.2
        podcast.update_interval_factor = min(1000, newfactor)  # never above 1000
    elif update_result.episodes_added > 1:  # new episodes, decr factor
        newfactor = podcast.update_interval_factor / 1.2
        podcast.update_interval_factor = max(1, newfactor)  # never below 1

    latest_episode = episodes.last()
    if latest_episode:
        podcast.latest_episode_timestamp = latest_episode.released

    # podcast.episode_count is not updated here on purpose. It is, instead,
    # continuously updated when creating new episodes in
    # EpisodeManager.get_or_create_for_url

    self._update_categories(podcast, prev_latest_episode_timestamp)

    # try to download the logo and reset logo_url to None on http errors
    found = CoverArt.save_podcast_logo(podcast.logo_url)
    if not found:
        podcast.logo_url = None

    # check if search index should be considered out of date
    new_index_fields = get_index_fields(podcast)
    if list(old_index_fields.items()) != list(new_index_fields.items()):
        podcast.search_index_uptodate = False

    # The podcast is always saved (not just when there are changes) because
    # we need to record the last update
    logger.info('Saving podcast.')
    podcast.last_update = datetime.utcnow()
    podcast.save()

    try:
        subscribe_at_hub(podcast)
    except SubscriptionError as se:
        logger.warning('subscribing to hub failed: %s', str(se))

    self.assign_slug(podcast)
    episode_updater.assign_missing_episode_slugs()
    update_related_podcasts.delay(podcast.pk)
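# Hedged illustration of the update_interval_factor adjustment above: the
# factor grows by 1.2x (capped at 1000) after an update that added no new
# episodes and shrinks by 1.2x (floored at 1) after one that added several.
factor = 1.0
for episodes_added in (0, 0, 5):
    if episodes_added == 0:
        factor = min(1000, factor * 1.2)
    elif episodes_added > 1:
        factor = max(1, factor / 1.2)
assert abs(factor - 1.2) < 1e-9  # 1.0 -> 1.2 -> 1.44 -> 1.2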
def update_episode(self, parsed_episode):
    """ updates "episode" with the data from "parsed_episode" """

    # TODO: check if there have been any changes, to
    # avoid unnecessary updates
    self.episode.guid = to_maxlength(
        Episode, "guid", parsed_episode.get("guid") or self.episode.guid
    )
    self.episode.description = (
        parsed_episode.get("description") or self.episode.description
    )
    self.episode.subtitle = parsed_episode.get("subtitle") or self.episode.subtitle
    self.episode.content = (
        parsed_episode.get("content")
        or parsed_episode.get("description")
        or self.episode.content
    )
    self.episode.link = to_maxlength(
        Episode, "link", parsed_episode.get("link") or self.episode.link
    )
    self.episode.released = (
        datetime.utcfromtimestamp(parsed_episode.get("released"))
        if parsed_episode.get("released")
        else self.episode.released
    )
    self.episode.author = to_maxlength(
        Episode, "author", parsed_episode.get("author") or self.episode.author
    )
    self.episode.duration = parsed_episode.get("duration") or self.episode.duration
    self.episode.filesize = parsed_episode["files"][0]["filesize"]
    self.episode.language = (
        parsed_episode.get("language")
        or self.episode.language
        or self.podcast.language
    )
    mimetypes = [f["mimetype"] for f in parsed_episode.get("files", [])]
    self.episode.mimetypes = ",".join(list(set(filter(None, mimetypes))))
    self.episode.flattr_url = to_maxlength(
        Episode,
        "flattr_url",
        parsed_episode.get("flattr") or self.episode.flattr_url,
    )
    self.episode.license = parsed_episode.get("license") or self.episode.license
    self.episode.title = to_maxlength(
        Episode,
        "title",
        parsed_episode.get("title")
        or self.episode.title
        or file_basename_no_extension(self.episode.url),
    )
    self.episode.last_update = datetime.utcnow()
    self.episode.save()

    parsed_urls = list(
        chain.from_iterable(
            f.get("urls", []) for f in parsed_episode.get("files", [])
        )
    )
    self.episode.add_missing_urls(parsed_urls)
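# Hedged sketch of the parsed_episode dict shape consumed by update_episode
# above: only "files" is strictly required (filesize is read unguarded);
# every other key falls back to the existing episode value. All values are
# hypothetical.
parsed = {
    "guid": "urn:example:episode-1",
    "title": "Episode 1",
    "released": 1609459200,  # unix timestamp
    "files": [{
        "filesize": 12345678,
        "mimetype": "audio/mpeg",
        "urls": ["http://example.com/episode1.mp3"],
    }],
}
updater.update_episode(parsed)  # updater: hypothetical instance of the class above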