def episode_state_for_ref_urls(user, podcast_url, episode_url):
    """Return the episode state of ``user`` for the given reference URLs.

    Lookup order:

    1. the cache, keyed by the user id and the SHA-1 digests of both URLs
       (a falsy cached value is treated as a miss);
    2. the ``episode_states/by_ref_urls`` view of the userdata database;
       a hit is tagged with the two reference URLs, cached for one hour
       and returned;
    3. otherwise the podcast and episode are fetched with ``create=True``
       and the result of ``episode_state_for_user_episode`` is returned
       (this function does not cache that result itself).

    Raises ``QueryParameterMissing`` (carrying the parameter name) for the
    first of ``user``, ``podcast_url``, ``episode_url`` that is falsy.
    """
    # Validate in declaration order so the first missing one is reported.
    required = (
        ('user', user),
        ('podcast_url', podcast_url),
        ('episode_url', episode_url),
    )
    for param_name, param_value in required:
        if not param_value:
            raise QueryParameterMissing(param_name)

    # URLs are hashed so the cache key stays short and free of odd characters.
    key_parts = (
        user._id,
        sha1(podcast_url).hexdigest(),
        sha1(episode_url).hexdigest(),
    )
    cache_key = 'episode-state-%s-%s-%s' % key_parts

    cached_state = cache.get(cache_key)
    if cached_state:
        return cached_state

    stored_state = get_single_result(
        get_userdata_database(),
        'episode_states/by_ref_urls',
        key=[user._id, podcast_url, episode_url],
        limit=1,
        include_docs=True,
        schema=EpisodeUserState,
    )

    if not stored_state:
        # Nothing stored under these URLs: resolve the actual podcast and
        # episode objects and delegate to the per-episode lookup.
        podcast = podcast_for_url(podcast_url, create=True)
        episode = episode_for_podcast_id_url(
            podcast.get_id(), episode_url, create=True)
        return episode_state_for_user_episode(user, episode)

    stored_state.ref_url = episode_url
    stored_state.podcast_ref_url = podcast_url
    one_hour = 60 * 60
    cache.set(cache_key, stored_state, one_hour)
    return stored_state
def _update_episodes(self, podcast, parsed_episodes):
    """Apply parsed episode data to ``podcast`` and return the touched episodes.

    At most ``MAX_EPISODES_UPDATE`` entries of ``parsed_episodes`` are
    processed; entries for which ``get_episode_url`` yields nothing are
    skipped. Each remaining entry is matched to an episode (fetched with
    ``create=True``) and paired with its update function. Episodes returned
    by ``episodes_for_podcast_current`` (limit 500) that were not matched
    are paired with ``mark_outdated``. All pairs are then handed to
    ``bulk_save_retry`` together with ``self.db``.

    ``parsed_episodes`` must support ``len()`` (it is used for logging).

    Returns the list of episodes matched from the parsed data, in the
    order they were processed.
    """
    podcast_id = podcast.get_id()

    # (obj, fun) pairs, where fun is the function that updates obj
    pending = []

    batch = list(islice(parsed_episodes, 0, MAX_EPISODES_UPDATE))
    logger.info('Parsed %d (%d) episodes', len(parsed_episodes), len(batch))

    for position, parsed in enumerate(batch, 1):
        episode_url = get_episode_url(parsed)

        if episode_url:
            logger.info('Updating episode %d / %d', position,
                        len(parsed_episodes))
            episode = episode_for_podcast_id_url(
                podcast_id, episode_url, create=True)
            updater = get_episode_update_function(parsed, episode, podcast)
            pending.append((episode, updater))
        else:
            logger.info('Skipping episode %d for missing URL', position)

    # episodes that were present in the parsed data
    found = [episode for episode, _updater in pending]
    logger.info('Updating %d episodes with new data', len(found))

    # whatever is currently listed but was not found gets marked outdated
    listed = set(episodes_for_podcast_current(podcast, limit=500))
    stale = listed - set(found)
    logger.info('Marking %d episodes as outdated', len(stale))
    pending += [(episode, mark_outdated) for episode in stale]

    logger.info('Saving %d changes', len(pending))
    bulk_save_retry(pending, self.db)

    return found