def get_common_episode_title(self, num_episodes=100):
    """Return the title prefix shared by this podcast's episodes.

    Inspects the ``num_episodes`` most recently released episodes and
    computes their longest common substring.  The result is cut off
    before the first digit so that episode numbers are never partially
    stripped (eg if a feed contains episodes 100-199).

    Returns None when fewer than two non-empty titles exist or when the
    common part is too short to be meaningful.
    """
    # a pre-computed value takes precedence
    if self.common_episode_title:
        return self.common_episode_title

    from mygpo.db.couchdb.episode import episodes_for_podcast
    episodes = episodes_for_podcast(self, descending=True,
                                    limit=num_episodes)

    # We take all non-empty titles; build a real list so that len()
    # works on Python 3 as well (filter() returns an iterator there)
    titles = [e.title for e in episodes if e.title]

    # there can not be a "common" title of a single title
    if len(titles) < 2:
        return None

    # get the longest common substring
    common_title = utils.longest_substr(titles)

    # but consider only the part up to the first number. Otherwise we risk
    # removing part of the number (eg if a feed contains episodes 100-199)
    common_title = re.search(r'^\D*', common_title).group(0)

    if len(common_title.strip()) < 2:
        return None

    return common_title
def add_user_recursive(self, user, docs): """ adds a user and all the podcast and episodes it references """ # User docs.add(user._id) # Suggestions suggestions = suggestions_for_user(user) docs.add(suggestions._id) progress(0, len(docs), '', stream=sys.stderr) # Podcast States for p_state in podcast_states_for_user(user): self.add_podcast_state(p_state, docs) progress(0, len(docs), p_state, stream=sys.stderr) # Podcast podcast = podcast_by_id(p_state.podcast) self.add_podcast(podcast, docs) progress(0, len(docs), podcast, stream=sys.stderr) # Episodes for episode in episodes_for_podcast(podcast): self.add_episode(episode, docs) progress(0, len(docs), episode, stream=sys.stderr) e_state = episode_state_for_user_episode(user, episode) self.add_episode_state(e_state, docs) progress(0, len(docs), e_state, stream=sys.stderr)
def episode_list(podcast, user, offset=0, limit=None):
    """ Returns a list of episodes, with their action-attribute set to
    the latest action. The attribute is unset if there is no
    episode-action for the episode. """

    listeners = dict(episode_listener_counts(podcast))

    episodes = episodes_for_podcast(podcast, descending=True,
                                    skip=offset, limit=limit)

    if user.is_authenticated():

        # prepare pre-populated data for HistoryEntry.fetch_data
        podcasts_dict = {p_id: podcast for p_id in podcast.get_ids()}
        episodes_dict = {episode._id: episode for episode in episodes}

        actions = [HistoryEntry.from_action_dict(a)
                   for a in get_podcasts_episode_states(podcast, user._id)]

        HistoryEntry.fetch_data(user, actions,
                                podcasts=podcasts_dict,
                                episodes=episodes_dict)

        episode_actions = {a.episode_id: a for a in actions}
    else:
        episode_actions = {}

    annotate = partial(_annotate_episode, listeners, episode_actions)
    return [annotate(episode) for episode in episodes]
def get_listener_stats(self, podcast):
    """Collect two listening-delay series for *podcast*.

    Returns a tuple (i1, i2):
    * i1: seconds between an episode's release and its first download
    * i2: seconds between the first download and the first play event
    """

    release_to_download = []   # i1
    download_to_play = []      # i2

    # release date of each episode, indexed by episode id
    released_by_id = {e._id: e.released
                      for e in episodes_for_podcast(podcast)}

    for state in all_podcast_episode_states(podcast):

        released = released_by_id.get(state.episode, None)

        download = self.first_action(state.actions, 'download')
        if download and None not in (released, download.timestamp):
            release_to_download.append(
                total_seconds(download.timestamp - released))

        play = self.first_action(state.actions, 'play')
        if None not in (download, play) and \
           None not in (download.timestamp, play.timestamp):
            download_to_play.append(
                total_seconds(play.timestamp - download.timestamp))

    return release_to_download, download_to_play
def get_episode_after(self, episode):
    """Return the episode released right after *episode*, or None.

    Episodes without a release date have no successor.
    """
    if not episode.released:
        return None

    from mygpo.db.couchdb.episode import episodes_for_podcast

    # query strictly-later releases; +1s excludes *episode* itself
    lower_bound = episode.released + timedelta(seconds=1)
    following = episodes_for_podcast(self, since=lower_bound, limit=1)
    return next(iter(following), None)
def get_episode_before(self, episode):
    """Return the episode released right before *episode*, or None.

    Episodes without a release date have no predecessor.
    """
    if not episode.released:
        return None

    from mygpo.db.couchdb.episode import episodes_for_podcast

    # newest episode released up to (excluding) this one's release date
    preceding = episodes_for_podcast(self, until=episode.released,
                                     descending=True, limit=1)
    return next(iter(preceding), None)
def get_newest_episodes(self, max_date, max_per_podcast=5):
    """ Returns the newest episodes of all subscribed podcasts

    Only max_per_podcast episodes per podcast are loaded. Episodes
    with release dates above max_date are discarded.

    This method returns a generator that produces the newest episodes.

    The number of required DB queries is equal to the number of
    (distinct) podcasts of all consumed episodes (max: number of
    subscribed podcasts), plus a constant number of initial queries
    (when the first episode is consumed). """

    # sort key; episodes without a release date sort as very old.
    # NOTE: written as 1, 1 -- the former 01, 01 literals are a
    # SyntaxError on Python 3 (leading zeros in decimal literals)
    cmp_key = lambda episode: episode.released or datetime(2000, 1, 1)

    # only consider podcasts that have at least one episode, newest-first
    podcasts = list(self.get_subscribed_podcasts())
    podcasts = filter(lambda p: p.latest_episode_timestamp, podcasts)
    podcasts = sorted(podcasts, key=lambda p: p.latest_episode_timestamp,
                      reverse=True)

    podcast_dict = dict((p.get_id(), p) for p in podcasts)

    # contains the un-yielded episodes, newest first
    episodes = []

    for podcast in podcasts:
        yielded_episodes = 0

        for episode in episodes:
            # determine for which episodes there won't be a new episodes
            # that is newer; those can be yielded
            if episode.released > podcast.latest_episode_timestamp:
                p = podcast_dict.get(episode.podcast, None)
                yield proxy_object(episode, podcast=p)
                yielded_episodes += 1
            else:
                break

        # remove the episodes that have been yielded before
        episodes = episodes[yielded_episodes:]

        # fetch and merge episodes for the next podcast
        from mygpo.db.couchdb.episode import episodes_for_podcast
        new_episodes = episodes_for_podcast(podcast, since=1,
                                            until=max_date,
                                            descending=True,
                                            limit=max_per_podcast)
        episodes = sorted(episodes + new_episodes, key=cmp_key,
                          reverse=True)

    # yield the remaining episodes
    for episode in episodes:
        podcast = podcast_dict.get(episode.podcast, None)
        yield proxy_object(episode, podcast=podcast)
def listener_data(podcasts, start_date=datetime(2010, 1, 1), leap=timedelta(days=1)): """ Returns data for the podcast listener timeseries An iterator with data for each day (starting from either the first released episode or the earliest listen-event) is returned, where each day is reresented by a dictionary * date: the day * listeners: the number of listeners on that day * episode: (one of) the episode(s) released on that day """ # pre-calculate episode list, make it index-able by release-date episodes = (episodes_for_podcast(podcast, since=start_date) for podcast in podcasts) episodes = flatten(episodes) episodes = dict((e.released.date(), e) for e in episodes) listeners = [ podcast_listener_count_timespan(p, start=start_date) for p in podcasts ] listeners = filter(None, listeners) # we start either at the first episode-release or the first listen-event events = [] if episodes.keys(): events.append(min(episodes.keys())) if listeners: events.append(min([l[0][0] for l in listeners])) if not events: return start = min(events) for d in daterange(start, leap=leap): listener_sum = 0 for l in listeners: if not l: continue day, count = l[0] if day == d: listener_sum += count l.pop(0) episode = episodes[d] if d in episodes else None yield dict(date=d, listeners=listener_sum, episode=episode)
def add_podcast_recursive(self, podcast, docs):
    """ adds a podcast, its states, episodes and episode states to *docs*

    Progress is reported to stderr after every addition; the
    interleaving of add/progress calls is intentional.
    """
    self.add_podcast(podcast, docs)
    progress(0, len(docs), podcast, stream=sys.stderr)

    # states of all users for this podcast
    states = all_podcast_states(podcast)
    for state in states:
        self.add_podcast_state(state, docs)
        progress(0, len(docs), state, stream=sys.stderr)

    # Episodes
    for episode in episodes_for_podcast(podcast.get_podcast()):
        self.add_episode(episode, docs)
        progress(0, len(docs), episode, stream=sys.stderr)

        # states of all users for this episode
        states = all_episode_states(episode)
        for state in states:
            self.add_episode_state(state, docs)
            progress(0, len(docs), state, stream=sys.stderr)
def episodes(request, podcast):
    """Publisher view: list a podcast's episodes with listener counts."""

    if not check_publisher_permission(request.user, podcast):
        return HttpResponseForbidden()

    episode_list = episodes_for_podcast(podcast, descending=True)

    listeners = dict(episode_listener_counts(podcast))

    # the appended 0 guards max() against an empty listeners dict
    max_listeners = max(listeners.values() + [0])

    # attach each episode's listener count (None when unknown)
    annotated = [proxy_object(e, listeners=listeners.get(e._id, None))
                 for e in episode_list]

    return render(request, 'publisher/episodes.html', {
        'podcast': podcast,
        'episodes': annotated,
        'max_listeners': max_listeners
    })
def get_episode_updates(self, user, subscribed_podcasts, since,
                        max_per_podcast=5):
    """ Returns the episode updates since the timestamp

    For each subscribed podcast up to max_per_podcast episodes released
    since *since* are fetched, then combined with the user's episode
    states into EpisodeStatus tuples.  When gevent is available the DB
    queries are issued concurrently, one greenlet per podcast.
    """

    if gevent:
        # DB: get max_per_podcast episodes for each subscribed podcast
        episode_jobs = [gevent.spawn(episodes_for_podcast, p, since,
                                     limit=max_per_podcast)
                        for p in subscribed_podcasts]
        gevent.joinall(episode_jobs)
        episodes = chain.from_iterable(job.get() for job in episode_jobs)

        # DB: get all episode states for all subscribed podcasts
        e_action_jobs = [gevent.spawn(get_podcasts_episode_states, p,
                                      user._id)
                         for p in subscribed_podcasts]
        gevent.joinall(e_action_jobs)
        e_actions = chain.from_iterable(job.get() for job in e_action_jobs)

    else:
        # serial fallback: same queries, one podcast at a time
        episodes = chain.from_iterable(
            episodes_for_podcast(p, since, limit=max_per_podcast)
            for p in subscribed_podcasts)

        e_actions = chain.from_iterable(
            get_podcasts_episode_states(p, user._id)
            for p in subscribed_podcasts)

    # TODO: get_podcasts_episode_states could be optimized by returning
    # only actions within some time frame

    # every recent episode starts out as 'new' ...
    e_status = {e._id: EpisodeStatus(e, 'new', None) for e in episodes}

    # ... and is overwritten by the user's latest matching action;
    # actions for episodes outside the update window are skipped
    for action in e_actions:
        e_id = action['episode_id']

        if not e_id in e_status:
            continue

        episode = e_status[e_id].episode

        e_status[e_id] = EpisodeStatus(episode, action['action'], action)

    return e_status.itervalues()
def __get_episodes(self):
    """Return a dict mapping episode-id to episode for all podcasts."""
    collected = {}
    for podcast in self.podcasts:
        # later podcasts overwrite entries with the same episode id
        for episode in episodes_for_podcast(podcast):
            collected[episode._id] = episode
    return collected
def test_merge(self):
    """Merging two podcasts keeps episodes, actions and subscriptions."""

    # two podcasts that will be merged into one
    p1 = Podcast()
    p1.urls = ['http://example.com/podcast1.rss']
    p1.save()

    p2 = Podcast()
    p2.urls = ['http://example.com/podcast2.rss']
    p2.save()

    # two episodes per podcast; e2 and e3 will later be merged
    e1 = Episode()
    e1.title = 'Episode 1'
    e1.podcast = p1.get_id()
    e1.urls = ['http://example.com/podcast1/e1.mp3']
    e1.save()

    e2 = Episode()
    e2.title = 'Episode 2'
    e2.podcast = p1.get_id()
    e2.urls = ['http://example.com/podcast1/e2.mp3']
    e2.save()

    e3 = Episode()
    e3.title = 'Episode 3'
    e3.podcast = p2.get_id()
    e3.urls = ['http://example.com/podcast2/e2.mp3']
    e3.save()

    e4 = Episode()
    e4.title = 'Episode 4'
    e4.podcast = p2.get_id()
    e4.urls = ['http://example.com/podcast2/e3.mp3']
    e4.save()

    # a user with two devices
    user = User()
    user.username = '******'
    user.email = '*****@*****.**'
    user.set_password('secret')

    device1 = Device()
    device1.uid = 'dev1'

    device2 = Device()
    device2.uid = 'dev2'

    user.devices.append(device1)
    user.devices.append(device2)
    user.save()

    # subscription history for p1: subscribe, unsubscribe, re-subscribe.
    # NOTE(review): the sleeps presumably keep the subscription actions'
    # timestamps distinct -- confirm against the action timestamp
    # granularity
    p1.subscribe(user, device1)
    time.sleep(1)
    p1.unsubscribe(user, device1)
    time.sleep(1)
    p1.subscribe(user, device1)
    p2.subscribe(user, device2)

    # one play-action for an episode of each podcast
    s1 = episode_state_for_user_episode(user, e1)
    add_episode_actions(s1, [EpisodeAction(action='play',
        upload_timestamp=get_timestamp(datetime.utcnow()))])

    s3 = episode_state_for_user_episode(user, e3)
    add_episode_actions(s3, [EpisodeAction(action='play',
        upload_timestamp=get_timestamp(datetime.utcnow()))])

    # we need that for later
    e3_id = e3._id

    actions = Counter()

    # decide which episodes to merge
    groups = [(0, [e1]), (1, [e2, e3]), (2, [e4])]

    # carry out the merge
    pm = PodcastMerger([p1, p2], actions, groups)
    pm.merge()

    # e1's play action survives the merge
    e1 = episode_by_id(e1._id)
    es1 = episode_state_for_user_episode(user, e1)
    self.assertEqual(len(es1.actions), 1)

    # check if merged episode's id can still be accessed
    e3 = episode_by_id(e3_id)
    es3 = episode_state_for_user_episode(user, e3)
    self.assertEqual(len(es3.actions), 1)

    # both devices remain subscribed to the merged podcast
    p1 = podcast_by_id(p1.get_id())
    ps1 = podcast_state_for_user_podcast(user, p1)
    self.assertEqual(len(ps1.get_subscribed_device_ids()), 2)

    # merging e2+e3 leaves three distinct episodes on the merged podcast
    self.assertEqual(len(list(episodes_for_podcast(p1))), 3)