def search_podcasts(q, limit=20, skip=0):

    if is_url(q):
        url = sanitize_url(q)

        podcast = Podcast.for_url(url, create=True)

        # a missing title indicates that the feed hasn't been fetched yet
        if not podcast.title:
            update_podcasts([podcast])

        podcast = Podcast.for_url(url)

        return [podcast], 1

    db = get_main_database()

    # FIXME: current couchdbkit can't parse responses for multi-query
    # searches, so we strip commas from the query
    q = q.replace(',', '')

    res = db.search('podcasts/search',
            wrapper      = search_wrapper,
            include_docs = True,
            limit        = limit,
            skip         = skip,
            q            = q,
            sort         = '\\subscribers<int>',
        )

    # FIXME: return empty results in case of search backend error
    try:
        return list(res), res.total_rows
    except Exception:
        return [], 0
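# A hypothetical usage sketch for search_podcasts(); the query strings
# below are illustrative and not part of the original module.
def _example_search():

    # free-text search, first page of results
    podcasts, total = search_podcasts('linux', limit=10)

    # a URL query bypasses the search backend and resolves (or creates)
    # the podcast for that feed URL directly
    podcasts, total = search_podcasts('http://example.com/feed.xml')

    return podcasts, total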
def update_episodes(user, actions, now, ua_string):
    update_urls = []
    grouped_actions = defaultdict(list)

    # group all actions by their episode
    for action in actions:

        podcast_url = action['podcast']
        podcast_url = sanitize_append(podcast_url, 'podcast', update_urls)
        if podcast_url == '':
            continue

        episode_url = action['episode']
        episode_url = sanitize_append(episode_url, 'episode', update_urls)
        if episode_url == '':
            continue

        act = parse_episode_action(action, user, update_urls, now, ua_string)
        grouped_actions[ (podcast_url, episode_url) ].append(act)

    # Prepare the updates for each episode state
    obj_funs = []

    for (p_url, e_url), action_list in grouped_actions.iteritems():
        episode_state = EpisodeUserState.for_ref_urls(user, p_url, e_url)

        fun = partial(update_episode_actions, action_list=action_list)
        obj_funs.append( (episode_state, fun) )

    db = get_main_database()
    bulk_save_retry(db, obj_funs)

    return update_urls
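# A sketch of the action dicts that update_episodes() expects, following
# the gpodder.net episode-action payload; the user object, URLs and
# User-Agent string are illustrative.
def _example_update(user):
    from datetime import datetime

    actions = [{
        'podcast':   'http://example.com/feed.xml',
        'episode':   'http://example.com/episode-1.mp3',
        'action':    'play',
        'timestamp': '2012-01-01T10:00:00',
    }]

    # the return value is presumably the list of URLs that were
    # rewritten during sanitization
    return update_episodes(user, actions, datetime.utcnow(), 'gpodder/3')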
def _query(self):
    """ Queries the database and stores the heatmap and its borders """

    db = get_main_database()

    group_level = len(filter(None,
                [self.podcast_id, self.episode_id, self.user_id]))

    r = db.view('heatmap/by_episode',
            startkey    = [self.podcast_id, self.episode_id,
                           self.user_id],
            endkey      = [self.podcast_id, self.episode_id or {},
                           self.user_id or {}],
            reduce      = True,
            group       = True,
            group_level = group_level,
            stale       = 'update_after',
        )

    if not r:
        self.heatmap = []
        self.borders = []
    else:
        res = r.first()['value']
        self.heatmap = res['heatmap']
        self.borders = res['borders']

        # heatmap info doesn't reach until the end of the episode
        # so we extend it with 0 listeners
        if self.duration > self.borders[-1]:
            self.heatmap.append(0)
            self.borders.append(self.duration)
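# A sketch of how heatmap and borders presumably relate: borders
# delimits the sections (len(borders) == len(heatmap) + 1), so each
# heatmap value covers the interval between two adjacent borders.
# This helper is illustrative, not part of the original class.
def _example_sections(self):
    # e.g. heatmap=[3, 5, 1], borders=[0, 60, 120, 180] yields
    # (3, 0, 60), (5, 60, 120), (1, 120, 180)
    return zip(self.heatmap, self.borders, self.borders[1:])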
def execute(self):
    """ Executes all added actions in bulk """
    db = get_main_database()
    obj_funs = map(self._get_obj_fun, self.actions)
    bulk_save_retry(db, obj_funs)

    # prepare for another run
    self.actions = []
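# A sketch of what each (document, update-function) pair handed to
# bulk_save_retry() presumably looks like: the function takes the
# document and returns the modified version, so the update can be
# re-applied to a freshly loaded copy after a save conflict. The
# tagging update below is purely illustrative.
def _example_obj_fun(podcast_state):

    def add_tag(state):
        state.tags = list(set(state.tags + ['news']))
        return state

    return (podcast_state, add_tag)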
def get_podcast_count_for_language():
    """ Returns the number of podcasts for each language """

    counts = defaultdict(int)

    db = get_main_database()
    r = db.view('podcasts/by_language',
            reduce      = True,
            group_level = 1,
            stale       = 'update_after',
        )

    counts.update(dict((x['key'][0], x['value']) for x in r))

    return counts
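# Hypothetical usage: list the five languages with the most podcasts.
def _example_top_languages():
    counts = get_podcast_count_for_language()
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)[:5]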
def handle(self, *args, **options):

    db = get_main_database()
    status = self.get_cmd_status()
    since = self.get_since(status, options)
    objects = self.get_objects(db, since)
    actions = Counter()

    # create unfinished command run status
    run_status = CommandRunStatus()
    run_status.timestamp_started = datetime.utcnow()
    run_status.start_seq = since
    # add it to existing one (if any)
    status.runs.append(run_status)
    status.save()

    total = db.info()['update_seq']

    has_slug = lambda x: bool(x.slug)

    for seq, obj in objects:

        # the database might have grown in the meantime
        total = db.info()['update_seq']

        if isinstance(obj, PodcastGroup):
            podcasts = filter(has_slug, obj.podcasts)

        elif isinstance(obj, Podcast):
            podcasts = filter(has_slug, [obj])

        elif isinstance(obj, Episode):
            if has_slug(obj):
                continue

            podcast = Podcast.get(obj.podcast)
            if not podcast:
                continue
            podcasts = filter(has_slug, [podcast])

        else:
            # skip documents of other classes
            continue

        updated = self.handle_podcasts(podcasts)
        actions['updated'] += updated

        if not options['silent']:
            status_str = ', '.join('%s: %d' % x for x in actions.items())
            progress(seq, total, status_str)

    # finish command run status
    run_status.timestamp_finished = datetime.utcnow()
    run_status.end_seq = total
    run_status.status_counter = dict(actions)
    # and overwrite existing one (we could keep a longer log here)
    status.runs = [run_status]
    status.save()
def get_podcast_with_new_episodes(self):
    db = get_main_database()
    res = db.view('episodes/need_update',
            group_level = 1,
            reduce      = True,
        )

    for r in res:
        podcast_id = r['key']
        podcast = Podcast.get(podcast_id)
        if podcast:
            yield podcast
def __init__(self, user, device):
    self.user = user
    self.device = device
    self._db = get_main_database()

    # in CouchDB's collation order, None sorts before and {} after all
    # other values, so these keys span every entry for the user (or for
    # the user's device)
    if device:
        self._view = 'history/by_device'
        self._startkey = [self.user._id, device.id, None]
        self._endkey   = [self.user._id, device.id, {}]
    else:
        self._view = 'history/by_user'
        self._startkey = [self.user._id, None]
        self._endkey   = [self.user._id, {}]
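# A sketch of how the stored range could be consumed; the method name
# and paging parameters are illustrative, not part of the original
# class.
def _example_page(self, skip=0, limit=20):
    res = self._db.view(self._view,
            startkey = self._startkey,
            endkey   = self._endkey,
            skip     = skip,
            limit    = limit,
        )
    return [r['value'] for r in res]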
def handle(self, *args, **options):

    skip = options.get('skip')

    total = EpisodeUserState.view('episode_states/by_user_episode',
            limit = 0,
        ).total_rows

    db = get_main_database()

    actions = Counter()
    actions['merged'] = 0

    for n in count(skip):

        first = EpisodeUserState.view('episode_states/by_user_episode',
                skip         = n,
                include_docs = True,
                limit        = 1,
            )
        first = list(first)
        if not first:
            break
        first = first[0]

        states = EpisodeUserState.view('episode_states/by_user_episode',
                key          = [first.user, first.episode],
                include_docs = True,
            )
        states = list(states)
        l1 = len(states)

        # we don't want to delete this one
        states.remove(first)
        assert len(states) == l1 - 1

        if states:
            updater = get_updater(states)

            obj_funs = [(first, updater)] + \
                       [(state, do_delete) for state in states]

            bulk_save_retry(db, obj_funs)

            # every remaining duplicate is merged into the first state
            merged = len(states)
            actions['merged'] += merged
            total -= merged

        status_str = ', '.join('%s: %d' % x for x in actions.items())
        progress(n+1, total, status_str)
def get_episode_states(self, user_id):
    """ Returns the latest episode actions for the podcast's episodes """

    from mygpo.users.models import EpisodeUserState

    db = get_main_database()

    res = db.view('episode_states/by_user_podcast',
            startkey = [user_id, self.get_id(), None],
            endkey   = [user_id, self.get_id(), {}],
        )

    for r in res:
        action = r['value']
        yield action
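# Hypothetical usage: tally a user's most recent action per episode of
# this podcast, assuming each emitted value carries an 'action' field.
def _example_action_summary(podcast, user_id):
    from collections import Counter
    return Counter(
        state['action'] for state in podcast.get_episode_states(user_id)
    )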
def _query(self):
    db = get_main_database()
    res = db.view('categories/by_weight',
            descending = True,
            skip       = self.skip,
            limit      = self.count,
            stale      = 'update_after',
        )

    mk_entry = lambda r: TagCloudEntry(r['value'], r['key'])
    self._entries = map(mk_entry, res)

    if self.sort_by_name:
        self._entries.sort(key = lambda x: x.label.lower())
def multi_request_view(cls, view, wrap=True, auto_advance=True,
        *args, **kwargs):
    """ splits up a view request into several requests, which reduces
    the server load if the number of returned objects is large.

    NOTE: As such a split request is obviously not atomic anymore,
    results might skip some elements or contain some twice

    If auto_advance is False the method will always request the same
    range. This can be useful when the view contains unprocessed items
    and the caller processes the items, thus removing them from the
    view before the next request.
    """

    per_page = kwargs.get('limit', 1000)
    kwargs['limit'] = per_page + 1
    db = get_main_database()
    wrapper = kwargs.pop('wrapper', cls.wrap)
    cont = True

    while cont:

        resp = db.view(view, *args, **kwargs)
        cont = False

        for n, obj in enumerate(resp.iterator()):

            key = obj['key']

            if wrap:
                doc = wrapper(obj['doc']) if wrapper else obj['doc']
                docid = doc._id if wrapper else obj['id']
            else:
                docid = obj.get('id', None)
                doc = obj

            # the extra (per_page+1)-th row only marks where the
            # next page starts
            if n == per_page:
                if auto_advance:
                    kwargs['startkey'] = key
                    if docid is not None:
                        kwargs['startkey_docid'] = docid
                    if 'skip' in kwargs:
                        del kwargs['skip']

                # we reached the end of the page, load next one
                cont = True
                break

            yield doc
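# Hypothetical usage: iterate over all podcasts in pages of 500 rows
# instead of one huge view request; the view name is illustrative.
def _example_iterate_podcasts():
    return Podcast.multi_request_view('podcasts/by_id',
            include_docs = True,
            limit        = 500,
        )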
def get_subscriber_count(podcast):
    db = get_main_database()
    subscriber_sum = 0

    # a podcast might have been merged with others, so we sum the
    # subscribers over all of its Ids
    for podcast_id in podcast.get_ids():
        x = db.view('subscriptions/by_podcast',
                startkey    = [podcast_id, None],
                endkey      = [podcast_id, {}],
                reduce      = True,
                group       = True,
                group_level = 2,
            )
        subscriber_sum += x.count()

    return subscriber_sum
def handle(self, *args, **options):

    docs = set()

    for username in options.get('users', []):

        user = User.get_user(username)

        # User
        docs.add(user._id)

        # Suggestions
        suggestions = Suggestions.for_user(user)
        docs.add(suggestions._id)

        # Podcast States
        for p_state in PodcastUserState.for_user(user):
            docs.add(p_state._id)

            # Categories
            for tag in p_state.tags:
                c = Category.for_tag(tag)
                if c:
                    docs.add(c._id)

            # Podcast
            podcast = Podcast.get(p_state.podcast)
            docs.add(podcast._id)

            # Categories
            for s in podcast.tags:
                for tag in podcast.tags[s]:
                    c = Category.for_tag(tag)
                    if c:
                        docs.add(c._id)

            # Episodes
            for episode in podcast.get_episodes():
                docs.add(episode._id)

                # Episode States
                e_state = episode.get_user_state(user)
                if e_state._id:
                    docs.add(e_state._id)

    db = get_main_database()
    docs = sorted(docs)
    self.dump(docs, db)
def filter(user_id, since=None, until={}, podcast_id=None, device_id=None):
    """ Returns Episode Actions for the given criteria """

    # the default until={} is a CouchDB sentinel that sorts after all
    # timestamp strings
    since_str = since.strftime('%Y-%m-%dT%H:%M:%S') if since else None
    until_str = until.strftime('%Y-%m-%dT%H:%M:%S') if until else {}

    # refuse an empty range; the comparison is only meaningful when
    # the upper bound is an actual timestamp string
    if until_str != {} and since_str >= until_str:
        return

    if not podcast_id and not device_id:
        view = 'episode_actions/by_user'
        startkey = [user_id, since_str]
        endkey   = [user_id, until_str]

    elif podcast_id and not device_id:
        view = 'episode_actions/by_podcast'
        startkey = [user_id, podcast_id, since_str]
        endkey   = [user_id, podcast_id, until_str]

    elif device_id and not podcast_id:
        view = 'episode_actions/by_device'
        startkey = [user_id, device_id, since_str]
        endkey   = [user_id, device_id, until_str]

    else:
        view = 'episode_actions/by_podcast_device'
        startkey = [user_id, podcast_id, device_id, since_str]
        endkey   = [user_id, podcast_id, device_id, until_str]

    db = get_main_database()
    res = db.view(view,
            startkey = startkey,
            endkey   = endkey,
        )

    for r in res:
        action = r['value']
        yield action
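# Hypothetical usage: all episode actions of one user for one podcast
# since the start of 2012 (the ids are illustrative). Note that filter
# here is the module-level function above, not the builtin.
def _example_filter_actions():
    from datetime import datetime
    since = datetime(2012, 1, 1)
    return list(filter('user-id', since=since, podcast_id='podcast-id'))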
def get_db(self):
    return get_main_database()
def handle(self, *args, **options):

    path = os.path.join(settings.BASE_DIR, '..', 'couchdb', '_design')

    db = get_main_database()
    loader = FileSystemDocsLoader(path)
    loader.sync(db, verbose=True)