Exemple #1
0
    def handle(self, *args, **options):
        """Rebuild the Category documents for a set of tags.

        The tags are taken from the positional arguments; when none are
        given, everything returned by ``Tag.all()`` is processed. For each
        tag the entries returned by ``Tag.get_podcasts()`` are turned into
        CategoryEntry objects (weight multiplied by the podcast's subscriber
        count) and merged into the Category found for the cleaned-up label,
        or into a newly created one.

        Labels listed in ``settings.DIRECTORY_EXCLUDED_TAGS`` never get a
        new Category, and an existing Category with such a label is deleted.
        """

        # couchdbkit doesn't preserve microseconds
        start_time = datetime.utcnow().replace(microsecond=0)

        excluded_tags = settings.DIRECTORY_EXCLUDED_TAGS

        tags = args or Tag.all()

        for n, tag in enumerate(tags):

            if not isinstance(tag, basestring):
                tag = str(tag)

            label = utils.remove_control_chars(tag.strip())
            if not label:
                continue

            tag_obj = Tag(tag)
            podcast_ids, weights = utils.unzip(list(tag_obj.get_podcasts()))
            podcast_objs = Podcast.get_multi(podcast_ids)
            podcasts = []
            for podcast, weight in zip(podcast_objs, weights):
                e = CategoryEntry()
                e.podcast = podcast.get_id()
                e.weight = float(weight * podcast.subscriber_count())
                podcasts.append(e)

            category = Category.for_tag(label)

            if label in excluded_tags:
                # delete if it has been excluded after it has been created;
                # excluded labels without a category are simply skipped
                if category:
                    category.delete()
                continue

            if not category:
                category = Category()
                category.label = label
                category.spellings = []

            # we overwrite previous data, unless the category has already
            # been touched during this run
            if category.updated != start_time:
                category.podcasts = []

            category.merge_podcasts(podcasts)

            category.updated = start_time

            if 'weight' in category:
                del category['weight']

            category.save()

            # progress output is best-effort only; a failure while printing
            # must not abort the run, but Ctrl-C / SystemExit still have to
            # propagate, hence no bare except
            try:
                utils.progress(n % 1000, 1000, category.label.encode('utf-8'))
            except Exception:
                pass
Exemple #2
0
    def add_user_recursive(self, user, docs):
        """ collects a user plus every podcast and episode it references in docs """

        def report(item):
            # the position is always reported as 0; only the size of docs
            # and the item that was just handled change between reports
            progress(0, len(docs), item, stream=sys.stderr)

        # the user document itself, followed by its suggestions
        docs.add(user._id)
        docs.add(suggestions_for_user(user)._id)
        report('')

        for podcast_state in podcast_states_for_user(user):
            # subscription state of the user for one podcast
            self.add_podcast_state(podcast_state, docs)
            report(podcast_state)

            # the podcast that the state refers to
            podcast = podcast_by_id(podcast_state.podcast)
            self.add_podcast(podcast, docs)
            report(podcast)

            # each episode of that podcast, together with the user's state for it
            for episode in episodes_for_podcast(podcast):
                self.add_episode(episode, docs)
                report(episode)

                episode_state = episode_state_for_user_episode(user, episode)
                self.add_episode_state(episode_state, docs)
                report(episode_state)
Exemple #3
0
    def handle(self, *args, **options):
        """Collect the documents selected by the options and dump them.

        Documents are gathered for every username in ``options['users']``,
        for the first 25 entries of ``PodcastToplist()`` when
        ``options['toplist']`` is set, and for every URL in
        ``options['podcasts']``. The collected entries are sorted and handed
        to ``self.dump()`` together with the main database.
        """

        docs = set()
        progress(0, len(docs), '', stream=sys.stderr)

        # an option that is present but unset yields None (the default of
        # get() is not used in that case), so fall back explicitly
        for username in options.get('users') or []:
            user = User.get_user(username)
            self.add_user_recursive(user, docs)

        if options.get('toplist', False):
            toplist = PodcastToplist()
            for n, podcast in toplist[:25]:
                self.add_podcast_recursive(podcast, docs)

        for podcast_url in options.get('podcasts') or []:
            # NOTE(review): docs is passed as the second argument of
            # podcast_for_url -- confirm this is what that function expects
            podcast = podcast_for_url(podcast_url, docs)
            if not podcast:
                logger.warn('podcast not found for URL "%s"', podcast_url)

            else:
                self.add_podcast_recursive(podcast, docs)

        db = get_main_database()
        docs = sorted(docs)
        self.dump(docs, db)
Exemple #4
0
    def print_status(self, seq, actions):
        """Report progress for sequence number seq.

        The ``actions`` argument is accepted but not read; the counts shown
        are taken from ``self.actions``.
        """
        calls_so_far = getattr(self, 'counter', 0)

        # re-read the database's update_seq only on every 1000th call
        # (including the very first one)
        if not calls_so_far % 1000:
            self.total = self.db.info()['update_seq']
        self.counter = calls_so_far + 1

        parts = ['%s: %d' % item for item in self.actions.items()]
        progress(seq, self.total, ', '.join(parts))
Exemple #5
0
    def print_status(self, seq, actions):
        """Print a progress line for seq, listing the counts in self.actions.

        Note that the ``actions`` parameter itself is not used.
        """
        previous_calls = getattr(self, "counter", 0)
        self.counter = previous_calls + 1

        # the total is refreshed from the database on call 0, 1000, 2000, ...
        refresh_total = previous_calls % 1000 == 0
        if refresh_total:
            self.total = self.db.info()["update_seq"]

        formatted = ("%s: %d" % pair for pair in self.actions.items())
        progress(seq, self.total, ", ".join(formatted))
Exemple #6
0
    def handle(self, *args, **options):
        """Run handle_podcasts() for the objects changed since the last run.

        The objects come from ``self.get_objects()``, starting at the
        sequence returned by ``self.get_since()``. Podcast groups contribute
        their podcasts, podcasts contribute themselves and episodes (only
        those without a slug) contribute their podcast; in every case only
        podcasts that have a slug are passed on. The run is recorded as a
        CommandRunStatus on the command status document.
        """

        db = get_main_database()
        status = self.get_cmd_status()
        since = self.get_since(status, options)
        objects = self.get_objects(db, since)
        actions = Counter()


        # create unfinished command run status
        run_status = CommandRunStatus()
        run_status.timestamp_started = datetime.utcnow()
        run_status.start_seq = since
        # add it to existing one (if any)
        status.runs.append(run_status)
        status.save()

        total = db.info()['update_seq']

        has_slug = lambda x: bool(x.slug)

        for seq, obj in objects:
            total = db.info()['update_seq']

            # reset for every object, so that an object of an unhandled type
            # can neither hit an unbound name nor re-process the podcasts of
            # the previous iteration
            podcasts = None

            if isinstance(obj, PodcastGroup):
                podcasts = filter(has_slug, obj.podcasts)

            if isinstance(obj, Podcast):
                podcasts = filter(has_slug, [obj])

            elif isinstance(obj, Episode):
                if has_slug(obj):
                    continue

                podcast = podcast_by_id(obj.podcast)
                if not podcast:
                    continue
                podcasts = filter(has_slug, [podcast])

            if podcasts is None:
                # neither a group, a podcast nor an episode: nothing to do
                continue

            updated = self.handle_podcasts(podcasts)
            actions['updated'] += updated

            if not options['silent']:
                status_str = ', '.join('%s: %d' % x for x in actions.items())
                progress(seq, total, status_str)


        # finish command run status
        run_status.timestamp_finished = datetime.utcnow()
        run_status.end_seq = total
        run_status.status_counter = dict(actions)
        # and overwrite existing one (we could keep a longer log here)
        status.runs = [run_status]
        status.save()
    def handle(self, *args, **options):
        """Call update_podcast_subscribers.delay() with the ID of every podcast.

        Progress is printed after each podcast unless the ``silent`` option
        is set.
        """

        silent = options.get("silent")

        podcasts = Podcast.objects.all()
        total = podcasts.count()

        # count from 1 so that the last report reads total/total instead of
        # stopping one short
        for n, podcast in enumerate(podcasts, 1):
            update_podcast_subscribers.delay(podcast.get_id())

            if not silent:
                progress(n, total)
Exemple #8
0
    def handle(self, *args, **options):
        """Call update_podcast_subscribers.delay() with the ID of every podcast.

        The total used for the progress output comes from ``count_fast()``
        on the queryset. Nothing is printed when the ``silent`` option is
        set.
        """

        silent = options.get('silent')

        podcasts = Podcast.objects.all()
        total = podcasts.count_fast()

        # 1-based position: the final report then shows total/total
        for n, podcast in enumerate(podcasts, 1):
            update_podcast_subscribers.delay(podcast.get_id())

            if not silent:
                progress(n, total)
Exemple #9
0
    def handle(self, *args, **options):
        """Merge episode states that share the same user and episode.

        The 'episode_states/by_user_episode' view is walked row by row,
        starting at the ``skip`` option. For every row, all states with the
        same [user, episode] key are fetched; the row's own state is updated
        with the updater built by ``get_updater()`` from the others, and the
        others are deleted, all in one ``bulk_save_retry()`` call.
        """

        # count() needs a number; an unset option yields None
        skip = options.get('skip') or 0
        total = EpisodeUserState.view('episode_states/by_user_episode',
                limit=0,
            ).total_rows
        db = get_main_database()

        actions = Counter()
        actions['merged'] = 0


        for n in count(skip):

            first = EpisodeUserState.view('episode_states/by_user_episode',
                    skip         = n,
                    include_docs = True,
                    limit        = 1,
                )
            first = list(first)
            if not first:
                break

            first = first[0]


            states = EpisodeUserState.view('episode_states/by_user_episode',
                    key          = [first.user, first.episode],
                    include_docs = True,
                )
            states = list(states)

            # we don't want to delete this one; list.remove() drops exactly
            # one entry or raises ValueError, so no extra length check is
            # needed afterwards
            states.remove(first)

            if states:
                updater = get_updater(states)

                obj_funs = [(first, updater)] + [(state, do_delete) for state in states]

                bulk_save_retry(db, obj_funs)

                # 'first' has already been removed from the list, so every
                # remaining state is one that gets deleted
                merged = len(states)
                actions['merged'] += merged
                total -= merged

            status_str = ', '.join('%s: %d' % x for x in actions.items())
            progress(n+1, total, status_str)
    def handle(self, *args, **options):
        """ calls delete_rule() for the sanitizing rule of every slug given in args """

        if not args:
            print >> sys.stderr, "Usage: ./manage.py delete-sanitizing-rules <slug> [<slug2> ...]"
            return

        slug_total = len(args)

        for position, slug in enumerate(args, 1):
            rule = SanitizingRule.for_slug(slug)

            # slugs for which no rule is found only advance the progress
            if rule:
                self.delete_rule(rule=rule)

            progress(position, slug_total)
Exemple #11
0
    def handle(self, *args, **options):

        users = deleted_users()
        total = deleted_user_count()

        for n, user in enumerate(users):

            if user.is_active or not user.deleted:
                print 'skipping', user.username

            print 'deleting', user.username,
            user.delete()

            progress(n+1, total)
Exemple #12
0
    def handle(self, *args, **options):
        """Pass every podcast and its subscriber count to self.update().

        All updates of one run share the same ``started`` timestamp.
        Progress is printed unless the ``silent`` option is set.
        """

        silent = options.get('silent')

        # couchdbkit doesn't preserve microseconds
        started = datetime.utcnow().replace(microsecond=0)

        podcasts = Podcast.all_podcasts()
        total = Podcast.view('podcasts/by_oldid', limit=0).total_rows

        # 1-based position, so the last report reads total/total
        for n, podcast in enumerate(podcasts, 1):
            subscriber_count = self.get_subscriber_count(podcast)
            self.update(podcast=podcast, started=started, subscriber_count=subscriber_count)

            if not silent:
                progress(n, total)
Exemple #13
0
    def handle(self, *args, **options):

        users = User.view("users/deleted", include_docs=True, reduce=False)

        total = User.view("users/deleted", reduce=True)

        total = list(total)[0]["value"] if total else 0

        for n, user in enumerate(users):

            if user.is_active or not user.deleted:
                print "skipping", user.username

            print "deleting", user.username,
            user.delete()

            progress(n + 1, total)
Exemple #14
0
    def handle(self, *args, **options):
        """ updates one sanitizing rule per section of every config file given in args """

        if not args:
            print >> sys.stderr, "Usage: ./manage.py sync-sanitizing-rules <filename> [<filename2> ...]"
            return

        for filename in args:
            parser = ConfigParser.ConfigParser()
            parser.read(filename)

            # every section name is used as the slug of a rule
            slugs = parser.sections()
            slug_total = len(slugs)

            for position, slug in enumerate(slugs, 1):
                rule = SanitizingRule.for_slug(slug)
                if not rule:
                    # no rule for this slug yet: start from an empty one
                    rule = SanitizingRule()

                self.update_rule(rule=rule, config=parser, slug=slug)

                progress(position, slug_total, filename)
Exemple #15
0
    def handle(self, *args, **options):
        """ runs update_subscriber_data() and update_podcast() for every podcast """

        total = Podcast.count()
        all_podcasts = Podcast.all_podcasts()
        actions = Counter()

        for position, podcast in enumerate(all_podcasts, 1):

            subscriber_data = PodcastSubscriberData.for_podcast(podcast.get_id())

            changed = self.update_subscriber_data(podcast, data=subscriber_data)
            self.update_podcast(podcast=podcast)

            # a truthy result of update_subscriber_data counts as 'updated'
            if changed:
                actions['updated'] += 1
            else:
                actions['skipped'] += 1

            summary = ['%s: %d' % item for item in actions.items()]
            progress(position, total, ', '.join(summary))
    def handle(self, *args, **options):
        """ stores the related podcasts of every podcast via update_related_podcasts() """

        first_item = itemgetter(0)

        limit = options.get('max')

        podcasts = all_podcasts()
        total = podcast_count()

        for position, podcast in enumerate(podcasts, 1):

            # keep at most 'max' results and only the first element of each
            similar = calc_similar_podcasts(podcast)[:limit]
            update_related_podcasts(podcast, map(first_item, similar))

            progress(position, total)
Exemple #17
0
    def handle(self, *args, **options):
        """ assigns slugs to podcast groups and podcasts that do not have one yet """

        # groups first
        groups = PodcastGroupsMissingSlugs()
        group_total = len(groups)
        for position, group in enumerate(groups, 1):
            assign_slug(group, PodcastGroupSlug)
            progress(position, group_total)

        # only consider podcasts that have enough subscribers
        min_subscribers = settings.PODCAST_SLUG_SUBSCRIBER_LIMIT

        def enough_subscribers(p):
            return p.subscriber_count() >= min_subscribers

        podcasts = PodcastsMissingSlugs()
        podcast_total = len(podcasts)

        # takewhile stops at the first podcast below the limit
        candidates = takewhile(enough_subscribers, podcasts)
        for position, podcast in enumerate(candidates, 1):
            assign_slug(podcast, PodcastSlug)
            progress(position, podcast_total)
Exemple #18
0
    def dump(self, docs, db):
        """ writes the documents with the given IDs to stdout as a multipart envelope """

        envelope = write_multipart(sys.stdout, boundary=None)
        total = len(docs)

        for position, docid in enumerate(docs, 1):

            # falsy IDs are skipped, without a progress report
            if not docid:
                continue

            doc = db.get(docid, attachments=True)
            # attachments are taken out of the document before it is encoded
            attachments = doc.pop('_attachments', {})
            jsondoc = json.encode(doc)

            headers = {
                'Content-ID': doc['_id'],
                'ETag': '"%s"' % doc['_rev'],
            }

            if not attachments:
                # plain document: a single JSON part
                envelope.add('application/json', jsondoc, headers)

            else:
                # nested multipart: the JSON document, then one part
                # per attachment
                parts = envelope.open(headers)
                parts.add('application/json', jsondoc)

                for name, info in attachments.items():
                    content_type = info.get('content_type')
                    if content_type is None: # CouchDB < 0.8
                        content_type = info.get('content-type')
                    payload = b64decode(info['data'])
                    parts.add(content_type, payload, {'Content-ID': name})
                parts.close()

            progress(position, total, docid, stream=sys.stderr)

        envelope.close()
Exemple #19
0
    def handle(self, *args, **options):
        """Recalculate the podcast suggestions of users.

        Either the single user named by the ``username`` option is
        processed, or all active users (restricted to those whose
        suggestions are not up to date when ``outdated`` is set). The
        ``max_users`` option limits the number of users. For each user the
        related podcasts of all subscribed podcasts are counted, blacklisted
        ones are dropped, and the ``max`` most common ones are saved as the
        user's suggestions.
        """

        max_suggestions = options.get('max')

        if options.get('username'):
            users = [User.get_user(options.get('username'))]

        else:
            users = User.all_users()
            users = filter(lambda u: u.is_active, users)

            if options.get('outdated'):
                users = filter(lambda u: not u.suggestions_up_to_date, users)

        if options.get('max_users'):
            users = users[:int(options.get('max_users'))]

        total = len(users)

        # most_common() yields (podcast_id, count) pairs; only the ID is kept
        get_podcast_id = itemgetter(0)

        for n, user in enumerate(users):
            suggestion = Suggestions.for_user(user)

            # de-duplicate the subscriptions and drop empty entries
            subscribed_podcasts = list(set(user.get_subscribed_podcasts()))
            subscribed_podcasts = filter(None, subscribed_podcasts)

            related = chain.from_iterable([p.related_podcasts for p in subscribed_podcasts])
            related = filter(lambda pid: pid not in suggestion.blacklist, related)

            counter = Counter(related)
            suggested = map(get_podcast_id, counter.most_common(max_suggestions))
            suggestion.podcasts = suggested

            suggestion.save()

            _update_user(user=user)

            progress(n+1, total)
    def handle(self, *args, **options):
        """Compute and save the related podcasts of every podcast.

        For each podcast the result of ``calc_similar_podcasts()`` is cut to
        the ``max`` option, and the first element of every result is stored
        in ``podcast.related_podcasts``.
        """

        get_podcast = itemgetter(0)

        max_related = options.get('max')

        podcasts = Podcast.all_podcasts()
        total = Podcast.view('podcasts/by_id', limit=0).total_rows

        # defined once instead of being re-created and re-decorated for
        # every podcast; it does not depend on any loop variable
        @repeat_on_conflict(['podcast'])
        def _update(podcast, related):
            podcast.related_podcasts = related
            podcast.save()

        for (n, podcast) in enumerate(podcasts):

            l = calc_similar_podcasts(podcast)[:max_related]

            related = map(get_podcast, l)

            _update(podcast=podcast, related=related)

            progress(n+1, total)
Exemple #21
0
    def handle(self, *args, **options):
        """Merge episode states that share the same user and episode.

        Episode states are visited by position, starting at the ``skip``
        option, until ``get_nth_episode_state()`` returns None. For each
        state its duplicates are fetched; the state itself is updated with
        the updater built by ``get_updater()`` from the others, and the
        others are deleted, all in one ``bulk_save_retry()`` call.
        """

        # count() needs a number; an unset option yields None
        skip = options.get('skip') or 0
        total = episode_states_count()

        actions = Counter()
        actions['merged'] = 0


        for n in count(skip):

            first = get_nth_episode_state(n)
            if first is None:
                break
            states = get_duplicate_episode_states(first.user, first.episode)

            # we don't want to delete this one; remove() drops exactly one
            # entry or raises ValueError, so no extra length check is needed
            states.remove(first)

            if states:
                updater = get_updater(states)

                obj_funs = [(first, updater)] + [(state, do_delete) for state in states]

                udb = get_userdata_database()
                bulk_save_retry(obj_funs, udb)

                # 'first' has already been removed from the list, so every
                # remaining state is one that gets deleted
                merged = len(states)
                actions['merged'] += merged
                total -= merged

            status_str = ', '.join('%s: %d' % x for x in actions.items())
            progress(n+1, total, status_str)
Exemple #22
0
    def add_podcast_recursive(self, podcast, docs):
        """ collects a podcast, its episodes and all states of both in docs """

        def report(item):
            # position stays at 0; the size of docs is passed as the total
            progress(0, len(docs), item, stream=sys.stderr)

        self.add_podcast(podcast, docs)
        report(podcast)

        # states of the podcast itself
        for podcast_state in all_podcast_states(podcast):
            self.add_podcast_state(podcast_state, docs)
            report(podcast_state)

        # episodes, each followed by its own states
        for episode in episodes_for_podcast(podcast.get_podcast()):
            self.add_episode(episode, docs)
            report(episode)

            for episode_state in all_episode_states(episode):
                self.add_episode_state(episode_state, docs)
                report(episode_state)
Exemple #23
0
    def import_file(self, user_id, filename):
        """Load episode actions from a JSON file and apply them for a user.

        The actions are passed to ``update_episodes()`` in batches of 100,
        all with the same timestamp. Progress is printed after every batch.
        """

        progress(0, 100, filename)

        with open(filename, 'r') as f:
            actions = json.load(f)

        total = len(actions)
        progress(0, total, filename)

        user = User.get(user_id)

        now = datetime.now()

        batch_size = 100

        for low in range(0, total, batch_size):
            high = low+batch_size
            batch = actions[low:high]

            update_episodes(user, batch, now, None)

            # the last batch may be shorter than batch_size; never report
            # more actions than there are
            progress(min(high, total), total, filename)
Exemple #24
0
        # last option - merge podcasts
        try:
            if not dry_run:
                rewrite_podcasts(p, su_podcast)
                p.delete()

            p_stats["merged"] += 1

        except Exception, e:
            log("error rewriting podcast %s: %s" % (p.id, e))
            print "error rewriting podcast %s: %s" % (p.id, e)
            p_stats["error"] += 1
            continue

        progress(n + 1, num_podcasts, str(p.id))

    print "finished %s podcasts" % (n + 1)
    print "%(unchanged)d unchanged, %(merged)d merged, %(updated)d updated, %(deleted)d deleted, %(error)d error" % p_stats
    print "Hits"
    for _, r in podcast_rules:
        print "% 30s: %d" % (r.slug, getattr(r, "hits", 0) if hasattr(r, "hits") else 0)


def rewrite_podcasts(p_old, p_new):

    log('merging podcast %s "%s" to correct podcast %s "%s"' % (p_old.id, p_old.url, p_new.id, p_new.url))

    rewrite_newpodcast(p_old, p_new)