def handle(self, *args, **options):
    """Merge categories into one by recording them as alternative spellings.

    ``args[0]`` is the label of the target category; every further
    positional argument is a spelling to attach to it.  With fewer than two
    arguments the usage text is printed and nothing else happens.

    For each spelling:

    * it is skipped when it equals the target label or is already listed
      in the target's spellings;
    * when no category is found for it, it is appended to the target's
      spellings;
    * when the lookup yields the target category itself, the target is
      relabelled to ``args[0]`` and its previous label is kept as a spelling;
    * otherwise the found category's label, spellings and podcasts are
      merged into the target and the found category is deleted.

    Finally the target's ``updated`` timestamp is set and it is saved via
    ``save_category``.
    """
    if len(args) < 2:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("""
Merges multiple categories into one by listing them as alternative spellings

Usage:
  ./manage.py category-merge-spellings <category> <spelling1> [<spelling2> ...]
""")
        return

    start_time = datetime.utcnow()
    cat_name = args[0]
    spellings = args[1:]

    print("Adding new spellings for %s ..." % cat_name)
    category = category_for_tag(cat_name)

    if not category:
        print(" creating new category %s" % cat_name)
        category = Category()
        category.label = cat_name

    for spelling in spellings:
        if spelling == cat_name or spelling in category.spellings:
            print(" skipped %s: already in category" % spelling)
            continue

        # Look the spelling up only after the skip check, so skipped
        # spellings do not cost a lookup.
        new_cat = category_for_tag(spelling)

        if not new_cat:
            # merged category doesn't yet exist
            category.spellings.append(spelling)

        elif category._id == new_cat._id:
            # The lookup returned the target itself: make cat_name the label
            # and keep the previous label as a spelling.
            print(" set %s as new label" % cat_name)
            previous = category.spellings + [category.label]
            category.spellings = list(set(x for x in previous if x != cat_name))
            category.label = cat_name

        else:
            print(" add spelling %s" % spelling)
            category.spellings = list(set(
                category.spellings + [new_cat.label] + new_cat.spellings))
            category.merge_podcasts(new_cat.podcasts)
            new_cat.delete()

    category.updated = start_time
    save_category(category)
def handle(self, *args, **options):
    """Rebuild category podcast lists from tags.

    The tags to process are the positional ``args`` or, when none are
    given, everything returned by ``Tag.all()``.  For every tag whose
    cleaned label is non-empty:

    * if the label is in ``settings.DIRECTORY_EXCLUDED_TAGS``, an existing
      category for it is deleted and the tag is skipped;
    * otherwise the category for the label is fetched (or created), one
      ``CategoryEntry`` per tagged podcast is built with a weight of
      ``tag weight * subscriber count``, and the entries are merged into
      the category, which is then saved.

    A category's podcast list is cleared the first time it is touched in a
    run (its ``updated`` differs from this run's start time); later tags
    that resolve to the same category are merged into it.
    """
    # couchdbkit doesn't preserve microseconds
    start_time = datetime.utcnow().replace(microsecond=0)

    excluded_tags = settings.DIRECTORY_EXCLUDED_TAGS

    tags = args or Tag.all()

    for n, tag in enumerate(tags):

        if not isinstance(tag, basestring):
            tag = str(tag)

        label = utils.remove_control_chars(tag.strip())
        if not label:
            continue

        category = Category.for_tag(label)

        # Handle exclusion before fetching podcasts so excluded tags skip
        # that work.
        if label in excluded_tags:
            # delete if it has been excluded after it has been created
            if category:
                category.delete()
            continue

        if not category:
            category = Category()
            category.label = label
            category.spellings = []

        tag_obj = Tag(tag)
        # NOTE(review): assumes utils.unzip splits (podcast_id, weight)
        # pairs into two parallel sequences, also for an empty list - confirm.
        podcast_ids, weights = utils.unzip(list(tag_obj.get_podcasts()))
        podcast_objs = Podcast.get_multi(podcast_ids)

        podcasts = []
        for podcast, weight in zip(podcast_objs, weights):
            e = CategoryEntry()
            e.podcast = podcast.get_id()
            e.weight = float(weight * podcast.subscriber_count())
            podcasts.append(e)

        # we overwrite previous data
        if category.updated != start_time:
            category.podcasts = []

        category.merge_podcasts(podcasts)

        category.updated = start_time

        if 'weight' in category:
            del category['weight']

        category.save()

        # Progress output is best-effort, but only ordinary errors are
        # ignored: KeyboardInterrupt/SystemExit must still stop the command.
        try:
            utils.progress(n % 1000, 1000, category.label.encode('utf-8'))
        except Exception:
            pass