def handle(self, *args, **options):
    """Refresh the indexed values of every article from the database.

    Counts the articles in the index, then walks the ids returned by
    ``es.query_ids()`` in groups of ``GROUP_SIZE``. For each group the
    matching ``Article`` rows are loaded (with ``medium`` joined in),
    converted with ``get_article_dict``, stripped of their ``"sets"`` and
    ``"hash"`` entries, and passed to ``es.bulk_update_values`` keyed by
    article id. Progress and a throughput figure are printed per group.
    """
    es = amcates.ES()

    print("Counting articles..", end=" ")
    sys.stdout.flush()
    narticles = es.count(query="*", filters={})
    print(narticles)

    then, now = datetime.datetime.now(), datetime.datetime.now()
    for i, article_ids in enumerate(grouper(es.query_ids(), n=GROUP_SIZE)):
        progress = (float(i * GROUP_SIZE) / float(narticles)) * 100
        print("{} of {} ({:.2f}%)".format(i * GROUP_SIZE, narticles, progress))

        articles = Article.objects.filter(
            id__in=article_ids).select_related("medium")

        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which the cleanup loop below would exhaust, leaving nothing for the
        # dict comprehension and turning every bulk update into a no-op.
        article_dicts = [get_article_dict(article) for article in articles]
        for article_dict in article_dicts:
            del article_dict["sets"]
            del article_dict["hash"]

        es.bulk_update_values({a["id"]: a for a in article_dicts})

        then, now = now, datetime.datetime.now()
        print("Articles per second: ", end="")
        print(int(GROUP_SIZE / (now - then).total_seconds()))

    print("Done.")
def handle(self, *args, **options):
    """Push database values for all indexed articles back into the index.

    The number of indexed articles is counted first so progress can be
    reported. The ids from ``es.query_ids()`` are then processed in groups
    of ``GROUP_SIZE``: the ``Article`` rows for a group are fetched with
    ``select_related("medium")``, turned into dicts by
    ``get_article_dict``, have their ``"sets"`` and ``"hash"`` keys
    removed, and are handed to ``es.bulk_update_values`` keyed by ``"id"``.
    """
    es = amcates.ES()

    print("Counting articles..", end=" ")
    sys.stdout.flush()
    narticles = es.count(query="*", filters={})
    print(narticles)

    then, now = datetime.datetime.now(), datetime.datetime.now()
    for i, article_ids in enumerate(grouper(es.query_ids(), n=GROUP_SIZE)):
        progress = (float(i * GROUP_SIZE) / float(narticles)) * 100
        print("{} of {} ({:.2f}%)".format(i*GROUP_SIZE, narticles, progress))

        articles = Article.objects.filter(id__in=article_ids).select_related("medium")

        # A list (not ``map``) is required here: the dicts are iterated
        # twice, once to strip keys and once to build the update payload.
        # A Python 3 ``map`` iterator would be empty on the second pass.
        article_dicts = [get_article_dict(article) for article in articles]
        for article_dict in article_dicts:
            del article_dict["sets"]
            del article_dict["hash"]

        es.bulk_update_values({a["id"]: a for a in article_dicts})

        then, now = now, datetime.datetime.now()
        print("Articles per second: ", end="")
        print(int(GROUP_SIZE / (now - then).total_seconds()))

    print("Done.")
def handle(self, *args, **options):
    """Rewrite the ``"hash"`` value of every article in the index.

    After counting the indexed articles, the ids from ``es.query_ids()``
    are handled in groups of ``GROUP_SIZE``. For each group the
    ``HASH_FIELDS`` of the articles are queried, ``_get_hash`` is applied
    to each article's ``to_dict()`` form, and the results are sent to
    ``bulk_update_values`` keyed by article id. Progress and a per-group
    throughput figure are printed along the way.
    """
    index = amcates.ES()

    print("Counting articles..", end=" ")
    sys.stdout.flush()
    total = index.count(query="*", filters={})
    print(total)

    previous = datetime.datetime.now()
    current = datetime.datetime.now()
    id_groups = grouper(index.query_ids(), n=GROUP_SIZE)

    for group_number, id_group in enumerate(id_groups):
        processed = group_number * GROUP_SIZE
        percentage = (float(processed) / float(total)) * 100
        print("{} of {} ({:.2f}%)".format(processed, total, percentage))

        # Only the fields listed in HASH_FIELDS are requested for the group.
        hits = index.query_all(filters={"ids": id_group}, fields=HASH_FIELDS)
        new_hashes = {}
        for hit in hits:
            new_hashes[hit.id] = {"hash": _get_hash(hit.to_dict())}
        index.bulk_update_values(new_hashes)

        # Time spent on this group: from the end of the previous one to now.
        previous, current = current, datetime.datetime.now()
        elapsed = current - previous
        print("Articles per second: ", end="")
        print(int(GROUP_SIZE / elapsed.total_seconds()))

    print("Done.")
def handle(self, *args, **options):
    """Recompute and store the ``"hash"`` entry for all indexed articles.

    Prints the article count, then iterates over ``es.query_ids()`` in
    groups of ``GROUP_SIZE``. Each group's articles are fetched with only
    ``HASH_FIELDS``, passed through ``_get_hash`` (as dicts), and the
    resulting ``{"hash": ...}`` values are bulk-updated by article id.
    A progress line and an articles-per-second figure are printed for
    every group.
    """
    now_fn = datetime.datetime.now
    client = amcates.ES()

    print("Counting articles..", end=" ")
    sys.stdout.flush()
    article_count = client.count(query="*", filters={})
    print(article_count)

    last_tick, tick = now_fn(), now_fn()
    for batch_index, batch_ids in enumerate(grouper(client.query_ids(), n=GROUP_SIZE)):
        offset = batch_index * GROUP_SIZE
        print("{} of {} ({:.2f}%)".format(
            offset,
            article_count,
            (float(offset) / float(article_count)) * 100,
        ))

        updates = {}
        for es_article in client.query_all(filters={"ids": batch_ids}, fields=HASH_FIELDS):
            updates[es_article.id] = {"hash": _get_hash(es_article.to_dict())}
        client.bulk_update_values(updates)

        # Shift the timing window so it spans exactly this batch.
        last_tick = tick
        tick = now_fn()
        print("Articles per second: ", end="")
        seconds = (tick - last_tick).total_seconds()
        print(int(GROUP_SIZE / seconds))

    print("Done.")