コード例 #1
0
ファイル: test_amcates.py プロジェクト: christianbaden/amcat
    def test_elastic_hash(self):
        """Can we reproduce a hash from elastic data alone?"""
        article = Article(**{
            "date": datetime.date(2015, 1, 1),
            "section": "\u6f22\u5b57",
            "pagenr": 1928390,
            "headline": "Headline hier.",
            "byline": "byline..",
            "length": 1928,
            "metastring": "Even more strange characters.. \x0C ..",
            "url": "https://example.com",
            "externalid": None,
            "author": None,
            "addressee": "Hmm",
            "text": "Contains invalid char \x08 woo",
            "medium": create_test_medium(name="abc."),
            "project": create_test_project()
        })

        article.save()

        es = ES()
        es.add_articles([article.id])
        hash = get_article_dict(article)["hash"]
        es.flush()

        es_articles = es.query_all(filters={"ids": [article.id]}, fields=HASH_FIELDS + ["hash"])
        es_article = list(es_articles)[0]

        self.assertEqual(article.id, es_article.id)
        self.assertEqual(hash, es_article.hash)
        self.assertEqual(_get_hash(es_article.to_dict()), hash)
コード例 #2
0
ファイル: test_amcates.py プロジェクト: BBie/amcat
    def test_elastic_hash(self):
        """Can we reproduce a hash from elastic data alone?"""
        article = Article(**{
            "date": datetime.date(2015, 1, 1),
            "section": "\u6f22\u5b57",
            "pagenr": 1928390,
            "headline": "Headline hier.",
            "byline": "byline..",
            "length": 1928,
            "metastring": "Even more strange characters.. \x0C ..",
            "url": "https://example.com",
            "externalid": None,
            "author": None,
            "addressee": "Hmm",
            "text": "Contains invalid char \x08 woo",
            "medium": create_test_medium(name="abc."),
            "project": create_test_project()
        })

        article.save()

        es = ES()
        es.add_articles([article.id])
        hash = get_article_dict(article)["hash"]
        es.flush()

        es_articles = es.query_all(filters={"ids": [article.id]}, fields=HASH_FIELDS + ["hash"])
        es_article = list(es_articles)[0]

        self.assertEqual(article.id, es_article.id)
        self.assertEqual(hash, es_article.hash)
        self.assertEqual(_get_hash(es_article.to_dict()), hash)
コード例 #3
0
    def handle(self, *args, **options):
        es = amcates.ES()

        print("Counting articles..", end=" ")
        sys.stdout.flush()
        narticles = es.count(query="*", filters={})
        print(narticles)

        then, now = datetime.datetime.now(), datetime.datetime.now()
        for i, article_ids in enumerate(grouper(es.query_ids(), n=GROUP_SIZE)):
            progress = (float(i * GROUP_SIZE) / float(narticles)) * 100
            print("{} of {} ({:.2f}%)".format(i * GROUP_SIZE, narticles,
                                              progress))

            es_articles = es.query_all(filters={"ids": article_ids},
                                       fields=HASH_FIELDS)
            es.bulk_update_values(
                {a.id: {
                    "hash": _get_hash(a.to_dict())
                }
                 for a in es_articles})

            then, now = now, datetime.datetime.now()
            print("Articles per second: ", end="")
            print(int(GROUP_SIZE / (now - then).total_seconds()))

        print("Done.")
コード例 #4
0
ファイル: upgrade_hashes.py プロジェクト: CJStuart/amcat
    def handle(self, *args, **options):
        es = amcates.ES()

        print("Counting articles..", end=" ")
        sys.stdout.flush()
        narticles = es.count(query="*", filters={})
        print(narticles)

        then, now = datetime.datetime.now(), datetime.datetime.now()
        for i, article_ids in enumerate(grouper(es.query_ids(), n=GROUP_SIZE)):
            progress = (float(i * GROUP_SIZE) / float(narticles)) * 100
            print("{} of {} ({:.2f}%)".format(i*GROUP_SIZE, narticles, progress))

            es_articles = es.query_all(filters={"ids": article_ids}, fields=HASH_FIELDS)
            es.bulk_update_values({a.id: {"hash": _get_hash(a.to_dict())} for a in es_articles})

            then, now = now, datetime.datetime.now()
            print("Articles per second: ", end="")
            print(int(GROUP_SIZE / (now - then).total_seconds()))

        print("Done.")