Beispiel #1
0
def test_mongo_map():
    """Check ``transforms.mongo_map`` with and without an output list.

    Three scenarios run over the ``testdb.testcol`` documents whose ``value``
    exceeds 500, writing into the scratch collection ``testdb.maptest``:

    1. With a list as the last argument the result is unpacked as
       ``(outputs, failed)``: nothing may fail, ``maptest`` must hold as many
       documents as the query matches, and every output value must have been
       doubled (hence above 1000).
    2. With ``None`` as the last argument the result is the failure list
       alone, which must be empty.
    3. With a transform that raises for ``value == 600`` exactly one failure
       must be reported.

    Every scenario starts from an emptied ``maptest`` and a fresh query.
    """
    conn = mongo_conn()
    col = conn.testdb.testcol
    maptest = conn.testdb.maptest

    def fn(x):
        # Doubles the value in place and hands the same document back.
        x["value"] = x["value"] * 2
        return x

    def fn2(x):
        # Fails on purpose for one specific value, otherwise behaves like fn.
        # NOTE(review): presumably the fixture holds exactly one document
        # with value 600 -- confirm against the fixture setup.
        if x["value"] == 600:
            raise Exception("bad")
        return fn(x)

    def fresh_query():
        # Empty the output collection and build a new cursor.
        # NOTE(review): assumes find() returns a single-pass cursor (as
        # pymongo cursors are), so a cursor already iterated by an earlier
        # scenario would yield nothing and make the next assertion vacuous.
        conn.testdb.drop_collection("maptest")
        return col.find({"value": {"$gt": 500}})

    # Scenario 1: output list supplied.
    query = fresh_query()
    ls_out, failed = transforms.mongo_map(fn, query, maptest, [])
    assert failed == [], "No errors"
    assert maptest.count() == query.count(), "All elements got transformed"
    for x in ls_out:
        assert int(x["value"]) > 1000, "Transform happened"

    # Scenario 2: no output list, so only the failures come back.
    failed = transforms.mongo_map(fn, fresh_query(), maptest, None)
    assert failed == [], "No errors when no listout"

    # Scenario 3: a transform that raises for one document.
    failed = transforms.mongo_map(fn2, fresh_query(), maptest, None)
    assert len(failed) == 1, "Failed get returned properly"
Beispiel #2
0
def test_arxiv_to_article():
    """Import sample arXiv records as articles; check speed and completeness.

    Up to ``N`` documents are copied from ``myxiv.arxiv`` into a freshly
    emptied ``testdb.arxiv``.  Each one is then passed through
    ``transforms.arxiv_to_article`` by ``transforms.mongo_map``.  The test
    requires a throughput above ``min_hz`` records per second, an empty
    failure list, and equal document counts in ``testdb.article`` and
    ``testdb.arxiv``.
    """
    N = 100
    min_hz = 20

    # copy up to N values from arxiv
    myxiv = mongo_conn().myxiv
    records = list(myxiv.arxiv.find().limit(N))
    # Fail early with a clear message rather than timing an empty import.
    assert records, "No arxiv records available to copy"
    n_records = len(records)

    testdb = schema.connect("testdb", host="127.0.0.1", port=27017)
    testdb.drop_collection("arxiv")
    testdb.drop_collection("article")
    testdb.arxiv.insert(records)

    # try to import them all as articles
    t0 = time.time()
    failed = transforms.mongo_map(lambda x: transforms.arxiv_to_article(x, True), testdb.arxiv.find())
    dt = time.time() - t0

    # Rate is based on the number of records actually copied (the source may
    # hold fewer than N); a zero elapsed time counts as infinitely fast
    # instead of raising ZeroDivisionError.
    rate = n_records / dt if dt > 0 else float("inf")
    assert rate > min_hz, "%s rec/sec too slow, min is %s" % (rate, min_hz)

    # Check they all made it
    assert failed == [], "No fails"
    arxiv_count = testdb.arxiv.count()
    article_count = testdb.article.count()
    assert article_count == arxiv_count, "arxiv count %s, got article count %s" % (
        arxiv_count,
        article_count,
    )