def test_mongo_map():
    """Exercise ``transforms.mongo_map`` against ``testdb.testcol``.

    Three scenarios are covered, each selecting documents whose ``value``
    is greater than 500:

    1. A list is passed as the fourth argument: the call is unpacked into
       ``(list_out, failed)``.
    2. ``None`` is passed as the fourth argument: only ``failed`` is
       returned.
    3. The mapped function raises for one specific document: exactly one
       entry is expected in ``failed``.
    """
    connection = mongo_conn()
    database = connection.testdb
    source = database.testcol

    def double_value(doc):
        # Mutates the document in place and hands the same object back.
        doubled = doc["value"] * 2
        doc["value"] = doubled
        return doc

    def double_unless_600(doc):
        # Same as double_value, but blows up on the document valued 600.
        if doc["value"] != 600:
            return double_value(doc)
        raise Exception("bad")

    def select_over_500():
        # Build a fresh cursor (and a fresh filter dict) on every call.
        return source.find({"value": {"$gt": 500}})

    # --- Scenario 1: collect the transformed documents in a list ---------
    cursor = select_over_500()
    database.drop_collection("maptest")
    target = database.maptest

    transformed, errors = transforms.mongo_map(double_value, cursor, target, [])
    assert errors == [], "No errors"
    assert target.count() == cursor.count(), "All elements got transformed"
    for doc in transformed:
        # Inputs were > 500, so doubled values must exceed 1000.
        assert int(doc["value"]) > 1000, "Transform happened"

    # --- Scenario 2: no list-out requested -------------------------------
    # NOTE(review): the same cursor object from scenario 1 is passed again;
    # if mongo_map already iterated it to the end, this call may see no
    # documents at all -- TODO confirm against mongo_map's implementation.
    errors = transforms.mongo_map(double_value, cursor, target, None)
    assert errors == [], "No errors when no listout"

    # --- Scenario 3: one document makes the mapped function raise --------
    database.drop_collection("maptest")
    cursor = select_over_500()
    errors = transforms.mongo_map(double_unless_600, cursor, target, None)
    assert len(errors) == 1, "Failed get returned properly"
def test_arxiv_to_article():
    """Convert a sample of arxiv records to articles and check the result.

    Up to 100 documents are copied from ``myxiv.arxiv`` into a freshly
    emptied ``testdb.arxiv``; each is then passed through
    ``transforms.arxiv_to_article`` via ``transforms.mongo_map``. The test
    asserts a minimum throughput (records per second), that nothing
    failed, and that ``testdb.article`` ends up with as many documents as
    ``testdb.arxiv``.
    """
    sample_size = 100
    min_rate = 20  # minimum acceptable records per second

    # Pull the sample out of the source arxiv collection.
    source_db = mongo_conn().myxiv
    sample = list(source_db.arxiv.find().limit(sample_size))

    # Start from empty test collections, then load the sample.
    testdb = schema.connect("testdb", host="127.0.0.1", port=27017)
    for name in ("arxiv", "article"):
        testdb.drop_collection(name)
    testdb.arxiv.insert(sample)

    def to_article(record):
        # Second positional argument is passed as True, as in every call here.
        return transforms.arxiv_to_article(record, True)

    # Time the conversion of every copied record.
    started = time.time()
    failed = transforms.mongo_map(to_article, testdb.arxiv.find())
    elapsed = time.time() - started

    # The rate is computed from the requested sample size, not len(sample).
    rate = sample_size / elapsed
    assert rate > min_rate, (rate, " rec/sec too slow, min is ", min_rate)

    # Every record should have made it across.
    assert failed == [], "No fails"
    article_count = testdb.article.count()
    arxiv_count = testdb.arxiv.count()
    assert article_count == arxiv_count, (
        "arxiv count ",
        arxiv_count,
        ", got article count ",
        article_count,
    )