Beispiel #1
0
        if FLAGS.start_key and FLAGS.end_key:
            db_iter = ldb.RangeIter(FLAGS.start_key, FLAGS.end_key)
        else:
            db_iter = ldb.RangeIter()
    elif FLAGS.sharded_db:
        assert os.path.exists(FLAGS.sharded_db)
        db = parallel.ShardedDB.open(FLAGS.sharded_db)
        if FLAGS.start_key and FLAGS.end_key:
            db_iter = db.range_iter(FLAGS.start_key, FLAGS.end_key)
        else:
            db_iter = db.__iter__()
    else:
        print 'Must specify --level_db or --sharded_db'
        print FLAGS.GetHelp()
        sys.exit(1)

    cnt = 0
    for (k, v) in db_iter:
        if FLAGS.json_value_out:
            v = json.dumps(cPickle.loads(v), indent=2, sort_keys=True)

        if FLAGS.key_only: print k
        elif FLAGS.value_only: print v
        else: print k, v
        cnt += 1
    print("Total keys: %s", cnt)


# Script entry point: only when this file is executed directly (not imported),
# hand control to app.run(), which is called with no arguments.
# NOTE(review): app.run() presumably parses the command-line FLAGS and then
# invokes this module's main function -- confirm against the `app` module,
# which is not visible here.
if __name__ == '__main__':
    app.run()
Beispiel #2
0
        results = self.run_mr("/tmp/test-lineinput", input_data)

        for i in range(10):
            key, value = results[i]
            assert value == "test-line", (i, results[i])

    def test_sum(self):
        """Run a job with SumReducer over ten identical inputs and check the totals.

        Each of the ten inputs holds the integers 0..99, one per line, so the
        set of result values is expected to be exactly {0, 10, 20, ..., 990}.
        """
        one_file = "\n".join(str(n) for n in range(100))
        inputs = [one_file for _ in range(10)]
        results = self.run_mr("/tmp/test-sum", inputs, reducer=parallel.SumReducer())

        # Collapse the (key, value) pairs to the distinct values, then tick off
        # every expected total; anything left over is unexpected output.
        remaining = set(dict(results).values())
        for n in range(100):
            expected = n * 10
            assert expected in remaining
            remaining.remove(expected)
        assert len(remaining) == 0, "Unexpected output: %s" % remaining

    def test_exception(self):
        """A job run with BadMapper as its mapper must raise parallel.MRException."""
        inputs = ["hello"] * 10
        with self.assertRaises(parallel.MRException) as raised:
            self.run_mr("/tmp/test-bad-mapper", inputs, mapper=BadMapper())


def main(argv):
    """Run this module's unit tests, forwarding ``argv`` to ``unittest.main``.

    ``unittest.main`` is left at its defaults apart from ``argv``.
    """
    runner_options = {"argv": argv}
    unittest.main(**runner_options)


# Script entry point: only when this file is executed directly (not imported),
# hand control to app.run(), which is called with no arguments.
# NOTE(review): app.run() presumably parses flags and then dispatches to
# main(argv) defined above -- confirm against the `app` module, which is not
# visible here.
if __name__ == "__main__":
    app.run()