Ejemplo n.º 1
0
 def test_treasury_json_config(self):
     """Run the job over two collections described by a JSON input config.

     Imports the sample file into ``mongo_hadoop.yield_historical.in3``,
     then configures two inputs through ``mongo.input.multi_uri.json``:
     ``yield_historical.in`` restricted by a ``dayOfWeek`` query with its
     own splitter class, and ``yield_historical.in3`` without a query.
     The output collection is only printed; nothing is asserted on it.
     """
     host = self.server_hostname
     mongo_manager.mongo_import(host, "mongo_hadoop", "yield_historical.in3",
                                JSONFILE_PATH)

     job_params = DEFAULT_PARAMETERS.copy()
     job_params['mongo.splitter.class'] = (
         "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter")

     # First input: the original collection, filtered to FRIDAY documents.
     filtered_input = {
         "mongo.input.uri":
         "mongodb://%s/mongo_hadoop.yield_historical.in" % host,
         "query": {"dayOfWeek": "FRIDAY"},
         "mongo.splitter.class":
         "com.mongodb.hadoop.splitter.SingleMongoSplitter",
         "mongo.input.split.use_range_queries": True,
         "mongo.input.notimeout": True,
     }
     # Second input: the freshly imported copy, unfiltered.
     unfiltered_input = {
         "mongo.input.uri":
         "mongodb://%s/mongo_hadoop.yield_historical.in3" % host,
         "mongo.input.split.use_range_queries": True,
         "mongo.input.notimeout": True,
     }
     collection_settings = [filtered_input, unfiltered_input]

     # The value is escaped for the shell: wrap the JSON in double quotes
     # and backslash-escape every double quote inside it.
     settings_json = json.dumps(collection_settings)
     escaped_json = re.sub('"', '\\"', settings_json)
     job_params["mongo.input.multi_uri.json"] = '"' + escaped_json + '"'

     runjob(host, job_params, input_collection=None)

     output_db = self.server.connection()['mongo_hadoop']
     out_col = output_db['yield_historical.out']
     print(list(out_col.find()))
Ejemplo n.º 2
0
    def setUp(self):
        """Start a sharded cluster under /tmp and load sharded test data.

        Brings up two replica-set shards, a standalone config server and a
        mongos, re-imports ``mongo_hadoop.yield_historical.in`` through the
        mongos, then enables sharding, shards the collection on ``_id`` and
        issues one ``split`` command.
        """
        # Shards: each manager is created with num_members=3, with_arbiter=True.
        self.shard1 = mongo_manager.ReplicaSetManager(
            home="/tmp/rs0", with_arbiter=True, num_members=3)
        self.shard1.start_set(fresh=True)
        self.shard2 = mongo_manager.ReplicaSetManager(
            home="/tmp/rs1", with_arbiter=True, num_members=3)
        self.shard2.start_set(fresh=True)

        # Config server.
        self.configdb = mongo_manager.StandaloneManager(home="/tmp/config_db")
        self.confighost = self.configdb.start_server(fresh=True)

        # Router in front of both shards; it also registers the shards.
        self.mongos = mongo_manager.MongosManager(home="/tmp/mongos")
        shard_strings = [shard.get_shard_string()
                         for shard in (self.shard1, self.shard2)]
        self.mongos_hostname = self.mongos.start_mongos(
            self.confighost, shard_strings,
            noauth=False, fresh=True, addShards=True)

        # Start from an empty database, then load the sample data.
        self.mongos_connection = self.mongos.connection()
        self.mongos_connection.drop_database('mongo_hadoop')
        mongo_manager.mongo_import(self.mongos_hostname, "mongo_hadoop",
                                   "yield_historical.in", JSONFILE_PATH)

        # Shard the imported collection on _id and split it once.
        namespace = "mongo_hadoop.yield_historical.in"
        admin_db = self.mongos_connection['admin']
        admin_db.command("enablesharding", "mongo_hadoop")
        admin_db.command("shardCollection", namespace, key={"_id": 1})
        admin_db.command("split", namespace, find={"_id": 1})
Ejemplo n.º 3
0
    def setUpClass(self):
        """Build a two-shard cluster with two mongos routers and scatter chunks.

        Steps, in order:
          1. start two replica-set shards, a config server and two mongos
             processes (only the first is started with addShards=True);
          2. drop and re-import ``mongo_hadoop.yield_historical.in`` through
             the first mongos and enable sharding on the database;
          3. stop the balancer, shard the collection on ``_id``, split it at
             a fixed list of document offsets, and try to move half of the
             chunks from the shard owning the first chunk found to another
             shard.  A failed move is printed and otherwise ignored.

        NOTE(review): unittest invokes setUpClass on the class, so ``self``
        is presumably the class object and a @classmethod decorator is
        presumably applied above this excerpt -- confirm.
        """
        namespace = "mongo_hadoop.yield_historical.in"

        self.shard1 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs0"),
            with_arbiter=True,
            num_members=3)
        self.shard1.start_set(fresh=True)
        self.shard2 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs1"),
            with_arbiter=True,
            num_members=3)
        self.shard2.start_set(fresh=True)
        self.configdb = mongo_manager.StandaloneManager(
            home=os.path.join(TEMPDIR, 'config_db'))
        self.confighost = self.configdb.start_server(fresh=True)

        self.mongos = mongo_manager.MongosManager(
            home=os.path.join(TEMPDIR, 'mongos'))
        self.mongos_hostname = self.mongos.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=False, fresh=True, addShards=True)

        # Second router on the same cluster; the shards are already added.
        self.mongos2 = mongo_manager.MongosManager(
            home=os.path.join(TEMPDIR, 'mongos2'))
        self.mongos2_hostname = self.mongos2.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=False, fresh=True, addShards=False)

        self.mongos_connection = self.mongos.connection()
        self.mongos2_connection = self.mongos2.connection()
        self.mongos_connection.drop_database('mongo_hadoop')
        mongo_manager.mongo_import(self.mongos_hostname,
                                   "mongo_hadoop",
                                   "yield_historical.in",
                                   JSONFILE_PATH)
        mongos_admindb = self.mongos_connection['admin']
        mongos_admindb.command("enablesharding", "mongo_hadoop")

        # Turn off the balancer (upsert of the "stopped" flag) -- presumably
        # so the manual chunk layout created below is not rebalanced.
        self.mongos_connection['config'].settings.update(
            {"_id": "balancer"}, {'$set': {'stopped': True}}, True)
        mongos_admindb.command("shardCollection", namespace, key={"_id": 1})

        testcoll = self.mongos_connection['mongo_hadoop']['yield_historical.in']

        # Split at the _id of the document found at each offset in _id order.
        for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]:
            split_doc = testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]
            mongos_admindb.command("split", namespace,
                                   middle={"_id": split_doc['_id']})

        ms_config = self.mongos_connection['config']
        shards = list(ms_config.shards.find())
        numchunks = ms_config.chunks.count()
        chunk_source = ms_config.chunks.find_one()['shard']
        # Single-argument print(...) produces the same text on Python 2 and
        # is valid Python 3, unlike the print statement.
        print("chunk source %s" % (chunk_source,))
        chunk_dest = [s['_id'] for s in shards if s['_id'] != chunk_source][0]
        print("chunk dest %s" % (chunk_dest,))

        # Shuffle chunks around: one move attempt per iteration.  Floor
        # division keeps the count an int on Python 3 as well.
        for _ in range(numchunks // 2):
            chunk_to_move = ms_config.chunks.find_one({"shard": chunk_source})
            print("moving %s from %s to %s"
                  % (chunk_to_move, chunk_source, chunk_dest))
            try:
                mongos_admindb.command("moveChunk", namespace,
                                       find=chunk_to_move['min'],
                                       to=chunk_dest)
            except Exception as e:
                # Best effort: report the failed move and carry on.
                print(e)
Ejemplo n.º 4
0
 def setUpClass(self):
     """Start a fresh standalone server with profiling on and load test data.

     Drops the ``mongo_hadoop`` database, sets its profiling level to 2 and
     imports the sample file into ``yield_historical.in``.

     NOTE(review): unittest invokes setUpClass on the class; a @classmethod
     decorator is presumably applied above this excerpt -- confirm.
     """
     self.server = mongo_manager.StandaloneManager(
         home=os.path.join(TEMPDIR, "standalone1"))
     self.server_hostname = self.server.start_server(fresh=True)
     self.server.connection().drop_database('mongo_hadoop')
     # Level 2 is presumably pymongo's "profile all operations" setting --
     # assumes connection() returns a pymongo client; confirm.
     self.server.connection()['mongo_hadoop'].set_profiling_level(2)
     mongo_manager.mongo_import(self.server_hostname, "mongo_hadoop",
                                "yield_historical.in", JSONFILE_PATH)
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print("server is ready.")
Ejemplo n.º 5
0
 def setUpClass(self):
     """Start a fresh standalone server in a per-run directory and load data.

     The home directory name embeds the module-level ``num_runs`` counter,
     which is incremented once the data has been imported, so each
     invocation uses a different directory under TEMPDIR.

     NOTE(review): unittest invokes setUpClass on the class; a @classmethod
     decorator is presumably applied above this excerpt -- confirm.
     """
     global num_runs
     self.homedir = "standalone1_" + str(num_runs)
     self.server = mongo_manager.StandaloneManager(
         home=os.path.join(TEMPDIR, self.homedir))
     self.server_hostname = self.server.start_server(fresh=True)
     self.server.connection().drop_database("mongo_hadoop")
     mongo_manager.mongo_import(self.server_hostname, "mongo_hadoop",
                                "yield_historical.in", JSONFILE_PATH)
     num_runs += 1
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print("server is ready.")
Ejemplo n.º 6
0
 def setUp(self):
     """Start a fresh standalone server under /tmp and load the test data.

     Drops the ``mongo_hadoop`` database and imports the sample file into
     ``yield_historical.in``.
     """
     self.server = mongo_manager.StandaloneManager(home="/tmp/standalone1")
     self.server_hostname = self.server.start_server(fresh=True)
     self.server.connection().drop_database('mongo_hadoop')
     mongo_manager.mongo_import(self.server_hostname,
                                "mongo_hadoop",
                                "yield_historical.in",
                                JSONFILE_PATH)
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print("server is ready.")
Ejemplo n.º 7
0
 def setUpClass(self):
     """Start a fresh standalone server under TEMPDIR and load the test data.

     Drops the ``mongo_hadoop`` database and imports the sample file into
     ``yield_historical.in``.

     NOTE(review): unittest invokes setUpClass on the class; a @classmethod
     decorator is presumably applied above this excerpt -- confirm.
     """
     self.server = mongo_manager.StandaloneManager(
         home=os.path.join(TEMPDIR, "standalone1"))
     self.server_hostname = self.server.start_server(fresh=True)
     self.server.connection().drop_database('mongo_hadoop')
     mongo_manager.mongo_import(self.server_hostname,
                                "mongo_hadoop",
                                "yield_historical.in",
                                JSONFILE_PATH)
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print("server is ready.")
Ejemplo n.º 8
0
 def setUpClass(self):
     """Launch a standalone server in a per-run directory and import data.

     The directory name is derived from the module-level ``num_runs``
     counter, which is bumped at the end.  The server is started with the
     ``noauth`` value read from ``self.noauth``, and the import targets
     ``localhost`` on the server's port.
     """
     global num_runs
     run_home = "standalone1_" + str(num_runs)
     self.homedir = run_home

     standalone = mongo_manager.StandaloneManager(
         home=os.path.join(TEMPDIR, run_home))
     self.server = standalone
     self.server_hostname = standalone.start_server(fresh=True,
                                                    noauth=self.noauth)

     # Begin with an empty database before loading the sample file.
     standalone.connection().drop_database('mongo_hadoop')
     import_target = 'localhost:' + str(standalone.port)
     mongo_manager.mongo_import(import_target, "mongo_hadoop",
                                "yield_historical.in", JSONFILE_PATH)
     num_runs += 1
Ejemplo n.º 9
0
 def setUpClass(self):
     """Launch a standalone server in a per-run directory and import data.

     The directory name is derived from the module-level ``num_runs``
     counter, which is bumped at the end.  The server is started with the
     ``noauth`` value read from ``self.noauth``, and the import targets
     ``localhost`` on the server's port.
     """
     global num_runs
     run_home = "standalone1_" + str(num_runs)
     self.homedir = run_home

     standalone = mongo_manager.StandaloneManager(
         home=os.path.join(TEMPDIR, run_home))
     self.server = standalone
     self.server_hostname = standalone.start_server(fresh=True,
                                                    noauth=self.noauth)

     # Begin with an empty database before loading the sample file.
     standalone.connection().drop_database('mongo_hadoop')
     import_target = 'localhost:' + str(standalone.port)
     mongo_manager.mongo_import(import_target, "mongo_hadoop",
                                "yield_historical.in", JSONFILE_PATH)
     num_runs += 1
Ejemplo n.º 10
0
 def test_treasury(self):
     """Run the job over two input collections and check doubled results.

     Imports the sample file into ``yield_historical.in2`` and runs the job
     with the multi-collection splitter over ``yield_historical.in`` and
     ``yield_historical.in2``.  The expected output is built from
     ``check_results`` with every ``count`` and ``sum`` multiplied by two;
     presumably ``yield_historical.in`` already holds the same data --
     confirm against the fixture.
     """
     logging.info("testing multiple collection support.")
     mongo_manager.mongo_import(self.server_hostname,
                                "mongo_hadoop",
                                "yield_historical.in2",
                                JSONFILE_PATH)
     PARAMS = DEFAULT_PARAMETERS.copy()
     PARAMS['mongo.splitter.class'] = "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"
     # No backslash continuation needed inside the brackets.
     runjob(self.server_hostname, PARAMS,
            input_collection=['mongo_hadoop.yield_historical.in',
                              'mongo_hadoop.yield_historical.in2'])
     out_col = self.server.connection()['mongo_hadoop']['yield_historical.out']
     # NOTE(review): "avg" uses "/", which floors on Python 2 if both
     # operands are ints -- confirm the reference sums are floats.
     reference_doubled = [{"_id": x['_id'],
                           "count": x['count'] * 2,
                           "avg": (x['sum'] * 2) / (x['count'] * 2),
                           "sum": x['sum'] * 2} for x in check_results]
     self.assertTrue(compare_results(out_col, reference_doubled))
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print(list(out_col.find()))
Ejemplo n.º 11
0
 def test_treasury(self):
     """Run the job over two input collections and check doubled results.

     Imports the sample file into ``yield_historical.in2`` and runs the job
     with the multi-collection splitter over ``yield_historical.in`` and
     ``yield_historical.in2``.  The expected output is built from
     ``check_results`` with every ``count`` and ``sum`` multiplied by two;
     presumably ``yield_historical.in`` already holds the same data --
     confirm against the fixture.
     """
     logging.info("testing multiple collection support.")
     mongo_manager.mongo_import(self.server_hostname,
                                "mongo_hadoop",
                                "yield_historical.in2",
                                JSONFILE_PATH)
     PARAMS = DEFAULT_PARAMETERS.copy()
     PARAMS['mongo.splitter.class'] = "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"
     # No backslash continuation needed inside the brackets.
     runjob(self.server_hostname, PARAMS,
            input_collection=['mongo_hadoop.yield_historical.in',
                              'mongo_hadoop.yield_historical.in2'])
     out_col = self.server.connection()['mongo_hadoop']['yield_historical.out']
     # NOTE(review): "avg" uses "/", which floors on Python 2 if both
     # operands are ints -- confirm the reference sums are floats.
     reference_doubled = [{"_id": x['_id'],
                           "count": x['count'] * 2,
                           "avg": (x['sum'] * 2) / (x['count'] * 2),
                           "sum": x['sum'] * 2} for x in check_results]
     self.assertTrue(compare_results(out_col, reference_doubled))
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print(list(out_col.find()))
Ejemplo n.º 12
0
 def test_treasury_json_config(self):
     """Exercise the JSON multi-URI input configuration.

     Loads the sample file into ``mongo_hadoop.yield_historical.in3`` and
     runs the job with ``mongo.input.multi_uri.json`` describing two
     inputs: ``yield_historical.in`` (with a ``dayOfWeek`` query and its
     own splitter class) and ``yield_historical.in3``.  The resulting
     output collection is printed; no assertion is made.
     """
     hostname = self.server_hostname
     mongo_manager.mongo_import(hostname,
                                "mongo_hadoop",
                                "yield_historical.in3",
                                JSONFILE_PATH)

     params = DEFAULT_PARAMETERS.copy()
     params['mongo.splitter.class'] = "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"

     # One settings dict per input collection, in the order they are listed.
     collection_settings = []
     collection_settings.append({
         "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in" % hostname,
         "query": {"dayOfWeek": "FRIDAY"},
         "mongo.splitter.class": "com.mongodb.hadoop.splitter.SingleMongoSplitter",
         "mongo.input.split.use_range_queries": True,
         "mongo.input.notimeout": True,
     })
     collection_settings.append({
         "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in3" % hostname,
         "mongo.input.split.use_range_queries": True,
         "mongo.input.notimeout": True,
     })

     # Escaped for the shell: inner double quotes get a backslash and the
     # whole JSON document is wrapped in double quotes.
     serialized = json.dumps(collection_settings)
     shell_safe = re.sub('"', '\\"', serialized)
     params["mongo.input.multi_uri.json"] = '"' + shell_safe + '"'

     runjob(hostname, params, input_collection=None)
     connection = self.server.connection()
     out_col = connection['mongo_hadoop']['yield_historical.out']
     print(list(out_col.find()))
Ejemplo n.º 13
0
import mongo_manager, sys

# Bring up a two-shard cluster under /tmp, import the sample data through the
# mongos and shard testdb.testcoll on _id.  Exit status: 0 on success, 1 if
# any step raises.
try:
    shard1 = mongo_manager.ReplicaSetManager(home="/tmp/rs0", with_arbiter=True, num_members=3)
    shard1.start_set(fresh=True)

    shard2 = mongo_manager.ReplicaSetManager(home="/tmp/rs1", with_arbiter=True, num_members=3)
    shard2.start_set(fresh=True)

    # config server
    z = mongo_manager.StandaloneManager(home="/tmp/config_db")
    zhost = z.start_server(fresh=True)

    s = mongo_manager.MongosManager(home="/tmp/mongos")
    s.start_mongos(zhost, [h.get_shard_string() for h in (shard1, shard2)], noauth=False, fresh=True, addShards=True)

    # NOTE(review): only the port is passed as the import target here --
    # confirm mongo_import accepts a bare port rather than "host:port".
    mongo_manager.mongo_import(s.port, "testdb", "testcoll", "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json")

    s_client = s.connection()
    s_client['admin'].command("enablesharding", "testdb")
    s_client['admin'].command("shardCollection", "testdb.testcoll", key={"_id": 1})
except Exception:
    # A bare "except:" here would also catch SystemExit, so the success exit
    # must not be raised inside the try block.  Report the failure instead of
    # discarding it silently.
    import traceback
    traceback.print_exc()
    sys.exit(1)
else:
    sys.exit(0)

Ejemplo n.º 14
0
import mongo_manager
# Start a fresh replica set under /tmp/rs0.
x = mongo_manager.ReplicaSetManager(
    home="/tmp/rs0",
    with_arbiter=True,
    num_members=3,
)
x.start_set(fresh=True)

# The first element of get_primary()'s result is used as the import target.
primary = x.get_primary()[0]

# Load the sample file into mongo_hadoop / yield_historical.in.
mongo_manager.mongo_import(
    primary,
    "mongo_hadoop",
    "yield_historical.in",
    "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json",
)
Ejemplo n.º 15
0
import mongo_manager
# Replica set rooted at /tmp/rs0, started from a clean state.
x = mongo_manager.ReplicaSetManager(home="/tmp/rs0", with_arbiter=True, num_members=3)
x.start_set(fresh=True)

# Import the sample file via the first element of get_primary()'s result.
primary = x.get_primary()[0]
mongo_manager.mongo_import(primary, "mongo_hadoop", "yield_historical.in",
                           "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json")
Ejemplo n.º 16
0
    def setUpClass(self):
        """Build a per-run two-shard cluster with two mongos and scatter chunks.

        After a 5 second sleep, starts two replica-set shards, a config
        server and two mongos processes in directories suffixed with the
        module-level ``num_runs`` counter, and records those directory names
        in ``self.homedirs``.  It then re-imports
        ``mongo_hadoop.yield_historical.in``, stops the balancer, shards the
        collection on ``_id``, splits it at a fixed list of document offsets
        and tries to move half of the chunks from the shard owning the first
        chunk found to another shard.  A failed move is printed and
        otherwise ignored.

        NOTE(review): unittest invokes setUpClass on the class; a
        @classmethod decorator is presumably applied above this excerpt --
        confirm.
        """
        time.sleep(5)
        global num_runs

        suffix = str(num_runs)
        namespace = "mongo_hadoop.yield_historical.in"

        self.shard1 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs0_" + suffix), with_arbiter=True, num_members=3
        )
        self.shard1.start_set(fresh=True)
        self.shard2 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs1_" + suffix), with_arbiter=True, num_members=3
        )
        self.shard2.start_set(fresh=True)
        self.configdb = mongo_manager.StandaloneManager(home=os.path.join(TEMPDIR, "config_db_" + suffix))
        self.confighost = self.configdb.start_server(fresh=True)

        self.mongos = mongo_manager.MongosManager(home=os.path.join(TEMPDIR, "mongos_" + suffix))
        self.mongos_hostname = self.mongos.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=False,
            fresh=True,
            addShards=True,
        )

        # Second router on the same cluster; the shards are already added.
        self.mongos2 = mongo_manager.MongosManager(home=os.path.join(TEMPDIR, "mongos2_" + suffix))
        self.mongos2_hostname = self.mongos2.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=False,
            fresh=True,
            addShards=False,
        )

        self.mongos_connection = self.mongos.connection()
        self.mongos2_connection = self.mongos2.connection()
        self.mongos_connection.drop_database("mongo_hadoop")
        mongo_manager.mongo_import(self.mongos_hostname, "mongo_hadoop", "yield_historical.in", JSONFILE_PATH)
        mongos_admindb = self.mongos_connection["admin"]
        mongos_admindb.command("enablesharding", "mongo_hadoop")
        self.homedirs = [prefix + suffix for prefix in ("rs0_", "rs1_", "config_db_", "mongos_", "mongos2_")]
        num_runs += 1

        # Turn off the balancer (upsert of the "stopped" flag) -- presumably
        # so the manual chunk layout created below is not rebalanced.
        self.mongos_connection["config"].settings.update({"_id": "balancer"}, {"$set": {"stopped": True}}, True)
        mongos_admindb.command("shardCollection", namespace, key={"_id": 1})

        testcoll = self.mongos_connection["mongo_hadoop"]["yield_historical.in"]

        # Split at the _id of the document found at each offset in _id order.
        for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]:
            split_doc = testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]
            mongos_admindb.command("split", namespace, middle={"_id": split_doc["_id"]})

        ms_config = self.mongos_connection["config"]
        shards = list(ms_config.shards.find())
        numchunks = ms_config.chunks.count()
        chunk_source = ms_config.chunks.find_one()["shard"]
        # logging applies "msg % args" lazily, so every extra argument needs
        # a placeholder in the message or formatting fails at emit time.
        logging.info("chunk source %s", chunk_source)
        chunk_dest = [s["_id"] for s in shards if s["_id"] != chunk_source][0]
        logging.info("chunk dest %s", chunk_dest)

        # Shuffle chunks around: one move attempt per iteration.  Floor
        # division keeps the count an int on Python 3 as well.
        for _ in range(numchunks // 2):
            chunk_to_move = ms_config.chunks.find_one({"shard": chunk_source})
            logging.info("moving %s from %s to %s", chunk_to_move, chunk_source, chunk_dest)
            try:
                mongos_admindb.command(
                    "moveChunk", namespace, find=chunk_to_move["min"], to=chunk_dest
                )
            except Exception as e:
                # Best effort: report the failed move and carry on.
                print(e)
Ejemplo n.º 17
0
    def setUpClass(self):
        """Build a uniquely named two-shard cluster and scatter its chunks.

        After a 5 second sleep, starts two replica-set shards, a config
        server and two mongos processes in directories suffixed with a
        generated id plus the module-level ``num_runs`` counter, passing
        ``self.noauth`` through to each of them, and records the directory
        names in ``self.homedirs``.  It then re-imports
        ``mongo_hadoop.yield_historical.in`` via ``localhost`` on the first
        mongos' port, stops the balancer, shards the collection on ``_id``,
        splits it at a fixed list of document offsets and tries to move half
        of the chunks from the shard owning the first chunk found to another
        shard.  A failed move is printed and otherwise ignored.

        NOTE(review): unittest invokes setUpClass on the class; a
        @classmethod decorator is presumably applied above this excerpt --
        confirm.
        """
        time.sleep(5)
        global num_runs

        randstr = generate_id(size=6)
        suffix = randstr + "_" + str(num_runs)
        namespace = "mongo_hadoop.yield_historical.in"

        self.shard1 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs0_" + suffix),
            with_arbiter=True,
            num_members=3, noauth=self.noauth)
        self.shard1.start_set(fresh=True)
        self.shard2 = mongo_manager.ReplicaSetManager(
            home=os.path.join(TEMPDIR, "rs1_" + suffix),
            with_arbiter=True,
            num_members=3, noauth=self.noauth)
        self.shard2.start_set(fresh=True)
        self.configdb = mongo_manager.StandaloneManager(
            home=os.path.join(TEMPDIR, 'config_db_' + suffix))
        self.confighost = self.configdb.start_server(fresh=True, noauth=self.noauth)

        self.mongos = mongo_manager.MongosManager(
            home=os.path.join(TEMPDIR, 'mongos_' + suffix))
        self.mongos_hostname = self.mongos.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=self.noauth, fresh=True, addShards=True)

        # Second router on the same cluster; the shards are already added.
        self.mongos2 = mongo_manager.MongosManager(
            home=os.path.join(TEMPDIR, 'mongos2_' + suffix))
        self.mongos2_hostname = self.mongos2.start_mongos(
            self.confighost,
            [h.get_shard_string() for h in (self.shard1, self.shard2)],
            noauth=self.noauth, fresh=True, addShards=False)

        self.mongos_connection = self.mongos.connection()
        self.mongos2_connection = self.mongos2.connection()
        self.mongos_connection.drop_database('mongo_hadoop')
        mongo_manager.mongo_import("localhost:" + str(self.mongos.port),
                                   "mongo_hadoop",
                                   "yield_historical.in",
                                   JSONFILE_PATH)
        mongos_admindb = self.mongos_connection['admin']
        mongos_admindb.command("enablesharding", "mongo_hadoop")
        self.homedirs = [prefix + suffix
                         for prefix in ("rs0_", "rs1_", "config_db_", "mongos_", "mongos2_")]
        num_runs += 1

        # Turn off the balancer (upsert of the "stopped" flag) -- presumably
        # so the manual chunk layout created below is not rebalanced.
        self.mongos_connection['config'].settings.update(
            {"_id": "balancer"}, {'$set': {'stopped': True}}, True)
        mongos_admindb.command("shardCollection", namespace, key={"_id": 1})

        testcoll = self.mongos_connection['mongo_hadoop']['yield_historical.in']

        # Split at the _id of the document found at each offset in _id order.
        for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]:
            split_doc = testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]
            mongos_admindb.command("split", namespace,
                                   middle={"_id": split_doc['_id']})

        ms_config = self.mongos_connection['config']
        shards = list(ms_config.shards.find())
        numchunks = ms_config.chunks.count()
        chunk_source = ms_config.chunks.find_one()['shard']
        # logging applies "msg % args" lazily, so every extra argument needs
        # a placeholder in the message or formatting fails at emit time.
        logging.info("chunk source %s", chunk_source)
        chunk_dest = [s['_id'] for s in shards if s['_id'] != chunk_source][0]
        logging.info("chunk dest %s", chunk_dest)

        # Shuffle chunks around: one move attempt per iteration.  Floor
        # division keeps the count an int on Python 3 as well.
        for _ in range(numchunks // 2):
            chunk_to_move = ms_config.chunks.find_one({"shard": chunk_source})
            logging.info("moving %s from %s to %s", chunk_to_move, chunk_source, chunk_dest)
            try:
                mongos_admindb.command("moveChunk", namespace,
                                       find=chunk_to_move['min'],
                                       to=chunk_dest)
            except Exception as e:
                # Best effort: report the failed move and carry on.
                print(e)