Exemple #1
0
def provision_new_database_for_variant_warehouse(db_name):
    """Make sure a variant warehouse database called ``db_name`` is provisioned.

    When the database already reports at least one collection it is left
    untouched. Otherwise sharding is enabled on it and sharding is requested
    for every collection keyed in ``collections_shard_key_map``.
    """
    mongo_settings = cfg['mongodb']
    # Supplying the secrets file overrides any password already in the URI.
    warehouse_db = MongoDatabase(
        uri=mongo_settings['mongo_admin_uri'],
        secrets_file=mongo_settings['mongo_admin_secrets_file'],
        db_name=db_name)

    existing_collections = warehouse_db.get_collection_names()
    if len(existing_collections) > 0:
        # Guard clause: nothing to provision for a database that has content.
        logger.info(f'Found existing database named {db_name}.')
        return

    warehouse_db.enable_sharding()
    warehouse_db.shard_collections(
        collections_shard_key_map,
        collections_to_shard=collections_shard_key_map.keys())
    logger.info(f'Created new database named {db_name}.')
Exemple #2
0
def prepare_dest_db(mongo_source_db: MongoDatabase,
                    mongo_dest_db: MongoDatabase):
    """Reset the destination database and shard it after the source.

    The destination database is dropped, sharding is enabled on it, and
    ``shard_collections`` is invoked with ``collections_shard_key_map`` for the
    collection names reported by the source database. If any step raises, the
    error is logged and the process exits with status 1.
    """
    try:
        logger.info("Dropping target database if it already exists...")
        mongo_dest_db.drop()

        logger.info("Enabling sharding in the target database...")
        mongo_dest_db.enable_sharding()

        logger.info("Sharding collections in the target database...")
        source_collection_names = mongo_source_db.get_collection_names()
        mongo_dest_db.shard_collections(
            collections_shard_key_map,
            collections_to_shard=source_collection_names)
    except Exception as ex:
        failure_details = str(ex)
        logger.error(
            f"Error while preparing destination database!\n{failure_details}")
        sys.exit(1)
Exemple #3
0
class TestMongoDatabase(TestCommon):
    """Integration tests for the ``MongoDatabase`` wrapper.

    Tests expect a local sharded Mongo instance, plus the ``mongo`` shell and
    ``mongorestore`` on the PATH (both are invoked from ``setUp``).
    """
    # Name of the database restored from the test resources before each test.
    dump_db_name = "test_mongo_db"
    uri = "mongodb://localhost:27017/admin"
    # Independent client used to verify results without going through the
    # MongoDatabase wrapper under test.
    local_mongo_handle = pymongo.MongoClient()

    def setUp(self) -> None:
        """Recreate the test database from the dump under the resources folder."""
        self.test_mongo_db = MongoDatabase(uri=self.uri,
                                           db_name=self.dump_db_name)
        # NOTE(review): resources_folder is not defined in this class;
        # presumably it is provided by TestCommon.
        self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)
        run_command_with_output(
            "Drop target test database if it already exists...",
            f"mongo {self.dump_db_name} "
            f"--eval 'db.dropDatabase()'")
        run_command_with_output("Import test database...",
                                f"mongorestore --dir {self.dump_dir}")

    def tearDown(self) -> None:
        """No per-test cleanup; setUp drops and re-imports the test database."""
        pass

    def _restore_data_to_another_db(self):
        """Copy the test database into ``<db_name>_restore`` and return its handle.

        The test database is dumped to a temporary directory, any existing
        restore database is dropped, and the dump is restored with the
        ``nsFrom``/``nsTo`` arguments mapping the original namespace onto the
        restore database.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            test_restore_db = MongoDatabase(
                uri=self.uri, db_name=self.test_mongo_db.db_name + "_restore")
            test_restore_db.drop()
            test_restore_db.restore_data(
                dump_dir=tempdir,
                mongorestore_args={
                    "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
                    "nsTo": f'"{test_restore_db.db_name}.*"'
                })
            return test_restore_db

    def test_drop_database(self):
        """Dropping the database removes it from the server's database list."""
        self.test_mongo_db.drop()
        self.assertNotIn(self.dump_db_name,
                         self.local_mongo_handle.list_database_names())

    def test_get_indexes(self):
        """get_indexes() returns the expected index map for the restored dump."""
        expected_index_map = {
            'annotationMetadata_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotationMetadata_2_0',
                    'v': 2
                }
            },
            'annotations_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'ct.so_1': {
                    'background': True,
                    'key': [('ct.so', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'xrefs.id_1': {
                    'background': True,
                    'key': [('xrefs.id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                }
            },
            'files_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'v': 2
                },
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            },
            'variants_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.so_1': {
                    'background': True,
                    'key': [('annot.so', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.xrefs_1': {
                    'background': True,
                    'key': [('annot.xrefs', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'chr_1_start_1_end_1': {
                    'background': True,
                    'key': [('chr', 1), ('start', 1), ('end', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'files.sid_1_files.fid_1': {
                    'background': True,
                    'key': [('files.sid', 1), ('files.fid', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'ids_1': {
                    'background': True,
                    'key': [('ids', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                }
            }
        }
        self.assertDictEqual(expected_index_map,
                             self.test_mongo_db.get_indexes())

    def test_create_index_on_collections(self):
        """create_index_on_collections() leaves the named index on the collection."""
        collection_index_map = {
            'files_2_0': {
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            }
        }
        test_restore_db = self._restore_data_to_another_db()
        test_restore_db.create_index_on_collections(
            collection_index_map=collection_index_map)
        test_restore_db_index_info = test_restore_db.get_indexes()
        # Check if index with the name "unique_file" is created on the collection
        self.assertIn('files_2_0', test_restore_db_index_info)
        self.assertIn('unique_file', test_restore_db_index_info['files_2_0'])
        self.assertEqual(
            [('sid', 1), ('fid', 1), ('fname', 1)],
            test_restore_db_index_info['files_2_0']['unique_file']['key'])

    def test_enable_sharding(self):
        """enable_sharding() marks the database as partitioned in the config db."""
        self.test_mongo_db.enable_sharding()
        # Query meta-collection in the config database to check sharding status
        sharded_db_entry = self.local_mongo_handle["config"][
            "databases"].find_one({
                "_id": self.test_mongo_db.db_name,
                "partitioned": True
            })
        self.assertIsNotNone(sharded_db_entry)

    def test_shard_collections(self):
        """shard_collections() registers the collection with the given shard key."""
        test_restore_db = self._restore_data_to_another_db()
        collection_to_shard = "files_2_0"
        test_restore_db.enable_sharding()
        test_restore_db.shard_collections(
            collections_shard_key_map={
                "files_2_0": (["sid", "fid", "fname"], True)
            },
            collections_to_shard=[collection_to_shard])
        # Query meta-collection in the config database to check sharding status
        sharded_collection_entry = self.local_mongo_handle["config"][
            "collections"].find_one({
                "_id":
                f"{test_restore_db.db_name}.{collection_to_shard}",
                "key": {
                    "sid": 1,
                    "fid": 1,
                    "fname": 1
                }
            })
        self.assertIsNotNone(sharded_collection_entry)

    def test_dump_data(self):
        """dump_data() creates a directory named after the database."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            self.assertTrue(
                os.path.isdir(os.path.join(tempdir, self.dump_db_name)))

    def test_archive_data(self):
        """archive_data() creates a file with the requested archive name."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.archive_data(tempdir, self.dump_db_name)
            self.assertTrue(
                os.path.isfile(os.path.join(tempdir, self.dump_db_name)))

    def test_restore_data(self):
        """Restoring into another database makes that database exist."""
        test_restore_db = self._restore_data_to_another_db()
        self.assertIn(test_restore_db.db_name,
                      self.local_mongo_handle.list_database_names())

    def test_export_import_data(self):
        """Exported line count and re-imported document count match the source."""
        org_collection_name = "variants_2_0"
        mongo_export_args = {"collection": org_collection_name}
        with tempfile.TemporaryDirectory() as tempdir:
            export_file_path = os.path.join(tempdir, self.dump_db_name)
            coll_doc_count = self.test_mongo_db.mongo_handle[
                self.dump_db_name][org_collection_name].count_documents({})
            self.test_mongo_db.export_data(export_file_path, mongo_export_args)
            with open(export_file_path, "r") as exported_file:
                # The test treats each exported line as one document; count
                # lines by streaming instead of loading the whole file.
                export_doc_count = sum(1 for _ in exported_file)
            self.assertEqual(coll_doc_count, export_doc_count)

            # import whatever we have exported into a new collection in the same database
            new_collection_name = "temp_variants_2_0"
            mongo_import_args = {
                "mode": "upsert",
                "collection": new_collection_name
            }
            self.test_mongo_db.import_data(export_file_path, mongo_import_args)
            imported_doc_count = self.test_mongo_db.mongo_handle[
                self.dump_db_name][new_collection_name].count_documents({})
            self.assertEqual(coll_doc_count, imported_doc_count)

            # delete the newly created temp collection
            self.test_mongo_db.mongo_handle[
                self.dump_db_name][new_collection_name].drop()