Example #1
0
def export_accession_data(mongo_source_uri, mongo_source_secrets_file,
                          study_seq_tuple_set, export_dir, query_file_dir):
    """Export the accessioning collection for the given study/sequence pairs.

    Builds a mongoexport query from *study_seq_tuple_set*, writes it to a
    query file under *query_file_dir*, then runs the export against the
    accessioning database, writing the output under
    ``<export_dir>/<accession_db>/<accession_collection>/<accession_collection>``.
    """
    source_db = MongoDatabase(uri=mongo_source_uri,
                              secrets_file=mongo_source_secrets_file,
                              db_name=accession_db)
    # mongoexport reads its filter from a file, so persist the query first.
    query_path = write_query_to_file(
        create_accession_query(study_seq_tuple_set),
        query_file_dir,
        accession_query_file_name)
    export_args = {"collection": accession_collection,
                   "queryFile": query_path}

    logger.info(
        f"Starting mongo export process for accessioning database: mongo_source ({mongo_source_uri}) and mongo_export_args ({export_args})"
    )
    target_file = os.path.join(export_dir, accession_db, accession_collection,
                               accession_collection)

    source_db.export_data(target_file, export_args)
Example #2
0
def mongo_export_files_variants_data(mongo_source_uri,
                                     mongo_source_secrets_file, db_study_dict,
                                     export_dir, query_dir):
    """Export the files and variants collections for every database.

    For each ``db -> study/VCF`` entry in *db_study_dict*, connects to the
    database and exports both the files collection and the variants
    collection, each filtered by a query built from the study/VCF info.
    Output goes to ``<export_dir>/<db>/<collection>/<collection>``.

    :param mongo_source_uri: URI of the source mongo instance
    :param mongo_source_secrets_file: secrets file for the source connection
    :param db_study_dict: mapping of database name to study/VCF information
    :param export_dir: root directory for exported data files
    :param query_dir: directory where query files are written
    """
    # Fixed: single space in the log message (was "for  mongo").
    logger.info(
        f"Starting mongo export process for mongo ({mongo_source_uri})")
    for db, study_vcf in db_study_dict.items():
        mongo_source = MongoDatabase(uri=mongo_source_uri,
                                     secrets_file=mongo_source_secrets_file,
                                     db_name=db)
        # The files and variants exports follow the identical
        # write-query / build-args / export sequence, so share one helper.
        _export_single_collection(mongo_source, db, study_vcf, export_dir,
                                  query_dir, files_collection,
                                  create_files_query, files_query_file_name)
        _export_single_collection(mongo_source, db, study_vcf, export_dir,
                                  query_dir, variant_collection,
                                  create_variants_query,
                                  variants_query_file_name)


def _export_single_collection(mongo_source, db, study_vcf, export_dir,
                              query_dir, collection, build_query,
                              query_file_name):
    """Write the query file for one collection and run mongoexport on it."""
    query_path = write_query_to_file(build_query(study_vcf), query_dir,
                                     query_file_name)
    mongo_export_args = {"collection": collection, "queryFile": query_path}
    logger.info(
        f"Exporting data for database ({db}): collection ({collection}) - mongo_export_args ({mongo_export_args})"
    )
    export_file = os.path.join(export_dir, db, collection, collection)
    mongo_source.export_data(export_file, mongo_export_args)
Example #3
0
class TestMongoDatabase(TestCommon):
    """Integration tests for the MongoDatabase wrapper.

    Each test restores a dump of ``test_mongo_db`` in setUp and exercises
    one MongoDatabase operation: drop, index inspection/creation, sharding,
    dump/restore, and export/import. Results are verified independently
    through a direct pymongo client handle.
    """

    # Name of the database restored from the dump directory before each test.
    dump_db_name = "test_mongo_db"
    # URI of the local mongos/mongod instance the tests run against.
    uri = "mongodb://localhost:27017/admin"
    # Direct pymongo handle used to verify results independently of MongoDatabase.
    local_mongo_handle = pymongo.MongoClient()

    # Tests expect a local sharded Mongo instance
    def setUp(self) -> None:
        """Drop any leftover test database and restore it from the dump dir."""
        self.test_mongo_db = MongoDatabase(uri=self.uri,
                                           db_name=self.dump_db_name)
        self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)
        # Drop via the mongo shell rather than pymongo so a partially
        # sharded leftover database is cleaned up the same way every time.
        run_command_with_output(
            "Drop target test database if it already exists...",
            f"mongo {self.dump_db_name} "
            f"--eval 'db.dropDatabase()'")
        run_command_with_output("Import test database...",
                                f"mongorestore --dir {self.dump_dir}")

    def tearDown(self) -> None:
        # Intentionally empty: setUp drops and re-imports the database, so
        # no per-test cleanup is required here.
        pass

    def _restore_data_to_another_db(self):
        """Dump the test database and restore it under a "_restore" name.

        Returns the MongoDatabase handle of the restored copy. The
        nsFrom/nsTo arguments remap every namespace from the source database
        to the restore database during mongorestore.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            test_restore_db = MongoDatabase(
                uri=self.uri, db_name=self.test_mongo_db.db_name + "_restore")
            test_restore_db.drop()
            test_restore_db.restore_data(
                dump_dir=tempdir,
                mongorestore_args={
                    "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
                    "nsTo": f'"{test_restore_db.db_name}.*"'
                })
            return test_restore_db

    def test_drop_database(self):
        """Dropping the database removes it from the server's database list."""
        self.test_mongo_db.drop()
        self.assertTrue(self.dump_db_name not in
                        self.local_mongo_handle.list_database_names())

    def test_get_indexes(self):
        """get_indexes returns the full per-collection index map of the dump."""
        # Expected map: collection name -> index name -> index properties,
        # exactly as restored from the test dump.
        expected_index_map = {
            'annotationMetadata_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotationMetadata_2_0',
                    'v': 2
                }
            },
            'annotations_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'ct.so_1': {
                    'background': True,
                    'key': [('ct.so', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'xrefs.id_1': {
                    'background': True,
                    'key': [('xrefs.id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                }
            },
            'files_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'v': 2
                },
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            },
            'variants_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.so_1': {
                    'background': True,
                    'key': [('annot.so', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.xrefs_1': {
                    'background': True,
                    'key': [('annot.xrefs', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'chr_1_start_1_end_1': {
                    'background': True,
                    'key': [('chr', 1), ('start', 1), ('end', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'files.sid_1_files.fid_1': {
                    'background': True,
                    'key': [('files.sid', 1), ('files.fid', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'ids_1': {
                    'background': True,
                    'key': [('ids', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                }
            }
        }
        self.assertDictEqual(expected_index_map,
                             self.test_mongo_db.get_indexes())

    def test_create_index_on_collections(self):
        """create_index_on_collections builds the requested index on a fresh copy."""
        collection_index_map = {
            'files_2_0': {
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            }
        }
        test_restore_db = self._restore_data_to_another_db()
        test_restore_db.create_index_on_collections(
            collection_index_map=collection_index_map)
        test_restore_db_index_info = test_restore_db.get_indexes()
        # Check if index with the name "unique_file" is created on the collection
        self.assertTrue('files_2_0' in test_restore_db_index_info.keys())
        self.assertTrue(
            'unique_file' in test_restore_db_index_info['files_2_0'])
        self.assertEqual(
            [('sid', 1), ('fid', 1), ('fname', 1)],
            test_restore_db_index_info['files_2_0']['unique_file']['key'])

    def test_enable_sharding(self):
        """enable_sharding marks the database as partitioned in config.databases."""
        self.test_mongo_db.enable_sharding()
        # Query meta-collection in the config database to check sharding status
        self.assertTrue(
            len(
                list(self.local_mongo_handle["config"]["databases"].find(
                    {
                        "_id": self.test_mongo_db.db_name,
                        "partitioned": True
                    }))) > 0)

    def test_shard_collections(self):
        """shard_collections registers the shard key in config.collections."""
        test_restore_db = self._restore_data_to_another_db()
        collection_to_shard = "files_2_0"
        test_restore_db.enable_sharding()
        test_restore_db.shard_collections(
            collections_shard_key_map={
                "files_2_0": (["sid", "fid", "fname"], True)
            },
            collections_to_shard=[collection_to_shard])
        # Query meta-collection in the config database to check sharding status
        self.assertTrue(
            len(
                list(self.local_mongo_handle["config"]["collections"].find({
                    "_id":
                    f"{test_restore_db.db_name}.{collection_to_shard}",
                    "key": {
                        "sid": 1,
                        "fid": 1,
                        "fname": 1
                    }
                }))) > 0)

    def test_dump_data(self):
        """dump_data creates a dump directory named after the database."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            self.assertTrue(
                os.path.isdir(os.path.join(tempdir, self.dump_db_name)))

    def test_archive_data(self):
        """archive_data produces a single archive file named after the database."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.archive_data(tempdir, self.dump_db_name)
            self.assertTrue(
                os.path.isfile(os.path.join(tempdir, self.dump_db_name)))

    def test_restore_data(self):
        """restore_data recreates the database under the remapped name."""
        test_restore_db = self._restore_data_to_another_db()
        self.assertTrue(test_restore_db.db_name in
                        self.local_mongo_handle.list_database_names())

    def test_export_import_data(self):
        """Round-trip: export a collection to a file, import it into a new one.

        Verifies that the exported file has one line per document and that
        importing it back yields the same document count.
        """
        org_collection_name = "variants_2_0"
        mongo_export_args = {"collection": org_collection_name}
        with tempfile.TemporaryDirectory() as tempdir:
            export_file_path = os.path.join(tempdir, self.dump_db_name)
            coll_doc_count = self.test_mongo_db.mongo_handle[
                self.dump_db_name][org_collection_name].count_documents({})
            self.test_mongo_db.export_data(export_file_path, mongo_export_args)
            # mongoexport writes one JSON document per line, so line count
            # should equal the collection's document count.
            with open(export_file_path, "r") as exported_file:
                export_doc_count = len(exported_file.readlines())
                self.assertEqual(coll_doc_count, export_doc_count)

            # import whatever we have exported into a new collection in the same database
            new_collection_name = "temp_variants_2_0"
            mongo_import_args = {
                "mode": "upsert",
                "collection": new_collection_name
            }
            self.test_mongo_db.import_data(export_file_path, mongo_import_args)
            imported_doc_count = self.test_mongo_db.mongo_handle[
                self.dump_db_name][new_collection_name].count_documents({})
            self.assertEqual(coll_doc_count, imported_doc_count)

            # delete the newly created temp collection
            self.test_mongo_db.mongo_handle[
                self.dump_db_name][new_collection_name].drop()