def test_move(self):
        """Check that ``MoveMongoDBs.move`` copies every source collection to the destination.

        The test writes a migration config file next to this test module,
        restores each database listed by the mover into the source instance,
        runs the move, and finally compares the per-collection document counts
        between the source and the destination instances.
        """
        dir_path = os.path.dirname(os.path.realpath(__file__))
        config_file_content = f"""migration-folder: {dir_path}/../resources
python3-path: python3
nextflow-binary-path: nextflow
nextflow-config-path: {dir_path}/workflow.config
script-path: {dir_path}/../../
mongo-source-uri: mongodb://localhost:27017/admin
mongo-source-secrets-file: {dir_path}/empty_secret_file 
mongo-dest-uri: mongodb://localhost:27018/admin
mongo-dest-secrets-file: {dir_path}/empty_secret_file
"""
        config_file_path = f"{dir_path}/migration_config.yml"
        # A context manager guarantees the content is flushed and the handle closed
        # before the path is handed to MoveMongoDBs (a bare open().write() leaks the handle).
        with open(config_file_path, "w") as config_file:
            config_file.write(config_file_content)
        mover = MoveMongoDBs(migration_config_file=config_file_path,
                             dbs_to_migrate_list=f"{dir_path}/dbs_to_migrate.txt",
                             batch_number="1", resume_flag=False)

        # Load data to source
        for db_name in mover.dbs_to_migrate:
            source_db = MongoDatabase(mover.migration_config["mongo-source-uri"], db_name=db_name)
            source_db.drop()
            source_db.restore_data(dump_dir=f"{dir_path}/../resources/{db_name}")

        mover.move()
        # Check if source data made it to the destination
        for db_name in mover.dbs_to_migrate:
            source_db = MongoDatabase(mover.migration_config["mongo-source-uri"], db_name=db_name)
            dest_db = MongoDatabase(mover.migration_config["mongo-dest-uri"], db_name=db_name)
            for collection_name in source_db.get_collection_names():
                self.assertEqual(source_db.mongo_handle[db_name][collection_name].count_documents(filter={}),
                                 dest_db.mongo_handle[db_name][collection_name].count_documents(filter={}))
Example #2
0
def create_collection_count_validation_report(mongo_source: MongoDatabase,
                                              database_list,
                                              private_config_xml_file):
    """Store the document count of every collection of the given databases.

    For each database name in ``database_list`` the source handle is pointed
    at that database, its collections are listed and, in sorted order, the
    count of each collection is fetched and handed to
    ``insert_count_validation_result_to_db``. Databases that report no
    collections are logged as missing and skipped. All rows of one call share
    the same timestamp, taken once at the start.
    """
    report_timestamp = datetime.now()
    mongo_host = mongo_source.mongo_handle.address[0]

    for database_name in database_list:
        mongo_source.db_name = database_name
        collection_names = mongo_source.get_collection_names()

        if collection_names:
            for collection_name in sorted(collection_names):
                logger.info(f"fetching count for database ({database_name}) - collection ({collection_name})")

                document_count = get_documents_count_for_collection(mongo_source, database_name, collection_name)
                logger.info(
                    f"Found {document_count} documents in database ({database_name}) - collection ({collection_name})")

                result_row = (mongo_host, database_name, collection_name, document_count, report_timestamp)
                insert_count_validation_result_to_db(private_config_xml_file, result_row)
        else:
            logger.warning(f"database {database_name} does not exist in mongo instances {mongo_host}")
Example #3
0
def create_indexes(mongo_source: MongoDatabase, mongo_dest: MongoDatabase):
    """Create on the destination the indexes reported by the source.

    Any exception is logged and terminates the process with exit status 1.
    """
    logger.info(f"Creating indexes in the target database {mongo_dest.uri_with_db_name}....")
    try:
        source_indexes = mongo_source.get_indexes()
        mongo_dest.create_index_on_collections(source_indexes)
    except Exception as error:
        logger.error(f"Error while creating indexes!\n{str(error)}")
        sys.exit(1)
Example #4
0
 def setUp(self) -> None:
     """Create the test database handle and reload the test dump from scratch."""
     self.test_mongo_db = MongoDatabase(uri=self.uri, db_name=self.dump_db_name)
     self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)

     # Start from a clean slate: drop whatever a previous run left behind
     drop_command = f"mongo {self.dump_db_name} --eval 'db.dropDatabase()'"
     run_command_with_output("Drop target test database if it already exists...", drop_command)

     # Load the reference dump that the tests run against
     restore_command = f"mongorestore --dir {self.dump_dir}"
     run_command_with_output("Import test database...", restore_command)
def dump_data_from_source(mongo_source: MongoDatabase, top_level_dump_dir):
    """Dump the source database into ``<top_level_dump_dir>/<db name>``.

    Any exception is logged and terminates the process with exit status 1.
    """
    try:
        logger.info("Running mongodump from source...")

        database_dump_dir = os.path.join(top_level_dump_dir, mongo_source.db_name)
        # Force table scan is performant for many workloads and avoids cursor timeout issues
        # See https://jira.mongodb.org/browse/TOOLS-845?focusedCommentId=988298&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-988298
        mongodump_options = {"forceTableScan": "", "numParallelCollections": "1"}
        mongo_source.dump_data(dump_dir=database_dump_dir, mongodump_args=mongodump_options)
    except Exception as error:
        logger.error(f"Error while dumping data from source!\n{str(error)}")
        sys.exit(1)
def restore_data_to_dest(mongo_dest: MongoDatabase, top_level_dump_dir):
    """Restore the dump found in ``<top_level_dump_dir>/<db name>`` into the destination.

    Any exception is logged and terminates the process with exit status 1.
    """
    try:
        dump_dir = os.path.join(top_level_dump_dir, mongo_dest.db_name)
        logger.info(f"Loading data in target database from source dump {dump_dir}...")
        mongorestore_options = {
            # noIndexRestore - Do not restore indexes because MongoDB 3.2 does not have
            # index compatibility with MongoDB 4.0
            "noIndexRestore": "",
            "numParallelCollections": 4,
            "numInsertionWorkersPerCollection": 4,
        }
        mongo_dest.restore_data(dump_dir=dump_dir, mongorestore_args=mongorestore_options)
    except Exception as error:
        logger.error(f"Error while restoring data to the destination database!\n{str(error)}")
        sys.exit(1)
    def setUp(self) -> None:
        """Reset the test collection so it holds exactly two submitted-variant fixtures."""
        self.contig = "AF034253.1"
        self.db = "eva_accession_sharded_test"
        self.collection = "submittedVariantEntity"
        self.uri = "mongodb://localhost:27017/"
        self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

        variant_on_contig_m = {
            "_id": "1125697507941CA420E26588F9F40F6C56C876A0",
            "accession": 7315407067,
            "alt": "G",
            "contig": "M",
            "createdDate": "2021-02-24T10:26:17.561Z",
            "ref": "A",
            "seq": "GCA_000003025.4",
            "start": 158,
            "study": "PRJEB43246",
            "tax": 9823,
            "version": 1,
        }
        variant_on_genbank_contig = {
            "_id": "6CD16D81C36466B1C12A4D1911DAD1A7ECDA0976",
            "accession": 7315401731,
            "alt": "T",
            "contig": "CM000812.4",
            "createdDate": "2021-02-24T10:25:25.259Z",
            "ref": "C",
            "seq": "GCA_000003025.4",
            "start": 21664,
            "study": "PRJEB43246",
            "tax": 9823,
            "version": 1,
        }
        fixture_documents = [variant_on_contig_m, variant_on_genbank_contig]

        # Drop first so leftovers from a previous test cannot leak into this one
        test_collection = self.mongo_source.mongo_handle[self.db][self.collection]
        test_collection.drop()
        test_collection.insert_many(fixture_documents)
Example #8
0
def main():
    """Parse the command line, open the accessioning database and gather the counts."""
    arg_parser = argparse.ArgumentParser(
        description='Parse all the clustering logs to get date ranges and query mongo to get metrics counts')
    arg_parser.add_argument("--clustering_root_path", type=str, required=True,
                            help="base directory where all the clustering was run.")
    arg_parser.add_argument("--mongo-source-uri", required=True,
                            help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument("--mongo-source-secrets-file", required=True,
                            help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    arg_parser.add_argument('--private_config_xml_file', required=True,
                            help='Path to the file containing the ')
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name="eva_accession_sharded")
    gather_count_from_mongo(cli_args.clustering_root_path, source_database, cli_args.private_config_xml_file)
def main():
    """Run the requested tasks for the clustering and release tracking table.

    ``create_and_fill_table`` creates the table and fills it from the previous
    inventory and from remapping. ``fill_rs_count`` needs ``--taxonomy`` and
    fills the RS counts using the production accessioning database. When no
    task is selected, all tasks are run.
    """
    arg_parser = argparse.ArgumentParser(description='Create and load the clustering and release tracking table',
                                         add_help=False)
    arg_parser.add_argument("--private-config-xml-file", required=True,
                            help="ex: /path/to/eva-maven-settings.xml")
    arg_parser.add_argument("--release-version", type=int, required=True,
                            help="version of the release")
    arg_parser.add_argument("--reference-directory", required=True,
                            help="Directory where the reference genomes exists or should be downloaded")
    arg_parser.add_argument("--taxonomy", type=int, required=False,
                            help="taxonomy id for which rs count needs to be updated")
    arg_parser.add_argument('--tasks', required=False, type=str, nargs='+',
                            default=all_tasks, choices=all_tasks,
                            help='Task or set of tasks to perform.')
    arg_parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = arg_parser.parse_args()

    logging_config.add_stdout_handler()

    # Fall back to the complete task list when nothing was selected
    args.tasks = args.tasks or all_tasks

    if 'create_and_fill_table' in args.tasks:
        create_table(args.private_config_xml_file)
        fill_in_from_previous_inventory(args.private_config_xml_file, args.release_version)
        fill_in_table_from_remapping(args.private_config_xml_file, args.release_version,
                                     args.reference_directory)

    if 'fill_rs_count' not in args.tasks:
        return

    if not args.taxonomy:
        raise Exception("For running task 'fill_rs_count', it is mandatory to provide taxonomy arguments")

    production_uri = get_mongo_uri_for_eva_profile('production', args.private_config_xml_file)
    production_db = MongoDatabase(uri=production_uri, db_name="eva_accession_sharded")
    fill_num_rs_id_for_taxonomy_and_assembly(production_db, args.private_config_xml_file,
                                             args.release_version, args.taxonomy,
                                             args.reference_directory)
def main():
    """Parse the command line and run the discordant clustered variant detection.

    Connects to the ``eva_accession_sharded`` database of the given Mongo
    source and passes the selected assemblies and the batch size to
    ``detect_discordant_cluster_variant_from_split_merge_operations``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Detect clustered variant that have position discordant with their submitted variants and are '
        'involve in a merge or split event.')
    parser.add_argument(
        "--mongo-source-uri",
        help=
        "Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
        required=True)
    parser.add_argument(
        "--mongo-source-secrets-file",
        help=
        "Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
        required=True)
    parser.add_argument("--assemblies",
                        nargs='+',
                        help="The list of assembly to check",
                        default=[])
    # type=int keeps a value given on the command line consistent with the integer
    # default; without it argparse would hand the batch size over as a string.
    parser.add_argument("--batch_size",
                        type=int,
                        default=1000,
                        help="The number of variant to retrieve pr batch")
    args = parser.parse_args()
    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    detect_discordant_cluster_variant_from_split_merge_operations(
        mongo_source, args.assemblies, args.batch_size)
Example #11
0
def annotations_export(mongo_source_uri, mongo_source_secrets_file, export_dir,
                       query_dir):
    """Export annotation and annotation-metadata documents for already exported variants.

    Every entry of ``export_dir`` is treated as a database name. When that
    database has an exported variant file, the file is read in batches of
    ``chunk_size`` lines; the annotation ids derived from each batch drive one
    export per collection, tagged with the zero-based batch number.
    """
    for db in os.listdir(export_dir):
        mongo_source = MongoDatabase(uri=mongo_source_uri,
                                     secrets_file=mongo_source_secrets_file,
                                     db_name=db)
        variant_file_loc = os.path.join(export_dir, db, variant_collection, variant_collection)
        if not os.path.isfile(variant_file_loc):
            continue

        with open(variant_file_loc, 'r') as variant_file:
            # Keep pulling batches of lines until an empty batch signals end of file
            variant_batches = iter(lambda: list(islice(variant_file, chunk_size)), [])
            for chunk_number, variant_batch in enumerate(variant_batches):
                annotations = get_annotations_ids(variant_batch)
                annotation_ids = annotations["annotations_id"]
                annotation_metadata_ids = annotations["annotations_metadata_id"]

                if annotation_ids:
                    export_annotations_data(mongo_source, db, annotation_collection,
                                            annotation_ids, export_dir, query_dir,
                                            annotation_query_file_name, chunk_number)
                if annotation_metadata_ids:
                    export_annotations_data(mongo_source, db, annotation_metadata_collection,
                                            annotation_metadata_ids, export_dir, query_dir,
                                            annotation_metadata_query_file_name, chunk_number)
Example #12
0
 def setUp(self) -> None:
     """Reset the test collection so it holds exactly two submitted-variant fixtures."""
     self.db = "eva_accession_sharded_test"
     self.collection = "submittedVariantEntity"
     self.uri = "mongodb://localhost:27017/"
     self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

     first_variant = {
         "_id": "D0D2E897BFD59EAF6E2D0BA2C7883B5DC5B34F30",
         "accession": 7169189340,
         "alt": "C",
         "contig": "AEMK02000229.1",
         "createdDate": "2020-09-01T22:55:50.956Z",
         "ref": "G",
         "seq": "GCA_000003025.6",
         "start": 24577,
         "study": "PRJEB28579",
         "tax": 9823,
         "version": 1,
     }
     second_variant = {
         "_id": "C5E53108D33B135EB45E2BFD3E67744B48CB06A8",
         "accession": 7166483872,
         "alt": "G",
         "contig": "AEMK02000626.1",
         "createdDate": "2020-09-01T15:31:27.489Z",
         "ref": "T",
         "seq": "GCA_000003025.6",
         "start": 15784,
         "study": "PRJEB28579",
         "tax": 9823,
         "version": 1,
     }

     # Drop first so leftovers from a previous test cannot leak into this one
     test_collection = self.mongo_source.mongo_handle[self.db][self.collection]
     test_collection.drop()
     test_collection.insert_many([first_variant, second_variant])
Example #13
0
def mongo_import_from_dir(mongo_dest_uri, mongo_dest_secrets_file, export_dir):
    """Import every exported file under ``export_dir`` into the destination in upsert mode.

    The directory is walked as ``<export_dir>/<database>/<collection>/<file>``;
    each file is imported into the collection named after its parent directory.
    """
    mongo_import_args = {"mode": "upsert"}

    for db in os.listdir(export_dir):
        mongo_dest = MongoDatabase(uri=mongo_dest_uri, secrets_file=mongo_dest_secrets_file, db_name=db)
        db_dir = os.path.join(export_dir, db)
        for coll in os.listdir(db_dir):
            logger.info(f'Importing data for db ({db} - collection ({coll})')
            coll_dir = os.path.join(db_dir, coll)
            for file_name in os.listdir(coll_dir):
                # Point the shared arguments at the collection currently being imported
                mongo_import_args["collection"] = coll
                export_file = os.path.join(coll_dir, file_name)
                mongo_dest.import_data(export_file, mongo_import_args)
Example #14
0
def main():
    """Parse the command line and prepare the destination database before loading.

    Builds one handle for the source and one for the destination, both on the
    database given by ``--db-name``, and passes them to ``prepare_dest_db``.
    """
    parser = argparse.ArgumentParser(
        description='Prepare target database before loading',
        formatter_class=argparse.RawTextHelpFormatter,
        add_help=False)
    parser.add_argument(
        "--mongo-source-uri",
        help=
        "Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
        required=True)
    parser.add_argument(
        "--mongo-source-secrets-file",
        help=
        "Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
        required=True)
    parser.add_argument(
        "--mongo-dest-uri",
        help=
        "Mongo Destination URI (ex: mongodb://user:@mongos-dest-host:27017/admin)",
        required=True)
    # The help text used to be a copy of the source option's text; it now describes the destination.
    parser.add_argument(
        "--mongo-dest-secrets-file",
        help=
        "Full path to the Mongo Destination secrets file (ex: /path/to/mongo/dest/secret)",
        required=True)
    parser.add_argument("--db-name",
                        help="Database to migrate (ex: eva_hsapiens_grch37)",
                        required=True)
    # add_help=False above, so the help option is registered explicitly
    parser.add_argument('--help',
                        action='help',
                        help='Show this help message and exit')

    args = parser.parse_args()
    mongo_source_db = MongoDatabase(
        uri=args.mongo_source_uri,
        secrets_file=args.mongo_source_secrets_file,
        db_name=args.db_name)
    mongo_dest_db = MongoDatabase(uri=args.mongo_dest_uri,
                                  secrets_file=args.mongo_dest_secrets_file,
                                  db_name=args.db_name)
    prepare_dest_db(mongo_source_db, mongo_dest_db)
Example #15
0
    def setUp(self) -> None:
        """Reset the test collection with two "wrong contig" fixtures and one "correct contig" fixture."""
        self.db = "eva_accession_sharded_test"
        self.collection = "submittedVariantEntity"
        self.uri = "mongodb://localhost:27017/"
        self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

        variants_with_wrong_contig = [
            {
                "_id": "52AF357592EAE966B65D3F9D04C952791F1493DC",
                "seq": "GCA_000001895.4",
                "tax": 10116,
                "study": "PRJEB42012",
                "contig": "Un.1",
                "start": 1063,
                "ref": "G",
                "alt": "C",
                "accession": 7315398622,
                "version": 1,
                "createdDate": "2021-02-09T03:23:05.842Z",
            },
            {
                "_id": "899D3B4E7B6E1B18B9D34AC0CA3880AF5F68E09B",
                "seq": "GCA_000001895.4",
                "tax": 10116,
                "study": "PRJEB42012",
                "contig": "1_random.1",
                "start": 1510,
                "ref": "T",
                "alt": "C",
                "accession": 7315398494,
                "version": 1,
                "createdDate": "2021-02-09T03:23:05.041Z",
            },
        ]
        variants_with_correct_contig = [
            {
                "_id": "CAB0D97D36233AC8D84637F228B1CE172228A166",
                "seq": "GCA_000001895.4",
                "tax": 10116,
                "study": "PRJEB42012",
                "contig": "CM000072.5",
                "start": 2203542,
                "ref": "T",
                "alt": "C",
                "accession": 7306435312,
                "version": 1,
                "createdDate": "2021-02-08T11:56:42.206Z",
            },
        ]

        # Drop first so leftovers from a previous test cannot leak into this one
        test_collection = self.mongo_source.mongo_handle[self.db][self.collection]
        test_collection.drop()
        test_collection.insert_many(variants_with_wrong_contig)
        test_collection.insert_many(variants_with_correct_contig)
Example #16
0
def export_accession_data(mongo_source_uri, mongo_source_secrets_file,
                          study_seq_tuple_set, export_dir, query_file_dir):
    """Export the accessioning documents selected by ``study_seq_tuple_set``.

    The query built from the tuple set is written to a file in
    ``query_file_dir`` and passed to the export through the ``queryFile``
    argument. Output goes to
    ``<export_dir>/<accession_db>/<accession_collection>/<accession_collection>``.
    """
    mongo_source = MongoDatabase(uri=mongo_source_uri,
                                 secrets_file=mongo_source_secrets_file,
                                 db_name=accession_db)

    query_file_path = write_query_to_file(create_accession_query(study_seq_tuple_set),
                                          query_file_dir, accession_query_file_name)
    mongo_export_args = {"collection": accession_collection, "queryFile": query_file_path}

    logger.info(
        f"Starting mongo export process for accessioning database: mongo_source ({mongo_source_uri}) and mongo_export_args ({mongo_export_args})")

    output_path_parts = (export_dir, accession_db, accession_collection, accession_collection)
    accession_export_file = os.path.join(*output_path_parts)
    mongo_source.export_data(accession_export_file, mongo_export_args)
Example #17
0
    def setUp(self) -> None:
        """Reset the test collection so it holds exactly three submitted-variant fixtures."""
        self.contig = "AF010406.1"
        self.db = "eva_accession_sharded_test"
        self.collection = "submittedVariantEntity"
        self.uri = "mongodb://localhost:27017/"
        self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

        variant_on_dash_contig = {
            "_id": "8C4E490E82B895ADE3B9405204771B9E6BDCB286",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB33693",
            "contig": "-",
            "start": 16410,
            "ref": "G",
            "alt": "A",
            "accession": 7121896076,
            "version": 1,
            "createdDate": "2020-05-05T10:38:43.367Z",
        }
        variant_on_oarmt_contig = {
            "_id": "7350E3A0B0242791BD25901F15D22467DC7939BD",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB33693",
            "contig": "OARMT",
            "start": 16410,
            "ref": "G",
            "alt": "A",
            "accession": 7121824383,
            "version": 1,
            "createdDate": "2020-04-28T00:26:01.844Z",
        }
        variant_on_genbank_contig = {
            "_id": "C9618202A2AF568A94259A1A16AB0A67DCC1CC94",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB23437",
            "contig": "CM001582.1",
            "start": 5442343,
            "ref": "C",
            "alt": "T",
            "accession": 5264373293,
            "version": 1,
            "createdDate": "2019-07-07T11:14:13.110Z",
        }
        fixture_documents = [variant_on_dash_contig, variant_on_oarmt_contig, variant_on_genbank_contig]

        # Drop first so leftovers from a previous test cannot leak into this one
        test_collection = self.mongo_source.mongo_handle[self.db][self.collection]
        test_collection.drop()
        test_collection.insert_many(fixture_documents)
Example #18
0
def main():
    """Parse the command line and swap the two hard-coded contig pairs in the accessioning database."""
    arg_parser = argparse.ArgumentParser(description='Correct 98 contig error in study PRJEB28579',
                                         add_help=False)
    arg_parser.add_argument(
        "--mongo-source-uri", required=True,
        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument(
        "--mongo-source-secrets-file", required=True,
        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name="eva_accession_sharded")

    # Each entry names two contigs whose values have to be exchanged
    contig_swap_list = [
        {'contig_1': 'AEMK02000229.1', 'contig_2': 'AEMK02000626.1'},
        {'contig_1': 'AEMK02000417.1', 'contig_2': 'AEMK02000654.1'},
    ]
    swap_with_correct_contig(source_database, contig_swap_list)
 def _restore_data_to_another_db(self):
     """Dump the test database and restore it under ``<db name>_restore``.

     Returns the ``MongoDatabase`` handle of the restored copy.
     """
     with tempfile.TemporaryDirectory() as tempdir:
         self.test_mongo_db.dump_data(tempdir)

         restore_db_name = self.test_mongo_db.db_name + "_restore"
         test_restore_db = MongoDatabase(uri=self.uri, db_name=restore_db_name)
         test_restore_db.drop()

         # Rename the namespace during restore so the copy lands in the "_restore" database
         namespace_mapping = {
             "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
             "nsTo": f'"{test_restore_db.db_name}.*"',
         }
         test_restore_db.restore_data(dump_dir=tempdir, mongorestore_args=namespace_mapping)
         return test_restore_db
def main():
    """Parse the command line and dump the requested database from the Mongo source."""
    arg_parser = argparse.ArgumentParser(description='Dump data from a given MongoDB source',
                                         formatter_class=argparse.RawTextHelpFormatter,
                                         add_help=False)
    arg_parser.add_argument(
        "--mongo-source-uri", required=True,
        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument(
        "--mongo-source-secrets-file", required=True,
        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    arg_parser.add_argument("--db-name", required=True,
                            help="Database to migrate (ex: eva_hsapiens_grch37)")
    arg_parser.add_argument("--dump-dir", required=True,
                            help="Top-level directory where all dumps reside (ex: /path/to/dumps)")
    # add_help=False above, so the help option is registered explicitly
    arg_parser.add_argument('--help', action='help', help='Show this help message and exit')
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name=cli_args.db_name)
    dump_data_from_source(source_database, top_level_dump_dir=cli_args.dump_dir)
Example #21
0
def provision_new_database_for_variant_warehouse(db_name):
    """Create a variant warehouse database with the given name and shard its collections.

    When the database already reports collections nothing is changed and only
    a message is logged.
    """
    # Passing the secrets_file overrides the password already in the uri
    mongo_settings = cfg['mongodb']
    db_handle = MongoDatabase(uri=mongo_settings['mongo_admin_uri'],
                              secrets_file=mongo_settings['mongo_admin_secrets_file'],
                              db_name=db_name)

    existing_collection_count = len(db_handle.get_collection_names())
    if existing_collection_count > 0:
        logger.info(f'Found existing database named {db_name}.')
        return

    db_handle.enable_sharding()
    db_handle.shard_collections(collections_shard_key_map,
                                collections_to_shard=collections_shard_key_map.keys())
    logger.info(f'Created new database named {db_name}.')
Example #22
0
def prepare_dest_db(mongo_source_db: MongoDatabase,
                    mongo_dest_db: MongoDatabase):
    """Drop the destination database, then enable sharding and shard its collections.

    The collections to shard are the ones currently present in the source
    database. Any exception is logged and terminates the process with exit
    status 1.
    """
    try:
        logger.info("Dropping target database if it already exists...")
        mongo_dest_db.drop()

        logger.info("Enabling sharding in the target database...")
        mongo_dest_db.enable_sharding()

        logger.info("Sharding collections in the target database...")
        source_collection_names = mongo_source_db.get_collection_names()
        mongo_dest_db.shard_collections(collections_shard_key_map,
                                        collections_to_shard=source_collection_names)
    except Exception as error:
        logger.error(f"Error while preparing destination database!\n{str(error)}")
        sys.exit(1)
def main():
    """Parse the command line, then find and delete the declustered variants."""
    arg_parser = argparse.ArgumentParser(
        description='Delete declustered variants in dbsnpSubmittedVariantEntity Collection',
        add_help=False)
    arg_parser.add_argument(
        "--mongo-source-uri", required=True,
        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument(
        "--mongo-source-secrets-file", required=True,
        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    arg_parser.add_argument(
        "--output-dir", required=True,
        help="Top-level directory where all files reside (ex: /path/to/files)")
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name="eva_accession_sharded")
    # Step 1: find the variants to delete and store their ids in a file named by assembly
    find_ids_of_declustered_variants(source_database, cli_args.output_dir)
    # Step 2: read the stored variant ids back in batches and delete them
    delete_variants(source_database, cli_args.output_dir)
def main():
    """Parse the command line and run the contig correction on the accessioning database."""
    arg_parser = argparse.ArgumentParser(description='Correct contig error in study PRJEB33693',
                                         add_help=False)
    arg_parser.add_argument(
        "--mongo-source-uri", required=True,
        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument(
        "--mongo-source-secrets-file", required=True,
        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name="eva_accession_sharded")
    correct(source_database)
Example #25
0
    def setUp(self) -> None:
        """Reset the submitted and clustered variant collections with generated fixtures.

        Eleven submitted variants (rs 1000 to 1010) and ten clustered variants
        (accession 1000 to 1009) are inserted, so one submitted variant has no
        matching clustered variant.
        """
        host = 'localhost'
        port = 27017

        # Accessioning warehouse
        self.accession_db = 'eva_accession_sharded'
        self.submitted_variants_collection = 'submittedVariantEntity'
        self.clustered_variants_collection = 'clusteredVariantEntity'
        self.dbsnp_clustered_variants_collection = 'dbsnpClusteredVariantEntity'
        uri = f'mongodb://{host}:{port}'
        self.mongo_db = MongoDatabase(uri=uri, db_name="eva_accession_sharded")
        self.connection_handle = self.mongo_db.mongo_handle

        # Accessioning db
        submitted_variants = []
        for rs in range(1000, 1011):
            submitted_variants.append({
                "_id": calculate_id(rs),
                "seq": "GCA_000181335.4",
                "tax": 1111,
                "study": "PRJEB30318",
                "contig": "CM000001.1",
                "start": 76166296,
                "ref": "C",
                "alt": "T",
                "accession": 5318166021,
                "rs": rs,
            })

        clustered_variants = []
        for rs in range(1000, 1010):
            clustered_variants.append({
                "_id": calculate_id(rs),
                "asm": "GCA_000181335.4",
                "tax": 1111,
                "contig": "CM000001.1",
                "start": 76166296,
                "accession": rs,
            })

        # Drop before inserting so each test starts from the same content
        accession_database = self.connection_handle[self.accession_db]
        accession_database[self.submitted_variants_collection].drop()
        accession_database[self.submitted_variants_collection].insert_many(submitted_variants)
        accession_database[self.clustered_variants_collection].drop()
        accession_database[self.clustered_variants_collection].insert_many(clustered_variants)
Example #26
0
def main():
    """Parse the command line, collect the id files of the documents to remove and delete them.

    The files returned by the three ``find_ids_of_*`` helpers are gathered,
    in call order, into one list that is handed to ``delete_variants``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Delete document associated with Ovis aries remapped data in multiple collections',
        add_help=False)
    arg_parser.add_argument(
        "--mongo-source-uri", required=True,
        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)")
    arg_parser.add_argument(
        "--mongo-source-secrets-file", required=True,
        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)")
    arg_parser.add_argument(
        "--output-dir", required=True,
        help="Top-level directory where all files reside (ex: /path/to/files)")
    cli_args = arg_parser.parse_args()

    source_database = MongoDatabase(uri=cli_args.mongo_source_uri,
                                    secrets_file=cli_args.mongo_source_secrets_file,
                                    db_name="eva_accession_sharded")

    deletion_files = []
    deletion_files += find_ids_of_remapped_submitted_variants(source_database, cli_args.output_dir)
    deletion_files += find_ids_of_remapped_clustered_variants(source_database, cli_args.output_dir)
    deletion_files += find_ids_of_submitted_variant_operations(source_database, cli_args.output_dir)

    delete_variants(source_database, deletion_files)
Example #27
0
def main():
    """Replace an incorrect contig with the correct one for an assembly and study.

    Parses the command-line options, opens the ``eva_accession_sharded``
    database on the given Mongo source, and forwards the assembly accession,
    study accession, both contig names and the number of variants to replace
    to ``replace_with_correct_contig``.
    """
    # add_help=False switches off argparse's built-in -h/--help, so an explicit
    # --help option is registered below to keep the usage text reachable.
    parser = argparse.ArgumentParser(
        description='Replace incorrect contig in a given assembly and study with the correct contig', add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)", required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--assembly-accession", help="Genbank assembly accession (ex: GCA_000003055.5)", required=True)
    parser.add_argument("--study-accession", help="Study accession (ex: PRJEB29734)", required=True)
    # These two options describe contigs, not studies; the help text used to be
    # a copy of the --study-accession description.
    parser.add_argument("--incorrect-contig", help="Incorrect contig to be replaced (ex: MT)", required=True)
    parser.add_argument("--correct-contig", help="Contig to use as the replacement (ex: AY526085.1)", required=True)
    parser.add_argument("--num-variants-to-replace", help="Number of variants to replace (ex: 10)", type=int,
                        required=True)
    parser.add_argument('--help', action='help', help='Show this help message and exit')

    args = parser.parse_args()
    mongo_source = MongoDatabase(uri=args.mongo_source_uri, secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    replace_with_correct_contig(mongo_source, args.assembly_accession, args.study_accession,
                                args.incorrect_contig, args.correct_contig, args.num_variants_to_replace)
Example #28
0
def main():
    """Command-line entry point that archives each database named in a list file.

    Reads the Mongo source URI, its secrets file, the database-names list file
    and the archive directory from the command line. The list file is resolved
    through ``get_databases_list_for_export`` and, for every entry returned,
    ``archive_data_from_source`` is called with a ``MongoDatabase`` bound to
    that entry and the top-level archive directory.
    """
    cli = argparse.ArgumentParser(
        add_help=False,
        formatter_class=argparse.RawTextHelpFormatter,
        description='Archive data from a given MongoDB source')

    # (option flag, help text) pairs; every one of them is mandatory.
    mandatory_options = (
        ("--mongo-source-uri",
         "Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)"),
        ("--mongo-source-secrets-file",
         "Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)"),
        ("--db-names-list-file",
         "Full path to the File containing list of Databases to migrate (ex: eva_hsapiens_grch37)"),
        ("--archive-dir",
         "Top-level directory where all archives reside (ex: /path/to/archives)"),
    )
    for flag, description in mandatory_options:
        cli.add_argument(flag, help=description, required=True)

    # The built-in help is disabled above, so it is registered by hand here.
    cli.add_argument('--help', help='Show this help message and exit', action='help')

    options = cli.parse_args()

    names_to_archive = get_databases_list_for_export(options.db_names_list_file)

    for database_name in names_to_archive:
        source = MongoDatabase(uri=options.mongo_source_uri,
                               secrets_file=options.mongo_source_secrets_file,
                               db_name=database_name)
        archive_data_from_source(source, top_level_archive_dir=options.archive_dir)
Example #29
0
    def setUp(self) -> None:
        """Reset the submitted-variant collection to one known document.

        Records the assembly, database name, collection name and Mongo URI on
        the test instance, connects through ``MongoDatabase``, then drops the
        ``submittedVariantEntity`` collection and inserts a single fixture
        document into it.
        """
        self.assembly = "GCA_000002315.5"
        self.db = "eva_accession_sharded"
        self.collection = "submittedVariantEntity"
        self.uri = "mongodb://localhost:27017/"
        self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

        # The "seq" field carries the study accession instead of an assembly
        # accession - presumably the wrong value the tests exercise; confirm
        # against the test methods.
        fixture_document = {
            "_id": "CF2DD190877BE29372CAB647EEC609525B861F22",
            "accession": 7054574707,
            "alt": "TTTGTCTGTGTATGGCTCTGGTGACACATCATTGCCTGGTGACAGGACTCT",
            "contig": "CM000094.5",
            "createdDate": "2020-01-27T10:53:40.169Z",
            "ref": "",
            "seq": "PRJEB36115",
            "start": 3672970,
            "study": "PRJEB36115",
            "tax": 9031,
            "version": 1
        }

        # Start every test from a clean collection holding only the fixture.
        target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
        target_collection.drop()
        target_collection.insert_one(fixture_document)
    def setUp(self) -> None:
        """Seed the dbSNP submitted-variant collection with four test variants.

        Connects to ``eva_accession_sharded`` on localhost:27017, builds four
        submitted variants (accessions 1000, 1000, 1001, 1001, all with rs
        5318166021 and a random start), gives each an ``_id`` derived from its
        start via ``calculate_id``, adds one extra field to the first, second
        and fourth variant, then replaces the contents of the
        ``dbsnpSubmittedVariantEntity`` collection with them.
        """
        host = 'localhost'
        port = 27017

        # Accessioning warehouse
        self.accession_db = 'eva_accession_sharded'
        self.submitted_variants_collection = 'dbsnpSubmittedVariantEntity'
        uri = f'mongodb://{host}:{port}'
        self.mongo_db = MongoDatabase(uri=uri, db_name="eva_accession_sharded")
        self.connection_handle = self.mongo_db.mongo_handle

        # Accessioning db
        submitted_variants = [
            {
                "seq": "GCA_000181335.4",
                "tax": 1111,
                "study": "PRJEB30318",
                "contig": "CM000001.1",
                "start": random.randint(1, 1000000),
                "ref": "C",
                "alt": "T",
                "accession": ss_accession,
                "rs": 5318166021,
            }
            for ss_accession in (1000, 1000, 1001, 1001)
        ]

        # Extra field attached to a variant, keyed by its position modulo 4;
        # position 2 deliberately gets nothing.
        extra_field_by_slot = {
            0: ('allelesMatch', True),
            1: ('mapWeight', 3),
            3: ('remappedFrom', 'GCA_000181335.3'),
        }
        for position, variant in enumerate(submitted_variants):
            variant['_id'] = calculate_id(variant['start'])
            slot = position % 4
            if slot in extra_field_by_slot:
                field_name, field_value = extra_field_by_slot[slot]
                variant[field_name] = field_value

        # Replace whatever a previous test left behind with the fresh fixtures.
        variants_collection = self.connection_handle[self.accession_db][
            self.submitted_variants_collection]
        variants_collection.drop()
        variants_collection.insert_many(submitted_variants)