def test_move(self):
    """Run a full migration and compare source and destination document counts.

    Writes ``migration_config.yml`` next to this test file, reloads every
    database in ``mover.dbs_to_migrate`` into the source instance from the
    dumps under ``../resources``, runs ``mover.move()`` and finally asserts
    that each source collection has the same number of documents in the
    destination instance.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    config_file_path = f"{dir_path}/migration_config.yml"
    # One "key: value" entry per line, terminated by a final newline.
    config_file_content = "\n".join([
        f"migration-folder: {dir_path}/../resources",
        "python3-path: python3",
        "nextflow-binary-path: nextflow",
        f"nextflow-config-path: {dir_path}/workflow.config",
        f"script-path: {dir_path}/../../",
        "mongo-source-uri: mongodb://localhost:27017/admin",
        f"mongo-source-secrets-file: {dir_path}/empty_secret_file",
        "mongo-dest-uri: mongodb://localhost:27018/admin",
        f"mongo-dest-secrets-file: {dir_path}/empty_secret_file",
    ]) + "\n"
    # Use a context manager so the handle is flushed and closed before the
    # configuration file is handed to MoveMongoDBs.
    with open(config_file_path, "w") as config_file:
        config_file.write(config_file_content)

    mover = MoveMongoDBs(migration_config_file=config_file_path,
                         dbs_to_migrate_list=f"{dir_path}/dbs_to_migrate.txt",
                         batch_number="1", resume_flag=False)

    # Load data to source
    for db_name in mover.dbs_to_migrate:
        source_db = MongoDatabase(mover.migration_config["mongo-source-uri"], db_name=db_name)
        source_db.drop()
        source_db.restore_data(dump_dir=f"{dir_path}/../resources/{db_name}")

    mover.move()

    # Check if source data made it to the destination
    for db_name in mover.dbs_to_migrate:
        source_db = MongoDatabase(mover.migration_config["mongo-source-uri"], db_name=db_name)
        dest_db = MongoDatabase(mover.migration_config["mongo-dest-uri"], db_name=db_name)
        for collection_name in source_db.get_collection_names():
            self.assertEqual(
                source_db.mongo_handle[db_name][collection_name].count_documents(filter={}),
                dest_db.mongo_handle[db_name][collection_name].count_documents(filter={}))
def create_collection_count_validation_report(mongo_source: MongoDatabase, database_list,
                                              private_config_xml_file):
    """Store the document count of every collection of the listed databases.

    ``mongo_source.db_name`` is reassigned to each entry of ``database_list``
    in turn (the handle is mutated in place). Databases that report no
    collections are logged as a warning and skipped. For every other
    database, one result row per collection (in sorted order) is passed to
    ``insert_count_validation_result_to_db``; all rows share one timestamp
    taken at the start of the run.
    """
    report_timestamp = datetime.now()
    mongo_host = mongo_source.mongo_handle.address[0]

    for db in database_list:
        mongo_source.db_name = db
        source_collections = mongo_source.get_collection_names()

        if source_collections:
            for coll in sorted(source_collections):
                logger.info(f"fetching count for database ({db}) - collection ({coll})")
                no_of_documents = get_documents_count_for_collection(mongo_source, db, coll)
                logger.info(
                    f"Found {no_of_documents} documents in database ({db}) - collection ({coll})"
                )
                result_row = (mongo_host, db, coll, no_of_documents, report_timestamp)
                insert_count_validation_result_to_db(private_config_xml_file, result_row)
        else:
            logger.warning(
                f"database {db} does not exist in mongo instances {mongo_host}"
            )
def create_indexes(mongo_source: MongoDatabase, mongo_dest: MongoDatabase):
    """Create on the destination the indexes reported by the source.

    The result of ``mongo_source.get_indexes()`` is handed to
    ``mongo_dest.create_index_on_collections()``. Any exception is logged
    and the process exits with status 1.
    """
    logger.info(
        f"Creating indexes in the target database {mongo_dest.uri_with_db_name}...."
    )
    try:
        source_indexes = mongo_source.get_indexes()
        mongo_dest.create_index_on_collections(source_indexes)
    except Exception as ex:
        logger.error(f"Error while creating indexes!\n{ex.__str__()}")
        sys.exit(1)
def setUp(self) -> None:
    """Recreate the test database from the dump stored in the resources folder."""
    self.test_mongo_db = MongoDatabase(uri=self.uri, db_name=self.dump_db_name)
    self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)

    # Commands run in order: drop any previous copy first, then restore the dump.
    drop_command = f"mongo {self.dump_db_name} --eval 'db.dropDatabase()'"
    restore_command = f"mongorestore --dir {self.dump_dir}"
    preparation_steps = (
        ("Drop target test database if it already exists...", drop_command),
        ("Import test database...", restore_command),
    )
    for description, command in preparation_steps:
        run_command_with_output(description, command)
def dump_data_from_source(mongo_source: MongoDatabase, top_level_dump_dir):
    """Dump the source database into ``<top_level_dump_dir>/<db_name>``.

    Any exception is logged and the process exits with status 1.
    """
    # A forced table scan performs well for many workloads and avoids cursor timeout issues.
    # See https://jira.mongodb.org/browse/TOOLS-845?focusedCommentId=988298&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-988298
    mongodump_options = {"forceTableScan": "", "numParallelCollections": "1"}
    try:
        logger.info("Running mongodump from source...")
        db_dump_dir = os.path.join(top_level_dump_dir, mongo_source.db_name)
        mongo_source.dump_data(dump_dir=db_dump_dir, mongodump_args=mongodump_options)
    except Exception as ex:
        logger.error(f"Error while dumping data from source!\n{ex.__str__()}")
        sys.exit(1)
def restore_data_to_dest(mongo_dest: MongoDatabase, top_level_dump_dir):
    """Restore ``<top_level_dump_dir>/<db_name>`` into the destination database.

    Any exception is logged and the process exits with status 1.
    """
    # noIndexRestore - Do not restore indexes because MongoDB 3.2 does not have
    # index compatibility with MongoDB 4.0
    mongorestore_options = {
        "noIndexRestore": "",
        "numParallelCollections": 4,
        "numInsertionWorkersPerCollection": 4,
    }
    try:
        dump_dir = os.path.join(top_level_dump_dir, mongo_dest.db_name)
        logger.info(f"Loading data in target database from source dump {dump_dir}...")
        mongo_dest.restore_data(dump_dir=dump_dir, mongorestore_args=mongorestore_options)
    except Exception as ex:
        logger.error(f"Error while restoring data to the destination database!\n{ex.__str__()}")
        sys.exit(1)
def setUp(self) -> None:
    """Reset the test collection and insert two submitted-variant documents."""
    self.contig = "AF034253.1"
    self.db = "eva_accession_sharded_test"
    self.collection = "submittedVariantEntity"
    self.uri = "mongodb://localhost:27017/"
    self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

    fixture_documents = [
        {
            "_id": "1125697507941CA420E26588F9F40F6C56C876A0",
            "accession": 7315407067,
            "alt": "G",
            "contig": "M",
            "createdDate": "2021-02-24T10:26:17.561Z",
            "ref": "A",
            "seq": "GCA_000003025.4",
            "start": 158,
            "study": "PRJEB43246",
            "tax": 9823,
            "version": 1,
        },
        {
            "_id": "6CD16D81C36466B1C12A4D1911DAD1A7ECDA0976",
            "accession": 7315401731,
            "alt": "T",
            "contig": "CM000812.4",
            "createdDate": "2021-02-24T10:25:25.259Z",
            "ref": "C",
            "seq": "GCA_000003025.4",
            "start": 21664,
            "study": "PRJEB43246",
            "tax": 9823,
            "version": 1,
        },
    ]

    # Start from an empty collection so earlier runs cannot leak documents.
    target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
    target_collection.drop()
    target_collection.insert_many(fixture_documents)
def main():
    """Parse the command line and gather clustering metric counts from MongoDB.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance and delegates to ``gather_count_from_mongo``.
    """
    parser = argparse.ArgumentParser(
        description='Parse all the clustering logs to get date ranges and query mongo to get metrics counts')
    parser.add_argument("--clustering_root_path", type=str,
                        help="base directory where all the clustering was run.", required=True)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    # The previous help text was cut off mid-sentence ("Path to the file containing the ").
    parser.add_argument('--private_config_xml_file',
                        help='Path to the private configuration XML file (ex: /path/to/eva-maven-settings.xml)',
                        required=True)
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    gather_count_from_mongo(args.clustering_root_path, mongo_source, args.private_config_xml_file)
def main():
    """Command-line entry point for the clustering and release tracking table.

    Runs the tasks selected with ``--tasks`` (all of ``all_tasks`` by
    default): ``create_and_fill_table`` creates and populates the table,
    ``fill_rs_count`` updates the RS counts for one taxonomy and therefore
    requires ``--taxonomy``.
    """
    parser = argparse.ArgumentParser(
        description='Create and load the clustering and release tracking table',
        add_help=False)
    parser.add_argument("--private-config-xml-file", required=True,
                        help="ex: /path/to/eva-maven-settings.xml")
    parser.add_argument("--release-version", required=True, type=int,
                        help="version of the release")
    parser.add_argument("--reference-directory", required=True,
                        help="Directory where the reference genomes exists or should be downloaded")
    parser.add_argument("--taxonomy", required=False, type=int,
                        help="taxonomy id for which rs count needs to be updated")
    parser.add_argument('--tasks', required=False, type=str, nargs='+',
                        default=all_tasks, choices=all_tasks,
                        help='Task or set of tasks to perform.')
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    logging_config.add_stdout_handler()

    # Fall back to every task when the parsed selection is empty.
    selected_tasks = args.tasks or all_tasks
    config_file = args.private_config_xml_file
    release_version = args.release_version

    if 'create_and_fill_table' in selected_tasks:
        create_table(config_file)
        fill_in_from_previous_inventory(config_file, release_version)
        fill_in_table_from_remapping(config_file, release_version, args.reference_directory)

    if 'fill_rs_count' not in selected_tasks:
        return

    if not args.taxonomy:
        raise Exception(
            "For running task 'fill_rs_count', it is mandatory to provide taxonomy arguments"
        )
    mongo_source_uri = get_mongo_uri_for_eva_profile('production', config_file)
    mongo_source = MongoDatabase(uri=mongo_source_uri, db_name="eva_accession_sharded")
    fill_num_rs_id_for_taxonomy_and_assembly(mongo_source, config_file, release_version,
                                             args.taxonomy, args.reference_directory)
def main():
    """Parse the command line and run the discordant cluster variant detection.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance and delegates to
    ``detect_discordant_cluster_variant_from_split_merge_operations`` with
    the requested assemblies and batch size.
    """
    parser = argparse.ArgumentParser(
        description='Detect clustered variant that have position discordant with their submitted variants and are '
                    'involve in a merge or split event.')
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--assemblies", nargs='+', help="The list of assembly to check", default=[])
    # type=int: without it a value given on the command line arrives as a
    # string while the default is an int.
    parser.add_argument("--batch_size", default=1000, type=int,
                        help="The number of variant to retrieve pr batch")
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    detect_discordant_cluster_variant_from_split_merge_operations(
        mongo_source, args.assemblies, args.batch_size)
def annotations_export(mongo_source_uri, mongo_source_secrets_file, export_dir, query_dir):
    """Export annotation documents for the variants already exported per database.

    Every entry of ``export_dir`` is treated as a database name. When the
    exported variant file of that database exists it is read in batches of
    ``chunk_size`` lines; the annotation and annotation-metadata ids derived
    from each batch are exported with ``export_annotations_data``, tagged
    with the zero-based batch number.
    """
    for db in os.listdir(export_dir):
        mongo_source = MongoDatabase(uri=mongo_source_uri,
                                     secrets_file=mongo_source_secrets_file,
                                     db_name=db)
        variant_file_loc = os.path.join(export_dir, db, variant_collection, variant_collection)
        if not os.path.isfile(variant_file_loc):
            continue

        with open(variant_file_loc, 'r') as variant_file:
            # iter(callable, sentinel) stops at the first empty batch, i.e. end of file.
            batches = iter(lambda: list(islice(variant_file, chunk_size)), [])
            for chunk_number, variant_batch in enumerate(batches):
                annotations = get_annotations_ids(variant_batch)
                exports = (
                    (annotation_collection,
                     annotations["annotations_id"],
                     annotation_query_file_name),
                    (annotation_metadata_collection,
                     annotations["annotations_metadata_id"],
                     annotation_metadata_query_file_name),
                )
                for collection_name, ids, query_file_name in exports:
                    if ids:
                        export_annotations_data(mongo_source, db, collection_name, ids,
                                                export_dir, query_dir, query_file_name,
                                                chunk_number)
def setUp(self) -> None:
    """Reset the test collection and insert two submitted variants of study PRJEB28579."""
    self.db = "eva_accession_sharded_test"
    self.collection = "submittedVariantEntity"
    self.uri = "mongodb://localhost:27017/"
    self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

    fixture_documents = [
        {
            "_id": "D0D2E897BFD59EAF6E2D0BA2C7883B5DC5B34F30",
            "accession": 7169189340,
            "alt": "C",
            "contig": "AEMK02000229.1",
            "createdDate": "2020-09-01T22:55:50.956Z",
            "ref": "G",
            "seq": "GCA_000003025.6",
            "start": 24577,
            "study": "PRJEB28579",
            "tax": 9823,
            "version": 1,
        },
        {
            "_id": "C5E53108D33B135EB45E2BFD3E67744B48CB06A8",
            "accession": 7166483872,
            "alt": "G",
            "contig": "AEMK02000626.1",
            "createdDate": "2020-09-01T15:31:27.489Z",
            "ref": "T",
            "seq": "GCA_000003025.6",
            "start": 15784,
            "study": "PRJEB28579",
            "tax": 9823,
            "version": 1,
        },
    ]

    # Start from an empty collection so earlier runs cannot leak documents.
    target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
    target_collection.drop()
    target_collection.insert_many(fixture_documents)
def mongo_import_from_dir(mongo_dest_uri, mongo_dest_secrets_file, export_dir):
    """Import every exported file found under ``export_dir`` into the destination.

    The directory is walked as ``<export_dir>/<db>/<collection>/<file>``;
    each file is imported in ``upsert`` mode into the collection named after
    its parent directory.
    """
    mongo_import_args = {"mode": "upsert"}

    for db in os.listdir(export_dir):
        mongo_dest = MongoDatabase(uri=mongo_dest_uri,
                                   secrets_file=mongo_dest_secrets_file,
                                   db_name=db)
        db_dir = os.path.join(export_dir, db)

        for coll in os.listdir(db_dir):
            logger.info(f'Importing data for db ({db} - collection ({coll})')
            coll_dir = os.path.join(db_dir, coll)

            for file_name in os.listdir(coll_dir):
                mongo_import_args["collection"] = coll
                mongo_dest.import_data(os.path.join(coll_dir, file_name), mongo_import_args)
def main():
    """Parse the command line and prepare the destination database for loading.

    Builds source and destination ``MongoDatabase`` handles for ``--db-name``
    and delegates to ``prepare_dest_db``.
    """
    parser = argparse.ArgumentParser(description='Prepare target database before loading',
                                     formatter_class=argparse.RawTextHelpFormatter,
                                     add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--mongo-dest-uri",
                        help="Mongo Destination URI (ex: mongodb://user:@mongos-dest-host:27017/admin)",
                        required=True)
    # The help text used to be a copy of the source option and described the wrong file.
    parser.add_argument("--mongo-dest-secrets-file",
                        help="Full path to the Mongo Destination secrets file (ex: /path/to/mongo/dest/secret)",
                        required=True)
    parser.add_argument("--db-name", help="Database to migrate (ex: eva_hsapiens_grch37)",
                        required=True)
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source_db = MongoDatabase(uri=args.mongo_source_uri,
                                    secrets_file=args.mongo_source_secrets_file,
                                    db_name=args.db_name)
    mongo_dest_db = MongoDatabase(uri=args.mongo_dest_uri,
                                  secrets_file=args.mongo_dest_secrets_file,
                                  db_name=args.db_name)
    prepare_dest_db(mongo_source_db, mongo_dest_db)
def setUp(self) -> None:
    """Reset the test collection and insert three submitted variants of study PRJEB42012.

    Two documents use the contig names ``Un.1`` and ``1_random.1``; the third
    uses ``CM000072.5``.
    """
    self.db = "eva_accession_sharded_test"
    self.collection = "submittedVariantEntity"
    self.uri = "mongodb://localhost:27017/"
    self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

    wrong_contig = [
        {
            "_id": "52AF357592EAE966B65D3F9D04C952791F1493DC",
            "seq": "GCA_000001895.4",
            "tax": 10116,
            "study": "PRJEB42012",
            "contig": "Un.1",
            "start": 1063,
            "ref": "G",
            "alt": "C",
            "accession": 7315398622,
            "version": 1,
            "createdDate": "2021-02-09T03:23:05.842Z",
        },
        {
            "_id": "899D3B4E7B6E1B18B9D34AC0CA3880AF5F68E09B",
            "seq": "GCA_000001895.4",
            "tax": 10116,
            "study": "PRJEB42012",
            "contig": "1_random.1",
            "start": 1510,
            "ref": "T",
            "alt": "C",
            "accession": 7315398494,
            "version": 1,
            "createdDate": "2021-02-09T03:23:05.041Z",
        },
    ]
    correct_contig = [
        {
            "_id": "CAB0D97D36233AC8D84637F228B1CE172228A166",
            "seq": "GCA_000001895.4",
            "tax": 10116,
            "study": "PRJEB42012",
            "contig": "CM000072.5",
            "start": 2203542,
            "ref": "T",
            "alt": "C",
            "accession": 7306435312,
            "version": 1,
            "createdDate": "2021-02-08T11:56:42.206Z",
        },
    ]

    # Start from an empty collection, then load both groups in separate inserts.
    target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
    target_collection.drop()
    for documents in (wrong_contig, correct_contig):
        target_collection.insert_many(documents)
def export_accession_data(mongo_source_uri, mongo_source_secrets_file, study_seq_tuple_set, export_dir,
                          query_file_dir):
    """Export the accessioning documents selected by ``study_seq_tuple_set``.

    The query built by ``create_accession_query`` is written to a query file,
    and the matching documents of ``accession_collection`` are exported to
    ``<export_dir>/<accession_db>/<accession_collection>/<accession_collection>``.
    """
    mongo_source = MongoDatabase(uri=mongo_source_uri,
                                 secrets_file=mongo_source_secrets_file,
                                 db_name=accession_db)

    query_file_path = write_query_to_file(create_accession_query(study_seq_tuple_set),
                                          query_file_dir, accession_query_file_name)
    mongo_export_args = dict(collection=accession_collection, queryFile=query_file_path)

    logger.info(
        f"Starting mongo export process for accessioning database: mongo_source ({mongo_source_uri}) and mongo_export_args ({mongo_export_args})"
    )

    destination_parts = (export_dir, accession_db, accession_collection, accession_collection)
    accession_export_file = os.path.join(*destination_parts)
    mongo_source.export_data(accession_export_file, mongo_export_args)
def setUp(self) -> None:
    """Reset the test collection and insert three submitted variants on assembly GCA_000298735.1."""
    self.contig = "AF010406.1"
    self.db = "eva_accession_sharded_test"
    self.collection = "submittedVariantEntity"
    self.uri = "mongodb://localhost:27017/"
    self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

    fixture_documents = [
        {
            "_id": "8C4E490E82B895ADE3B9405204771B9E6BDCB286",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB33693",
            "contig": "-",
            "start": 16410,
            "ref": "G",
            "alt": "A",
            "accession": 7121896076,
            "version": 1,
            "createdDate": "2020-05-05T10:38:43.367Z",
        },
        {
            "_id": "7350E3A0B0242791BD25901F15D22467DC7939BD",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB33693",
            "contig": "OARMT",
            "start": 16410,
            "ref": "G",
            "alt": "A",
            "accession": 7121824383,
            "version": 1,
            "createdDate": "2020-04-28T00:26:01.844Z",
        },
        {
            "_id": "C9618202A2AF568A94259A1A16AB0A67DCC1CC94",
            "seq": "GCA_000298735.1",
            "tax": 9940,
            "study": "PRJEB23437",
            "contig": "CM001582.1",
            "start": 5442343,
            "ref": "C",
            "alt": "T",
            "accession": 5264373293,
            "version": 1,
            "createdDate": "2019-07-07T11:14:13.110Z",
        },
    ]

    # Start from an empty collection so earlier runs cannot leak documents.
    target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
    target_collection.drop()
    target_collection.insert_many(fixture_documents)
def main():
    """Parse the command line and swap the mixed-up contig pairs of study PRJEB28579.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance and passes the hard-coded contig pairs to
    ``swap_with_correct_contig``.
    """
    parser = argparse.ArgumentParser(description='Correct 98 contig error in study PRJEB28579',
                                     add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    # add_help=False removes the built-in -h/--help, so register --help
    # explicitly; otherwise the script has no help output at all.
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    contig_swap_list = [
        {'contig_1': 'AEMK02000229.1', 'contig_2': 'AEMK02000626.1'},
        {'contig_1': 'AEMK02000417.1', 'contig_2': 'AEMK02000654.1'},
    ]
    swap_with_correct_contig(mongo_source, contig_swap_list)
def _restore_data_to_another_db(self):
    """Copy the test database into ``<db_name>_restore`` and return its handle.

    The test database is dumped into a temporary directory and restored into
    a freshly dropped database, with the namespaces remapped through the
    ``nsFrom``/``nsTo`` restore arguments.
    """
    restore_db_name = self.test_mongo_db.db_name + "_restore"
    with tempfile.TemporaryDirectory() as tempdir:
        self.test_mongo_db.dump_data(tempdir)
        test_restore_db = MongoDatabase(uri=self.uri, db_name=restore_db_name)
        test_restore_db.drop()
        namespace_mapping = {
            "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
            "nsTo": f'"{test_restore_db.db_name}.*"',
        }
        test_restore_db.restore_data(dump_dir=tempdir, mongorestore_args=namespace_mapping)
    return test_restore_db
def main():
    """Parse the command line and dump one database from the MongoDB source."""
    parser = argparse.ArgumentParser(description='Dump data from a given MongoDB source',
                                     formatter_class=argparse.RawTextHelpFormatter,
                                     add_help=False)
    # All options are mandatory; they are registered in the order shown in the help output.
    required_options = (
        ("--mongo-source-uri",
         "Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)"),
        ("--mongo-source-secrets-file",
         "Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)"),
        ("--db-name",
         "Database to migrate (ex: eva_hsapiens_grch37)"),
        ("--dump-dir",
         "Top-level directory where all dumps reside (ex: /path/to/dumps)"),
    )
    for option, help_text in required_options:
        parser.add_argument(option, help=help_text, required=True)
    parser.add_argument('--help', action='help', help='Show this help message and exit')

    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name=args.db_name)
    dump_data_from_source(mongo_source, top_level_dump_dir=args.dump_dir)
def provision_new_database_for_variant_warehouse(db_name):
    """Create a variant warehouse database with the given name and shard its collections.

    A database that already reports collections is left untouched. Otherwise
    sharding is enabled on it and every collection listed in
    ``collections_shard_key_map`` is sharded.
    """
    mongo_settings = cfg['mongodb']
    # Passing the secrets_file overrides the password already in the uri
    db_handle = MongoDatabase(uri=mongo_settings['mongo_admin_uri'],
                              secrets_file=mongo_settings['mongo_admin_secrets_file'],
                              db_name=db_name)

    existing_collections = db_handle.get_collection_names()
    if len(existing_collections) > 0:
        logger.info(f'Found existing database named {db_name}.')
        return

    db_handle.enable_sharding()
    db_handle.shard_collections(collections_shard_key_map,
                                collections_to_shard=collections_shard_key_map.keys())
    logger.info(f'Created new database named {db_name}.')
def prepare_dest_db(mongo_source_db: MongoDatabase, mongo_dest_db: MongoDatabase):
    """Drop the destination database and set it up again with sharded collections.

    The collections to shard are the ones currently reported by the source
    database. Any exception is logged and the process exits with status 1.
    """
    try:
        logger.info("Dropping target database if it already exists...")
        mongo_dest_db.drop()

        logger.info("Enabling sharding in the target database...")
        mongo_dest_db.enable_sharding()

        logger.info("Sharding collections in the target database...")
        source_collections = mongo_source_db.get_collection_names()
        mongo_dest_db.shard_collections(collections_shard_key_map,
                                        collections_to_shard=source_collections)
    except Exception as ex:
        logger.error(
            f"Error while preparing destination database!\n{ex.__str__()}")
        sys.exit(1)
def main():
    """Parse the command line and delete declustered variants.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance, then runs ``find_ids_of_declustered_variants`` followed by
    ``delete_variants`` with the same output directory.
    """
    parser = argparse.ArgumentParser(
        description='Delete declustered variants in dbsnpSubmittedVariantEntity Collection',
        add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--output-dir",
                        help="Top-level directory where all files reside (ex: /path/to/files)",
                        required=True)
    # add_help=False removes the built-in -h/--help, so register --help
    # explicitly; otherwise the script has no help output at all.
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")

    # this method finds the variants to delete and stores their ids in a file named by assembly
    find_ids_of_declustered_variants(mongo_source, args.output_dir)
    # this method reads the variant ids stored by the previous method in batches and deletes them
    delete_variants(mongo_source, args.output_dir)
def main():
    """Parse the command line and run the contig correction for study PRJEB33693.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance and delegates to ``correct``.
    """
    parser = argparse.ArgumentParser(description='Correct contig error in study PRJEB33693',
                                     add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    # add_help=False removes the built-in -h/--help, so register --help
    # explicitly; otherwise the script has no help output at all.
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    correct(mongo_source)
def setUp(self) -> None:
    """Reset the submitted and clustered variant collections with generated documents.

    Eleven submitted variants (``rs`` 1000-1010) and ten clustered variants
    (accessions 1000-1009) are inserted into the local accessioning database.
    """
    host, port = 'localhost', 27017

    # Accessioning warehouse
    self.accession_db = 'eva_accession_sharded'
    self.submitted_variants_collection = 'submittedVariantEntity'
    self.clustered_variants_collection = 'clusteredVariantEntity'
    self.dbsnp_clustered_variants_collection = 'dbsnpClusteredVariantEntity'

    uri = f'mongodb://{host}:{port}'
    self.mongo_db = MongoDatabase(uri=uri, db_name=self.accession_db)
    self.connection_handle = self.mongo_db.mongo_handle

    # Accessioning db
    submitted_variants = [
        {
            "_id": calculate_id(rs),
            "seq": "GCA_000181335.4",
            "tax": 1111,
            "study": "PRJEB30318",
            "contig": "CM000001.1",
            "start": 76166296,
            "ref": "C",
            "alt": "T",
            "accession": 5318166021,
            "rs": rs,
        }
        for rs in range(1000, 1011)
    ]
    clustered_variants = [
        {
            "_id": calculate_id(rs),
            "asm": "GCA_000181335.4",
            "tax": 1111,
            "contig": "CM000001.1",
            "start": 76166296,
            "accession": rs,
        }
        for rs in range(1000, 1010)
    ]

    # Each collection is emptied before it is filled, submitted variants first.
    fixtures = (
        (self.submitted_variants_collection, submitted_variants),
        (self.clustered_variants_collection, clustered_variants),
    )
    for collection_name, documents in fixtures:
        self.connection_handle[self.accession_db][collection_name].drop()
        self.connection_handle[self.accession_db][collection_name].insert_many(documents)
def main():
    """Parse the command line and delete the Ovis aries remapped documents.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance, collects the deletion files returned by the three
    ``find_ids_of_*`` helpers and hands them to ``delete_variants``.
    """
    parser = argparse.ArgumentParser(
        description='Delete document associated with Ovis aries remapped data in multiple collections',
        add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--output-dir",
                        help="Top-level directory where all files reside (ex: /path/to/files)",
                        required=True)
    # add_help=False removes the built-in -h/--help, so register --help
    # explicitly; otherwise the script has no help output at all.
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")

    deletion_files = []
    deletion_files.extend(find_ids_of_remapped_submitted_variants(mongo_source, args.output_dir))
    deletion_files.extend(find_ids_of_remapped_clustered_variants(mongo_source, args.output_dir))
    deletion_files.extend(find_ids_of_submitted_variant_operations(mongo_source, args.output_dir))
    delete_variants(mongo_source, deletion_files)
def main():
    """Parse the command line and replace an incorrect contig with the correct one.

    Connects to the ``eva_accession_sharded`` database of the given source
    instance and delegates to ``replace_with_correct_contig`` with the
    assembly, study, contig names and expected number of variants.
    """
    parser = argparse.ArgumentParser(
        description='Replace incorrect contig in a given assembly and study with the correct contig',
        add_help=False)
    parser.add_argument("--mongo-source-uri",
                        help="Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)",
                        required=True)
    parser.add_argument("--mongo-source-secrets-file",
                        help="Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)",
                        required=True)
    parser.add_argument("--assembly-accession",
                        help="Genbank assembly accession (ex: GCA_000003055.5)", required=True)
    parser.add_argument("--study-accession", help="Study accession (ex: PRJEB29734)", required=True)
    # The two contig options used to carry the "Study accession" help text
    # copied from the option above.
    parser.add_argument("--incorrect-contig", help="Incorrect contig name (ex: MT)", required=True)
    parser.add_argument("--correct-contig", help="Correct contig accession (ex: AY526085.1)",
                        required=True)
    parser.add_argument("--num-variants-to-replace", help="Number of variants to replace (ex: 10)",
                        type=int, required=True)
    # add_help=False removes the built-in -h/--help, so register --help
    # explicitly; otherwise the script has no help output at all.
    parser.add_argument('--help', action='help', help='Show this help message and exit')
    args = parser.parse_args()

    mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                 secrets_file=args.mongo_source_secrets_file,
                                 db_name="eva_accession_sharded")
    replace_with_correct_contig(mongo_source, args.assembly_accession, args.study_accession,
                                args.incorrect_contig, args.correct_contig,
                                args.num_variants_to_replace)
def main():
    """Parse the command line and archive every database named in the list file."""
    parser = argparse.ArgumentParser(description='Archive data from a given MongoDB source',
                                     formatter_class=argparse.RawTextHelpFormatter,
                                     add_help=False)
    # All options are mandatory; they are registered in the order shown in the help output.
    required_options = (
        ("--mongo-source-uri",
         "Mongo Source URI (ex: mongodb://user:@mongos-source-host:27017/admin)"),
        ("--mongo-source-secrets-file",
         "Full path to the Mongo Source secrets file (ex: /path/to/mongo/source/secret)"),
        ("--db-names-list-file",
         "Full path to the File containing list of Databases to migrate (ex: eva_hsapiens_grch37)"),
        ("--archive-dir",
         "Top-level directory where all archives reside (ex: /path/to/archives)"),
    )
    for option, help_text in required_options:
        parser.add_argument(option, help=help_text, required=True)
    parser.add_argument('--help', action='help', help='Show this help message and exit')

    args = parser.parse_args()

    for db in get_databases_list_for_export(args.db_names_list_file):
        mongo_source = MongoDatabase(uri=args.mongo_source_uri,
                                     secrets_file=args.mongo_source_secrets_file,
                                     db_name=db)
        archive_data_from_source(mongo_source, top_level_archive_dir=args.archive_dir)
def setUp(self) -> None:
    """Reset the test collection and insert one submitted variant.

    The document's ``seq`` field holds the study accession ``PRJEB36115``,
    while ``self.assembly`` is set to ``GCA_000002315.5``.
    """
    self.assembly = "GCA_000002315.5"
    self.db = "eva_accession_sharded"
    self.collection = "submittedVariantEntity"
    self.uri = "mongodb://localhost:27017/"
    self.mongo_source = MongoDatabase(uri=self.uri, db_name=self.db)

    fixture_document = {
        "_id": "CF2DD190877BE29372CAB647EEC609525B861F22",
        "accession": 7054574707,
        "alt": "TTTGTCTGTGTATGGCTCTGGTGACACATCATTGCCTGGTGACAGGACTCT",
        "contig": "CM000094.5",
        "createdDate": "2020-01-27T10:53:40.169Z",
        "ref": "",
        "seq": "PRJEB36115",
        "start": 3672970,
        "study": "PRJEB36115",
        "tax": 9031,
        "version": 1,
    }

    # Start from an empty collection so earlier runs cannot leak documents.
    target_collection = self.mongo_source.mongo_handle[self.db][self.collection]
    target_collection.drop()
    target_collection.insert_one(fixture_document)
def setUp(self) -> None:
    """Reset the dbSNP submitted variant collection with four generated documents.

    The documents use accessions 1000, 1000, 1001, 1001 and a random start
    position; the ``_id`` is derived from that start. The first document
    additionally gets ``allelesMatch``, the second ``mapWeight`` and the
    fourth ``remappedFrom``.
    """
    host, port = 'localhost', 27017

    # Accessioning warehouse
    self.accession_db = 'eva_accession_sharded'
    self.submitted_variants_collection = 'dbsnpSubmittedVariantEntity'

    uri = f'mongodb://{host}:{port}'
    self.mongo_db = MongoDatabase(uri=uri, db_name=self.accession_db)
    self.connection_handle = self.mongo_db.mongo_handle

    # Accessioning db
    submitted_variants = []
    for ss in (1000, 1000, 1001, 1001):
        submitted_variants.append({
            "seq": "GCA_000181335.4",
            "tax": 1111,
            "study": "PRJEB30318",
            "contig": "CM000001.1",
            "start": random.randint(1, 1000000),
            "ref": "C",
            "alt": "T",
            "accession": ss,
            "rs": 5318166021,
        })

    for submitted_variant in submitted_variants:
        submitted_variant['_id'] = calculate_id(submitted_variant['start'])

    # Extra attribute keyed by the document's position modulo 4 (position 2 gets none).
    extra_field_by_position = {
        0: ('allelesMatch', True),
        1: ('mapWeight', 3),
        3: ('remappedFrom', 'GCA_000181335.3'),
    }
    for i, sub_var in enumerate(submitted_variants):
        extra_field = extra_field_by_position.get(i % 4)
        if extra_field is not None:
            field_name, field_value = extra_field
            sub_var[field_name] = field_value

    target_collection = self.connection_handle[self.accession_db][self.submitted_variants_collection]
    target_collection.drop()
    target_collection.insert_many(submitted_variants)