def test_pdb_mutation_pipeline_have_sequences(pdb_id):
    """Make sure that we can copy folders with precalculated Provean score(s).

    Copies ``sequence.json`` and the ``sequence`` folder from the canonical
    run's temp dir into a fresh temp dir, then runs the PDB mutation pipeline
    with ``have_sequences=True``. The temp dir is removed afterwards.

    Parameters
    ----------
    pdb_id : str
        PDB identifier of the structure to mutate (pytest fixture).
    """
    unique_temp_dir_old = _get_unique_temp_dir(pdb_id)
    unique_temp_dir = op.join(
        op.splitext(__file__)[0], pdb_id + '_have_sequences', '.elaspic')
    os.makedirs(unique_temp_dir, exist_ok=True)
    # NOTE: the original wrapped this in ``try/except: raise/finally``;
    # a bare ``except: raise`` is a no-op, so plain try/finally suffices.
    try:
        # Reuse the sequence data precalculated by the canonical run.
        shutil.copy2(
            op.join(unique_temp_dir_old, 'sequence.json'),
            op.join(unique_temp_dir, 'sequence.json'))
        shutil.copytree(
            op.join(unique_temp_dir_old, 'sequence'),
            op.join(unique_temp_dir, 'sequence'))
        conf.read_configuration_file(
            DEFAULT={
                'unique_temp_dir': unique_temp_dir
            },
            EXTERNAL_DIRS={
                'pdb_dir': os.environ['PDB_DIR'],
                'blast_db_dir': os.environ['BLAST_DB_DIR'],
                'archive_dir': os.environ['ARCHIVE_DIR']
            }
        )
        os.chdir(unique_temp_dir)
        helper_fns.run_pdb_mutation_pipeline(
            pdb_id, pdb_mutatations, have_sequences=True)
    finally:
        # Always clean up the temp dir, even on failure.
        shutil.rmtree(unique_temp_dir)
def load_data_to_database(args):
    """Load precomputed ``.tsv`` / ``.tsv.gz`` files into database tables.

    For every table in the schema's dependency order, looks for a matching
    data file in ``args.data_folder`` and bulk-copies it into the database.
    Compressed files are transparently decompressed for the duration of the
    copy via the ``decompress`` context manager.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``config_file`` or ``connection_string``, plus
        ``data_folder`` and (optionally) a comma-separated ``data_files``
        whitelist of table names.

    Raises
    ------
    Exception
        If neither ``config_file`` nor ``connection_string`` is given.
    """
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    elif args.connection_string:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    else:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    # Deferred import: requires a parsed configuration.
    from elaspic import elaspic_database
    db = elaspic_database.MyDatabase()
    args.data_folder = args.data_folder.rstrip('/')
    table_names = args.data_files.split(',') if args.data_files else None
    dirpath, dirnames, filenames = next(os.walk(args.data_folder))
    # ``sorted_tables`` respects foreign-key dependency order.
    for table in elaspic_database.Base.metadata.sorted_tables:
        if table_names is not None and table.name not in table_names:
            print("Skipping table '{}' because it was not included in the 'table_names' list..."
                  .format(table.name))
            continue
        # Hoist the repeated filename construction out of the branches.
        tsv_file = '{}.tsv'.format(table.name)
        tsv_gz_file = '{}.tsv.gz'.format(table.name)
        if tsv_file in filenames:
            db.copy_table_to_db(table.name, args.data_folder)
            print("Successfully loaded data from file '{}' to table '{}'"
                  .format(tsv_file, table.name))
        elif tsv_gz_file in filenames:
            with decompress(os.path.join(args.data_folder, tsv_gz_file)):
                # data_folder was already stripped of its trailing slash above;
                # the original's second ``rstrip('/')`` was redundant.
                db.copy_table_to_db(table.name, args.data_folder)
            print("Successfully loaded data from file '{}' to table '{}'"
                  .format(tsv_gz_file, table.name))
def elaspic_database(args):
    """Configure the database connection, then announce the chosen sub-command.

    Raises
    ------
    Exception
        If neither ``config_file`` nor ``connection_string`` is given.
    """
    # Guard clause: fail fast when no configuration source was provided.
    if not args.config_file and not args.connection_string:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    else:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    print("Running function '{}'...".format(args.func.__name__))
def delete_database(args):
    """Configure the database connection and drop the database tables.

    Honors ``args.drop_schema`` and ``args.drop_uniprot_sequence`` when
    deciding what to remove.

    Raises
    ------
    Exception
        If neither ``config_file`` nor ``connection_string`` is given.
    """
    # Guard clause: fail fast when no configuration source was provided.
    if not args.config_file and not args.connection_string:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    else:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    # Deferred import: requires a parsed configuration.
    from elaspic import elaspic_database
    db = elaspic_database.MyDatabase()
    db.delete_database_tables(args.drop_schema, args.drop_uniprot_sequence)
    logger.info('Done!')
def test_sequence_mutation_pipeline(pdb_id_sequence):
    """Run the sequence mutation pipeline inside a dedicated temp directory.

    Parameters
    ----------
    pdb_id_sequence : tuple
        Identifier components joined with '.' to name the temp dir
        (pytest fixture).
    """
    base_dir = op.splitext(__file__)[0]
    unique_temp_dir = op.join(base_dir, '.'.join(pdb_id_sequence), '.elaspic')
    os.makedirs(unique_temp_dir, exist_ok=True)
    external_dirs = {
        'pdb_dir': os.environ['PDB_DIR'],
        'blast_db_dir': os.environ['BLAST_DB_DIR'],
        'archive_dir': os.environ['ARCHIVE_DIR'],
    }
    conf.read_configuration_file(
        DEFAULT={'unique_temp_dir': unique_temp_dir},
        EXTERNAL_DIRS=external_dirs,
    )
    os.chdir(unique_temp_dir)
    return helper_fns.run_sequence_mutation_pipeline(pdb_id_sequence, sequence_mutations,)
def test_pdb_mutation_pipeline(pdb_id):
    """Canonical folder.

    Runs the PDB mutation pipeline from scratch inside a per-structure
    temp directory.
    """
    unique_temp_dir = _get_unique_temp_dir(pdb_id)
    os.makedirs(unique_temp_dir, exist_ok=True)
    external_dirs = {
        'pdb_dir': os.environ['PDB_DIR'],
        'blast_db_dir': os.environ['BLAST_DB_DIR'],
        'archive_dir': os.environ['ARCHIVE_DIR'],
    }
    conf.read_configuration_file(
        DEFAULT={'unique_temp_dir': unique_temp_dir},
        EXTERNAL_DIRS=external_dirs,
    )
    os.chdir(unique_temp_dir)
    helper_fns.run_pdb_mutation_pipeline(pdb_id, pdb_mutatations)
def elaspic_database_cli(args):
    """Dispatch a database maintenance action (create / load / delete).

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``config_file`` or ``connection_string`` plus
        ``action`` (one of ``create``, ``load_basic``, ``load_complete``,
        ``delete``); ``verbose`` selects the logging level when configuring
        via ``connection_string``.

    Raises
    ------
    Exception
        If no configuration source is given, or the action is unknown.
    """
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    elif args.connection_string:
        conf.read_configuration_file(
            DATABASE={"connection_string": args.connection_string},
            LOGGER={"level": LOGGING_LEVELS[args.verbose]},
        )
    else:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    tables_basic = [
        "domain",
        "domain_contact",
        "uniprot_sequence",
        "provean",
        "uniprot_domain",
        "uniprot_domain_template",
        "uniprot_domain_pair",
        "uniprot_domain_pair_template",
    ]
    # 'complete' is 'basic' plus the model tables; derive it from
    # ``tables_basic`` so the shared entries cannot drift out of sync
    # (the original duplicated all eight names in a second literal).
    tables_complete = tables_basic + [
        "uniprot_domain_model",
        "uniprot_domain_pair_model",
    ]
    if args.action == "create":
        create_database(args)
    elif args.action == "load_basic":
        load_data_to_database(args, tables_basic)
    elif args.action == "load_complete":
        load_data_to_database(args, tables_complete)
    elif args.action == "delete":
        delete_database(args)
    else:
        raise Exception("Unsupported action: {}".format(args.action))
def elaspic(args):
    """Run ELASPIC in database mode (``uniprot_id``) or standalone mode
    (``structure_file``), after reading the appropriate configuration.
    """
    validate_args(args)

    # Read configurations
    if args.config_file is not None:
        conf.read_configuration_file(args.config_file)
    elif args.uniprot_id:
        conf.read_configuration_file(
            DATABASE={'connection_string': args.connection_string},
            EXTERNAL_DIRS={
                'pdb_dir': args.pdb_dir,
                'blast_db_dir': args.blast_db_dir,
                'archive_dir': args.archive_dir,
            })
    elif args.structure_file:
        unique_temp_dir = op.abspath(op.join(os.getcwd(), '.elaspic'))
        os.makedirs(unique_temp_dir, exist_ok=True)
        conf.read_configuration_file(
            DEFAULT={'unique_temp_dir': unique_temp_dir},
            EXTERNAL_DIRS={
                'pdb_dir': args.pdb_dir,
                'blast_db_dir': args.blast_db_dir,
                'archive_dir': args.archive_dir,
            })

    if args.uniprot_id:
        # Database pipeline: parse the optional comma-separated domain pair ids.
        domain_pair_ids = []
        if args.uniprot_domain_pair_ids:
            logger.debug('uniprot_domain_pair_ids: %s', args.uniprot_domain_pair_ids)
            for token in args.uniprot_domain_pair_ids.split(','):
                if token:
                    domain_pair_ids.append(int(token))
        # Deferred import: requires a parsed configuration.
        from elaspic import database_pipeline
        pipeline = database_pipeline.DatabasePipeline(
            args.uniprot_id, args.mutations,
            run_type=args.run_type,
            uniprot_domain_pair_ids=domain_pair_ids,
        )
        pipeline.run()
    elif args.structure_file:
        # Standalone (local) pipeline.
        from elaspic import standalone_pipeline
        pipeline = standalone_pipeline.StandalonePipeline(
            args.structure_file, args.sequence_file, args.mutations,
            mutation_format=args.mutation_format,
            run_type=args.run_type,
        )
        pipeline.run()
import logging
import os.path as op
import random

import pandas as pd
import pytest

from elaspic import conf

# NOTE(review): ``logging`` and ``op`` were used below without a visible
# import in this chunk — added here; confirm they are not imported earlier
# if this is only part of the file.
logger = logging.getLogger(__name__)

# Constants
QUICK = False
CONFIG_FILE = op.join(op.dirname(__file__), 'config_file_database.ini')

# Allow pytest command-line options to override the defaults.
if hasattr(pytest, "config"):
    QUICK = pytest.config.getoption('--quick')
    CONFIG_FILE = pytest.config.getoption('--config-file') or CONFIG_FILE

conf.read_configuration_file(CONFIG_FILE, unique_temp_dir=None)
assert conf.CONFIGS['db_type']

logger.debug('Running quick: {}'.format(QUICK))
logger.debug('Config file: {}'.format(CONFIG_FILE))

# Imports that require a parsed config file
import helper_fns  # noqa
from elaspic import elaspic_database  # noqa

db = elaspic_database.MyDatabase()
conf.CONFIGS['engine'] = db.get_engine()
# Clear strict SQL modes so bulk loads behave permissively.
conf.CONFIGS['engine'].execute("SET sql_mode = ''")
def elaspic_cli(args):
    """Dispatch to the database pipeline (``uniprot_id``) or the standalone
    pipeline (``structure_file``) after reading the matching configuration.
    """
    validate_args(args)

    # Read configurations
    if args.config_file is not None:
        conf.read_configuration_file(args.config_file)
    elif args.uniprot_id:
        conf.read_configuration_file(
            DATABASE={
                "connection_string": args.connection_string,
            },
            EXTERNAL_DIRS={
                "pdb_dir": args.pdb_dir,
                "blast_db_dir": args.blast_db_dir,
                "archive_dir": args.archive_dir,
            },
            LOGGER={
                "level": LOGGING_LEVELS[args.verbose],
            },
        )
    elif args.structure_file:
        unique_temp_dir = op.abspath(op.join(os.getcwd(), ".elaspic"))
        os.makedirs(unique_temp_dir, exist_ok=True)
        conf.read_configuration_file(
            DEFAULT={"unique_temp_dir": unique_temp_dir},
            EXTERNAL_DIRS={
                "pdb_dir": args.pdb_dir,
                "blast_db_dir": args.blast_db_dir,
                "archive_dir": args.archive_dir,
            },
            LOGGER={
                "level": LOGGING_LEVELS[args.verbose],
            },
        )

    if args.uniprot_id:
        # Database pipeline: parse the optional comma-separated domain pair ids.
        domain_pair_ids = []
        if args.uniprot_domain_pair_ids:
            logger.debug("uniprot_domain_pair_ids: {}".format(args.uniprot_domain_pair_ids))
            for token in args.uniprot_domain_pair_ids.split(","):
                if token:
                    domain_pair_ids.append(int(token))
        # Deferred import: requires a parsed configuration.
        from elaspic import database_pipeline
        pipeline = database_pipeline.DatabasePipeline(
            args.uniprot_id,
            args.mutations,
            run_type=args.run_type,
            uniprot_domain_pair_ids=domain_pair_ids,
        )
        pipeline.run()
    elif args.structure_file:
        # Standalone (local) pipeline.
        from elaspic import standalone_pipeline
        pipeline = standalone_pipeline.StandalonePipeline(
            args.structure_file,
            args.sequence_file,
            args.mutations,
            mutation_format=args.mutation_format,
            run_type=args.run_type,
        )
        pipeline.run()
import logging
import os.path as op

import pandas as pd
import pytest

from elaspic import conf

# NOTE(review): ``logging`` and ``op`` were used below without a visible
# import in this chunk — added here; confirm they are not imported earlier
# if this is only part of the file.
logger = logging.getLogger(__name__)

# Constants
QUICK = False
CONFIG_FILE = op.join(op.dirname(__file__), "test_database_pipeline.ini")

# Allow pytest command-line options to override the defaults.
if hasattr(pytest, "config"):
    QUICK = pytest.config.getoption("--quick")
    CONFIG_FILE = pytest.config.getoption("--config-file") or CONFIG_FILE

conf.read_configuration_file(CONFIG_FILE)
assert conf.CONFIGS["db_type"]

logger.debug("Running quick: {}".format(QUICK))
logger.debug("Config file: {}".format(CONFIG_FILE))

# Imports that require a parsed config file
import helper_fns  # noqa
from elaspic import elaspic_database  # noqa

db = elaspic_database.MyDatabase()
conf.CONFIGS["engine"] = db.get_engine()
# Clear strict SQL modes so bulk loads behave permissively.
conf.CONFIGS["engine"].execute("SET sql_mode = ''")

test_cases = []