Ejemplo n.º 1
0
def test_pdb_mutation_pipeline_have_sequences(pdb_id):
    """Make sure that we can copy folders with precalculated Provean score(s).

    Copies the ``sequence.json`` file and the ``sequence`` folder from a
    previous run into a fresh working directory, then runs the PDB mutation
    pipeline with ``have_sequences=True``.

    Parameters
    ----------
    pdb_id : str
        PDB identifier whose precalculated data should be reused.
    """
    unique_temp_dir_old = _get_unique_temp_dir(pdb_id)
    unique_temp_dir = op.join(op.splitext(__file__)[0], pdb_id + '_have_sequences', '.elaspic')
    os.makedirs(unique_temp_dir, exist_ok=True)
    # `try/finally` alone guarantees cleanup; the original bare
    # `except: raise` clause was a no-op and has been removed.
    try:
        # Reuse precalculated sequence data from the previous run.
        shutil.copy2(
            op.join(unique_temp_dir_old, 'sequence.json'),
            op.join(unique_temp_dir, 'sequence.json'))
        shutil.copytree(
            op.join(unique_temp_dir_old, 'sequence'),
            op.join(unique_temp_dir, 'sequence'))
        conf.read_configuration_file(
            DEFAULT={
                'unique_temp_dir': unique_temp_dir
            },
            EXTERNAL_DIRS={
                'pdb_dir': os.environ['PDB_DIR'],
                'blast_db_dir': os.environ['BLAST_DB_DIR'],
                'archive_dir': os.environ['ARCHIVE_DIR']
            }
        )
        os.chdir(unique_temp_dir)
        helper_fns.run_pdb_mutation_pipeline(
            pdb_id, pdb_mutatations, have_sequences=True)
    finally:
        # Always remove the working directory, even when the pipeline fails.
        shutil.rmtree(unique_temp_dir)
Ejemplo n.º 2
0
def load_data_to_database(args):
    """Load ``.tsv`` / ``.tsv.gz`` files from ``args.data_folder`` into database tables.

    Tables are processed in dependency order (``Base.metadata.sorted_tables``).
    A table is skipped when ``args.data_files`` is given and does not list it,
    or silently when no matching data file exists in the folder.

    Raises
    ------
    Exception
        If neither ``args.config_file`` nor ``args.connection_string`` is set.
    """
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    elif args.connection_string:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    else:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    # Imported late because it requires a parsed configuration.
    from elaspic import elaspic_database
    db = elaspic_database.MyDatabase()
    args.data_folder = args.data_folder.rstrip('/')
    table_names = args.data_files.split(',') if args.data_files else None
    # Only look at files directly inside the data folder (first os.walk item).
    dirpath, dirnames, filenames = next(os.walk(args.data_folder))
    for table in elaspic_database.Base.metadata.sorted_tables:
        if table_names is not None and table.name not in table_names:
            print("Skipping table '{}' because it was not included in the 'table_names' list..."
                  .format(table.name))
            continue
        # Compute each candidate filename once instead of re-formatting it
        # in every comparison and message.
        tsv_file = '{}.tsv'.format(table.name)
        tsv_gz_file = '{}.tsv.gz'.format(table.name)
        if tsv_file in filenames:
            db.copy_table_to_db(table.name, args.data_folder)
            print("Successfully loaded data from file '{}' to table '{}'"
                  .format(tsv_file, table.name))
        elif tsv_gz_file in filenames:
            # Decompress only for the duration of the bulk copy.
            # ('data_folder' was already stripped of trailing slashes above,
            # so the second rstrip in the original was redundant.)
            with decompress(os.path.join(args.data_folder, tsv_gz_file)):
                db.copy_table_to_db(table.name, args.data_folder)
            print("Successfully loaded data from file '{}' to table '{}'"
                  .format(tsv_gz_file, table.name))
Ejemplo n.º 3
0
def elaspic_database(args):
    """Configure the database connection and announce the requested action."""
    if not (args.config_file or args.connection_string):
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    else:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    print("Running function '{}'...".format(args.func.__name__))
Ejemplo n.º 4
0
def delete_database(args):
    """Delete the ELASPIC database tables according to the given arguments."""
    if not (args.config_file or args.connection_string):
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    else:
        conf.read_configuration_file(DATABASE={'connection_string': args.connection_string})
    # Imported late because it requires a parsed configuration.
    from elaspic import elaspic_database
    database = elaspic_database.MyDatabase()
    database.delete_database_tables(args.drop_schema, args.drop_uniprot_sequence)
    logger.info('Done!')
Ejemplo n.º 5
0
def test_sequence_mutation_pipeline(pdb_id_sequence):
    """Run the sequence mutation pipeline inside a per-test working directory."""
    base_dir = op.splitext(__file__)[0]
    work_dir = op.join(base_dir, '.'.join(pdb_id_sequence), '.elaspic')
    os.makedirs(work_dir, exist_ok=True)
    # External resource locations come from the environment.
    external_dirs = {
        'pdb_dir': os.environ['PDB_DIR'],
        'blast_db_dir': os.environ['BLAST_DB_DIR'],
        'archive_dir': os.environ['ARCHIVE_DIR'],
    }
    conf.read_configuration_file(
        DEFAULT={'unique_temp_dir': work_dir},
        EXTERNAL_DIRS=external_dirs,
    )
    os.chdir(work_dir)
    return helper_fns.run_sequence_mutation_pipeline(pdb_id_sequence, sequence_mutations)
Ejemplo n.º 6
0
def test_pdb_mutation_pipeline(pdb_id):
    """Run the PDB mutation pipeline from its canonical working directory."""
    work_dir = _get_unique_temp_dir(pdb_id)
    os.makedirs(work_dir, exist_ok=True)
    # External resource locations come from the environment.
    external_dirs = {
        'pdb_dir': os.environ['PDB_DIR'],
        'blast_db_dir': os.environ['BLAST_DB_DIR'],
        'archive_dir': os.environ['ARCHIVE_DIR'],
    }
    conf.read_configuration_file(
        DEFAULT={'unique_temp_dir': work_dir},
        EXTERNAL_DIRS=external_dirs,
    )
    os.chdir(work_dir)
    helper_fns.run_pdb_mutation_pipeline(pdb_id, pdb_mutatations)
Ejemplo n.º 7
0
def elaspic_database_cli(args):
    """Entry point for database management actions (create / load / delete).

    Raises
    ------
    Exception
        If neither ``args.config_file`` nor ``args.connection_string`` is
        given, or if ``args.action`` is not a recognized action.
    """
    if args.config_file:
        conf.read_configuration_file(args.config_file)
    elif args.connection_string:
        conf.read_configuration_file(
            DATABASE={"connection_string": args.connection_string},
            LOGGER={"level": LOGGING_LEVELS[args.verbose]},
        )
    else:
        raise Exception("Either 'config_file' or 'connection_string' must be specified!")

    tables_basic = [
        "domain",
        "domain_contact",
        "uniprot_sequence",
        "provean",
        "uniprot_domain",
        "uniprot_domain_template",
        "uniprot_domain_pair",
        "uniprot_domain_pair_template",
    ]
    # The "complete" set is the basic set plus the model tables; deriving it
    # from `tables_basic` keeps the two lists from drifting apart.
    tables_complete = tables_basic + [
        "uniprot_domain_model",
        "uniprot_domain_pair_model",
    ]

    if args.action == "create":
        create_database(args)
    elif args.action == "load_basic":
        load_data_to_database(args, tables_basic)
    elif args.action == "load_complete":
        load_data_to_database(args, tables_complete)
    elif args.action == "delete":
        delete_database(args)
    else:
        raise Exception("Unsupported action: {}".format(args.action))
Ejemplo n.º 8
0
def elaspic(args):
    """Run ELASPIC as either the database or the standalone pipeline.

    ``args.config_file`` takes precedence for configuration; otherwise the
    settings are assembled in memory for the selected pipeline type
    (``uniprot_id`` -> database pipeline, ``structure_file`` -> standalone).
    """
    validate_args(args)

    # --- Configuration ------------------------------------------------
    if args.config_file is not None:
        conf.read_configuration_file(args.config_file)
    elif args.uniprot_id:
        conf.read_configuration_file(
            DATABASE={'connection_string': args.connection_string},
            EXTERNAL_DIRS={
                'pdb_dir': args.pdb_dir,
                'blast_db_dir': args.blast_db_dir,
                'archive_dir': args.archive_dir,
            })
    elif args.structure_file:
        scratch_dir = op.abspath(op.join(os.getcwd(), '.elaspic'))
        os.makedirs(scratch_dir, exist_ok=True)
        conf.read_configuration_file(
            DEFAULT={'unique_temp_dir': scratch_dir},
            EXTERNAL_DIRS={
                'pdb_dir': args.pdb_dir,
                'blast_db_dir': args.blast_db_dir,
                'archive_dir': args.archive_dir,
            })

    # --- Pipeline dispatch --------------------------------------------
    if args.uniprot_id:
        # Database pipeline, optionally restricted to specific domain pairs.
        pair_ids = []
        if args.uniprot_domain_pair_ids:
            logger.debug('uniprot_domain_pair_ids: %s', args.uniprot_domain_pair_ids)
            pair_ids = [int(token) for token in args.uniprot_domain_pair_ids.split(',') if token]
        # Imported late because it requires a parsed configuration.
        from elaspic import database_pipeline
        pipeline = database_pipeline.DatabasePipeline(
            args.uniprot_id, args.mutations,
            run_type=args.run_type,
            uniprot_domain_pair_ids=pair_ids,
        )
        pipeline.run()
    elif args.structure_file:
        # Standalone (local) pipeline.
        from elaspic import standalone_pipeline
        pipeline = standalone_pipeline.StandalonePipeline(
            args.structure_file, args.sequence_file, args.mutations,
            mutation_format=args.mutation_format,
            run_type=args.run_type,
        )
        pipeline.run()
Ejemplo n.º 9
0
import random
import pytest
import pandas as pd
from elaspic import conf

logger = logging.getLogger(__name__)

# Constants
QUICK = False
CONFIG_FILE = op.join(op.dirname(__file__), 'config_file_database.ini')

# Allow pytest command-line options to override the defaults above.
# NOTE(review): `pytest.config` was removed in pytest 4.0, so this guard only
# takes effect on older pytest versions -- confirm the pinned pytest version.
if hasattr(pytest, "config"):
    QUICK = pytest.config.getoption('--quick')
    CONFIG_FILE = pytest.config.getoption('--config-file') or CONFIG_FILE

# Parse the configuration at import time; the imports further down depend on
# a populated `conf.CONFIGS`.
conf.read_configuration_file(CONFIG_FILE, unique_temp_dir=None)
assert conf.CONFIGS['db_type']

logger.debug('Running quick: {}'.format(QUICK))
logger.debug('Config file: {}'.format(CONFIG_FILE))


# Imports that require a parsed config file
import helper_fns  # noqa
from elaspic import elaspic_database  # noqa

# Share a single database engine through the global configuration.
db = elaspic_database.MyDatabase()
conf.CONFIGS['engine'] = db.get_engine()
# Clear strict SQL modes (the statement is MySQL syntax -- presumably the
# target database; confirm) so bulk loads are not rejected on warnings.
conf.CONFIGS['engine'].execute("SET sql_mode = ''")

Ejemplo n.º 10
0
def elaspic_cli(args):
    """Command-line entry point: run the database or the standalone pipeline.

    ``args.config_file`` takes precedence for configuration; otherwise the
    settings (including logging level) are assembled in memory for the
    selected pipeline type.
    """
    validate_args(args)

    # --- Configuration ------------------------------------------------
    if args.config_file is not None:
        conf.read_configuration_file(args.config_file)
    elif args.uniprot_id:
        conf.read_configuration_file(
            DATABASE={"connection_string": args.connection_string},
            EXTERNAL_DIRS={
                "pdb_dir": args.pdb_dir,
                "blast_db_dir": args.blast_db_dir,
                "archive_dir": args.archive_dir,
            },
            LOGGER={"level": LOGGING_LEVELS[args.verbose]},
        )
    elif args.structure_file:
        scratch_dir = op.abspath(op.join(os.getcwd(), ".elaspic"))
        os.makedirs(scratch_dir, exist_ok=True)
        conf.read_configuration_file(
            DEFAULT={"unique_temp_dir": scratch_dir},
            EXTERNAL_DIRS={
                "pdb_dir": args.pdb_dir,
                "blast_db_dir": args.blast_db_dir,
                "archive_dir": args.archive_dir,
            },
            LOGGER={"level": LOGGING_LEVELS[args.verbose]},
        )

    # --- Pipeline dispatch --------------------------------------------
    if args.uniprot_id:
        # Database pipeline, optionally restricted to specific domain pairs.
        pair_ids = []
        if args.uniprot_domain_pair_ids:
            logger.debug("uniprot_domain_pair_ids: {}".format(args.uniprot_domain_pair_ids))
            pair_ids = [int(token) for token in args.uniprot_domain_pair_ids.split(",") if token]

        # Imported late because it requires a parsed configuration.
        from elaspic import database_pipeline

        database_pipeline.DatabasePipeline(
            args.uniprot_id,
            args.mutations,
            run_type=args.run_type,
            uniprot_domain_pair_ids=pair_ids,
        ).run()
    elif args.structure_file:
        # Standalone (local) pipeline.
        from elaspic import standalone_pipeline

        standalone_pipeline.StandalonePipeline(
            args.structure_file,
            args.sequence_file,
            args.mutations,
            mutation_format=args.mutation_format,
            run_type=args.run_type,
        ).run()
Ejemplo n.º 11
0
import pandas as pd
import pytest

from elaspic import conf

logger = logging.getLogger(__name__)

# Constants
QUICK = False
CONFIG_FILE = op.join(op.dirname(__file__), "test_database_pipeline.ini")

# Allow pytest command-line options to override the defaults above.
# NOTE(review): `pytest.config` was removed in pytest 4.0, so this guard only
# takes effect on older pytest versions -- confirm the pinned pytest version.
if hasattr(pytest, "config"):
    QUICK = pytest.config.getoption("--quick")
    CONFIG_FILE = pytest.config.getoption("--config-file") or CONFIG_FILE

# Parse the configuration at import time; the imports further down depend on
# a populated `conf.CONFIGS`.
conf.read_configuration_file(CONFIG_FILE)
assert conf.CONFIGS["db_type"]

logger.debug("Running quick: {}".format(QUICK))
logger.debug("Config file: {}".format(CONFIG_FILE))

# Imports that require a parsed config file
import helper_fns  # noqa

from elaspic import elaspic_database  # noqa

# Share a single database engine through the global configuration.
db = elaspic_database.MyDatabase()
conf.CONFIGS["engine"] = db.get_engine()
# Clear strict SQL modes (the statement is MySQL syntax -- presumably the
# target database; confirm) so bulk loads are not rejected on warnings.
conf.CONFIGS["engine"].execute("SET sql_mode = ''")

# Test-case parameter list, presumably populated further down in the file.
test_cases = []