Ejemplo n.º 1
0
def get_benchmark_list_by_name(database_name):
    """Return the benchmark names stored in the given database.

    Opens a session on the database called ``database_name``, runs the
    query below and collects the ``name`` attribute of every row, in the
    order the query yields them.
    """
    query = 'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
    with database.connect(db_name=database_name) as session:
        names = []
        for row in session.execute(query):
            names.append(row.name)
        return names
Ejemplo n.º 2
0
def resume_benchmark(benchmark_id, nstruct=None):
    """Resubmit the cluster jobs of an existing benchmark.

    Looks the benchmark up in the database, warns the user (and waits for
    Enter) when the rosetta checkout in ``settings.rosetta`` differs from the
    commit or the diff recorded for the benchmark, then clears the
    ``job_output`` directory and submits ``loop_benchmark.py`` as an array
    job through ``qsub``.

    Args:
        benchmark_id: Primary key of the benchmark to resume.  It is turned
            into a string before being placed on the command line.
        nstruct: Optional number of structures per input PDB.  May be given
            as a string.  When omitted (or zero) 10 is used for fast
            benchmarks and 500 otherwise.

    Raises:
        SystemExit: If no benchmark with ``benchmark_id`` exists.
    """
    qsub_command = 'qsub',
    # subprocess only accepts string arguments, so cast the id explicitly in
    # case the caller passed an integer.
    benchmark_command = 'loop_benchmark.py', str(benchmark_id)

    # You get weird errors if you forget to cast nstruct from string to int.

    if nstruct is not None:
        nstruct = int(nstruct)

    # Read the job parameters from the database.

    with database.connect() as session:
        benchmark = session.query(database.Benchmarks).get(benchmark_id)

        # Query.get() returns None for an unknown primary key; fail with a
        # readable message instead of an AttributeError further down.
        if benchmark is None:
            raise SystemExit(
                "No benchmark '{0}' in the database.".format(benchmark_id))

        num_pdbs = len(benchmark.input_pdbs)

        # Make sure the right version of rosetta is being used.

        git_commit = subprocess.check_output(
                shlex.split('git rev-parse HEAD'),
                cwd=settings.rosetta).strip()

        git_diff = subprocess.check_output(
                shlex.split('git diff'),
                cwd=settings.rosetta).strip()

        if benchmark.git_commit != git_commit:
            message = "Benchmark \"{0}\" was run with rosetta commit #{1}, but commit #{2} is currently checked out.  Press [Ctrl-C] to abort or [Enter] to continue."
            message = textwrap.fill(message.format(benchmark.id, benchmark.git_commit[:8], git_commit[:8]))
            raw_input(message)

        elif benchmark.git_diff != git_diff:
            message = "Uncommitted changes have been made to rosetta since benchmark \"{0}\" was run.  Press [Ctrl-C] to abort or [Enter] to continue."
            message = textwrap.fill(message.format(benchmark.id))
            raw_input(message)

        # Build the qsub command: one array task per (structure, pdb) pair
        # and a run-time limit that depends on the benchmark mode.

        if benchmark.fast:
            qsub_command += '-t', '1-{0}'.format((nstruct or 10) * num_pdbs)
            qsub_command += '-l', 'h_rt=0:30:00'
        else:
            qsub_command += '-t', '1-{0}'.format((nstruct or 500) * num_pdbs)
            qsub_command += '-l', 'h_rt=4:00:00'

        print("Your benchmark \"{0}\" (id={1}) is being resumed".format(
                benchmark.name, benchmark_id))

    # Submit the job.

    utilities.clear_directory('job_output')
    qsub_command += '-o', 'job_output', '-e', 'job_output'

    subprocess.call(qsub_command + benchmark_command)
Ejemplo n.º 3
0
from libraries import utilities
from libraries import settings; settings.load(interactive=False)
from libraries import database

# Parse arguments.

if len(sys.argv) != 2 or 'SGE_TASK_ID' not in os.environ:
    print('Usage: SGE_TASK_ID=<id> loop_benchmark.py <benchmark_id>')
    sys.exit(1)

# Subtract one so the task id can be used as a zero-based index below.
task_id = int(os.environ['SGE_TASK_ID']) - 1
benchmark_id = int(sys.argv[1])

# Figure out which loop to benchmark.

with database.connect() as session:
    benchmark = session.query(database.Benchmarks).get(benchmark_id)

    # Query.get() returns None for an unknown primary key; stop with a clear
    # message rather than an AttributeError on the next line.
    if benchmark is None:
        sys.exit("No benchmark '{0}' in the database.".format(benchmark_id))

    script_path = benchmark.rosetta_script
    script_vars = json.loads(benchmark.rosetta_script_vars or '[]')
    flags_path = benchmark.rosetta_flags
    fragments_path = benchmark.rosetta_fragments
    fast = benchmark.fast
    input_pdbs = benchmark.input_pdbs

    # Without this guard the modulo below raises ZeroDivisionError.
    if not input_pdbs:
        sys.exit("Benchmark '{0}' has no input PDBs.".format(benchmark_id))

    # Tasks cycle through the input PDBs, so task ids beyond the number of
    # PDBs wrap around to the start of the list.
    pdb_path = input_pdbs[task_id % len(input_pdbs)].pdb_path
    pdb_tag = os.path.splitext(os.path.basename(pdb_path))[0]
    # The loop file sits next to the PDB: same path, '.pdb' or '.pdb.gz'
    # replaced by '.loop'.
    loop_path = re.sub(r'\.pdb(\.gz)?$', '.loop', pdb_path)
    non_random = benchmark.non_random

# Set LD_LIBRARY_PATH so that the MySQL libraries can be found.

rosetta_env = os.environ.copy()
Ejemplo n.º 4
0
            r.name for r in session.execute(
                'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
            )
        ]


def get_progress(database_name, benchmark_name):
    """Select the benchmark whose progress is to be reported.

    NOTE(review): only the beginning of this function is visible in this
    excerpt, so its return value is not documented here.

    Args:
        database_name: Name of the database, passed as ``db_name`` to
            ``database.test_connect`` and ``database.connect``.
        benchmark_name: Name of the benchmark.  When falsy, the name of the
            benchmark with the highest ``benchmark_id`` is used instead.
    """

    # Check the connection first: on RuntimeError print the error and leave
    # with exit status 1.
    try:
        database.test_connect(db_name=database_name)
    except RuntimeError, error:
        print error
        sys.exit(1)

    # Open a session for the queries below (the visible code only reads from
    # the benchmarks table).
    with database.connect(db_name=database_name) as session:

        # Messages collected along the way, starting with an empty line.
        messages = ['']

        # Use the latest benchmark's name if none was supplied
        if not benchmark_name:
            # Highest benchmark_id first, so q.first() is the latest entry.
            q = session.query(database.Benchmarks).order_by(
                database.Benchmarks.benchmark_id.desc())
            # Nothing to fall back on when the table is empty.
            if q.count() == 0:
                exit(
                    'There is no benchmark data in the database "{0}".'.format(
                        database_name))
            benchmark_name = q.first().name
            messages.append(
                'No benchmark was selected. Choosing the most recent benchmark: "{0}".\n'
                .format(benchmark_name))
Ejemplo n.º 5
0
settings.load(interactive=False)
from libraries import database

# Parse arguments.

if len(sys.argv) != 2 or 'SGE_TASK_ID' not in os.environ:
    print('Usage: SGE_TASK_ID=<id> loop_benchmark.py <benchmark_id>')
    sys.exit(1)

# Subtract one so the task id can be used as a zero-based index below.
task_id = int(os.environ['SGE_TASK_ID']) - 1
benchmark_id = int(sys.argv[1])

# Figure out which loop to benchmark.

with database.connect() as session:
    benchmark = session.query(database.Benchmarks).get(benchmark_id)

    # Query.get() returns None for an unknown primary key; stop with a clear
    # message rather than an AttributeError on the next line.
    if benchmark is None:
        sys.exit("No benchmark '{0}' in the database.".format(benchmark_id))

    script_path = benchmark.rosetta_script
    script_vars = json.loads(benchmark.rosetta_script_vars or '[]')
    flags_path = benchmark.rosetta_flags
    fragments_path = benchmark.rosetta_fragments
    fast = benchmark.fast
    input_pdbs = benchmark.input_pdbs

    # Without this guard the modulo below raises ZeroDivisionError.
    if not input_pdbs:
        sys.exit("Benchmark '{0}' has no input PDBs.".format(benchmark_id))

    # Tasks cycle through the input PDBs, so task ids beyond the number of
    # PDBs wrap around to the start of the list.
    pdb_path = input_pdbs[task_id % len(input_pdbs)].pdb_path
    pdb_tag = os.path.splitext(os.path.basename(pdb_path))[0]
    # The loop file sits next to the PDB: same path, '.pdb' or '.pdb.gz'
    # replaced by '.loop'.
    loop_path = re.sub(r'\.pdb(\.gz)?$', '.loop', pdb_path)
    non_random = benchmark.non_random

# Set LD_LIBRARY_PATH so that the MySQL libraries can be found.

rosetta_env = os.environ.copy()
    def from_database(name_or_id, group_by_name=False):
        """Build a ``Benchmark`` from the runs stored in the database.

        Args:
            name_or_id: Either a benchmark id (anything ``int()`` accepts) or
                a benchmark name.
            group_by_name: When a name matches several runs, merge all of
                them into one ``Benchmark`` instead of aborting.

        Returns:
            A ``Benchmark`` whose ``loops`` hold one ``Loop`` per input PDB
            path and one ``Model`` per stored structure.
        """
        from libraries import database
        from sqlalchemy import desc

        with database.connect() as session:

            # Decide whether a name or id was used to specify a benchmark run,
            # and load the corresponding data out of the database.  Anything
            # int() accepts is treated as an id, everything else as a name.
            # If more than one run has the same name, the call aborts unless
            # group_by_name is set, in which case all runs are merged.

            try:
                run_id = int(name_or_id)
            except ValueError:
                run_id = None

            if run_id is not None:
                db_benchmark = session.query(database.Benchmarks).get(run_id)

                if db_benchmark is None:
                    message = "No benchmark '{}' in the database."
                    utilities.print_error_and_die(message, run_id)

                db_benchmarks = [db_benchmark]

            else:
                name = name_or_id
                query = session.query(database.Benchmarks).filter_by(
                    name=name).order_by(desc(database.Benchmarks.start_time))
                db_benchmarks = list(query)

                # An unknown name used to surface as a bare assertion failure
                # below; report it the same way as an unknown id instead.
                if not db_benchmarks:
                    message = "No benchmark named '{}' in the database."
                    utilities.print_error_and_die(message, name)

                if not group_by_name and len(db_benchmarks) > 1:
                    message = "Multiple benchmarks runs were found with the same name '{0}' (ids are: {1}). If this is expected then set the --group_by_name option.".format(
                        name,
                        ', '.join(map(str, [b.id for b in db_benchmarks])))
                    utilities.print_error_and_die(message, name)

            # All selected runs are expected to share a single name.
            b_name = {db_benchmark.name for db_benchmark in db_benchmarks}
            assert len(b_name) == 1
            b_name = b_name.pop()

            # Runs queried by name are ordered by start_time descending, so
            # index 0 is the most recent one; its title wins when the runs
            # disagree.
            b_title = {
                db_benchmark.title or '' for db_benchmark in db_benchmarks}
            if len(b_title) > 1:
                colortext.warning(
                    "There are multiple titles associated with benchmark {0}: '{1}'. Choosing the most recent ('{2}')."
                    .format(b_name, "', '".join(b_title),
                            db_benchmarks[0].title or ''))
                b_title = db_benchmarks[0].title
            else:
                b_title = b_title.pop() or None

            benchmark = Benchmark(b_name, b_title)

            for db_benchmark in db_benchmarks:

                # Fill in the benchmark data structure from the database.

                print(
                    "Loading the {0} benchmark (id {1}) from the database...".
                    format(benchmark.name, db_benchmark.id))

                # One Loop per input PDB path, shared between merged runs.
                for db_input in db_benchmark.input_pdbs:
                    path = db_input.pdb_path
                    if not benchmark.loops.get(path):
                        benchmark.loops[path] = Loop(benchmark, path)

                for structure in db_benchmark.structures:
                    loop = benchmark.loops[structure.input_tag]
                    # Models are numbered from 1 in the order they are added.
                    model_id = len(loop.models) + 1
                    score = structure.score_features.score
                    rmsd = structure.rmsd_features.protein_backbone
                    runtime = structure.runtime_features.elapsed_time

                    model = Model(loop, model_id, score, rmsd, runtime)
                    loop.models.append(model)

        return benchmark
Ejemplo n.º 7
0
def get_benchmark_list_by_name(database_name):
    """Return the benchmark names stored in the given database.

    Opens a session on the database called ``database_name``, runs the
    query below and collects the ``name`` attribute of every row, in the
    order the query yields them.
    """
    query = 'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
    with database.connect(db_name=database_name) as session:
        names = []
        for row in session.execute(query):
            names.append(row.name)
        return names
Ejemplo n.º 8
0

def get_benchmark_list_by_name(database_name):
    """Return the benchmark names stored in the given database.

    Opens a session on the database called ``database_name``, runs the
    query below and collects the ``name`` attribute of every row, in the
    order the query yields them.
    """
    query = 'SELECT DISTINCT name from benchmarks ORDER BY benchmark_id DESC'
    with database.connect(db_name=database_name) as session:
        names = []
        for row in session.execute(query):
            names.append(row.name)
        return names


def get_progress(database_name, benchmark_name):
    """Select the benchmark runs whose progress is to be reported.

    NOTE(review): only the beginning of this function is visible in this
    excerpt, so its return value is not documented here.

    Args:
        database_name: Name of the database, passed as ``db_name`` to
            ``database.test_connect`` and ``database.connect``.
        benchmark_name: Name of the benchmark.  When falsy, the name of the
            benchmark with the highest ``benchmark_id`` is used instead.
    """

    # Check the connection first: on RuntimeError print the error and leave
    # with exit status 1.
    try: database.test_connect(db_name = database_name)
    except RuntimeError, error:
        print error
        sys.exit(1)

    # Open a session for the queries below (the visible code only reads from
    # the benchmarks table).
    with database.connect(db_name = database_name) as session:

        # Messages collected along the way, starting with an empty line.
        messages = ['']

        # Use the latest benchmark's name if none was supplied
        if not benchmark_name:
            # Highest benchmark_id first, so q.first() is the latest entry.
            q = session.query(database.Benchmarks).order_by(database.Benchmarks.benchmark_id.desc())
            # Nothing to fall back on when the table is empty.
            if q.count() == 0:
                exit('There is no benchmark data in the database "{0}".'.format(database_name))
            benchmark_name = q.first().name
            messages.append('No benchmark was selected. Choosing the most recent benchmark: "{0}".\n'.format(benchmark_name))

        # Retrieve the set of benchmark runs associated with the benchmark name
        q = session.query(database.Benchmarks).filter(database.Benchmarks.name == benchmark_name)
        # Stop when no run carries that name.
        if q.count() == 0:
            exit('There is no benchmark data in the database "{0}" for benchmark "{1}".'.format(database_name, benchmark_name))
Ejemplo n.º 9
0
    def from_database(name_or_id, group_by_name=False):
        """Build a ``Benchmark`` from the runs stored in the database.

        Args:
            name_or_id: Either a benchmark id (anything ``int()`` accepts) or
                a benchmark name.
            group_by_name: When a name matches several runs, merge all of
                them into one ``Benchmark`` instead of aborting.

        Returns:
            A ``Benchmark`` whose ``loops`` hold one ``Loop`` per input PDB
            path and one ``Model`` per stored structure.
        """
        from libraries import database
        from sqlalchemy import desc

        with database.connect() as session:

            # Decide whether a name or id was used to specify a benchmark run,
            # and load the corresponding data out of the database.  Anything
            # int() accepts is treated as an id, everything else as a name.
            # If more than one run has the same name, the call aborts unless
            # group_by_name is set, in which case all runs are merged.

            try:
                run_id = int(name_or_id)
            except ValueError:
                run_id = None

            if run_id is not None:
                db_benchmark = session.query(database.Benchmarks).get(run_id)

                if db_benchmark is None:
                    message = "No benchmark '{}' in the database."
                    utilities.print_error_and_die(message, run_id)

                db_benchmarks = [db_benchmark]

            else:
                name = name_or_id
                query = session.query(database.Benchmarks).filter_by(
                    name=name).order_by(desc(database.Benchmarks.start_time))
                db_benchmarks = list(query)

                # An unknown name used to surface as a bare assertion failure
                # below; report it the same way as an unknown id instead.
                if not db_benchmarks:
                    message = "No benchmark named '{}' in the database."
                    utilities.print_error_and_die(message, name)

                if not group_by_name and len(db_benchmarks) > 1:
                    message = "Multiple benchmarks runs were found with the same name '{0}' (ids are: {1}). If this is expected then set the --group_by_name option.".format(
                        name,
                        ', '.join(map(str, [b.id for b in db_benchmarks])))
                    utilities.print_error_and_die(message, name)

            # All selected runs are expected to share a single name.
            b_name = {db_benchmark.name for db_benchmark in db_benchmarks}
            assert len(b_name) == 1
            b_name = b_name.pop()

            # Runs queried by name are ordered by start_time descending, so
            # index 0 is the most recent one; its title wins when the runs
            # disagree.
            b_title = {
                db_benchmark.title or '' for db_benchmark in db_benchmarks}
            if len(b_title) > 1:
                colortext.warning(
                    "There are multiple titles associated with benchmark {0}: '{1}'. Choosing the most recent ('{2}')."
                    .format(b_name, "', '".join(b_title),
                            db_benchmarks[0].title or ''))
                b_title = db_benchmarks[0].title
            else:
                b_title = b_title.pop() or None

            benchmark = Benchmark(b_name, b_title)

            for db_benchmark in db_benchmarks:

                # Fill in the benchmark data structure from the database.

                print(
                    "Loading the {0} benchmark (id {1}) from the database...".
                    format(benchmark.name, db_benchmark.id))

                # One Loop per input PDB path, shared between merged runs.
                for db_input in db_benchmark.input_pdbs:
                    path = db_input.pdb_path
                    if not benchmark.loops.get(path):
                        benchmark.loops[path] = Loop(benchmark, path)

                for structure in db_benchmark.structures:
                    loop = benchmark.loops[structure.input_tag]
                    # Models are numbered from 1 in the order they are added.
                    model_id = len(loop.models) + 1
                    score = structure.score_features.score
                    rmsd = structure.rmsd_features.protein_backbone
                    runtime = structure.runtime_features.elapsed_time

                    model = Model(loop, model_id, score, rmsd, runtime)
                    loop.models.append(model)

        return benchmark
Ejemplo n.º 10
0
def _extra_nstruct_for(missing_count):
    """Return how many extra structures to request for ``missing_count`` missing ones."""
    if missing_count <= 2:
        return 5
    if missing_count <= 5:
        return 10
    if missing_count <= 10:
        return 20
    if missing_count <= 15:
        return 30
    # Beyond the fixed bins, sizes grow in steps of 20 (50, 70, 90, ...).
    return ((int((missing_count - 11) / 20.0) + 2) * 20) + 10


def complete_benchmark(benchmark_id, nstruct=None):
    """Start extra runs for the inputs of a benchmark that are short of structures.

    The progress data of the benchmark is used to find every input whose
    finished structure count is below the recorded ``nstruct``.  Those inputs
    are grouped by how many extra structures to request (more than are
    missing, in case some of the new jobs fail as well) and ``run_benchmark``
    is called once per group, reusing the settings of the previous runs.

    Args:
        benchmark_id: Despite its name this is used as the benchmark *name*:
            it is passed to ``get_progress`` as the benchmark name and
            matched against ``Benchmarks.name``.
        nstruct: Accepted for interface compatibility but not consulted; the
            target count always comes from the progress data.

    Raises:
        SystemExit: If ``nstruct`` is not set in the progress data, if the
            previous runs have no value for a setting, or if they disagree
            on one.
    """
    # Get the progress data for the job.
    progress_data = get_progress(settings.db_name, benchmark_id)

    # Set up nstruct.
    nstruct = progress_data['nstruct']
    if not nstruct:
        sys.exit('The nstruct variable is not set for this benchmark. Exiting.')

    # Group the inputs that need extra jobs by the number of structures to
    # request for them.  We run extra jobs in case these fail as well.
    bins = {}
    for input_tag, finished_count in progress_data['CountPerStructure'].items():
        if finished_count < nstruct:
            bin_size = _extra_nstruct_for(nstruct - finished_count)
            bins.setdefault(bin_size, []).append(input_tag)

    with database.connect() as session:
        benchmark_records = list(
            session.query(database.Benchmarks).filter(
                database.Benchmarks.name == benchmark_id))
        print('')
        benchmark_variables = dict(
            rosetta_script={r.rosetta_script for r in benchmark_records},
            # A run stored without script variables has NULL here; treat it
            # as an empty list, like the job script does.
            rosetta_script_vars=[
                json.loads(r.rosetta_script_vars or '[]')
                for r in benchmark_records],
            rosetta_flags={r.rosetta_flags for r in benchmark_records},
            rosetta_fragments={r.rosetta_fragments for r in benchmark_records},
            fast={r.fast for r in benchmark_records},
            non_random={r.non_random for r in benchmark_records},
        )

        # The script variables are parsed JSON, so they cannot go in a set;
        # compare every run against the first one instead.
        script_vars = benchmark_variables['rosetta_script_vars']
        if any(other != script_vars[0] for other in script_vars[1:]):
            sys.exit('Exception (ambiguity): The benchmark {0} has multiple RosettaScript variable values associated with previous runs: "{1}".'.format(
                benchmark_id, '", "'.join(sorted(map(str, script_vars)))))

    # Reduce every setting to the single value shared by all previous runs.
    for key, values in sorted(benchmark_variables.items()):
        label = key.replace('_', ' ')
        if len(values) == 0:
            sys.exit('Exception (missing data): The benchmark {0} has no {1} values associated with previous runs.'.format(benchmark_id, label))
        elif key == 'rosetta_script_vars':
            benchmark_variables[key] = values[0]
        elif len(values) > 1:
            # Values may be booleans or None, so convert them to strings
            # before joining them into the message.
            sys.exit('Exception (ambiguity): The benchmark {0} has multiple {1} values associated with previous runs: "{2}".'.format(
                benchmark_id, label, '", "'.join(sorted(map(str, values)))))
        else:
            benchmark_variables[key] = values.pop()

    # Start the longer jobs first.
    for extra_nstruct, pdbs in sorted(bins.items(), reverse=True):
        run_benchmark(
            benchmark_id, benchmark_variables['rosetta_script'], pdbs,
            vars=benchmark_variables['rosetta_script_vars'],
            flags=benchmark_variables['rosetta_flags'],
            fragments=benchmark_variables['rosetta_fragments'],
            nstruct=extra_nstruct, desc=None,
            fast=benchmark_variables['fast'],
            non_random=benchmark_variables['non_random'])