def main():
    """Collect quality metrics for validated designs and report the best ones."""
    cli = docopt.docopt(__doc__)
    out_prefix = cli['--prefix'] or ''

    validation_workspaces = find_validation_workspaces(
            cli['<workspace>'], cli['<round>'])
    designs = find_reasonable_designs(
            validation_workspaces, cli['--threshold'], cli['--verbose'])

    # The fixed set of quality metrics evaluated for every design.
    quality_metrics = [
        DesignNameMetric(),
        ResfileSequenceMetric(),
        SequenceClusterMetric(cli['--subs-matrix']),
        StructureClusterMetric(cli['--structure-threshold']),
        RestraintDistMetric(),
        ScoreGapMetric(),
        PercentSubangstromMetric(),
        BuriedUnsatHbondMetric(),
        DunbrackScoreMetric(),
    ]

    discover_filter_metrics(quality_metrics, validation_workspaces)
    #discover_custom_metrics(quality_metrics, validation_workspaces)
    calculate_quality_metrics(quality_metrics, designs, cli['--verbose'])
    designs = find_pareto_optimal_designs(designs, quality_metrics, cli['--verbose'])
    report_quality_metrics(designs, quality_metrics, out_prefix + 'quality_metrics.xlsx')
    #report_score_vs_rmsd_funnels(designs, out_prefix + 'score_vs_rmsd.pdf')
    #report_pymol_sessions(designs, out_prefix + 'pymol_sessions')
    annotate_designs(designs)
# --- Example 2 ---
def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Setup the workspace.

    workspace = pipeline.FixbbDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.check_rosetta()
    workspace.make_dirs()

    if args['--clear'] or args['--test-run']:
        workspace.clear_outputs()

    # Decide which inputs to use.

    inputs = workspace.unclaimed_inputs
    nstruct = len(inputs) * int(args['--nstruct'])

    if not inputs:
        print """\
All the input structures have already been (or are already being) designed.  If
you want to rerun all the inputs from scratch, use the --clear flag."""
        raise SystemExit

    # Submit the design job.

    big_jobs.submit('pip_design.py',
                    workspace,
                    inputs=inputs,
                    nstruct=nstruct,
                    max_runtime=args['--max-runtime'],
                    max_memory=args['--max-memory'],
                    test_run=args['--test-run'])
# --- Example 3 ---
def main():
    """Run (or dry-run) the fragment-generation script for a workspace."""
    opts = docopt.docopt(__doc__)
    cluster.require_qsub()

    ws = pipeline.workspace_from_path(opts['<workspace>'])
    ws.check_paths()
    ws.make_dirs()
    ws.clear_fragments()

    # Assemble the fragment-generation command line.
    cmd = [
        'klab_generate_fragments',
        ws.input_pdb_path,
        '--outdir', ws.fragments_dir,
        '--memfree', opts['--mem_free'],
    ]
    if not opts['--ignore-loop-file']:
        cmd.extend(['--loops_file', ws.loops_path])

    # With --dry-run, just show the command instead of running it.
    if opts['--dry-run']:
        print(' '.join(cmd))
    else:
        subprocess.call(cmd)
def main():
    """Render a sequence logo (PDF) for the validated designs in one round."""
    opts = docopt.docopt(__doc__)
    workspace_root = opts['<workspace>']
    round_number = opts['<round>']
    pdf_path = opts['<pdf_output>']

    # Right now only validated designs are considered, but fixbb designs or
    # restrained models might be interesting as well.
    workspace = pipeline.ValidatedDesigns(workspace_root, round_number)
    workspace.check_paths()

    designs = [structures.Design(d) for d in workspace.output_subdirs]
    sequences = corebio.seq.SeqList(
            [corebio.seq.Seq(d.resfile_sequence) for d in designs],
            alphabet=corebio.seq.unambiguous_protein_alphabet,
    )

    # Build the logo and write it out as a PDF.
    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    logo_options.title = workspace.focus_dir
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    with open(pdf_path, 'wb') as pdf_file:
        pdf_file.write(weblogo.pdf_formatter(logo_data, logo_format))
def main():
    args = docopt.docopt(__doc__)
    cluster.require_qsub()

    workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
    workspace.check_paths()
    workspace.check_rosetta()
    workspace.make_dirs()
    workspace.clear_fragments()

    # Run the fragment generation script.

    generate_fragments = [
        'klab_generate_fragments',
        '--loops_file',
        workspace.loops_path,
        '--outdir',
        workspace.fragments_dir,
        '--memfree',
        args['--mem-free'],
        workspace.input_dir,
    ]

    if args['--dry-run']:
        print ' '.join(generate_fragments)
    else:
        subprocess.call(generate_fragments)
def main():
    """Fetch workspace data from the remote host (thin CLI wrapper)."""
    opts = docopt.docopt(__doc__)
    pipeline.fetch_data(
        opts['<directory>'], opts['--remote'],
        opts['--include-logs'], opts['--dry-run'])
def discover_filter_metrics(metrics, workspaces):
    """Append an ExtraFilterHandler metric for each filter in the filter file.

    The filter list is read from the workspace named on the command line.
    Since that is the same file regardless of which workspace is being
    iterated, it is read once, not once per workspace — the original version
    re-parsed the command line and re-read the file inside the loop, which
    appended duplicate metrics when more than one workspace was given.

    NOTE(review): the per-workspace loop variable was never consulted by the
    original code — confirm whether each workspace should instead contribute
    its own ``filters_list``.
    """
    filter_list = pipeline.workspace_from_dir(
        docopt.docopt(__doc__)['<workspace>']).filters_list
    with open(filter_list, "r") as file:
        # safe_load: the filter file is plain data; full yaml.load would
        # allow arbitrary object construction from the file.
        filters = yaml.safe_load(file)
    if filters:
        for record in filters:
            metrics.append(ExtraFilterHandler(record))
def main():
    args = docopt.docopt(__doc__)
    num_models = 0

    for directory in args['<directories>']:
        records = structures.\
                load(directory, args['--restraints'], not args['--recalc'])
        if args['--query']:
            records = records.query(args['--query'])
        num_models += len(records)

    print num_models
def main():
    """Copy manually prepared designs into the workspace's input directory."""
    opts = docopt.docopt(__doc__)

    # Prepare the workspace.
    ws = pipeline.ValidatedDesigns(opts['<workspace>'], opts['<round>'])
    ws.check_paths()
    ws.make_dirs()

    if opts['--clear']:
        ws.clear_inputs()

    # Copy each given PDB into the input directory, keeping its basename.
    for src in opts['<pdbs>']:
        dst = os.path.join(ws.input_dir, os.path.basename(src))
        shutil.copy(src, dst)
def main():
    """Submit validation simulations for every unclaimed design."""
    opts = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Prepare the workspace.
    ws = pipeline.ValidatedDesigns(opts['<workspace>'], opts['<round>'])
    ws.check_paths()
    ws.make_dirs()

    if opts['--clear'] or opts['--test-run']:
        ws.clear_outputs()

    # Work out which inputs still need validating and how many models to make.
    unclaimed = ws.unclaimed_inputs
    num_jobs = len(unclaimed) * int(opts['--nstruct'])

    if num_jobs == 0:
        scripting.print_error_and_die("""\
No unclaimed input files.

If you previously started a round of simulations and then stopped them for some
reason, the problem is probably that all the inputs are still claimed by those
simulations.  Use the '--clear' flag to remove the claims and try again.""")

    # Give each input a clean output directory.
    for pdb in unclaimed:
        scripting.clear_directory(ws.output_subdir(pdb))

    # Launch the validation job.
    big_jobs.submit(
        'pip_validate.py',
        ws,
        inputs=unclaimed,
        nstruct=num_jobs,
        max_runtime=opts['--max-runtime'],
        max_memory=opts['--max-memory'],
        test_run=opts['--test-run'],
    )
def main():
    args = docopt.docopt(__doc__)
    wait_time = 60 * eval(args['--wait-time'])

    if args['--keep-going']:
        while True:
            pipeline.fetch_and_cache_data(
                args['<directory>'],
                args['--remote'],
                args['--include-logs'],
            )

            print "Waiting {} min...".format(wait_time // 60)
            time.sleep(wait_time)

    else:
        pipeline.fetch_and_cache_data(
            args['<directory>'],
            args['--remote'],
            args['--include-logs'],
        )
def main():
    """Kick off the restrained model-building step on the cluster."""
    cli = docopt.docopt(__doc__)
    cluster.require_qsub()

    # Prepare the workspace.
    ws = pipeline.RestrainedModels(cli['<workspace>'])
    ws.check_paths()
    ws.check_rosetta()
    ws.make_dirs()

    if cli['--clear'] or cli['--test-run']:
        ws.clear_outputs()

    # Submit the model-building job.
    big_jobs.submit(
            'pip_build.py', ws,
            nstruct=cli['--nstruct'],
            max_runtime=cli['--max-runtime'],
            max_memory=cli['--max-memory'],
            test_run=cli['--test-run'],
    )
# --- Example 13 ---
def main():
    args = docopt.docopt(__doc__)
    print structures.load(args['<directory>'], args['--restraints'],
                          not args['--recalc']).head()
# --- Example 14 ---
def main():
    """Push local workspace data to the configured remote host."""
    opts = docopt.docopt(__doc__)
    pipeline.push_data(
        opts['<directory>'], opts['--remote'], opts['--dry-run'])
# --- Example 15 ---
def main():
    """Interactively create and populate a new workspace directory.

    Validates the workspace path, optionally overwrites an existing
    workspace, then prompts the user for each required setting and installs
    it.  For --remote workspaces only the Rosetta dir and rsync URL are
    asked for, and the data is fetched immediately afterwards.
    """
    arguments = docopt.docopt(__doc__)
    workspace = pipeline.Workspace(arguments['<workspace>'])

    # Make a new workspace directory.

    # The fragment generation script silently fails on paths with characters
    # other than alphanumerics, '.' and '_', so refuse such paths up front.
    if workspace.incompatible_with_fragments_script:
        scripting.print_error_and_die("""\
Illegal character(s) found in workspace path:

  {}

The full path to a workspace must contain only characters that are alphanumeric
or '.' or '_'.  The reason for this ridiculous rule is the fragment generation
script, which will silently fail if the full path to its input file contains
any characters but those.""", workspace.abs_root_dir)

    # An existing workspace is only removed when --overwrite is given;
    # otherwise abort with an error.
    if workspace.exists():
        if arguments['--overwrite']:
            shutil.rmtree(workspace.root_dir)
        else:
            scripting.print_error_and_die("""\
Design '{0}' already exists.  Use '-o' to overwrite.""", workspace.root_dir)

    workspace.make_dirs()

    # Decide which settings to ask for.

    if arguments['--remote']:
        installers = (
                RosettaDir,
                RsyncUrl,
        )
    else:
        installers = (
                RosettaDir,
                InputPdb,
                LoopsFile,
                Resfile,
                RestraintsFile,
                ScoreFunction,
                BuildScript,
                DesignScript,
                ValidateScript,
                FilterScript,
                SharedDefs,
                FlagsFile,
        )

    # Get the necessary settings from the user and use them to fill in the
    # workspace.

    print "Please provide the following pieces of information:"
    print

    scripting.use_path_completion()

    for installer in installers:

        # If the installer doesn't have a prompt, just install it without
        # asking any questions.

        if installer.prompt is None:
            installer.install(workspace)
            continue

        # Otherwise, print a description of the setting being installed and
        # prompt the user for a value.

        print installer.description
        print

        # Re-prompt until the value installs cleanly; Ctrl-C / Ctrl-D deletes
        # the partially-built workspace and exits.
        while True:
            try:
                setting = raw_input(installer.prompt)
                installer.install(workspace, setting)
            except (ValueError, IOError) as problem:
                print problem
                continue
            except (KeyboardInterrupt, EOFError):
                shutil.rmtree(workspace.root_dir)
                scripting.print_error_and_die("\nReceived exit command, no workspace created.")
            else:
                break

        print

    # If we made a link to a remote workspace, immediately try to synchronize
    # with it.  Rsync will say whether or not it succeeded.  Otherwise just
    # print a success message.

    if arguments['--remote']:
        pipeline.fetch_data(workspace.root_dir)
    else:
        print "Setup successful for design '{0}'.".format(workspace.root_dir)
                reversion_seq[aa_num - first_res] = wt_aa

        name = original_design + "_reversion"
        num_reversion_designs = 1
        while name in reverted_sequences:
            num_reversion_designs += 1
            name = name + str(num_reversion_designs)
        reverted_sequences[name] = "".join(reversion_seq)

    for design in reverted_sequences:
        protein_sequences[design] = reverted_sequences[design]

    return protein_sequences


arguments = docopt.docopt(__doc__)

inputs = arguments['<input_fasta_or_folder>']

template_dna = arguments['--template-dna']
wt_sequence_file = arguments['--combine-chains']
wt_sequence_str = ''

reversion_mutations = arguments['--reversion-mutations']

if wt_sequence_file:
    wt_sequence_str = import_wt_protein_sequence(wt_sequence_file)

from_files = arguments['--from-pdb-folder']

if from_files:
# --- Example 17 ---
def main():
    """Pick input designs for the next round with Boltzmann-weighted sampling.

    Loads the predecessor round's scored models, filters them (optional
    query, duplicate sequences, already-picked inputs), then samples
    --num designs with probability proportional to exp(-score / --temp),
    shows the user the distributions used, and symlinks the picks into the
    workspace input directory.
    """
    args = docopt.docopt(__doc__)
    root = args['<workspace>']
    round = args['<round>']  # NOTE(review): shadows the builtin `round`
    query = ' and '.join(args['<queries>'])
    temp = float(args['--temp'])

    # Import ``pylab`` after handling the help message, because otherwise
    # ``matplotlib`` sometimes issues warnings that then show up in the docs.
    import pylab

    workspace = pipeline.ValidatedDesigns(root, round)
    workspace.check_paths()
    workspace.make_dirs()

    if args['--clear']:
        workspace.clear_inputs()

    predecessor = workspace.predecessor

    # Get sequences and scores for each design.

    # presumably a DataFrame with at least 'sequence', 'total_score', and
    # 'path' columns — verify against structures.load.
    seqs_scores = structures.load(
        predecessor.output_dir,
        use_cache=not args['--recalc'],
    )
    seqs_scores.dropna(inplace=True)
    print 'Total number of designs:      ', len(seqs_scores)

    # If a query was given on the command line, find models that satisfy it.

    if query:
        seqs_scores = seqs_scores.query(query)
        print '    minus given query:        ', len(seqs_scores)

    # Keep only the lowest scoring model for each set of identical sequences.

    # NOTE(review): ``.ix`` is deprecated/removed in modern pandas — this
    # code requires an old pandas version, or should migrate to ``.loc``.
    groups = seqs_scores.groupby('sequence', group_keys=False)
    seqs_scores = groups.\
            apply(lambda df: df.ix[df.total_score.idxmin()]).\
            reset_index(drop=True)
    print '    minus duplicate sequences:', len(seqs_scores)

    # Remove designs that have already been picked.

    existing_inputs = set(
        os.path.basename(os.path.realpath(x)) for x in workspace.input_paths)
    seqs_scores = seqs_scores.query('path not in @existing_inputs')
    print '    minus current inputs:     ', len(seqs_scores)
    print

    # Use a Boltzmann weighting scheme to pick designs.

    # NOTE(review): ``seq_scores`` (singular) looks like a typo — the sorted
    # result is never used, so the sort below has no effect.  Confirm whether
    # ``seqs_scores`` was meant to be reassigned here.
    seq_scores = seqs_scores.sort_values(by='total_score')

    # Shift scores by the median so the exponential weights are well scaled.
    scores = seqs_scores.total_score.values
    scores -= median(scores)
    weights = exp(-scores / temp)
    indices = arange(len(scores))

    # Build the discrete PDF/CDF used for inverse-transform sampling.
    pdf = array(weights)
    cdf = cumsum(pdf) / sum(pdf)

    num_to_pick = min(int(args['--num']), len(scores))
    picked_indices = set()

    # Sample without replacement: re-draw until enough distinct picks.
    while len(picked_indices) < num_to_pick:
        choice = random.random()
        picked_index = indices[cdf > choice][0]
        picked_indices.add(picked_index)

    picked_indices = sorted(picked_indices)

    # Show the user the probability distributions used to pick designs.

    raw_input("""\
Press [enter] to view the designs that were picked and the distributions that
were used to pick them.  Pay particular attention to the CDF.  If it is too
flat, the temperature (T={0}) is too high and designs are essentially being
picked randomly.  If it is too sharp, the temperature is too low and only the 
highest scoring designs are being picked.
""".format(temp))

    color = '#204a87'  # Tango dark blue
    base_format = dict(color=color)
    picked_format = dict(marker='o', ls='none', mfc=color, mec='none')

    pylab.figure(num=1, figsize=(8, 3))

    pylab.subplot(1, 3, 1)
    pylab.title('Rosetta Scores')
    pylab.plot(indices, scores, **base_format)
    pylab.plot(picked_indices, scores[picked_indices], **picked_format)

    pylab.subplot(1, 3, 2)
    pylab.title('Boltzmann PDF')
    pylab.plot(indices, pdf, **base_format)
    pylab.plot(picked_indices, pdf[picked_indices], **picked_format)
    pylab.yscale('log')

    pylab.subplot(1, 3, 3)
    pylab.title('Boltzmann CDF')
    pylab.plot(indices, cdf, **base_format)
    pylab.plot(picked_indices, cdf[picked_indices], **picked_format)

    pylab.tight_layout()
    pylab.show()

    if raw_input("Accept these picks? [Y/n] ") == 'n':
        print "Aborting."
        sys.exit()

    # Make symlinks to the picked designs.

    if not args['--dry-run']:
        # Continue numbering after the highest existing '<id>.pdb.gz' input.
        existing_ids = set(
            int(x[0:-len('.pdb.gz')]) for x in os.listdir(workspace.input_dir))
        next_id = max(existing_ids) + 1 if existing_ids else 0

        for id, picked_index in enumerate(picked_indices, next_id):
            basename = seqs_scores.iloc[picked_index]['path']
            target = os.path.join(predecessor.output_dir, basename)
            link_name = os.path.join(workspace.input_dir, '{0:04}.pdb.gz')
            scripting.relative_symlink(target, link_name.format(id))

    print "Picked {} designs.".format(len(picked_indices))

    if args['--dry-run']:
        print "(Dry run: no symlinks created.)"