Beispiel #1
0
def generate_pam_variants_from_mutant(mutant_idx,
                                      total_pam_variants,
                                      flag_chimera=True):
    """Build one PDB per PAM sequence for the mutant selected by mutant_idx
    Args:
        mutant_idx:  int >= 0 (0 is default dCas9)
        total_pam_variants: 64 or 256 -- the count of all 3nt or 4nt pam variants
        flag_chimera: [default: True] when truthy the variants are built with chimera,
            otherwise with 3DNA
    Returns:
        path to the pam variant folder (containing 64 or 256 pdbs)
    """
    # only 3nt (4**3) and 4nt (4**4) PAM sites are currently supported
    assert total_pam_variants in (64, 256)
    template_pdb = mutant_template_pdb_path_by_idx(mutant_idx)
    variants_dir = mutant_variants_dir_by_idx(mutant_idx)

    # pick the backend that writes a single variant pdb
    build_variant = (generate_pam_variant_chimera
                     if flag_chimera else generate_pam_variant_3dna)

    # number of nucleotides in each PAM: total == 4 ** nt_count
    nt_count = int(log(total_pam_variants, 4))
    pam_strings = (pam_string_from_int(code, nt_count)
                   for code in xrange(total_pam_variants))
    for pam in pam_strings:
        build_variant(pam, template_pdb, variants_dir)
    return variants_dir
Beispiel #2
0
def dock_variants(pam_variants,
                  path_to_scores,
                  path_to_pdbs='',
                  dock_partners="B_ACD",
                  foldtree=None,
                  pam_length=4,
                  pam_tool='Chimera',
                  complex_docking_flag=False):
    """Docks and scores a pdb for each PAM variant (created using pam_tool)
    Args:
        pam_variants: list of integers (without repeats), each mapped to a pam string of
            length pam_length by pam_string_from_int; presumably any value in
            range(4 ** pam_length), i.e. 0 to 63 for 3nt PAMs -- TODO confirm
        path_to_scores: path to the subdirectory of "results" where the variants are stored
        path_to_pdbs: [default: ''] directory holding the Chimera/3DNA folders of PAM variants;
            the empty default makes the pdb path relative to the current directory
        dock_partners: [default: "B_ACD"] string for the set_partners(...) method for docking
        foldtree: [default: None] string for the 2nd setup_foldtree(...) argument, None implies default foldtree
        pam_length: [default: 4] length of the pam sequence to be investigated
        pam_tool: [default: 'Chimera'] either "3DNA" or "Chimera" (must be in PAM_TOOLS)
        complex_docking_flag: [default: False] if True, use complex dock function (NOT IMPLEMENTED)
    Returns:
        None
    Notes:
    - reads each pdb from <path_to_pdbs>/<pam_tool>/4UN3.<variant>.pdb
    - writes one text file per variant named <variant>_<pam_tool>.txt via write_dock_stats
    - path to variants is typically root/results/<timestamped folder>/<variants>
    - assumes current directory is the root of a folder that contains pdbs in Chimera and 3DNA directories
    """
    assert pam_tool in PAM_TOOLS
    for idx in pam_variants:
        variant = pam_string_from_int(idx, pam_length)
        # a parenthesised single-argument print gives the same output under
        # the Python 2 print statement and the Python 3 print function
        print("Running for variant: %s_%s" % (variant, pam_tool))
        pdb_path = os.path.join(path_to_pdbs, pam_tool,
                                "4UN3." + variant + ".pdb")

        # track runtime while loading and passing pose to the docker:
        # "total" includes loading the pdb, "docking" only the docking call
        time_init_total = time()
        loaded_pose = pose_from_pdb(pdb_path)

        time_init_docking = time()
        if complex_docking_flag:
            dock_stats = dock_complex(loaded_pose)
        else:
            dock_stats = dock_simple(loaded_pose, dock_partners, foldtree)

        time_final = time()
        time_diff_total = time_final - time_init_total
        time_diff_docking = time_final - time_init_docking

        # write results to file
        results_filename = variant + "_" + pam_tool + ".txt"
        write_dock_stats(path_to_scores, results_filename, dock_stats,
                         time_diff_total, time_diff_docking)
        print("Finished writing scores for variant: %s_%s" % (variant,
                                                              pam_tool))
def dock_variants(pam_variants, path_to_scores, path_to_pdbs='', dock_partners="B_ACD", foldtree=None,
                  pam_length=4, pam_tool='Chimera', complex_docking_flag=False):
    """Docks and scores a pdb for each PAM variant (created using pam_tool)
    Args:
        pam_variants: list of integers (without repeats), each mapped to a pam string of
            length pam_length by pam_string_from_int; presumably any value in
            range(4 ** pam_length), i.e. 0 to 63 for 3nt PAMs -- TODO confirm
        path_to_scores: path to the subdirectory of "results" where the variants are stored
        path_to_pdbs: [default: ''] directory holding the Chimera/3DNA folders of PAM variants;
            the empty default makes the pdb path relative to the current directory
        dock_partners: [default: "B_ACD"] string for the set_partners(...) method for docking
        foldtree: [default: None] string for the 2nd setup_foldtree(...) argument, None implies default foldtree
        pam_length: [default: 4] length of the pam sequence to be investigated
        pam_tool: [default: 'Chimera'] either "3DNA" or "Chimera" (must be in PAM_TOOLS)
        complex_docking_flag: [default: False] if True, use complex dock function (NOT IMPLEMENTED)
    Returns:
        None
    Notes:
    - reads each pdb from <path_to_pdbs>/<pam_tool>/4UN3.<variant>.pdb
    - writes one text file per variant named <variant>_<pam_tool>.txt via write_dock_stats
    - path to variants is typically root/results/<timestamped folder>/<variants>
    - assumes current directory is the root of a folder that contains pdbs in Chimera and 3DNA directories
    """
    assert pam_tool in PAM_TOOLS
    for idx in pam_variants:
        variant = pam_string_from_int(idx, pam_length)
        # parenthesised single-argument print: identical output on Python 2, valid syntax on Python 3
        print("Running for variant: %s_%s" % (variant, pam_tool))
        pdb_path = os.path.join(path_to_pdbs, pam_tool, "4UN3." + variant + ".pdb")

        # track runtime: "total" includes loading the pdb, "docking" only the docking call
        time_init_total = time()
        loaded_pose = pose_from_pdb(pdb_path)

        time_init_docking = time()
        if complex_docking_flag:
            dock_stats = dock_complex(loaded_pose)
        else:
            dock_stats = dock_simple(loaded_pose, dock_partners, foldtree)

        time_final = time()
        time_diff_total = time_final - time_init_total
        time_diff_docking = time_final - time_init_docking

        # write results to file
        results_filename = variant + "_" + pam_tool + ".txt"
        write_dock_stats(path_to_scores, results_filename, dock_stats, time_diff_total, time_diff_docking)
        print("Finished writing scores for variant: %s_%s" % (variant, pam_tool))
def generate_pam_variants_from_mutant(mutant_idx, total_pam_variants, flag_chimera=True):
    """Write a pdb for every possible PAM of the chosen mutant
    Args:
        mutant_idx:  int >= 0 (0 is default dCas9)
        total_pam_variants: 64 or 256 -- i.e. every 3nt or every 4nt pam variant
        flag_chimera: [default: True] truthy selects chimera as the variant generator, falsy selects 3DNA
    Returns:
        path to the pam variant folder (containing 64 or 256 pdbs)
    """
    assert total_pam_variants in (64, 256)  # currently support 3 or 4 nt PAM sites
    template_path = mutant_template_pdb_path_by_idx(mutant_idx)
    output_dir = mutant_variants_dir_by_idx(mutant_idx)

    # choose which tool writes each individual variant
    if not flag_chimera:
        make_variant = generate_pam_variant_3dna
    else:
        make_variant = generate_pam_variant_chimera

    # PAM length in nucleotides, since total_pam_variants == 4 ** length
    n_nucleotides = int(log(total_pam_variants, 4))
    for pam_code in xrange(total_pam_variants):
        make_variant(pam_string_from_int(pam_code, n_nucleotides), template_path, output_dir)
    return output_dir

if __name__ == '__main__':
    # Command-line entry point: for every PAM of the requested length, write a
    # new pdb derived from the input pdb using the 3DNA variant generator.

    # create parser and parse arguments
    parser = argparse.ArgumentParser(description='Generate PDBs with new PAM sites based on input PDB with Cas9 variant')
    parser.add_argument('-n', '--num_pams', metavar='N', type=str, # string type because of how this script must be run by Chimera
                        help='how many PAMs in total to run. 64 = all PAMs of length 3, 256 = all PAMs of length 4')
    parser.add_argument('-i', '--input_pdb', metavar='F', type=str,
                        help='input PDB file containing Cas9 mutant of interest')
    parser.add_argument('-o', '--output_dir', metavar='D', type=str,
                        help='path to output directory for new PDBs')
    args = parser.parse_args()

    # all three options must have been supplied
    assert args.num_pams is not None
    assert args.input_pdb is not None
    assert args.output_dir is not None
    args.num_pams = int(args.num_pams) # convert string from command line to int
    # validate BEFORE taking the logarithm: math.log raises ValueError for
    # values <= 0, which would otherwise pre-empt this check
    assert 64 == args.num_pams or 256 == args.num_pams
    pam_length = int(math.log(args.num_pams, 4))
    assert os.path.isfile(args.input_pdb)

    try: # create unconditionally and inspect the error, to handle concurrency problems
        os.makedirs(args.output_dir)
    except OSError as exc:
        # an already-existing directory is fine; anything else is a real failure
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.output_dir)):
            raise

    for i in xrange(args.num_pams):
        generate_pam_variant_3dna(pam_string_from_int(i, pam_length), args.input_pdb, args.output_dir)
Beispiel #6
0
    # NOTE(review): this is the tail of a script body; the code that builds
    # `args` (presumably an argparse parser) is outside this excerpt.
    args.num_pams = int(
        args.num_pams)  # convert string from command line to int
    # PAM length in nucleotides: num_pams == 4 ** pam_length for 64 / 256
    pam_length = int(math.log(args.num_pams, 4))
    # NOTE(review): this check runs after math.log, so a value <= 0 fails in
    # math.log (ValueError) before reaching the assertion
    assert 64 == args.num_pams or 256 == args.num_pams
    assert os.path.isfile(args.input_pdb)

    try:  # check existence again to handle concurrency problems
        os.makedirs(args.output_dir)
    except OSError as exc:
        # an already-existing output directory is tolerated; any other
        # OSError is re-raised
        if exc.errno == errno.EEXIST and os.path.isdir(args.output_dir):
            pass
        else:
            raise

    # one iteration per PAM: i is converted to its PAM string
    for i in xrange(args.num_pams):
        pam = pam_string_from_int(i, pam_length)

        # open up the file again each time, for now
        runCommand("open " + args.input_pdb)

        # loop through the PAM sequence and mutate positions
        for pam_idx in xrange(pam_length):
            '''This has been commented out so that all nucleotides are changed
            regardless of whether the match the original pdb or not.'''
            # If the nt matches the original PAM nt, don't change it
            #if PAM_TEMPLATE_SEQUENCE[pam_idx] == pam[pam_idx]:
            #    continue
            # every position is passed to mutate_nt, matching or not
            mutate_nt(pam_idx, pam[pam_idx])

        # save and close all files
        generate_pam_variant_chimera(pam, args.input_pdb, args.output_dir)
    # NOTE(review): this is the tail of a script body; the code that builds
    # `args` (presumably an argparse parser) is outside this excerpt.
    assert args.input_pdb is not None
    assert args.output_dir is not None
    args.num_pams = int(args.num_pams) # convert string from command line to int
    # PAM length in nucleotides: num_pams == 4 ** pam_length for 64 / 256
    pam_length = int(math.log(args.num_pams, 4))
    # NOTE(review): this check runs after math.log, so a value <= 0 fails in
    # math.log (ValueError) before reaching the assertion
    assert 64 == args.num_pams or 256 == args.num_pams
    assert os.path.isfile(args.input_pdb)

    try: # check existence again to handle concurrency problems
        os.makedirs(args.output_dir)
    except OSError as exc:
        # an already-existing output directory is tolerated; any other
        # OSError is re-raised
        if exc.errno == errno.EEXIST and os.path.isdir(args.output_dir):
            pass
        else: raise

    # one iteration per PAM: i is converted to its PAM string
    for i in xrange(args.num_pams):
        pam = pam_string_from_int(i, pam_length)

        # open up the file again each time, for now
        runCommand("open " + args.input_pdb)

        # loop through the PAM sequence and mutate positions
        for pam_idx in xrange(pam_length):
            '''This has been commented out so that all nucleotides are changed
            regardless of whether the match the original pdb or not.'''
            # If the nt matches the original PAM nt, don't change it
            #if PAM_TEMPLATE_SEQUENCE[pam_idx] == pam[pam_idx]:
            #    continue
            # every position is passed to mutate_nt, matching or not
            mutate_nt(pam_idx, pam[pam_idx])

        # save and close all files
        generate_pam_variant_chimera(pam, args.input_pdb, args.output_dir)
        metavar='F',
        type=str,
        help='input PDB file containing Cas9 mutant of interest')
    # NOTE(review): this is the tail of a script body; the creation of
    # `parser` and its earlier options are outside this excerpt.
    parser.add_argument('-o',
                        '--output_dir',
                        metavar='D',
                        type=str,
                        help='path to output directory for new PDBs')
    args = parser.parse_args()

    # all three options must have been supplied
    assert args.num_pams is not None
    assert args.input_pdb is not None
    assert args.output_dir is not None
    args.num_pams = int(
        args.num_pams)  # convert string from command line to int
    # PAM length in nucleotides: num_pams == 4 ** pam_length for 64 / 256
    pam_length = int(math.log(args.num_pams, 4))
    # NOTE(review): this check runs after math.log, so a value <= 0 fails in
    # math.log (ValueError) before reaching the assertion
    assert 64 == args.num_pams or 256 == args.num_pams
    assert os.path.isfile(args.input_pdb)

    try:  # check existence again to handle concurrency problems
        os.makedirs(args.output_dir)
    except OSError as exc:
        # an already-existing output directory is tolerated; any other
        # OSError is re-raised
        if exc.errno == errno.EEXIST and os.path.isdir(args.output_dir):
            pass
        else:
            raise

    # one generate_pam_variant_3dna call per PAM string
    for i in xrange(args.num_pams):
        generate_pam_variant_3dna(pam_string_from_int(i, pam_length),
                                  args.input_pdb, args.output_dir)