def generate_pam_variants_from_mutant(mutant_idx, total_pam_variants, flag_chimera=True):
    """Generate all the pam variants for a given mutant

    Args:
        mutant_idx: int >= 0 (0 is default dCas9)
        total_pam_variants: 64 or 256 -- represents all 3nt or 4nt pam variants
        flag_chimera: [default: True] if flag_chimera, then use chimera to generate
            variants (else use 3DNA)
    Returns:
        path to the pam variant folder (containing 64 or 256 pdbs)
    Raises:
        AssertionError: if total_pam_variants is neither 64 nor 256
    """
    # currently support 3 or 4 nt PAM sites; an exact table is used instead of
    # int(log(total, 4)), which truncates a floating-point result
    pam_length_by_total = {64: 3, 256: 4}
    assert total_pam_variants in pam_length_by_total, (
        "total_pam_variants must be 64 or 256, got %r" % (total_pam_variants,))
    pam_length = pam_length_by_total[total_pam_variants]

    mutant_template_pdb_path = mutant_template_pdb_path_by_idx(mutant_idx)
    mutant_variants_pdb_dir = mutant_variants_dir_by_idx(mutant_idx)

    # select a tool for generating the pam variants
    if flag_chimera:
        pam_variant_tool = generate_pam_variant_chimera
    else:
        pam_variant_tool = generate_pam_variant_3dna

    # one call to the selected tool per PAM string
    for pam_int in range(total_pam_variants):
        pam_string = pam_string_from_int(pam_int, pam_length)
        pam_variant_tool(pam_string, mutant_template_pdb_path, mutant_variants_pdb_dir)

    return mutant_variants_pdb_dir
def dock_variants(pam_variants, path_to_scores, path_to_pdbs='', dock_partners="B_ACD", foldtree=None,
                  pam_length=4, pam_tool='Chimera', complex_docking_flag=False):
    """Docks and scores a pdb for each PAM variant (created using pam_tool) using simple docking

    Args:
        pam_variants: list of integers, without repeats, which map to pam strings through
            pam_string_from_int(idx, pam_length) (e.g. any from 0 to 63 for 3nt PAMs)
        path_to_scores: path to the subdirectory of "results" where the variants are stored
        path_to_pdbs: [default: current directory] path to location of chimera/3DNA folders of PAM variants
        dock_partners: [default: "B_ACD"] string for the set_partners(...) method for docking
        foldtree: [default: None] string for the 2nd setup_foldtree(...) argument, None implies default foldtree
        pam_length: [default: 4] length of the pam sequence to be investigated
        pam_tool: [default: 'Chimera'] either "3DNA" or "Chimera"
        complex_docking_flag: [default: False] if True, use complex dock function (NOT IMPLEMENTED)
    Notes:
        - creates a text file (e.g. 'results_agg_Chimera.txt') for each variant
        - path to variants is typically root/results/<timestamped folder>/<variants>
        - assumes current directory is the root of a folder that contains pdbs in Chimera and 3DNA directories
    """
    assert pam_tool in PAM_TOOLS

    for idx in pam_variants:
        variant = pam_string_from_int(idx, pam_length)
        # single-argument print(...) behaves the same on Python 2 and Python 3
        print("Running for variant: %s_%s" % (variant, pam_tool))
        pdb_path = os.path.join(path_to_pdbs, pam_tool, "4UN3." + variant + ".pdb")

        # track runtime while loading and passing pose to the simple docker
        time_init_total = time()
        loaded_pose = pose_from_pdb(pdb_path)
        time_init_docking = time()
        if complex_docking_flag:
            dock_stats = dock_complex(loaded_pose)
        else:
            dock_stats = dock_simple(loaded_pose, dock_partners, foldtree)
        time_final = time()
        # total time includes loading the pose; docking time does not
        time_diff_total = time_final - time_init_total
        time_diff_docking = time_final - time_init_docking

        # write results to file
        results_filename = variant + "_" + pam_tool + ".txt"
        write_dock_stats(path_to_scores, results_filename, dock_stats, time_diff_total, time_diff_docking)
        print("Finished writing scores for variant: %s_%s" % (variant, pam_tool))
if __name__ == '__main__':
    # Command-line entry point: write one PDB per PAM string (64 or 256 of them)
    # into the output directory using generate_pam_variant_3dna.

    # create parser and parse arguments
    parser = argparse.ArgumentParser(
        description='Generate PDBs with new PAM sites based on input PDB with Cas9 variant')
    parser.add_argument('-n', '--num_pams', metavar='N',
                        type=str,  # string type because of how this script must be run by Chimera
                        help='how many PAMs in total to run. 64 = all PAMs of length 3, 256 = all PAMs of length 4')
    parser.add_argument('-i', '--input_pdb', metavar='F', type=str,
                        help='input PDB file containing Cas9 mutant of interest')
    parser.add_argument('-o', '--output_dir', metavar='D', type=str,
                        help='path to output directory for new PDBs')
    args = parser.parse_args()

    # every option is mandatory; report a usage error rather than relying on
    # asserts, which are stripped when Python runs with -O
    for option_name in ('num_pams', 'input_pdb', 'output_dir'):
        if getattr(args, option_name) is None:
            parser.error('argument --%s is required' % option_name)

    try:
        args.num_pams = int(args.num_pams)  # convert string from command line to int
    except ValueError:
        parser.error('--num_pams must be an integer, got %r' % (args.num_pams,))

    # validate before deriving the PAM length; the exact table replaces
    # int(math.log(n, 4)), which failed on n <= 0 and truncated a float
    pam_length_by_total = {64: 3, 256: 4}
    if args.num_pams not in pam_length_by_total:
        parser.error('--num_pams must be 64 or 256, got %d' % args.num_pams)
    pam_length = pam_length_by_total[args.num_pams]

    if not os.path.isfile(args.input_pdb):
        parser.error('input PDB file not found: %s' % args.input_pdb)

    try:
        os.makedirs(args.output_dir)
    except OSError as exc:
        # check existence again to handle concurrency problems: an already
        # existing directory is fine, anything else is a real error
        if exc.errno != errno.EEXIST or not os.path.isdir(args.output_dir):
            raise

    for pam_int in range(args.num_pams):
        generate_pam_variant_3dna(pam_string_from_int(pam_int, pam_length), args.input_pdb, args.output_dir)
# Script fragment: validates the parsed arguments, prepares the output
# directory, then builds one PAM variant per PAM string.
# NOTE(review): `args` is created outside this fragment -- confirm against the
# enclosing script.
args.num_pams = int(args.num_pams)  # convert string from command line to int

# validate before deriving the PAM length; the exact table replaces
# int(math.log(n, 4)), which failed on n <= 0 and truncated a float
pam_length_by_total = {64: 3, 256: 4}
assert args.num_pams in pam_length_by_total, \
    "num_pams must be 64 or 256, got %r" % (args.num_pams,)
pam_length = pam_length_by_total[args.num_pams]
assert os.path.isfile(args.input_pdb), \
    "input PDB file not found: %s" % (args.input_pdb,)

try:
    os.makedirs(args.output_dir)
except OSError as exc:
    # check existence again to handle concurrency problems: an already
    # existing directory is fine, anything else is a real error
    if exc.errno != errno.EEXIST or not os.path.isdir(args.output_dir):
        raise

for pam_int in range(args.num_pams):
    pam = pam_string_from_int(pam_int, pam_length)
    # open up the file again each time, for now
    runCommand("open " + args.input_pdb)
    # loop through the PAM sequence and mutate positions
    for pam_idx in range(pam_length):
        # All nucleotides are changed regardless of whether they match the
        # original pdb or not; the skip below was deliberately disabled:
        # if PAM_TEMPLATE_SEQUENCE[pam_idx] == pam[pam_idx]:
        #     continue
        mutate_nt(pam_idx, pam[pam_idx])
    # save and close all files
    generate_pam_variant_chimera(pam, args.input_pdb, args.output_dir)
# Script fragment: validates the parsed arguments, prepares the output
# directory, then builds one PAM variant per PAM string.
# NOTE(review): `args` is created outside this fragment -- confirm against the
# enclosing script.
assert args.input_pdb is not None, "an input PDB file is required"
assert args.output_dir is not None, "an output directory is required"
args.num_pams = int(args.num_pams)  # convert string from command line to int

# validate before deriving the PAM length; the exact table replaces
# int(math.log(n, 4)), which failed on n <= 0 and truncated a float
pam_length_by_total = {64: 3, 256: 4}
assert args.num_pams in pam_length_by_total, \
    "num_pams must be 64 or 256, got %r" % (args.num_pams,)
pam_length = pam_length_by_total[args.num_pams]
assert os.path.isfile(args.input_pdb), \
    "input PDB file not found: %s" % (args.input_pdb,)

try:
    os.makedirs(args.output_dir)
except OSError as exc:
    # check existence again to handle concurrency problems: an already
    # existing directory is fine, anything else is a real error
    if exc.errno != errno.EEXIST or not os.path.isdir(args.output_dir):
        raise

for pam_int in range(args.num_pams):
    pam = pam_string_from_int(pam_int, pam_length)
    # open up the file again each time, for now
    runCommand("open " + args.input_pdb)
    # loop through the PAM sequence and mutate positions
    for pam_idx in range(pam_length):
        # All nucleotides are changed regardless of whether they match the
        # original pdb or not; the skip below was deliberately disabled:
        # if PAM_TEMPLATE_SEQUENCE[pam_idx] == pam[pam_idx]:
        #     continue
        mutate_nt(pam_idx, pam[pam_idx])
    # save and close all files
    generate_pam_variant_chimera(pam, args.input_pdb, args.output_dir)
metavar='F', type=str, help='input PDB file containing Cas9 mutant of interest')
# NOTE(review): the line above is the tail of a call whose opening is not
# visible in this fragment -- presumably a parser.add_argument(...) for the
# input PDB option; confirm against the full script.
parser.add_argument('-o', '--output_dir', metavar='D', type=str, help='path to output directory for new PDBs')
args = parser.parse_args()

# all three values must have been supplied on the command line
assert args.num_pams is not None
assert args.input_pdb is not None
assert args.output_dir is not None
args.num_pams = int( args.num_pams) # convert string from command line to int
# NOTE: pam_length is derived before num_pams is checked against 64/256 below
pam_length = int(math.log(args.num_pams, 4))
assert 64 == args.num_pams or 256 == args.num_pams
assert os.path.isfile(args.input_pdb)

try: # check existence again to handle concurrency problems
    os.makedirs(args.output_dir)
except OSError as exc:
    # tolerate an output directory that already exists; re-raise anything else
    if exc.errno == errno.EEXIST and os.path.isdir(args.output_dir):
        pass
    else:
        raise

# one generate_pam_variant_3dna call per integer in [0, num_pams), each
# converted to a PAM string of length pam_length
for i in xrange(args.num_pams):
    generate_pam_variant_3dna(pam_string_from_int(i, pam_length), args.input_pdb, args.output_dir)