def run(in_parms):
    """
    Run a GROMACS simulation using the PDBREMIX parms dictionary.

    Copies the topology/coordinate inputs into files named after
    parms['output_basename'], writes the .mdp parameter file, runs
    grompp and mdrun, and checks that restart files were produced.
    """
    parms = copy.deepcopy(in_parms)
    basename = parms['output_basename']

    # Copies across topology and related *.itp files, with appropriate
    # filename renaming in #includes
    top = basename + '.top'
    in_top = parms['topology']
    shutil.copy(in_top, top)
    in_name = os.path.basename(in_top).replace('.top', '')
    in_dir = os.path.dirname(in_top)
    file_tag = "%s/%s_*itp" % (in_dir, in_name)
    new_files = [top]
    for f in glob.glob(file_tag):
        new_f = os.path.basename(f)
        new_f = new_f.replace(in_name, basename)
        shutil.copy(f, new_f)
        new_files.append(new_f)
    for f in new_files:
        replace_include_file(f, in_name + "_", basename + "_")

    # Copy over input coordinates/velocities
    in_gro = basename + '.in.gro'
    shutil.copy(parms['input_crds'], in_gro)

    # Generates a positional-restraint topology file
    if parms['restraint_pdb']:
        # 1 kcal/mol/A^2 = 4.184 kJ/mol/A^2 = 418.4 kJ/mol/nm^2
        # (fixed: was 400.184, an arithmetic typo of 4.184 * 100)
        kcalmolang2_to_kJmolnm2 = 418.4
        restraint_itp = make_restraint_itp(
            parms['restraint_pdb'],
            parms['restraint_force'] * kcalmolang2_to_kJmolnm2)
        with open(basename + '_posre.itp', 'w') as out:
            out.write(restraint_itp)

    # Generate .mdp file based on parms
    in_mdp = basename + '.grompp.mdp'
    with open(in_mdp, 'w') as out:
        out.write(make_mdp(parms))

    # Now run grompp to generate this .tpr file
    tpr = basename + '.tpr'
    # .mdp to save complete set of parameters
    mdp = basename + '.mdrun.mdp'
    data.binary(
        'grompp',
        '-f %s -po %s -c %s -p %s -o %s'
            % (in_mdp, mdp, in_gro, top, tpr),
        basename + '.grompp')
    util.check_files(tpr)

    # Run simulation with the .tpr file
    data.binary('mdrun', '-v -deffnm %s' % (basename), basename + '.mdrun')

    top, crds, vels = get_restart_files(basename)
    util.check_output(top)
    util.check_output(crds)

    # Cleanup
    delete_backup_files(basename)
def merge_trajectories(basename, traj_basenames):
    """
    Splice consecutive trajectories into a single .trr trajectory.

    Given a bunch of directories with consecutive trajectories, all with
    the same basename, writes the spliced trajectory to basename + '.trr'
    in the current directory, then copies the last pulse's parameter
    files across.
    """
    save_dir = os.getcwd()
    trr_fname = basename + '.trr'
    trr_list = [b + '.trr' for b in traj_basenames]
    util.check_files(*trr_list)
    # .trr is a binary format: must open in 'wb', not 'w'. Text mode
    # corrupts the stream via newline translation (and raises TypeError
    # on Python 3 when bytes are written).
    with open(trr_fname, 'wb') as out:
        for trr in trr_list:
            reader = TrrReader(trr)
            # n_frame - 1: the last frame of each piece presumably
            # overlaps the first frame of the next -- TODO confirm
            for i_frame in range(reader.n_frame - 1):
                reader.file.seek(reader.size_frame * i_frame)
                out.write(reader.file.read(reader.size_frame))
    # Copy parameters of last pulse into current directory
    traj_basename = traj_basenames[-1]
    exts = ['.top', '.itp', '.tpr', '.mdrun.mdp', '.grompp.mdp', '.gro']
    for ext in exts:
        for f in glob.glob('%s*%s' % (traj_basename, ext)):
            g = f.replace(traj_basename, basename)
            shutil.copy(f, g)
            if g.endswith('.top'):
                replace_include_file(g, traj_basename + "_", basename + "_")
    os.chdir(save_dir)
def merge_trajectories(basename, traj_basenames):
    """
    Splice consecutive trajectories into a single .trr trajectory.

    Given a bunch of directories with consecutive trajectories, all with
    the same basename, writes the spliced trajectory to basename + '.trr'
    in the current directory, then copies the last pulse's parameter
    files across.
    """
    save_dir = os.getcwd()
    trr_fname = basename + '.trr'
    trr_list = [b + '.trr' for b in traj_basenames]
    util.check_files(*trr_list)
    # .trr is a binary format: must open in 'wb', not 'w'. Text mode
    # corrupts the stream via newline translation (and raises TypeError
    # on Python 3 when bytes are written).
    with open(trr_fname, 'wb') as out:
        for trr in trr_list:
            reader = TrrReader(trr)
            # n_frame - 1: the last frame of each piece presumably
            # overlaps the first frame of the next -- TODO confirm
            for i_frame in range(reader.n_frame - 1):
                reader.file.seek(reader.size_frame * i_frame)
                out.write(reader.file.read(reader.size_frame))
    # Copy parameters of last pulse into current directory
    traj_basename = traj_basenames[-1]
    exts = ['.top', '.itp', '.tpr', '.mdrun.mdp', '.grompp.mdp', '.gro']
    for ext in exts:
        for f in glob.glob('%s*%s' % (traj_basename, ext)):
            g = f.replace(traj_basename, basename)
            shutil.copy(f, g)
            if g.endswith('.top'):
                replace_include_file(g, traj_basename + "_", basename + "_")
    os.chdir(save_dir)
def master(args, comm):
    """
    Master process: drive the evolution and send evaluation tasks
    to the workers.

    Args:
        args: dict with command-in-line parameters.
        comm: MPI.COMM_WORLD.
    """
    logger = init_log(args['log_level'], name=__name__)

    if not os.path.exists(args['experiment_path']):
        logger.info(f"Creating {args['experiment_path']} ...")
        os.makedirs(args['experiment_path'])

    # Fresh evolution, or resume a previous one?
    if args['continue_path']:
        phase = 'continue_evolution'
        logger.info(f"Continue evolution from: {args['continue_path']}. "
                    f"Checking files ...")
        check_files(args['continue_path'])
    else:
        phase = 'evolution'

    logger.info(f"Getting parameters from {args['config_file']} ...")
    config = cfg.ConfigParameters(args, phase=phase)
    config.get_parameters()

    logger.info(f"Saving parameters for {config.phase} phase ...")
    config.save_params_logfile()

    # Evaluation function for QNAS (train CNN and return validation accuracy)
    eval_f = evaluation.EvalPopulation(
        params=config.train_spec,
        data_info=config.data_info,
        fn_dict=config.fn_dict,
        log_level=config.train_spec['log_level'],
        new_fn_dict=config.fn_new_dict)

    qnas_cnn = qnas.QNAS(
        eval_f,
        config.train_spec['experiment_path'],
        log_file=config.files_spec['log_file'],
        log_level=config.train_spec['log_level'],
        data_file=config.files_spec['data_file'])
    qnas_cnn.initialize_qnas(**config.QNAS_spec)

    # When resuming, reload the previous run's log file at its final generation.
    if phase == 'continue_evolution':
        logger.info(f"Loading {config.files_spec['previous_data_file']} file "
                    f"to get final generation ...")
        qnas_cnn.load_qnas_data(
            file_path=config.files_spec['previous_data_file'])

    # Execute evolution
    logger.info(f"Starting evolution ...")
    qnas_cnn.evolve()
    send_stop_signal(comm)
def get_restart_files(basename):
    """
    Return the (psf, coor, vel) restart files for basename.

    The .psf and coordinate files must exist; the velocity filename is
    replaced by '' when the file is absent.
    """
    psf, coor, vel = expand_restart_files(basename)
    util.check_files(psf, coor)
    vel_or_empty = vel if os.path.isfile(vel) else ''
    return psf, coor, vel_or_empty
def neutralize_system_with_salt(
        in_top, in_gro, basename, force_field):
    """
    Adds counterions to neutralize the overall charge of the system
    described by in_top, and saves coordinates to `basename.gro`.

    Raises:
        ValueError: if force_field does not name a recognized
            GROMACS version.
    """
    # Calculate overall charge in the .top file
    qtot = sum(q for mass, q, chain in read_top(in_top))
    counter_ion_charge = -int(round(qtot))
    if counter_ion_charge == 0:
        # Already neutral: pass the coordinates straight through.
        shutil.copy(in_gro, basename + '.gro')
        return

    # Create a .tpr parameter file for genion to find low-energy sites
    in_mdp = basename + '.salt.grompp.mdp'
    with open(in_mdp, 'w') as out:
        out.write(ions_mdp + force_field_mdp)
    top = basename + '.top'
    if in_top != top:
        shutil.copy(in_top, top)
    tpr = basename + '.salt.tpr'
    out_mdp = basename + '.mdp'
    data.binary(
        'grompp',
        '-f %s -po %s -c %s -p %s -o %s'
            % (in_mdp, out_mdp, in_gro, top, tpr),
        basename + '.salt.grompp')
    util.check_files(tpr)

    # Use genion to generate a gro of system with counterions
    gro = basename + '.gro'

    # Genion requires user input "SOL" to choose solvent for replacement
    input_fname = basename + '.salt.genion.in'
    with open(input_fname, 'w') as out:
        out.write('SOL')

    # Different versions of Gromacs use different counterions
    if 'GROMACS4.5' in force_field:
        charge_str = " -pname NA -nname CL "
    elif 'GROMACS4.0' in force_field:
        charge_str = " -pname NA+ -nname CL- "
    else:
        # Call-form raise works on Python 2 and 3; the old
        # `raise ValueError, msg` form is a SyntaxError on Python 3.
        raise ValueError("Cannot recognize force_field " + force_field)
    if counter_ion_charge > 0:
        charge_str += " -np %d " % counter_ion_charge
    else:
        charge_str += " -nn %d " % abs(counter_ion_charge)

    log = basename + '.salt.genion.log'
    data.binary(
        'genion',
        '-g %s -s %s -o %s -p %s -neutral %s'
            % (log, tpr, gro, top, charge_str),
        basename + '.salt.genion',
        input_fname)
    util.check_files(gro)
def neutralize_system_with_salt(in_top, in_gro, basename, force_field):
    """
    Adds counterions to neutralize the overall charge of the system
    described by in_top, and saves coordinates to `basename.gro`.

    Raises:
        ValueError: if force_field does not name a recognized
            GROMACS version.
    """
    # Calculate overall charge in the .top file
    qtot = sum(q for mass, q, chain in read_top(in_top))
    counter_ion_charge = -int(round(qtot))
    if counter_ion_charge == 0:
        # Already neutral: pass the coordinates straight through.
        shutil.copy(in_gro, basename + '.gro')
        return

    # Create a .tpr parameter file for genion to find low-energy sites
    in_mdp = basename + '.salt.grompp.mdp'
    with open(in_mdp, 'w') as out:
        out.write(ions_mdp + force_field_mdp)
    top = basename + '.top'
    if in_top != top:
        shutil.copy(in_top, top)
    tpr = basename + '.salt.tpr'
    out_mdp = basename + '.mdp'
    data.binary(
        'grompp',
        '-f %s -po %s -c %s -p %s -o %s'
            % (in_mdp, out_mdp, in_gro, top, tpr),
        basename + '.salt.grompp')
    util.check_files(tpr)

    # Use genion to generate a gro of system with counterions
    gro = basename + '.gro'

    # Genion requires user input "SOL" to choose solvent for replacement
    input_fname = basename + '.salt.genion.in'
    with open(input_fname, 'w') as out:
        out.write('SOL')

    # Different versions of Gromacs use different counterions
    if 'GROMACS4.5' in force_field:
        charge_str = " -pname NA -nname CL "
    elif 'GROMACS4.0' in force_field:
        charge_str = " -pname NA+ -nname CL- "
    else:
        # Call-form raise works on Python 2 and 3; the old
        # `raise ValueError, msg` form is a SyntaxError on Python 3.
        raise ValueError("Cannot recognize force_field " + force_field)
    if counter_ion_charge > 0:
        charge_str += " -np %d " % counter_ion_charge
    else:
        charge_str += " -nn %d " % abs(counter_ion_charge)

    log = basename + '.salt.genion.log'
    data.binary(
        'genion',
        '-g %s -s %s -o %s -p %s -neutral %s'
            % (log, tpr, gro, top, charge_str),
        basename + '.salt.genion',
        input_fname)
    util.check_files(gro)
def run(in_parms):
    """
    Run an AMBER simulation using the PDBREMIX in_parms dictionary.

    Copies topology/coordinate inputs into files named after
    parms['output_basename'], builds the sander input script (with
    optional positional restraints), runs sander, and checks the output.
    """
    parms = copy.deepcopy(in_parms)
    basename = parms['output_basename']

    # Copies across topology file
    input_top = parms['topology']
    util.check_files(input_top)
    new_top = basename + '.top'
    shutil.copy(input_top, new_top)

    # Copies over coordinate/velocity files
    input_crd = parms['input_crds']
    util.check_files(input_crd)
    if input_crd.endswith('.crd'):
        new_crd = basename + '.in.crd'
    else:
        new_crd = basename + '.in.rst'
    shutil.copy(input_crd, new_crd)

    # Decide on type of output coordinate/velocity file
    if 'n_step_minimization' in parms:
        rst = basename + ".crd"
    else:
        rst = basename + ".rst"

    # Construct the long list of arguments for sander
    trj = basename + ".trj"
    vel_trj = basename + ".vel.trj"
    ene = basename + ".ene"
    inf = basename + ".inf"
    sander_out = basename + ".sander.out"
    sander_in = basename + ".sander.in"
    args = (
        "-O -i %s -o %s -p %s -c %s -r %s -x %s -v %s -e %s -inf %s"
        % (sander_in, sander_out, new_top, new_crd, rst, trj,
           vel_trj, ene, inf))

    # Make the input script
    script = make_sander_input_file(parms)

    # If positional restraints
    if parms['restraint_pdb']:
        # Generate the AMBER .crd file that stores the constrained coordinates
        pdb = parms['restraint_pdb']
        soup = pdbatoms.Soup(pdb)
        ref_crd = basename + '.restraint.crd'
        write_soup_to_rst(soup, ref_crd)
        util.check_output(ref_crd)
        # Add the restraints .crd to the SANDER arguments
        args += " -ref %s" % ref_crd
        # Add the restraint forces and atom indices to the SANDER input file
        script += make_restraint_script(pdb, parms['restraint_force'])

    # with-statement guarantees the script is closed/flushed before
    # sander reads it (the original left the handle open).
    with open(sander_in, "w") as out:
        out.write(script)

    # Run the simulation
    data.binary('sander', args, basename)

    # Check if output is okay
    util.check_output(sander_out, ['FATAL'])
    top, crds, vels = get_restart_files(basename)
    util.check_output(top)
    util.check_output(crds)
def get_restart_files(basename):
    """
    Return the restart file triplet for basename.

    Topology and coordinate files must exist; the velocity filename is
    returned as-is.
    """
    topology, coords, velocities = expand_restart_files(basename)
    util.check_files(topology, coords)
    return topology, coords, velocities
def pdb_to_top_and_crds(force_field, pdb, basename, solvent_buffer=10):
    """
    Converts a PDB file into GROMACS topology and coordinate files, and
    fully converted PDB file. These constitute the restart files of
    a GROMACS simulation.

    Args:
        force_field: string naming the GROMACS version/force field.
        pdb: input PDB filename.
        basename: basename for all generated files.
        solvent_buffer: solvent padding around the solute, in Angstroms.
    Returns:
        (top, gro) filenames of topology and coordinates.
    Raises:
        ValueError: if force_field is not recognized.
    """
    util.check_files(pdb)
    full_pdb = os.path.abspath(pdb)
    save_dir = os.getcwd()

    # All intermediate files placed into a subdirectory
    util.goto_dir(basename + '.solvate')

    # Remove all but protein heavy atoms in a single clean conformation
    pdb = basename + '.clean.pdb'
    pdbtext.clean_pdb(full_pdb, pdb)

    # Generate protein topology in pdb2gmx_gro using pdb2gmx
    pdb2gmx_gro = basename + '.pdb2gmx.gro'
    top = basename + '.top'
    itp = basename + '_posre.itp'
    # Choose force field based on GROMACS version
    if 'GROMACS4.5' in force_field:
        ff = 'amber99'
    elif 'GROMACS4.0' in force_field:
        ff = 'G43a1'
    else:
        # Call-form raise works on Python 2 and 3; the old
        # `raise ValueError, msg` form is a SyntaxError on Python 3.
        raise ValueError("Couldn't work out pdb2gmx for " + force_field)
    args = (
        '-ignh -ff %s -water spc -missing -f %s -o %s -p %s -i %s '
        '-chainsep id_or_ter -merge all'
        % (ff, pdb, pdb2gmx_gro, top, itp))
    data.binary('pdb2gmx', args, basename + '.pdb2gmx')
    util.check_files(pdb2gmx_gro)

    # Now add a box with editconf
    box_gro = basename + '.box.gro'
    solvent_buffer_in_nm = solvent_buffer / 10.0
    data.binary(
        'editconf',
        '-f %s -o %s -c -d %f -bt cubic'
            % (pdb2gmx_gro, box_gro, solvent_buffer_in_nm),
        basename + '.box')
    util.check_files(box_gro)

    # Given box dimensions, can now populate with explicit waters
    solvated_gro = basename + '.solvated.gro'
    data.binary(
        'genbox',
        '-cp %s -cs spc216.gro -o %s -p %s'
            % (box_gro, solvated_gro, top),
        '%s.solvated' % basename)
    util.check_files(solvated_gro)

    # Neutralize with counterions using genion to place ions
    # based on energy parameters processed by grompp
    gro = basename + '.gro'
    neutralize_system_with_salt(top, solvated_gro, basename, force_field)
    util.check_files(gro)

    # Make a reference PDB file from restart files for viewing and restraints
    convert_restart_to_pdb(basename, basename + '.pdb')

    # Copy finished restart files back into original directory
    fnames = util.re_glob(
        '*', os.path.basename(basename) + r'[^\.]*\.(pdb|itp|gro|mdp|top)$')
    for fname in fnames:
        shutil.copy(fname, save_dir)

    # Cleanup
    delete_backup_files(basename)
    os.chdir(save_dir)
    return top, gro
def run(in_parms):
    """
    Run a GROMACS simulation using the PDBREMIX parms dictionary.

    Copies the topology/coordinate inputs into files named after
    parms['output_basename'], writes the .mdp parameter file, runs
    grompp and mdrun, and checks that restart files were produced.
    """
    parms = copy.deepcopy(in_parms)
    basename = parms['output_basename']

    # Copies across topology and related *.itp files, with appropriate
    # filename renaming in #includes
    top = basename + '.top'
    in_top = parms['topology']
    shutil.copy(in_top, top)
    in_name = os.path.basename(in_top).replace('.top', '')
    in_dir = os.path.dirname(in_top)
    file_tag = "%s/%s_*itp" % (in_dir, in_name)
    new_files = [top]
    for f in glob.glob(file_tag):
        new_f = os.path.basename(f)
        new_f = new_f.replace(in_name, basename)
        shutil.copy(f, new_f)
        new_files.append(new_f)
    for f in new_files:
        replace_include_file(f, in_name + "_", basename + "_")

    # Copy over input coordinates/velocities
    in_gro = basename + '.in.gro'
    shutil.copy(parms['input_crds'], in_gro)

    # Generates a positional-restraint topology file
    if parms['restraint_pdb']:
        # 1 kcal/mol/A^2 = 4.184 kJ/mol/A^2 = 418.4 kJ/mol/nm^2
        # (fixed: was 400.184, an arithmetic typo of 4.184 * 100)
        kcalmolang2_to_kJmolnm2 = 418.4
        restraint_itp = make_restraint_itp(
            parms['restraint_pdb'],
            parms['restraint_force'] * kcalmolang2_to_kJmolnm2)
        with open(basename + '_posre.itp', 'w') as out:
            out.write(restraint_itp)

    # Generate .mdp file based on parms
    in_mdp = basename + '.grompp.mdp'
    with open(in_mdp, 'w') as out:
        out.write(make_mdp(parms))

    # Now run grompp to generate this .tpr file
    tpr = basename + '.tpr'
    # .mdp to save complete set of parameters
    mdp = basename + '.mdrun.mdp'
    data.binary(
        'grompp',
        '-f %s -po %s -c %s -p %s -o %s'
            % (in_mdp, mdp, in_gro, top, tpr),
        basename + '.grompp')
    util.check_files(tpr)

    # Run simulation with the .tpr file
    data.binary('mdrun', '-v -deffnm %s' % (basename), basename + '.mdrun')

    top, crds, vels = get_restart_files(basename)
    util.check_output(top)
    util.check_output(crds)

    # Cleanup
    delete_backup_files(basename)
def pdb_to_top_and_crds(force_field, pdb, basename, solvent_buffer=10):
    """
    Converts a PDB file into GROMACS topology and coordinate files, and
    fully converted PDB file. These constitute the restart files of
    a GROMACS simulation.

    Args:
        force_field: string naming the GROMACS version/force field.
        pdb: input PDB filename.
        basename: basename for all generated files.
        solvent_buffer: solvent padding around the solute, in Angstroms.
    Returns:
        (top, gro) filenames of topology and coordinates.
    Raises:
        ValueError: if force_field is not recognized.
    """
    util.check_files(pdb)
    full_pdb = os.path.abspath(pdb)
    save_dir = os.getcwd()

    # All intermediate files placed into a subdirectory
    util.goto_dir(basename + '.solvate')

    # Remove all but protein heavy atoms in a single clean conformation
    pdb = basename + '.clean.pdb'
    pdbtext.clean_pdb(full_pdb, pdb)

    # Generate protein topology in pdb2gmx_gro using pdb2gmx
    pdb2gmx_gro = basename + '.pdb2gmx.gro'
    top = basename + '.top'
    itp = basename + '_posre.itp'
    # Choose force field based on GROMACS version
    if 'GROMACS4.5' in force_field:
        ff = 'amber99'
    elif 'GROMACS4.0' in force_field:
        ff = 'G43a1'
    else:
        # Call-form raise works on Python 2 and 3; the old
        # `raise ValueError, msg` form is a SyntaxError on Python 3.
        raise ValueError("Couldn't work out pdb2gmx for " + force_field)
    args = (
        '-ignh -ff %s -water spc -missing -f %s -o %s -p %s -i %s '
        '-chainsep id_or_ter -merge all'
        % (ff, pdb, pdb2gmx_gro, top, itp))
    data.binary('pdb2gmx', args, basename + '.pdb2gmx')
    util.check_files(pdb2gmx_gro)

    # Now add a box with editconf
    box_gro = basename + '.box.gro'
    solvent_buffer_in_nm = solvent_buffer / 10.0
    data.binary(
        'editconf',
        '-f %s -o %s -c -d %f -bt cubic'
            % (pdb2gmx_gro, box_gro, solvent_buffer_in_nm),
        basename + '.box')
    util.check_files(box_gro)

    # Given box dimensions, can now populate with explicit waters
    solvated_gro = basename + '.solvated.gro'
    data.binary(
        'genbox',
        '-cp %s -cs spc216.gro -o %s -p %s'
            % (box_gro, solvated_gro, top),
        '%s.solvated' % basename)
    util.check_files(solvated_gro)

    # Neutralize with counterions using genion to place ions
    # based on energy parameters processed by grompp
    gro = basename + '.gro'
    neutralize_system_with_salt(top, solvated_gro, basename, force_field)
    util.check_files(gro)

    # Make a reference PDB file from restart files for viewing and restraints
    convert_restart_to_pdb(basename, basename + '.pdb')

    # Copy finished restart files back into original directory
    fnames = util.re_glob(
        '*', os.path.basename(basename) + r'[^\.]*\.(pdb|itp|gro|mdp|top)$')
    for fname in fnames:
        shutil.copy(fname, save_dir)

    # Cleanup
    delete_backup_files(basename)
    os.chdir(save_dir)
    return top, gro
def main(**args):
    """
    Retrain an evolved model.

    Loads the evolution results in args['experiment_path'], reads the
    requested generation/individual, optionally overrides the training
    scheme, then trains and evaluates the network.
    """
    logger = init_log(args['log_level'], name=__name__)

    # Check if *experiment_path* contains all the necessary files to
    # retrain an evolved model
    check_files(args['experiment_path'])

    # Get all parameters
    logger.info(f"Getting parameters from evolution ...")
    config = cfg.ConfigParameters(args, phase='retrain')
    config.get_parameters()

    # Load log file and read it at the specified generation
    s = (f"last generation" if args['generation'] is None
         else f"generation {args['generation']}")
    logger.info(f"Loading {config.files_spec['data_file']} file to get {s}, "
                f"individual {args['individual']} ...")
    config.load_evolved_data(generation=args['generation'],
                             individual=args['individual'])

    if args['lr_schedule'] is not None:
        special_params = train.train_schemes_map[args['lr_schedule']].get_params()
        logger.info(f"Overriding train parameters to use special scheme "
                    f"'{args['lr_schedule']}' ...")
        config.override_train_params(special_params)

    # It is important to merge the dicts with the evolved_params first, as
    # they need to be overwritten in case we are using one of the special
    # train schemes.
    train_params = {**config.evolved_params['params'], **config.train_spec}

    # NOTE(review): this hard-coded architecture silently overrides the
    # evolved network loaded by load_evolved_data() above; kept to preserve
    # current behavior, but it means `generation`/`individual` have no
    # effect on the trained net. TODO: remove once this experiment is done.
    # (A second hard-coded candidate list that was commented out here has
    # been deleted as dead code.)
    best_ind_tese = ['conv_5_1_512', 'conv_3_1_128', 'conv_3_1_512',
                     'conv_5_1_256', 'avg_pool_2_2', 'conv_3_1_256',
                     'avg_pool_2_2', 'conv_5_1_128', 'avg_pool_2_2',
                     'max_pool_2_2']
    config.evolved_params['net'] = best_ind_tese

    logger.info(f"Starting training of model {config.evolved_params['net']}")
    valid_acc, test_info = train.train_and_eval(
        data_info=config.data_info,
        params=train_params,
        fn_dict=config.fn_dict,
        net_list=config.evolved_params['net'],
        lr_schedule=args['lr_schedule'],
        run_train_eval=args['run_train_eval'])

    logger.info(f"Saving parameters...")
    config.save_params_logfile()

    logger.info(f"Best accuracy in validation set: {valid_acc:.5f}")
    logger.info(f"Final test accuracy: {test_info['accuracy']:.5f}")
    logger.info(f"Final test confusion matrix:\n{test_info['confusion_matrix']}")