def writeInitialStructures(field1, field2, crit1, crit2, centers_info, filename_template, traj, topology=None, use_pdb=False):
    """
    Write one structure file per cluster listed in ``centers_info``.

    :param field1: Values of the first criterion, one per cluster, in the
        iteration order of ``centers_info``.
    :param field2: Values of the second criterion, same ordering.
    :param crit1: Label of the first criterion (used only in the file name).
    :param crit2: Label of the second criterion (used only in the file name).
    :param centers_info: Mapping ``cluster_num -> info`` where
        ``info['structure']`` holds three int-convertible items:
        epoch, trajectory number and snapshot index.
    :param filename_template: ``str.format`` template receiving
        ``(cluster_num, crit1, value1, crit2, value2)`` positionally.
    :param traj: Trajectory base name placed between the epoch folder and
        the trajectory number.
    :param topology: When truthy, trajectories are read as ``.xtc`` files and
        frames are extracted through ``splitTrajectory.main``; otherwise
        ``.pdb`` trajectories are read with ``utilities.getSnapshots``.
    :param use_pdb: Forwarded untouched to the trajectory helpers.
    """
    # Distinct loop names: the original re-bound ``field1``/``field2`` while
    # iterating over them, which hid the parameters inside the loop body.
    for cluster_num, value1, value2 in zip(centers_info, field1, field2):
        epoch_num, traj_num, snap_num = map(int, centers_info[cluster_num]['structure'])
        extension = "xtc" if topology else "pdb"
        trajectory = "{}/{}{}.{}".format(epoch_num, traj, traj_num, extension)
        filename = filename_template.format(cluster_num, crit1, value1, crit2, value2)
        if topology:
            # snap_num + 1: presumably splitTrajectory numbers frames from 1
            # -- TODO confirm against splitTrajectory.main.
            splitTrajectory.main("", [trajectory], topology, [snap_num + 1], template=filename, use_pdb=use_pdb)
        else:
            # Only load the snapshots when they are actually written out; in
            # topology mode the loaded frames were never used.
            snapshots = utilities.getSnapshots(trajectory, topology=topology, use_pdb=use_pdb)
            with open(filename, "w") as fw:
                fw.write(snapshots[snap_num])
def extract_snapshot_from_xtc(path, f_id, output, topology, step, out_freq, f_out):
    """
    Extract a single frame from the trajectory that sits next to ``path``.

    The trajectory is searched in the directory of ``path``: first as
    ``*trajectory*_<f_id>.xtc`` and, if nothing matches, with any extension.
    The process exits through ``sys.exit`` when no file is found. The first
    match is handed to ``splitTrajectory.main`` together with the frame
    ``step / out_freq + 1`` and ``f_out`` as the output template.
    """
    traj_dir = os.path.dirname(path)
    matches = glob.glob(os.path.join(traj_dir, "*trajectory*_{}.xtc".format(f_id)))
    if not matches:
        # No xtc trajectory: fall back to any extension before giving up.
        matches = glob.glob(os.path.join(traj_dir, "*trajectory*_{}.*".format(f_id)))
        if not matches:
            sys.exit("Trajectory {} not found. Be aware that PELE trajectories must contain the label \'trajectory\' in their file name to be detected".format("*trajectory*_{}".format(f_id)))
    frame = (step)/out_freq+1
    splitTrajectory.main(output, [matches[0]], topology, [frame], template=f_out)
    print("Model {} selected".format(f_out))
def split_trajectory(paths):
    """
    Split every epoch trajectory and yield the folder each one was split into.

    Files matching ``<paths.adap_ex_output>/*/traj*.*`` are considered, but
    only those whose parent folder name is made of digits. Each one is passed
    to ``st.main`` with ``paths.topology``; the destination folder is
    ``<paths.pele_dir>/ini_str/<trajectory file name without extension>``
    and is yielded after the call.
    """
    pattern = os.path.join(paths.adap_ex_output, "*/traj*.*")
    for traj_file in glob.glob(pattern):
        parent_name = os.path.basename(os.path.dirname(traj_file))
        if not parent_name.isdigit():
            # Skip anything that does not live in a numeric folder.
            continue
        stem = os.path.splitext(os.path.basename(traj_file))[0]
        output_dir = os.path.join(paths.pele_dir, "ini_str", stem)
        st.main(output_dir, [traj_file], paths.topology, None, template=None)
        yield output_dir
def extract_snapshots_from_xtc(self, min_values, steps):
    """
    Extract one frame per row of ``min_values`` from the matching xtc files.

    ``min_values`` is indexed by column (``DIR``, ``self.crit1``,
    ``self.crit2``, ``steps``) and exposes a ``report`` attribute; every
    column is converted with ``.tolist()``. For each row an output name of
    the form ``epoch<E>_trajectory_<report>.<step>_<crit1><v1>_<crit2><v2>.pdb``
    is built, the trajectory ``*trajectory*_<report>.xtc`` is globbed in the
    directory of the row's path, and ``st.main`` is asked for frame
    ``step % self.ad_steps / out_freq + 1``. A message is printed depending
    on the truthiness of what ``st.main`` returns.
    """
    # NOTE(review): DIR, output, topology and out_freq are not defined in this
    # method; presumably they are module-level names -- confirm.
    paths = min_values[DIR].tolist()
    epochs = []
    for report_path in paths:
        epoch_dir = os.path.normpath(os.path.dirname(report_path))
        epochs.append(os.path.basename(epoch_dir))
    values1 = min_values[self.crit1].tolist()
    values2 = min_values[self.crit2].tolist()
    file_ids = min_values.report.tolist()
    step_indexes = min_values[steps].tolist()

    name_template = "epoch{}_trajectory_{}.{}_{}{:.2f}_{}{:.3f}.pdb"
    files_out = []
    for epoch, step, report, value1, value2 in zip(epochs, step_indexes, file_ids, values1, values2):
        files_out.append(
            name_template.format(
                epoch, report, int(step),
                self.crit1.replace(" ",""), value1,
                self.crit2.replace(" ",""), value2,
            )
        )

    for f_id, f_out, step, path in zip(file_ids, files_out, step_indexes, paths):
        pattern = os.path.join(os.path.dirname(path), "*trajectory*_{}.xtc".format(f_id))
        f_in = glob.glob(pattern)
        frame = step % self.ad_steps / out_freq + 1
        found = st.main(output, f_in, topology, [frame], template=f_out)
        if found:
            message = "MODEL {} has been selected"
        else:
            message = "MODEL {} not found. Check -f option"
        print(message.format(f_out))
def write_snapshot(snap_num, trajectory, filename, topology=None, use_pdb=False):
    """
    Write snapshot ``snap_num`` of ``trajectory`` to ``filename``.

    With a truthy ``topology`` the work is delegated to
    ``splitTrajectory.main``, asking for frame ``snap_num + 1`` and using
    ``filename`` as template. Otherwise the snapshots are loaded with
    ``utilities.getSnapshots`` and the selected one is written as text.
    """
    if topology:
        splitTrajectory.main("", [trajectory], topology, [snap_num + 1], template=filename, use_pdb=use_pdb)
        return
    frames = utilities.getSnapshots(trajectory, topology=topology, use_pdb=use_pdb)
    with open(filename, "w") as handle:
        handle.write(frames[snap_num])
def _select_cluster_representatives(paths, snapshots, labels, silhouettes):
    """Return ``{label: [path, snapshot, silhouette]}`` with the best-scoring member of each cluster."""
    # Seed every label with one of its members, then keep whichever member
    # has a strictly higher silhouette value.
    representatives = {
        label: [path, snap, sil]
        for path, snap, label, sil in zip(paths, snapshots, labels, silhouettes)
    }
    for path, snap, label, sil in zip(paths, snapshots, labels, silhouettes):
        if sil > representatives[label][2]:
            representatives[label] = [path, snap, sil]
    return representatives


def _write_pdb_model(f_in, model, f_out_path):
    """Copy MODEL number ``model`` of the multi-model PDB ``f_in`` into ``f_out_path``."""
    with open(f_in, 'r') as input_file:
        file_content = input_file.read()
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    # '\b' stops model 1 from matching the leading digit of model 10, 11, ...
    selected = re.search(r'MODEL\s+%d\b(.*?)ENDMDL' % model, file_content, re.DOTALL)
    if selected is None:
        # Checked before opening the output so no empty file is left behind.
        raise AttributeError(
            "Model {} not found. Check the -f option.".format(f_in))
    with open(f_out_path, 'w') as f:
        f.write("\n".join(["MODEL %d" % model, selected.group(1), "ENDMDL\n"]))


def cluster_with_dbscan(paths, snapshots, all_coordinates, out_freq=1, topology=None):
    """
    Cluster the selected structures with hdbscan and write one representative
    per cluster into the ``Clusters`` folder.

    The representative of a cluster is its member with the highest
    silhouette value. Files are named ``cluster_<label + 1>.pdb``; hdbscan
    reports noise points with label -1, so those end up in ``cluster_0.pdb``.

    :param paths: Trajectory file of each structure.
    :param snapshots: Snapshot index of each structure inside its trajectory.
    :param all_coordinates: Data passed to ``HDBSCAN.fit`` and to
        ``mt.silhouette_samples`` (assumed to hold one row per structure
        -- TODO confirm against the caller).
    :param out_freq: Divides the snapshot index to obtain the MODEL number
        when reading PDB trajectories (``snapshot / out_freq + 1``).
    :param topology: When truthy, frames are extracted with ``st.main``
        (xtc trajectories); otherwise the MODEL is copied out of the PDB.
    :raises ValueError: If hdbscan or the silhouette computation fails.
    :raises AttributeError: If the requested MODEL is missing from a PDB.
    """
    n_samples = len(snapshots)

    # Clusterize
    t0 = time.time()
    try:
        # min_samples is 10% of the number of structures, plus one.
        db = hdbscan.HDBSCAN(min_samples=int(n_samples * 0.10) + 1).fit(all_coordinates)
    except ValueError:
        raise ValueError(
            "Ligand not found check the option --resname. i.e python interactive.py 5 6 7 --resname LIG"
        )
    final_result = db.labels_
    t1 = time.time()
    print("time clustering")
    print(t1 - t0)

    # Get representative
    t0 = time.time()
    try:
        silhouette_samples = mt.silhouette_samples(all_coordinates, final_result)
    except ValueError:
        raise ValueError(
            "Clustering failed. Structures do not follow any pattern or they are not enough"
        )
    max_clust = _select_cluster_representatives(paths, snapshots, final_result, silhouette_samples)

    # Get Structures
    output = "Clusters"
    if not os.path.exists(output):
        os.mkdir(output)
    for label, (f_in, snapshot, _) in max_clust.items():
        f_out = "cluster_{}.pdb".format(label + 1)
        if topology:
            # XTC
            found = st.main(output, [f_in], topology, [snapshot], template=f_out)
            if found:
                print("MODEL {} has been selected as {}".format(f_in, f_out))
            else:
                print("MODEL {} not found. Check -f option".format(f_in))
        else:
            # PDB
            model = int(snapshot / out_freq + 1)
            _write_pdb_model(f_in, model, os.path.join(output, f_out))
            print("MODEL {} has been selected as {}".format(f_in, f_out))
    t1 = time.time()
    print("Time post processing")
    print(t1 - t0)