def process_file(report, outputFilename, format_out, new_report, epoch,
                 energy_column):
    """
    Write a report that contains the scaled energy of a simulation report

    The energy column is read from the first file matching ``report`` and
    passed through ``preprocessing.scale``. The result is either written next
    to a step index (``new_report``) or appended to an existing report with
    ``analysis_utils.extendReportWithRmsd``.

    :param report: Wildcard of the report to read, the first match is used
    :type report: str
    :param outputFilename: Path of the file to write. When it differs from
        the matched report and ``new_report`` is False, its current contents
        are the ones that get extended
    :type outputFilename: str
    :param format_out: Format forwarded to ``np.savetxt``
    :param new_report: Whether to write only the step index and the energy
    :type new_report: bool
    :param epoch: Not used in this function
    :param energy_column: Index of the column holding the energy
    :type energy_column: int
    :raises IndexError: If no file matches ``report``
    """
    matches = glob.glob(report)
    if not matches:
        raise IndexError("File %s not found" % report)
    reportFilename = matches[0]

    with open(reportFilename) as report_handle:
        first_line = report_handle.readline().rstrip()
        # the handle has already advanced past the first line at this point
        contents = utilities.loadtxtfile(report_handle)
    header = first_line if first_line.startswith("#") else ""
    scaled_energy = preprocessing.scale(contents[:, energy_column])

    if new_report:
        header = ""
        steps = np.array(range(scaled_energy.shape[0]))
        fixedReport = np.concatenate(
            (steps[:, None], scaled_energy[:, None]), axis=1)
    else:
        if outputFilename != reportFilename:
            contents = utilities.loadtxtfile(outputFilename)
        fixedReport = analysis_utils.extendReportWithRmsd(contents,
                                                          scaled_energy)

    if header:
        title_line = "%s\tEnergy\n" % header
    else:
        title_line = "# Step\tEnergy\n"
    with open(outputFilename, "w") as fw:
        fw.write(title_line)
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
def main(filename, output_folder):
    """
    Plot the number of processors spawned per epoch for each cluster size

    The epochs are the numeric folders of the current directory that contain
    a ``clustering/summary.txt`` file. For every epoch, column 2 of the
    summary is summed over the rows that share the same value in column 4,
    and one line is drawn per distinct column-4 value found in the summary of
    epoch ``numberOfEpochs - 1``.

    :param filename: Prefix of the saved image (``<filename>_spawning.png``),
        if it is an empty string the plot is not saved
    :type filename: str
    :param output_folder: Folder where the image is saved, it is created if
        it does not exist. None or an empty string save to the current
        directory
    :type output_folder: str
    """
    print("FILENAME", filename)
    templateSummary = "%d/clustering/summary.txt"
    numberOfEpochs = sum(
        1 for epoch in os.listdir(".")
        if epoch.isdigit() and os.path.isfile(templateSummary % int(epoch)))

    clustering = utilities.loadtxtfile(templateSummary % (numberOfEpochs - 1))
    clustersThres = sorted(set(clustering[:, 4]), reverse=True)

    spawningPerThres = np.zeros((numberOfEpochs, len(clustersThres)))
    for i in range(numberOfEpochs):
        clustering = utilities.loadtxtfile(templateSummary % i)
        for j, threshold in enumerate(clustersThres):
            same_size = clustering[:, 4] == threshold
            spawningPerThres[i, j] = clustering[same_size, 2].sum()

    line_objects = plt.plot(spawningPerThres)
    plt.legend(line_objects,
               tuple(["Cluster size %d" % x for x in clustersThres]),
               loc="best")
    plt.title("Processors spawned per epoch and cluster size")
    plt.xlabel("Epoch")
    plt.ylabel("Number of spawned processors")

    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if filename != "":
        # a falsy output_folder is accepted above, so it must not reach
        # os.path.join as None (that raises TypeError)
        plt.savefig(os.path.join(output_folder or "",
                                 "%s_spawning.png" % filename))
    plt.show()
def getRepresentativePDBs(filesWildcard, run):
    """
    Write the representative structure of each cluster to a summary file

    The trajectories matching the wildcard are assigned to the cluster
    centers stored in the default ``clusterCentersFile`` of a
    ``cluster.Cluster`` object, and the structure that ``getCentersInfo``
    selects for each center is written to
    ``representative_structures/representative_structures_<run>.dat``.

    :param filesWildcard: Wildcard matching the trajectory files
    :type filesWildcard: str
    :param run: Number used in the name of the output file
    :type run: int
    """
    trajectory_files = glob.glob(filesWildcard)
    # the first column of each file is dropped before the assignment
    trajectories = [utilities.loadtxtfile(name)[:, 1:]
                    for name in trajectory_files]

    clustering = cluster.Cluster(0, "", "")
    clustering.clusterCenters = utilities.loadtxtfile(
        clustering.clusterCentersFile)
    assignments = clustering.assignNewTrajectories(trajectories)
    centers = clustering.clusterCenters
    numClusters = centers.shape[0]
    centersInfo = getCentersInfo(centers, trajectories, trajectory_files,
                                 assignments)

    output_dir = "representative_structures"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    summary_name = "representative_structures/representative_structures_%d.dat" % run
    with open(summary_name, "w") as fw:
        fw.write("Cluster\tEpoch\tTrajectory\tSnapshot\n")
        for clNum in range(numClusters):
            structure_fields = "\t".join(centersInfo[clNum]["structure"])
            fw.write("%d\t" % clNum + structure_fields + "\n")
def calculate_rmsd_traj(nativePDB, resname, symmetries, rmsdColInReport, traj,
                        reportName, top, epoch, outputFilename, fmt_str,
                        new_report):
    """
    Compute the RMSD of a trajectory and store it in a report file

    The RMSD values are obtained from ``utilities.getRMSD``. They are written
    either to a new two-column report (step, RMSD), into an existing column
    of the report, or as an extra column added with
    ``analysis_utils.extendReportWithRmsd``.

    :param nativePDB: Reference structure forwarded to ``utilities.getRMSD``
    :param resname: Residue name forwarded to ``utilities.getRMSD``
    :type resname: str
    :param symmetries: Symmetries forwarded to ``utilities.getRMSD``
    :param rmsdColInReport: Column of the report to overwrite with the RMSD.
        Values outside ``(0, number of columns)`` add a new column instead
    :type rmsdColInReport: int
    :param traj: Trajectory forwarded to ``utilities.getRMSD``
    :param reportName: Path of the report to update (unused if ``new_report``)
    :type reportName: str
    :param top: Topology passed to ``utilities.getTopologyFile``, or None
    :param epoch: Not used in this function
    :param outputFilename: Path of the file to write
    :type outputFilename: str
    :param fmt_str: Format forwarded to ``np.savetxt``
    :param new_report: Whether to write a new report with only step and RMSD
    :type new_report: bool
    """
    top_proc = None
    if top is not None:
        top_proc = utilities.getTopologyFile(top)
    rmsds = utilities.getRMSD(traj, nativePDB, resname, symmetries,
                              topology=top_proc)

    header = ""
    # whether the written report has one more column than the header describes
    column_added = True
    if new_report:
        fixedReport = np.zeros((rmsds.size, 2))
        fixedReport[:, 0] = np.arange(rmsds.size)
        fixedReport[:, 1] = rmsds
    else:
        with open(reportName) as f:
            header = f.readline().rstrip()
        if not header.startswith("#"):
            header = ""
        reportFile = utilities.loadtxtfile(reportName)
        if 0 < rmsdColInReport < reportFile.shape[1]:
            reportFile[:, rmsdColInReport] = rmsds
            fixedReport = reportFile
            column_added = False
        else:
            fixedReport = analysis_utils.extendReportWithRmsd(reportFile,
                                                              rmsds)

    with open(outputFilename, "w") as fw:
        if not header:
            fw.write("# Step\tRMSD\n")
        elif column_added:
            fw.write("%s\tRMSD\n" % header)
        else:
            # an existing column was overwritten, so the original header
            # still matches the columns and must not get an extra label
            fw.write("%s\n" % header)
        np.savetxt(fw, fixedReport, fmt=fmt_str)
def get_centers_info(trajectoryFolder, trajectoryBasename, num_clusters,
                     clusterCenters):
    """
    Find the snapshot closest to each cluster center

    Every file matching the wildcard is loaded with ``utilities.loadtxtfile``.
    The first column of each row is taken as the snapshot number and the rest
    as its coordinates, which are compared (euclidean distance) with all the
    cluster centers.

    :param trajectoryFolder: Folder with the trajectory files
    :type trajectoryFolder: str
    :param trajectoryBasename: Wildcard of the trajectory files, the names
        are expected to end in ``_<epoch>_<trajectory>.<extension>``
    :type trajectoryBasename: str
    :param num_clusters: Number of clusters
    :type num_clusters: int
    :param clusterCenters: Coordinates of the cluster centers, one per row
    :type clusterCenters: numpy.ndarray
    :returns: dict -- For each cluster index, a dictionary with the keys
        "structure" (epoch as str, trajectory as int, snapshot number),
        "minDist" and "center" (coordinates of the closest snapshot).
        Clusters with no snapshot closer than 1e6 keep None in "structure"
        and "center"
    """
    centersInfo = {
        x: {"structure": None, "minDist": 1e6, "center": None}
        for x in range(num_clusters)
    }
    trajFiles = glob.glob(os.path.join(trajectoryFolder, trajectoryBasename))
    for traj in trajFiles:
        # parse only the file name, and from the right, so that underscores
        # in the folder names or in the file prefix do not break the parsing
        traj_name = os.path.splitext(os.path.basename(traj))[0]
        _, epoch, iTraj = traj_name.rsplit("_", 2)
        trajCoords = utilities.loadtxtfile(traj)
        if len(trajCoords.shape) < 2:
            # a single snapshot, iterate over it as a one-row trajectory
            trajCoords = [trajCoords]
        for snapshot in trajCoords:
            nSnap = snapshot[0]
            snapshotCoords = snapshot[1:]
            dist = np.sqrt(np.sum((clusterCenters - snapshotCoords)**2,
                                  axis=1))
            for clusterInd in range(num_clusters):
                info = centersInfo[clusterInd]
                if dist[clusterInd] < info['minDist']:
                    info['minDist'] = dist[clusterInd]
                    info['structure'] = (epoch, int(iTraj), nSnap)
                    info['center'] = snapshotCoords
    return centersInfo
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object,
         topology):
    """
    Print the cluster a snapshot belongs to

    The snapshot is compared, in order, with the first ``n_clusters``
    clusters of the clustering object, where ``n_clusters`` is the number of
    rows of the clustering summary of the previous epoch (epoch 0 for the
    first one). The first cluster whose RMSD to the snapshot is below the
    cluster threshold is reported.

    :param epoch_num: Epoch of the snapshot
    :type epoch_num: int
    :param trajectory: Trajectory number of the snapshot
    :type trajectory: int
    :param snapshot_num: Index of the snapshot within the trajectory
    :type snapshot_num: int
    :param resname: Residue name used to initialise the PDB object
    :type resname: str
    :param clustering_object: Argument of ``utilities.readClusteringObject``
    :param topology: Topology of the trajectory, or None
    :raises ValueError: If no trajectory file is found
    :raises IndexError: If the snapshot can not be extracted
    """
    calc = RMSDCalculator.RMSDCalculator()
    clusters = utilities.readClusteringObject(clustering_object)
    summary_path = os.path.join(str(max(0, epoch_num - 1)), "clustering",
                                "summary.txt")
    n_clusters = utilities.loadtxtfile(summary_path).shape[0]
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)

    candidates = glob.glob(
        os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not candidates:
        raise ValueError(
            "No file with the specified epoch and trajectory found")
    try:
        all_snapshots = utilities.getSnapshots(candidates[0],
                                               topology=topology)
        snapshots = all_snapshots[snapshot_num]
    except IndexError:
        raise IndexError(
            "Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct"
            % (snapshot_num, trajectory, epoch_num))

    pdb = atomset.PDB()
    pdb.initialise(snapshots, resname=resname, topology=topology_contents)

    # lazily evaluated, so the RMSD is not computed past the first match
    assigned = next(
        (index for index, candidate in enumerate(clusters[:n_clusters])
         if calc.computeRMSD(pdb, candidate.pdb) < candidate.threshold),
        None)
    if assigned is None:
        print("Snapshot not assigned to any cluster! :(")
    else:
        print("Snapshot belongs to cluster", assigned)
def process_file(traj, top_file, resname, report, outputFilename, format_out,
                 new_report, epoch):
    """
    Compute the SASA of a trajectory, store it in a report and time the task

    :param traj: Trajectory forwarded to ``calculateSASA``
    :param top_file: Topology forwarded to ``calculateSASA``
    :param resname: Residue name forwarded to ``calculateSASA``
    :type resname: str
    :param report: Wildcard of the report, it must match at least one file
        unless ``new_report`` is set
    :type report: str
    :param outputFilename: Path of the file to write. Unless ``new_report``
        is set, it is also the file whose contents are extended
    :type outputFilename: str
    :param format_out: Format forwarded to ``np.savetxt``
    :param new_report: Whether to write only the step index and the SASA
    :type new_report: bool
    :param epoch: Not used in this function
    :raises IndexError: If no file matches ``report`` (only checked when
        ``new_report`` is False)
    """
    start = time.time()
    sasa_values = calculateSASA(traj, top_file, resname)
    header = ""
    if new_report:
        steps = np.array(range(sasa_values.shape[0]))
        fixedReport = np.concatenate((steps[:, None], sasa_values[:, None]),
                                     axis=1)
    else:
        if not glob.glob(report):
            raise IndexError("File %s not found" % report)
        # the file that is read is always the output file: either it is the
        # matched report itself or it replaces it
        with open(outputFilename) as source:
            first_line = source.readline().rstrip()
            if first_line.startswith("#"):
                header = first_line
            # the handle has already advanced past the first line
            reportFile = utilities.loadtxtfile(source)
        fixedReport = analysis_utils.extendReportWithRmsd(reportFile,
                                                          sasa_values)

    if header:
        title_line = "%s\tSASA\n" % header
    else:
        title_line = "# Step\tSASA\n"
    with open(outputFilename, "w") as fw:
        fw.write(title_line)
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
    elapsed = time.time() - start
    print("Took %.2fs to process" % elapsed, traj)
def main(path_files, path_to_save):
    """
    Plot the free energy projected on the x-y, x-z and y-z planes

    Columns 1, 2 and 3 of all the ``traj_*.dat`` files are taken as the x, y
    and z coordinates respectively.

    :param path_files: Folder with the ``traj_*.dat`` files
    :type path_files: str
    :param path_to_save: Folder where the images are saved, if None the
        plots are only shown
    :type path_to_save: str
    """
    trajectory_files = glob.glob(os.path.join(path_files, "traj_*.dat"))
    stacked = np.vstack([utilities.loadtxtfile(name)
                         for name in trajectory_files])
    coordinate = {"x": stacked[:, 1], "y": stacked[:, 2], "z": stacked[:, 3]}

    projections = (
        ("x", "y", "x-y_decomposition.png"),
        ("x", "z", "x-z_decomposition.png"),
        ("y", "z", "y-z_decomposition.png"),
    )
    for horizontal, vertical, image_name in projections:
        plt.figure(figsize=(8, 5))
        mplt.plot_free_energy(coordinate[horizontal], coordinate[vertical],
                              cmap="Spectral")
        plt.xlabel(horizontal)
        plt.ylabel(vertical)
        if path_to_save is not None:
            plt.savefig(os.path.join(path_to_save, image_name))
    plt.show()
def calculateAutoCorrelation(lagtimes, dtrajs, nclusters, nLags):
    """
    Calculate the autocorrelation of each cluster at several lagtimes

    :param lagtimes: Lagtimes (in number of steps) to evaluate
    :type lagtimes: list
    :param dtrajs: Discretized trajectories, each element is passed to
        ``utilities.loadtxtfile`` with ``dtype=int``
    :type dtrajs: list
    :param nclusters: Number of clusters (rows of the result)
    :type nclusters: int
    :param nLags: Number of lagtimes (columns of the result)
    :type nLags: int
    :returns: numpy.ndarray -- Array of shape (nclusters, nLags)
    """
    # C counts every visit to a cluster, Ci the visits in the first
    # Nt - lagtime steps and Cf the visits in the later steps
    C = np.zeros((nclusters, nLags))
    Ci = np.zeros((nclusters, nLags))
    Cf = np.zeros((nclusters, nLags))
    autoCorr = np.zeros((nclusters, nLags))
    # N is the total number of steps, M the number of pairs per lagtime
    N = 0
    M = np.zeros(nLags)
    for trajectory in dtrajs:
        traj = utilities.loadtxtfile(trajectory, dtype=int)
        Nt = traj.size
        N += Nt
        for il, lagtime in enumerate(lagtimes):
            M[il] += Nt - lagtime
            for i in range(Nt - lagtime):
                # accumulate how often the cluster is revisited after lagtime
                autoCorr[traj[i], il] += (traj[i] == traj[i + lagtime])
                C[traj[i], il] += 1
                Ci[traj[i], il] += 1
                # NOTE(review): the strict comparison leaves out i == lagtime,
                # confirm whether ">=" was intended
                if i > lagtime:
                    Cf[traj[i], il] += 1
            # the last lagtime steps are never the start of a pair
            for j in range(Nt - lagtime, Nt):
                C[traj[j], il] += 1
                Cf[traj[j], il] += 1
    mean = C / float(N)
    var = (N * C - (C**2)) / float(N * (N - 1))
    autoCorr += M * mean**2 - (Ci + Cf) * mean
    autoCorr /= N
    # NOTE(review): var is 0 for a cluster that is never visited (C == 0),
    # so this division is 0/0 for those entries
    autoCorr /= var
    return autoCorr
def process_file(traj, top_file, resname, report, outputFilename, format_out,
                 new_report, epoch):
    """
    Compute the SASA of a trajectory and store it in a report file

    :param traj: Trajectory forwarded to ``calculateSASA``
    :param top_file: Topology forwarded to ``calculateSASA``
    :param resname: Residue name forwarded to ``calculateSASA``
    :type resname: str
    :param report: Wildcard of the report to extend, the first match is used
        (ignored if ``new_report`` is set)
    :type report: str
    :param outputFilename: Path of the file to write
    :type outputFilename: str
    :param format_out: Format forwarded to ``np.savetxt``
    :param new_report: Whether to write only the SASA values
    :type new_report: bool
    :param epoch: Not used in this function
    :raises IndexError: If no file matches ``report`` (only checked when
        ``new_report`` is False)
    """
    sasa_values = calculateSASA(traj, top_file, resname)
    header = ""
    if new_report:
        fixedReport = sasa_values
    else:
        matches = glob.glob(report)
        if not matches:
            raise IndexError("File %s not found" % report)
        with open(matches[0]) as source:
            first_line = source.readline().rstrip()
            if first_line.startswith("#"):
                header = first_line
            # the handle has already advanced past the first line
            reportFile = utilities.loadtxtfile(source)
        fixedReport = correctRMSD.extendReportWithRmsd(reportFile,
                                                       sasa_values)

    if header:
        title_line = "%s\tSASA\n" % header
    else:
        title_line = "# SASA\n"
    with open(outputFilename, "w") as fw:
        fw.write(title_line)
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
def getSASAvalues(representative_file, sasa_col, path_to_report):
    """
    Get the SASA value of the representative structure of each cluster

    :param representative_file: File with the representative structures, its
        first line is skipped and the rest is read as integers
    :type representative_file: str
    :param sasa_col: Column of the reports that holds the SASA
    :type sasa_col: int
    :param path_to_report: Folder that contains the epoch folders with the
        reports
    :type path_to_report: str
    :returns: list -- SASA value for each row of the representative file,
        entries not covered by ``getR.getExtractInfo`` are left as 0
    """
    clusters_info = np.loadtxt(representative_file, skiprows=1, dtype=int)
    extract_info = getR.getExtractInfo(clusters_info)
    sasa = [0] * len(clusters_info)
    # the two placeholders are filled with the key of extract_info
    report_template = os.path.join(path_to_report, "%d", "report*_%d")
    for trajFile, extraInfo in extract_info.items():
        report_filename = glob.glob(report_template % trajFile)[0]
        report = utilities.loadtxtfile(report_filename)
        for entry in extraInfo:
            # entry[0] is the position in the result, entry[1] the report row
            sasa[entry[0]] = report[entry[1], sasa_col]
    return sasa
def copyWorkingTrajectories(fileWildcard, length=None, ntrajs=None,
                            bootstrap=True, skipFirstSteps=0):
    """
    Function that copies trajectories that match "fileWildcard" into the
    current directory. It may copy a subset and a part of them (length)

    Warning! If not using bootstrap, it uses all the trajectories and
    ntrajs is ignored

    :param fileWildcard: Wildcard to match original files
    :type fileWildcard: str
    :param length: Trajectory length to consider, if None (default value),
        the full trajectory will be considered
    :type length: int
    :param ntrajs: Number of trajs to consider. If None (default value), a
        number equal to the total will be considered
    :type ntrajs: int
    :param bootstrap: Bootstrap ntrajs from the original (default is True)
    :type bootstrap: bool
    :param skipFirstSteps: Skip first trajectory steps (default value is 0)
    :type skipFirstSteps: int

    :returns: list -- Written files, in order to ease a posterior cleanup.
        The destination name is listed even if the trimmed trajectory was
        empty and nothing was written
    """
    allFiles = glob.glob(fileWildcard)

    if bootstrap is False:
        trajFiles = allFiles
    else:
        if ntrajs is None:
            ntrajs = len(allFiles)
        # sampling with replacement, the same file may be picked several times
        trajFiles = np.random.choice(allFiles, ntrajs)

    writenFiles = []
    for i, trajFile in enumerate(trajFiles):
        dst = __getDstName(bootstrap, i, trajFile)
        writenFiles.append(dst)
        traj = utilities.loadtxtfile(trajFile)
        # with no length requested the slice below covers the whole trajectory
        traj_len = len(traj) if length is None else length
        try:
            trimmedTraj = traj[skipFirstSteps:traj_len + 1, :]
            if len(trimmedTraj) > 0:
                np.savetxt(dst, trimmedTraj, fmt=b"%.4f", delimiter="\t")
        except Exception:
            # a bare except would also turn Ctrl-C (KeyboardInterrupt) and
            # SystemExit into this message, hiding the real reason of the exit
            sys.exit("There is a problem with %s" % trajFile)
    print("Boostraping trajectories", ntrajs, len(trajFiles),
          len(set(trajFiles)))
    return writenFiles
def main(lagtime, clusters_file, disctraj, trajs, n_clusters, plots_path,
         save_plot, show_plot, lagtime_resolution=20):
    """
    Calculate, report and plot the autocorrelation of the clusters

    The autocorrelation is saved to ``autoCorr.npy`` and its absolute value
    at the last lagtime is written, together with the cluster centers, to
    ``cluster_autoCorr.pdb``.

    :param lagtime: Upper limit (not included) of the lagtimes to evaluate
    :type lagtime: int
    :param clusters_file: File with the cluster centers. If ``disctraj`` is
        None it can also be None, and the centers come from the clustering
    :type clusters_file: str
    :param disctraj: Folder with the discretized trajectories, if None the
        trajectories in ``trajs`` are clustered
    :type disctraj: str
    :param trajs: Folder with the trajectories to cluster
    :type trajs: str
    :param n_clusters: Number of clusters
    :type n_clusters: int
    :param plots_path: Folder for the plots, it is created if it is not None
    :type plots_path: str
    :param save_plot: Forwarded to ``create_plots``
    :param show_plot: Forwarded to ``create_plots``
    :param lagtime_resolution: Step between consecutive lagtimes
    :type lagtime_resolution: int
    :raises ValueError: If the number of cluster centers is not
        ``n_clusters``
    """
    def report_states_over(limit):
        """Print the clusters over limit at the last lagtime, return how many"""
        over_limit = np.where(autoCorr[:, -1] > limit)[0]
        count = over_limit.size
        if len(over_limit):
            print(" ".join(map(str, over_limit)))
        print("Number of clusters:", count,
              ", %.2f%% of the total" % (100 * count / float(n_clusters)))
        return count

    lagtimes = list(range(1, lagtime, lagtime_resolution))
    n_lags = len(lagtimes)
    if disctraj is not None:
        clusterCenters = utilities.loadtxtfile(clusters_file)
    else:
        clusteringObject = cluster.Cluster(n_clusters, trajs, "traj*",
                                           alwaysCluster=False)
        if clusters_file is not None:
            # only assign
            utilities.makeFolder(clusteringObject.discretizedFolder)
            clusteringObject.clusterCentersFile = clusters_file
        clusteringObject.clusterTrajectories()
        disctraj = clusteringObject.discretizedFolder
        clusterCenters = clusteringObject.clusterCenters
    if len(clusterCenters) != n_clusters:
        raise ValueError(
            "Number of clusters specified in the -n parameter does not match the provided clusters"
        )

    print("Calculating autocorrelation...")
    dtrajs_loaded = []
    for dtraj in glob.glob(os.path.join(disctraj, "traj*")):
        dtrajs_loaded.append(utilities.loadtxtfile(dtraj, dtype=int))
    autoCorr = utils.calculateAutoCorrelation(lagtimes, dtrajs_loaded,
                                              n_clusters, n_lags)
    np.save("autoCorr.npy", autoCorr)

    # cluster centers with the autocorrelation at the last lag as last column
    centers_with_corr = np.vstack(
        (clusterCenters.T, np.abs(autoCorr[:, -1]))).T
    utilities.write_PDB_clusters(centers_with_corr, use_beta=True,
                                 title="cluster_autoCorr.pdb")

    print("Clusters over correlation time limit")
    correlation_limit = np.exp(-1)
    size2 = report_states_over(correlation_limit)
    print("Clusters with more than 0.1 autocorrelation")
    report_states_over(0.1)

    if size2 > 0:
        print("Correlation time not achieved at lagtime %d" % lagtime)
    else:
        # walk the lagtimes from the longest to the shortest until some
        # cluster is over the limit
        for back in range(n_lags):
            states = np.where(autoCorr[:, -back - 1] > correlation_limit)[0]
            if not len(states):
                continue
            string_states = ", ".join(map(str, states))
            print("Correlation time %d, for states: %s" %
                  (lagtimes[-back], string_states))
            break

    if plots_path is not None:
        utilities.makeFolder(plots_path)
    else:
        plots_path = ""
    create_plots(autoCorr, plots_path, save_plot, show_plot, n_clusters,
                 lagtimes, threshold=2.0)
def createPlot(reportName, column1, column2, stepsPerRun, printWithLines,
               paletteModifier, trajs_range=None, label_x=None, label_y=None,
               label_colorbar=None, fig_size=(6, 6), simulation_path=".",
               skip_first_step=False, skip_steps=None, y_top=None,
               y_bottom=None, x_left=None, x_right=None):
    """
    Create a matplotlib figure with the data of the simulation reports, where
    hovering the mouse over a point shows its epoch, trajectory and model

    The figure is created but not shown or saved by this function.

    :param reportName: Name of the files containing the simulation data
    :type reportName: str
    :param column1: Column to plot in the X axis
    :type column1: int
    :param column2: Column to plot in the Y axis
    :type column2: int
    :param stepsPerRun: Number of steps per epoch, forwarded to ``addLine``
    :type stepsPerRun: int
    :param printWithLines: Forwarded to ``addLine`` as 'withLines'
    :param paletteModifier: Column used to color the points. If None the
        color range spans the epochs, if -1 it spans the report numbers and
        no colorbar is drawn
    :type paletteModifier: int
    :param trajs_range: Range of trajectories to plot, as "start:end" (both
        included)
    :type trajs_range: str
    :param label_x: Label of the x-axis
    :type label_x: str
    :param label_y: Label of the y-axis
    :type label_y: str
    :param label_colorbar: Label of the colorbar, ignored if there is no
        colorbar
    :type label_colorbar: str
    :param fig_size: Size of the plot figure (default (6in, 6in))
    :type fig_size: tuple
    :param simulation_path: Path to the simulation data
    :type simulation_path: str
    :param skip_first_step: Whether to avoid plotting the first point in each
        report (ignored if skip_steps is set)
    :type skip_first_step: bool
    :param skip_steps: Number of steps to skip in the plot, reports with no
        more steps than this are left out
    :type skip_steps: int
    :param y_top: Top limit of the y axis
    :type y_top: float
    :param y_bottom: Bottom limit of the y axis
    :type y_bottom: float
    :param x_left: Left limit of the x axis
    :type x_left: float
    :param x_right: Right limit of the x axis
    :type x_right: float
    :raises ValueError: If there are no epochs in the simulation path, or an
        epoch has no reports
    """
    epochs = utilities.get_epoch_folders(simulation_path)
    numberOfEpochs = int(len(epochs))
    if numberOfEpochs == 0:
        raise ValueError("No simulation found in specified path ",
                         os.path.abspath(simulation_path))
    cmap_name = "viridis"

    dictionary = {
        'reportName': reportName,
        'col2': column2,
        'numberOfEpochs': numberOfEpochs,
        'col1': column1,
        'withLines': printWithLines,
        'color': paletteModifier
    }
    annotations = []
    artists = []
    trajectory_range = set()
    if trajs_range is not None:
        start, end = map(int, trajs_range.split(":"))
        trajectory_range = set(range(start, end + 1))

    # limits of the color scale, narrowed while the data is read
    cmin = 1e10
    cmax = -1e10
    data_dict = {}
    max_report = 0
    min_report = 1e10
    for epoch in epochs:
        ep = int(epoch)
        reports = utilities.getReportList(
            os.path.join(simulation_path, epoch, reportName + "*"))
        if not reports:
            raise ValueError(
                "Could not find any reports with the given name!!")
        for report in reports:
            report_num = utilities.getReportNum(report)
            max_report = max(max_report, report_num)
            min_report = min(min_report, report_num)
            if trajs_range is not None and report_num not in trajectory_range:
                continue
            data = utilities.loadtxtfile(report)
            if skip_steps is not None:
                if data.shape[0] <= skip_steps:
                    continue
                data = data[skip_steps:]
            elif skip_first_step:
                data = data[1:]
            if paletteModifier is not None and paletteModifier != -1:
                cmin = min(cmin, data[:, paletteModifier].min())
                cmax = max(cmax, data[:, paletteModifier].max())
            data_dict[(ep, report_num)] = data

    fig, ax = plt.subplots(figsize=fig_size)
    ticks = None
    if paletteModifier == -1:
        cmin = min_report
        cmax = max_report
    if paletteModifier is None:
        cmin = int(epochs[0])
        cmax = int(epochs[-1])
        ticks = range(cmin, cmax + 1)

    sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(cmap_name),
                               norm=plt.Normalize(vmin=cmin, vmax=cmax))
    sm.set_array([])
    dictionary['cmap'] = sm
    # stays None when no colorbar is drawn, so that the labelling below does
    # not fail on an unbound name
    cbar = None
    if paletteModifier != -1:
        cbar = plt.colorbar(sm, ticks=ticks)
        cbar.ax.zorder = -1

    offset = 0
    if skip_steps is not None:
        offset = skip_steps
    elif skip_first_step:
        # if we skip the first step there is a point that is not shown but we
        # should count either way
        offset = 1
    for el in data_dict:
        addLine(data_dict[el], el[1], el[0], stepsPerRun, dictionary, artists)
        annotations.append([
            "Epoch: %d\nTrajectory: %d\nModel: %d" %
            (el[0], el[1], i + 1 + offset)
            for i in range(len(data_dict[el]))
        ])

    if label_x is not None:
        plt.xlabel(label_x)
    if label_y is not None:
        plt.ylabel(label_y)
    if cbar is not None:
        if paletteModifier is None:
            cbar.set_label("Epoch")
        if label_colorbar is not None:
            cbar.set_label(label_colorbar)

    ax.set_ylim(bottom=y_bottom, top=y_top)
    ax.set_xlim(left=x_left, right=x_right)
    annot = ax.annotate("", xy=(0, 0), xytext=(20, 20),
                        textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="w"),
                        arrowprops=dict(arrowstyle="->"))
    annot.set_visible(False)

    def modify_color(color):
        """Lighten the first three components of a color, capped at 1"""
        color_offset = 0.5
        color = list(color)
        for i in range(3):
            color[i] = min(color[i] + color_offset, 1)
        return tuple(color)

    def update_annot(ind, color, pos, index):
        """Update the information box of the selected point"""
        annot.xy = pos
        annot.set_text(annotations[index][int(ind["ind"][0])])
        annot.get_bbox_patch().set_facecolor(modify_color(color))
        annot.get_bbox_patch().set_alpha(0.8)
        annot.zorder = 10

    def locate_event(event):
        """Find the first artist that contains the event"""
        for j, el in enumerate(artists):
            found, info = el.contains(event)
            if found:
                return j, found, info, el
        return 0, False, None, None

    def extract_data(obj_plot, ind):
        """Get the position of the selected point of an artist"""
        try:
            x, y = obj_plot.get_data()
            x = x[ind["ind"][0]]
            y = y[ind["ind"][0]]
            return (x, y)
        except AttributeError:
            # artists without get_data expose their points as offsets
            return obj_plot.get_offsets()[ind["ind"][0]]

    def extract_color(obj_plot, ind):
        """Get the color of the selected point of an artist"""
        try:
            return obj_plot.get_markerfacecolor()
        except AttributeError:
            return obj_plot.get_facecolor()[ind["ind"][0]]

    def hover(event):
        """Action to perform when hovering the mouse on a point"""
        vis = annot.get_visible()
        if event.inaxes == ax:
            index, cont, ind, obj = locate_event(event)
            if cont:
                update_annot(ind, extract_color(obj, ind),
                             extract_data(obj, ind), index)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    # Respond to mouse motion
    fig.canvas.mpl_connect("motion_notify_event", hover)
def extractInfo(inputFile):
    """
    Get the second column of a file

    :param inputFile: File to read with ``utilities.loadtxtfile``
    :type inputFile: str
    :returns: Column 1 of the loaded contents
    """
    return utilities.loadtxtfile(inputFile)[:, 1]
def main(num_clusters, criteria1, criteria2, ligand_resname,
         output_folder="ClusterCentroids", atom_ids="", cpus=2, topology=None,
         report="report_", traj="trajectory_", use_pdb=False, png=False, CA=0,
         sidechains=0, restart="all"):
    """
    Cluster the snapshots of a simulation and write the structures closest to
    the cluster centers, together with two metrics of each of them

    If ``restart`` evaluates to False the trajectories in ``allTrajs`` are
    clustered, and ``clustercenters.dat`` and ``Simulation.csv`` are written;
    otherwise both files are read from the current directory.

    :param num_clusters: Number of clusters
    :type num_clusters: int
    :param criteria1: First metric, forwarded to ``get_metric``
    :param criteria2: Second metric, forwarded to ``get_metric``
    :param ligand_resname: Residue name of the ligand
    :type ligand_resname: str
    :param output_folder: Folder for the output structures and plots, it is
        created if it does not exist. None writes to the current directory
    :type output_folder: str
    :param atom_ids: Forwarded to ``extractCoords.main`` as atom_Ids
    :param cpus: Number of processes of the pool (at least one is used)
    :type cpus: int
    :param topology: Topology of the trajectories, or None
    :param report: Prefix of the report files
    :type report: str
    :param traj: Prefix of the trajectory files
    :type traj: str
    :param use_pdb: Forwarded to ``writeInitialStructures``
    :param png: Forwarded to ``plotClusters``
    :param CA: Forwarded to ``extractCoords.main`` as protein_CA
    :param sidechains: Forwarded to ``extractCoords.main``
    :param restart: If it evaluates to True, the clustering and the csv of a
        previous run are reused
    """
    # Create multiprocess pool
    if cpus > 1:
        pool = mp.Pool(cpus)
    else:
        pool = mp.Pool(1)
    try:
        # Extract COM ligand for each snapshot
        if not glob.glob("allTrajs/traj*"):
            extractCoords.main(lig_resname=ligand_resname, non_Repeat=True,
                               atom_Ids=atom_ids, nProcessors=cpus,
                               parallelize=True, topology=topology,
                               protein_CA=CA, sidechains=sidechains)

        print("Clusterize trajectories by RMSD of COM")
        trajectoryFolder = "allTrajs"
        trajectoryBasename = "*traj*"
        stride = 1
        clusterCountsThreshold = 0
        # NOTE(review): folders is not used below, kept in case the call is
        # relied on to fail early when the epochs can not be listed
        folders = utilities.get_epoch_folders(".")
        folders.sort(key=int)

        if not restart:
            clusteringObject = cluster.Cluster(num_clusters, trajectoryFolder,
                                               trajectoryBasename,
                                               alwaysCluster=True,
                                               stride=stride)
            clusteringObject.clusterTrajectories()
            clusteringObject.eliminateLowPopulatedClusters(
                clusterCountsThreshold)
            clusterCenters = clusteringObject.clusterCenters
            np.savetxt("clustercenters.dat", clusterCenters)
            dtrajs = clusteringObject.dtrajs

            print("Extract metrics for each snapshot")
            reports = simulationToCsv.gather_reports()
            fields = simulationToCsv.retrieve_fields(reports[0])
            df = simulationToCsv.init_df(fields)
            df = simulationToCsv.fill_data(reports, df, pool)

            print("Update data with metrics and clusters")
            df.index = range(df.shape[0])
            df["Cluster"] = [None] * df.shape[0]
            input_list = [
                [df, Traj, d]
                for d, Traj in zip(dtrajs, clusteringObject.trajFilenames)
            ]
            results = pool.map(save_to_df, input_list)
            for data in results:
                for df_tmp in data:
                    df.update(df_tmp)
            df.to_csv("Simulation.csv", index=False)
        else:
            df = pd.read_csv("Simulation.csv")
            clusterCenters = utilities.loadtxtfile("clustercenters.dat")
            print(clusterCenters)

        centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                       num_clusters, clusterCenters)
        COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

        print("Retrieve clusters and metric")
        fields1 = []
        fields2 = []
        print(centersInfo)
        for cluster_num in centersInfo:
            epoch_num, traj_num, snap_num = map(
                int, centersInfo[cluster_num]['structure'])
            field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                            snap_num, report)
            field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                            snap_num, report)
            fields1.append(field1)
            fields2.append(field2)

        if output_folder is not None:
            # joining with "" leaves a trailing separator, the file names are
            # appended to it below
            outputFolder = os.path.join(output_folder, "")
            if not os.path.exists(outputFolder):
                os.makedirs(outputFolder)
        else:
            outputFolder = ""

        print("Output structures")
        writePDB(
            COMArray, outputFolder +
            "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
        writeInitialStructures(fields1, fields2, crit1_name, crit2_name,
                               centersInfo,
                               outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                               traj, topology=topology, use_pdb=use_pdb)
        plotClusters(fields1, fields2, crit1_name, crit2_name, outputFolder,
                     png=png)
        assesClusterConvergence(df, num_clusters, traj, topology)
    finally:
        # release the worker processes, also when a step fails
        pool.close()
        pool.join()
    return
import glob
import numpy as np
from AdaptivePELE.utilities import utilities
import matplotlib.pyplot as plt

plt.style.use("ggplot")

# Scatter plot of the dihedral angles of each residue, with one figure per
# trajectory file of epoch 0
trajectory_files = glob.glob("dihedrals/allTrajs/traj_0_*")
colors = ["r", "k", "b", "g", "y", "c", "m", "olive", "p"]
n = 9
for f in trajectory_files:
    plt.figure()
    # the first column is dropped, the rest is converted to degrees
    data = np.rad2deg(utilities.loadtxtfile(f)[:, 1:])
    # terminal residues lack one of the dihedrals
    for residue, color in zip(range(2, n), colors):
        horizontal = data[:, residue + n - 3]
        vertical = data[:, residue - 1]
        plt.scatter(horizontal, vertical, color=color, marker=".",
                    label="Residue %d" % residue)
    plt.xlim(-180, 180)
    plt.ylim(-180, 180)
    plt.xlabel(r'$\Phi$ Angle (degrees)')
    plt.ylabel(r'$\Psi$ Angle (degrees)')
    plt.title(f)
    plt.legend()
plt.show()