def main():
    """
    List the interactions between nucleotides for each configuration in a trajectory.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file to analyze.
        -v outfile: Optional. Write per-particle averaged energies to this file as JSON
            instead of printing the bond lists.

    Without -v, the text returned by output_bonds is printed for every configuration.
    With -v, every non-comment, non-empty line of that text is read as
    "id1 id2 term ..." and the sum of the terms is added to both particles. The totals
    are multiplied by 41.42 / (number of configurations read) and written under the key
    "Energy (pN nm)" (presumably a unit conversion -- confirm the constant).
    """
    import argparse
    parser = argparse.ArgumentParser(prog=path.basename(__file__), description="List all the interactions between nucleotides")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze')
    parser.add_argument('-v', type=str, nargs=1, dest='outfile', help='if you want instead average per-particle energy as a viewer JSON')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]

    #argparse leaves args.outfile as None when -v is not given
    visualize = args.outfile is not None
    if visualize:
        outfile = args.outfile[0]

    #the topology named in the input file is resolved against the input file's directory
    if path.dirname(inputfile) != getcwd():
        sim_directory = path.dirname(inputfile)
    else:
        sim_directory = ""
    #path.join supplies the separator that plain string concatenation would drop
    top_file = path.join(sim_directory, get_input_parameter(inputfile, "topology"))

    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base #this needs to be imported after the model type is set

    myreader = LorenzoReader2(traj_file, top_file)
    mysystem = myreader._get_system()

    energies = np.zeros(mysystem.N)
    count = 0

    #the reader returns False once the trajectory is exhausted
    while mysystem != False:
        out = output_bonds(inputfile, mysystem)
        if visualize:
            for line in out.split('\n'):
                if line.startswith('#') or line == '':
                    continue
                fields = [float(value) for value in line.split(' ')]
                pair_energy = sum(fields[2:])
                energies[int(fields[0])] += pair_energy
                energies[int(fields[1])] += pair_energy
        else:
            print(out)

        count += 1
        mysystem = myreader._get_system()

    if visualize:
        energies *= (41.42 / count)
        with open(outfile, "w+") as file:
            file.write("{\n\"Energy (pN nm)\" : [")
            file.write(", ".join(str(e) for e in energies))
            file.write("] \n}")
def main():
    """
    Convert a mean structure .json file into an oxDNA-readable .dat file.

    Command-line arguments:
        mean: The mean structure file, read as JSON.
        output: The name of the file handed to make_dat.
    """
    #describe and parse the command line
    cli = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Converts a mean structure .json from compute_mean.py to an oxDNA-readable .dat",
    )
    cli.add_argument('mean', type=str, nargs=1,
                     help="A mean structure from compute_mean.py")
    cli.add_argument('output', type=str, nargs=1,
                     help="The name of the output file")
    parsed = cli.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #the mean structure is stored as JSON text
    mean_path = parsed.mean[0]
    with open(mean_path, "r") as handle:
        mean_info = loads(handle.read())

    #hand the parsed structure to the oxDNA writer
    target = parsed.output[0]
    make_dat(mean_info, target)
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args):
    """
    Distributes a function over a given number of processes

    Parameters:
        traj_file (str): The name of the trajectory file to analyze.
        top_file (str): The name of the topology file associated with the trajectory.
        function (function): The analysis function to be parallelized.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of processes to launch.
        *args: The arguments for the provided function.

    Returns:
        results (list): The results from each individual processor's run.

    Note: The manner in which to concatenate the results is function-specific so should be handled in the calling module.
    Note: The pool is closed and the temporary split files are removed even when a worker raises; the exception then propagates to the caller.
    """
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["pathos"])

    confs_per_processor = int(num_confs // n_cpus)
    processor_pool = pp.Pool(n_cpus)
    tmpfiles = []

    try:
        #split_starts and split_ends are around for backwards compatability with the old parallelize algorithm
        reader_pool, tmpfiles = split_trajectory(traj_file, top_file, num_confs, n_cpus, confs_per_processor)
        split_starts = [0 for r in reader_pool]
        split_ends = [confs_per_processor for r in reader_pool]

        #the first (num_confs % n_cpus) workers each take one of the leftover configurations
        rem = num_confs % n_cpus
        for i in range(rem):
            split_ends[i] += 1

        #Staple everything together, send it out to the workers, and collect the results as a list
        #Functions passed to this parallelizer must have the argument order defined by the lst variable (reader, <unique args>, number of configurations total, starting conf id, number of confs for this processor)
        #Unpacking *args inside a tuple display needs a reasonably recent Python 3; older interpreters reject this line
        lst = [(r, *args, num_confs, s, e) for r, s, e in zip(reader_pool, split_starts, split_ends)]

        #starmap allows you to have arguments that themselves are iterables
        #async because we don't actually care what order stuff finishes in.
        results = processor_pool.starmap_async(function, lst).get()
    finally:
        #release the workers and the temporary files whether or not the analysis succeeded
        processor_pool.close()
        for f in tmpfiles:
            f.close()
            remove(f.name)

    return results
def main():
    """
    Run all_vectors on every configuration of a trajectory.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file to step through.

    The value returned by all_vectors is discarded, so this entry point only checks
    that the computation runs to completion for each configuration.
    """
    #doesn't actually do anything...
    import argparse
    #import from the installed package, matching the other imports in this function
    from oxDNA_analysis_tools.UTILS.readers import LorenzoReader2, get_input_parameter
    parser = argparse.ArgumentParser(
        description="A python wrapper for getting all vectors between nucleotides from a simulation")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The file containing the configurations of which the contact map is needed")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base #this needs to be imported after the model type is set

    r = LorenzoReader2(traj_file, top_file)
    system = r._get_system()

    #the reader yields a falsy value once the trajectory is exhausted
    while system:
        all_vectors(inputfile, system, True)
        system = r._get_system()

    print("well, it finished...")
def main():
    """
    Build an external forces file from a dot-bracket description of the base pairing.

    Command-line arguments:
        db_file: Text file holding the dot-bracket string.
        -o/--output: Name of the force file (falls back to "external_forces.txt").
        -s/--strength: Strength passed to every mutual trap (falls back to 0.09).
    """
    cli = argparse.ArgumentParser(prog=os.path.basename(__file__),
                                  description="Create an external forces file enforcing the current base-pairing arrangement")
    cli.add_argument('db_file', type=str, nargs=1,
                     help="A text file containing dot-bracket notation of the base-pairing arrangement")
    cli.add_argument('-o', '--output', type=str, nargs=1,
                     help='Name of the file to write the force list to')
    cli.add_argument('-s', '--strength', type=float, nargs=1,
                     help='Strength of the forces')
    opts = cli.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #read the whole dot-bracket file
    with open(opts.db_file[0]) as handle:
        dot_bracket = handle.read()

    #use the requested strength, or 0.09 which won't explode most simulations
    if opts.strength:
        strength = opts.strength[0]
        strength_msg = "INFO: Using strength {}"
    else:
        strength = 0.09
        strength_msg = "INFO: No strength provided, defaulting to {}"
    print(strength_msg.format(strength), file=stderr)

    #one entry per particle; -1 marks an unpaired particle
    partners = parse_dot_bracket(dot_bracket)

    #one mutual trap for every particle that has a partner
    traps = [
        mutual_trap(particle, partner, strength, 1.2, 1)
        for particle, partner in enumerate(partners)
        if partner != -1
    ]

    #pick the destination and write the forces
    if opts.output:
        destination = opts.output[0]
        output_msg = "INFO: Writing forces to {}"
    else:
        destination = "external_forces.txt"
        output_msg = "INFO: No output filename found. Defaulting to {}"
    print(output_msg.format(destination), file=stderr)

    write_force_file(traps, destination)
def main():
    """
    Compare the bonds found in each configuration of a trajectory with the designed pairs.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file to compare against the design.
        designed_pairs: File listing the designed pairs, one "a b" pair per line.
        output_file: Name of the JSON overlay to write.
        -p num_cpus: Optional. Run the analysis through fire_multiprocess on this many cores.

    Prints the average number of bonds and "missbonds" per configuration and writes
    the per-nucleotide occupancy (accumulated array divided by the number of
    configurations analyzed) to output_file under the key "occupancy".
    Exits with status 1 if the analysis reports failure or no configuration was analyzed.
    """
    #read data from files
    parser = argparse.ArgumentParser(prog=path.basename(__file__), description="Compare the bonds found at each trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The trajecotry file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1, help="The file containing the desired nucleotides pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1, help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        tot_bonds, tot_missbonds, out_array, confid = bond_analysis(r, pairs, inputfile, num_confs)
        #a None bond count is treated as failure, mirroring the check in the parallel branch
        if tot_bonds is None:
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
            exit(1)

    if parallel:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs, inputfile)

        #combine the per-process totals, skipping chunks that reported failure
        tot_bonds = 0
        tot_missbonds = 0
        confid = 0
        for chunk in out:
            if chunk[0] is not None:
                tot_bonds += chunk[0]
                tot_missbonds += chunk[1]
                confid += chunk[3]
            else:
                print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the logs", file=stderr)

        #element-wise sum of the per-process occupancy arrays
        out_array = sum((chunk[2] for chunk in out if len(chunk[2]) > 0))

    #every average below divides by the number of analyzed configurations
    n_analyzed = int(confid)
    if n_analyzed == 0:
        print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
        exit(1)

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(tot_bonds / n_analyzed, tot_missbonds / n_analyzed))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [")
        file.write(", ".join(str(n / n_analyzed) for n in out_array))
        file.write("] \n}")
#!/usr/bin/env python3 from os import environ, path import sys import subprocess import tempfile import numpy as np from oxDNA_analysis_tools.config import set_analysis_path PROCESSPROGRAM = set_analysis_path() from oxDNA_analysis_tools.config import check_dependencies check_dependencies(["numpy"]) def contact_map(inputfile, mysystem, return_full_matrix): """ Computes the distance between every pair of nucleotides and creates a matrix of these distances. Parameters: inputfile (string): the input file with which the simulation was run, mysystem (base.System): a base.py system object containing the system to analyze. return_full_matrix (bool): The matrix is symmetric. Return only the lower half or the whole matrix? Returns: distances (numpy.array): The matrix containing pairwise distances between every pair of nucleotides. """ tempfile_obj = tempfile.NamedTemporaryFile()
def main():
    """
    Compute the contact map of every configuration in a trajectory.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file whose configurations are mapped.
        -v: Display each contact map with matplotlib as soon as it is computed.
    """
    import argparse
    import matplotlib.pyplot as plt
    from oxDNA_analysis_tools.UTILS.readers import LorenzoReader2, get_input_parameter
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #command line definition
    cli = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate and display the contact map for a structure")
    cli.add_argument('inputfile', type=str, nargs=1,
                     help="The inputfile used to run the simulation")
    cli.add_argument('trajectory', type=str, nargs=1,
                     help="The file containing the configurations of which the contact map is needed")
    cli.add_argument('-v', dest='visualize', action='store_true',
                     help="should we display the contact map once its calculated? Only recommend if there are few confs.")
    opts = cli.parse_args()

    show_maps = opts.visualize
    inputfile = opts.inputfile[0]
    traj_file = opts.trajectory[0]

    #the topology and the model type both come from the input file
    top_file = get_input_parameter(inputfile, "topology")
    is_rna = "RNA" in get_input_parameter(inputfile, "interaction_type")
    environ["OXRNA"] = "1" if is_rna else "0"

    #walk the trajectory one configuration at a time
    reader = LorenzoReader2(traj_file, top_file)
    conf = reader._get_system()
    while conf:
        distances = contact_map(inputfile, conf, True)
        if show_maps:
            #one figure per configuration, with a colorbar for the distance scale
            figure, axes = plt.subplots()
            image = axes.imshow(distances, cmap='viridis', origin='lower')
            axes.set(title="interaction network",
                     ylabel="nucleotide id",
                     xlabel="nucleotide id")
            colorbar = figure.colorbar(image, ax=axes)
            colorbar.set_label("distance", rotation=270)
            plt.show()
        conf = reader._get_system()
def main():
    """
    Compute pairwise eRMSDs between the configurations of a trajectory and cluster them with DBSCAN.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file to analyze.
        -p num_cpus: Optional. Compute the eRMSDs through fire_multiprocess on this many cores.

    The mirrored eRMSD matrix is pickled to "tmp_eRMSDs" in the working directory
    before being handed to perform_DBSCAN with the "precomputed" metric.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate differences between structures and automatically apply DBSCAN to retrieve clusters")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)
        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)

    if parallel:
        #fire_multiprocess only forwards positional arguments, so no keyword arguments are passed
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, get_eRMSDs, num_confs, n_cpus, r2, inputfile, traj_file, top_file)
        #element-wise sum of the per-process matrices
        eRMSDs = np.sum(list(out), axis=0)

    #eRMSDs = pickle.load(open('tmp_eRMSDs', 'rb'))

    #the eRMSD matrix is actually only half a matrix
    #mirror it across the diagonal and zero the diagonal; the copy direction depends on iteration order, so the loop is kept as is
    for ni, i in enumerate(eRMSDs):
        for nj, j in enumerate(i):
            eRMSDs[nj][ni] = j
            if ni == nj:
                eRMSDs[ni][nj] = 0

    #since calculating the eRMSDs are so time-consuming to calculate we're gonna pickle it to iterate the DBSCAN later.
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    ###############################################################################################################
    #Next, we're going to perform a DBSCAN on that matrix of eRMSDs to find clusters of similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12, 8)
def main():
    """
    Run a principal component analysis of nucleotide deviations over a trajectory.

    Command-line arguments:
        inputfile: The input file used to run the simulation.
        trajectory: The trajectory file to analyze.
        meanfile: The mean structure, as .json (key 'g_mean') or an oxDNA
            configuration with extension .dat, .conf or .oxdna.
        outfile: Name of the .json overlay; the component index is inserted before the extension.
        -p num_cpus: Optional. Run the trajectory passes through fire_multiprocess.
        -c: Optional. Run perform_DBSCAN on the configurations' coordinates in component space.

    Side effects: writes "scree.png", "coordinates.png" and, when outfile ends in
    .json, one overlay file per component for the first N components.
    Exits with status 1 if the mean file has an unrecognized extension.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculates a principal component analysis of nucleotide deviations over a trajectory")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze')
    parser.add_argument('meanfile', type=str, nargs=1, help='The mean structure .json file from compute_mean.py')
    parser.add_argument('outfile', type=str, nargs=1, help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")
    parser.add_argument('-c', metavar='cluster', dest='cluster', action='store_const', const=True, default=False, help="Run the clusterer on each configuration's position in PCA space?")
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster

    num_confs = cal_confs(traj_file)

    #the mean structure can come from compute_mean's json or from an oxDNA configuration
    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif mean_ext in ("dat", "conf", "oxdna"):
        with ErikReader(mean_file) as reader:
            align_conf = reader.read().positions
    else:
        print("ERROR: {} is an unrecognized file type. \nThe mean structure must either be provided as an oxDNA configuration file with the extension .dat, .conf or .oxdna or as the .json file produced by compute_mean.py.".format(mean_file), file=stderr)
        exit(1)

    cms = np.mean(align_conf, axis=0) #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if not parallel:
        r = ErikReader(traj_file)
        covariation_matrix = get_cov(r, align_conf, num_confs)

    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(traj_file, get_cov, num_confs, n_cpus, align_conf)
        covariation_matrix = np.sum([i for i in out], axis=0)

    covariation_matrix /= (num_confs - 1)

    #now that we have the covatiation matrix we're going to use eigendecomposition to get the principal components.
    #make_heatmap(covariance)
    print("INFO: calculating eigenvectors", file=stderr)
    evalues, evectors = np.linalg.eig(covariation_matrix)
    #eig makes no promise about ordering, so sort the eigenpairs by decreasing eigenvalue explicitly
    order = np.argsort(evalues.real)[::-1]
    evalues = evalues[order]
    evectors = evectors[:, order]
    evectors = evectors.T #vectors come out as the columns of the array
    print("INFO: eigenvectors calculated", file=stderr)

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    #count how many leading components are needed to reach 90% of the total variance
    total = sum(evalues)
    running = 0
    n_components = 0
    while running < 0.9 and n_components < len(evalues):
        running += (evalues[n_components] / total)
        n_components += 1

    print("90% of the variance is found in the first {} components".format(n_components))

    #if you want to weight the components by their eigenvectors
    #mul = np.einsum('ij,i->ij',evectors, evalues)
    mul = evectors

    #reconstruct configurations in component space
    #because we don't save the difference matrix, this involves running through the whole trajectory again
    if not parallel:
        r = ErikReader(traj_file)
        coordinates = change_basis(r, align_conf, mul, num_confs)
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(traj_file, change_basis, num_confs, n_cpus, align_conf, mul)
        coordinates = np.concatenate([i for i in out])

    #make a quick plot from the first three components
    print("INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png", file=stderr)
    from mpl_toolkits.mplot3d import Axes3D #registers the '3d' projection on older matplotlib versions
    fig = plt.figure()
    #fig.gca(projection=...) is no longer accepted by current matplotlib; add_subplot works across versions
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(coordinates[:, 0], coordinates[:, 1], coordinates[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlays for the first N components
    N = 3
    print("INFO: Change the number of eigenvalues to sum and display by modifying the N variable in the script.  Current value: {}".format(N), file=stderr)

    #splitext keeps directories and names containing dots intact
    overlay_root, overlay_ext = path.splitext(outfile)
    if overlay_ext != ".json":
        print("ERROR: oxView overlays must have a '.json' extension.  No overlays will be produced", file=stderr)
    else:
        for i in range(0, min(N, len(evalues))): #how many eigenvalues do you want?
            f = overlay_root + str(i) + overlay_ext
            out = np.sqrt(evalues[i]) * evectors[i]

            with catch_warnings(): #this produces an annoying warning about casting complex values to real values that is not relevant
                simplefilter("ignore")
                output_vectors = out.reshape(int(out.shape[0] / 3), 3).astype(float)

            with open(f, "w+") as file:
                file.write(dumps({"pca": output_vectors.tolist()}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...", file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from oxDNA_analysis_tools.clustering import perform_DBSCAN
        labs = perform_DBSCAN(coordinates, num_confs, traj_file, inputfile, "euclidean", 12, 8)
def main():
    """
    Compute the mean backbone torsion and dihedral values over a trajectory.

    Command-line arguments:
        trajectory: The trajectory file to analyze.
        topology: The topology file that belongs to the trajectory.
        outfile: Name of the .json file that receives the mean torsions.
        -p num_cpus: Optional. Run get_internal_coords through fire_multiprocess.

    Shows a scatter plot of mean torsion against mean dihedral, then writes the
    mean torsions (with the first value repeated twice at the front) under the
    key "torsion".
    """
    cli = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the deviations in the backbone torsion angles")
    cli.add_argument('trajectory', type=str, nargs=1,
                     help='the trajectory file you wish to analyze')
    cli.add_argument('topology', type=str, nargs=1,
                     help="The topology file associated with the trajectory file")
    cli.add_argument('outfile', type=str, nargs=1,
                     help='The file name for the output .json file.')
    cli.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel',
                     help="(optional) How many cores to use")
    opts = cli.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    top_file = opts.topology[0]
    traj_file = opts.trajectory[0]
    parallel = opts.parallel
    if parallel:
        n_cpus = opts.parallel[0]

    num_confs = cal_confs(traj_file)
    reader = LorenzoReader2(traj_file, top_file)

    if parallel:
        per_process = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_internal_coords, num_confs, n_cpus)
        #each entry is (torsions, dihedrals) for one process; join them along axis 1
        chunks = [per_process[k] for k in range(n_cpus)]
        torsions = np.concatenate([chunk[0] for chunk in chunks], axis=1)
        dihedrals = np.concatenate([chunk[1] for chunk in chunks], axis=1)
    else:
        torsions, dihedrals = get_internal_coords(reader, num_confs)

    #average along axis 1 and convert to plain lists
    torsion_mean = np.mean(torsions, axis=1).tolist()
    dihedral_mean = np.mean(dihedrals, axis=1).tolist()

    #make something akin to a ramachandran plot for DNA origami??
    import matplotlib.pyplot as plt
    plt.scatter(torsion_mean[1:], dihedral_mean)
    plt.xlabel("torsion_angle")
    plt.ylabel("dihedral_angle")
    plt.show()

    #pad the front of the list with two copies of the first value
    torsion_mean[:0] = [torsion_mean[0]] * 2
    with open(opts.outfile[0], "w") as handle:
        handle.write(dumps({"torsion": torsion_mean}))
def _append_timestep_angles(axes, search1, search2, angle_lists):
    """Append one angle per (p1, p2) pair for a timestep, or NaN when either duplex axis is still the zero vector."""
    for j, (p1, p2) in enumerate(zip(search1, search2)):
        if np.linalg.norm(axes[p1]) != 0 and np.linalg.norm(axes[p2]) != 0:
            #add a -90 here if your duplexes in question are antiparallel
            angle_lists[j].append(rad2degree(angle_between(axes[p1], -1 * axes[p2])))
        else:
            angle_lists[j].append(np.nan)


def _with_suffix(filename, suffix):
    """Insert suffix between the root and the extension of filename."""
    root, ext = path.splitext(filename)
    return root + suffix + ext


def _strip_json_extension(filename):
    """Remove one trailing '.json' from filename, if present."""
    return filename[:-len('.json')] if filename.endswith('.json') else filename


def _print_row(label, fmt, values):
    """Print label followed by every value rendered with fmt, all on one line."""
    print(label + "".join(fmt.format(v) for v in values))


def main():
    """
    Plot the angles between pairs of duplexes listed in one or more angle files.

    Command-line arguments:
        -i/--input: An angle file followed by duplex-end particle ids, read as
            alternating p1 p2 pairs. May be given several times.
        -o/--output: Name of the graph file (falls back to "angle.png").
        -f/--format: "histogram" (default), "trajectory" or "both".
        -d/--data: If set, dump the angle lists as JSON, one file per -i group.
        -n/--names: Names of the data series; padded with or truncated to the
            number of particle pairs.

    Prints mean, standard deviation, median and the fraction of configurations in
    which each pair was found, then saves the requested plots.
    Exits with status 1 on missing or malformed arguments.
    """
    #Get command line arguments.
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Finds the ensemble of angles between any two duplexes defined by a starting or ending nucleotide in the system")
    parser.add_argument('-i', '--input', metavar='angle_file', dest='input', nargs='+', action='append', help='An angle file from duplex_angle_finder.py and a list of duplex-end particle pairs to compare.  Can call -i multiple times to plot multiple datasets.')
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1, help='The name to save the graph file to')
    parser.add_argument('-f', '--format', metavar='<histogram/trajectory/both>', nargs=1, help='Output format for the graphs.  Defaults to histogram.  Options are \"histogram\", \"trajectory\", and \"both\"')
    parser.add_argument('-d', '--data', metavar='data_file', nargs=1, help='If set, the output for the graphs will be dropped as a json to this filename for loading in oxView or your own scripts')
    parser.add_argument('-n', '--names', metavar='names', nargs='+', action='append', help='Names of the data series.  Will default to particle ids if not provided')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #argparse leaves args.input as None when -i was never given
    if not args.input:
        print("ERROR: Failed to read files", file=stderr)
        parser.print_help()
        exit(1)

    #each -i group is: file p1 p2 [p1 p2 ...]
    files = [group[0] for group in args.input]
    p1s = [group[1::2] for group in args.input]
    p2s = [group[2::2] for group in args.input]
    n_angles = sum(len(p) for p in p1s)

    #Make sure that the input is correctly formatted: every p1 needs a matching p2
    if any(len(p1) != len(p2) for p1, p2 in zip(p1s, p2s)):
        print("ERROR: bad input arguments\nPlease supply an equal number of trajectory and particle pairs", file=stderr)
        exit(1)

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        #without -o the plots are always saved, as the message states
        print("INFO: No outfile name provided, defaulting to \"angle.png\"", file=stderr)
        outfile = "angle.png"

    #-f defines which type of graph to produce
    hist = False
    line = False
    if args.format:
        if "histogram" in args.format:
            hist = True
        if "trajectory" in args.format:
            line = True
        if "both" in args.format:
            hist = line = True
        if not hist and not line:
            print("ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"", file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram", file=stderr)
        hist = True

    all_angles = [[] for _ in files]
    means = [[] for _ in files]
    medians = [[] for _ in files]
    stdevs = [[] for _ in files]
    representations = [[] for _ in files]

    #For each input triplet
    for i, (anglefile, search1, search2) in enumerate(zip(files, p1s, p2s)):
        steps = 0 #counts the number of configurations in the file
        last_step = None #None so that the first timestep always registers as new, even if it is 0
        count = 0 #counts the number of search targets found
        all_angles[i] = [[] for _ in search1]
        found = False

        #the format of the angle file is as follows: (tbh this should be a JSON)
        # 0: time
        # 1: duplex id
        # 2: strand 1 start nucleotide id
        # 3: strand 1 end nucleotide id
        # 4: strand 2 start nucleotide id
        # 5: strand 2 end nucleotide id
        # 6: X-component of the axis vector
        # 7: Y-component of the axis vector
        # 8: Z-component of the axis vector
        # 9: Helix position
        all_search = search1 + search2
        d = {s: np.array([0, 0, 0]) for s in all_search}
        with open(anglefile) as file:
            next(file, None) #the first line is a header, so it can be dropped
            for raw in file:
                l = raw.split("\t")
                try:
                    t = float(l[0])
                except ValueError as e:
                    print("ERROR: The following line is incorrectly formatted:")
                    print(l)
                    print("The error was:\n", e)
                    print("skiping the line")
                    continue

                #dump values and reset if we're in a new time (but also skip the first pass)
                if t != last_step:
                    if steps != 0:
                        _append_timestep_angles(d, search1, search2, all_angles[i])
                    found = False
                    steps += 1
                    d = dict.fromkeys(d, np.array([0, 0, 0]))
                    count = 0

                last_step = t

                #don't need to do anything if both angles were already found for this timestep
                if found:
                    continue

                #look for the nucleotide IDs among the strand start/end columns
                for s in all_search:
                    if s in l[2:6]:
                        d[s] = np.array([float(l[6]), float(l[7]), float(l[8])])
                        count += 1

                #once all are found, the remaining lines of this timestep can be skipped
                if count == len(d):
                    found = True

        #catch last configuration
        if steps:
            _append_timestep_angles(d, search1, search2, all_angles[i])

        #compute some statistics
        all_angles[i] = [np.array(a) for a in all_angles[i]]
        means[i] = [np.nanmean(a) for a in all_angles[i]]
        medians[i] = [np.nanmedian(a) for a in all_angles[i]]
        stdevs[i] = [np.nanstd(a) for a in all_angles[i]]
        #fraction of configurations in which the pair was found
        representations[i] = [(np.count_nonzero(~np.isnan(a)) / steps) if steps else 0.0 for a in all_angles[i]]

    #for i, m in enumerate(means):
    #    if m > 90:
    #        all_angles[i] = [180 - a for a in all_angles[i]]
    #        means[i] = 180 - m
    #        medians[i] = 180 - medians[i]

    default_names = [
        "{}-{}".format(p1, p2)
        for p1, p2 in zip([p for sl in p1s for p in sl], [p for sl in p2s for p in sl])
    ]

    # -n sets the names of the data series
    if args.names:
        #-n may be repeated; use the names from every occurrence, in order
        names = [n for group in args.names for n in group]
        if len(names) < n_angles:
            print("WARNING: Names list too short.  There are {} items in names and {} angles were calculated.  Will pad with particle IDs".format(len(names), n_angles), file=stderr)
            names.extend(default_names[len(names):])
        if len(names) > n_angles:
            print("WARNING: Names list too long. There are {} items in names and {} angles were calculated.  Truncating to be the same as distances".format(len(names), n_angles), file=stderr)
            names = names[:n_angles]
    else:
        print("INFO: Defaulting to particle IDs as data series names")
        names = default_names

    # -d will dump the distances as json files for loading with the trajectories in oxView
    if args.data:
        from json import dump
        #str.strip('.json') would strip characters, not the suffix
        data_root = _strip_json_extension(args.data[0])
        if len(files) > 1:
            print("INFO: angle lists from separate trajectories are printed to separate files for oxView compatibility.  Trajectory names will be appended to your provided data file name.", file=stderr)
            file_names = ["{}_{}.json".format(data_root, k) for k in range(len(files))]
        else:
            file_names = [data_root + '.json']
        names_by_traj = [['{}-{}'.format(p1, p2) for p1, p2 in zip(p1l, p2l)] for p1l, p2l in zip(p1s, p2s)]

        for file_name, ns, ang_list in zip(file_names, names_by_traj, all_angles):
            obj = {n: list(a) for n, a in zip(ns, ang_list)}
            with open(file_name, 'w+') as f:
                print("INFO: writing data to {}.  This can be opened in oxView using the Order parameter selector".format(file_name))
                dump(obj, f)

    #print statistical information
    _print_row("name:\t", "{}\t", names[:n_angles])
    _print_row("mean:\t", "{:.2f}\t", [m for sl in means for m in sl])
    _print_row("stdevs:\t", "{:.2f}\t", [s for sl in stdevs for s in sl])
    _print_row("median:\t", "{:.2f}\t", [m for sl in medians for m in sl])
    _print_row("freqs:\t", "{:.2f}\t", [r for sl in representations for r in sl])

    import matplotlib.pyplot as plt

    #make a histogram
    if hist:
        out = _with_suffix(outfile, "_hist") if line else outfile
        bins = np.linspace(0, 180, 60)
        series = 0 #running index over every data series, across all input files
        for traj_set in all_angles:
            for alist in traj_set:
                plt.hist(alist, bins, weights=np.ones(len(alist)) / len(alist), alpha=0.3, label=names[series], histtype=u'stepfilled', edgecolor='k')
                series += 1
        plt.legend(labels=names)
        plt.xlim((0, 180))
        plt.xlabel("Angle (degrees)")
        plt.ylabel("Normalized frequency")
        print("INFO: Saving histogram to {}".format(out), file=stderr)
        plt.savefig(out)

    #make a trajectory plot
    if line:
        if hist:
            #start from a clean figure so the histogram is not drawn underneath
            plt.clf()
            out = _with_suffix(outfile, "_traj")
        else:
            out = outfile
        for traj_set in all_angles:
            for alist in traj_set:
                plt.plot(alist)
        plt.legend(labels=names)
        plt.xlabel("Configuration Number")
        plt.ylabel("Angle (degrees)")
        print("INFO: Saving line plot to {}".format(out), file=stderr)
        plt.savefig(out)
def main():
    """Command-line entry point: compute the mean structure of a trajectory.

    Parses the command line, picks a reference configuration to align to,
    accumulates the aligned positions/orientations (serially or in parallel
    chunks), and writes the mean as json and/or an oxDNA .dat file.
    Optionally launches compute_deviations on the result (-d).
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the mean structure of a trajectory file")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1,
                        help='The filename to save the mean structure to')
    # The help text now states the default the code actually applies (oxDNA).
    parser.add_argument(
        '-f', '--format', metavar='<json/oxDNA/both>', nargs=1,
        help='Output format for the mean file. Defaults to oxDNA. Options are \"json\", \"oxdna/oxDNA\", and \"both\"')
    parser.add_argument(
        '-d', '--deviations', metavar='deviation_file', nargs=1,
        help='Immediatley run compute_deviations.py from the output')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Compute mean structure of a subset of particles from a space-separated list in the provided file')
    parser.add_argument(
        '-a', '--align', metavar='alignment_configuration', nargs=1,
        help='The id of the configuration to align to, otherwise random')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    #get file names
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]

    #-f defines the format of the output file
    if args.format:
        fmt = args.format[0]
        outjson = fmt in ("json", "both")
        outoxdna = fmt in ("oxDNA", "oxdna", "both")
        if not (outjson or outoxdna):
            print(
                "ERROR: unrecognized output format\nAccepted formats are \"json\", \"oxDNA/oxdna\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No output format specified, defaulting to oxDNA",
              file=stderr)
        outjson = False
        outoxdna = True

    #-o names the output file
    # out_base is the output name without its final extension. It is used
    # whenever an extension has to be swapped. splitext only touches the last
    # component, so "./results/mean.dat" keeps its directory.
    if args.output:
        outfile = args.output[0]
        out_base = os.path.splitext(outfile)[0]
    else:
        if outjson and outoxdna:
            ext = ".json/.dat"
        elif outjson:
            ext = ".json"
        else:
            ext = ".dat"
        outfile = "mean{}".format(ext)
        out_base = "mean"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    #-d will run compute_deviations.py when this script is completed.
    dev_file = args.deviations[0] if args.deviations else None

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # Continuing with string indexes would only fail later with an
            # obscure indexing error, so stop here.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)
    else:
        # No subset requested: use every particle in the first configuration.
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #calculate the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    # Choose the reference configuration which is used to define alignment
    # (the one requested with -a, otherwise left to
    # pick_starting_configuration).
    align = args.align[0] if args.align else None
    align_conf_id, align_poses = pick_starting_configuration(
        traj_file, num_confs, align)
    # we are just interested in the nucleotide positions
    align_conf = align_poses.positions[indexes]

    #Actually compute mean structure
    if not parallel:
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(align_conf)), file=stderr)
        r = ErikReader(traj_file)
        (mean_pos_storage, mean_a1_storage, mean_a3_storage,
         intermediate_mean_structures, processed_frames) = compute_mean(
             r, align_conf, indexes, num_confs)
    else:
        #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
        #Each of those chunks is then calculated seperatley and the result is summed.
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(align_conf), n_cpus), file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_mean, num_confs, n_cpus, align_conf, indexes)
        mean_pos_storage = np.sum(np.array([i[0] for i in out]), axis=0)
        mean_a1_storage = np.sum(np.array([i[1] for i in out]), axis=0)
        mean_a3_storage = np.sum(np.array([i[2] for i in out]), axis=0)
        intermediate_mean_structures = []
        for chunk in out:
            intermediate_mean_structures.extend(chunk[3])
        processed_frames = sum(i[4] for i in out)

    # finished task entry
    print("INFO: processed frames total: {}".format(processed_frames),
          file=stderr)

    #Convert mean structure to a json file
    mean_file = dumps({
        "i_means": intermediate_mean_structures,
        "g_mean": prep_pos_for_json(mean_pos_storage / processed_frames),
        "a1_mean": prep_pos_for_json(
            [normalize(v) for v in (mean_a1_storage / processed_frames)]),
        "a3_mean": prep_pos_for_json(
            [normalize(v) for v in (mean_a3_storage / processed_frames)]),
        "p_frames": processed_frames,
        "ini_conf": {
            "conf": prep_pos_for_json(align_conf),
            "id": align_conf_id
        }
    })

    #Save the mean structure to the specified output file.
    # The json is also written when only -d was given, because
    # compute_deviations reads it.
    if outjson or dev_file:
        #if making both outputs, automatically set file extensions.
        jsonfile = out_base + ".json" if outoxdna else outfile
        print("INFO: Writing mean configuration to", jsonfile, file=stderr)
        with open(jsonfile, "w") as file:
            file.write(mean_file)

    if outoxdna:
        #if making both outputs, automatically set file extensions.
        outname = out_base + ".dat" if outjson else outfile
        from oxDNA_analysis_tools.mean2dat import make_dat
        make_dat(loads(mean_file), outname)

    #If requested, run compute_deviations.py using the output from this script.
    if dev_file:
        print("INFO: launching compute_deviations.py", file=stderr)
        dev_base = os.path.splitext(dev_file)[0]

        #this is probably horrible practice, but to maintain the ability to call things from the command line, I cannot pass arguments between main() calls.
        #so instead we're gonna spoof a global variable to make it look like compute_deviations was called explicitally
        argv.clear()
        argv.extend([
            'compute_deviations.py', '-o', dev_file,
            "-r", dev_base + "_rmsd.png",
            "-d", dev_base + "_rmsd_data.json"
        ])
        if args.index_file:
            argv.append("-i")
            argv.append(index_file)
        if parallel:
            argv.append("-p")
            argv.append(str(n_cpus))
        argv.append(jsonfile)
        argv.append(traj_file)

        from oxDNA_analysis_tools import compute_deviations
        from sys import executable
        print(executable)
        print(argv)

        compute_deviations.main()

        #compute_deviations needs the json meanfile, but its not useful for visualization
        #so we dump it
        if not outjson:
            print("INFO: deleting {}".format(jsonfile), file=stderr)
            os.remove(jsonfile)

    # NOTE(review): start_t is not assigned in this function; presumably a
    # module-level start timestamp - confirm.
    print(time.time() - start_t)
def main():
    """Command-line entry point: align every frame of a trajectory.

    Each configuration is superimposed onto a reference (the first frame, or
    the structure given with -r), optionally using only the particles listed
    in an index file (-i), and written to a new trajectory file.
    """
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj', type=str, nargs=1,
                        help="The trajectory file to align")
    parser.add_argument(
        'outfile', type=str, nargs=1,
        help='The name of the new trajectory file to write out')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Align to only a subset of particles from a space-separated list in the provided file')
    parser.add_argument(
        '-r', metavar='reference_structure', dest='reference_structure',
        nargs=1,
        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # String indexes cannot be used to slice positions; abort instead
            # of failing later with an unrelated-looking error.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)
    else:
        # No subset requested: use every particle in the first configuration.
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration
        with ErikReader(args.reference_structure[0]) as ref_reader:
            ref = ref_reader.read()
            ref.inbox()
        ref_conf = ref.positions[indexes]

        r = ErikReader(traj_file)
        # Pass indexes here as well, exactly like the per-frame call in the
        # loop below, so the first frame is aligned on the same particle
        # subset that ref_conf was built from.
        mysystem = align_frame(ref_conf, sup, r.read(), indexes)
    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    # The loop stops once read() yields a value equal to False.
    while mysystem != False:
        print("working on t = ", mysystem.time)
        mysystem = align_frame(ref_conf, sup, mysystem, indexes)
        mysystem.write_append(outfile)
        mysystem = r.read()
def perform_DBSCAN(points, num_confs, traj_file, inputfile, metric_name, eps,
                   min_samples):
    """
    Runs the DBSCAN algorithm using the provided analysis as positions and splits the trajectory into clusters.

    Side effects: serializes the inputs to "cluster_data.json", saves a
    cluster plot ("animated.mp4" for three plotted dimensions, "plot.png"
    otherwise), calls get_centroid when the metric is "precomputed", and
    calls split_trajectory with the resulting labels.

    Parameters:
        points (numpy.array): The points fed to the clstering algorithm.
        num_confs (int): The number of configurations in the trajectory.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        metric_name (str): The type of data the points represent (usually either "euclidean" or "precomputed").
        eps: The pairwise distance below which configurations are considered neighbors (passed to DBSCAN).
        min_samples: The smallest number of neighboring configurations required to start a cluster (passed to DBSCAN).

    Returns:
        labels (numpy.array): The clusterID of each configuration in the trajectory.
    """
    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "sklearn", "matplotlib"])

    print("INFO: Running DBSCAN...", file=stderr)

    #dump the input as a json file so you can iterate on eps and min_samples
    dump_file = "cluster_data.json"
    print("INFO: Serializing input data to {}".format(dump_file), file=stderr)
    print("INFO: Run just clustering.py with the serialized data to adjust clustering parameters", file=stderr)
    out = [points.tolist(), num_confs, traj_file, inputfile, metric_name]
    # Use a context manager so the dump is flushed and the handle is closed.
    with codecs.open(dump_file, 'w', encoding='utf-8') as dump_handle:
        dump(out, dump_handle, separators=(',', ':'), sort_keys=True,
             indent=4)

    #prepping to show the plot later
    #this only shows the first three dimensions because we assume that this is either PCA data or only a few dimensions anyway
    dimensions = [points[:, j] for j in range(min(points.shape[1], 3))]

    #DBSCAN parameters:
    #eps: the pairwise distance that configurations below are considered neighbors
    #min_samples: The smallest number of neighboring configurations required to start a cluster
    #metric: If the matrix fed in are points in n-dimensional space, then the metric needs to be "euclidean".
    #        If the matrix is already a square distance matrix, the metrix needs to be "precomputed".
    #the eps and min_samples need to be determined for each input based on the values of the input data
    #If you're making your own multidimensional data, you probably want to normalize your data first.
    print("INFO: Adjust clustering parameters by adding the -e and -m flags to the invocation of this script.", file=stderr)
    print("INFO: Current values: eps={}, min_samples={}".format(eps, min_samples))
    db = DBSCAN(eps=eps, min_samples=min_samples, metric=metric_name).fit(points)
    labels = db.labels_

    # The label -1 is not counted as a cluster.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print ("Number of clusters:", n_clusters_)

    print("INFO: Making cluster plot...")
    three_d = len(dimensions) == 3
    fig = plt.figure()
    if three_d:
        ax = fig.add_subplot(111, projection='3d')
    else:
        ax = fig.add_subplot(1, 1, 1)

    plt.xlabel("OP0")
    plt.ylabel("OP1")
    colormap = plt.get_cmap('tab10', n_clusters_ + 1)

    if three_d:
        ax.set_zlabel("OP2")

        #to make a video showing a rotating plot
        plot_file = "animated.mp4"

        def init():
            a = ax.scatter(dimensions[0], dimensions[1], dimensions[2], s=2,
                           alpha=0.4, c=labels, cmap=colormap)
            fig.colorbar(a, ax=ax)
            return [fig]

        def animate(i):
            ax.view_init(elev=10., azim=i)
            return [fig]

        anim = animation.FuncAnimation(fig, animate, init_func=init,
                                       frames=range(360), interval=20,
                                       blit=True)
        anim.save(plot_file, fps=30, extra_args=['-vcodec', 'libx264'])
    else:
        plot_file = "plot.png"
        if len(dimensions) == 1:
            # One-dimensional data: plot the value against its index.
            dimensions.append(np.arange(len(dimensions[0])))
            a = ax.scatter(dimensions[1], dimensions[0], s=2, alpha=0.4,
                           c=labels, cmap=colormap)
        else:
            a = ax.scatter(dimensions[0], dimensions[1], s=2, alpha=0.4,
                           c=labels, cmap=colormap)
        fig.colorbar(a, ax=ax)
        plt.savefig(plot_file)
    print("INFO: Saved cluster plot to {}".format(plot_file), file=stderr)

    if metric_name == "precomputed":
        get_centroid(points, metric_name, num_confs, labels, traj_file,
                     inputfile)

    split_trajectory(traj_file, inputfile, labels, n_clusters_)

    return labels
def main():
    """Command-line entry point: build a mean structure from local contacts.

    Averages the interparticle distance map over the trajectory, masks every
    distance that is not below the cutoff, embeds the remaining distances in
    3D with multidimensional scaling, writes the result as a .dat/.top pair,
    and finally writes per-particle contact deviations as an oxView overlay
    json.
    """
    #at 2.5 you start to see the hard edges caused by end-loops and see some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate molecular contacts, and assembles an average set of contacts based on MDS")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile', type=str, nargs=1,
        help='the name of the .dat file where the mean will be written')
    parser.add_argument(
        'devfile', type=str, nargs=1,
        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the mean distance to all other particles
    if not parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using 1 core."
            .format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)
    else:
        print(
            "INFO: Computing interparticle distances of {} configurations using {} cores."
            .format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        # Each chunk returns a partial sum; add them up.
        cartesian_distances = np.sum(np.array([i for i in out]), axis=0)
    mean_distance_map = cartesian_distances * (1 / (num_confs))

    #Making a new configuration file from scratch is hard, so we're just going to read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    # Mask every entry that is not strictly below the cutoff.
    masked_mean = np.ma.masked_array(
        mean_distance_map, ~(mean_distance_map < cutoff_distance))

    # History: solving this analytically with DGSOL was tried, but origamis
    # were too big. Several embedding algorithms (e.g. locally linear
    # embedding) were tried as well; MDS worked best.
    print("INFO: fitting local distance data", file=stderr)

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3, metric=True, max_iter=3000, eps=1e-12,
              dissimilarity="precomputed", n_jobs=1, n_init=1)
    out_coords = mds.fit_transform(masked_mean)

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        #since the orientation vectors are all 0, this cannot be used in a simulation, but the viewer will handle it
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array([0, 0, 0])

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile),
                                       "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(
        meanfile, meanfile), file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances
    if not parallel:
        # This message is only emitted in the serial case; it used to be
        # printed in parallel runs too, contradicting the message below.
        print(
            "INFO: Computing distance deviations of {} configurations using 1 core."
            .format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance,
                        num_confs)
    else:
        print(
            "INFO: Computing distance deviations of {} configurations using {} cores."
            .format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array([i for i in out]), axis=0)

    #Dump the deviations to an oxView overlay file
    #mask all the 0s so they don't contribute to the mean
    devs = np.ma.masked_array(devs, ~(devs != 0.0))
    devs *= (1 / num_confs)
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)
def main():
    """Command-line entry point: fit vectors to every duplex in a trajectory.

    Runs find_angles over the trajectory (serially or in parallel chunks) and
    writes one tab-separated line per duplex per configuration to the output
    file.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o', '--output', metavar='output_file', type=str,
                        nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    # NOTE(review): the result of this read is never used; kept in case the
    # reader has side effects other code relies on - confirm.
    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print(
            "INFO: Fitting duplexes to {} configurations using 1 core.".format(
                num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)
    else:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus), file=stderr)
        duplexes_at_step = []
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        for chunk in out:
            duplexes_at_step.extend(chunk)

    # An empty entry means no duplex data was produced for that step.
    if [] in duplexes_at_step:
        print(
            "WARNING: Some configurations were invalid and not included in the analysis.  Please check the log to view the error",
            file=stderr)

    #print duplexes to a file
    print(
        "INFO: Writing duplex data to {}.  Use duplex_angle_plotter to graph data"
        .format(outfile), file=stderr)
    row = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'
    # The context manager closes the file even if a duplex record is
    # malformed and formatting raises.
    with open(outfile, 'w') as output:
        output.write(
            "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n"
        )
        for step in duplexes_at_step:
            for d in step:
                output.write(row.format(
                    d.time, d.index, d.start1, d.end1, d.start2, d.end2,
                    d.axis[0], d.axis[1], d.axis[2],
                    d.final_hel_pos[0], d.final_hel_pos[1],
                    d.final_hel_pos[2]))
def main():
    """Command-line entry point: find the centroid configuration.

    Loads a mean structure (.json from compute_mean.py or an oxDNA .dat),
    runs compute_centroid over the trajectory (serially or in parallel
    chunks), keeps the candidate with the smallest reported RMSF, and writes
    it out as an oxDNA .dat file.
    """
    #handle commandline arguments
    #the positional arguments for this are:
    # 1. the mean structure from compute_mean.py in json format
    # 2. the trajectory from which to compute the centroid
    #the name of the file to write the centroid to is given with -o.
    #It should be a .dat because oxView uses file extensions
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py")
    parser.add_argument(
        'mean_structure', type=str, nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1,
                        help='The filename to save the centroid to')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Compute mean structure of a subset of particles from a space-separated list in the provided file')
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])
    # Imported locally (after the dependency check) because this function
    # needs it to index the json mean structure.
    import numpy as np

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
    else:
        outfile = "centroid.dat"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # String indexes cannot be used to slice positions; abort now.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)
    else:
        # No subset requested: use every particle in the first configuration.
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # load mean structure
    mean_file = args.mean_structure[0]
    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            # json yields plain lists, which cannot be indexed with a list of
            # ints; convert to an array first, matching the .dat branch.
            mean_structure = np.array(load(file)['g_mean'])[indexes]
    elif mean_ext == "dat":
        with ErikReader(mean_file) as reader:
            s = reader.read()
            mean_structure = s.positions[indexes]
    else:
        # Without this, mean_structure would be undefined further down.
        print(
            "ERROR: unrecognized mean structure file \"{}\"; expected a .json or .dat file"
            .format(mean_file), file=stderr)
        exit(1)
    print("INFO: mean structure loaded", file=stderr)

    #Calculate centroid, in parallel if available
    if not parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using 1 core."
            .format(num_confs), file=stderr)
        r = ErikReader(traj_file)
        (centroid, centroid_a1s, centroid_a3s, centroid_rmsf,
         centroid_time) = compute_centroid(r, mean_structure, indexes,
                                           num_confs)
    else:
        #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
        #Each of those chunks is then calculated seperatley and the results are compiled .
        print(
            "INFO: Computing centroid from the mean of {} configurations using {} cores."
            .format(num_confs, n_cpus), file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_centroid, num_confs, n_cpus, mean_structure,
            indexes)
        # Every chunk reports (centroid, a1s, a3s, rmsf, time); keep the
        # first chunk with the lowest rmsf.
        rmsfs = [chunk[3] for chunk in out]
        best = out[rmsfs.index(min(rmsfs))]
        centroid = best[0]
        centroid_a1s = best[1]
        centroid_a3s = best[2]
        centroid_rmsf = best[3]
        centroid_time = best[4]

    print(
        "INFO: Centroid configuration found at configuration t = {}, RMSF = {}"
        .format(centroid_time, centroid_rmsf), file=stderr)

    from oxDNA_analysis_tools.mean2dat import make_dat
    make_dat(
        {
            'g_mean': centroid,
            'a1_mean': centroid_a1s,
            'a3_mean': centroid_a3s
        }, outfile)
def main():
    """Command-line entry point: superimpose configurations on a reference.

    Every "victim" configuration is rotated and translated onto the reference
    with the SVD superimposer (optionally fitting only the particles listed
    in an index file) and written to aligned<N>.dat, where N is the victim's
    position on the command line.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="superimposes one or more structures sharing a topology to a reference structure")
    parser.add_argument('reference', type=str, nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims', type=str, nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Align to only a subset of particles from a space-separated list in the provided file')
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    indexes = None
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # String indexes cannot be used to slice positions; abort now.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)

    # Read the reference once; it supplies both the default index list and
    # the positions to fit against.
    with ErikReader(ref_dat) as reader:
        ref = reader.read()
        if indexes is None:
            indexes = list(range(len(ref.positions)))
        ref.inbox()
    ref_conf = ref.positions[indexes]

    #Create list of configurations to superimpose
    # All victims are read before anything is written, so a missing input
    # file aborts the run before any output is produced.
    to_sup = []
    for victim in args.victims:
        with ErikReader(victim) as reader:
            conf = reader.read()
            conf.inbox()
        to_sup.append(conf)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, conf in enumerate(to_sup):
        indexed_cur_conf = conf.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        # Positions get the rotation plus the translation; the orientation
        # vectors are only rotated.
        conf.positions = np.einsum('ij, ki -> kj', rot, conf.positions) + tran
        conf.a1s = np.einsum('ij, ki -> kj', rot, conf.a1s)
        conf.a3s = np.einsum('ij, ki -> kj', rot, conf.a3s)
        conf.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
def main():
    """Command-line entry point: distances between pairs of particles.

    For every -i group (input file, trajectory, particle pairs) the distance
    of each pair is computed over the trajectory. Summary statistics are
    printed, the data can be dumped as json for oxView (-d), and a histogram
    and/or trajectory plot is saved. With -c the distances of the first
    trajectory are passed to the DBSCAN clusterer.
    """
    #handle commandline arguments
    #this program has no positional arguments, only flags
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Finds the ensemble of distances between any two particles in the system")
    parser.add_argument(
        '-i', '--input', metavar='input', nargs='+', action='append',
        help='An input, trajectory, and a list of particle pairs to compare.  Can call -i multiple times to plot multiple datasets.')
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1,
                        help='The name to save the graph file to')
    parser.add_argument(
        '-f', '--format', metavar='<histogram/trajectory/both>', nargs=1,
        help='Output format for the graphs.  Defaults to histogram.  Options are \"histogram\", \"trajectory\", and \"both\"')
    parser.add_argument(
        '-d', '--data', metavar='data_file', nargs=1,
        help='If set, the output for the graphs will be dropped as a json to this filename for loading in oxView or your own scripts')
    parser.add_argument(
        '-n', '--names', metavar='names', nargs='+', action='append',
        help='Names of the data series.  Will default to particle ids if not provided')
    parser.add_argument(
        '-c', metavar='cluster', dest='cluster', action='store_const',
        const=True, default=False,
        help="Run the clusterer on each configuration's distance?")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "matplotlib", "numpy"])

    #-i requires 4 or more arguments, the input file of the simulation, the trajectory to analyze, and any number of particle pairs to compute the distance between.
    try:
        input_files = [i[0] for i in args.input]
        trajectories = [i[1] for i in args.input]
        p1s = [[int(j) for j in i[2::2]] for i in args.input]
        p2s = [[int(j) for j in i[3::2]] for i in args.input]
    except Exception as e:
        # Covers a missing -i (args.input is None), too few values and
        # non-integer particle ids.
        print("ERROR:", e)
        parser.print_help()
        exit(1)

    #get number of distances to calculate
    n_dists = sum([len(l) for l in p1s])

    #Make sure that the input is correctly formatted
    if (len(input_files) != len(trajectories)):
        print(
            "ERROR: bad input arguments\nPlease supply an equal number of input and, trajectory files",
            file=stderr)
        exit(1)
    # Compare the two halves of every -i group. Comparing len(p1s) with
    # len(p2s) could never fail, since both hold one list per group.
    if any(len(a) != len(b) for a, b in zip(p1s, p2s)):
        print(
            "ERROR: bad input arguments\nPlease supply an even number of particles",
            file=stderr)
        exit(1)

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        print("INFO: No outfile name provided, defaulting to \"distance.png\"",
              file=stderr)
        outfile = "distance.png"
    # Split off only the final extension, so dots elsewhere in the path
    # (e.g. "./plots/out.png") do not end up in the derived names.
    out_root, out_ext = os.path.splitext(outfile)

    #-f defines which type of graph to produce
    if args.format:
        fmt = args.format[0]
        hist = fmt in ("histogram", "both")
        lineplt = fmt in ("trajectory", "both")
        if not (hist or lineplt):
            print(
                "ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram",
              file=stderr)
        hist = True
        lineplt = False

    #-c makes it run the clusterer on the output
    cluster = args.cluster

    #get the specified distances
    distances = get_distances(trajectories, p1s, p2s)

    # Particle ids flattened over all -i groups, in command-line order.
    flat_p1s = [i for sl in p1s for i in sl]
    flat_p2s = [i for sl in p2s for i in sl]
    id_names = ["{}-{}".format(p1, p2) for p1, p2 in zip(flat_p1s, flat_p2s)]

    # -n sets the names of the data series
    if args.names:
        names = list(args.names[0])
        if len(names) < n_dists:
            print(
                "WARNING: Names list too short.  There are {} items in names and {} distances were calculated.  Will pad with particle IDs"
                .format(len(names), n_dists), file=stderr)
            # Pad up to the number of distances. len(distances) would be the
            # number of trajectories, which left names too short.
            names.extend(id_names[len(names):])
        elif len(names) > n_dists:
            print(
                "WARNING: Names list too long. There are {} items in names and {} distances were calculated.  Truncating to be the same as distances"
                .format(len(names), n_dists), file=stderr)
            names = names[:n_dists]
    else:
        print("INFO: Defaulting to particle IDs as data series names")
        names = id_names

    # -d will dump the distances as json files for loading with the trajectories in oxView
    if args.data:
        from json import dump
        # Remove a trailing ".json" only. str.strip('.json') strips any of
        # the characters ., j, s, o, n from both ends and mangles names such
        # as "distances.json".
        data_base = args.data[0]
        if data_base.endswith('.json'):
            data_base = data_base[:-len('.json')]
        if len(trajectories) > 1:
            print(
                "INFO: distance lists from separate trajectories are printed to separate files for oxView compatibility.  Trajectory numbers will be appended to your provided data file name.",
                file=stderr)
            file_names = [
                "{}_{}.json".format(data_base, i)
                for i, _ in enumerate(trajectories)
            ]
        else:
            file_names = [data_base + '.json']
        names_by_traj = [['{}-{}'.format(p1, p2) for p1, p2 in zip(p1l, p2l)]
                         for p1l, p2l in zip(p1s, p2s)]

        for file_name, ns, dist_list in zip(file_names, names_by_traj,
                                            distances):
            obj = {n: d for n, d in zip(ns, dist_list)}
            with open(file_name, 'w+') as f:
                print(
                    "INFO: writing data to {}.  This can be opened in oxView using the Order parameter selector"
                    .format(file_name))
                dump(obj, f)

    #convert the distance list into numpy arrays because they're easier to work with
    for i, l in enumerate(distances):
        distances[i] = np.array(l)

    means = [np.mean(i, axis=1) for i in distances]
    medians = [np.median(i, axis=1) for i in distances]
    stdevs = [np.std(i, axis=1) for i in distances]

    #get some min/max values to make the plots pretty
    lower = min((l.min() for l in distances))
    upper = max((l.max() for l in distances))

    def print_row(label, cells, cell_format):
        # One tab-terminated cell per value, then a newline.
        print(label + "".join(cell_format.format(c) for c in cells))

    print_row("input:\t", id_names, "{}\t")
    print_row("name:\t", names[:n_dists], "{}\t")
    print_row("mean:\t", [i for sl in means for i in sl], "{:.2f}\t")
    print_row("stdev:\t", [i for sl in stdevs for i in sl], "{:.2f}\t")
    print_row("median:\t", [i for sl in medians for i in sl], "{:.2f}\t")

    #make a histogram
    if hist:
        #if making two plots, automatically append the plot type to the output file name
        out = out_root + "_hist" + out_ext if lineplt else outfile
        bins = np.linspace(np.floor(lower - (lower * 0.1)),
                           np.ceil(upper + (upper * 0.1)), 60)
        graph_count = 0
        for traj_set in distances:
            for dist_list in traj_set:
                plt.hist(dist_list, bins,
                         weights=np.ones(len(dist_list)) / len(dist_list),
                         alpha=0.5, histtype=u'stepfilled', edgecolor='k',
                         label=names[graph_count])
                graph_count += 1
        plt.xlabel("Distance (nm)")
        plt.ylabel("Normalized frequency")
        plt.legend()
        print("INFO: Writing histogram to file {}".format(out), file=stderr)
        plt.savefig("{}".format(out))

    #make a trajectory plot
    if lineplt:
        if hist:
            #clear the histogram plot
            plt.clf()
            #if making two plots, automatically append the plot type to the output file name
            out = out_root + "_traj" + out_ext
        else:
            out = outfile
        graph_count = 0
        for traj_set in distances:
            for dist_list in traj_set:
                plt.plot(dist_list, alpha=0.5, label=names[graph_count])
                graph_count += 1
        plt.xlabel("Simulation Steps")
        plt.ylabel("Distance (nm)")
        plt.legend()
        print("INFO: Writing trajectory plot to file {}".format(out),
              file=stderr)
        plt.savefig("{}".format(out))

    if cluster:
        if not all([x == trajectories[0] for x in trajectories]):
            print("ERROR: Clustering can only be run on a single trajectory",
                  file=stderr)
            exit(1)
        from oxDNA_analysis_tools.clustering import perform_DBSCAN

        # Only the first trajectory's distances are clustered. The transpose
        # gives one row per configuration.
        labs = perform_DBSCAN(distances[0].T, len(distances[0][0]),
                              trajectories[0], input_files[0], "euclidean",
                              12, 8)
def main():
    """Run a principal component analysis of nucleotide deviations.

    Command-line arguments:
        inputfile:  the oxDNA input file used to run the simulation.
        trajectory: the trajectory to analyze.
        meanfile:   the reference (mean) structure, either a .json file with
                    a "g_mean" entry or a .dat configuration.
        outfile:    where the component overlay is written as JSON.
        -p N:       compute the deviations with N processes.
        -c:         cluster the configurations in component space.

    Side effects: writes "scree.png", "coordinates.png" and ``outfile``
    (a JSON object with a single "pca" key), and sets the OXRNA environment
    variable from the interaction type found in the input file.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-c makes it run the clusterer on the output
    cluster = args.cluster

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  #this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    #load the reference structure the deviations are measured against
    mean_extension = mean_file.split(".")[-1]
    if mean_extension == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif mean_extension == "dat":
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = np.array([n.cm_pos for n in s._nucleotides])
    else:
        #previously this fell through and failed later with a NameError
        print("ERROR: the mean structure must be a .json or .dat file",
              file=stderr)
        exit(1)

    #all structures must have the same center of mass
    align_conf = np.asarray(align_conf)
    align_conf = align_conf - np.mean(align_conf, axis=0)

    #Compute the deviations
    if not parallel:
        with LorenzoReader2(traj_file, top_file) as r:
            deviations_matrix = get_pca(r, align_conf, num_confs)
    else:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate(list(out))

    #now that we have the deviations matrix we're gonna get the covariance and PCA it
    #note that in the future we might want a switch for covariance vs correlation matrix because correlation (cov/stdev so all diagonals are 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)

    #The fitted model carries what the rest of the script needs: one
    #eigenvalue per component and the components themselves as ROWS.
    evalues = pca.explained_variance_
    evectors = pca.components_

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print(
        "INFO: Creating coordinate plot from first three eigenvectors. Saving to coordinates.png",
        file=stderr)

    #reconstruct configurations in component space; the components are rows,
    #so the projection needs the transpose
    out = np.dot(deviations_matrix, evectors.T).astype(float)

    #make a quick plot from the first three components
    from mpl_toolkits.mplot3d import Axes3D  #registers the 3d projection on older matplotlib
    fig = plt.figure()
    #Figure.gca() no longer accepts a projection keyword in current matplotlib
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlay showing the first SUM components
    SUM = 1
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script. Current value: {}"
        .format(SUM),
        file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  #how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    with catch_warnings():
        #silence cast warnings while converting the overlay to plain floats
        simplefilter("ignore")
        #one (x, y, z) row per particle
        output_vectors = weighted_sum.reshape(-1, 3).astype(float)

    with open(outfile, "w+") as file:
        file.write(dumps({"pca": output_vectors.tolist()}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)
        from clustering import perform_DBSCAN
        perform_DBSCAN(out, num_confs, traj_file, inputfile, "euclidean", 12,
                       8)
def main():
    """Plot the energies stored in one or more oxDNA energy files.

    Command-line arguments:
        energy:        one or more energy files. Each non-blank line is split
                       on whitespace; column 0 is read as the time and
                       column 1 as the energy.
        -o/--output:   name of the image to write (default "energy.png").
        -f/--format:   "histogram", "trajectory" or "both" (default
                       histogram).

    When both plots are requested, "_hist" and "_traj" are inserted in front
    of the output file's extension so that the two images do not overwrite
    each other.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="A very simple plotting utility for oxDNA.org")
    parser.add_argument('energy', nargs='+', help='Energy files to plot')
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The name to save the graph file to')
    parser.add_argument(
        '-f',
        '--format',
        metavar='<histogram/trajectory/both>',
        nargs=1,
        help=
        'Output format for the graphs. Defaults to histogram. Options are \"histogram\", \"trajectory\", and \"both\"'
    )
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #get file name
    energy_files = args.energy

    #-o names the output file
    if args.output and args.output[0]:
        outfile = args.output[0]
    else:
        outfile = 'energy.png'

    #-f defines which type of graph to produce
    hist = False
    line = False
    if args.format:
        if "histogram" in args.format:
            hist = True
        if "trajectory" in args.format:
            line = True
        if "both" in args.format:
            hist = line = True
        if not hist and not line:
            print(
                "ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram",
              file=stderr)
        hist = True

    all_times = []
    all_energies = []
    for efile in energy_files:
        times = []
        energies = []
        with open(efile, 'r') as f:
            for l in f:
                fields = l.split()
                if not fields:
                    #a blank line used to abort the run with an IndexError
                    continue
                times.append(float(fields[0]))
                energies.append(float(fields[1]))
        if not energies:
            print("ERROR: no energy data found in {}".format(efile),
                  file=stderr)
            exit(1)
        all_times.append(times)
        all_energies.append(energies)

    #one legend entry per input file (a fixed three-item list broke on a
    #fourth file)
    names = [str(i + 1) for i in range(len(energy_files))]

    #split once on the real extension so names such as "./energy.png" or
    #"energy" get their suffix in the right place
    stem, extension = os.path.splitext(outfile)

    #make a histogram
    if hist:
        out = stem + "_hist" + extension if line else outfile
        bins = np.linspace(min(min(e) for e in all_energies),
                           max(max(e) for e in all_energies), 40)
        for name, elist in zip(names, all_energies):
            plt.hist(elist,
                     bins,
                     weights=np.ones(len(elist)) / len(elist),
                     alpha=0.3,
                     label=name,
                     histtype=u'stepfilled',
                     edgecolor='k')
        plt.legend()
        plt.xlabel("Energy per particle (SU)")
        plt.ylabel("Normalized frequency")
        print("INFO: Saving histogram to {}".format(out), file=stderr)
        plt.savefig(out)

    #make a trajectory plot
    if line:
        if hist:
            #clear the histogram before drawing on the same figure
            plt.clf()
            out = stem + "_traj" + extension
        else:
            out = outfile
        for name, tlist, elist in zip(names, all_times, all_energies):
            plt.plot(tlist, elist, alpha=0.5, label=name)
        plt.legend()
        plt.xlabel("Time (SU)")
        plt.ylabel("Energy (SU)")
        print("INFO: Saving line plot to {}".format(out), file=stderr)
        plt.savefig(out)
def main():
    """Compute per-particle RMSF and per-configuration RMSD from a mean structure.

    Command-line arguments:
        mean_structure: the mean structure .json file (its "g_mean" entry is
                        used).
        trajectory:     the trajectory to compare against the mean.
        -p N:           compute the deviations with N processes.
        -o/--output:    name of the RMSF json file (default "devs.json";
                        ".json" is appended when missing).
        -i FILE:        file whose first line is a space-separated list of
                        particle IDs to restrict the alignment to.
        -r FILE:        name of the RMSD plot (default "rmsd.png").
        -d FILE:        also dump the RMSD values to FILE as json.

    Side effects: writes the RMSF json file, the RMSD plot and, when -d is
    given, the RMSD json file. Exits with status 1 when the index file
    cannot be parsed.
    """
    #handle commandline arguments
    #the positional arguments for this are:
    # 1. the mean structure from compute_mean.py in json format
    # 2. the trajectory from which to compute the deviations
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py"
    )
    parser.add_argument(
        'mean_structure',
        type=str,
        nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-o',
        '--output',
        metavar='output_file',
        nargs=1,
        help='The filename to save the deviations json file to')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument('-r',
                        metavar='rmsd_plot',
                        dest='rmsd_plot',
                        nargs=1,
                        help='The name of the file to save the RMSD plot to.')
    parser.add_argument(
        '-d',
        metavar='rmsd_data',
        dest='rmsd_data',
        nargs=1,
        help='The name of the file to save the RNSD data in json format.')
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy", "matplotlib"])

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
        if not outfile.split(".")[-1] == 'json':
            outfile += ".json"
    else:
        outfile = "devs.json"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            #continuing with unparsed strings would only fail later with a
            #less helpful indexing error
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r names the file to print the RMSD plot to
    if args.rmsd_plot:
        plot_name = args.rmsd_plot[0]
    else:
        plot_name = 'rmsd.png'

    # load mean structure
    mean_structure_file = args.mean_structure[0]
    with open(mean_structure_file) as file:
        mean_data = loads(file.read())
    mean_structure = np.array(mean_data["g_mean"])
    indexed_mean_structure = mean_structure[indexes]
    print("INFO: mean structure loaded", file=stderr)

    #Calculate deviations, in parallel if available
    if not parallel:
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(indexed_mean_structure)),
            file=stderr)
        with ErikReader(traj_file) as r:
            deviations, RMSDs = compute_deviations(r, mean_structure,
                                                   indexed_mean_structure,
                                                   indexes, num_confs)
    else:
        #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
        #Each of those chunks is then calculated separately and the results are compiled.
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(indexed_mean_structure), n_cpus),
            file=stderr)
        deviations = []
        RMSDs = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_deviations, num_confs, n_cpus, mean_structure,
            indexed_mean_structure, indexes)
        #each chunk holds a (deviations, RMSDs) pair
        for chunk in out:
            deviations.extend(chunk[0])
            RMSDs.extend(chunk[1])

    #compute_deviations() returns the deviation of every particle in every configuration
    #take the mean of the per-configuration deviations to get the RMSF
    #NOTE(review): 0.8518 is presumably the oxDNA length unit in nm, matching
    #the "RMSF (nm)" key below - confirm
    rmsfs = np.sqrt(np.mean(np.square(np.array(deviations)), axis=0)) * 0.8518

    #write the deviations to a json file
    print("INFO: writing deviations to {}".format(outfile), file=stderr)
    with open(outfile, "w") as file:
        file.write(dumps({"RMSF (nm)": rmsfs.tolist()}))

    #plot RMSDs
    print("INFO: writing RMSD plot to {}".format(plot_name), file=stderr)
    plt.plot(RMSDs)
    plt.axhline(np.mean(RMSDs), color='red')
    plt.xlabel('Configuration')
    plt.ylabel('RMSD (nm)')
    plt.savefig(plot_name)

    # -d names the file to print the RMSD data to
    #The announcement lives inside this branch: the file name only exists
    #when -d was given, and printing it unconditionally crashed every run
    #that did not ask for the data dump.
    if args.rmsd_data:
        data_file = args.rmsd_data[0]
        print("INFO: writing RMSD data to {}".format(data_file), file=stderr)
        with open(data_file, 'w') as f:
            f.write(dumps({"RMSD (nm)": RMSDs}))