def main():
    """Minify a trajectory by rounding coordinates and/or dropping a vectors.

    Command-line arguments:
        trajectory: the trajectory file to read.
        outfile: the file each processed configuration is appended to.
        -a: discard the a1/a3 vectors (they are overwritten with zeros).
        -p N: round positions, a1s and a3s to N digits.

    Any pre-existing ``outfile`` is removed first, because configurations
    are written with ``write_append``.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Compress given configuration.")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('outfile', type=str, nargs=1, help='minified file')
    parser.add_argument('-a', action='store_true', help='Discard a vectors.')
    parser.add_argument(
        '-p', type=int, nargs=1,
        help='Round positions and orientations to the specified number of digits.')
    args = parser.parse_args()

    traj_file = args.trajectory[0]
    out = args.outfile[0]

    # get the number of configurations
    n_confs = cal_confs(traj_file)

    # Make sure there is no stale out file. Only filesystem errors are
    # ignored (typically "file does not exist"); a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    try:
        remove(out)
    except OSError:
        pass

    with ErikReader(traj_file) as reader:
        for i in range(n_confs):
            print(i + 1, ":", n_confs)
            # Erik reader ignores velocities
            system = reader.read()
            if args.p:
                # round positions and orientations
                digits = args.p[0]
                system.positions = round(system.positions, digits)
                system.a1s = round(system.a1s, digits)
                system.a3s = round(system.a3s, digits)
            if args.a:
                # discard a vectors by zeroing them in place
                system.a1s -= system.a1s
                system.a3s -= system.a3s
            # output conf
            system.write_append(out)
def main():
    """Compare the bonds found in each configuration with the designed pairs.

    Command-line arguments:
        inputfile: the input file used to run the simulation.
        trajectory: the trajectory file to analyze.
        designed_pairs: file listing the intended nucleotide pairings.
        output_file: file the occupancy overlay is written to.
        -p N: (optional) number of cores to use.

    Prints the average number of bonds and mis-bonds per analyzed
    configuration and writes the per-particle occupancy (accumulated value
    divided by the number of analyzed configurations) to ``output_file``.
    Exits with status 1 if no configuration could be analyzed.
    """
    # read data from files
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Compare the bonds found at each trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The trajecotry file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1,
                        help="The file containing the desired nucleotides pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1,
                        help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    # run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        tot_bonds, tot_missbonds, out_array, confid = bond_analysis(
            r, pairs, inputfile, num_confs)
        try:
            _ = tot_bonds  # this will fail if DNAnalysis failed.
        except NameError:
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory")
            exit(1)

    if parallel:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs,
            inputfile)

        tot_bonds = 0
        tot_missbonds = 0
        # Read the topology through a context manager so the handle is
        # closed deterministically instead of being leaked.
        with open(top_file, 'r') as top:
            out_array = np.zeros(len(top.readlines()) - 1)
        confid = 0
        for chunk in out:
            if chunk[0] is not None:
                tot_bonds += chunk[0]
                tot_missbonds += chunk[1]
                confid += chunk[3]
            else:
                print("WARNING: Some configurations were invalid and not included in the analysis. Please check the logs", file=stderr)

        out_array = sum((chunk[2] for chunk in out if len(chunk[2]) > 0))

    n_valid = int(confid)
    if n_valid == 0:
        # Every average below divides by the number of analyzed
        # configurations, so bail out with a message instead of a traceback.
        print("ERROR: No valid configurations were analyzed; cannot compute averages", file=stderr)
        exit(1)

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(
        tot_bonds / n_valid, tot_missbonds / n_valid))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [")
        file.write(", ".join(str(n / n_valid) for n in out_array))
        file.write("] \n}")
def main():
    """Compute pairwise eRMSDs between configurations and cluster them.

    Command-line arguments:
        inputfile: the input file used to run the simulation.
        trajectory: the trajectory file to analyze.
        -p N: (optional) number of cores to use.

    The eRMSD matrix is pickled to ``tmp_eRMSDs`` in the working directory
    and then passed to ``perform_DBSCAN`` as a precomputed metric.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate differences between structures and automatically apply DBSCAN to retrieve clusters")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  # this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    # The eRMSD calculation is the time-consuming part; a previously pickled
    # matrix (tmp_eRMSDs) could be loaded here instead of recomputing.
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)
        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_eRMSDs, num_confs, n_cpus, r2, inputfile,
            traj_file, top_file, matrix=True)
        # np.sum() on a generator is deprecated and ignores `axis`; stack the
        # per-chunk matrices explicitly and reduce over the chunk axis.
        eRMSDs = np.sum(np.array(list(out)), axis=0)

    # the eRMSD matrix is actually only half a matrix: mirror it across the
    # diagonal and zero the diagonal itself
    for ni, row in enumerate(eRMSDs):
        for nj, value in enumerate(row):
            eRMSDs[nj][ni] = value
            if ni == nj:
                eRMSDs[ni][nj] = 0

    # since calculating the eRMSDs is so time-consuming we pickle the matrix
    # so the DBSCAN can be iterated on later.
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    # Next, perform a DBSCAN on that matrix of eRMSDs to find clusters of
    # similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12, 8)
def main():
    """Run a PCA on per-configuration deviations from a mean structure.

    Command-line arguments:
        inputfile: the input file used to run the simulation.
        trajectory: the trajectory file to analyze.
        meanfile: mean structure, either ``.json`` (read from its
            ``g_mean`` key) or ``.dat``.
        outfile: ``.json`` file the PCA overlay is written to.
        -p N: (optional) number of cores to use.
        -c: additionally cluster the configurations in component space.

    Side effects: writes ``scree.png`` and ``coordinates.png`` to the working
    directory and the overlay ``{"pca": ...}`` to ``outfile``.

    NOTE(review): the original body stopped at the sklearn ``PCA`` fit and
    then used undefined ``evalues``/``evectors``. They are taken from the
    fitted model here (``explained_variance_`` and ``components_``); confirm
    this matches the intended analysis.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculates a principal component analysis of nucleotide deviations over a trajectory")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile', type=str, nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile', type=str, nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c', metavar='cluster', dest='cluster', action='store_const',
        const=True, default=False,
        help="Run the clusterer on each configuration's position in PCA space?")
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    # -c makes it run the clusterer on the output
    cluster = args.cluster

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  # this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif mean_ext == "dat":
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = np.array([n.cm_pos for n in s._nucleotides])
    else:
        # Previously fell through and crashed later on an undefined name.
        parser.error("the mean file must be a .json or .dat file")

    # all structures must have the same center of mass
    align_conf = np.asarray(align_conf)
    align_conf = align_conf - np.mean(align_conf, axis=0)

    # Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file, top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])

    # now that we have the deviations matrix, get the covariance and PCA it
    # note that in the future we might want a switch for covariance vs
    # correlation matrix because correlation (cov/stdev so all diagonals are
    # 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)
    # One eigenvalue per component, and one principal axis per row.
    evalues = pca.explained_variance_
    evectors = pca.components_

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print("INFO: Creating coordinate plot from first three eigenvectors. Saving to coordinates.png", file=stderr)
    # reconstruct configurations in component space; the axes are stored as
    # rows, so transpose to project (n_confs, 3N) onto them.
    # To weight the components by their eigenvalues, scale the rows of
    # `evectors` by `evalues` before projecting.
    out = np.dot(deviations_matrix, evectors.T).astype(float)

    # make a quick plot from the first three components
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers '3d' on older matplotlib)
    fig = plt.figure()
    # fig.gca(projection=...) is no longer accepted by current matplotlib.
    ax = fig.add_subplot(projection='3d')
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    # Create an oxView overlay showing the first SUM components
    SUM = 1
    print("INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script. Current value: {}".format(SUM), file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  # how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    def prep_pos_for_json(conf):
        """Convert an array of rows into nested plain lists for json."""
        return [list(p) for p in conf]

    # this produces an annoying warning about casting complex values to real
    # values that is not relevant
    with catch_warnings():
        simplefilter("ignore")
        output_vectors = weighted_sum.reshape(
            int(weighted_sum.shape[0] / 3), 3).astype(float)

    with open(outfile, "w+") as file:
        file.write(dumps({"pca": prep_pos_for_json(output_vectors)}))

    # If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...", file=stderr)
        # To cluster on only some of the components, slice `out` here,
        # e.g. out[:, 0:3]
        from clustering import perform_DBSCAN
        perform_DBSCAN(out, num_confs, traj_file, inputfile, "euclidean", 12, 8)
def main():
    """Fit vectors to every duplex in each configuration and write them out.

    Command-line arguments:
        inputfile: the input file used to run the simulation.
        trajectory: the trajectory file from the simulation.
        -o/--output FILE: output file name (defaults to ``angles.txt``).
        -p N: (optional) number of cores to use.

    Writes one tab-separated row per duplex per configuration, preceded by a
    header row.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o', '--output', metavar='output_file', type=str,
                        nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    # Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    # -o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    # Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    # Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    # NOTE(review): the system read here is discarded; presumably this is an
    # up-front check that the trajectory and topology can be read. Confirm.
    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    # launch find_angles using the appropriate number of threads to find all
    # duplexes.
    if not parallel:
        print("INFO: Fitting duplexes to {} configurations using 1 core.".format(
            num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)

    if parallel:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus), file=stderr)
        duplexes_at_step = []
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        for chunk in out:
            duplexes_at_step.extend(chunk)

    if [] in duplexes_at_step:
        print("WARNING: Some configurations were invalid and not included in the analysis. Please check the log to view the error", file=stderr)

    # print duplexes to a file
    print("INFO: Writing duplex data to {}. Use duplex_angle_plotter to graph data"
          .format(outfile), file=stderr)
    # The context manager closes the file even if formatting a row raises.
    with open(outfile, 'w') as output:
        output.write(
            "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n")
        for step in duplexes_at_step:
            for d in step:
                output.write(
                    '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'.format(
                        d.time, d.index, d.start1, d.end1, d.start2, d.end2,
                        d.axis[0], d.axis[1], d.axis[2],
                        d.final_hel_pos[0], d.final_hel_pos[1],
                        d.final_hel_pos[2]))
def main():
    """Compute the mean structure of a trajectory and write it to disk.

    Command-line arguments:
        trajectory: the trajectory file to analyze.
        -p N: (optional) number of cores to use.
        -o/--output FILE: output file name (defaults to ``mean`` plus the
            extension implied by the format).
        -f/--format {json,oxDNA,oxdna,both}: output format; oxDNA when
            omitted.
        -d/--deviations FILE: run compute_deviations afterwards, writing
            its output to FILE.
        -i FILE: restrict the computation to the particle ids listed in FILE.
        -a/--align ID: id of the configuration to align to.

    Exits with status 1 on an unrecognized format or an unparsable index
    file.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the mean structure of a trajectory file")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1,
                        help='The filename to save the mean structure to')
    # The help text used to claim json was the default, but the code below
    # defaults to oxDNA.
    parser.add_argument(
        '-f', '--format', metavar='<json/oxDNA/both>', nargs=1,
        help='Output format for the mean file. Defaults to oxDNA. Options are \"json\", \"oxdna/oxDNA\", and \"both\"')
    parser.add_argument(
        '-d', '--deviations', metavar='deviation_file', nargs=1,
        help='Immediatley run compute_deviations.py from the output')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Compute mean structure of a subset of particles from a space-separated list in the provided file')
    parser.add_argument(
        '-a', '--align', metavar='alignment_configuration', nargs=1,
        help='The id of the configuration to align to, otherwise random')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    # get file names
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]

    # -f defines the format of the output file
    outjson = False
    outoxdna = False
    if args.format:
        if "json" in args.format:
            outjson = True
        if "oxDNA" in args.format or "oxdna" in args.format:
            outoxdna = True
        if "both" in args.format:
            outjson = True
            outoxdna = True
        if not outjson and not outoxdna:
            print("ERROR: unrecognized output format\nAccepted formats are \"json\", \"oxDNA/oxdna\", and \"both\"", file=stderr)
            exit(1)
    else:
        print("INFO: No output format specified, defaulting to oxDNA",
              file=stderr)
        outoxdna = True

    # -o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        if outjson and not outoxdna:
            ext = ".json"
        elif outjson and outoxdna:
            ext = ".json/.dat"
        else:
            ext = ".dat"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            "mean{}".format(ext)), file=stderr)
        # ".json/.dat" is only a display label; use a real file name so the
        # per-format extension can be swapped safely below.
        outfile = "mean.json" if ext == ".json" else "mean.dat"

    # Extension-less stem. splitext keeps directories and dotted names
    # intact ("./run.1/mean.dat" -> "./run.1/mean"), where split(".")[0]
    # would truncate at the first dot.
    out_stem = os.path.splitext(outfile)[0]

    # -d will run compute_deviations.py when this script is completed.
    dev_file = None
    if args.deviations:
        dev_file = args.deviations[0]

    # -i will make it only run on a subset of nucleotides.
    # The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print("ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button", file=stderr)
            # Continuing with string indexes would only fail later with a
            # less helpful error.
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # calculate the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    # Pick the reference configuration which is used to define alignment
    align = None
    if args.align:
        align = args.align[0]
    align_conf_id, align_poses = pick_starting_configuration(
        traj_file, num_confs, align)
    # we are just interested in the nucleotide positions
    align_conf = align_poses.positions[indexes]

    # Actually compute mean structure
    if not parallel:
        print("INFO: Computing mean of {} configurations with an alignment of {} particles using 1 core."
              .format(num_confs, len(align_conf)), file=stderr)
        r = ErikReader(traj_file)
        (mean_pos_storage, mean_a1_storage, mean_a3_storage,
         intermediate_mean_structures, processed_frames) = compute_mean(
             r, align_conf, indexes, num_confs)

    # If parallel, the trajectory is split into chunks, each chunk is
    # computed separately and the partial results are summed.
    if parallel:
        print("INFO: Computing mean of {} configurations with an alignment of {} particles using {} cores."
              .format(num_confs, len(align_conf), n_cpus), file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_mean, num_confs, n_cpus, align_conf, indexes)
        mean_pos_storage = np.sum(np.array([i[0] for i in out]), axis=0)
        mean_a1_storage = np.sum(np.array([i[1] for i in out]), axis=0)
        mean_a3_storage = np.sum(np.array([i[2] for i in out]), axis=0)
        intermediate_mean_structures = []
        for chunk in out:
            intermediate_mean_structures.extend(chunk[3])
        processed_frames = sum((i[4] for i in out))

    # finished task entry
    print("INFO: processed frames total: {}".format(processed_frames),
          file=stderr)

    # Convert mean structure to a json string
    mean_file = dumps({
        "i_means": intermediate_mean_structures,
        "g_mean": prep_pos_for_json(mean_pos_storage / processed_frames),
        "a1_mean": prep_pos_for_json(
            [normalize(v) for v in (mean_a1_storage / processed_frames)]),
        "a3_mean": prep_pos_for_json(
            [normalize(v) for v in (mean_a3_storage / processed_frames)]),
        "p_frames": processed_frames,
        "ini_conf": {
            "conf": prep_pos_for_json(align_conf),
            "id": align_conf_id
        }
    })

    # Save the mean structure to the specified output file.
    if outjson or dev_file:
        # save output as json format
        if outoxdna:
            # if making both outputs, automatically set file extensions.
            jsonfile = out_stem + ".json"
        else:
            jsonfile = outfile
        print("INFO: Writing mean configuration to", jsonfile, file=stderr)
        with open(jsonfile, "w") as file:
            file.write(mean_file)

    if outoxdna:
        # save output as oxDNA .dat format
        if outjson:
            # if making both outputs, automatically set file extensions.
            outname = out_stem + ".dat"
        else:
            outname = outfile
        from oxDNA_analysis_tools.mean2dat import make_dat
        make_dat(loads(mean_file), outname)

    # If requested, run compute_deviations.py using the output from this
    # script.
    if dev_file:
        print("INFO: launching compute_deviations.py", file=stderr)
        # main() functions take no arguments so they stay callable from the
        # command line; rewrite argv so compute_deviations.main() parses it
        # as if it had been invoked directly.
        dev_stem = os.path.splitext(dev_file)[0]
        argv.clear()
        argv.extend([
            'compute_deviations.py', '-o', dev_file,
            "-r", dev_stem + "_rmsd.png",
            "-d", dev_stem + "_rmsd_data.json"
        ])
        if args.index_file:
            argv.append("-i")
            argv.append(index_file)
        if parallel:
            argv.append("-p")
            argv.append(str(n_cpus))
        argv.append(jsonfile)
        argv.append(traj_file)

        from oxDNA_analysis_tools import compute_deviations
        from sys import executable
        print(executable)
        print(argv)

        compute_deviations.main()

        # compute_deviations needs the json meanfile, but it is not useful
        # for visualization, so delete it unless json output was requested
        if not outjson:
            print("INFO: deleting {}".format(jsonfile), file=stderr)
            from os import remove
            remove(jsonfile)

    # NOTE(review): start_t is not defined in this function; it is expected
    # to be set at module level before main() runs. Confirm.
    print(time.time() - start_t)
def main():
    """Find the centroid configuration of a trajectory and write it out.

    Command-line arguments:
        mean_structure: mean structure, either ``.json`` (read from its
            ``g_mean`` key) or ``.dat``.
        trajectory: the trajectory to search.
        -p N: (optional) number of cores to use.
        -o/--output FILE: output file (defaults to ``centroid.dat``; should
            be a .dat because oxView uses file extensions).
        -i FILE: restrict the computation to the particle ids listed in FILE.

    In parallel mode each chunk returns a candidate and the one with the
    lowest RMSF is kept. Exits with status 1 on an unparsable index file.
    """
    import numpy as np  # local import: needed to index the json mean below

    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Find the configuration in a trajectory with the lowest RMSF from the mean structure produced by compute_mean.py")
    parser.add_argument(
        'mean_structure', type=str, nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o', '--output', metavar='output_file', nargs=1,
                        help='The filename to save the centroid to')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Compute mean structure of a subset of particles from a space-separated list in the provided file')
    args = parser.parse_args()

    # system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    # -o names the output file
    if args.output:
        outfile = args.output[0].strip()
    else:
        outfile = "centroid.dat"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    # prepare the data files and calculate how many configurations there are
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    # -i will make it only run on a subset of nucleotides.
    # The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print("ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button", file=stderr)
            # String indexes cannot be used for the lookups below.
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # load mean structure
    mean_file = args.mean_structure[0]
    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            # json yields nested lists, which cannot be indexed with a list
            # of ids; convert to an array first.
            mean_structure = np.array(load(file)['g_mean'])[indexes]
    elif mean_ext == "dat":
        with ErikReader(mean_file) as reader:
            s = reader.read()
            mean_structure = s.positions[indexes]
    else:
        # Previously fell through and crashed later on an undefined name.
        parser.error("the mean structure must be a .json or .dat file")
    print("INFO: mean structure loaded", file=stderr)

    # Calculate centroid, in parallel if available
    if not parallel:
        print("INFO: Computing centroid from the mean of {} configurations using 1 core."
              .format(num_confs), file=stderr)
        r = ErikReader(traj_file)
        (centroid, centroid_a1s, centroid_a3s, centroid_rmsf,
         centroid_time) = compute_centroid(r, mean_structure, indexes,
                                           num_confs)

    # If parallel, the trajectory is split into chunks, each chunk is
    # computed separately and the results are compiled.
    if parallel:
        print("INFO: Computing centroid from the mean of {} configurations using {} cores."
              .format(num_confs, n_cpus), file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_centroid, num_confs, n_cpus, mean_structure,
            indexes)
        # Keep the candidate with the lowest RMSF (element 3 of each result);
        # min() returns the first one on ties.
        best = min(out, key=lambda chunk: chunk[3])
        centroid = best[0]
        centroid_a1s = best[1]
        centroid_a3s = best[2]
        centroid_rmsf = best[3]
        centroid_time = best[4]

    print("INFO: Centroid configuration found at configuration t = {}, RMSF = {}"
          .format(centroid_time, centroid_rmsf), file=stderr)

    from oxDNA_analysis_tools.mean2dat import make_dat
    make_dat(
        {
            'g_mean': centroid,
            'a1_mean': centroid_a1s,
            'a3_mean': centroid_a3s
        }, outfile)
def main():
    """Compute per-particle RMSF and per-configuration RMSD from a mean.

    Command-line arguments:
        mean_structure: the mean structure .json file (its ``g_mean`` key is
            read).
        trajectory: the trajectory to analyze.
        -p N: (optional) number of cores to use.
        -o/--output FILE: deviations json file (defaults to ``devs.json``;
            ``.json`` is appended if missing).
        -i FILE: restrict the alignment to the particle ids listed in FILE.
        -r FILE: RMSD plot file (defaults to ``rmsd.png``).
        -d FILE: if given, also write the RMSD series as json to FILE.

    Exits with status 1 on an unparsable index file.
    """
    from sys import argv
    print(argv)
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py")
    parser.add_argument(
        'mean_structure', type=str, nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-o', '--output', metavar='output_file', nargs=1,
        help='The filename to save the deviations json file to')
    parser.add_argument(
        '-i', metavar='index_file', dest='index_file', nargs=1,
        help='Compute mean structure of a subset of particles from a space-separated list in the provided file')
    parser.add_argument('-r', metavar='rmsd_plot', dest='rmsd_plot', nargs=1,
                        help='The name of the file to save the RMSD plot to.')
    parser.add_argument(
        '-d', metavar='rmsd_data', dest='rmsd_data', nargs=1,
        help='The name of the file to save the RMSD data in json format.')
    args = parser.parse_args()

    # system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy", "matplotlib"])

    # -o names the output file
    if args.output:
        outfile = args.output[0].strip()
        if not outfile.split(".")[-1] == 'json':
            outfile += ".json"
    else:
        outfile = "devs.json"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile), file=stderr)

    # prepare the data files and calculate how many configurations there are
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    # -i will make it only run on a subset of nucleotides.
    # The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print("ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button", file=stderr)
            # String indexes cannot be used to index the mean structure.
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # -r names the file to print the RMSD plot to
    if args.rmsd_plot:
        plot_name = args.rmsd_plot[0]
    else:
        plot_name = 'rmsd.png'

    # -d names the file to print the RMSD data to; None means "do not write"
    data_file = args.rmsd_data[0] if args.rmsd_data else None

    # load mean structure
    mean_structure_file = args.mean_structure[0]
    with open(mean_structure_file) as file:
        mean_data = loads(file.read())
    mean_structure = np.array(mean_data["g_mean"])
    indexed_mean_structure = mean_structure[indexes]
    print("INFO: mean structure loaded", file=stderr)

    # Calculate deviations, in parallel if available
    if not parallel:
        print("INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using 1 core."
              .format(num_confs, len(indexed_mean_structure)), file=stderr)
        r = ErikReader(traj_file)
        deviations, RMSDs = compute_deviations(r, mean_structure,
                                               indexed_mean_structure,
                                               indexes, num_confs)

    # If parallel, the trajectory is split into chunks, each chunk is
    # computed separately and the results are compiled.
    if parallel:
        print("INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using {} cores."
              .format(num_confs, len(indexed_mean_structure), n_cpus),
              file=stderr)
        deviations = []
        RMSDs = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_deviations, num_confs, n_cpus, mean_structure,
            indexed_mean_structure, indexes)
        for chunk in out:
            deviations.extend(chunk[0])
            RMSDs.extend(chunk[1])

    # compute_deviations() returns the deviation of every particle in every
    # configuration; take the root-mean-square over configurations to get
    # the RMSF.
    # NOTE(review): 0.8518 is presumably the simulation-length-unit to nm
    # conversion (the output is labelled nm) - confirm.
    rmsfs = np.sqrt(np.mean(np.square(np.array(deviations)), axis=0)) * 0.8518

    # write the deviations to a json file
    print("INFO: writing deviations to {}".format(outfile), file=stderr)
    with open(outfile, "w") as file:
        file.write(dumps({"RMSF (nm)": rmsfs.tolist()}))

    # plot RMSDs
    print("INFO: writing RMSD plot to {}".format(plot_name), file=stderr)
    plt.plot(RMSDs)
    plt.axhline(np.mean(RMSDs), color='red')
    plt.xlabel('Configuration')
    plt.ylabel('RMSD (nm)')
    plt.savefig(plot_name)

    # print RMSDs. The log line used to run unconditionally and crashed on
    # the unbound data_file whenever -d was not supplied.
    if data_file is not None:
        print("INFO: writing RMSD data to {}".format(data_file), file=stderr)
        with open(data_file, 'w') as f:
            f.write(dumps({"RMSD (nm)": RMSDs}))
def main():
    """Average the backbone torsions over a trajectory and write them out.

    Command-line arguments:
        trajectory: the trajectory file to analyze.
        topology: the topology file associated with the trajectory.
        outfile: the output .json file.
        -p N: (optional) number of cores to use.

    Shows a scatter plot of mean torsion against mean dihedral, then writes
    the mean torsions (with the first value repeated twice at the front) to
    ``outfile`` under the ``"torsion"`` key.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the deviations in the backbone torsion angles")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'topology', type=str, nargs=1,
        help="The topology file associated with the trajectory file")
    parser.add_argument('outfile', type=str, nargs=1,
                        help='The file name for the output .json file.')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    # run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    top_file = args.topology[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    num_confs = cal_confs(traj_file)
    r = LorenzoReader2(traj_file, top_file)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_internal_coords, num_confs, n_cpus)
        # Each entry of `out` holds (torsions, dihedrals) for one chunk;
        # join the chunks along axis 1.
        torsion_chunks = []
        dihedral_chunks = []
        for i in range(n_cpus):
            torsion_chunks.append(out[i][0])
            dihedral_chunks.append(out[i][1])
        torsions = np.concatenate(torsion_chunks, axis=1)
        dihedrals = np.concatenate(dihedral_chunks, axis=1)
    else:
        torsions, dihedrals = get_internal_coords(r, num_confs)

    torsion_mean = np.mean(torsions, axis=1).tolist()
    dihedral_mean = np.mean(dihedrals, axis=1).tolist()

    # make something akin to a ramachandran plot for DNA origami??
    import matplotlib.pyplot as plt
    plt.scatter(torsion_mean[1:], dihedral_mean)
    plt.xlabel("torsion_angle")
    plt.ylabel("dihedral_angle")
    plt.show()

    # pad the front of the list by repeating the first value twice
    first = torsion_mean[0]
    torsion_mean = [first, first] + torsion_mean

    with open(args.outfile[0], "w") as file:
        file.write(dumps({"torsion": torsion_mean}))
def main():
    """Build a mean structure from averaged contact distances and report deviations.

    Command line: ``inputfile trajectory meanfile devfile [-p num_cpus]``.

    Steps:
      1. Sum the distance maps returned by ``get_mean`` over the trajectory
         (serially or in parallel) and divide by the number of configurations.
      2. Mask every entry that is not below ``cutoff_distance`` and embed the
         masked map in three dimensions with metric MDS.
      3. Overwrite the positions of a system read from the trajectory with
         the MDS coordinates and write ``<meanfile>.dat`` / ``<meanfile>.top``.
      4. Accumulate the values returned by ``get_devs`` over the trajectory
         and write the result to ``<devfile>.json`` under the key
         ``"contact deviation"``.
    """
    #at 2.5 you start to see the hard edges caused by end-loops and see some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate molecular contacts, and assembles an average set of contacts based on MDS"
    )
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile', type=str, nargs=1,
        help='the name of the .dat file where the mean will be written')
    parser.add_argument(
        'devfile', type=str, nargs=1,
        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the mean distance to all other particles
    if parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        #each worker returns a partial sum; add them together
        cartesian_distances = np.sum(np.array(list(out)), axis=0)
    else:
        print(
            "INFO: Computing interparticle distances of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)
    mean_distance_map = cartesian_distances * (1 / (num_confs))

    #Making a new configuration file from scratch is hard, so we're just going to
    #read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    #Mask every entry that is not strictly below the cutoff
    masked_mean = np.ma.masked_array(mean_distance_map,
                                     ~(mean_distance_map < cutoff_distance))

    #History: DGSOL was tried to solve this analytically, but origamis were
    #too big. Several other embeddings (LocallyLinearEmbedding from sklearn
    #and from megaman) were tried as well; metric MDS worked best.
    print("INFO: fitting local distance data", file=stderr)

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3,
              metric=True,
              max_iter=3000,
              eps=1e-12,
              dissimilarity="precomputed",
              n_jobs=1,
              n_init=1)
    out_coords = mds.fit_transform(masked_mean)

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        #since the orientation vectors are all 0, this cannot be used in a
        #simulation, but the viewer will handle it
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array([0, 0, 0])

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile),
                                       "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(
        meanfile, meanfile),
          file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances.
    #The "1 core" message is only printed on the serial path so that a
    #parallel run no longer logs both messages.
    if parallel:
        print(
            "INFO: Computing distance deviations of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array(list(out)), axis=0)
    else:
        print(
            "INFO: Computing distance deviations of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance, num_confs)

    #Dump the deviations to an oxView overlay file
    #mask all the 0s so they don't contribute to the mean
    devs = np.ma.masked_array(devs, ~(devs != 0.0))
    devs *= (1 / num_confs)
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)
def main():
    """Run a principal component analysis of deviations from a mean structure.

    Command line: ``inputfile trajectory meanfile outfile [-p num_cpus] [-c]``.

    Steps:
      1. Load the reference positions from ``meanfile`` (the ``g_mean`` entry
         of a .json file, or the positions of a .dat/.conf/.oxdna
         configuration) and subtract their mean position.
      2. Accumulate the matrix returned by ``get_cov`` over the trajectory,
         divide by ``num_confs - 1`` and eigendecompose it.
      3. Save a scree plot (``scree.png``) and print how many components are
         needed to reach 90% of the summed eigenvalues.
      4. Project every configuration onto the eigenvectors with
         ``change_basis`` and save a 3D scatter of the first three
         coordinates (``coordinates.png``).
      5. Write one oxView overlay per leading component, named
         ``<outfile stem><i>.json``.
      6. With ``-c``, pass the projected coordinates to ``perform_DBSCAN``.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile', type=str, nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile', type=str, nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c', metavar='cluster', dest='cluster', action='store_const',
        const=True, default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster

    num_confs = cal_confs(traj_file)

    #Load the reference structure; the loader is chosen from the file extension
    extension = mean_file.split(".")[-1]
    if extension == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif extension in ("dat", "conf", "oxdna"):
        with ErikReader(mean_file) as reader:
            align_conf = reader.read().positions
    else:
        #the file name is now actually substituted into the message
        print(
            "ERROR: {} is an unrecognized file type. \n"
            "The mean structure must either be provided as an oxDNA configuration file with the extension .dat, .conf or .oxdna or as the .json file produced by compute_mean.py."
            .format(mean_file),
            file=stderr)
        exit(1)

    cms = np.mean(align_conf, axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, get_cov, num_confs, n_cpus, align_conf)
        covariation_matrix = np.sum(list(out), axis=0)
    else:
        r = ErikReader(traj_file)
        covariation_matrix = get_cov(r, align_conf, num_confs)
    covariation_matrix /= (num_confs - 1)

    #now that we have the covariation matrix we're going to use
    #eigendecomposition to get the principal components.
    print("INFO: calculating eigenvectors", file=stderr)
    evalues, evectors = np.linalg.eig(covariation_matrix)
    evectors = evectors.T  #vectors come out as the columns of the array
    #np.linalg.eig does not guarantee any ordering of the eigenvalues, so sort
    #them explicitly (largest first). The stable sort leaves an already
    #sorted result untouched.
    order = np.argsort(-evalues.real, kind="stable")
    evalues = evalues[order]
    evectors = evectors[order]
    print("INFO: eigenvectors calculated", file=stderr)

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    #count how many leading components are needed to reach 90% of the total
    total = sum(evalues)
    running = 0
    i = 0
    while running < 0.9:
        running += (evalues[i] / total)
        i += 1
    print("90% of the variance is found in the first {} components".format(i))

    #if you want to weight the components by their eigenvalues:
    #mul = np.einsum('ij,i->ij',evectors, evalues)
    mul = evectors

    #reconstruct configurations in component space
    #because we don't save the difference matrix, this involves running
    #through the whole trajectory again
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, change_basis, num_confs, n_cpus, align_conf, mul)
        coordinates = np.concatenate(list(out))
    else:
        r = ErikReader(traj_file)
        coordinates = change_basis(r, align_conf, mul, num_confs)

    #make a quick plot from the first three components
    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)
    #imported for its side effect: registers the '3d' projection on older matplotlib
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    fig = plt.figure()
    #Figure.gca() no longer accepts keyword arguments (removed in matplotlib
    #3.6); add_subplot works on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(coordinates[:, 0], coordinates[:, 1], coordinates[:, 2],
               c='g', s=25)
    plt.savefig("coordinates.png")

    #Create oxView overlays for the first N components
    N = 3
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the N variable in the script.  Current value: {}"
        .format(N),
        file=stderr)
    #Split the extension off with splitext so that dots elsewhere in the path
    #(e.g. "./out.json") do not break the overlay file names.
    stem, extension = path.splitext(outfile)
    if extension != ".json":
        print(
            "ERROR: oxView overlays must have a '.json' extension.  No overlays will be produced",
            file=stderr)
    else:
        for i in range(N):  #how many eigenvalues do you want?
            out = np.sqrt(evalues[i]) * evectors[i]
            #this produces an annoying warning about casting complex values
            #to real values that is not relevant
            with catch_warnings():
                simplefilter("ignore")
                output_vectors = out.reshape(-1, 3).astype(float)
            with open(stem + str(i) + extension, "w+") as file:
                file.write(dumps({"pca": output_vectors.tolist()}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)
        #To cluster on only some of the components, slice the columns of
        #`coordinates` before passing it in.
        from oxDNA_analysis_tools.clustering import perform_DBSCAN
        perform_DBSCAN(coordinates, num_confs, traj_file, inputfile,
                       "euclidean", 12, 8)