Example #1
0
def main():
    """Command-line entry point: compare trajectory bonds with a design.

    Parses the input file, trajectory, designed-pairs file and output file
    names (plus an optional ``-p`` core count), runs ``bond_analysis`` over
    the trajectory either directly or through
    ``parallelize_lorenzo_onefile.fire_multiprocess``, prints the average
    number of bonds and missbonds per analysed configuration, and writes the
    per-element occupancy (``out_array`` divided by the number of analysed
    configurations) to the output file as a small JSON document.

    Exits with status 1 when the analysis reports a failure or when no
    configuration was analysed (which would otherwise divide by zero).
    """
    #read data from files
    parser = argparse.ArgumentParser(prog = path.basename(__file__), description="Compare the bonds found at each trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The trajecotry file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1, help="The file containing the desired nucleotides pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1, help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    # OXRNA is set from the interaction type before any analysis runs.
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file,top_file)
        tot_bonds, tot_missbonds, out_array, confid = bond_analysis(r, pairs, inputfile, num_confs)
        # The parallel branch treats a None bond count as an invalid result,
        # so apply the same test here instead of failing later on None / int.
        if tot_bonds is None:
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
            exit(1)
    else:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs, inputfile)

        # Accumulate the per-chunk totals, skipping chunks flagged as invalid.
        tot_bonds = 0
        tot_missbonds = 0
        confid = 0
        for chunk in out:
            if chunk[0] is None:
                print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the logs", file=stderr)
                continue
            tot_bonds += chunk[0]
            tot_missbonds += chunk[1]
            confid += chunk[3]

        # Element-wise sum of every non-empty per-chunk occupancy array.
        out_array = sum(chunk[2] for chunk in out if len(chunk[2]) > 0)

    confid = int(confid)
    if confid == 0:
        print("ERROR: No valid configurations were analyzed, cannot compute averages", file=stderr)
        exit(1)

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(tot_bonds/confid, tot_missbonds/confid))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [")
        file.write(", ".join(str(n/confid) for n in out_array))
        file.write("] \n}")
Example #2
0
def main():
    """Command-line entry point: pairwise eRMSD matrix followed by DBSCAN.

    Parses the input file and trajectory names (plus an optional ``-p`` core
    count), obtains the eRMSD matrix from ``get_eRMSDs`` either directly or
    through ``parallelize_lorenzo_onefile.fire_multiprocess``, mirrors the
    upper triangle onto the lower one with a zero diagonal, pickles the
    matrix to ``tmp_eRMSDs`` in the working directory and hands it to
    ``perform_DBSCAN`` as a precomputed metric.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate differences between structures and automatically apply DBSCAN to retrieve clusters"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)
        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
    else:
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file,
                                                            top_file,
                                                            get_eRMSDs,
                                                            num_confs,
                                                            n_cpus,
                                                            r2,
                                                            inputfile,
                                                            traj_file,
                                                            top_file,
                                                            matrix=True)
        # Stack the per-process matrices and add them element-wise.  Passing
        # a generator to np.sum is deprecated and ignores ``axis``.
        eRMSDs = np.sum(np.array([chunk for chunk in out]), axis=0)
    #eRMSDs = pickle.load(open('tmp_eRMSDs', 'rb'))

    #the eRMSD matrix is actually only half a matrix
    # Mirror the strict upper triangle onto the lower triangle and zero the
    # diagonal in one vectorised step.
    upper = np.triu(np.asarray(eRMSDs), k=1)
    eRMSDs = upper + upper.T

    #since calculating the eRMSDs are so time-consuming to calculate we're gonna pickle it to iterate the DBSCAN later.
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    ###############################################################################################################
    #Next, we're going to perform a DBSCAN on that matrix of eRMSDs to find clusters of similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12,
                   8)
Example #3
0
def main():
    """Command-line entry point: PCA of nucleotide deviations over a trajectory.

    Parses the input file, trajectory, mean-structure file and output file
    names (plus optional ``-p`` core count and ``-c`` clustering switch),
    loads the mean structure from a ``.json`` (key ``g_mean``) or ``.dat``
    file, centres it, collects the deviations matrix from ``get_pca`` and
    fits a three-component ``PCA`` to it.

    Side effects: saves ``scree.png`` and ``coordinates.png`` in the working
    directory, writes the summed weighted component(s) to the output file as
    ``{"pca": [...]}``, and, with ``-c``, passes the component-space
    coordinates to ``perform_DBSCAN``.

    Raises:
        SystemExit: if the mean file is neither ``.json`` nor ``.dat``.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  #this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    # Load the reference (mean) positions from whichever format was given.
    extension = mean_file.split(".")[-1]
    if extension == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif extension == "dat":
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = [n.cm_pos for n in s._nucleotides]
    else:
        # Without this guard align_conf would be unbound further down.
        raise SystemExit(
            "ERROR: mean structure file must be a .json or .dat file, got {}".
            format(mean_file))

    #all structures must have the same center of mass
    align_conf = np.asarray(align_conf)
    align_conf = align_conf - np.mean(align_conf, axis=0)

    #Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file, top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)
    else:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])

    #now that we have the deviations matrix we're gonna get the covariance and PCA it
    #note that in the future we might want a switch for covariance vs correlation matrix because correlation (cov/stdev so all diagonals are 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)
    transformed = pca.transform(deviations_matrix)

    # The plotting and output code below needs the eigenvalues and
    # eigenvectors; take them from the fitted model (one row of
    # ``components_`` per component).
    evalues = pca.explained_variance_
    evectors = pca.components_

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)

    #configurations in component space, as produced by the fitted PCA
    out = transformed.astype(float)

    #make a quick plot from the first three components
    # Imported for its side effect only: older Matplotlib releases need it to
    # register the '3d' projection.
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib.
    ax = fig.add_subplot(projection='3d')
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlay showing the first SUM components
    SUM = 1
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script.  Current value: {}"
        .format(SUM),
        file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  #how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    with catch_warnings(
    ):  #this produces an annoying warning about casting complex values to real values that is not relevant
        simplefilter("ignore")
        # One row of three values per group of three consecutive entries.
        output_vectors = weighted_sum.reshape(-1, 3).astype(float)
    with open(outfile, "w+") as file:
        file.write(dumps({"pca": output_vectors.tolist()}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from clustering import perform_DBSCAN
        perform_DBSCAN(out, num_confs, traj_file, inputfile, "euclidean", 12,
                       8)
def main():
    """Command-line entry point: fit vectors to every duplex in a trajectory.

    Parses the input file and trajectory names (plus optional ``-p`` core
    count and ``-o`` output name, defaulting to ``angles.txt``), runs
    ``find_angles`` over the trajectory either directly or through
    ``parallelize_lorenzo_onefile.fire_multiprocess``, and writes one
    tab-separated line per duplex per step to the output file.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        type=str,
                        nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    # NOTE(review): the system read here is discarded; presumably this is an
    # early check that the trajectory/topology pair is readable - confirm.
    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print(
            "INFO: Fitting duplexes to {} configurations using 1 core.".format(
                num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)
    else:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus),
              file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        # Flatten the per-process results into one list of per-step entries.
        duplexes_at_step = []
        for chunk in out:
            duplexes_at_step.extend(chunk)

    if [] in duplexes_at_step:
        print(
            "WARNING: Some configurations were invalid and not included in the analysis.  Please check the log to view the error",
            file=stderr)

    #print duplexes to a file
    print(
        "INFO: Writing duplex data to {}.  Use duplex_angle_plotter to graph data"
        .format(outfile),
        file=stderr)
    # The context manager closes the file even if a duplex entry is malformed.
    with open(outfile, 'w') as output:
        output.write(
            "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n"
        )
        for step in duplexes_at_step:
            for duplex in step:
                output.write(
                    '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'.format(
                        duplex.time, duplex.index,
                        duplex.start1, duplex.end1,
                        duplex.start2, duplex.end2,
                        duplex.axis[0], duplex.axis[1], duplex.axis[2],
                        duplex.final_hel_pos[0],
                        duplex.final_hel_pos[1],
                        duplex.final_hel_pos[2]))
Example #5
0
def main():
    """Command-line entry point: backbone torsion-angle deviations.

    Parses the trajectory, topology and output file names (plus an optional
    ``-p`` core count), collects torsions and dihedrals from
    ``get_internal_coords`` either directly or through
    ``parallelize_lorenzo_onefile.fire_multiprocess``, averages both along
    axis 1, shows a scatter plot of the two means, and writes the torsion
    means (with the first value repeated twice at the front) to the output
    file as ``{"torsion": [...]}``.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the deviations in the backbone torsion angles")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'topology',
        type=str,
        nargs=1,
        help="The topology file associated with the trajectory file")
    parser.add_argument('outfile',
                        type=str,
                        nargs=1,
                        help='The file name for the output .json file.')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    top_file = args.topology[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    num_confs = cal_confs(traj_file)

    if not parallel:
        # The reader is only needed for the single-process path.
        r = LorenzoReader2(traj_file, top_file)
        torsions, dihedrals = get_internal_coords(r, num_confs)
    else:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_internal_coords, num_confs, n_cpus)
        # Out Dims: 1 Processor, 2 Torsion or Dihedrals, 3 Specific list of torsions listed by conf
        # Iterate over the returned chunks themselves rather than assuming
        # there are exactly n_cpus of them.
        torsions = np.concatenate([chunk[0] for chunk in out], axis=1)
        dihedrals = np.concatenate([chunk[1] for chunk in out], axis=1)

    torsion_mean = np.mean(torsions, axis=1).tolist()
    dihedral_mean = np.mean(dihedrals, axis=1).tolist()
    #make something akin to a ramachandran plot for DNA origami??
    import matplotlib.pyplot as plt
    plt.scatter(torsion_mean[1:], dihedral_mean)
    plt.xlabel("torsion_angle")
    plt.ylabel("dihedral_angle")
    plt.show()

    # Repeat the first mean twice at the front of the list before writing.
    # NOTE(review): presumably pads the list to one entry per nucleotide -
    # confirm against get_internal_coords.
    torsion_mean.insert(0, torsion_mean[0])
    torsion_mean.insert(0, torsion_mean[0])
    with open(args.outfile[0], "w") as file:
        file.write(dumps({"torsion": torsion_mean}))
def main():
    """Command-line entry point: mean contact map, MDS mean structure, deviations.

    Parses the input file, trajectory, mean-file and deviations-file names
    (plus an optional ``-p`` core count) and then:

    1. sums the distance data returned by ``get_mean`` and divides by the
       number of configurations;
    2. masks every entry that is not below ``cutoff_distance`` and embeds
       the result in three dimensions with scikit-learn ``MDS``;
    3. overwrites the positions of a system read from the trajectory with
       the embedded coordinates and writes ``<meanfile>.dat`` and
       ``<meanfile>.top``;
    4. collects deviations with ``get_devs`` and writes their masked mean
       (square-rooted) to ``<devfile>.json`` under ``"contact deviation"``.
    """
    #at 2.5 you start to see the hard edges caused by end-loops and see some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate molecular contacts, and assembles an average set of contacts based on MDS"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='the name of the .dat file where the mean will be written')
    parser.add_argument(
        'devfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the mean distance to all other particles
    if not parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)
    else:
        print(
            "INFO: Computing interparticle distances of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        cartesian_distances = np.sum(np.array([i for i in out]), axis=0)

    # Computed once for both paths.
    mean_distance_map = cartesian_distances * (1 / (num_confs))

    #Making a new configuration file from scratch is hard, so we're just going to read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    # Mask every entry that is not strictly below the cutoff.
    masked_mean = np.ma.masked_array(mean_distance_map,
                                     ~(mean_distance_map < cutoff_distance))

    #I tried to use DGSOL to analytically solve this, but origamis were too big

    print("INFO: fitting local distance data", file=stderr)

    #Many embedding algorithms were tried (including locally linear
    #embedding via sklearn and megaman); MDS is the one kept below.

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3,
              metric=True,
              max_iter=3000,
              eps=1e-12,
              dissimilarity="precomputed",
              n_jobs=1,
              n_init=1)
    out_coords = mds.fit_transform(masked_mean)  #this one worked best

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        #since the orientation vectors are all 0, this cannot be used in a simulation, but the viewer will handle it
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array([0, 0, 0])

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile),
                                       "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(
        meanfile, meanfile),
          file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances
    if not parallel:
        # Only announce the single-core run when it is the path taken.
        print(
            "INFO: Computing distance deviations of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance, num_confs)
    else:
        print(
            "INFO: Computing distance deviations of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array([i for i in out]), axis=0)

    #Dump the deviations to an oxView overlay file
    devs = np.ma.masked_array(
        devs,
        ~(devs != 0.0))  #mask all the 0s so they don't contribute to the mean
    devs *= (1 / num_confs)
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)