Example #1
def main():
    import argparse
    parser = argparse.ArgumentParser(prog=path.basename(__file__), description="List all the interactions between nucleotides")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze')
    parser.add_argument('-v', type=str, nargs=1, dest='outfile', help='write the average per-particle energy to this file as an oxView JSON overlay instead of printing the interactions')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]

    if args.outfile:
        outfile = args.outfile[0]
        visualize = True
    else:
        visualize = False

    if path.dirname(inputfile) != getcwd():
        sim_directory = path.dirname(inputfile)
    else:
        sim_directory = ""

    top_file = path.join(sim_directory, get_input_parameter(inputfile, "topology"))
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base #this needs to be imported after the model type is set

    myreader = LorenzoReader2(traj_file,top_file)
    mysystem = myreader._get_system()

    energies = np.zeros(mysystem.N)
    count = 0

    while mysystem is not False:
        out = output_bonds(inputfile, mysystem)
        if visualize:
            for line in out.split('\n'):
                if not (line.startswith('#') or line == ''):
                    line = [float(l) for l in line.split(' ')]
                    energies[int(line[0])] += sum(line[2:])
                    energies[int(line[1])] += sum(line[2:])
        else:
            print(out)

        count += 1
        mysystem = myreader._get_system()

    if visualize:
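        #one oxDNA simulation energy unit is 41.42 pN nm; dividing by count gives the per-configuration average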
        energies *= (41.42/count)
        with open(outfile, "w+") as file:
            file.write("{\n\"Energy (pN nm)\" : [")
            file.write(str(energies[0]))
            for n in energies[1:]:
                file.write(", {}".format(n))
            file.write("] \n}")
Example #2
def main():
    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Converts a mean structure .json from compute_mean.py to an oxDNA-readable .dat"
    )
    parser.add_argument('mean',
                        type=str,
                        nargs=1,
                        help="A mean structure from compute_mean.py")
    parser.add_argument('output',
                        type=str,
                        nargs=1,
                        help="The name of the output file")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #load the mean file, which is in .json format
    with open(args.mean[0], "r") as file:
        mean_info = loads(file.read())

    #write the file out in oxDNA format
    outfile = args.output[0]
    make_dat(mean_info, outfile)
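Example #3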
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args):
    """
    Distributes a function over a given number of processes

    Parameters:
        traj_file (str): The name of the trajectory file to analyze.
        top_file (str): The name of the topology file associated with the trajectory.
        function (function): The analysis function to be parallelized.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of processes to launch.
        *args: The arguments for the provided function.

    Returns:
        results (list): The results from each individual processor's run.

    Note: The manner in which to concatenate the results is function-specific so should be handled in the calling module.
    """

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["pathos"])
    confs_per_processor = int(np.floor(num_confs / n_cpus))

    reader_pool = []
    processor_pool = pp.Pool(n_cpus)

    #split_starts and split_ends are around for backwards compatibility with the old parallelize algorithm
    reader_pool, tmpfiles = split_trajectory(traj_file, top_file, num_confs,
                                             n_cpus, confs_per_processor)
    split_starts = [0 for r in reader_pool]
    split_ends = [confs_per_processor for r in reader_pool]
    rem = num_confs % n_cpus
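    #the first (num_confs % n_cpus) chunks each carry one extra configuration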
    for i in range(rem):
        split_ends[i] += 1

    #Staple everything together, send it out to the workers, and collect the results as a list
    #Functions passed to this parallelizer must have the argument order defined by the lst variable (reader, <unique args>, number of configurations total, starting conf id, number of confs for this processor)
    #This args unpacking method was added in Python 3.6, so if you have an older version of Python that's why this isn't working
    results = []
    lst = [(r, *args, num_confs, s, e)
           for r, s, e in zip(reader_pool, split_starts, split_ends)]

    #starmap allows you to have arguments that themselves are iterables
    #async because we don't actually care what order stuff finishes in.
    results = processor_pool.starmap_async(function, lst).get()
    processor_pool.close()
    for f in tmpfiles:
        f.close()
        remove(f.name)

    return results
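
#Usage sketch (illustrative, not from the source).  fire_multiprocess expects
#the target function's argument order to be (reader, <unique args>, total
#number of confs, starting conf id, number of confs for this chunk), as noted
#above.  The function and file names below are hypothetical.
def count_confs(reader, num_confs, start, n_confs):
    """Toy analysis function: count the configurations in this chunk."""
    count = 0
    mysystem = reader._get_system()
    while mysystem is not False and count < n_confs:
        count += 1
        mysystem = reader._get_system()
    return count

#results = fire_multiprocess("trajectory.dat", "topology.top", count_confs, 100, 4)
#print(sum(results))  #how the per-worker results might be combined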
Example #4
def main():
    #doesn't actually do anything...
    import argparse
    from UTILS.readers import LorenzoReader2, get_input_parameter
    parser = argparse.ArgumentParser(
        description=
        "A python wrapper for getting all vectors between nucleotides from a simulation"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument(
        'trajectory',
        type=str,
        nargs=1,
        help=
        "The file containing the configurations of which the contact map is needed"
    )
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    import UTILS.base  #this needs to be imported after the model type is set

    r = LorenzoReader2(traj_file, top_file)
    system = r._get_system()

    while system:
        m = all_vectors(inputfile, system, True)
        system = r._get_system()

    print("well, it finished...")
Example #5
def main():
    parser = argparse.ArgumentParser(prog = os.path.basename(__file__), description="Create an external forces file enforcing the current base-pairing arrangement")
    parser.add_argument('db_file', type=str, nargs=1, help="A text file containing dot-bracket notation of the base-pairing arrangement")
    parser.add_argument('-o', '--output', type=str, nargs=1, help='Name of the file to write the force list to')
    parser.add_argument('-s', '--strength', type=float, nargs=1, help='Strength of the forces')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    # Get input
    with open(args.db_file[0]) as f:
        db_str = f.read()

    # Check for strength input, otherwise default to 0.09 which won't explode most simulations.
    if args.strength:
        strength = args.strength[0]
        print("INFO: Using strength {}".format(strength), file=stderr)
    else:
        strength = 0.09
        print("INFO: No strength provided, defaulting to {}".format(strength), file=stderr)

    # convert the db string to an index list
    db_idx = parse_dot_bracket(db_str)

    force_list = []

    #p is particle id, q is paired particle id
    for p, q in enumerate(db_idx):
        if q != -1:
            force_list.append(mutual_trap(p, q, strength, 1.2, 1))

    # write the force file
    if args.output:
        outfile = args.output[0]
        print("INFO: Writing forces to {}".format(outfile), file=stderr)
    else:
        outfile = "external_forces.txt"
        print("INFO: No output filename found.  Defaulting to {}".format(outfile), file=stderr)

    write_force_file(force_list, outfile)
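
#Illustrative stand-in for parse_dot_bracket (the real one ships with
#oxDNA_analysis_tools); shown only to document the convention used above:
#unpaired positions map to -1, paired positions map to their partner's index.
#Only '.', '(' and ')' are handled in this sketch.
def parse_dot_bracket_sketch(db_str):
    db_str = db_str.strip()
    pairs = [-1] * len(db_str)  #-1 marks an unpaired particle
    stack = []
    for i, c in enumerate(db_str):
        if c == '(':
            stack.append(i)
        elif c == ')':
            j = stack.pop()
            pairs[i], pairs[j] = j, i  #record both directions of the pair
    return pairs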
Example #6
def main():
    #read data from files
    parser = argparse.ArgumentParser(prog = path.basename(__file__), description="Compare the bonds found at each trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The trajectory file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1, help="The file containing the desired nucleotide pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1, help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")
    
    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file,top_file)
        tot_bonds, tot_missbonds, out_array, confid = bond_analysis(r, pairs, inputfile, num_confs)
        try:
            _ = tot_bonds #this will raise a NameError if DNAnalysis failed.
        except NameError:
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
            exit(1)

    if parallel:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs, inputfile)

        tot_bonds = 0
        tot_missbonds = 0
        with open(top_file, 'r') as f:
            out_array = np.zeros(len(f.readlines())-1)
        confid = 0
        for i in out:
            if i[0] is not None:
                tot_bonds += i[0]
                tot_missbonds += i[1]
                confid += i[3]
            else:
                print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the logs", file=stderr)
        out_array = sum((i[2] for i in out if len(i[2]) > 0))

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(tot_bonds/(int(confid)),tot_missbonds/int(confid)))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [")
        file.write(str(out_array[0]/int(confid)))
        for n in out_array[1:]:
            file.write(", {}".format(n/int(confid)))
        file.write("] \n}") 
Example #7
#!/usr/bin/env python3

from os import environ, path
import sys
import subprocess
import tempfile
import numpy as np

from oxDNA_analysis_tools.config import set_analysis_path

PROCESSPROGRAM = set_analysis_path()

from oxDNA_analysis_tools.config import check_dependencies

check_dependencies(["numpy"])


def contact_map(inputfile, mysystem, return_full_matrix):
    """
    Computes the distance between every pair of nucleotides and creates a matrix of these distances.

    Parameters:
        inputfile (string): the input file with which the simulation was run,
        mysystem (base.System): a base.py system object containing the system to analyze.
        return_full_matrix (bool): The matrix is symmetric. Return only the lower half or the whole matrix?
    
    Returns:
        distances (numpy.array): The matrix containing pairwise distances between every pair of nucleotides.
    """
    tempfile_obj = tempfile.NamedTemporaryFile()
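    #The body of this function is truncated in this excerpt.  What follows is a
    #minimal sketch of the documented behaviour, assuming each nucleotide in the
    #base.py system exposes its centre-of-mass position as `cm_pos`; the full
    #version presumably uses the tempfile and PROCESSPROGRAM machinery above.
    positions = np.array([n.cm_pos for s in mysystem._strands for n in s._nucleotides])
    #pairwise Euclidean distances via broadcasting: (N,1,3) - (1,N,3) -> (N,N)
    distances = np.linalg.norm(positions[:, None, :] - positions[None, :, :], axis=2)
    if not return_full_matrix:
        distances = np.tril(distances)  #keep only the lower half of the symmetric matrix
    return distances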
Example #8
def main():
    import argparse
    import matplotlib.pyplot as plt
    from oxDNA_analysis_tools.UTILS.readers import LorenzoReader2, get_input_parameter

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate and display the contact map for a structure")
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument(
        'trajectory',
        type=str,
        nargs=1,
        help=
        "The file containing the configurations of which the contact map is needed"
    )
    parser.add_argument(
        '-v',
        dest='visualize',
        action='store_const',
        const=True,
        default=False,
        help=
        "should we display the contact map once its calculated? Only recommend if there are few confs."
    )

    args = parser.parse_args()
    visualize = args.visualize
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]

    #process files
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #create system object from first configuration in the trajectory
    r = LorenzoReader2(traj_file, top_file)
    system = r._get_system()

    #for every configuration, create a graphical contact map
    while system:
        m = contact_map(inputfile, system, True)
        if visualize:
            fig, ax = plt.subplots()
            a = ax.imshow(m, cmap='viridis', origin='lower')
            ax.set(title="interaction network",
                   ylabel="nucleotide id",
                   xlabel="nucleotide id")
            b = fig.colorbar(a, ax=ax)
            b.set_label("distance", rotation=270)
            plt.show()
        system = r._get_system()
Example #9
def main():
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate differences between structures and automatically apply DBSCAN to retrieve clusters"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)

        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file,
                                                            top_file,
                                                            get_eRMSDs,
                                                            num_confs,
                                                            n_cpus,
                                                            r2,
                                                            inputfile,
                                                            traj_file,
                                                            top_file,
                                                            matrix=True)
        eRMSDs = np.sum([i for i in out], axis=0)
    #eRMSDs = pickle.load(open('tmp_eRMSDs', 'rb'))

    #the eRMSD matrix is only computed as half a matrix, so mirror it to make it symmetric
    for ni, i in enumerate(eRMSDs):
        for nj, j in enumerate(i):
            eRMSDs[nj][ni] = j
            if ni == nj:
                eRMSDs[ni][nj] = 0

    #since the eRMSDs are so time-consuming to calculate, we pickle them so the DBSCAN can be iterated on later.
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    ###############################################################################################################
    #Next, we're going to perform a DBSCAN on that matrix of eRMSDs to find clusters of similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12,
                   8)
Example #10
def main():
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster

    num_confs = cal_confs(traj_file)

    if mean_file.split(".")[-1] == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']

    elif mean_file.split(".")[-1] == "dat" or mean_file.split(
            ".")[-1] == "conf" or mean_file.split(".")[-1] == "oxdna":
        with ErikReader(mean_file) as reader:
            align_conf = reader.read().positions
    else:
        print(
            "ERROR: {} is an unrecognized file type. \nThe mean structure must either be provided as an oxDNA configuration file with the extension .dat, .conf or .oxdna or as the .json file produced by compute_mean.py.",
            file=stderr)
        exit(1)

    cms = np.mean(align_conf,
                  axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if not parallel:
        r = ErikReader(traj_file)
        covariation_matrix = get_cov(r, align_conf, num_confs)

    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, get_cov, num_confs, n_cpus, align_conf)
        covariation_matrix = np.sum([i for i in out], axis=0)

    covariation_matrix /= (num_confs - 1)

    #now that we have the covariation matrix we're going to use eigendecomposition to get the principal components.
    #make_heatmap(covariance)
    print("INFO: calculating eigenvectors", file=stderr)
    evalues, evectors = np.linalg.eig(covariation_matrix)  #note: np.linalg.eig makes no ordering guarantee for the eigenvalues
    evectors = evectors.T  #vectors come out as the columns of the array
    print("INFO: eigenvectors calculated", file=stderr)

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    total = sum(evalues)
    running = 0
    i = 0
    while running < 0.9:
        running += (evalues[i] / total)
        i += 1

    print("90% of the variance is found in the first {} components".format(i))

    #if you want to weight the components by their eigenvectors
    #mul = np.einsum('ij,i->ij',evectors, evalues)
    mul = evectors

    #reconstruct configurations in component space
    #because we don't save the difference matrix, this involves running through the whole trajectory again
    if not parallel:
        r = ErikReader(traj_file)
        coordinates = change_basis(r, align_conf, mul, num_confs)
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, change_basis, num_confs, n_cpus, align_conf, mul)
        coordinates = np.concatenate([i for i in out])

    #make a quick plot from the first three components
    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(coordinates[:, 0],
               coordinates[:, 1],
               coordinates[:, 2],
               c='g',
               s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlays for the first N components
    N = 3
    prep_pos_for_json = lambda conf: list(list(p) for p in conf)
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the N variable in the script.  Current value: {}"
        .format(N),
        file=stderr)
    for i in range(0, N):  #how many eigenvalues do you want?
        try:
            if outfile.split(".")[1] != "json":
                raise Exception
            f = outfile.split(".")[0] + str(i) + "." + outfile.split(".")[1]
        except:
            print(
                "ERROR: oxView overlays must have a '.json' extension.  No overlays will be produced",
                file=stderr)
            break
        out = np.sqrt(evalues[i]) * evectors[i]

        with catch_warnings(
        ):  #this produces an annoying warning about casting complex values to real values that is not relevant
            simplefilter("ignore")
            output_vectors = out.reshape(int(out.shape[0] / 3),
                                         3).astype(float)

        with open(f, "w+") as file:
            file.write(dumps({"pca": prep_pos_for_json(output_vectors)}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from oxDNA_analysis_tools.clustering import perform_DBSCAN
        labs = perform_DBSCAN(coordinates, num_confs, traj_file, inputfile,
                              "euclidean", 12, 8)
Example #11
def main():
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the deviations in the backbone torsion angles")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'topology',
        type=str,
        nargs=1,
        help="The topology file associated with the trajectory file")
    parser.add_argument('outfile',
                        type=str,
                        nargs=1,
                        help='The file name for the output .json file.')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    top_file = args.topology[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    num_confs = cal_confs(traj_file)

    r = LorenzoReader2(traj_file, top_file)

    if not parallel:
        torsions, dihedrals = get_internal_coords(r, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_internal_coords, num_confs, n_cpus)
        # Out Dims: 1 Processor, 2 Torsion or Dihedrals, 3 Specific list of torsions listed by conf
        torsions = np.concatenate([out[i][0] for i in range(n_cpus)], axis=1)
        dihedrals = np.concatenate([out[i][1] for i in range(n_cpus)], axis=1)

    torsion_mean = np.mean(torsions, axis=1).tolist()
    dihedral_mean = np.mean(dihedrals, axis=1).tolist()
    #make something akin to a ramachandran plot for DNA origami??
    import matplotlib.pyplot as plt
    plt.scatter(torsion_mean[1:], dihedral_mean)
    plt.xlabel("torsion_angle")
    plt.ylabel("dihedral_angle")
    plt.show()

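    #the first two particles have no torsion angle defined, so duplicate the
    #first value twice to keep the list aligned with particle ids (assumed intent)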
    torsion_mean.insert(0, torsion_mean[0])
    torsion_mean.insert(0, torsion_mean[0])
    with open(args.outfile[0], "w") as file:
        file.write(dumps({"torsion": torsion_mean}))
Example #12
def main():
    #Get command line arguments.
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Finds the ensemble of angles between any two duplexes defined by a starting or ending nucleotide in the system"
    )
    parser.add_argument(
        '-i',
        '--input',
        metavar='angle_file',
        dest='input',
        nargs='+',
        action='append',
        help=
        'An angle file from duplex_angle_finder.py and a list of duplex-end particle pairs to compare.  Can call -i multiple times to plot multiple datasets.'
    )
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The name to save the graph file to')
    parser.add_argument(
        '-f',
        '--format',
        metavar='<histogram/trajectory/both>',
        nargs=1,
        help=
        'Output format for the graphs.  Defaults to histogram.  Options are \"histogram\", \"trajectory\", and \"both\"'
    )
    parser.add_argument(
        '-d',
        '--data',
        metavar='data_file',
        nargs=1,
        help=
        'If set, the output for the graphs will be dropped as a json to this filename for loading in oxView or your own scripts'
    )
    parser.add_argument(
        '-n',
        '--names',
        metavar='names',
        nargs='+',
        action='append',
        help=
        'Names of the data series.  Will default to particle ids if not provided'
    )

    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    try:
        files = [i[0] for i in args.input]
        p1s = [i[1::2] for i in args.input]
        p2s = [i[2::2] for i in args.input]
    except Exception as e:
        print("ERROR: Failed to read files")
        print(e)
        parser.print_help()
        exit(1)

    n_angles = sum(len(p) for p in p1s)

    #Make sure that the input is correctly formatted
    if (len(files) != len(p1s) != len(p2s)):
        print(
            "ERROR: bad input arguments\nPlease supply an equal number of trajectory and particle pairs",
            file=stderr)
        exit(1)

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        if environ.get('DISPLAY', None):
            print("INFO: No outfile name provided and a display was detected; the graph will be shown instead of saved",
                  file=stderr)
            outfile = False
        else:
            print(
                "INFO: No display detected, defaulting to \"angle.png\"",
                file=stderr)
            outfile = "angle.png"

    #-f defines which type of graph to produce
    hist = False
    line = False
    if args.format:
        if "histogram" in args.format:
            hist = True
        if "trajectory" in args.format:
            line = True
        if "both" in args.format:
            hist = line = True
        if hist == line == False:
            print(
                "ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram",
              file=stderr)
        hist = True

    all_angles = [[] for _ in files]
    means = [[] for _ in files]
    medians = [[] for _ in files]
    stdevs = [[] for _ in files]
    representations = [[] for _ in files]

    #For each input triplet
    for i, (anglefile, search1, search2) in enumerate(zip(files, p1s, p2s)):

        steps = 0  #counts the number of configurations in the file
        last_step = 0
        all_angles[i] = [[] for _ in p1s[i]]
        found = False

        #the format of the angle file is as follows: (tbh this should be a JSON)
        # 0: time
        # 1: duplex id
        # 2: strand 1 start nucleotide id
        # 3: strand 1 end nucleotide id
        # 4: strand 2 start nucleotide id
        # 5: strand 2 end nucleotide id
        # 6: X-component of the axis vector
        # 7: Y-component of the axis vector
        # 8: Z-component of the axis vector
        # 9: Helix position
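        #an illustrative (made-up) line in this format:
        #  "10000\t1\t12\t35\t64\t87\t0.12\t-0.98\t0.05\t3.20"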

        with open(anglefile) as file:
            all_search = search1.copy()
            all_search.extend(search2)
            d = {i: np.array([0, 0, 0]) for i in all_search}
            for l in file.readlines(
            )[1:]:  #the first line is a header, so it can be dropped
                try:
                    l = l.split("\t")
                    t = float(l[0])
                except Exception as e:
                    print(
                        "ERROR: The following line is incorrectly formatted:")
                    print(l)
                    print("The error was:\n", e)
                    print("skiping the line")
                    continue

                #dump values and reset if we're in a new time (but also skip the first pass)
                if (t != last_step):
                    if (steps != 0):
                        for j, (p1, p2) in enumerate(zip(search1, search2)):
                            if np.linalg.norm(d[p1]) != 0 and np.linalg.norm(
                                    d[p2]) != 0:
                                angle = rad2degree(
                                    angle_between(d[p1], -1 * d[p2])
                                )  #add a -90 here if your duplexes in question are antiparallel
                                all_angles[i][j].append(angle)
                            else:
                                all_angles[i][j].append(np.nan)

                    found = False
                    steps += 1
                    d = dict.fromkeys(d, np.array([0, 0, 0]))
                    count = 0  #counts the number of search targets found

                #don't need to do anything if both angles were already found for this timestep
                if found:
                    continue

                #look for the nucleotide IDs.  The -1 on axis 2 assumes you're looking at contiguous duplexes
                for s in all_search:
                    idx = l.index(s, 2, 6) if s in l[2:6] else None
                    if idx:
                        d[s] = np.array(
                            [float(l[6]),
                             float(l[7]),
                             float(l[8])])
                        count += 1

                #once all are found, add them to angle list
                if count == len(d):
                    found = True

                last_step = t

        #catch last configuration
        for j, (p1, p2) in enumerate(zip(search1, search2)):
            if np.linalg.norm(d[p1]) != 0 and np.linalg.norm(d[p2]) != 0:
                angle = rad2degree(
                    angle_between(d[p1], -1 * d[p2])
                )  #add a -90 here if your duplexes in question are antiparallel
                all_angles[i][j].append(angle)
            else:
                all_angles[i][j].append(np.nan)

        #compute some statistics
        all_angles[i] = [np.array(a) for a in all_angles[i]]
        mean = [np.nanmean(a) for a in all_angles[i]]
        median = [np.nanmedian(a) for a in all_angles[i]]
        stdev = [np.nanstd(a) for a in all_angles[i]]
        representation = [
            np.count_nonzero(~np.isnan(a)) / steps for a in all_angles[i]
        ]

        #add to the output data
        means[i] = mean
        medians[i] = median
        stdevs[i] = stdev
        representations[i] = representation

    #for i, m in enumerate(means):
    #    if m > 90:
    #        all_angles[i] = [180 - a for a in all_angles[i]]
    #        means[i] = 180 - m
    #        medians[i] = 180 - medians[i]

    # -n sets the names of the data series
    if args.names:
        names = args.names[0]
        if len(names) < n_angles:
            print(
                "WARNING: Names list too short.  There are {} items in names and {} angles were calculated.  Will pad with particle IDs"
                .format(len(names), n_angles),
                file=stderr)
            for i in range(len(names), n_angles):
                names.append("{}-{}".format([j for sl in p1s for j in sl][i],
                                            [j for sl in p2s for j in sl][i]))
        if len(names) > n_angles:
            print(
                "WARNING: Names list too long. There are {} items in names and {} angles were calculated.  Truncating to be the same as distances"
                .format(len(names), n_angles),
                file=stderr)
            names = names[:n_angles]

    else:
        print("INFO: Defaulting to particle IDs as data series names")
        names = [
            "{}-{}".format(p1, p2)
            for p1, p2 in zip([i for sl in p1s
                               for i in sl], [i for sl in p2s for i in sl])
        ]

    # -d will dump the distances as json files for loading with the trajectories in oxView
    if args.data:
        from json import dump
        if len(files) > 1:
            f_names = [path.basename(f) for f in files]
            print(
                "INFO: angle lists from separate trajectories are printed to separate files for oxView compatibility.  Trajectory names will be appended to your provided data file name.",
                file=stderr)
            file_names = [
                "{}_{}.json".format(args.data[0].rsplit('.json', 1)[0], i)
                for i, _ in enumerate(f_names)
            ]
        else:
            file_names = [args.data[0].rsplit('.json', 1)[0] + '.json']
        names_by_traj = [['{}-{}'.format(p1, p2) for p1, p2 in zip(p1l, p2l)]
                         for p1l, p2l in zip(p1s, p2s)]

        for file_name, ns, ang_list in zip(file_names, names_by_traj,
                                           all_angles):
            obj = {}
            for n, a in zip(ns, ang_list):
                obj[n] = list(a)
            with open(file_name, 'w+') as f:
                print(
                    "INFO: writing data to {}.  This can be opened in oxView using the Order parameter selector"
                    .format(file_name))
                dump(obj, f)

    #print statistical information
    print("name:\t", end='')
    [print("{}\t".format(t), end='') for t in names[:n_angles]]
    print("")

    print("mean:\t", end='')
    [
        print("{:.2f}\t".format(m), end='')
        for m in [i for sl in means for i in sl]
    ]
    print("")

    print("stdevs:\t", end='')
    [
        print("{:.2f}\t".format(s), end='')
        for s in [i for sl in stdevs for i in sl]
    ]
    print("")

    print("median:\t", end='')
    [
        print("{:.2f}\t".format(m), end='')
        for m in [i for sl in medians for i in sl]
    ]
    print("")

    print("freqs:\t", end='')
    [
        print("{:.2f}\t".format(r), end='')
        for r in [i for sl in representations for i in sl]
    ]
    print("")

    #make a histogram
    import matplotlib.pyplot as plt
    if outfile and hist == True:
        if line == True:
            out = outfile[:outfile.find(".")] + "_hist" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile

        bins = np.linspace(0, 180, 60)

        artists = []
        for i, traj_set in enumerate(all_angles):
            for alist in traj_set:
                a = plt.hist(alist,
                             bins,
                             weights=np.ones(len(alist)) / len(alist),
                             alpha=0.3,
                             label=names[i],
                             histtype=u'stepfilled',
                             edgecolor='k')
                artists.append(a)
        plt.legend(labels=names)
        plt.xlim((0, 180))
        plt.xlabel("Angle (degrees)")
        plt.ylabel("Normalized frequency")
        if outfile:
            print("INFO: Saving histogram to {}".format(out), file=stderr)
            plt.savefig(out)
        else:
            plt.show()

    #make a trajectory plot
    if outfile and line == True:
        if hist == True:
            plt.clf()
            out = outfile[:outfile.find(".")] + "_traj" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile

        artists = []
        for i, traj_set in enumerate(all_angles):
            for alist in traj_set:
                a = plt.plot(alist)
                artists.append(a)
        plt.legend(labels=names)
        plt.xlabel("Configuration Number")
        plt.ylabel("Angle (degrees)")
        if outfile:
            print("INFO: Saving line plot to {}".format(out), file=stderr)
            plt.savefig(out)
        else:
            plt.show()
Example #13
def main():
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the mean structure of a trajectory file")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The filename to save the mean structure to')
    parser.add_argument(
        '-f',
        '--format',
        metavar='<json/oxDNA/both>',
        nargs=1,
        help=
        'Output format for the mean file.  Defaults to json.  Options are \"json\", \"oxdna/oxDNA\", and \"both\"'
    )
    parser.add_argument(
        '-d',
        '--deviations',
        metavar='deviation_file',
        nargs=1,
        help='Immediately run compute_deviations.py from the output')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-a',
        '--align',
        metavar='alignment_configuration',
        nargs=1,
        help='The id of the configuration to align to, otherwise random')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    #get file names
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]

    #-f defines the format of the output file
    outjson = False
    outoxdna = False
    if args.format:
        if "json" in args.format:
            outjson = True
        if "oxDNA" in args.format or "oxdna" in args.format:
            outoxdna = True
        if "both" in args.format:
            outjson = True
            outoxdna = True
        if outjson == outoxdna == False:
            print(
                "ERROR: unrecognized output format\nAccepted formats are \"json\", \"oxDNA/oxdna\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No output format specified, defaulting to oxDNA",
              file=stderr)
        outoxdna = True

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        if outjson and not outoxdna:
            ext = ".json"
        elif outjson and outoxdna:
            ext = ".json/.dat"
        elif outoxdna and not outjson:
            ext = ".dat"
        outfile = "mean{}".format(ext)
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #-d will run compute_deviations.py when this script is completed.
    dev_file = None
    if args.deviations:
        dev_file = args.deviations[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-separated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
                )
                exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # The reference configuration which is used to define alignment
    align_conf = []

    #calculate the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    # if we have no align_conf we need to choose one
    # and realign its cms to be @ 0,0,0
    if align_conf == []:
        align = None
        if args.align:
            align = args.align[0]
        align_conf_id, align_poses = pick_starting_configuration(
            traj_file, num_confs, align)
        # we are just interested in the nucleotide positions
        align_conf = align_poses.positions[indexes]

    #Actually compute mean structure
    if not parallel:
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(align_conf)),
            file=stderr)
        r = ErikReader(traj_file)
        mean_pos_storage, mean_a1_storage, mean_a3_storage, intermediate_mean_structures, processed_frames = compute_mean(
            r, align_conf, indexes, num_confs)

    #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    #Each of those chunks is then calculated separately and the results are summed.
    if parallel:
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(align_conf), n_cpus),
            file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_mean, num_confs, n_cpus, align_conf, indexes)
        mean_pos_storage = np.sum(np.array([i[0] for i in out]), axis=0)
        mean_a1_storage = np.sum(np.array([i[1] for i in out]), axis=0)
        mean_a3_storage = np.sum(np.array([i[2] for i in out]), axis=0)
        intermediate_mean_structures = []
        [intermediate_mean_structures.extend(i[3]) for i in out]
        processed_frames = sum((i[4] for i in out))
    # finished task entry
    print("INFO: processed frames total: {}".format(processed_frames),
          file=stderr)

    #Convert mean structure to a json file
    mean_file = dumps({
        "i_means":
        intermediate_mean_structures,
        "g_mean":
        prep_pos_for_json(mean_pos_storage / processed_frames),
        "a1_mean":
        prep_pos_for_json(
            [normalize(v) for v in (mean_a1_storage / processed_frames)]),
        "a3_mean":
        prep_pos_for_json(
            [normalize(v) for v in (mean_a3_storage / processed_frames)]),
        "p_frames":
        processed_frames,
        "ini_conf": {
            "conf": prep_pos_for_json(align_conf),
            "id": align_conf_id
        }
    })

    #Save the mean structure to the specified output file.
    if outjson or dev_file:
        #save output as json format
        if outoxdna == True:
            #if making both outputs, automatically set file extensions.
            jsonfile = outfile.split(".")[0] + ".json"
        else:
            jsonfile = outfile
        print("INFO: Writing mean configuration to", jsonfile, file=stderr)
        with open(jsonfile, "w") as file:
            file.write(mean_file)

    if outoxdna:
        #save output as oxDNA .dat format
        if outjson == True:
            #if making both outputs, automatically set file extensions.
            outname = outfile.split(".")[0] + ".dat"
        else:
            outname = outfile
        from oxDNA_analysis_tools.mean2dat import make_dat
        make_dat(loads(mean_file), outname)

    #If requested, run compute_deviations.py using the output from this script.
    if dev_file:
        print("INFO: launching compute_deviations.py", file=stderr)

        #this is probably horrible practice, but to maintain the ability to call things from the command line, I cannot pass arguments between main() calls.
        #so instead we're gonna spoof a global variable to make it look like compute_deviations was called explicitly
        argv.clear()
        argv.extend([
            'compute_deviations.py', '-o', dev_file, "-r",
            dev_file.split('.')[0] + "_rmsd.png", "-d",
            dev_file.split('.')[0] + "_rmsd_data.json"
        ])
        if args.index_file:
            argv.append("-i")
            argv.append(index_file)
        if parallel:
            argv.append("-p")
            argv.append(str(n_cpus))
        argv.append(jsonfile)
        argv.append(traj_file)

        from oxDNA_analysis_tools import compute_deviations
        from sys import executable
        print(executable)
        print(argv)

        compute_deviations.main()

        #compute_deviations needs the json meanfile, but it's not useful for visualization
        #so we dump it
        if not outjson:
            print("INFO: deleting {}".format(jsonfile), file=stderr)
            from os import remove
            remove(jsonfile)

    print(time.time() - start_t)
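Example #14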
def main():
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj',
                        type=str,
                        nargs=1,
                        help="The trajectory file to align")
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='The name of the new trajectory file to write out')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-r',
        metavar='reference_structure',
        dest='reference_structure',
        nargs=1,
        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-separated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
                )
                exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration
        r = ErikReader(args.reference_structure[0])
        ref = r.read()
        ref.inbox()
        r = ErikReader(traj_file)
        ref_conf = ref.positions[indexes]

        mysystem = align_frame(ref_conf, sup, r.read(), indexes)

    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    while mysystem is not False:
        print("working on t = ", mysystem.time)

        mysystem = align_frame(ref_conf, sup, mysystem, indexes)

        mysystem.write_append(outfile)

        mysystem = r.read()
Example #15
def perform_DBSCAN(points, num_confs, traj_file, inputfile, metric_name, eps, min_samples):
    """
    Runs the DBSCAN algorithm using the provided analysis as positions and splits the trajectory into clusters.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        num_confs (int): The number of configurations in the trajectory.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        metric_name (str): The type of data the points represent (usually either "euclidean" or "precomputed").
        eps (float): The maximum distance between two points for them to be considered neighbors.
        min_samples (int): The minimum number of neighboring points required to form a cluster.
    
    Returns:
        labels (numpy.array): The clusterID of each configuration in the trajectory.
    """

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "sklearn", "matplotlib"])
    
    print("INFO: Running DBSCAN...", file=stderr)

    #dump the input as a json file so you can iterate on eps and min_samples
    dump_file = "cluster_data.json"
    print("INFO: Serializing input data to {}".format(dump_file), file=stderr)
    print("INFO: Run just clustering.py with the serialized data to adjust clustering parameters", file=stderr)
    out = [points.tolist(), num_confs, traj_file, inputfile, metric_name]
    dump(out, codecs.open(dump_file, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)

    #prepping to show the plot later
    #this only shows the first three dimensions because we assume that this is either PCA data or only a few dimensions anyway

    #components = perform_pca(points, 3)
    dimensions = []
    x = []
    dimensions.append(x)

    if points.shape[1] > 1:
        y = []
        dimensions.append(y)

    if points.shape[1] > 2:
        z = []
        dimensions.append(z)
    
    for i in points:
        for j, dim in enumerate(dimensions):
            dim.append(i[j])

    #DBSCAN parameters:
    #eps: the pairwise distance below which configurations are considered neighbors
    #min_samples: The smallest number of neighboring configurations required to start a cluster
    #metric: If the matrix fed in are points in n-dimensional space, then the metric needs to be "euclidean".
    #        If the matrix is already a square distance matrix, the metric needs to be "precomputed".
    #the eps and min_samples need to be determined for each input based on the values of the input data
    #If you're making your own multidimensional data, you probably want to normalize your data first.
    print("INFO: Adjust clustering parameters by adding the -e and -m flags to the invocation of this script.", file=stderr)
    print("INFO: Current values: eps={}, min_samples={}".format(eps, min_samples))
    db = DBSCAN(eps=eps, min_samples=min_samples, metric=metric_name).fit(points) 
    labels = db.labels_
    
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print ("Number of clusters:", n_clusters_)

    
    print("INFO: Making cluster plot...")
    if len(dimensions) == 3:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    else:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)

    plt.xlabel("OP0")
    plt.ylabel("OP1")

    if len(dimensions) == 3:
        ax.set_zlabel("OP2")
        #to show the plot immediately and interactively
        '''a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', 7))
        b = fig.colorbar(a, ax=ax)
        plt.show()'''
        
        #to make a video showing a rotating plot
        plot_file = "animated.mp4"
        def init():
            a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
            fig.colorbar(a, ax=ax)
            return [fig]

        def animate(i):
            ax.view_init(elev=10., azim=i)
            return [fig]

        anim = animation.FuncAnimation(fig, animate, init_func=init, frames=range(360), interval=20, blit=True)
        
        anim.save(plot_file, fps=30, extra_args=['-vcodec', 'libx264'])

    else:
        plot_file = "plot.png"
        if len(dimensions) == 1:
            dimensions.append(np.arange(len(dimensions[0])))
            a = ax.scatter(dimensions[1], dimensions[0], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
        else:
            a = ax.scatter(dimensions[0], dimensions[1], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
        b = fig.colorbar(a, ax=ax)
        plt.savefig(plot_file)
    print("INFO: Saved cluster plot to {}".format(plot_file), file=stderr)

    if metric_name == "precomputed":
        get_centroid(points, metric_name, num_confs, labels, traj_file, inputfile)

    split_trajectory(traj_file, inputfile, labels, n_clusters_)

    return labels
def main():
    #at 2.5 you start to see the hard edges caused by end-loops and see some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate molecular contacts, and assembles an average set of contacts based on MDS"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='the name of the .dat file where the mean will be written')
    parser.add_argument(
        'devfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the mean distance to all other particles
    if not parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)

    if parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        cartesian_distances = np.sum(np.array([i for i in out]), axis=0)

    mean_distance_map = cartesian_distances / num_confs

    #Making a new configuration file from scratch is hard, so we're just going to read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    #make heatmap of the summed distances
    #make_heatmap(mean_distance_map)

    masked_mean = np.ma.masked_array(mean_distance_map,
                                     mean_distance_map >= cutoff_distance)
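    #masked entries (distances at or beyond the cutoff) will not constrain the embedding below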

    #I tried to use DGSOL to analytically solve this, but origamis were too big
    #f = open('test_dist.nmr', 'w+')
    #for i, line in enumerate(masked_mean):
    #    for j, dist in enumerate(line):
    #        if dist != "--" and dist != 0 and i < j:
    #            if j%2 == 0:
    #                f.write("{}\t{}\t1\t1\t{}\t{}\tn\tn\tn\tn\n".format(i+1, j+1, dist, dist))
    #            else:
    #                f.write("{}\t{}\t1\t1\t{}\t{}\tn\tn\tn\tn\n".format(j+1, i+1, dist, dist))

    #super_cutoff_ids = mean_distance_map > cutoff_distance
    #mean_distance_map[super_cutoff_ids] = 0
    #sparse_map = csr_matrix(mean_distance_map)

    print("INFO: fitting local distance data", file=stderr)

    #Many embedding algorithms were tried...

    #from sklearn.manifold import LocallyLinearEmbedding
    #from megaman.geometry import Geometry
    #from scipy.sparse import csr_matrix

    #geom = Geometry()
    #geom = Geometry(adjacency_kwds={'radius':cutoff_distance})#, laplacian_kwds={'scaling_epps':cutoff_distance})
    #geom.set_data_matrix(masked_mean)
    #geom.set_adjacency_matrix(masked_mean)
    #from megaman.embedding import LocallyLinearEmbedding
    #lle = LocallyLinearEmbedding(n_neighbors=5, n_components=3, eigen_solver='arpack', max_iter=3000)
    #lle = LocallyLinearEmbedding(n_components=3, eigen_solver='arpack', geom=geom)
    #out_coords = lle.fit_transform(masked_mean, input_type='adjacency')
    #out_coords = lle.fit_transform(masked_mean)
    #init = np.array([p.cm_pos for p in out_conf._nucleotides])

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3,
              metric=True,
              max_iter=3000,
              eps=1e-12,
              dissimilarity="precomputed",
              n_jobs=1,
              n_init=1)
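    #dissimilarity="precomputed" tells MDS to treat its input as a distance matrix rather than raw coordinates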
    out_coords = mds.fit_transform(
        masked_mean)  #, init=init) #this one worked best

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array(
            [0, 0, 0]
        )  #since the orientation vectors are all 0, this cannot be used in a simulation, but the viewer will handle it

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile),
                                       "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(
        meanfile, meanfile),
          file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances
    if not parallel:
        print(
            "INFO: Computing distance deviations of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance, num_confs)

    if parallel:
        print(
            "INFO: Computing distance deviations of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array([i for i in out]), axis=0)

    #Dump the deviations to an oxView overlay file
    devs = np.ma.masked_array(
        devs, devs == 0.0)  #mask all the 0s so they don't contribute to the mean
    devs *= (1 / num_confs)
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)
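    #devs is now the per-particle RMS deviation of the local contact distances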
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)
def main():
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        type=str,
                        nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print(
            "INFO: Fitting duplexes to {} configurations using 1 core.".format(
                num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)

    if parallel:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus),
              file=stderr)
        duplexes_at_step = []
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        [duplexes_at_step.extend(i) for i in out]

    if [] in duplexes_at_step:
        print(
            "WARNING: Some configurations were invalid and not included in the analysis.  Please check the log to view the error",
            file=stderr)

    #print duplexes to a file
    print(
        "INFO: Writing duplex data to {}.  Use duplex_angle_plotter to graph data"
        .format(outfile),
        file=stderr)
    output = open(outfile, 'w')
    output.write(
        "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n"
    )
    for i in range(0, len(duplexes_at_step)):
        for j in range(0, len(duplexes_at_step[i])):
            line = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'.format(
                duplexes_at_step[i][j].time, duplexes_at_step[i][j].index,
                duplexes_at_step[i][j].start1, duplexes_at_step[i][j].end1,
                duplexes_at_step[i][j].start2, duplexes_at_step[i][j].end2,
                duplexes_at_step[i][j].axis[0], duplexes_at_step[i][j].axis[1],
                duplexes_at_step[i][j].axis[2],
                duplexes_at_step[i][j].final_hel_pos[0],
                duplexes_at_step[i][j].final_hel_pos[1],
                duplexes_at_step[i][j].final_hel_pos[2])
            output.write(line)
    output.close()
Example #18
0
def main():
    #handle commandline arguments
    #the positional arguments for this are:
    # 1. the mean structure from compute_mean.py in json format
    # 2. the trajectory from which to compute the centroid
    # 3. the name of the file to write out the centroid to.  Should be a .dat because oxView uses file extensions
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py"
    )
    parser.add_argument(
        'mean_structure',
        type=str,
        nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The filename to save the centroid to')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
    else:
        outfile = "centroid.dat"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
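    #(an index file is a single line of IDs, e.g. "0 1 5 7")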
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-separated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                    file=stderr)
                exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # load mean structure
    mean_file = args.mean_structure[0]
    if mean_file.split(".")[-1] == "json":
        with open(mean_file) as file:
            mean_structure = np.array(load(file)['g_mean'])[indexes]

    elif mean_file.split(".")[-1] == "dat":
        with ErikReader(mean_file) as reader:
            s = reader.read()
            mean_structure = s.positions[indexes]
    print("INFO: mean structure loaded", file=stderr)

    #Calculate centroid, in parallel if available
    if not parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = ErikReader(traj_file)
        centroid, centroid_a1s, centroid_a3s, centroid_rmsf, centroid_time = compute_centroid(
            r, mean_structure, indexes, num_confs)

    #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    #Each of those chunks is then calculated separately and the results are compiled.
    if parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        candidates = []
        rmsfs = []
        a1s = []
        a3s = []
        ts = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_centroid, num_confs, n_cpus, mean_structure,
            indexes)
        [candidates.append(i[0]) for i in out]
        [rmsfs.append(i[3]) for i in out]
        [a1s.append(i[1]) for i in out]
        [a3s.append(i[2]) for i in out]
        [ts.append(i[4]) for i in out]
        min_id = rmsfs.index(min(rmsfs))
        centroid = candidates[min_id]
        centroid_a1s = a1s[min_id]
        centroid_a3s = a3s[min_id]
        centroid_time = ts[min_id]
        centroid_rmsf = rmsfs[min_id]

    print(
        "INFO: Centroid configuration found at configuration t = {}, RMSF = {}"
        .format(centroid_time, centroid_rmsf),
        file=stderr)

    from oxDNA_analysis_tools.mean2dat import make_dat

    make_dat(
        {
            'g_mean': centroid,
            'a1_mean': centroid_a1s,
            'a3_mean': centroid_a3s
        }, outfile)
Example #19
0
def main():
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "superimposes one or more structures sharing a topology to a reference structure"
    )
    parser.add_argument('reference',
                        type=str,
                        nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims',
        type=str,
        nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-separated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                    file=stderr)
                exit(1)
    else:
        with ErikReader(ref_dat) as r:
            indexes = list(range(len(r.read().positions)))

    #Create list of configurations to superimpose
    to_sup = []
    r = ErikReader(ref_dat)
    ref = r.read()
    ref.inbox()
    ref_conf = ref.positions[indexes]
    for i in args.victims:
        r = ErikReader(i)
        sys = r.read()
        sys.inbox()
        to_sup.append(sys)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, sys in enumerate(to_sup):
        indexed_cur_conf = sys.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
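        #np.einsum('ij, ki -> kj', rot, v) right-multiplies every row of v by rot, matching SVDSuperimposer's dot(y, rot) + tran convention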
        sys.positions = np.einsum('ij, ki -> kj', rot, sys.positions) + tran
        sys.a1s = np.einsum('ij, ki -> kj', rot, sys.a1s)
        sys.a3s = np.einsum('ij, ki -> kj', rot, sys.a3s)
        sys.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
Example #20
0
def main():
    #handle commandline arguments
    #this program has no positional arguments, only flags
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Finds the ensemble of distances between any two particles in the system"
    )
    parser.add_argument(
        '-i',
        '--input',
        metavar='input',
        nargs='+',
        action='append',
        help=
        'An input, trajectory, and a list of particle pairs to compare.  Can call -i multiple times to plot multiple datasets.'
    )
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The name to save the graph file to')
    parser.add_argument(
        '-f',
        '--format',
        metavar='<histogram/trajectory/both>',
        nargs=1,
        help=
        'Output format for the graphs.  Defaults to histogram.  Options are \"histogram\", \"trajectory\", and \"both\"'
    )
    parser.add_argument(
        '-d',
        '--data',
        metavar='data_file',
        nargs=1,
        help=
        'If set, the output for the graphs will be dropped as a json to this filename for loading in oxView or your own scripts'
    )
    parser.add_argument(
        '-n',
        '--names',
        metavar='names',
        nargs='+',
        action='append',
        help=
        'Names of the data series.  Will default to particle ids if not provided'
    )
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's distance?")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "matplotlib", "numpy"])

    #-i requires 4 or more arguments, the topology file of the structure, the trajectory to analyze, and any number of particle pairs to compute the distance between.
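    #e.g. "-i input trajectory.dat 0 50 1 49" measures the 0-50 and 1-49 distances (file names and particle IDs are illustrative)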
    try:
        input_files = [i[0] for i in args.input]
        trajectories = [i[1] for i in args.input]
        p1s = [i[2::2] for i in args.input]
        p2s = [i[3::2] for i in args.input]
        p1s = [[int(j) for j in i] for i in p1s]
        p2s = [[int(j) for j in i] for i in p2s]

    except Exception as e:
        print("ERROR:", e)
        parser.print_help()
        exit(1)

    #get number of distances to calculate
    n_dists = sum([len(l) for l in p1s])

    #Make sure that the input is correctly formatted
    if (len(input_files) != len(trajectories)):
        print(
            "ERROR: bad input arguments\nPlease supply an equal number of input and, trajectory files",
            file=stderr)
        exit(1)
    if len(p1s) != len(p2s):
        print(
            "ERROR: bad input arguments\nPlease supply an even number of particles",
            file=stderr)
        exit(1)

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        print("INFO: No outfile name provided, defaulting to \"distance.png\"",
              file=stderr)
        outfile = "distance.png"

    #-f defines which type of graph to produce
    hist = False
    lineplt = False
    if args.format:
        if "histogram" in args.format:
            hist = True
        if "trajectory" in args.format:
            lineplt = True
        if "both" in args.format:
            hist = True
            lineplt = True
        if hist == lineplt == False:
            print(
                "ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram",
              file=stderr)
        hist = True

    #-c makes it run the clusterer on the output
    cluster = args.cluster

    #get the specified distances
    distances = get_distances(trajectories, p1s, p2s)

    # -n sets the names of the data series
    if args.names:
        names = args.names[0]
        if len(names) < n_dists:
            print(
                "WARNING: Names list too short.  There are {} items in names and {} distances were calculated.  Will pad with particle IDs"
                .format(len(names), n_dists),
                file=stderr)
            for i in range(len(names), n_dists):
                names.append("{}-{}".format([j for sl in p1s for j in sl][i],
                                            [j for sl in p2s for j in sl][i]))
        if len(names) > n_dists:
            print(
                "WARNING: Names list too long. There are {} items in names and {} distances were calculated.  Truncating to be the same as distances"
                .format(len(names), n_dists),
                file=stderr)
            names = names[:n_dists]

    else:
        print("INFO: Defaulting to particle IDs as data series names")
        names = [
            "{}-{}".format(p1, p2)
            for p1, p2 in zip([i for sl in p1s
                               for i in sl], [i for sl in p2s for i in sl])
        ]

    # -d will dump the distances as json files for loading with the trajectories in oxView
    if args.data:
        from json import dump
        #compute the base name once; str.strip('.json') would strip characters, not the suffix
        base = args.data[0][:-len('.json')] if args.data[0].endswith('.json') else args.data[0]
        if len(trajectories) > 1:
            print(
                "INFO: distance lists from separate trajectories are printed to separate files for oxView compatibility.  Trajectory numbers will be appended to your provided data file name.",
                file=stderr)
            file_names = [
                "{}_{}.json".format(base, i)
                for i, _ in enumerate(trajectories)
            ]
        else:
            file_names = [base + '.json']
        names_by_traj = [['{}-{}'.format(p1, p2) for p1, p2 in zip(p1l, p2l)]
                         for p1l, p2l in zip(p1s, p2s)]

        for file_name, ns, dist_list in zip(file_names, names_by_traj,
                                            distances):
            obj = {}
            for n, d in zip(ns, dist_list):
                obj[n] = d
            with open(file_name, 'w+') as f:
                print(
                    "INFO: writing data to {}.  This can be opened in oxView using the Order parameter selector"
                    .format(file_name),
                    file=stderr)
                dump(obj, f)

    #convert the distance list into numpy arrays because they're easier to work with
    for i, l in enumerate(distances):
        distances[i] = np.array(l)

    means = [np.mean(i, axis=1) for i in distances]
    medians = [np.median(i, axis=1) for i in distances]
    stdevs = [np.std(i, axis=1) for i in distances]

    #get some min/max values to make the plots pretty
    lower = min((l.min() for l in distances))
    upper = max((l.max() for l in distances))

    #those horrific list comprehensions unpack lists of lists into a single list
    print("input:\t", end='')
    [
        print("{}-{}\t".format(p1, p2), end='')
        for p1, p2 in zip([i for sl in p1s
                           for i in sl], [i for sl in p2s for i in sl])
    ]
    print("")

    print("name:\t", end='')
    [print("{}\t".format(t), end='') for t in names[:n_dists]]
    print("")

    print("mean:\t", end='')
    [
        print("{:.2f}\t".format(m), end='')
        for m in [i for sl in means for i in sl]
    ]
    print("")

    print("stdev:\t", end='')
    [
        print("{:.2f}\t".format(s), end='')
        for s in [i for sl in stdevs for i in sl]
    ]
    print("")

    print("median:\t", end='')
    [
        print("{:.2f}\t".format(m), end='')
        for m in [i for sl in medians for i in sl]
    ]
    print("")

    #make a histogram
    if hist == True:
        if lineplt == True:
            #if making two plots, automatically append the plot type to the output file name
            out = outfile[:outfile.find(".")] + "_hist" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile
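        #pad the bin range by 10% on either side so extreme values don't sit on the plot edge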
        bins = np.linspace(np.floor(lower - (lower * 0.1)),
                           np.ceil(upper + (upper * 0.1)), 60)
        graph_count = 0
        for traj_set in distances:
            for dist_list in traj_set:
                a = plt.hist(dist_list,
                             bins,
                             weights=np.ones(len(dist_list)) / len(dist_list),
                             alpha=0.5,
                             histtype=u'stepfilled',
                             edgecolor='k',
                             label=names[graph_count])
                graph_count += 1
        plt.xlabel("Distance (nm)")
        plt.ylabel("Normalized frequency")
        plt.legend()
        #plt.show()
        print("INFO: Writing histogram to file {}".format(out), file=stderr)
        plt.savefig("{}".format(out))

    #make a trajectory plot
    if lineplt == True:
        if hist == True:
            #clear the histogram plot
            plt.clf()
            #if making two plots, automatically append the plot type to the output file name
            out = outfile[:outfile.find(".")] + "_traj" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile
        graph_count = 0
        for traj_set in distances:
            for dist_list in traj_set:
                a = plt.plot(dist_list, alpha=0.5, label=names[graph_count])
                graph_count += 1
        plt.xlabel("Simulation Steps")
        plt.ylabel("Distance (nm)")
        plt.legend()
        #plt.show()
        print("INFO: Writing trajectory plot to file {}".format(out),
              file=stderr)
        plt.savefig("{}".format(out))

    if cluster == True:
        if not all([x == trajectories[0] for x in trajectories]):
            print("ERROR: Clustering can only be run on a single trajectory",
                  file=stderr)
            exit(1)
        from oxDNA_analysis_tools.clustering import perform_DBSCAN

        labs = perform_DBSCAN(distances[0].T, len(distances[0][0]),
                              trajectories[0], input_files[0], "euclidean", 12,
                              8)
Example #21
0
def main():
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base  #this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    if mean_file.split(".")[-1] == "json":
        with open(mean_file) as file:
            align_conf = np.array(load(file)['g_mean'])

    elif mean_file.split(".")[-1] == "dat":
        fetch_np = lambda conf: np.array([n.cm_pos for n in conf._nucleotides])
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = fetch_np(s)

    cms = np.mean(align_conf,
                  axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file, top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])

    #now that we have the deviations matrix we're gonna get the covariance and PCA it
    #note that in the future we might want a switch for covariance vs correlation matrix because correlation (cov/stdev so all diagonals are 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)
    transformed = pca.transform(deviations_matrix)

    #recover the eigenvalues and eigenvectors of the covariance matrix from the fitted PCA
    evalues = pca.explained_variance_
    evectors = pca.components_  #each row is an eigenvector of length 3N

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)
    #if you want to weight the components by their eigenvectors
    #mul = np.einsum('ij,i->ij',evectors[0:3], evalues[0:3])
    mul = evectors

    #reconstruct configurations in component space (rows of mul are eigenvectors, so transpose for the projection)
    out = np.dot(deviations_matrix, mul.T).astype(float)

    #make a quick plot from the first three components
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlay showing the first SUM components
    SUM = 1
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script.  Current value: {}"
        .format(SUM),
        file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  #how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    prep_pos_for_json = lambda conf: list(list(p) for p in conf)
    with catch_warnings():  #suppress an irrelevant warning about casting complex values to real values
        simplefilter("ignore")
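        #reshape the flat 3N-component vector into N rows of (x, y, z)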
        output_vectors = weighted_sum.reshape(int(weighted_sum.shape[0] / 3),
                                              3).astype(float)
    with open(outfile, "w+") as file:
        file.write(dumps({"pca": prep_pos_for_json(output_vectors)}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from oxDNA_analysis_tools.clustering import perform_DBSCAN
        labs = perform_DBSCAN(out, num_confs, traj_file, inputfile,
                              "euclidean", 12, 8)
Example #22
0
def main():
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="A very simple plotting utility for oxDNA.org")
    parser.add_argument('energy', nargs='+', help='Energy files to plot')
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The name to save the graph file to')
    parser.add_argument(
        '-f',
        '--format',
        metavar='<histogram/trajectory/both>',
        nargs=1,
        help=
        'Output format for the graphs.  Defaults to histogram.  Options are \"histogram\", \"trajectory\", and \"both\"'
    )

    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #get file name
    energy_files = args.energy

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = 'energy.png'

    #-f defines which type of graph to produce
    hist = False
    line = False
    if args.format:
        if "histogram" in args.format:
            hist = True
        if "trajectory" in args.format:
            line = True
        if "both" in args.format:
            hist = line = True
        if hist == line == False:
            print(
                "ERROR: unrecognized graph format\nAccepted formats are \"histogram\", \"trajectory\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No graph format specified, defaulting to histogram",
              file=stderr)
        hist = True

    all_times = []
    all_energies = []
    for efile in energy_files:
        times = []
        energies = []
        with open(efile, 'r') as f:
            l = f.readline()
            while l:
                times.append(float(l.split()[0]))
                energies.append(float(l.split()[1]))
                l = f.readline()
        all_times.append(times)
        all_energies.append(energies)

    names = ["1", "2", "3"]

    if outfile and hist == True:
        if line == True:
            out = outfile[:outfile.find(".")] + "_hist" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile

        bins = np.linspace(min([min(e) for e in all_energies]),
                           max([max(e) for e in all_energies]), 40)

        artists = []
        for i, elist in enumerate(all_energies):
            a = plt.hist(elist,
                         bins,
                         weights=np.ones(len(elist)) / len(elist),
                         alpha=0.3,
                         label=names[i],
                         histtype=u'stepfilled',
                         edgecolor='k')
            artists.append(a)
        plt.legend(labels=names)
        plt.xlabel("Energy per particle (SU)")
        plt.ylabel("Normalized frequency")
        if outfile:
            print("INFO: Saving histogram to {}".format(out), file=stderr)
            plt.savefig(out)
        else:
            plt.show()

    #make a trajectory plot
    if outfile and line == True:
        if hist == True:
            plt.clf()
            out = outfile[:outfile.find(".")] + "_traj" + outfile[outfile.
                                                                  find("."):]
        else:
            out = outfile

        artists = []
        for tlist, elist in zip(all_times, all_energies):
            a = plt.plot(tlist, elist, alpha=0.5)
            artists.append(a)
        plt.legend(labels=names)
        plt.xlabel("Time (SU)")
        plt.ylabel("Energy (SU)")
        if outfile:
            print("INFO: Saving line plot to {}".format(out), file=stderr)
            plt.savefig(out)
        else:
            plt.show()
def main():
    #handle commandline arguments
    #the positional arguments for this are:
    # 1. the mean structure from compute_mean.py in json format
    # 2. the trajectory from which to compute the deviations
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py"
    )
    parser.add_argument(
        'mean_structure',
        type=str,
        nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-o',
        '--output',
        metavar='output_file',
        nargs=1,
        help='The filename to save the deviations json file to')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument('-r',
                        metavar='rmsd_plot',
                        dest='rmsd_plot',
                        nargs=1,
                        help='The name of the file to save the RMSD plot to.')
    parser.add_argument(
        '-d',
        metavar='rmsd_data',
        dest='rmsd_data',
        nargs=1,
        help='The name of the file to save the RMSD data in json format.')
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy", "matplotlib"])

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
        if not outfile.split(".")[-1] == 'json':
            outfile += ".json"
    else:
        outfile = "devs.json"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-separated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                    file=stderr)
                exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r names the file to print the RMSD plot to
    if args.rmsd_plot:
        plot_name = args.rmsd_plot[0]
    else:
        plot_name = 'rmsd.png'

    # -d names the file to print the RMSD data to
    if args.rmsd_data:
        data_file = args.rmsd_data[0]

    # load mean structure
    mean_structure_file = args.mean_structure[0]
    with open(mean_structure_file) as file:
        mean_data = loads(file.read())
    mean_structure = np.array(mean_data["g_mean"])
    indexed_mean_structure = mean_structure[indexes]
    print("INFO: mean structure loaded", file=stderr)

    #Calculate deviations, in parallel if available
    if not parallel:
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(indexed_mean_structure)),
            file=stderr)
        r = ErikReader(traj_file)
        deviations, RMSDs = compute_deviations(r, mean_structure,
                                               indexed_mean_structure, indexes,
                                               num_confs)

    #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    #Each of those chunks is then calculated separately and the results are compiled.
    if parallel:
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(indexed_mean_structure), n_cpus),
            file=stderr)
        deviations = []
        RMSDs = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_deviations, num_confs, n_cpus, mean_structure,
            indexed_mean_structure, indexes)
        [deviations.extend(i[0]) for i in out]
        [RMSDs.extend(i[1]) for i in out]

    #compute_deviations() returns the deviation of every particle in every configuration
    #take the mean of the per-configuration deviations to get the RMSF
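    #0.8518 nm is the length of one oxDNA simulation unit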
    rmsfs = np.sqrt(np.mean(np.square(np.array(deviations)), axis=0)) * 0.8518

    #write the deviations to a json file
    print("INFO: writing deviations to {}".format(outfile), file=stderr)
    with open(outfile, "w") as file:
        file.write(dumps({"RMSF (nm)": rmsfs.tolist()}))

    #plot RMSDs
    print("INFO: writing RMSD plot to {}".format(plot_name), file=stderr)
    plt.plot(RMSDs)
    plt.axhline(np.mean(RMSDs), color='red')
    plt.xlabel('Configuration')
    plt.ylabel('RMSD (nm)')
    plt.savefig(plot_name)

    #print RMSDs
    if args.rmsd_data:
        print("INFO: writing RMSD data to {}".format(data_file), file=stderr)
        with open(data_file, 'w') as f:
            f.write(dumps({"RMSD (nm)": RMSDs}))