Example #1
def main():
    import argparse
    import numpy as np
    from os import environ, path
    from oxDNA_analysis_tools.UTILS.readers import LorenzoReader2, get_input_parameter
    # output_bonds is assumed to come from the package's module-level imports, as in the original script
    parser = argparse.ArgumentParser(prog=path.basename(__file__), description="List all the interactions between nucleotides")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze')
    parser.add_argument('-v', type=str, nargs=1, dest='outfile', help='instead of printing the interactions, write average per-particle energy as an oxView JSON overlay to this file')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]

    # -v switches the output from the raw bond list to a per-particle energy overlay
    if args.outfile:
        outfile = args.outfile[0]
        visualize = True
    else:
        visualize = False

    # the topology path in the input file is relative to the simulation directory
    sim_directory = path.dirname(inputfile)
    top_file = path.join(sim_directory, get_input_parameter(inputfile, "topology"))
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base #this needs to be imported after the model type is set

    myreader = LorenzoReader2(traj_file,top_file)
    mysystem = myreader._get_system()

    energies = np.zeros(mysystem.N)
    count = 0

    while mysystem != False:
        out = output_bonds(inputfile, mysystem)
        if visualize:
            for line in out.split('\n'):
                if not (line.startswith('#') or line == ''):
                    # columns: the two particle ids, then the individual energy terms
                    line = [float(l) for l in line.split(' ')]
                    energies[int(line[0])] += sum(line[2:])
                    energies[int(line[1])] += sum(line[2:])
        else:
            print(out)

        count += 1
        mysystem = myreader._get_system()

    if visualize:
        # average over the trajectory and convert from oxDNA simulation units
        # to pN nm (1 simulation energy unit = 41.42 pN nm)
        energies *= (41.42/count)
        with open(outfile, "w+") as file:
            file.write("{\n\"Energy (pN nm)\" : [")
            file.write(str(energies[0]))
            for n in energies[1:]:
                file.write(", {}".format(n))
            file.write("] \n}")
def get_centroid(points, metric_name, num_confs, labs, traj_file, inputfile):
    """
    Takes the output from DBSCAN and produces the trajectory and centroid from each cluster.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        metric_name (str): The type of data the points represent.
        num_confs (int): The number of configurations in the trajectory.
        labs (numpy.array): The cluster each point belongs to.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
    """
    
    print("INFO: splitting clusters...", file=stderr)
    print("INFO: Will write cluster trajectories to traj_<cluster_number>.dat", file=stderr)
    print ("cluster\tn\tavg_E\tE_dev\tavg_H\tH_dev\tcentroid_t")
    for cluster in (set(labs)):
        if metric_name == "precomputed":
            masked = points[labs == cluster]
            in_cluster_id = np.sum(masked, axis = 1).argmin()

        in_cluster = list(labs).count(cluster)
        centroid_id = find_element(in_cluster_id, cluster, labs)
        top_file = get_input_parameter(inputfile, "topology")

        r = LorenzoReader2(traj_file, top_file)
        output = r._get_system(N_skip=centroid_id)
        filename = "centroid"+str(cluster)

        output.print_lorenzo_output(filename+".dat", filename+".top")
        
        make_heatmap(inputfile, output, filename)
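
# get_centroid consumes DBSCAN output; a hedged sketch of driving it with sklearn's
# DBSCAN (the .npy and file names are hypothetical; eps=12 and min_samples=8 mirror
# the perform_DBSCAN call used elsewhere in these examples):
from sklearn.cluster import DBSCAN
import numpy as np

points = np.load("distances.npy")  #precomputed, symmetric distance matrix, shape (num_confs, num_confs)
labs = DBSCAN(eps=12, min_samples=8, metric="precomputed").fit(points).labels_
get_centroid(points, "precomputed", len(points), labs, "trajectory.dat", "input")
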
Example #3
def main():
    # NOTE: assumes the original script's module-level imports (argparse, os.environ,
    # os.path, sys.stderr, and the oxDNA_analysis_tools helpers used below)
    #read data from files
    parser = argparse.ArgumentParser(prog=path.basename(__file__), description="Compare the bonds found at each step of the trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The trajectory file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1, help="The file containing the desired nucleotide pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1, help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")
    
    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file,top_file)
        out = bond_analysis(r, pairs, inputfile, num_confs)
        if out is None:  #bond_analysis returns nothing if DNAnalysis failed
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
            exit(1)
        tot_bonds, tot_missbonds, out_array, confid = out

    if parallel:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs, inputfile)

        tot_bonds = 0
        tot_missbonds = 0
        confid = 0
        for i in out:
            if i[0] is not None:
                tot_bonds += i[0]
                tot_missbonds += i[1]
                confid += i[3]
            else:
                print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the logs", file=stderr)

        #sum the per-nucleotide occupancy arrays returned by each process
        out_array = sum((i[2] for i in out if len(i[2]) > 0))

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(tot_bonds/(int(confid)),tot_missbonds/int(confid)))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [")
        file.write(str(out_array[0]/int(confid)))
        for n in out_array[1:]:
            file.write(", {}".format(n/int(confid)))
        file.write("] \n}") 
Example #4
def main():
    import argparse
    import matplotlib.pyplot as plt
    from os import environ, path
    from oxDNA_analysis_tools.UTILS.readers import LorenzoReader2, get_input_parameter
    # contact_map is assumed to come from the package's module-level imports, as in the original script

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate and display the contact map for a structure")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The file containing the configurations of which the contact map is needed")
    parser.add_argument('-v', dest='visualize', action='store_true',
                        help="Should we display the contact map once it's calculated? Only recommended if there are few configurations.")

    args = parser.parse_args()
    visualize = args.visualize
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]

    #process files
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #create system object from first configuration in the trajectory
    r = LorenzoReader2(traj_file, top_file)
    system = r._get_system()

    #for every configuration, create a graphical contact map
    while system:
        m = contact_map(inputfile, system, True)
        if visualize:
            fig, ax = plt.subplots()
            a = ax.imshow(m, cmap='viridis', origin='lower')
            ax.set(title="interaction network",
                   ylabel="nucleotide id",
                   xlabel="nucleotide id")
            b = fig.colorbar(a, ax=ax)
            b.set_label("distance", rotation=270)
            plt.show()
        system = r._get_system()
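
# contact_map itself is provided by the package; conceptually it is a matrix of pairwise
# distances between nucleotide centres of mass.  A minimal numpy sketch of that idea
# (it ignores periodic boundaries, which the real helper may handle; the attribute
# names follow the system objects used above):
import numpy as np

def distance_map(system):
    """Pairwise distance matrix between nucleotide centres of mass (sketch)."""
    pos = np.array([n.cm_pos for n in system._nucleotides])  #(N, 3)
    diff = pos[:, None, :] - pos[None, :, :]                 #(N, N, 3)
    return np.linalg.norm(diff, axis=-1)                     #(N, N)
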
def main():
    # NOTE: assumes the original script's module-level imports (argparse, pickle,
    # numpy as np, os.environ/os.path, sys.stderr, and the oxDNA_analysis_tools
    # helpers used below)
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculate differences between structures and automatically apply DBSCAN to retrieve clusters")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import oxDNA_analysis_tools.UTILS.base  #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)

        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file,
                                                            get_eRMSDs, num_confs,
                                                            n_cpus, r2, inputfile,
                                                            traj_file, top_file,
                                                            matrix=True)
        eRMSDs = np.sum(np.array([i for i in out]), axis=0)  #np.sum over a bare generator with axis= would fail
    #eRMSDs = pickle.load(open('tmp_eRMSDs', 'rb'))

    #the eRMSD matrix is computed as only half a matrix, so mirror it across the diagonal and zero the diagonal
    for ni, i in enumerate(eRMSDs):
        for nj, j in enumerate(i):
            eRMSDs[nj][ni] = j
            if ni == nj:
                eRMSDs[ni][nj] = 0

    #the eRMSDs are time-consuming to calculate, so pickle the matrix to allow re-running DBSCAN with different parameters later
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    ###############################################################################################################
    #Next, we're going to perform a DBSCAN on that matrix of eRMSDs to find clusters of similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12, 8)
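
# The mirroring loop above can also be written in vectorized form; a short sketch,
# assuming the computed values live in the upper triangle of the matrix:
import numpy as np

def symmetrize(half_matrix):
    """Mirror the upper triangle into the lower one; the diagonal stays zero."""
    upper = np.triu(half_matrix, k=1)
    return upper + upper.T
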
Example #6
def main():
    # NOTE: assumes the original script's module-level imports (argparse, numpy as np,
    # os.environ/os.path, sys.stderr, json's load/dumps, warnings' catch_warnings/simplefilter,
    # sklearn's PCA, and the oxDNA_analysis_tools helpers used below)
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculates a principal component analysis of nucleotide deviations over a trajectory")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('meanfile', type=str, nargs=1,
                        help='The mean structure .json file from compute_mean.py')
    parser.add_argument('outfile', type=str, nargs=1,
                        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-c', dest='cluster', action='store_true',
                        help="Run the clusterer on each configuration's position in PCA space?")
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import oxDNA_analysis_tools.UTILS.base  #this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    if mean_file.split(".")[-1] == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']

    elif mean_file.split(".")[-1] == "dat":
        fetch_np = lambda conf: np.array([n.cm_pos for n in conf._nucleotides])
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = fetch_np(s)

    cms = np.mean(align_conf, axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file, top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])

    #now that we have the deviations matrix we're gonna get the covariance and PCA it
    #note that in the future we might want a switch for covariance vs correlation matrix because correlation (cov/stdev so all diagonals are 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)
    transformed = pca.transform(deviations_matrix)

    #recover the eigenvalues and eigenvectors of the deviation covariance from the fitted PCA
    evalues = pca.explained_variance_
    evectors = pca.components_

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print("INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png", file=stderr)
    #if you want to weight the components by their eigenvalues
    #mul = np.einsum('ij,i->ij',evectors[0:3], evalues[0:3])
    mul = evectors.T  #(3N, 3): projects the deviations onto the principal axes

    #reconstruct configurations in component space
    out = np.dot(deviations_matrix, mul).astype(float)

    #make a quick plot from the first three components
    from mpl_toolkits.mplot3d import Axes3D  #registers the 3d projection
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  #fig.gca(projection='3d') was removed in matplotlib 3.6
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlay showing the first SUM components
    SUM = 1
    print("INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script.  Current value: {}".format(SUM), file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  #how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    prep_pos_for_json = lambda conf: list(list(p) for p in conf)
    with catch_warnings():  #suppress an irrelevant warning about casting complex values to real values
        simplefilter("ignore")
        output_vectors = weighted_sum.reshape(weighted_sum.shape[0] // 3, 3).astype(float)
    with open(outfile, "w+") as file:
        file.write(dumps({"pca": prep_pos_for_json(output_vectors)}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...", file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from oxDNA_analysis_tools.clustering import perform_DBSCAN  #package path assumed, as in the other imports
        labs = perform_DBSCAN(out, num_confs, traj_file, inputfile, "euclidean", 12, 8)
def split_trajectory(traj_file, inputfile, labs, n_clusters):
    """
    Splits the trajectory into the clustered trajectories

    Parameters:
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        labs (numpy.array): The cluster each point belongs to.
        n_clusters (int): The number of clusters found.
    """
    # NOTE: assumes the module-level imports of the original script (sys.stderr,
    # os.remove, LorenzoReader2, get_input_parameter)
    top_file = get_input_parameter(inputfile, "topology")

    print ("cluster\tmembers")

    #energies = []
    #H_counts = []

    for cluster in set(labs):
        in_cluster = list(labs).count(cluster)

        print("{}\t{}".format(cluster, in_cluster))

        #energies.append([])
        #H_counts.append([])

        #remove any leftover per-cluster trajectory from a previous run before appending
        try:
            remove("cluster_"+str(cluster)+".dat")
        except OSError:
            pass

    confid = 0
    r1 = LorenzoReader2(traj_file, top_file)
    system = r1._get_system() 
    
    print ("INFO: splitting trajectory...", file=stderr)
    print ("INFO: Will write cluster trajectories to cluster_<cluster number>.dat", file=stderr)

    while system != False:
        system.print_traj_output("cluster_"+str(labs[confid])+".dat", "/dev/null")

        ###########
        #If you want to get additional information about a cluster, add that code here
        #for example, if you want average energy and hydrogen bonds:
        '''
        energies[labs[confid]].append(0)
        H_counts[labs[confid]].append(0)
        system.map_nucleotides_to_strands()
        out = output_bonds(inputfile, system)

        for line in out.split('\n'):
            if line and line[0] != '#':
                line = line.split(" ")
                for m in line[2:9]:
                    energies[labs[confid]][-1] += float(m)
                if float(line[6]) != 0:
                    H_counts[labs[confid]][-1] += 1
        energies[labs[confid]][-1] /= len(system._nucleotides)
        '''
        ############
            
        confid += 1
        system = r1._get_system()

    #This is where you print the information about each cluster
def main():
    # NOTE: assumes the original script's module-level imports (argparse, os.path,
    # sys.stderr, and the oxDNA_analysis_tools helpers used below)
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o', '--output', metavar='output_file', type=str, nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print("INFO: Fitting duplexes to {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)

    if parallel:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        duplexes_at_step = []
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        for i in out:
            duplexes_at_step.extend(i)

    if [] in duplexes_at_step:
        print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the log to view the error", file=stderr)

    #print duplexes to a file
    print("INFO: Writing duplex data to {}.  Use duplex_angle_plotter to graph data".format(outfile), file=stderr)
    with open(outfile, 'w') as output:
        output.write("time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n")
        for step in duplexes_at_step:
            for d in step:
                output.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'.format(
                    d.time, d.index, d.start1, d.end1, d.start2, d.end2,
                    d.axis[0], d.axis[1], d.axis[2],
                    d.final_hel_pos[0], d.final_hel_pos[1], d.final_hel_pos[2]))
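
# The axisX/axisY/axisZ columns written above are the fitted duplex axis vectors, so
# the angle between any two duplexes follows from their dot product; a small sketch
# (the function name is hypothetical):
import numpy as np

def angle_between(axis1, axis2):
    """Angle in degrees between two fitted duplex axis vectors (sketch)."""
    cos_theta = np.dot(axis1, axis2) / (np.linalg.norm(axis1) * np.linalg.norm(axis2))
    return np.degrees(np.arccos(np.clip(cos_theta, -1.0, 1.0)))
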
def main():
    # NOTE: assumes the original script's module-level imports (argparse, numpy as np,
    # os.environ/os.path, sys.stderr, json's dumps, and the oxDNA_analysis_tools helpers used below)
    #at a cutoff of 2.5 you start to see the hard edges caused by end-loops and some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Calculates molecular contacts and assembles an average set of contacts based on MDS")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('meanfile', type=str, nargs=1,
                        help='the name of the .dat file where the mean will be written')
    parser.add_argument('devfile', type=str, nargs=1,
                        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the mean distance to all other particles
    if not parallel:
        print("INFO: Computing interparticle distances of {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)

    if parallel:
        print("INFO: Computing interparticle distances of {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        cartesian_distances = np.sum(np.array([i for i in out]), axis=0)

    mean_distance_map = cartesian_distances * (1 / num_confs)

    #Making a new configuration file from scratch is hard, so we're just going to read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    #make heatmap of the summed distances
    #make_heatmap(mean_distance_map)

    #mask distances beyond the cutoff so only local contacts inform the embedding
    masked_mean = np.ma.masked_array(mean_distance_map, ~(mean_distance_map < cutoff_distance))

    #I tried to use DGSOL to analytically solve this, but origamis were too big
    #f = open('test_dist.nmr', 'w+')
    #for i, line in enumerate(masked_mean):
    #    for j, dist in enumerate(line):
    #        if dist != "--" and dist != 0 and i < j:
    #            if j%2 == 0:
    #                f.write("{}\t{}\t1\t1\t{}\t{}\tn\tn\tn\tn\n".format(i+1, j+1, dist, dist))
    #            else:
    #                f.write("{}\t{}\t1\t1\t{}\t{}\tn\tn\tn\tn\n".format(j+1, i+1, dist, dist))

    #super_cutoff_ids = mean_distance_map > cutoff_distance
    #mean_distance_map[super_cutoff_ids] = 0
    #sparse_map = csr_matrix(mean_distance_map)

    print("INFO: fitting local distance data", file=stderr)

    #Many embedding algorithms were tried...

    #from sklearn.manifold import LocallyLinearEmbedding
    #from megaman.geometry import Geometry
    #from scipy.sparse import csr_matrix

    #geom = Geometry()
    #geom = Geometry(adjacency_kwds={'radius':cutoff_distance})#, laplacian_kwds={'scaling_epps':cutoff_distance})
    #geom.set_data_matrix(masked_mean)
    #geom.set_adjacency_matrix(masked_mean)
    #from megaman.embedding import LocallyLinearEmbedding
    #lle = LocallyLinearEmbedding(n_neighbors=5, n_components=3, eigen_solver='arpack', max_iter=3000)
    #lle = LocallyLinearEmbedding(n_components=3, eigen_solver='arpack', geom=geom)
    #out_coords = lle.fit_transform(masked_mean, input_type='adjacency')
    #out_coords = lle.fit_transform(masked_mean)
    #init = np.array([p.cm_pos for p in out_conf._nucleotides])

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3, metric=True, max_iter=3000, eps=1e-12,
              dissimilarity="precomputed", n_jobs=1, n_init=1)
    out_coords = mds.fit_transform(masked_mean)  #an explicit init was also tried; this worked best

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        #since the orientation vectors are all 0, this cannot be used in a simulation, but the viewer will handle it
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array([0, 0, 0])

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile), "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(meanfile, meanfile), file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances
    if not parallel:
        print("INFO: Computing distance deviations of {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance, num_confs)

    if parallel:
        print("INFO: Computing distance deviations of {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array([i for i in out]), axis=0)

    #Dump the deviations to an oxView overlay file
    devs = np.ma.masked_array(devs, ~(devs != 0.0))  #mask all the 0s so they don't contribute to the mean
    devs *= (1 / num_confs)  #average the summed squared deviations over configurations
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)  #root of the mean squared deviation
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)
Example #10
def main():
    # NOTE: assumes the original script's module-level imports (argparse, os, sys.stderr,
    # and the oxDNA_analysis_tools helpers used below)
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Create an external forces file enforcing the current base-pairing arrangement")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('configuration', type=str, nargs=1,
                        help="The configuration to generate the forces from")
    parser.add_argument('-o', '--output', type=str, nargs=1,
                        help='name of the file to write the forces to. Defaults to forces.txt')
    parser.add_argument('-f', '--pairs', type=str, nargs=1,
                        help='name of the file to write the designed pairs list to')

    args = parser.parse_args()

    #Process command line arguments:
    inputfile = args.inputfile[0]
    conf_file = args.configuration[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "forces.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    if args.pairs:
        pairsfile = args.pairs[0]
    else:
        pairsfile = False

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")

    #get base pairs
    r = LorenzoReader2(conf_file, top_file)
    mysystem = r._get_system()
    out = output_bonds(inputfile, mysystem)
    out = out.split('\n')

    #Find the hydrogen-bonded pairs
    print("INFO: Analyzing the output...", file=stderr)
    Bonded = {}
    for i in out:
        if not i or i[0] == '#':  #skip empty lines and comments
            continue
        splitline = i.split(' ')
        try:
            HB = float(splitline[6])  #column 6 holds the hydrogen-bonding energy
        except (IndexError, ValueError):
            continue
        if HB < -0.001:  #an appreciably negative H-bond energy means the pair is bonded
            ntid0 = int(splitline[0])
            ntid1 = int(splitline[1])
            if ntid0 not in Bonded:
                Bonded[ntid0] = ntid1
            if ntid1 not in Bonded:
                Bonded[ntid1] = ntid0

    lines = []
    pairlines = []
    mutual_trap_template = '{ \ntype = mutual_trap\nparticle = %d\nstiff = 0.9\nr0 = 1.2\nref_particle = %d\nPBC=1\n}\n'
    for key in sorted(Bonded):
        from_particle_id = key
        to_particle_id = Bonded[key]
        if from_particle_id < to_particle_id:  #handle each pair once
            if pairsfile:
                pairlines.append("{} {}\n".format(from_particle_id, to_particle_id))
            #each pair gets two traps, one pulling each particle toward the other
            lines.append(mutual_trap_template % (from_particle_id, to_particle_id))
            lines.append(mutual_trap_template % (to_particle_id, from_particle_id))

    if pairsfile:
        with open(pairsfile, "w") as file:
            file.writelines(pairlines)
            print("INFO: Wrote pairs to {}".format(pairsfile), file=stderr)

    with open(outfile, "w") as file:
        file.writelines(lines)
        print("INFO: Job finished. Wrote forces to {}".format(outfile),
              file=stderr)
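
# For reference, a single bonded pair, say particles 12 and 47 (hypothetical ids),
# renders from mutual_trap_template above as two mirrored entries in forces.txt:
#
#   {
#   type = mutual_trap
#   particle = 12
#   stiff = 0.9
#   r0 = 1.2
#   ref_particle = 47
#   PBC=1
#   }
#   {
#   type = mutual_trap
#   particle = 47
#   stiff = 0.9
#   r0 = 1.2
#   ref_particle = 12
#   PBC=1
#   }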