Python MatrixHandler.getMatrix 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pyRMSD.matrixHandler

클래스/타입: MatrixHandler

메소드/함수: getMatrix

hotexamples.com에서의 예제들: 3

Python MatrixHandler.getMatrix - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 pyRMSD.matrixHandler.MatrixHandler.getMatrix에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

MatrixHandler(6)

loadMatrix(4)

getMatrix(3)

createMatrix(1)

distance_matrix(1)

get_data(1)

load_matrix(1)

saveMatrix(1)

save_matrix(1)

save_statistics(1)

예제 #1

파일 보기

    def get_rmsd_matrix(self, align, symmetry):
        """Return the full (square) RMSD distance matrix for the test RMFs.

        Coordinates are read from the ``SampledA.rmf3`` and ``SampledC.rmf3``
        input files, the RMSD computation is run on them, and the matrix
        stored in ``Distances_Matrix.data`` is loaded through pyRMSD and
        expanded with ``squareform``.

        :param align: forwarded unchanged to
            ``rmsd_calculation.get_rmsds_matrix``.
        :param symmetry: forwarded unchanged to
            ``rmsd_calculation.get_rmfs_coordinates_one_rmf``.
        :return: the result of ``scipy.spatial.distance.squareform`` applied
            to the loaded matrix data.
        """
        rmf_a = self.get_input_file_name("SampledA.rmf3")
        rmf_c = self.get_input_file_name("SampledC.rmf3")

        # Only the conformations and the symmetry groups are needed here;
        # the other five returned values are discarded.
        (_, _, _, conforms, symm_groups, _,
         _) = rmsd_calculation.get_rmfs_coordinates_one_rmf(
             "./", rmf_a, rmf_c, None, symmetry, None, 1)

        # The return value is not used; the matrix is read back from disk
        # below (presumably this call is what writes Distances_Matrix.data
        # -- TODO confirm against rmsd_calculation).
        rmsd_calculation.get_rmsds_matrix(
            conforms, 'cpu_omp', align, 2, symm_groups)
        del conforms

        handler = MatrixHandler()
        handler.loadMatrix("Distances_Matrix.data")
        condensed = handler.getMatrix().get_data()

        return sp.spatial.distance.squareform(condensed)

예제 #2

파일 보기

    args.extension = "rmf3"
    ps_names, masses, radii, conforms, models_name = get_rmfs_coordinates(
        args.path, idfile_A, idfile_B, args.subunit)
print "Size of conformation matrix", conforms.shape

if not args.skip_sampling_precision:
    inner_data = get_rmsds_matrix(conforms, args.mode, args.align, args.cores)
    print "Size of RMSD matrix (flattened):", inner_data.shape

import pyRMSD.RMSDCalculator
from pyRMSD.matrixHandler import MatrixHandler

mHandler = MatrixHandler()
mHandler.loadMatrix("Distances_Matrix.data")

rmsd_matrix = mHandler.getMatrix()
distmat = rmsd_matrix.get_data()

distmat_full = sp.spatial.distance.squareform(distmat)
print "Size of RMSD matrix (unpacked, N x N):", distmat_full.shape

# Get model lists
sampleA_all_models, sampleB_all_models = get_sample_identity(
    idfile_A, idfile_B)
total_num_models = len(sampleA_all_models) + len(sampleB_all_models)
all_models = sampleA_all_models + sampleB_all_models
print "Size of Sample A:", len(
    sampleA_all_models), " ; Size of Sample B: ", len(
        sampleB_all_models), "; Total", total_num_models

if not args.skip_sampling_precision:

예제 #3

파일 보기

def main():
    """Run the sampling convergence / clustering analysis end to end.

    The command line is read with ``parse_args()``; the steps below follow
    the order of the code:

    0. Read the score files ``args.scoreA`` and ``args.scoreB`` (one float
       per line, relative to ``args.path``) and run the score convergence
       statistics on them.
    1. Load model coordinates from PDB files or RMF files and, unless
       ``args.skip_sampling_precision`` is set, compute the RMSD matrix.
       The matrix is then loaded from ``Distances_Matrix.data``.
    2. Unless skipped, cluster at a grid of cutoffs to determine the
       sampling precision and write
       ``<sysname>.Sampling_Precision_Stats.txt``; otherwise use
       ``args.cluster_threshold``.
    3. Cluster at the final threshold and write
       ``<sysname>.Cluster_Population.txt`` and
       ``<sysname>.Cluster_Precision.txt``. For every retained cluster a
       ``cluster.<i>`` directory (with density maps and the cluster
       center model) and ``cluster.<i>.{all,sample_A,sample_B}.txt``
       membership lists are written.
    4. If ``args.gnuplot`` is set, run every ``*.plt`` script found in the
       IMP.sampcon ``gnuplot_scripts`` data directory.

    All output files are written relative to the current directory.
    """
    args = parse_args()

    import os
    import shutil
    import numpy

    import scipy as sp
    # Import the submodule explicitly: a bare "import scipy" is not
    # guaranteed to expose scipy.spatial.distance as an attribute.
    import scipy.spatial.distance  # noqa: F401

    import IMP.sampcon
    from IMP.sampcon import scores_convergence, clustering_rmsd
    from IMP.sampcon import rmsd_calculation, precision_rmsd

    import IMP

    idfile_A = "Identities_A.txt"
    idfile_B = "Identities_B.txt"

    # Step 0: Compute Score convergence
    with open(os.path.join(args.path, args.scoreA), 'r') as f:
        score_A = [float(line.strip("\n")) for line in f]

    with open(os.path.join(args.path, args.scoreB), 'r') as f:
        score_B = [float(line.strip("\n")) for line in f]

    scores = score_A + score_B

    # Get the convergence of the best score
    scores_convergence.get_top_scorings_statistics(scores, 0, args.sysname)

    # Check if the two score distributions are similar
    scores_convergence.get_scores_distributions_KS_Stats(
        score_A, score_B, 100, args.sysname)

    # Step 1: Compute RMSD matrix
    # True only when conformations were read from the single-RMF inputs,
    # which is the only path that defines n_models.
    use_single_rmf = False
    if args.extension == "pdb":
        ps_names = []  # bead names are not stored in PDB files
        symm_groups = None
        conforms, masses, radii, models_name = \
            rmsd_calculation.get_pdbs_coordinates(
                args.path, idfile_A, idfile_B)
    else:
        args.extension = "rmf3"
        # If we have a single RMF file, read conformations from that
        if args.rmf_A is not None:
            use_single_rmf = True
            (ps_names, masses, radii, conforms, symm_groups, models_name,
             n_models) = rmsd_calculation.get_rmfs_coordinates_one_rmf(
                 args.path, args.rmf_A, args.rmf_B, args.subunit,
                 args.symmetry_groups)

        # If not, default to the Identities.txt file
        else:
            symm_groups = None
            (ps_names, masses, radii, conforms,
             models_name) = rmsd_calculation.get_rmfs_coordinates(
                 args.path, idfile_A, idfile_B, args.subunit)

    print("Size of conformation matrix", conforms.shape)

    if not args.skip_sampling_precision:
        # get_rmsds_matrix modifies conforms, so save it to a file and restore
        # afterwards (so that we retain the original IMP orientation)
        numpy.save("conforms", conforms)
        inner_data = rmsd_calculation.get_rmsds_matrix(conforms, args.mode,
                                                       args.align, args.cores,
                                                       symm_groups)
        print("Size of RMSD matrix (flattened):", inner_data.shape)
        del conforms
        conforms = numpy.load("conforms.npy")
        os.unlink('conforms.npy')

    from pyRMSD.matrixHandler import MatrixHandler
    mHandler = MatrixHandler()
    mHandler.loadMatrix("Distances_Matrix.data")

    rmsd_matrix = mHandler.getMatrix()
    distmat = rmsd_matrix.get_data()

    distmat_full = sp.spatial.distance.squareform(distmat)
    print("Size of RMSD matrix (unpacked, N x N):", distmat_full.shape)

    # Get model lists
    if use_single_rmf:
        sampleA_all_models = list(range(n_models[0]))
        sampleB_all_models = list(range(n_models[0],
                                        n_models[1] + n_models[0]))
        total_num_models = n_models[1] + n_models[0]
    else:
        (sampleA_all_models,
         sampleB_all_models) = clustering_rmsd.get_sample_identity(
             idfile_A, idfile_B)
        total_num_models = len(sampleA_all_models) + len(sampleB_all_models)
    all_models = list(sampleA_all_models) + list(sampleB_all_models)
    print("Size of Sample A:",
          len(sampleA_all_models), " ; Size of Sample B: ",
          len(sampleB_all_models), "; Total", total_num_models)

    if not args.skip_sampling_precision:

        print("Calculating sampling precision")

        # Step 2: Cluster at intervals of grid size to get the
        # sampling precision
        gridSize = args.gridsize

        # Get cutoffs for clustering
        cutoffs_list = clustering_rmsd.get_cutoffs_list(distmat, gridSize)
        print("Clustering at thresholds:", cutoffs_list)

        # Do clustering at each cutoff
        pvals, cvs, percents = clustering_rmsd.get_clusters(
            cutoffs_list, distmat_full, all_models, total_num_models,
            sampleA_all_models, sampleB_all_models, args.sysname)

        # Now apply the rule for selecting the right precision based
        # on population of contingency table, pvalue and cramersv
        (sampling_precision, pval_converged, cramersv_converged,
         percent_converged) = clustering_rmsd.get_sampling_precision(
             cutoffs_list, pvals, cvs, percents)

        # Output test statistics
        with open("%s.Sampling_Precision_Stats.txt" % args.sysname,
                  'w+') as fpv:
            print(
                "The sampling precision is defined as the largest allowed "
                "RMSD between the cluster centroid and a ",
                args.sysname,
                "model within any cluster in the finest clustering for "
                "which each sample contributes models proportionally to "
                "its size (considering both significance and magnitude of "
                "the difference) and for which a sufficient proportion of "
                "all models occur in sufficiently large clusters. The "
                "sampling precision for our ",
                args.sysname,
                " modeling is %.3f" % (sampling_precision),
                " A.",
                file=fpv)

            print(
                "Sampling precision, P-value, Cramer's V and percentage "
                "of clustered models below:",
                file=fpv)
            print("%.3f\t%.3f\t%.3f\t%.3f" %
                  (sampling_precision, pval_converged, cramersv_converged,
                   percent_converged),
                  file=fpv)
            print("", file=fpv)

        final_clustering_threshold = sampling_precision

    else:
        final_clustering_threshold = args.cluster_threshold

    # Perform final clustering at the required precision
    print("Clustering at threshold %.3f" % final_clustering_threshold)
    (cluster_centers, cluster_members) = clustering_rmsd.precision_cluster(
        distmat_full, total_num_models, final_clustering_threshold)

    (ctable, retained_clusters) = clustering_rmsd.get_contingency_table(
        len(cluster_centers), cluster_members, all_models, sampleA_all_models,
        sampleB_all_models)
    print("Contingency table:", ctable)
    # Output the number of models in each cluster and each sample
    with open("%s.Cluster_Population.txt" % args.sysname, 'w+') as fcp:
        for row_index, row in enumerate(ctable):
            print(row_index, row[0], row[1], file=fcp)

    # Obtain the subunits for which we need to calculate densities
    density_custom_ranges = precision_rmsd.parse_custom_ranges(args.density)

    # Every density map (sample A, sample B, total) is built with the
    # same settings.
    density_kwargs = dict(
        custom_ranges=density_custom_ranges,
        resolution=args.density_threshold,
        voxel=args.voxel,
        bead_names=ps_names)

    # Output cluster precisions; the context manager guarantees the file
    # is closed even if processing one of the clusters raises.
    with open("%s.Cluster_Precision.txt" % args.sysname, 'w+') as fpc:

        # For each cluster, output the models in the cluster
        # Also output the densities for the cluster models
        for i, clus in enumerate(retained_clusters):
            members = cluster_members[clus]

            # The cluster centroid is the first conformation.
            # We use this as to align and compute RMSD/precision
            conform_0 = conforms[all_models[members[0]]]

            # create a (fresh) directory for the cluster
            cluster_dir = "./cluster.%s" % i
            if os.path.exists(cluster_dir):
                shutil.rmtree(cluster_dir)
            os.mkdir(cluster_dir)
            os.mkdir("./cluster.%s/Sample_A/" % i)
            os.mkdir("./cluster.%s/Sample_B/" % i)

            # Create densities for all subunits for both sample A and
            # sample B as well as separately.
            gmd1 = precision_rmsd.GetModelDensity(**density_kwargs)
            gmd2 = precision_rmsd.GetModelDensity(**density_kwargs)
            gmdt = precision_rmsd.GetModelDensity(**density_kwargs)

            # Create a model with just the cluster_member particles
            model = IMP.Model()
            ps = []  # particle list to be updated by each RMF frame
            for pi in range(len(conform_0)):
                p = IMP.Particle(model, "%s" % str(pi))
                IMP.core.XYZ.setup_particle(p, (0, 0, 0))
                IMP.core.XYZR.setup_particle(p, float(radii[pi]))
                IMP.atom.Mass.setup_particle(p, float(masses[pi]))
                ps.append(p)

            # Obtain cluster precision by obtaining average RMSD of each
            # model to the cluster center
            cluster_precision = 0.0

            # transformation from internal pyRMSD orientation
            trans = None

            # Also output the identities of cluster members
            list_prefix = 'cluster.' + str(i)
            with open(list_prefix + '.all.txt', 'w') as both_file, \
                    open(list_prefix + '.sample_A.txt',
                         'w') as sampleA_file, \
                    open(list_prefix + '.sample_B.txt',
                         'w') as sampleB_file:
                # for each model in the cluster
                for mem in members:

                    model_index = all_models[mem]

                    # get superposition of each model to cluster center
                    # and the RMSD between the two
                    if args.symmetry_groups:
                        rmsd, superposed_ps, trans = \
                            precision_rmsd.get_particles_from_superposed_amb(
                                conforms[model_index], conform_0,
                                args.align, ps, trans, symm_groups)
                    else:
                        rmsd, superposed_ps, trans = \
                            precision_rmsd.get_particles_from_superposed(
                                conforms[model_index], conform_0,
                                args.align, ps, trans)

                    model.update()  # why not?

                    cluster_precision += rmsd

                    # Add the superposed particles to the respective
                    # density maps
                    gmdt.add_subunits_density(superposed_ps)  # total map
                    print(model_index, file=both_file)

                    if model_index in sampleA_all_models:
                        # density map for sample A
                        gmd1.add_subunits_density(superposed_ps)
                        print(model_index, file=sampleA_file)
                    else:
                        # density map for sample B
                        gmd2.add_subunits_density(superposed_ps)
                        print(model_index, file=sampleB_file)

            # The centroid is itself one of the members, so the sum is
            # averaged over the remaining ones. A single-member cluster
            # has nothing to average over; skip the division rather than
            # divide by zero.
            num_other_members = len(members) - 1
            if num_other_members > 0:
                cluster_precision /= float(num_other_members)

            print(
                "Cluster precision (average distance to cluster centroid) "
                "of cluster ",
                str(i),
                " is %.3f" % cluster_precision,
                "A",
                file=fpc)

            # Output density files for the cluster
            density = gmdt.write_mrc(path="./cluster.%s" % i,
                                     file_prefix="LPD")
            gmd1.write_mrc(path="./cluster.%s/Sample_A/" % i,
                           file_prefix="LPD")
            gmd2.write_mrc(path="./cluster.%s/Sample_B/" % i,
                           file_prefix="LPD")

            # Add the cluster center model RMF to the cluster directory
            cluster_center_index = members[0]
            if use_single_rmf:
                # Indices below n_models[0] are frames of rmf_A; the rest
                # are frames of rmf_B, offset by the size of sample A.
                if cluster_center_index < n_models[0]:
                    center_rmf = os.path.join(args.path, args.rmf_A)
                    center_frame = cluster_center_index
                else:
                    center_rmf = os.path.join(args.path, args.rmf_B)
                    center_frame = cluster_center_index - n_models[0]
                make_cluster_centroid(
                    center_rmf, center_frame,
                    os.path.join("cluster.%d" % i,
                                 "cluster_center_model.rmf3"), i,
                    len(members), cluster_precision, density, args.path)
            else:
                # index to Identities file.
                cluster_center_model_id = all_models[cluster_center_index]
                outfname = os.path.join(
                    "cluster.%d" % i,
                    "cluster_center_model." + args.extension)
                if 'rmf' in args.extension:
                    make_cluster_centroid(
                        models_name[cluster_center_model_id], 0,
                        outfname, i, len(members),
                        cluster_precision, density, args.path)
                else:
                    shutil.copy(models_name[cluster_center_model_id],
                                outfname)

    # generate plots for the score and structure tests
    if args.gnuplot:
        import subprocess
        import glob

        gnuplotdir = IMP.sampcon.get_data_path("gnuplot_scripts")
        for filename in sorted(glob.glob(os.path.join(gnuplotdir, "*.plt"))):
            cmd = ['gnuplot', '-e', 'sysname="%s"' % args.sysname, filename]
            print(" ".join(cmd))
            subprocess.check_call(cmd)