Esempio n. 1
0
    def save_matrix(self, matrix_path):
        """
        Stores this handler's distance matrix on disk.

        The actual writing is delegated to pyRMSD_MatrixHandler.save_matrix.

        @param matrix_path: Complete path (with filename) where to save the matrix.
        """
        write_matrix = pyRMSD_MatrixHandler.save_matrix
        write_matrix(matrix_path, self.distance_matrix)
Esempio n. 2
0
    def save_matrix(self, matrix_path):
        """
        Stores this handler's distance matrix on disk.

        The actual writing is delegated to pyRMSD_MatrixHandler.save_matrix.

        @param matrix_path: Complete path (with filename) where to save the matrix.
        """
        write_matrix = pyRMSD_MatrixHandler.save_matrix
        write_matrix(matrix_path, self.distance_matrix)
Esempio n. 3
0
    def save_statistics(self, matrix_base_path):
        """
        Dumps the statistics of the distance matrix to disk as JSON.

        The work is delegated to pyRMSD_MatrixHandler.save_statistics and
        whatever it returns is handed back to the caller unchanged.

        @param matrix_base_path: The folder where to save the 'statistics.json' file.
        """
        write_statistics = pyRMSD_MatrixHandler.save_statistics
        return write_statistics(matrix_base_path, self.distance_matrix)
Esempio n. 4
0
    def calculate(cls, data_handler, matrix_params):
        """
        Loads an already created matrix from disk (the "load" method).

        The documented layout of the matrix section is:

        {
            "method": STRING,
            "parameters":{
                "path": STRING
            }
        }

        "path":  The path from where the matrix is going to be loaded.

        NOTE(review): the code reads matrix_params["path"] directly, so it
        appears to receive the inner "parameters" mapping - confirm against
        the caller.

        :param data_handler: Not used by this base implementation.
        :param matrix_params: Mapping holding the "path" entry.

        :return: The result of pyRMSD_MatrixHandler.load_matrix (documented
        as a CondensedMatrix).
        """
        load_matrix = pyRMSD_MatrixHandler.load_matrix
        return load_matrix(matrix_params["path"])
Esempio n. 5
0
 def calculate(cls, data_handler, matrix_params):
     """
     Loads an already created matrix from disk (the "load" method).

     The documented layout of the matrix section is:

     {
         "method": STRING,
         "parameters":{
             "path": STRING
         }
     }

     "path":  The path from where the matrix is going to be loaded.

     NOTE(review): the code reads matrix_params["path"] directly, so it
     appears to receive the inner "parameters" mapping - confirm against
     the caller.

     :param data_handler: Not used by this base implementation.
     :param matrix_params: Mapping holding the "path" entry.

     :return: The result of pyRMSD_MatrixHandler.load_matrix (documented
     as a CondensedMatrix).
     """
     load_matrix = pyRMSD_MatrixHandler.load_matrix
     return load_matrix(matrix_params["path"])
Esempio n. 6
0
    def save_statistics(self, matrix_base_path):
        """
        Dumps the statistics of the distance matrix to disk as JSON.

        The work is delegated to pyRMSD_MatrixHandler.save_statistics and
        whatever it returns is handed back to the caller unchanged.

        @param matrix_base_path: The folder where to save the 'statistics.json' file.
        """
        write_statistics = pyRMSD_MatrixHandler.save_statistics
        return write_statistics(matrix_base_path, self.distance_matrix)
Esempio n. 7
0
    def get_rmsd_matrix(self, align, symmetry):
        """
        Build the RMSD matrix for the two sampled RMF inputs and return it
        in square (N x N) form.

        :param align: Forwarded to rmsd_calculation.get_rmsds_matrix.
        :param symmetry: Forwarded to
            rmsd_calculation.get_rmfs_coordinates_one_rmf.
        :return: Square-form distance matrix produced by
            scipy.spatial.distance.squareform.
        """
        read_coordinates = rmsd_calculation.get_rmfs_coordinates_one_rmf
        sample_a = self.get_input_file_name("SampledA.rmf3")
        sample_c = self.get_input_file_name("SampledC.rmf3")
        # Only the conformations and the symmetry groups are needed below.
        (_ps, _masses, _radii, conforms, symm_groups, _models_name,
         _n_models) = read_coordinates(
             "./", sample_a, sample_c, None, symmetry, None, 1)

        # The return value is not needed here; presumably the call writes
        # "Distances_Matrix.data", which is loaded below - TODO confirm.
        rmsd_calculation.get_rmsds_matrix(
            conforms, 'cpu_omp', align, 2, symm_groups)
        del conforms

        handler = MatrixHandler()
        handler.loadMatrix("Distances_Matrix.data")

        condensed = handler.getMatrix().get_data()
        return sp.spatial.distance.squareform(condensed)
Esempio n. 8
0
    def test_write_and_load(self):
        """
        Round-trip check: a matrix saved with saveMatrix must be recovered
        unchanged by loadMatrix on a fresh handler.
        """
        mh = MatrixHandler(".")
        # Materialize the values: range() is lazy under Python 3, while a
        # list behaves the same under both Python 2 and 3.
        data = list(range(1000))
        matrix = CondensedMatrix(data)
        mh.distance_matrix = matrix

        try:
            mh.saveMatrix("matrix")

            mh2 = MatrixHandler(None)
            mh2.loadMatrix("matrix")
            recovered_data = mh2.distance_matrix.get_data()

            numpy.testing.assert_array_equal(mh.distance_matrix.get_data(),
                                             data)
            numpy.testing.assert_array_equal(recovered_data, data)
        finally:
            # Clean it! Runs even when an assertion fails, and does not
            # depend on a shell 'rm' being available. A missing file is
            # ignored, as it was with the shell command.
            try:
                os.remove("matrix.npy")
            except OSError:
                pass
 def test_write_and_load(self):
     """
     Round-trip check: a matrix saved with saveMatrix must be recovered
     unchanged by loadMatrix on a fresh handler.
     """
     mh = MatrixHandler(".")
     # Materialize the values: range() is lazy under Python 3, while a
     # list behaves the same under both Python 2 and 3.
     data = list(range(1000))
     matrix = CondensedMatrix(data)
     mh.distance_matrix = matrix

     try:
         mh.saveMatrix("matrix")

         mh2 = MatrixHandler(None)
         mh2.loadMatrix("matrix")
         recovered_data = mh2.distance_matrix.get_data()

         numpy.testing.assert_array_equal(mh.distance_matrix.get_data(),
                                          data)
         numpy.testing.assert_array_equal(recovered_data, data)
     finally:
         # Clean it! Runs even when an assertion fails, and does not
         # depend on a shell 'rm' being available. A missing file is
         # ignored, as it was with the shell command.
         try:
             os.remove("matrix.npy")
         except OSError:
             pass
def calcDistMatrix(coordsets):
  """
  Return the data of the pyRMSD matrix built from *coordsets* with the
  'NOSUP_SERIAL_CALCULATOR' calculator.
  """
  from pyRMSD.matrixHandler import MatrixHandler

  handler = MatrixHandler()
  condensed = handler.createMatrix(coordsets, 'NOSUP_SERIAL_CALCULATOR')
  return condensed.get_data()
Esempio n. 11
0
    conforms, masses, radii, models_name = get_pdbs_coordinates(
        args.path, idfile_A, idfile_B)
else:
    args.extension = "rmf3"
    ps_names, masses, radii, conforms, models_name = get_rmfs_coordinates(
        args.path, idfile_A, idfile_B, args.subunit)
print "Size of conformation matrix", conforms.shape

if not args.skip_sampling_precision:
    inner_data = get_rmsds_matrix(conforms, args.mode, args.align, args.cores)
    print "Size of RMSD matrix (flattened):", inner_data.shape

import pyRMSD.RMSDCalculator
from pyRMSD.matrixHandler import MatrixHandler

mHandler = MatrixHandler()
mHandler.loadMatrix("Distances_Matrix.data")

rmsd_matrix = mHandler.getMatrix()
distmat = rmsd_matrix.get_data()

distmat_full = sp.spatial.distance.squareform(distmat)
print "Size of RMSD matrix (unpacked, N x N):", distmat_full.shape

# Get model lists
sampleA_all_models, sampleB_all_models = get_sample_identity(
    idfile_A, idfile_B)
total_num_models = len(sampleA_all_models) + len(sampleB_all_models)
all_models = sampleA_all_models + sampleB_all_models
print "Size of Sample A:", len(
    sampleA_all_models), " ; Size of Sample B: ", len(
Esempio n. 12
0
    def process(self, args=None):
        """
        Accumulate one grid per atom type over all peptides in self.aplist,
        merge the grids of all MPI ranks on rank 0 and write them to an
        HDF5 file.

        :param args: Keyword arguments for er.PepExtractor. Falls back to
            self.args when empty or None. The entries 'Sfn' (output file
            name) and 'atypes' are read here, and the mapping is modified
            in place: an 'mpi' entry is added.
        :return: None. Rank 0 writes the file named by args['Sfn'];
            self.database is closed on every rank.
        """
        if not args:
            args = self.args

        NUCS = self.atypes
        # Atom type -> position in self.atypes.
        iNUCS = {name: idx for idx, name in enumerate(NUCS)}
        iNUCS = self.mpi.comm.bcast(iNUCS)

        lnucs = len(NUCS)
        # The builtin float/int are what the np.float/np.int aliases pointed
        # to; the aliases themselves were removed from NumPy.
        fNUCS = np.zeros((lnucs, ), dtype=float)

        # Init storage for matrices: one accumulation grid per atom type.
        tSf = {name: np.zeros(self.N, dtype=float) for name in NUCS}

        args['mpi'] = self.mpi
        extractor = er.PepExtractor(**args)
        lM = len(self.aplist)

        self.mpi.comm.Barrier()

        tota_ = 0   # atoms added to a grid on this rank
        totba_ = 0  # atoms that could not be added on this rank

        for cm, m in enumerate(self.aplist):

            print('Rank %d model %d of %d' % (self.mpi.rank, cm, lM))

            try:
                S = extractor.extract_result(m)
            except Exception:
                # Best effort: a peptide that cannot be extracted is skipped.
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()
            tlS = range(lS)

            if self.cluster is True:
                # Keep only the cluster centers found by affinity
                # propagation on the negated squared distance matrix.
                resc = S.select('not element H').getCoordsets()
                cl = 'NOSUP_SERIAL_CALCULATOR'

                mHandler = MatrixHandler()
                matrix = mHandler.createMatrix(resc, cl)
                mat = scipy.spatial.distance.squareform(matrix.get_data())
                smatrix = (mat**2) * (-1)
                aff = AffinityPropagation(affinity='precomputed')
                aff_cluster = aff.fit(smatrix)
                tlS = aff_cluster.cluster_centers_indices_

            if tlS is None:
                continue

            for S_ in tlS:

                S.setACSIndex(S_)

                for a in S.iterAtoms():

                    # skip hydrogens
                    if a.getElement() == 'H':
                        continue

                    try:
                        atype = self.rtypes[(a.getResname(), a.getName())]
                    except Exception:
                        print('ATYPE not found', a.getResname(), a.getName())
                        # Without this the code below would run with an
                        # undefined name or the previous atom's type.
                        continue

                    if atype not in self.atypes:
                        continue

                    Agrid, AminXYZ = gu.process_atom(a, self.step)

                    # Offset of the atom grid inside the global grid, in
                    # grid cells.
                    adj = (AminXYZ - self.GminXYZ)
                    adj = (adj / self.step).astype(int)
                    x, y, z = adj

                    try:
                        tSf[atype][x:x + Agrid.shape[0], y:y + Agrid.shape[1],
                                   z:z + Agrid.shape[2]] += Agrid

                        fNUCS[iNUCS[atype]] += 1
                        tota_ += 1

                    except Exception:
                        # Best effort: an atom whose grid cannot be added is
                        # only counted as bad.
                        totba_ += 1

        self.mpi.comm.Barrier()

        if self.mpi.rank == 0:
            print('Collecting grids')

        fNUCS_ = self.mpi.comm.allreduce(fNUCS)
        nNUCS = np.zeros((lnucs, ), dtype=float)

        tota = self.mpi.comm.reduce(tota_)
        totba = self.mpi.comm.reduce(totba_)

        # Sum every rank's grid into rank 0, one atom type at a time; the
        # atom type index doubles as the message tag.
        for i, NUC_ in enumerate(NUCS):

            if self.mpi.rank != 0:
                self.mpi.comm.Send(tSf[NUC_], dest=0, tag=i)

            else:
                for j in range(1, self.mpi.NPROCS):
                    tG = np.empty(tSf[NUC_].shape, dtype=float)
                    self.mpi.comm.Recv(tG, source=j, tag=i)
                    tSf[NUC_] += tG
                nNUCS[i] = np.max(tSf[NUC_])

        nNUCS_ = self.mpi.comm.bcast(nNUCS)

        self.mpi.comm.Barrier()

        # Allocate results file

        Sfn = args['Sfn']

        if self.mpi.rank == 0:

            print('Saving data')

            nmax = bn.nanmax(np.divide(nNUCS_, fNUCS_))

            # The context manager closes the file even if writing fails.
            with h5py.File(Sfn, 'w') as Sf:

                for NUC_ in NUCS:

                    mult = fNUCS_[iNUCS[NUC_]]

                    if mult > 0.0:

                        tG = tSf[NUC_]

                        # Zero everything below the median, then scale by
                        # the atom count and by nmax, times 100.
                        med = np.median(tG)
                        tG[tG < (med)] = 0

                        tG /= float(mult)
                        tG /= float(nmax)
                        tG *= 100.0

                        tSf[NUC_] = tG

                    else:
                        print('Array is empty for: ', NUC_)

                    Sf.create_dataset(NUC_, data=tSf[NUC_])

                Gstep = np.array([self.step, self.step, self.step],
                                 dtype=float)
                Sf.create_dataset('step', data=Gstep)
                Sf.create_dataset('origin', data=self.GminXYZ)
                Sf.create_dataset('atypes',
                                  data=np.array([
                                      args['atypes'],
                                  ], dtype='S20'))

                print('Total bad atoms %d of %d' % (totba, tota))

        self.mpi.comm.Barrier()

        self.database.close()
#import numpy as np
#
#for k in range(self._cool_cycle):
#  confs = self.confs['cool']['samples'][-1][k]
#  for c in range(len(confs)):
#    self.universe.setConfiguration(Configuration(self.universe,confs[c]))
#    self.universe.normalizeConfiguration()
#    self.confs['cool']['samples'][-1][k][c] = np.copy(self.universe.configuration().array)

# Take the last entry of the 'cool' samples and collect its per-cycle
# conformation lists, one list per cycle k.
import itertools
confs = [self.confs['cool']['samples'][-1][k] for k in range(self._cool_cycle)]
# Flatten the per-cycle lists into one array, keeping only the rows selected
# by self.molecule.heavy_atoms in every conformation.
confs = np.array([conf[self.molecule.heavy_atoms,:] for conf in itertools.chain.from_iterable(confs)])

# Build the pyRMSD matrix over all selected conformations.
from pyRMSD.matrixHandler import MatrixHandler
rmsd_matrix = MatrixHandler().createMatrix(confs,'QCP_SERIAL_CALCULATOR')

# NOSUP_SERIAL_CALCULATOR
# NOTE(review): presumably kept as an alternative calculator name - confirm.

#GBSA_energy = [(self.cool_Es[-1][k]['LNAMD_GBSA'][:,-1]-self.cool_Es[-1][k]['LNAMD_Gas'][:,-1]) for k in range(self._cool_cycle)]
#GBSA_energy = np.array(list(itertools.chain.from_iterable(GBSA_energy)))

# Running total of the number of samples per cycle.
cum_Nk = np.cumsum([len(self.confs['cool']['samples'][-1][k]) for k in range(self._cool_cycle)])

#  # Compute distance matrix with centering
#  self._write_traj('cool.dcd',confs,moiety='L')
#  import mdtraj as md
#  traj = md.load('cool.dcd',top=self._FNs['prmtop']['L'])
#  dist_matrix = [mdtraj.rmsd(traj,traj,frame=k,atom_indices=traj.topology.select('type!=H')) for k in range(N)]
#  dist_matrix = np.array(dist_matrix)
Esempio n. 14
0
def main():
    """
    Run the sampling exhaustiveness analysis end to end.

    Steps, in order:

    0. Read the two score files and run the score convergence tests.
    1. Read the model coordinates (PDB or RMF) and compute the RMSD matrix,
       which is then loaded from "Distances_Matrix.data".
    2. Unless args.skip_sampling_precision is set, cluster at a list of
       cutoffs to obtain the sampling precision and write the statistics.
    3. Cluster at the final threshold, then write the cluster populations,
       the cluster precisions, the member lists, the density maps and the
       cluster center model of every retained cluster.
    4. Optionally run the gnuplot scripts.

    All options come from parse_args(); output files and cluster.<i>
    directories are created relative to the current working directory.
    """
    args = parse_args()

    import os
    import shutil
    import numpy

    import scipy as sp

    import IMP.sampcon
    from IMP.sampcon import scores_convergence, clustering_rmsd
    from IMP.sampcon import rmsd_calculation, precision_rmsd

    import IMP

    idfile_A = "Identities_A.txt"
    idfile_B = "Identities_B.txt"

    # Step 0: Compute Score convergence
    with open(os.path.join(args.path, args.scoreA), 'r') as f:
        score_A = [float(line.strip("\n")) for line in f]

    with open(os.path.join(args.path, args.scoreB), 'r') as f:
        score_B = [float(line.strip("\n")) for line in f]

    scores = score_A + score_B

    # Get the convergence of the best score
    scores_convergence.get_top_scorings_statistics(scores, 0, args.sysname)

    # Check if the two score distributions are similar
    scores_convergence.get_scores_distributions_KS_Stats(
        score_A, score_B, 100, args.sysname)

    # Step 1: Compute RMSD matrix
    if args.extension == "pdb":
        ps_names = []  # bead names are not stored in PDB files
        symm_groups = None
        conforms, masses, radii, models_name = \
            rmsd_calculation.get_pdbs_coordinates(
                args.path, idfile_A, idfile_B)
    else:
        args.extension = "rmf3"
        # If we have a single RMF file, read conformations from that
        if args.rmf_A is not None:
            (ps_names, masses, radii, conforms, symm_groups, models_name,
             n_models) = rmsd_calculation.get_rmfs_coordinates_one_rmf(
                 args.path, args.rmf_A, args.rmf_B, args.subunit,
                 args.symmetry_groups)

        # If not, default to the Identities.txt file
        else:
            symm_groups = None
            (ps_names, masses, radii, conforms,
             models_name) = rmsd_calculation.get_rmfs_coordinates(
                 args.path, idfile_A, idfile_B, args.subunit)

    print("Size of conformation matrix", conforms.shape)

    if not args.skip_sampling_precision:
        # get_rmsds_matrix modifies conforms, so save it to a file and restore
        # afterwards (so that we retain the original IMP orientation)
        numpy.save("conforms", conforms)
        inner_data = rmsd_calculation.get_rmsds_matrix(conforms, args.mode,
                                                       args.align, args.cores,
                                                       symm_groups)
        print("Size of RMSD matrix (flattened):", inner_data.shape)
        del conforms
        conforms = numpy.load("conforms.npy")
        os.unlink('conforms.npy')

    from pyRMSD.matrixHandler import MatrixHandler
    mHandler = MatrixHandler()
    mHandler.loadMatrix("Distances_Matrix.data")

    rmsd_matrix = mHandler.getMatrix()
    distmat = rmsd_matrix.get_data()

    distmat_full = sp.spatial.distance.squareform(distmat)
    print("Size of RMSD matrix (unpacked, N x N):", distmat_full.shape)

    # Get model lists
    if args.rmf_A is not None:
        sampleA_all_models = list(range(n_models[0]))
        sampleB_all_models = list(range(n_models[0],
                                        n_models[1] + n_models[0]))
        total_num_models = n_models[1] + n_models[0]
    else:
        (sampleA_all_models,
         sampleB_all_models) = clustering_rmsd.get_sample_identity(
             idfile_A, idfile_B)
        total_num_models = len(sampleA_all_models) + len(sampleB_all_models)
    all_models = list(sampleA_all_models) + list(sampleB_all_models)
    print("Size of Sample A:",
          len(sampleA_all_models), " ; Size of Sample B: ",
          len(sampleB_all_models), "; Total", total_num_models)

    if not args.skip_sampling_precision:

        print("Calculating sampling precision")

        # Step 2: Cluster at intervals of grid size to get the
        # sampling precision
        gridSize = args.gridsize

        # Get cutoffs for clustering
        cutoffs_list = clustering_rmsd.get_cutoffs_list(distmat, gridSize)
        print("Clustering at thresholds:", cutoffs_list)

        # Do clustering at each cutoff
        pvals, cvs, percents = clustering_rmsd.get_clusters(
            cutoffs_list, distmat_full, all_models, total_num_models,
            sampleA_all_models, sampleB_all_models, args.sysname)

        # Now apply the rule for selecting the right precision based
        # on population of contingency table, pvalue and cramersv
        (sampling_precision, pval_converged, cramersv_converged,
         percent_converged) = clustering_rmsd.get_sampling_precision(
             cutoffs_list, pvals, cvs, percents)

        # Output test statistics
        with open("%s.Sampling_Precision_Stats.txt" % args.sysname,
                  'w+') as fpv:
            print(
                "The sampling precision is defined as the largest allowed "
                "RMSD between the cluster centroid and a ",
                args.sysname,
                "model within any cluster in the finest clustering for "
                "which each sample contributes models proportionally to "
                "its size (considering both significance and magnitude of "
                "the difference) and for which a sufficient proportion of "
                "all models occur in sufficiently large clusters. The "
                "sampling precision for our ",
                args.sysname,
                " modeling is %.3f" % (sampling_precision),
                " A.",
                file=fpv)

            print(
                "Sampling precision, P-value, Cramer's V and percentage "
                "of clustered models below:",
                file=fpv)
            print("%.3f\t%.3f\t%.3f\t%.3f" %
                  (sampling_precision, pval_converged, cramersv_converged,
                   percent_converged),
                  file=fpv)
            print("", file=fpv)

        final_clustering_threshold = sampling_precision

    else:
        final_clustering_threshold = args.cluster_threshold

    # Perform final clustering at the required precision
    print("Clustering at threshold %.3f" % final_clustering_threshold)
    (cluster_centers, cluster_members) = clustering_rmsd.precision_cluster(
        distmat_full, total_num_models, final_clustering_threshold)

    (ctable, retained_clusters) = clustering_rmsd.get_contingency_table(
        len(cluster_centers), cluster_members, all_models, sampleA_all_models,
        sampleB_all_models)
    print("Contingency table:", ctable)
    # Output the number of models in each cluster and each sample
    with open("%s.Cluster_Population.txt" % args.sysname, 'w+') as fcp:
        for row_index, row in enumerate(ctable):
            print(row_index, row[0], row[1], file=fcp)

    # Obtain the subunits for which we need to calculate densities
    density_custom_ranges = precision_rmsd.parse_custom_ranges(args.density)

    # Output cluster precisions. The context manager closes the file even
    # when the processing of a cluster raises.
    with open("%s.Cluster_Precision.txt" % args.sysname, 'w+') as fpc:

        # For each cluster, output the models in the cluster
        # Also output the densities for the cluster models
        for i, clus in enumerate(retained_clusters):

            # The cluster centroid is the first conformation.
            # We use this as to align and compute RMSD/precision
            conform_0 = conforms[all_models[cluster_members[clus][0]]]

            # create a fresh directory for the cluster, dropping any
            # leftover one from a previous run
            cluster_dir = "./cluster.%s" % i
            if os.path.exists(cluster_dir):
                shutil.rmtree(cluster_dir)
            os.mkdir(cluster_dir)
            os.mkdir("./cluster.%s/Sample_A/" % i)
            os.mkdir("./cluster.%s/Sample_B/" % i)

            # Create densities for all subunits for both sample A and
            # sample B (gmdt) as well as separately (gmd1, gmd2).
            gmd1, gmd2, gmdt = (
                precision_rmsd.GetModelDensity(
                    custom_ranges=density_custom_ranges,
                    resolution=args.density_threshold,
                    voxel=args.voxel,
                    bead_names=ps_names)
                for _ in range(3))

            # Create a model with just the cluster_member particles
            model = IMP.Model()
            ps = []  # particle list to be updated by each RMF frame
            for pi in range(len(conform_0)):
                p = IMP.Particle(model, "%s" % str(pi))
                IMP.core.XYZ.setup_particle(p, (0, 0, 0))
                IMP.core.XYZR.setup_particle(p, float(radii[pi]))
                IMP.atom.Mass.setup_particle(p, float(masses[pi]))
                ps.append(p)

            # Obtain cluster precision by obtaining average RMSD of each
            # model to the cluster center
            cluster_precision = 0.0

            # transformation from internal pyRMSD orientation
            trans = None

            # Also output the identities of cluster members
            members_path = 'cluster.' + str(i) + '.all.txt'
            sample_a_path = 'cluster.' + str(i) + '.sample_A.txt'
            sample_b_path = 'cluster.' + str(i) + '.sample_B.txt'
            with open(members_path, 'w') as both_file, \
                    open(sample_a_path, 'w') as sampleA_file, \
                    open(sample_b_path, 'w') as sampleB_file:

                # for each model in the cluster
                for mem in cluster_members[clus]:

                    model_index = all_models[mem]

                    # get superposition of each model to cluster center
                    # and the RMSD between the two
                    if args.symmetry_groups:
                        rmsd, superposed_ps, trans = \
                            precision_rmsd.get_particles_from_superposed_amb(
                                conforms[model_index], conform_0, args.align,
                                ps, trans, symm_groups)
                    else:
                        rmsd, superposed_ps, trans = \
                            precision_rmsd.get_particles_from_superposed(
                                conforms[model_index], conform_0, args.align,
                                ps, trans)

                    model.update()  # why not?

                    cluster_precision += rmsd

                    # Add the superposed particles to the respective
                    # density maps
                    gmdt.add_subunits_density(superposed_ps)  # total map
                    print(model_index, file=both_file)

                    if model_index in sampleA_all_models:
                        # density map for sample A
                        gmd1.add_subunits_density(superposed_ps)
                        print(model_index, file=sampleA_file)
                    else:
                        # density map for sample B
                        gmd2.add_subunits_density(superposed_ps)
                        print(model_index, file=sampleB_file)

            cluster_precision /= float(len(cluster_members[clus]) - 1.0)

            print(
                "Cluster precision (average distance to cluster centroid) "
                "of cluster ",
                str(i),
                " is %.3f" % cluster_precision,
                "A",
                file=fpc)

            # Output density files for the cluster
            density = gmdt.write_mrc(path="./cluster.%s" % i,
                                     file_prefix="LPD")
            gmd1.write_mrc(path="./cluster.%s/Sample_A/" % i,
                           file_prefix="LPD")
            gmd2.write_mrc(path="./cluster.%s/Sample_B/" % i,
                           file_prefix="LPD")

            # Add the cluster center model RMF to the cluster directory
            cluster_center_index = cluster_members[clus][0]
            if args.rmf_A is not None:
                cluster_center_model_id = cluster_center_index
                # Indices below n_models[0] belong to the first RMF file,
                # the rest to the second one (offset by n_models[0]).
                if cluster_center_index < n_models[0]:
                    make_cluster_centroid(
                        os.path.join(args.path, args.rmf_A),
                        cluster_center_index,
                        os.path.join("cluster.%d" % i,
                                     "cluster_center_model.rmf3"), i,
                        len(cluster_members[clus]), cluster_precision,
                        density, args.path)
                else:
                    make_cluster_centroid(
                        os.path.join(args.path, args.rmf_B),
                        cluster_center_index - n_models[0],
                        os.path.join("cluster.%d" % i,
                                     "cluster_center_model.rmf3"), i,
                        len(cluster_members[clus]), cluster_precision,
                        density, args.path)
            else:
                # index to Identities file.
                cluster_center_model_id = all_models[cluster_center_index]
                outfname = os.path.join(
                    "cluster.%d" % i,
                    "cluster_center_model." + args.extension)
                if 'rmf' in args.extension:
                    make_cluster_centroid(
                        models_name[cluster_center_model_id], 0,
                        outfname, i, len(cluster_members[clus]),
                        cluster_precision, density, args.path)
                else:
                    shutil.copy(models_name[cluster_center_model_id],
                                outfname)

    # generate plots for the score and structure tests
    if args.gnuplot:
        import subprocess
        import glob

        gnuplotdir = IMP.sampcon.get_data_path("gnuplot_scripts")
        for filename in sorted(glob.glob(os.path.join(gnuplotdir, "*.plt"))):
            cmd = ['gnuplot', '-e', 'sysname="%s"' % args.sysname, filename]
            print(" ".join(cmd))
            subprocess.check_call(cmd)
              "chain":ligand_file_description[1],
              "atoms":ligand_file_description[2:]
              }
    ligand_description = "resname %s and name %s"%(ligand["resname"],"".join( a+" " for a in ligand["atoms"]))
    print "* Ligand parsed: ",ligand_description


    #######################################################################################################################
    # Generate matrix with metrics (so now we are going to cluster based on Energy and spawning
    #######################################################################################################################
    print "* Creating Spawning - totalE matrix"
    records = []
    processFile(traj_pdb, records, True)
    all_metrics = genMetrics(plots["totale_spawning"], records)
    matrix_data = scipy.spatial.distance.pdist(normalize_metrics(all_metrics), 'euclidean')
    m_handler = MatrixHandler()
    m_handler.distance_matrix = CondensedMatrix(matrix_data)
    matrix_file = os.path.join(base_dir, TENERGY_SPAWN_MATRIX)
    m_handler.saveMatrix(matrix_file)

    #######################################################################################################################
    # Cluster by metrics
    #######################################################################################################################
    print "* Spawning - totalE clustering"
    be_rmsd_clustering_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_SPAWN_TOTE_SCRIPT)
    working_directory = os.path.join(base_dir, TOTALE_SPAWN_WORKSPACE)
    params = load_dic_in_json(be_rmsd_clustering_script_path)
    params['global']['workspace']['base'] = working_directory
    params['data']['files'] = [os.path.join(os.getcwd(), traj_pdb)]
    params['data']['matrix']['parameters']['path'] = matrix_file
    save_dic_in_json(params, be_rmsd_clustering_script_path)