Example #1
def permutInverse(n):
    """Returns inverse permutation given integers in range(len(n)),
    such that permitInverse(permutInverse(range(4)))==range(4).
    """
    n = Numeric.asarray(n)
    pInv = Numeric.argsort(n)
    assert Numeric.all(Numeric.equal(n, Numeric.argsort(pInv))), "Inverse not successful; input should be permutation of range(len(input))."
    return pInv
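
A quick way to sanity-check the inverse-permutation identity described in the docstring; this is only a sketch, using modern numpy as a stand-in for the retired Numeric module:

import numpy as np

p = np.array([2, 0, 3, 1])                 # a permutation of range(4)
p_inv = np.argsort(p)                      # argsort of a permutation is its inverse
assert np.all(p[p_inv] == np.arange(4))    # composing with the inverse gives the identity
assert np.all(p_inv[p] == np.arange(4))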
    def __init__(self, data, sort='binding'):
        """data is a list of Conformations.
        """
        self.data = data
        # extract the binding (default) or docking energy
        if sort == 'docking':
            energy_list = [d.docking_energy for d in data]
        elif sort == 'intermolecular':
            energy_list = [d.intermol_energy for d in data]
        elif sort == 'flexres':
            if hasattr(data[0], 'subset'):
                ind = len(data[0].getCoords()) - len(data[0].subset) 
            else:
                print "no subsets have been set up for conformations!"
                return "ERROR"
            for d in data:
                d.flexres_energy = Numeric.add.reduce(d.total_energies[ind:])
                d.flexres_index = ind
            energy_list = [d.flexres_energy for d in data]
        else:
            energy_list = [d.binding_energy for d in data]

        # sort the conformations by energy
        self.argsort = Numeric.argsort(energy_list)
        self.energy_used = sort

        # save the pair-wise distances for reuse by get_distance
        self.dist_matrix = Numeric.zeros([len(data), len(data)]) - 1.0
        # set the customizable get_distance method to default
        self.set_get_distance(self._get_distance_default)
        
        self.clustering_dict = {}
Example #3
    def sortPoly(self, order=-1):
        if __debug__:
            if hasattr(DejaVu, 'functionName'): DejaVu.functionName()
        """None <- sortPoly(order=-1)
Sorts the geometry polygons according to z values of the polygons'
geometric centers. order=-1 sorts by furthest z first, order=1 sorts
by closest z first"""
        # FIXME will not work with instance matrices
        mat = self.GetMatrix()
        mat = Numeric.reshape(mat, (4,4))
        vt = self.vertexSet.vertices*mat
        if vt is None:
            return
        triv = Numeric.take(vt, self.faceSet.faces.array)
        trig = Numeric.sum(triv,1)/3.
        trigz = trig[:,2]  #triangle's center of gravity z value
        
        ind = Numeric.argsort(trigz) # sorted indices
        
        if len(self.faceSet.faces.array):
            faces = Numeric.take(self.faceSet.faces.array, ind[::order])
        
            if self.shading==GL.GL_FLAT: # we also need to re-arrange the
                                       # face normals
                if self.normals is None:
                    normals = None
                else:
                    if len(self.normals)>1:
                        normals = Numeric.take(self.normals, ind[::order])
                    else:
                        normals = self.normals
            else:
                normals = None

            self.Set(faces=faces, fnormals=normals)
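
The core of the depth sort above is an argsort over the z coordinate of each face's centroid. A minimal standalone sketch of that step (hypothetical vertices and faces, modern numpy instead of Numeric):

import numpy as np

vertices = np.array([[0., 0., 0.5], [1., 0., 2.0], [0., 1., 1.0], [1., 1., 3.0]])
faces = np.array([[0, 1, 2], [1, 3, 2]])
centers_z = vertices[faces].mean(axis=1)[:, 2]   # z of each triangle's centroid
faces = faces[np.argsort(centers_z)[::-1]]       # order=-1: draw the furthest triangles first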
Example #4
    def __init__(self, data, sort='binding'):
        """data is a list of Conformations.
        """
        self.data = data
        # extract the binding (default) or docking energy
        if sort == 'docking':
            energy_list = [d.docking_energy for d in data]
        elif sort == 'intermolecular':
            energy_list = [d.intermol_energy for d in data]
        elif sort == 'flexres':
            if hasattr(data[0], 'subset'):
                ind = len(data[0].getCoords()) - len(data[0].subset)
            else:
                print "no subsets have been set up for conformations!"
                return "ERROR"
            for d in data:
                d.flexres_energy = Numeric.add.reduce(d.total_energies[ind:])
                d.flexres_index = ind
            energy_list = [d.flexres_energy for d in data]
        else:
            energy_list = [d.binding_energy for d in data]

        # sort the conformations by energy
        self.argsort = Numeric.argsort(energy_list)
        self.energy_used = sort

        # save the pair-wise distances for reuse by get_distance
        self.dist_matrix = Numeric.zeros([len(data), len(data)]) - 1.0
        # set the customizable get_distance method to default
        self.set_get_distance(self._get_distance_default)

        self.clustering_dict = {}
Example #5
def rankData(n, inverse=False):
    """Returns ranks of 1D Numeric array in range 1...shape[0].
    """
    n = Numeric.asarray(n)
    assert Numeric.rank(n) == 1
    r = Numeric.zeros(n.shape[0], Numeric.Float)
    Numeric.put(r, Numeric.argsort(n), Numeric.arange(n.shape[0]))
    if inverse:
        return -1*r+n.shape[0]
    else:
        return r+1
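
The put/argsort idiom in rankData assigns rank k+1 to the element with the k-th smallest value. A short sketch of the same idea with modern numpy (hypothetical data):

import numpy as np

def rank_data(a, inverse=False):
    a = np.asarray(a)
    r = np.zeros(a.shape[0], float)
    r[np.argsort(a)] = np.arange(a.shape[0])   # element with k-th smallest value gets k
    return a.shape[0] - r if inverse else r + 1

print(rank_data([10.0, 30.0, 20.0]))           # [1. 3. 2.]
print(rank_data([10.0, 30.0, 20.0], True))     # [3. 1. 2.]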
    def patchAround(self, center, nAtoms):
        """
        patchAround( float_center, int_nAtoms ) -> mask for self.model
        Create single patch of nAtoms atoms that are closest to center.
        """
        dist = self.__distances(center)
        order = N.argsort(dist)

        r = N.zeros(len(self.model), 'i')
        N.put(r, order[:nAtoms], 1)

        return self.centerPatch(r)
    def get_min_distance_objid(self, objid, not_objid_set, sheets_only=False):
        """
        Get the sheet or helix with minimum distance from the supplied
        sheet or helix, specified by id (e.g. 'A' for a sheet or
        'HELIX_A_10' for a helix).

        Optionally, set the element that was found to infinity so that
        this routine can be used iteratively to find only elements that
        have not already been found.

        Parameters:
           objid  - sheet id (e.g. 'A') or helix id (e.g. 'HELIX_A_10')
                    of the object to find the id of the closest object for.
           not_objid_set - set of objids that we do NOT want to find.
                    Used so we can find the nearest element to an
                    already positioned element that is not itself
                    an already positioned element.
           sheets_only - (Default False) only find sheets, not helices.


        Uses data members (readonly):
           sheet_index_map
           reverse_sheet_index_map
           sheet_dist_matrix  

        Return value:
           tuple (id, dist) where
           id is the id (as per the objid parameter) of the closest sheet
            or helix to the specified one that is not in not_objid_set, and
            dist is that smallest distance.
           
        """

        row = self.sheet_index_map[objid]
        mindist_index = Numeric.argmin(self.sheet_dist_matrix[row])

        # get 1d array of object ids sorted (ascending) by their distance
        # from the target objid in the sheet dist matrix
        # NB: use of argsort depends on having set diagonal (self distance)
        # elements to inf instead of 0 in calc_sse_dist_matrix().
        objids_sorted_by_dist = Numeric.argsort(self.sheet_dist_matrix[row])

        # find the first (i.e. smallest distance) id that is not in
        # the not_objid_set
        mindist_index = None
        for mindist_index in objids_sorted_by_dist:
            mindist_objid = self.reverse_sheet_index_map[mindist_index]
            if ( (mindist_objid not in not_objid_set) and
                 (not sheets_only or len(mindist_objid) == 1) ): 
                dist = self.sheet_dist_matrix[row, mindist_index]
                break
        return (mindist_objid, dist)
    def get_min_distance_objid(self, objid, not_objid_set, sheets_only=False):
        """
        Get the sheet or helix with minimum distance from the supplied
        sheet or helix, specified by id (e.g. 'A' for a sheet or
        'HELIX_A_10' for a helix).

        Optionally, set the element that was found to infinity so that
        this routine can be used iteratively to find only elements that
        have not already been found.

        Paremeters:
           objid  - sheet id (e.g. 'A') or helix id (e.g. 'HELIX_A_10')
                    of the object to find the id of the closest object for.
           not_objid_set - set of objids that we do NOT want to find.
                    Used so we can find the nearest element to an
                    already positioned element that is not itself
                    an already positioned element.
           sheets_only - (Default False) only find sheets, not helices.


        Uses data members (readonly):
           sheet_index_map
           reverse_sheet_index_map
           sheet_dist_matrix  

        Return value:
           tuple (id, dist) where
           id is the id (as per the objid parameter) of the closest sheet
            or helix to the specified one that is not in not_objid_set, and
            dist is that smallest distance.
           
        """

        row = self.sheet_index_map[objid]
        mindist_index = Numeric.argmin(self.sheet_dist_matrix[row])

        # get 1d array of object ids sorted (ascending) by their distance
        # from the target objid in the sheet dist matrix
        # NB: use of argsort depends on having set diagonal (self distance)
        # elements to inf instead of 0 in calc_sse_dist_matrix().
        objids_sorted_by_dist = Numeric.argsort(self.sheet_dist_matrix[row])

        # find the first (i.e. smallest distance) id that is not in
        # the not_objid_set
        mindist_index = None
        for mindist_index in objids_sorted_by_dist:
            mindist_objid = self.reverse_sheet_index_map[mindist_index]
            if ((mindist_objid not in not_objid_set)
                    and (not sheets_only or len(mindist_objid) == 1)):
                dist = self.sheet_dist_matrix[row, mindist_index]
                break
        return (mindist_objid, dist)
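
Both copies of get_min_distance_objid rely on the same pattern: walk the distance row in argsort order and stop at the first index that is not excluded. A tiny sketch with made-up distances (the inf self-distance mirrors the comment about calc_sse_dist_matrix):

import numpy as np

dist_row = np.array([np.inf, 4.2, 1.5, 3.0])   # distances from one element to all others
not_wanted = {2}                                # elements already positioned
for idx in np.argsort(dist_row):
    if int(idx) not in not_wanted:
        break
print(int(idx), dist_row[idx])                  # 3 3.0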
    def orderCenters(self, points, origin=None):
        """
        Order random points by increasing distance to first or to origin.
        points  - n x 3 array of float, random center coordinates
        origin  - 3 array of float
        -> n x 3 array of float, the points ordered by increasing distance
        """
        if origin is None:
            origin = points[0]

        dist = self.__distances(origin, points)

        return N.take(points, N.argsort(dist))
    def centerPatch(self, patch_mask):
        """
        patch_mask - [ 1|0 ], mask of non-centered patch
        -> [ 1|0 ], mask of patch around geometric center of first patch
        """
        c = self.model.center(patch_mask)
        dist = self.__distances(c)

        n_atoms = len(N.nonzero(patch_mask))
        i_dist = N.argsort(dist)[:n_atoms]

        result = N.zeros(len(patch_mask))
        N.put(result, i_dist, 1)

        return result
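
patchAround and centerPatch both build an atom mask by flagging the n smallest entries of a distance array. The same selection in a few lines of modern numpy (hypothetical distances):

import numpy as np

dist = np.array([2.3, 0.4, 1.7, 0.9, 3.1])
n_atoms = 3
mask = np.zeros(len(dist), int)
mask[np.argsort(dist)[:n_atoms]] = 1      # flag the three closest atoms
print(mask)                               # [0 1 1 1 0]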
Example #11
def pca(M):
    "Perform PCA on M, return eigenvectors and eigenvalues, sorted."
    T, N = shape(M)
    # if there are fewer rows T than columns N, use snapshot method
    if T < N:
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        evecs = 1. / sqrt(evals) * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1. / T * dot(t(M), M)
        evals, evecs = eigenvectors(K)
    # sort the eigenvalues and eigenvectors, descending order
    order = (argsort(evals)[::-1])
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
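
The ordering step of pca() is just a descending argsort of the eigenvalues, applied to both arrays. A sketch of that step with modern numpy (random data; eigenvectors and t from the old Numeric LinearAlgebra are replaced by numpy equivalents):

import numpy as np

rng = np.random.default_rng(0)
M = rng.standard_normal((50, 5))
K = (M.T @ M) / M.shape[0]            # covariance-like matrix, as in the T >= N branch
evals, evecs = np.linalg.eigh(K)      # eigh returns eigenvalues in ascending order
order = np.argsort(evals)[::-1]       # descending, largest variance first
evals, evecs = evals[order], evecs[:, order]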
Example #12
    def __find_intervals(self, l):
        l = N.array(l)
        l = N.take(l, N.argsort(l))

        globals().update(locals())

        break_points = N.nonzero(N.greater(l[1:] - l[:-1], 1))

        start = 0
        intervals = []

        for i in range(len(break_points)):
            index = break_points[i]
            intervals.append(tuple(N.take(l, range(start, index + 1))))
            start = index + 1

        intervals.append(tuple(l[start:]))

        return intervals
Example #13
    def __find_intervals(self, l):
        l = N.array(l)
        l = N.take(l, N.argsort(l))

        globals().update( locals() )

        break_points = N.nonzero(N.greater(l[1:] - l[:-1], 1))

        start = 0
        intervals = []

        for i in range(len(break_points)):
            index = break_points[i]
            intervals.append(tuple(N.take(l, range(start, index + 1))))
            start = index + 1

        intervals.append(tuple(l[start:]))

        return intervals
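
__find_intervals sorts the indices and then cuts them wherever consecutive values differ by more than one, producing maximal runs of consecutive integers. A compact illustration of the same break-point logic (modern numpy, hypothetical input):

import numpy as np

l = np.sort(np.array([7, 2, 3, 9, 8, 1]))        # [1 2 3 7 8 9]
breaks = np.nonzero(np.diff(l) > 1)[0]           # positions where a gap starts
intervals = [tuple(map(int, run)) for run in np.split(l, breaks + 1)]
print(intervals)                                  # [(1, 2, 3), (7, 8, 9)]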
Example #14
    def confidenceInterval(self, level):
        """
        confidenceInterval(self, level)

        @param level: confidence level (e.g. 0.68 for stdev interval)
        @type  level: float

        @return: start and end of the confidence interval
                 containing |level|*100 % of the probability
        @rtype: float, float
        """
        order = N.argsort(self.p).tolist()
        cumulative = N.add.accumulate(N.take(self.p, order)) * self.delta_x

        ind = N.nonzero(N.greater_equal(cumulative, 1. - level))

        sub_set = order[ind[0]:]

        intervals = self.__find_intervals(sub_set)

        boundaries = [(self.x[i[0]], self.x[i[-1]]) for i in intervals]

        return tuple(boundaries)
Example #15
    def confidenceInterval(self, level):
        """
        confidenceInterval(self, level)

        @param level: confidence level (e.g. 0.68 for stdev interval)
        @type  level: float

        @return: start and end of the confidence interval
                 containing |level|*100 % of the probability
        @rtype: float, float
        """          
        order = N.argsort(self.p).tolist()
        cumulative = N.add.accumulate(N.take(self.p, order)) * self.delta_x

        ind = N.nonzero(N.greater_equal(cumulative, 1. - level))

        sub_set = order[ind[0]:]

        intervals = self.__find_intervals(sub_set)

        boundaries = [(self.x[i[0]], self.x[i[-1]]) for i in intervals]

        return tuple(boundaries)
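
confidenceInterval accumulates the sorted probabilities and keeps the grid points whose cumulative mass exceeds 1 - level, i.e. the highest-density region. A hedged sketch of that selection for a discretised unit Gaussian (hypothetical grid, modern numpy):

import numpy as np

x = np.linspace(-3.0, 3.0, 601)                     # hypothetical regular grid
delta_x = x[1] - x[0]
p = np.exp(-0.5 * x ** 2) / np.sqrt(2.0 * np.pi)    # unit Gaussian density

order = np.argsort(p)                               # lowest density first
cumulative = np.add.accumulate(p[order]) * delta_x
keep = order[cumulative >= 1.0 - 0.68]              # points inside the 68% region
print(x[keep].min(), x[keep].max())                 # roughly -1 and +1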
Example #16
    def sortPoly(self, geom,vt, order=-1):
        """None <- sortPoly(order=-1)
Sorts the geometry polygons according to z values of the polygons'
geometric centers. order=-1 sorts by furthest z first, order=1 sorts
by closest z first"""
        # FIXME will not work with instance matrices
        #mat = geom.GetMatrix()
        #mat = Numeric.reshape(mat, (4,4))
        #vt = geom.vertexSet.vertices*mat
        if vt is None:
            return
        triv = Numeric.take(vt, geom.faceSet.faces.array)
        trig = Numeric.sum(triv,1)/3.
        trigz = trig[:,2]  #triangle's center of gravity z value
        
        ind = Numeric.argsort(trigz) # sorted indices
        
        if len(geom.faceSet.faces.array):
            faces = Numeric.take(geom.faceSet.faces.array, ind[::order])
            n = geom.getFNormals()
            n = geom.faceSet.normals * geom.GetMatrix()
            normals = Numeric.take(n, ind[::order])
            #
            #if geom.shading==GL.GL_FLAT: # we also need to re-arrange the
            #                          # face normals
            #	if geom.normals is None:
            #        normals = None
           # 	else:
           #         if len(geom.normals)>1:
            #normals = Numeric.take(geom.normals, ind[::order])
           #         else:
           #             normals = geom.normals
           # else:
           #     normals = None
            #normals = None
            #geom.Set(faces=faces, fnormals=normals)
            return faces.copy(), normals.copy()
Beispiel #17
0
    def sortPoly(self, geom, vt, order=-1):
        """None <- sortPoly(order=-1)
Sorts the geometry polygons according to z values of the polygons'
geometric centers. order=-1 sorts by furthest z first, order=1 sorts
by closest z first"""
        # FIXME will not work with instance matrices
        #mat = geom.GetMatrix()
        #mat = Numeric.reshape(mat, (4,4))
        #vt = geom.vertexSet.vertices*mat
        if vt is None:
            return
        triv = Numeric.take(vt, geom.faceSet.faces.array)
        trig = Numeric.sum(triv, 1) / 3.
        trigz = trig[:, 2]  #triangle's center of gravity z value

        ind = Numeric.argsort(trigz)  # sorted indices

        if len(geom.faceSet.faces.array):
            faces = Numeric.take(geom.faceSet.faces.array, ind[::order])
            n = geom.getFNormals()
            n = geom.faceSet.normals * geom.GetMatrix()
            normals = Numeric.take(n, ind[::order])
            #
            #if geom.shading==GL.GL_FLAT: # we also need to re-arrange the
            #                          # face normals
            #	if geom.normals is None:
            #        normals = None
            # 	else:
            #         if len(geom.normals)>1:
            #normals = Numeric.take(geom.normals, ind[::order])
            #         else:
            #             normals = geom.normals
            # else:
            #     normals = None
            #normals = None
            #geom.Set(faces=faces, fnormals=normals)
            return faces.copy(), normals.copy()
def wavenumber_integration(tb, wl, response):
    x = []
    y = []
    for i in range(len(wl)):
        x.append(1. / wl[i])
        #print "wavelength,wavenumber: ",wl[i],1./wl[i]
        yval_length = blackbody(wl[i], tb) * response[i] * (wl[i] * wl[i])
        yval = blackbody(1. / wl[i], tb, "wavenumber") * response[i]
        #print "YVAL: ",yval,yval_length
        y.append(yval)

    x = Numeric.array(x)
    y = Numeric.array(y)
    y = Numeric.choose(Numeric.argsort(x), y)
    x = Numeric.sort(x)

    response = Numeric.array(response)

    res = integral(x, y)
    # Normalisation:
    norm = integral(x, response)
    #print "Norm: ",norm

    return res / norm
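
The choose/argsort/sort triple above reorders y in step with sorting x (choose with a 1-D array effectively behaves like take here). With modern numpy the usual way to co-sort two arrays is fancy indexing with a single argsort (hypothetical data):

import numpy as np

x = np.array([3.0, 1.0, 2.0])       # wavenumbers in arbitrary order
y = np.array([30.0, 10.0, 20.0])    # values paired with x
order = np.argsort(x)
x, y = x[order], y[order]           # x ascending, y kept aligned with it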
Example #19
    def run(self):
        import time
        from numpy.oldnumeric import argsort

        while not self.isStopped():

            bindings = self.getBindings()

            ## loop through all messages and check for
            ## incoming events

            incoming = {}

            for tid, message in bindings.keys():

                if P.probe(tid, message):

                    P.recv(tid, message)

                    ## parameters must be tuple

                    parameters = pvm.unpack()

                    ##    self.post_message_received(message, tid, parameters)

                    value = (message, parameters[0], parameters[1])

                    try:
                        incoming[tid].append(value)
                    except KeyError:
                        incoming[tid] = [value]

            ## for every incoming message call
            ## bound method

            ## TODO: do this in a specific order!
            ## probably pvm supports a sort of time-stamp or so.

            for tid, values in incoming.items():

                time_stamps = map(lambda v: v[1], values)
                indices = argsort(time_stamps)

                for i in indices:

                    message = values[i][0]
                    parameters = values[i][2]

                    ##    self.post_execute_method(message, tid, parameters)

                    if parameters is None:
                        bindings[(tid, message)]()
                    else:
                        bindings[(tid, message)](*parameters)

            ## wait some time

            time.sleep(self.getMessageLoopDelay())

            ## if message-loop is stopped, wait until
            ## it is continued

            self.__loopEvent.wait()
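
Inside the message loop, argsort over the time stamps gives the order in which the bound methods should be called. A self-contained sketch of that dispatch ordering (hypothetical message tuples, modern numpy):

import numpy as np

# (message, time_stamp, parameters) tuples collected from one sender
values = [('moved', 3.2, (1,)), ('started', 1.0, None), ('stopped', 2.5, None)]
time_stamps = [v[1] for v in values]
for i in np.argsort(time_stamps):    # dispatch in time-stamp order
    print(values[i][0])              # started, stopped, moved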
Example #20
    def read(self, filename):
        """
1.5 2.0 binding
  0   0   0   0  0.000  0.000  0.000 -14.645
  1   0   1   0  2.449  0.000  0.000 -14.636
  0   1   0   1  1.281  1.281  1.281 -14.424
  1   1   1   1  2.548  1.014  1.014 -14.210
                   |      |      |
                   |      |      |-rmsd from cluster seed @ 2.0 tolerance
                   |      |-rmsd from cluster seed @ 1.5 tolerance
                   |-rmsd from overall reference structure      
        """
        file_ptr = open(filename)
        lines = file_ptr.readlines()  # read the file
        file_ptr.close()

        d = self.clustering_dict  # local copy

        # see if the last word starts with 'b', 'd' or 'e' to denote which energy was used
        word_list = string.split(lines[0])
        if word_list[-1][0] in ['b', 'd', 'e']:
            file_energy_used = word_list[-1]
            ind = string.find(lines[0], file_energy_used)
            lines[0] = lines[0][:ind]  # strip energy symbol
        else:
            file_energy_used = 'binding'

        # ??? make sure we're consistent with the argsort
        #check that self has done some clustering before this
        if len(self.clustering_dict) > 0:
            assert file_energy_used[0] == self.energy_used[
                0], 'Cluster energy mismatch'
        else:
            #here the dlg had no clustering in it
            self.energy_used = file_energy_used
            #redo argsort with file_energy_used
            if file_energy_used[0] == 'd':
                energy_list = [conf.docking_energy for conf in self.data]
            else:
                energy_list = [conf.binding_energy for conf in self.data]
            # sort the conformations by energy
            self.argsort = Numeric.argsort(energy_list)
            #print "self.argsort=", self.argsort

        t_list = map(float, string.split(lines[0]))
        #t_list for the example is [1.5, 2.0]
        num_t = len(t_list)  #number of clusterings

        for tolerance in t_list:  # initialize the keys
            if d.has_key(tolerance):
                raise RuntimeError, "overwriting existing clustering"
            c = d[tolerance] = Clustering()
            c.tolerance = tolerance

        # cx is the index into self.data, the list of conformations
        # NEW FORMAT:
        # 0   0   0   0  0.000  0.000  0.000 -14.645
        #first line has list of tolerances
        for cx, l in enumerate(lines[1:]):
            ll = l.split()
            #eg: 2 tolerances gives [ 0,  0,  0,  0, 0.000, 0.000, 0.000,-14.645]
            #num_t *2
            c_list = map(int, ll[:num_t * 2])
            data_list = map(float, ll[num_t * 2:])

            for t, i in zip(t_list, xrange(len(c_list) / 2)):
                cluster_index = c_list[2 * i]
                cluster_rank = c_list[2 * i + 1]
                conf = self.data[int(self.argsort[cx])]
                if cluster_rank == 0:
                    assert len(d[t]) == cluster_index
                    d[t].append(Cluster(conf))
                else:
                    # add conformation to its cluster
                    assert len(d[t][cluster_index]) == cluster_rank
                    d[t][cluster_index].append(conf)
                # tell the conformation what cluster(s) it belongs to...
                conf.cluster_dict[t] = (cluster_index, cluster_rank)
                conf.refRMS = data_list[0]
                conf.clRMS = data_list[1]
Example #21
    def run(self):
        import time
        from numpy.oldnumeric import argsort

        while not self.isStopped():

            bindings = self.getBindings()

            ## loop through all messages and check for
            ## incoming events

            incoming = {}

            for tid, message in bindings.keys():

                if P.probe(tid, message):

                    P.recv(tid, message)

                    ## parameters must be tuple

                    parameters = pvm.unpack()

                    ##    self.post_message_received(message, tid, parameters)

                    value = (message, parameters[0], parameters[1])

                    try:
                        incoming[tid].append(value)
                    except KeyError:
                        incoming[tid] = [value]

            ## for every incoming message call
            ## bound method

            ## TODO: do this in a specific order!
            ## probably pvm supports a sort of time-stamp or so.

            for tid, values in incoming.items():

                time_stamps = map(lambda v: v[1], values)
                indices = argsort(time_stamps)

                for i in indices:

                    message = values[i][0]
                    parameters = values[i][2]

                    ##    self.post_execute_method(message, tid, parameters)

                    if parameters is None:
                        bindings[(tid, message)]()
                    else:
                        bindings[(tid, message)](*parameters)

            ## wait some time

            time.sleep(self.getMessageLoopDelay())

            ## if message-loop is stopped, wait until
            ## it is continued

            self.__loopEvent.wait()
    def read(self, filename):
        """
1.5 2.0 binding
  0   0   0   0  0.000  0.000  0.000 -14.645
  1   0   1   0  2.449  0.000  0.000 -14.636
  0   1   0   1  1.281  1.281  1.281 -14.424
  1   1   1   1  2.548  1.014  1.014 -14.210
                   |      |      |
                   |      |      |-rmsd from cluster seed @ 2.0 tolerance
                   |      |-rmsd from cluster seed @ 1.5 tolerance
                   |-rmsd from overall reference structure      
        """
        file_ptr = open(filename)
        lines = file_ptr.readlines() # read the file
        file_ptr.close()

        d = self.clustering_dict # local copy

        # see if the last word starts with 'b', 'd' or 'e' to denote which energy was used
        word_list = string.split(lines[0])
        if word_list[-1][0] in ['b', 'd', 'e']: 
            file_energy_used = word_list[-1]
            ind = string.find(lines[0], file_energy_used)
            lines[0] = lines[0][:ind] # strip energy symbol
        else:
            file_energy_used = 'binding'

        # ??? make sure we're consistent with the argsort
        #check that self has done some clustering before this
        if len(self.clustering_dict)>0:
            assert file_energy_used[0] == self.energy_used[0], 'Cluster energy mismatch'
        else:
            #here the dlg had no clustering in it
            self.energy_used = file_energy_used
            #redo argsort with file_energy_used
            if file_energy_used[0] == 'd':
                energy_list = [conf.docking_energy for conf in self.data]
            else:
                energy_list = [conf.binding_energy for conf in self.data]
            # sort the conformations by energy
            self.argsort = Numeric.argsort(energy_list)
            #print "self.argsort=", self.argsort

        t_list = map(float, string.split(lines[0]))
        #t_list for the example is [1.5, 2.0]
        num_t = len(t_list)   #number of clusterings

        for tolerance in t_list: # initialize the keys
            if d.has_key(tolerance):
                raise RuntimeError, "overwriting existing clustering"
            c = d[tolerance] = Clustering()
            c.tolerance = tolerance

        # cx is the index into self.data, the list of conformations
        # NEW FORMAT: 
        # 0   0   0   0  0.000  0.000  0.000 -14.645
        #first line has list of tolerances
        for cx, l in enumerate(lines[1:]):
            ll = l.split()
            #eg: 2 tolerances gives [ 0,  0,  0,  0, 0.000, 0.000, 0.000,-14.645]
            #num_t *2
            c_list = map(int, ll[:num_t*2])
            data_list = map(float, ll[num_t*2:])

            for t, i in zip(t_list, xrange(len(c_list)/2)):
                cluster_index = c_list[2*i]
                cluster_rank = c_list[2*i+1]
                conf = self.data[int(self.argsort[cx])]
                if cluster_rank == 0:
                    assert len(d[t]) == cluster_index
                    d[t].append(Cluster(conf))
                else:
                    # add conformation to its cluster
                    assert len(d[t][cluster_index]) == cluster_rank
                    d[t][cluster_index].append(conf)
                # tell the conformation what cluster(s) it belongs to...
                conf.cluster_dict[t] = (cluster_index, cluster_rank)
                conf.refRMS = data_list[0]
                conf.clRMS = data_list[1]
Example #23
def process_data(data, referencing=None, n_points=200, exposure_cutoff=0.05, atom_type='H',
                 exclude_entries=(), molType='protein'):

    stats = process_secondary(data, referencing, n_points, atom_type,
                              exclude_entries=exclude_entries, molType=molType)

    ## decompose with respect to accessibility

    S = {}
    bounds = {}

    for key, values in stats.items():

        shifts, exposure = values

        ind = Numeric.argsort(exposure)

        shifts = Numeric.take(shifts, ind)
        exposure = Numeric.take(exposure, ind)

        B = []

        mask = Numeric.less(exposure, exposure_cutoff)
        ind = Numeric.nonzero(mask)

        if len(shifts)-len(ind) < n_points:
            ind = range(len(shifts))

        if len(ind) >= n_points:

            print key, 0, len(ind), len(shifts)

            S[key + (0,)] = (Numeric.take(shifts, ind), Numeric.take(exposure, ind))
            B.append(exposure[len(ind)-1])

            shifts = shifts[len(ind):]
            exposure = exposure[len(ind):]

            i = 1
            
        else:
            i = 0

        n_classes = len(exposure) / int(n_points)

        n = int(len(exposure) / float(max(1, n_classes))) + 1

        while len(shifts) > n:

            print key, i, len(shifts[:n]), len(shifts)

            S[key + (i,)] = shifts[:n], exposure[:n]
            
            B.append(exposure[n])

            shifts = shifts[n:]
            exposure = exposure[n:]

            i += 1

        if len(shifts):

            print key, i, len(shifts)

            S[key + (i,)] = shifts, exposure
            B.append(exposure[-1])

        bounds[key] = B

    return S, bounds
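
process_data first orders the (shift, exposure) pairs by exposure and then slices them into classes of roughly n_points entries. A compressed sketch of that paired sort and slicing (hypothetical numbers, modern numpy):

import numpy as np

shifts = np.array([8.1, 8.3, 7.9, 8.0, 8.2, 8.4])
exposure = np.array([0.90, 0.10, 0.40, 0.02, 0.60, 0.30])
order = np.argsort(exposure)
shifts, exposure = shifts[order], exposure[order]    # both ordered by accessibility

n = 2                                                # class size (n_points analogue)
classes = [(shifts[i:i + n], exposure[i:i + n]) for i in range(0, len(shifts), n)]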