Ejemplo n.º 1
0
    def centerFrames(self):
        """
        Get indices for frame nearest to each cluster center.

        @return: list of cluster center indecies
        @rtype: [int]
        """
        return N0.argmax(self.memberships(), 1)
Ejemplo n.º 2
0
    def argmax(self, key):
        """
        @param key: item attribute
        @type  key: any

        @return: index of item with highest item[key] value
        @rtype: int
        """
        vLst = self.valuesOf(key)
        return N0.argmax(vLst)
Ejemplo n.º 3
0
    def argmax( self, infoKey ):
        """
        Get index of complex c with highest c.infos[infokey] value

        @param infoKey: key for info dict
        @type  infoKey: str

        @return: index of complex c with highest c.infos[infokey] value
        @rtype: int
        """
        vLst = self.valuesOf( infoKey )
        return N0.argmax( vLst )
Ejemplo n.º 4
0
    def memberFrames(self, threshold=0.):
        """
        Get indices of all frames belonging to each cluster. Each frame
        is guaranteed to belong, at least, to the cluster for which it has
        its maximum membership. If threshold > 0, it can additionally pop
        up in other clusters.

        @param threshold: minimal cluster membership or 0 to consider
                          only max membership (default: 0)
        @type  threshold: float

        @return: n_cluster, lst of lst of int, frame indices
        @rtype: [[int]]
        """
        ## best cluster for each frame
        msm = self.memberships()
        maxMemb = N0.argmax(msm, 0)

        r = [
            N0.nonzero(N0.equal(maxMemb, i))
            for i in range(0, self.n_clusters)
        ]
        r = [x.tolist() for x in r]

        ## same thing but now taking all above threshold
        ## -> same frame can end up in several clusters
        if threshold > 0.:
            r2 = [N0.nonzero(N0.greater(l, threshold)) for l in msm]

            ## add only additional frames
            for i in range(0, len(r)):
                try:
                    frames = r[i].tolist()
                except:
                    frames = r[i]

                r[i] = frames + [fr for fr in r2[i] if fr not in r[i]]

        ## sort frames within each cluster by their membership
        r = [self.membershipSort(r[i], i) for i in range(0, len(r))]

        return r
Ejemplo n.º 5
0
 def max(self):
     """
     Max height of distribution.
     """
     index = N0.argmax(self.p)
     return self.x[index]
Ejemplo n.º 6
0
    def parse_result(self):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'
          
        @return: dictionary with warious information about the profile
        @rtype: dict
        """
        ## check that the outfut file is there and seems valid
        if not os.path.exists(self.f_out):
            raise HmmerError,\
                  'Hmmerfetch result file %s does not exist.'%self.f_out

        if T.fileLength(self.f_out) < 10:
            raise HmmerError,\
                  'Hmmerfetch result file %s seems incomplete.'%self.f_out

        profileDic = {}

        ## read result
        hmm = open(self.f_out, 'r')
        out = hmm.read()
        hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] = self.hmmName
        profileDic['profLength'] = \
                  int( string.split(re.findall('LENG\s+[0-9]+', out)[0])[1] )
        profileDic['accession'] = \
                  string.split(re.findall('ACC\s+PF[0-9]+', out)[0])[1]
        profileDic['NrSeq'] = \
                  int( string.split(re.findall('NSEQ\s+[0-9]+', out)[0])[1] )
        profileDic['AA'] = \
              string.split(re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0] )[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
        nullEmm = [
            float(j) for j in string.split(re.findall(pattern, out)[0])[1:]
        ]

        ## get emmision scores
        prob = []
        for i in range(1, profileDic['profLength'] + 1):
            pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
            e = [float(j) for j in string.split(re.findall(pattern, out)[0])]
            prob += [e]

        profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
        profileDic['emmScore'] = N0.array(prob)[:, 1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

        ent = [
            N0.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb
        ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:, 1:]

        ##         # test set all to max score
        ##         p = proba
        ##         p1 = []
        ##         for i in range( len(p) ):
        ##             p1 += [ N0.resize( p[i][N0.argmax( N0.array( p[i] ) )] , N0.shape( p[i] ) ) ]
        ##         profileDic['maxAll'] = p1

        # test set all to N0.sum( abs( probabilities ) )
        p = proba
        p2 = []
        for i in range(len(p)):
            p2 += [N0.resize(N0.sum(N0.absolute(p[i])), N0.shape(p[i]))]
        profileDic['absSum'] = p2

        # set all to normalized max score
        p = proba
        p4 = []
        for i in range(len(p)):
            p_scale = (p[i] - N0.average(p[i])) / math.SD(p[i])
            p4 += [
                N0.resize(p_scale[N0.argmax(N0.array(p_scale))],
                          N0.shape(p[i]))
            ]
        profileDic['maxAllScale'] = p4

        return profileDic
Ejemplo n.º 7
0
    def parse_result( self ):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'
          
        @return: dictionary with warious information about the profile
        @rtype: dict
        """
        ## check that the outfut file is there and seems valid
        if not os.path.exists( self.f_out ):
            raise HmmerError,\
                  'Hmmerfetch result file %s does not exist.'%self.f_out
        
        if T.fileLength( self.f_out ) < 10:
            raise HmmerError,\
                  'Hmmerfetch result file %s seems incomplete.'%self.f_out
        
        profileDic = {}

        ## read result
        hmm = open( self.f_out, 'r')
        out = hmm.read()
        hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] =  self.hmmName 
        profileDic['profLength'] = \
                  int( string.split(re.findall('LENG\s+[0-9]+', out)[0])[1] )
        profileDic['accession'] = \
                  string.split(re.findall('ACC\s+PF[0-9]+', out)[0])[1] 
        profileDic['NrSeq'] = \
                  int( string.split(re.findall('NSEQ\s+[0-9]+', out)[0])[1] )
        profileDic['AA'] = \
              string.split(re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0] )[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
        nullEmm = [ float(j) for j in string.split(re.findall(pattern, out)[0])[1:] ]

        ## get emmision scores
        prob=[]
        for i in range(1, profileDic['profLength']+1):
            pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
            e = [ float(j) for j in string.split(re.findall(pattern, out)[0]) ]
            prob += [ e ]

        profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
        profileDic['emmScore'] = N0.array(prob)[:,1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

        ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0] for e in emmProb ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:,1:]

##         # test set all to max score
##         p = proba
##         p1 = []
##         for i in range( len(p) ):
##             p1 += [ N0.resize( p[i][N0.argmax( N0.array( p[i] ) )] , N0.shape( p[i] ) ) ]
##         profileDic['maxAll'] = p1

        # test set all to N0.sum( abs( probabilities ) )
        p = proba
        p2 = []
        for i in range( len(p) ) :
            p2 += [ N0.resize( N0.sum( N0.absolute( p[i] )), N0.shape( p[i] ) ) ]
        profileDic['absSum'] = p2

        # set all to normalized max score 
        p = proba
        p4 = []
        for i in range( len(p) ) :
            p_scale = (p[i] - N0.average(p[i]) )/ math.SD(p[i])
            p4 += [ N0.resize( p_scale[N0.argmax( N0.array(p_scale) )] ,
                              N0.shape( p[i] ) ) ]
        profileDic['maxAllScale'] = p4

        return profileDic