Esempio n. 1
0
 def readSamples(self, fileName, key,recalc=False,samples=None):
     """Load or compute the sample mean/covariance and the derived terms.

     A cache file named ``fileName + ".pre"`` is tried first; it stores
     ``mu`` and ``cov`` side by side as a flat float dump that is read back
     as a 3-row matrix.  When the cache cannot be opened, or ``recalc`` is
     true, the statistics are computed from ``samples`` (if given) or from
     entry ``key`` of the MATLAB file ``fileName``, and the cache file is
     rewritten.

     Args:
         fileName: path of the .mat file; also the stem of the cache file.
         key: variable name looked up in the loaded .mat content.
         recalc: when true, ignore any existing cache file.
         samples: optional sample matrix with one sample per column; when
             given it is used instead of reading the .mat file.

     Sets ``self.mu``, ``self.cov``, ``self._invCov``, ``self._detCov`` and
     ``self._multConst`` (and ``self._samples`` when recomputing).  The
     cache layout and the ``(2 * pi) ** 3`` factor are written for
     3-dimensional samples.
     """
     # Every print below passes a single pre-formatted string, so the
     # emitted text is the same under Python 2 and Python 3.
     fn = fileName + ".pre"
     try:
         if recalc:
             raise IOError()
         # Opening the file is only an existence/readability probe.
         with open(fn):
             pass
         print("precalculated file present")
         cached = mat(fromfile(fn).reshape((3, -1)))
         # First column is mu, the remaining columns are cov.
         self.mu, self.cov = hsplit(cached, [1])
     except IOError:
         # Identity test on purpose: "!= None" on a numpy array compares
         # element-wise and makes the "if" ambiguous.
         if samples is not None:
             self._samples = samples
             print("got samples:  %s" % (self._samples,))
         else:
             print("no file present, calculating...")
             smpls = loadmat(fileName)[key]
             print("loaded from mat file")
             self._samples = mat(smpls)
             print("reshaped into samples")
         count = self._samples.shape[1]
         self.mu = sum(self._samples, axis=1) / count
         print("mu= %s" % str(self.mu))
         sampdiffmu = self._samples - self.mu
         # Population covariance (divides by N, not N - 1).
         self.cov = sampdiffmu * sampdiffmu.T / count
         print("cov= %s" % str(self.cov))
         mat(hstack((self.mu, self.cov))).tofile(fn)
     self._invCov = self.cov.I
     self._detCov = det(self.cov)
     self._multConst = 1 / sqrt((2 * pi) ** 3 * self._detCov)
def bicgstabReg(X,Y,my_Y,B):
    '''
    Stabilised bi-conjugate gradient iteration.

    Starting from the coefficients B, the loop refines the solution of
    X * B = Y and stops as soon as the value of CostFunctionReg changes by
    no more than the module-level tolerance ``e`` between checks.

    Returns the pair (dot(X, B), B) for the final coefficients.
    '''
    fitted = list(my_Y)
    last_cost = CostFunctionReg(Y, fitted, B)

    n_rows = len(Y)
    shadow_residual = Y - dot(X, B)
    residual = Y - dot(X, B)
    rho_prev = 1
    alpha = 1
    omega_prev = 1
    v_prev = mat(zeros(n_rows).reshape(n_rows, 1))
    p_prev = mat(zeros(n_rows).reshape(n_rows, 1))

    while True:
        rho = array(dot(shadow_residual.T, residual))[0][0]
        beta = (rho / rho_prev) * (alpha / omega_prev)
        p_cur = residual + beta * (p_prev - omega_prev * v_prev)

        v_cur = dot(X, p_cur)
        alpha = rho / (array(dot(shadow_residual.T, v_cur))[0][0])
        h = B + alpha * p_cur
        fitted = array(dot(X, array(h).reshape(len(h), 1)).reshape(1, n_rows))[0]
        cost = CostFunctionReg(Y, fitted, h)
        # Half-step convergence check; last_cost is deliberately not
        # refreshed here, only at the end of a full iteration.
        if abs(cost - last_cost) <= e:
            B = h
            break

        s = residual - alpha * v_cur
        t = dot(X, s)
        omega = array(dot(t.T, s))[0][0] / array(dot(t.T, t))[0][0]
        B = h + omega * s
        fitted = array(dot(X, array(B).reshape(len(B), 1)).reshape(1, n_rows))[0]
        cost = CostFunctionReg(Y, fitted, B)
        if abs(cost - last_cost) <= e:
            break

        # Roll the state forward for the next iteration.
        residual = s - omega * t
        rho_prev = rho
        p_prev = p_cur
        v_prev = v_cur
        omega_prev = omega
        last_cost = cost
    return dot(X, B), B
Esempio n. 3
0
def getXmat(Xc, order=None):
    """Build the polynomial design matrix for the sample points ``Xc``.

    Row ``r`` of the result is ``[Xc[r]**0, Xc[r]**1, ..., Xc[r]**order]``.

    Args:
        Xc: one-dimensional sequence of sample points.
        order: highest power to generate.  Defaults to the module-level
            ``ORDER`` so existing one-argument callers are unaffected.

    Returns:
        A numpy matrix with ``len(Xc)`` rows and ``order + 1`` columns.
    """
    if order is None:
        order = ORDER
    points = array(Xc)
    # One row per power, then transpose so that each sample becomes a row.
    return mat([points ** power for power in range(order + 1)]).T
Esempio n. 4
0
def reFeature(B, scale=2.5, order=None):
    """Divide polynomial coefficient ``i`` by ``scale ** i``.

    Presumably this converts coefficients fitted on inputs divided by
    ``scale`` back to the unscaled inputs (verify against the caller).

    Args:
        B: coefficient vector holding ``order + 1`` values in any shape
            that can be reshaped to a single row.
        scale: the per-power divisor base; defaults to the previously
            hard-coded 2.5.
        order: polynomial order; defaults to the module-level ``ORDER``.

    Returns:
        A numpy matrix of shape ``(order + 1, 1)``.
    """
    if order is None:
        order = ORDER
    coeffs = array(B).reshape(1, order + 1)[0]
    rescaled = [coeff / (scale ** power) for power, coeff in enumerate(coeffs)]
    return mat(array(rescaled).reshape(order + 1, 1))
Esempio n. 5
0
 def __calculateP__(self):
     """Recompute ``self.p`` as column-normalised likelihoods.

     For every component ``k`` a ``Likelihood`` is configured with
     ``self.mean[k]`` / ``self.covm[k]`` and evaluated on every data row
     ``i``; each column of the result is then divided by its sum over the
     components, so the entries of ``self.p[:, i]`` add up to 1.
     """
     from numpy import zeros

     # A real K x N work buffer.  mat(self.p.shape) would only build a 1x2
     # matrix holding the shape itself, so indexing p[k, i] would fail.
     p = mat(zeros(self.p.shape))
     for k in range(0, self.K):
         llh = Likelihood()
         llh.setParams(self.mean[k,:], self.covm[k,:,:])
         for i in range(0, self.N):
             p[k,i] = llh.getP(self.data[i,:])
     # Column sums form a 1 x N matrix, hence the two-index access below.
     sp = p.sum(axis=0)
     for k in range(0, self.K):
         for i in range(0, self.N):
             self.p[k,i] = p[k,i] / sp[0,i]
def normalize(dataset):
    """Parse rows of numeric strings and scale the feature columns.

    A constant 1.0 is prepended to every row, the cells are converted to
    floats, and every column except the last is divided by its maximum.
    The last column is left untouched.

    Args:
        dataset: sequence of rows, each a sequence of numeric strings.
            It is not modified.

    Returns:
        A numpy matrix with one more column than the input rows.
    """
    # float() replaces eval(): it parses the numeric cells without
    # executing arbitrary text, and the list comprehension leaves the
    # caller's rows untouched.
    rows = [[1.0] + [float(cell) for cell in row] for row in dataset]
    scaled = mat(rows)
    for col in range(scaled.shape[1] - 1):
        peak = scaled[:, col].max()
        # A zero maximum would turn the column into NaN/inf; leave it as is.
        if peak != 0:
            scaled[:, col] = scaled[:, col] / peak
    return scaled
def gradAscent(data,label,labelSet):
    """Iteratively fit one weight column per class.

    An indicator table ``hmat`` is built with a 1 wherever ``label[j]``
    equals ``labelSet[k]``; only the first ``len(labelSet) - 1`` entries of
    ``labelSet`` get a column.  Each weight column starts at all ones and is
    updated with step size ``alpha`` (module-level) until the change of its
    squared norm between two updates is no larger than 1e-5.

    NOTE(review): ``data * weights`` is used as a matrix product, so
    ``data`` is presumably a numpy matrix -- confirm against the caller.

    Returns the weight array: ``(n, len(labelSet) - 1)`` when there are at
    least two columns, otherwise ``(n, 1)``, with ``n = data.shape[1]``.
    """
    hmat = zeros((data.shape[0],len(labelSet) - 1)).tolist()
#     print "hmat[2][1]",hmat[2][1]
    # NOTE(review): this initial value is never read; i is rebound by the
    # loop over labelNum below.
    i = 0
#     (debug) print the various lengths: len(labelSet) - 1, data.shape[0]
    # Mark, per sample j, which of the tracked classes k it belongs to.
    for k in range(len(labelSet) - 1):
        for j in range(data.shape[0]):
            if(label[j] == labelSet[k]):
                hmat[j][k] = 1
            
    # NOTE(review): iteration is not used by the active code below.
    iteration = 1
    # Convergence threshold on the change of the squared weight norm.
    error = 0.00001
    m,n = shape(data)
    labelNum = len(labelSet) - 1
    if(labelNum >= 2):
        weights = ones((n,labelNum))
    else:
        weights = ones((n,1))
    
#     for i in range(iteration):
    for i in range(labelNum):
        diff = 1
        hmat = mat(hmat)
        while(diff > error):
            if(labelNum == 1): 
                h = sigMoid(data * weights[:,i])
                h = mat(h)
                deri = mat(label).transpose() - h  # original note: 11*1
            else:
                h = fakeSigMoid(data * weights)
                h = mat(h)
                deri = hmat[:,i] - h[:,i]  # original note: 11*1
    #         (original note) everything is still fine up to this point
            # Keep the previous column so the size of the step can be measured.
            formal = copy.deepcopy(weights[:,i])
            weights[:,i] = weights[:,i] + alpha * data.transpose() * deri   # original note: gradient descent minimises the objective, hence the "+" here
            diff = abs(formal.transpose() * formal - weights[:,i].transpose() * weights[:,i])
            print "diff = ",diff
    return weights
Esempio n. 8
0
def TheLeastSquareMethod(X, Y):
    """
    Least-squares polynomial fit, plain and ridge-regularised.

    Builds the design matrix with powers 0..ORDER of X and solves the
    normal equations twice: once as is, and once with ``lamda`` times the
    identity added to the Gram matrix.

    Returns the tuple
    (design matrix, Y as a column, plain coefficients, plain fit,
     regularised fit, regularised coefficients).
    """
    n_terms = ORDER + 1
    identity = eye(n_terms)

    X_matrix = mat([array(X) ** power for power in range(n_terms)]).T
    Y_matrix = array(Y).reshape((len(Y), 1))

    X_matrix_T = X_matrix.T
    gram = dot(X_matrix_T, X_matrix)

    # Plain normal equations: (X^T X)^-1 X^T Y
    B = dot(dot(gram.I, X_matrix_T), Y_matrix)
    # Regularised variant: (X^T X + lamda * I)^-1 X^T Y
    penalised = gram + lamda * identity
    B1 = dot(dot(penalised.I, X_matrix_T), Y_matrix)

    fitted = dot(X_matrix, B)
    fitted_reg = dot(X_matrix, B1)
    return X_matrix, Y_matrix, B, fitted, fitted_reg, B1
Esempio n. 9
0
# coding:utf-8

import numpy as np
from numpy.matrixlib.defmatrix import mat
from numpy.core.numeric import ones, full
from common_utils import typeName

# Each print below is a single-argument call, so the script produces the
# same output under Python 2 and Python 3.

# Evenly spaced values in [10, 30) with step 5; the end point 30 is excluded.
print(np.arange(10, 30, 5))  # [10 15 20 25]

result = np.arange(0, 2, 0.5)
print(result)
print(type(result).__name__)  # ndarray
print(mat(result).transpose())

# Five evenly spaced values covering the closed interval [-1, 0].
print(np.linspace(-1, 0, 5))

x = np.array([[1, 2], [3, 4]])
print(mat(x))
# Transpose of the 2x2 matrix.
print(mat(x).transpose())

# A 1-D array of four values becomes a 1x4 matrix; its transpose is 4x1.
x = np.array((1, 2, 3, 4))
print(mat(x))
print(mat(x).transpose())

# A 3x2 array wrapped as a matrix; it is printed as is (no transpose here).
x = np.array([[1, 2], [3, 4], [5, 6]])
print(mat(x))
Esempio n. 10
0
def main():
    """Build the d1 histogram database from the CSV dump and pickle it.

    Steps, as implemented below:
      1. gunzip the CSV and read at most ``lineCountMax`` rows, grouped by
         entry id (first column).
      2. Per entry, drop rows whose b-factor column exceeds
         ``DEFAULT_MAX_BFACTOR`` and the rows found at the tail of the
         b-factor-sorted list (``DEFAULT_BFACTOR_PERCENTAGE_FILTER`` percent).
      3. Collect the d1 value of every residue that is a sibling of the
         previous row, keyed by secondary structure and residue types.
      4. Histogram each collected list, derive a statistics tuple for each
         (ssType, resType, resTypePrev, resTypeNext) combination, and
         pickle all tables to ``dbase_file_abs_name``.

    Returns None.  Returns early, after logging an error, when a residue
    type is unknown to NTdb or a d1 value is out of range.
    """
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif', cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)
    reader = csv.reader(open(cvs_file_abs_name, "rb"), quoting=csv.QUOTE_NONE)
    valueBySs0AndResTypes = {} # keys are SSi,   RTi, RTi-1
    valueBySs1AndResTypes = {} # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {} # keys are SSi
    valueBySs1 = {} # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = [] # NB is an array without being keyed.

    histd1BySs0AndResTypes = {} # keys are SSi,   RTi, RTi-1
    histd1BySs1AndResTypes = {} # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}


    # Group the raw rows by entry id, reading at most lineCountMax rows.
    linesByEntry = {}
    lineCount = 0
    for row in reader:
        lineCount += 1
        if lineCount > lineCountMax:
            break
        entryId = row[0]
        if not linesByEntry.has_key(entryId):
            linesByEntry[ entryId ] = []
        linesByEntry[ entryId ].append( row )

    skippedResTypes = []
    entryIdList = linesByEntry.keys()
    entryIdList.sort()

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[ entryId2 ]
        # Tag every row with its position so the sorted copy can refer back to it.
        for idx,line in enumerate(lineList):
            line.append(idx)
        # presumably sorts ascending on the b-factor column -- confirm NTsort semantics
        lineListSorted = NTsort(lineList,BFACTOR_COLUMN,inplace=False)
        # Now throw away the worst 10 % of residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        # Despite its name this is the slice start: the last bad_count rows
        # of the sorted list are the ones marked for removal.
        to_remove_count = n-bad_count
#        nTmessage("Removing at least %d from %d residues" % (bad_count,n))
        badIdxList = [lineItem[IDX_COLUMN] for lineItem in lineListSorted[to_remove_count:n]]
        # Walk the indices from high to low so that deleting element i does
        # not shift any index that still has to be visited.
        iList = range(n)
        iList.reverse()
        for i in iList:
            lineItem = lineList[i]
            max_bfactor = float(lineItem[BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR:
#                nTdebug('Skipping because max bfactor in dihedral %.3f is above %.3f %s' % (max_bfactor, DEFAULT_MAX_BFACTOR, lineItem))
                del lineList[i] # TODO: check if indexing is still right or we shoot in the foot.
                continue
            if i in badIdxList:
#                nTdebug('Skipping because bfactor worst %.3f %s' % (max_bfactor, lineItem))
                del lineList[i]
                continue
        removed_count = n - len(lineList)
#        nTdebug("Reduced list by %d" % removed_count)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    # Collect d1 values; the "previous residue" state is reset for each entry.
    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for _r, row in enumerate(linesByEntry[ entryId2 ]):
    #1zzk,A,GLN ,  17,E, 205.2, 193.6
    #1zzk,A,VAL ,  18,E, 193.6, 223.2
    #1zzk,A,THR ,  19,E, 223.2, 190.1
            # Nine fields: the CSV columns plus the index appended during filtering.
            (entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor, _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName( resType )
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = d1.strip()
            d1 = floatParse(d1)
            if isNaN(d1):
#                nTdebug("d1 %s is a NaN on row: %s" % (d1,row))
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return

            if not (resType in common20AAList):
    #            nTmessage("Skipping uncommon residue: %s" % resType)
                if not ( resType in skippedResTypes):
                    skippedResTypes.append( resType )
                continue

            # Only rows that directly follow the previous accepted row contribute values.
            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType,     resType, prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType, resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append( d1 )
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    # The decompressed CSV is no longer needed once all rows are processed.
    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes )
    nTmessage("Got count of values: %r" % len(value) )
    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = valueBySs0AndResTypes.keys()
    keyListSorted1.sort()
    # First pass fills the SSi keyed tables, second pass the SSi-1 keyed ones.
    for isI in (True, False):
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes
        # NOTE(review): keyListSorted1 comes from the SSi table but is also
        # used to index the SSi-1 tables in the second pass -- confirm that
        # both always share the same keys.
        for ssType in keyListSorted1:
#            keyListSorted1b = deepcopy(keyListSorted1)
    #        for ssTypePrev in keyListSorted1b:
            d1List = valueBySs[ssType]
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)

            keyListSorted2 = valueBySsAndResTypes[ssType].keys()
            keyListSorted2.sort()
            for resType in keyListSorted2:
    #            nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySsAndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for prevResType in keyListSorted3:
    #                nTmessage("Working on valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror("Expected d1List from valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
    #                nTmessage("Count %6d in valueBySsAndResTypes[%s][%s][%s]" % (sum(hist1d), ssType, resType, prevResType))
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType, resType, prevResType)
            # Now that they are all in we can redo this.
    # Delete the reference -not- the object.
    valueBySs = None
    valueBySsAndResTypes = None
    histd1BySs = None
    histd1BySsAndResTypes = None

    for ssType in keyListSorted1:
        # NOTE(review): keyListSorted2 is not recomputed here; it still holds
        # the residue types left over from the last iteration of the loop
        # above (last ssType of the SSi-1 pass).  Confirm this is intended --
        # indexing valueBySs0AndResTypes[ssType][resType] below may fail for
        # a residue type that this ssType does not have.
        for resType in keyListSorted2:
#            nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
            keyListSorted3 = valueBySs0AndResTypes[ssType][resType].keys()
            keyListSorted3.sort()
            for resTypePrev in keyListSorted3:
                keyListSorted4 = keyListSorted3[:] # take a copy
                for resTypeNext in keyListSorted4:
                    hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType, resType, resTypePrev) # x-axis
                    # This was bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                    hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType, resTypeNext, resType) 
                    if hist1 == None:
                        nTdebug('skipping for hist1 is empty for [%s] [%s] [%s]' % (ssType, resTypePrev, resType))
                        continue
                    if hist2 == None:
                        nTdebug('skipping for hist2 is empty for [%s] [%s] [%s]' % (ssType, resType, resTypeNext))
                        continue
                    # Row vector and column vector; assuming multiply is
                    # numpy.multiply, broadcasting yields the 2D grid of products.
                    m1 = mat(hist1,dtype='float')
                    m2 = mat(hist2,dtype='float')
                    m2 = m2.transpose() # pylint: disable=E1101
                    hist2d = multiply(m1,m2)

                    cTuple = getEnsembleAverageAndSigmaHis( hist2d )
                    (_c_av, c_sd, _hisMin, _hisMax) = cTuple #@UnusedVariable
                    cTuple += tuple([str([ssType, resType, resTypePrev, resTypeNext])]) # append the hash keys as a way of id.
#                    nTdebug("For ssType %s residue types %s %s %s found (av/sd/min/max) %8.0f %8.0f %8.0f %8.0f" % (
#                        ssType, resType, resTypePrev, resTypeNext, c_av, c_sd, hisMin, hisMax))
                    if c_sd == None:
                        nTdebug('Failed to get c_sd when testing not all residues are present in smaller sets.')
                        continue
                    if c_sd == 0.:
                        nTdebug('Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.')
                        continue
                    setDeepByKeys( histd1CtupleBySsAndResTypes, cTuple, ssType, resType, resTypePrev, resTypeNext)
    # end for isI

    # Histograms keyed on residue type pairs only (no secondary structure).
    keyListSorted1 = valueByResTypes.keys()
    keyListSorted1.sort()
    for resType in keyListSorted1:
        keyListSorted2 = valueByResTypes[resType].keys()
        keyListSorted2.sort()
        for prevResType in keyListSorted2:
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" % (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
#            nTmessage("Count %6d in valueByResTypes[%s][%s]" % (sum(hist1d), resType, prevResType))
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    # Overall histogram of every accepted d1 value.
    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))
#    setDeepByKeys(histd1, hist1d, resType, prevResType)

    # Replace any previous database file with a fresh pickle (protocol 2).
    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    output = open(dbase_file_abs_name, 'wb')
    dbase = {}
    dbase[ 'histd1BySs0AndResTypes' ] = histd1BySs0AndResTypes # 92 kb uncompressed in the case of ~1000 lines only
    dbase[ 'histd1BySs1AndResTypes' ] = histd1BySs1AndResTypes
    dbase[ 'histd1CtupleBySsAndResTypes' ] = histd1CtupleBySsAndResTypes
    dbase[ 'histd1ByResTypes' ] = histd1ByResTypes # 56 kb
    dbase[ 'histd1BySs0' ] = histd1BySs0 # 4 kb
    dbase[ 'histd1BySs1' ] = histd1BySs1
    dbase[ 'histd1' ] = histd1 #  4 kb

    cPickle.dump(dbase, output, 2)
    output.close()
Esempio n. 11
0
def main():
    """Build the d1 histogram database from the CSV dump and pickle it.

    Steps, as implemented below:
      1. gunzip the CSV and read at most ``lineCountMax`` rows, grouped by
         entry id (first column).
      2. Per entry, drop rows whose b-factor column exceeds
         ``DEFAULT_MAX_BFACTOR`` and the rows found at the tail of the
         b-factor-sorted list (``DEFAULT_BFACTOR_PERCENTAGE_FILTER`` percent).
      3. Collect the d1 value of every residue that is a sibling of the
         previous row, keyed by secondary structure and residue types.
      4. Histogram each collected list, derive a statistics tuple for each
         (ssType, resType, resTypePrev, resTypeNext) combination, and
         pickle all tables to ``dbase_file_abs_name``.

    Returns None.  Returns early, after logging an error, when a residue
    type is unknown to NTdb or a d1 value is out of range.
    """
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif',
                                        cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)
    reader = csv.reader(open(cvs_file_abs_name, "rb"), quoting=csv.QUOTE_NONE)
    valueBySs0AndResTypes = {}  # keys are SSi,   RTi, RTi-1
    valueBySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {}  # keys are SSi
    valueBySs1 = {}  # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = []  # NB is an array without being keyed.

    histd1BySs0AndResTypes = {}  # keys are SSi,   RTi, RTi-1
    histd1BySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}

    # Group the raw rows by entry id, reading at most lineCountMax rows.
    linesByEntry = {}
    lineCount = 0
    for row in reader:
        lineCount += 1
        if lineCount > lineCountMax:
            break
        entryId = row[0]
        if not linesByEntry.has_key(entryId):
            linesByEntry[entryId] = []
        linesByEntry[entryId].append(row)

    skippedResTypes = []
    entryIdList = linesByEntry.keys()
    entryIdList.sort()

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[entryId2]
        # Tag every row with its position so the sorted copy can refer back to it.
        for idx, line in enumerate(lineList):
            line.append(idx)
        # presumably sorts ascending on the b-factor column -- confirm NTsort semantics
        lineListSorted = NTsort(lineList, BFACTOR_COLUMN, inplace=False)
        # Now throw away the worst 10 % of residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        # Despite its name this is the slice start: the last bad_count rows
        # of the sorted list are the ones marked for removal.
        to_remove_count = n - bad_count
        #        nTmessage("Removing at least %d from %d residues" % (bad_count,n))
        badIdxList = [
            lineItem[IDX_COLUMN]
            for lineItem in lineListSorted[to_remove_count:n]
        ]
        # Walk the indices from high to low so that deleting element i does
        # not shift any index that still has to be visited.
        iList = range(n)
        iList.reverse()
        for i in iList:
            lineItem = lineList[i]
            max_bfactor = float(lineItem[BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR:
                #                nTdebug('Skipping because max bfactor in dihedral %.3f is above %.3f %s' % (max_bfactor, DEFAULT_MAX_BFACTOR, lineItem))
                del lineList[
                    i]  # TODO: check if indexing is still right or we shoot in the foot.
                continue
            if i in badIdxList:
                #                nTdebug('Skipping because bfactor worst %.3f %s' % (max_bfactor, lineItem))
                del lineList[i]
                continue
        removed_count = n - len(lineList)
        #        nTdebug("Reduced list by %d" % removed_count)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    # Collect d1 values; the "previous residue" state is reset for each entry.
    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for _r, row in enumerate(linesByEntry[entryId2]):
            #1zzk,A,GLN ,  17,E, 205.2, 193.6
            #1zzk,A,VAL ,  18,E, 193.6, 223.2
            #1zzk,A,THR ,  19,E, 223.2, 190.1
            # Nine fields: the CSV columns plus the index appended during filtering.
            (entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor,
             _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName(resType)
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = d1.strip()
            d1 = floatParse(d1)
            if isNaN(d1):
                #                nTdebug("d1 %s is a NaN on row: %s" % (d1,row))
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return

            if not (resType in common20AAList):
                #            nTmessage("Skipping uncommon residue: %s" % resType)
                if not (resType in skippedResTypes):
                    skippedResTypes.append(resType)
                continue

            # Only rows that directly follow the previous accepted row contribute values.
            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType, resType,
                                 prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType,
                                 resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append(d1)
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    # The decompressed CSV is no longer needed once all rows are processed.
    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes)
    nTmessage("Got count of values: %r" % len(value))
    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = valueBySs0AndResTypes.keys()
    keyListSorted1.sort()
    # First pass fills the SSi keyed tables, second pass the SSi-1 keyed ones.
    for isI in (True, False):
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes
        # NOTE(review): keyListSorted1 comes from the SSi table but is also
        # used to index the SSi-1 tables in the second pass -- confirm that
        # both always share the same keys.
        for ssType in keyListSorted1:
            #            keyListSorted1b = deepcopy(keyListSorted1)
            #        for ssTypePrev in keyListSorted1b:
            d1List = valueBySs[ssType]
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)

            keyListSorted2 = valueBySsAndResTypes[ssType].keys()
            keyListSorted2.sort()
            for resType in keyListSorted2:
                #            nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySsAndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for prevResType in keyListSorted3:
                    #                nTmessage("Working on valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror(
                            "Expected d1List from valueBySsAndResTypes[%s][%s][%s]"
                            % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List,
                                                   bins=binCount,
                                                   range=xRange)
                    #                nTmessage("Count %6d in valueBySsAndResTypes[%s][%s][%s]" % (sum(hist1d), ssType, resType, prevResType))
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType,
                                  resType, prevResType)
            # Now that they are all in we can redo this.
    # Delete the reference -not- the object.
    valueBySs = None
    valueBySsAndResTypes = None
    histd1BySs = None
    histd1BySsAndResTypes = None

    for ssType in keyListSorted1:
        # NOTE(review): keyListSorted2 is not recomputed here; it still holds
        # the residue types left over from the last iteration of the loop
        # above (last ssType of the SSi-1 pass).  Confirm this is intended --
        # indexing valueBySs0AndResTypes[ssType][resType] below may fail for
        # a residue type that this ssType does not have.
        for resType in keyListSorted2:
            #            nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
            keyListSorted3 = valueBySs0AndResTypes[ssType][resType].keys()
            keyListSorted3.sort()
            for resTypePrev in keyListSorted3:
                keyListSorted4 = keyListSorted3[:]  # take a copy
                for resTypeNext in keyListSorted4:
                    hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType,
                                          resType, resTypePrev)  # x-axis
                    # This was bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                    hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType,
                                          resTypeNext, resType)
                    if hist1 == None:
                        nTdebug(
                            'skipping for hist1 is empty for [%s] [%s] [%s]' %
                            (ssType, resTypePrev, resType))
                        continue
                    if hist2 == None:
                        nTdebug(
                            'skipping for hist2 is empty for [%s] [%s] [%s]' %
                            (ssType, resType, resTypeNext))
                        continue
                    # Row vector and column vector; assuming multiply is
                    # numpy.multiply, broadcasting yields the 2D grid of products.
                    m1 = mat(hist1, dtype='float')
                    m2 = mat(hist2, dtype='float')
                    m2 = m2.transpose()  # pylint: disable=E1101
                    hist2d = multiply(m1, m2)

                    cTuple = getEnsembleAverageAndSigmaHis(hist2d)
                    (_c_av, c_sd, _hisMin, _hisMax) = cTuple  #@UnusedVariable
                    cTuple += tuple([
                        str([ssType, resType, resTypePrev, resTypeNext])
                    ])  # append the hash keys as a way of id.
                    #                    nTdebug("For ssType %s residue types %s %s %s found (av/sd/min/max) %8.0f %8.0f %8.0f %8.0f" % (
                    #                        ssType, resType, resTypePrev, resTypeNext, c_av, c_sd, hisMin, hisMax))
                    if c_sd == None:
                        nTdebug(
                            'Failed to get c_sd when testing not all residues are present in smaller sets.'
                        )
                        continue
                    if c_sd == 0.:
                        nTdebug(
                            'Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.'
                        )
                        continue
                    setDeepByKeys(histd1CtupleBySsAndResTypes, cTuple, ssType,
                                  resType, resTypePrev, resTypeNext)
    # end for isI

    # Histograms keyed on residue type pairs only (no secondary structure).
    keyListSorted1 = valueByResTypes.keys()
    keyListSorted1.sort()
    for resType in keyListSorted1:
        keyListSorted2 = valueByResTypes[resType].keys()
        keyListSorted2.sort()
        for prevResType in keyListSorted2:
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" %
                        (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            #            nTmessage("Count %6d in valueByResTypes[%s][%s]" % (sum(hist1d), resType, prevResType))
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    # Overall histogram of every accepted d1 value.
    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))
    #    setDeepByKeys(histd1, hist1d, resType, prevResType)

    # Replace any previous database file with a fresh pickle (protocol 2).
    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    output = open(dbase_file_abs_name, 'wb')
    dbase = {}
    dbase[
        'histd1BySs0AndResTypes'] = histd1BySs0AndResTypes  # 92 kb uncompressed in the case of ~1000 lines only
    dbase['histd1BySs1AndResTypes'] = histd1BySs1AndResTypes
    dbase['histd1CtupleBySsAndResTypes'] = histd1CtupleBySsAndResTypes
    dbase['histd1ByResTypes'] = histd1ByResTypes  # 56 kb
    dbase['histd1BySs0'] = histd1BySs0  # 4 kb
    dbase['histd1BySs1'] = histd1BySs1
    dbase['histd1'] = histd1  #  4 kb

    cPickle.dump(dbase, output, 2)
    output.close()
Esempio n. 12
0
    B = reFeature(thata)
    return my_copy, B


if __name__ == "__main__":
    X, Y, Xc, Yc, ALLX, ALLY = CreatData()
    Xc_ = getXmat(Xc)
    # Batch gradient descent.
    # The initial coefficient vector is B (i.e. thata).
    X_lemad = array(X) / 2.5  # feature scaling: shrink x before taking powers
    # Same powers-of-x design matrix as for the raw inputs, built on the
    # scaled inputs.
    X_mat_ = getXmat(X_lemad)
    X_matrix = getXmat(X)
    # One random start value per polynomial coefficient.  The count must be
    # ORDER + 1, otherwise the reshape below fails; it used to be a fixed 10.
    thata_ = [uniform(-1, 1) for _ in range(ORDER + 1)]
    thata0 = mat(array(thata_).reshape(ORDER + 1, 1))
    thata1 = thata0

    my_Y = array(dot(X_matrix, thata0).reshape(1, len(Y)))[0]

    thata = array(thata0.reshape(1, ORDER + 1))[0]
    Y_bgd, B = BatchGradientDescent(my_Y, Y, X_mat_, thata)

    thata = array(thata0.reshape(1, len(B)))[0]
Esempio n. 13
0
#coding:utf-8
# ------------------------------------------------------------
# 简介 : 机器学习基础 numpy
#
# 更新 : 2015年1月7日@
# ------------------------------------------------------------
from numpy import random
from numpy.matrixlib.defmatrix import mat
from numpy.lib.twodim_base import eye
# Draw a 4x4 array of random numbers; the result is not kept.
random.rand(4, 4)
# Wrap a second random 4x4 array as a matrix.
source = random.rand(4, 4)
randmat = mat(source)
print(randmat)
# .I gives the inverse of the matrix.
randmatv = randmat.I
print(randmatv)
# A matrix times its inverse should be the identity (1 on the diagonal,
# 0 elsewhere); subtracting eye(4) exposes the floating-point error.
residual = randmat * randmatv - eye(4)
print(residual)