def readSamples(self, fileName, key, recalc=False, samples=None):
    fn = fileName + ".pre"
    try:
        if recalc:
            raise IOError()
        with open(fn):
            pass
        print "precalculated file present"
        self.mu, self.cov = hsplit(mat(fromfile(fn).reshape((3, -1))), [1])
    except IOError:
        if samples is not None:
            self._samples = samples
            print "got samples: ", self._samples
        else:
            print "no file present, calculating..."
            smpls = loadmat(fileName)[key]
            print "loaded from mat file"
            self._samples = mat(smpls)
            print "reshaped into samples"
        self.mu = sum(self._samples, axis=1) / self._samples.shape[1]
        print "mu=", str(self.mu)
        sampdiffmu = self._samples - self.mu
        self.cov = sampdiffmu * sampdiffmu.T / self._samples.shape[1]
        print "cov=", str(self.cov)
        mat(hstack((self.mu, self.cov))).tofile(fn)
    self._invCov = self.cov.I
    self._detCov = det(self.cov)
    self._multConst = 1 / sqrt((2 * pi) ** 3 * self._detCov)
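# A minimal standalone sketch (not part of the original class) of how the cached
# terms above are typically used: evaluating the 3-D multivariate normal density
# at a column vector x. mu and cov below are made-up stand-ins for self.mu/self.cov.
from numpy import mat, eye, exp, sqrt, pi
from numpy.linalg import det

mu = mat([[0.0], [0.0], [0.0]])              # stand-in for self.mu (3x1)
cov = mat(eye(3))                            # stand-in for self.cov (3x3)
invCov = cov.I
multConst = 1 / sqrt((2 * pi) ** 3 * det(cov))

x = mat([[0.5], [-0.2], [1.0]])              # a sample point
diff = x - mu
density = multConst * exp(-0.5 * (diff.T * invCov * diff)[0, 0])
print(density)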
def bicgstabReg(X, Y, my_Y, B):
    '''Stabilized bi-conjugate gradient (BiCGSTAB) solver for the regularized fit.'''
    my_Y_copy = []
    for i in my_Y:
        my_Y_copy.append(i)
    error = CostFunctionReg(Y, my_Y_copy, B)
    R0star = Y - dot(X, B)
    R0 = Y - dot(X, B)
    rho0 = 1
    alp0 = 1
    w0 = 1
    V0 = mat(zeros(len(Y)).reshape(len(Y), 1))
    P0 = mat(zeros(len(Y)).reshape(len(Y), 1))
    while 1:
        rho1 = array(dot(R0star.T, R0))[0][0]
        beta = (rho1 / rho0) * (alp0 / w0)
        P1 = R0 + beta * (P0 - w0 * V0)
        V1 = dot(X, P1)
        alp0 = rho1 / (array(dot(R0star.T, V1))[0][0])
        h = B + alp0 * P1
        my_Y_copy = array(dot(X, array(h).reshape(len(h), 1)).reshape(1, len(Y)))[0]
        new_error = CostFunctionReg(Y, my_Y_copy, h)
        if abs(new_error - error) <= e:
            B = h
            break
        S = R0 - alp0 * V1
        t = dot(X, S)
        w1 = array(dot(t.T, S))[0][0] / array(dot(t.T, t))[0][0]
        B = h + w1 * S
        my_Y_copy = array(dot(X, array(B).reshape(len(B), 1)).reshape(1, len(Y)))[0]
        new_error = CostFunctionReg(Y, my_Y_copy, B)
        if abs(new_error - error) <= e:
            break
        R0 = S - w1 * t
        rho0 = rho1
        P0 = P1
        V0 = V1
        w0 = w1
        error = new_error
    return dot(X, B), B
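# For reference only (not part of the original code): SciPy ships a BiCGSTAB solver
# that can be used to sanity-check a hand-rolled version like the one above on a
# small, well-conditioned system. This sketch assumes SciPy is available.
import numpy as np
from scipy.sparse.linalg import bicgstab

A = np.array([[4.0, 1.0], [1.0, 3.0]])       # small symmetric positive-definite system
b = np.array([1.0, 2.0])
x, info = bicgstab(A, b)                     # info == 0 means the solver converged
print(x)
print(np.allclose(A.dot(x), b))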
def getXmat(Xc):
    # Build the polynomial design matrix with columns Xc**0, Xc**1, ..., Xc**ORDER.
    X_matrix = []
    for i in range(ORDER + 1):
        X_matrix.append(array(Xc) ** i)
    X_matrix = mat(X_matrix).T
    return X_matrix
def reFeature(B):
    # Undo the feature shrinking (x / 2.5): rescale coefficient i by 2.5**i.
    tha_ = (array(B).reshape(1, ORDER + 1))[0]
    b = []
    for i in range(ORDER + 1):
        b.append(tha_[i] / (2.5 ** i))
    return mat(array(b).reshape(ORDER + 1, 1))
def __calculateP__(self):
    # E-step: responsibility of component k for sample i, normalized per sample.
    p = mat(zeros(self.p.shape))
    for k in range(0, self.K):
        llh = Likelihood()
        llh.setParams(self.mean[k, :], self.covm[k, :, :])
        for i in range(0, self.N):
            p[k, i] = llh.getP(self.data[i, :])
    sp = sum(p, axis=0)
    for k in range(0, self.K):
        for i in range(0, self.N):
            self.p[k, i] = p[k, i] / sp[0, i]
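# A self-contained sketch (with made-up numbers) of the normalization step above:
# the unnormalized likelihoods p[k, i] are divided column-wise by their sum, so the
# responsibilities of the K components sum to 1 for every sample i.
from numpy import mat, sum as colsum

p = mat([[0.20, 0.05, 0.60],
         [0.10, 0.15, 0.20]])                # K=2 components, N=3 samples
sp = colsum(p, axis=0)                       # 1xN column sums
resp = p / sp                                # broadcasting divides each column by its sum
print(resp)
print(colsum(resp, axis=0))                  # every column sums to 1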
def normalize(dataset):
    # Prepend a bias term of 1.0, convert the string fields to numbers, and scale
    # every feature column (all but the last, the label) by its column maximum.
    for i in range(len(dataset)):
        dataset[i].insert(0, '1.0')
        dataset[i] = map(eval, dataset[i])
    dataset = mat(dataset)
    for i in range(dataset.shape[1] - 1):
        dataset[:, i] = dataset[:, i] / dataset[:, i].max()
    return dataset
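# A small standalone sketch (not from the original) of the column scaling done above:
# every feature column except the last one (the label) is divided by its column maximum.
from numpy import mat

data = mat([[1.0, 2.0, 10.0, 0.0],
            [1.0, 4.0, 20.0, 1.0],
            [1.0, 8.0, 40.0, 1.0]])
for j in range(data.shape[1] - 1):
    data[:, j] = data[:, j] / data[:, j].max()
print(data)                                  # feature columns now lie in (0, 1]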
def gradAscent(data, label, labelSet):
    # One-vs-rest indicator matrix: hmat[j][k] == 1 when sample j has label labelSet[k].
    hmat = zeros((data.shape[0], len(labelSet) - 1)).tolist()
    for k in range(len(labelSet) - 1):
        for j in range(data.shape[0]):
            if label[j] == labelSet[k]:
                hmat[j][k] = 1
    error = 0.00001
    m, n = shape(data)
    labelNum = len(labelSet) - 1
    if labelNum >= 2:
        weights = ones((n, labelNum))
    else:
        weights = ones((n, 1))
    hmat = mat(hmat)
    for i in range(labelNum):
        diff = 1
        while diff > error:
            if labelNum == 1:
                h = mat(sigMoid(data * weights[:, i]))
                deri = mat(label).transpose() - h
            else:
                h = mat(fakeSigMoid(data * weights))
                deri = hmat[:, i] - h[:, i]
            formal = copy.deepcopy(weights[:, i])
            # Ascent on the objective, hence the '+' in the update.
            weights[:, i] = weights[:, i] + alpha * data.transpose() * deri
            diff = abs(formal.transpose() * formal - weights[:, i].transpose() * weights[:, i])
            print "diff = ", diff
    return weights
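# A minimal self-contained sketch of the binary update rule used above,
# w <- w + alpha * X^T (y - sigmoid(Xw)), on made-up data. The sigmoid helper and the
# learning rate alpha are assumptions here, not taken from the original module.
from numpy import mat, ones, exp

def sigmoid(z):
    return 1.0 / (1.0 + exp(-z))

X = mat([[1.0, 0.5], [1.0, 1.5], [1.0, 3.0], [1.0, 4.5]])   # bias column + one feature
y = mat([[0.0], [0.0], [1.0], [1.0]])
alpha = 0.1
w = ones((2, 1))
for _ in range(200):
    h = sigmoid(X * w)
    w = w + alpha * X.T * (y - h)
print(sigmoid(X * w))                        # predictions move toward y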
def TheLeastSquareMethod(X, Y):
    """Least squares fit, both plain and with L2 (ridge) regularization."""
    regula = eye(ORDER + 1)
    X_matrix = []
    for i in range(ORDER + 1):
        X_matrix.append(array(X) ** i)
    X_matrix = mat(X_matrix).T
    Y_matrix = array(Y).reshape((len(Y), 1))
    X_matrix_T = X_matrix.T
    # Normal equations: B = (X^T X)^-1 X^T Y
    B = dot(dot(dot(X_matrix_T, X_matrix).I, X_matrix_T), Y_matrix)
    # Regularized: B1 = (X^T X + lamda * I)^-1 X^T Y
    B1 = dot(dot((dot(X_matrix_T, X_matrix) + lamda * regula).I, X_matrix_T), Y_matrix)
    result = dot(X_matrix, B)
    result_reg = dot(X_matrix, B1)
    return X_matrix, Y_matrix, B, result, result_reg, B1
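# A self-contained check (assumed, not from the original file) of the normal-equation
# formula used above, B = (X^T X)^-1 X^T Y, against numpy.polyfit on the same data.
import numpy as np

order = 3
x = np.linspace(0, 1, 20)
y = np.sin(2 * np.pi * x) + 0.05 * np.random.randn(20)
X = np.vander(x, order + 1, increasing=True)         # columns x**0 .. x**order
B = np.linalg.solve(X.T.dot(X), X.T.dot(y))          # normal equations
print(B)
print(np.polyfit(x, y, order)[::-1])                 # same coefficients, lowest order first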
# coding:utf-8
import numpy as np
from numpy.matrixlib.defmatrix import mat

# arange(10, 30, 5): an arithmetic sequence starting at 10 with step 5; 30 is excluded.
print np.arange(10, 30, 5)  # [10 15 20 25]

result = np.arange(0, 2, 0.5)
print result
print type(result).__name__  # ndarray
print mat(result).transpose()

# linspace(-1, 0, 5): 5 evenly spaced values over the closed interval [-1, 0].
print np.linspace(-1, 0, 5)

# Convert a numpy array to a matrix, then transpose it.
x = np.array([[1, 2], [3, 4]])
print mat(x)
print mat(x).transpose()

# Transposing turns this 1-row, 4-column matrix into 4 rows, 1 column.
x = np.array((1, 2, 3, 4))
print mat(x)
print mat(x).transpose()

# A 3-row, 2-column matrix; its transpose would have 2 rows and 3 columns.
x = np.array([[1, 2], [3, 4], [5, 6]])
print mat(x)
def main():
    'See above.'
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif', cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)
    reader = csv.reader(open(cvs_file_abs_name, "rb"), quoting=csv.QUOTE_NONE)

    valueBySs0AndResTypes = {}        # keys are SSi, RTi, RTi-1
    valueBySs1AndResTypes = {}        # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {}                   # keys are SSi
    valueBySs1 = {}                   # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = []                        # NB is an array without being keyed.
    histd1BySs0AndResTypes = {}       # keys are SSi, RTi, RTi-1
    histd1BySs1AndResTypes = {}       # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}

    linesByEntry = {}
    lineCount = 0
    for row in reader:
        lineCount += 1
        if lineCount > lineCountMax:
            break
        entryId = row[0]
        if not linesByEntry.has_key(entryId):
            linesByEntry[entryId] = []
        linesByEntry[entryId].append(row)

    skippedResTypes = []
    entryIdList = linesByEntry.keys()
    entryIdList.sort()

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[entryId2]
        for idx, line in enumerate(lineList):
            line.append(idx)
        lineListSorted = NTsort(lineList, BFACTOR_COLUMN, inplace=False)
        # Now throw away the worst 10 % of residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        to_remove_count = n - bad_count
        # nTmessage("Removing at least %d from %d residues" % (bad_count,n))
        badIdxList = [lineItem[IDX_COLUMN] for lineItem in lineListSorted[to_remove_count:n]]
        iList = range(n)
        iList.reverse()
        for i in iList:
            lineItem = lineList[i]
            max_bfactor = float(lineItem[BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR:
                # nTdebug('Skipping because max bfactor in dihedral %.3f is above %.3f %s' % (max_bfactor, DEFAULT_MAX_BFACTOR, lineItem))
                del lineList[i] # TODO: check if indexing is still right or we shoot in the foot.
                continue
            if i in badIdxList:
                # nTdebug('Skipping because bfactor worst %.3f %s' % (max_bfactor, lineItem))
                del lineList[i]
                continue
        removed_count = n - len(lineList)
        # nTdebug("Reduced list by %d" % removed_count)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for _r, row in enumerate(linesByEntry[entryId2]):
            # 1zzk,A,GLN , 17,E, 205.2, 193.6
            # 1zzk,A,VAL , 18,E, 193.6, 223.2
            # 1zzk,A,THR , 19,E, 223.2, 190.1
            (entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor, _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName(resType)
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = d1.strip()
            d1 = floatParse(d1)
            if isNaN(d1):
                # nTdebug("d1 %s is a NaN on row: %s" % (d1,row))
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return
            if not (resType in common20AAList):
                # nTmessage("Skipping uncommon residue: %s" % resType)
                if not (resType in skippedResTypes):
                    skippedResTypes.append(resType)
                continue
            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType, resType, prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType, resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append(d1)
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes)
    nTmessage("Got count of values: %r" % len(value))

    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = valueBySs0AndResTypes.keys()
    keyListSorted1.sort()
    for isI in (True, False):
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes

        for ssType in keyListSorted1:
            # keyListSorted1b = deepcopy(keyListSorted1)
            # for ssTypePrev in keyListSorted1b:
            d1List = valueBySs[ssType]
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)
            keyListSorted2 = valueBySsAndResTypes[ssType].keys()
            keyListSorted2.sort()
            for resType in keyListSorted2:
                # nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySsAndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for prevResType in keyListSorted3:
                    # nTmessage("Working on valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror("Expected d1List from valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
                    # nTmessage("Count %6d in valueBySsAndResTypes[%s][%s][%s]" % (sum(hist1d), ssType, resType, prevResType))
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType, resType, prevResType)

        # Now that they are all in we can redo this.
        # Delete the reference -not- the object.
        valueBySs = None
        valueBySsAndResTypes = None
        histd1BySs = None
        histd1BySsAndResTypes = None
        for ssType in keyListSorted1:
            for resType in keyListSorted2:
                # nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySs0AndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for resTypePrev in keyListSorted3:
                    keyListSorted4 = keyListSorted3[:] # take a copy
                    for resTypeNext in keyListSorted4:
                        hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType, resType, resTypePrev) # x-axis
                        # This was a bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                        hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType, resTypeNext, resType)
                        if hist1 == None:
                            nTdebug('skipping for hist1 is empty for [%s] [%s] [%s]' % (ssType, resTypePrev, resType))
                            continue
                        if hist2 == None:
                            nTdebug('skipping for hist2 is empty for [%s] [%s] [%s]' % (ssType, resType, resTypeNext))
                            continue
                        m1 = mat(hist1, dtype='float')
                        m2 = mat(hist2, dtype='float')
                        m2 = m2.transpose() # pylint: disable=E1101
                        hist2d = multiply(m1, m2)
                        cTuple = getEnsembleAverageAndSigmaHis(hist2d)
                        (_c_av, c_sd, _hisMin, _hisMax) = cTuple #@UnusedVariable
                        cTuple += tuple([str([ssType, resType, resTypePrev, resTypeNext])]) # append the hash keys as a way of id.
                        # nTdebug("For ssType %s residue types %s %s %s found (av/sd/min/max) %8.0f %8.0f %8.0f %8.0f" % (
                        #     ssType, resType, resTypePrev, resTypeNext, c_av, c_sd, hisMin, hisMax))
                        if c_sd == None:
                            nTdebug('Failed to get c_sd when testing not all residues are present in smaller sets.')
                            continue
                        if c_sd == 0.:
                            nTdebug('Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.')
                            continue
                        setDeepByKeys(histd1CtupleBySsAndResTypes, cTuple, ssType, resType, resTypePrev, resTypeNext)
    # end for isI

    keyListSorted1 = valueByResTypes.keys()
    keyListSorted1.sort()
    for resType in keyListSorted1:
        keyListSorted2 = valueByResTypes[resType].keys()
        keyListSorted2.sort()
        for prevResType in keyListSorted2:
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" % (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            # nTmessage("Count %6d in valueByResTypes[%s][%s]" % (sum(hist1d), resType, prevResType))
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))
    # setDeepByKeys(histd1, hist1d, resType, prevResType)

    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    output = open(dbase_file_abs_name, 'wb')
    dbase = {}
    dbase['histd1BySs0AndResTypes'] = histd1BySs0AndResTypes # 92 kb uncompressed in the case of ~1000 lines only
    dbase['histd1BySs1AndResTypes'] = histd1BySs1AndResTypes
    dbase['histd1CtupleBySsAndResTypes'] = histd1CtupleBySsAndResTypes
    dbase['histd1ByResTypes'] = histd1ByResTypes # 56 kb
    dbase['histd1BySs0'] = histd1BySs0 # 4 kb
    dbase['histd1BySs1'] = histd1BySs1
    dbase['histd1'] = histd1 # 4 kb
    cPickle.dump(dbase, output, 2)
    output.close()
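# A tiny standalone illustration (not from the original) of the outer-product step in
# main() above: two 1-D count histograms are combined into a 2-D histogram by an
# elementwise multiply with broadcasting, mat(h1) being 1xN and mat(h2).T being Nx1.
from numpy import mat, multiply

h1 = [2, 0, 5]                               # counts along the x-axis
h2 = [1, 3, 4]                               # counts along the y-axis
m1 = mat(h1, dtype='float')                  # shape (1, 3)
m2 = mat(h2, dtype='float').transpose()      # shape (3, 1)
hist2d = multiply(m1, m2)                    # broadcasts to a (3, 3) outer product
print(hist2d)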
    B = reFeature(thata)
    return my_copy, B


if __name__ == "__main__":
    X, Y, Xc, Yc, ALLX, ALLY = CreatData()
    Xc_ = getXmat(Xc)
    '''
    Batch gradient descent.
    The initial coefficient matrix is B (i.e. thata).
    '''
    X_lemad = array(X) / 2.5  # feature shrinking
    X_mat_ = []
    for i in range(ORDER + 1):
        X_mat_.append(X_lemad ** i)
    X_mat_ = mat(X_mat_).T
    # thata = array(B.reshape(1,len(B)))[0] + uniform(0,0.1)
    X_matrix = getXmat(X)
    thata_ = []
    for i in range(10):
        thata_.append(uniform(-1, 1))
    thata0 = mat(array(thata_).reshape(ORDER + 1, 1))
    thata1 = thata0
    my_Y = array(dot(X_matrix, thata0).reshape(1, len(Y)))[0]
    thata = array(thata0.reshape(1, ORDER + 1))[0]
    Y_bgd, B = BatchGradientDescent(my_Y, Y, X_mat_, thata)
    thata = array(thata0.reshape(1, len(B)))[0]
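# A self-contained sketch (with assumed data and learning rate) of plain batch gradient
# descent for a polynomial least-squares fit, the technique the setup above drives;
# it is not the original BatchGradientDescent implementation.
import numpy as np

order = 3
lr = 0.05
x = np.linspace(0, 1, 20)
y = np.sin(2 * np.pi * x) + 0.05 * np.random.randn(20)
X = np.vander(x / 2.5, order + 1, increasing=True)   # the same "shrink by 2.5" trick
theta = np.random.uniform(-1, 1, order + 1)
for _ in range(5000):
    grad = X.T.dot(X.dot(theta) - y) / len(y)        # gradient of half the mean squared error
    theta = theta - lr * grad
print(theta)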
#coding:utf-8
# ------------------------------------------------------------
# Summary : machine learning basics with numpy
#
# Updated : 2015-01-07
# ------------------------------------------------------------
from numpy import random
from numpy.matrixlib.defmatrix import mat
from numpy.lib.twodim_base import eye

# 4x4 array of random numbers
random.rand(4, 4)

# Convert the array into a matrix
randmat = mat(random.rand(4, 4))
print(randmat)

# .I gives the inverse matrix
print(randmat.I)
randmatv = randmat.I

# A matrix times its inverse has ones on the diagonal and zeros elsewhere;
# eye(4) is the identity matrix, and subtracting it shows the numerical error.
print(randmat * randmatv - eye(4))