def roc(labels, predictions):
    """Compute the points of a receiver operating characteristic curve.

    labels: true labels; values > 0 count as positive, everything else
        as negative.
    predictions: scores produced by the predictor; higher means more
        likely positive.

    Returns [tp, fp]: true-positive and false-positive rates obtained by
    walking through the samples in order of decreasing score, with a
    leading 0 and a trailing 1 added to both curves.
    """
    # flatten both inputs and binarise the labels
    is_positive = S.array(labels).reshape([-1]) > 0
    scores = S.array(predictions).reshape([-1])

    # rank the ground truth by descending score
    ranking = S.argsort(scores)[::-1]
    ranked_truth = is_positive[ranking]
    ranked_false = ~ranked_truth

    # cumulative hit / false-alarm fractions along the ranking
    tp = S.double(N.cumsum(ranked_truth)) / ranked_truth.sum()
    fp = S.double(N.cumsum(ranked_false)) / ranked_false.sum()

    # add the end points of the curve
    tp = S.concatenate(([0], tp, [1]))
    fp = S.concatenate(([0], fp, [1]))
    return [tp, fp]
def crossvalidate(X, Y, f=5, trainfun=train_ncc):
    '''
    Test generalization performance of a linear classifier by crossvalidation

    Definition:     crossvalidate(X, Y, f=5, trainfun=train_ncc)
    Input:      X        -  DxN array of N data points with D features
                Y        -  1D array of length N of class labels
                f        -  number of cross-validation folds
                trainfun -  function for linear classification training;
                            called as trainfun(X_train, Y_train) and
                            expected to return (w, b)
    Output:     acc_train - (f,) array of accuracies in each train fold
                acc_test  - (f,) array of accuracies in each test fold

    Only the first f * (N // f) samples are used so that all folds have
    the same size.
    '''
    # Floor division keeps the fold size an int on both Python 2 and 3;
    # a float here makes the reshape below fail.
    fold_size = X.shape[-1] // f
    N = f * fold_size
    idx = sp.reshape(sp.arange(N), (f, fold_size))
    acc_train = sp.zeros((f))
    acc_test = sp.zeros((f))

    for ifold in range(f):
        # boolean selector picking exactly one fold as the test set
        testidx = sp.zeros((f), dtype=bool)
        testidx[ifold] = 1
        test = idx[testidx, :].flatten()
        train = idx[~testidx, :].flatten()
        w, b = trainfun(X[:, train], Y[train])
        acc_train[ifold] = sp.sum(
            sp.sign(w.dot(X[:, train]) - b) == Y[train]) / sp.double(train.shape[0])
        acc_test[ifold] = sp.sum(
            sp.sign(w.dot(X[:, test]) - b) == Y[test]) / sp.double(test.shape[0])

    return acc_train, acc_test
def lnGauss(x, params):
    """
    Returns the logarithm of the normal density and its derivative
    with respect to x::

        [ln N(x | mu, sigma), d/dx ln N(x | mu, sigma)]

    **Note**: ``sigma`` enters the formula squared, i.e. the code treats
    it as a standard deviation. The result is logarithmic.

    **Parameters:**

    x : [double]
        the point(s) at which the log density shall be computed.

    params : [mu, sigma]
        the distribution parameters mean and spread.
    """
    mu = SP.double(params[0])
    sigma = SP.double(params[1])
    halfLog2Pi = 0.91893853320467267  # =.5*(log(2*pi))
    # Expanded analytically: log(exp(a) / sigma) == a - log(sigma).
    # Taking the log of the exponential underflows to -inf once x is far
    # from mu; the expanded form stays finite.
    N = -((x - mu)**2) / (2 * (sigma**2)) - SP.log(sigma) - halfLog2Pi
    dN = -(x - mu) / (sigma**2)
    return [N, dN]
def crossvalidate(X,Y, f=5, trainfun=train_ncc):
    '''
    Test generalization performance of a linear classifier by crossvalidation

    Definition:     crossvalidate(X,Y, f=5, trainfun=train_ncc)
    Input:      X        -  DxN array of N data points with D features
                Y        -  1D array of length N of class labels
                f        -  number of cross-validation folds
                trainfun -  function for linear classification training;
                            called as trainfun(X_train, Y_train) and
                            expected to return (w, b)
    Output:     acc_train - (f,) array of accuracies in each train fold
                acc_test  - (f,) array of accuracies in each test fold

    Samples beyond f * (N // f) are ignored so every fold has equal size.
    '''
    # Integer fold size: plain "/" yields a float under true division,
    # which is not a valid array shape.
    per_fold = X.shape[-1] // f
    N = f * per_fold
    idx = sp.reshape(sp.arange(N), (f, per_fold))
    acc_train = sp.zeros((f))
    acc_test = sp.zeros((f))

    for ifold in range(f):
        # mark the held-out fold
        testidx = sp.zeros((f), dtype=bool)
        testidx[ifold] = 1
        test = idx[testidx, :].flatten()
        train = idx[~testidx, :].flatten()
        w, b = trainfun(X[:, train], Y[train])
        train_hits = sp.sum(sp.sign(w.dot(X[:, train]) - b) == Y[train])
        test_hits = sp.sum(sp.sign(w.dot(X[:, test]) - b) == Y[test])
        acc_train[ifold] = train_hits / sp.double(train.shape[0])
        acc_test[ifold] = test_hits / sp.double(test.shape[0])

    return acc_train, acc_test
def roc(labels, predictions):
    """roc - calculate receiver operator curve

    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used

    Returns the list [tp, fp] of true-positive and false-positive rates
    along the ranking, each padded with a leading 0 and a trailing 1.
    """
    # 1. convert to flat arrays
    label_vec = S.array(labels).reshape([-1])
    score_vec = S.array(predictions).reshape([-1])

    # 2. threshold the labels and reorder them by descending score
    truth = (label_vec > 0)[S.argsort(score_vec)[::-1]]

    # 3. cumulative rates for positives and negatives
    rates = []
    for member in (truth, ~truth):
        rate = S.double(N.cumsum(member)) / member.sum()
        # add end points
        rates.append(S.concatenate(([0], rate, [1])))
    return rates
def pr(labels, predictions):
    """Compute a precision/recall curve.

    labels: true labels; values > 0 are positives.
    predictions: predictor scores, higher meaning more likely positive.

    Returns [recall, precision], both evaluated after each sample when
    walking through the samples by decreasing score.
    """
    # flatten inputs and binarise the labels
    truth = S.array(labels).reshape([-1]) > 0
    scores = S.array(predictions).reshape([-1])

    # ground truth in order of descending score
    truth = truth[S.argsort(scores)[::-1]]

    hits = N.cumsum(truth)
    false_alarms = N.cumsum(~truth)
    misses = truth.sum() - hits

    precision = S.double(hits) / (hits + false_alarms)
    recall = S.double(hits) / (hits + misses)
    return [recall, precision]
def calc_AF(M,major=0,minor=2):
    """calculate minor allele frequency, by default assuming that minor==2

    M: genotype matrix with one row per individual and one column per
       variant (the sums run over axis 0).
    major: accepted for interface compatibility; not used.
    minor: 2 selects the 0/1/2 coding (0 = homozygous major,
           1 = heterozygous, 2 = homozygous minor); any other value
           selects the 0/1 coding.

    Returns a dict with the per-variant arrays 'af' (minor allele
    frequency), 'Nmajor' and 'Nminor' (allele counts).
    """
    if minor==2:
        # A heterozygous call carries one major and one minor allele, so
        # it has to be added to both counts.
        Nhet   = (M==1).sum(axis=0)
        Nmajor = 2*(M==0).sum(axis=0) + Nhet
        Nminor = 2*(M==2).sum(axis=0) + Nhet
        af  = Nminor/sp.double(2*M.shape[0])
    else:
        Nmajor = (M==0).sum(axis=0)
        Nminor = (M==1).sum(axis=0)
        af  = Nminor/sp.double(1*M.shape[0])
    RV = {}
    RV['af'] = af
    RV['Nmajor'] = Nmajor
    RV['Nminor'] = Nminor
    return RV
def split_jobs(Y, Njobs):
    """Split a phenotype matrix column-wise into (at most) Njobs chunks.

    Y: 2-D array, samples x phenotypes.
    Njobs: requested number of jobs; capped at the number of phenotypes.

    Returns a list of [i0, i1, Y[:, i0:i1]] entries covering all columns
    in order. Every job gets floor(Np / Njobs) columns, the last job
    takes whatever remains. An empty list is returned when Y has no
    columns.

    Raises ValueError if Njobs is smaller than 1.
    """
    if Njobs < 1:
        raise ValueError("Njobs must be at least 1")
    splits = []
    [N, Np] = Y.shape
    if Np == 0:
        return splits
    # maximal splitting range is one job per phenotype
    Njobs = min(Njobs, Np)
    # phenotypes per job (rounded down)
    npj = Np // Njobs

    i0 = 0
    i1 = npj
    for n in range(Njobs):
        if n == (Njobs - 1):
            # make sure the last job spans all the rest
            i1 = Np
        splits.append([i0, i1, Y[:, i0:i1]])
        # next split
        i0 = i1
        i1 = i1 + npj
    return splits
def getParameters(self, key="name", parse=True):
    """Return the parameters of an xml model structure as a dict.

    key: name of the attribute whose value is used as dictionary key.
    parse: if True the 'value' attribute is converted according to the
        'eval' / 'type' attributes of the param element: a non-empty
        'eval' evaluates the value as a Python expression, otherwise
        type 'matrix' goes through self.parseMatrixParameter, 'double'
        and 'int' are converted numerically and 'str' is kept verbatim.
        If False the raw attribute value is returned.

    Raises Exception when parse is True and a param carries neither a
    known type nor an eval attribute.
    """
    rv = {}
    for param in self.getElementsByTagName('param', 1):
        value = param.getAttribute('value')
        if parse:
            ptype = param.getAttribute('type')
            if param.getAttribute('eval'):
                # SECURITY NOTE(review): eval() executes arbitrary code
                # taken from the model file; only load trusted files.
                value = eval(value)
            elif ptype == 'matrix':
                value = self.parseMatrixParameter(value)
            elif ptype == 'double':
                value = S.double(value)
            elif ptype == 'int':
                value = S.int32(value)
            elif ptype == 'str':
                # no action for string
                pass
            else:
                # report the parameter's key rather than the node object
                raise Exception(
                    "Invalid Attribute exception attribute %s has no type or eval!"
                    % param.getAttribute(key))
        rv[str(param.getAttribute(key))] = value
    return rv
def gender_label_format(userPath,contentPath,DataPath,WritePath):
    '''Attach a gender label to image feature rows and dump them as svmlight.

    userPath:    CSV of users; column 0 is the user id, column 1 the gender.
    contentPath: CSV of posts; column 0 is the user id, column 1 the
                 image name.
    DataPath:    CSV of features; column 1 is the image name, columns 2..
                 are numeric features.
    WritePath:   output file written with dump_svmlight_file.

    The label is 0 when the gender field equals '女' and 1 otherwise.
    For every image the first post with that image name is used, and
    within the user list the first entry with that post's user id.
    Images without a label are reported and left out of the output, so
    features and labels always have the same length.
    '''
    print('导入数据')
    imagename = []
    data = []
    with codecs.open(DataPath) as img:
        for line in img:
            datatemp = line.strip().split(',')
            imagename.append(datatemp[1])
            data.append([double(tk) for tk in datatemp[2:]])
    data = np.array(data)

    print('导入用户信息')
    with codecs.open(userPath) as userf:
        userlist = [line.strip().split(',') for line in userf]

    print('导入图片信息')
    with codecs.open(contentPath) as contentf:
        contentlist = [line.strip().split(',') for line in contentf]

    # Lookup tables replace the nested linear scans. setdefault keeps the
    # first occurrence, matching a scan that stops at the first hit.
    owner_of_image = {}
    for li in contentlist:
        if len(li) > 1:
            owner_of_image.setdefault(li[1], li[0])
    gender_of_user = {}
    for user in userlist:
        if len(user) > 1:
            gender_of_user.setdefault(user[0], user[1])

    print('填入标签')
    label = []
    keep = []
    for i, name in enumerate(imagename):
        print(name)
        gender = None
        if name in owner_of_image:
            gender = gender_of_user.get(owner_of_image[name])
        if gender is None:
            print(i)
            print(name+"没有对应的标签")
            keep.append(False)
            continue
        print(gender)
        label.append(0 if gender == '女' else 1)
        keep.append(True)

    # np.delete returns a copy, so unlabeled rows have to be removed by
    # reassigning; a boolean mask does it in one step.
    data = data[np.array(keep, dtype=bool)]
    label = np.array(label)
    print(label)
    print(len(data))
    print(len(label))
    dump_svmlight_file(data, label,WritePath,zero_based=False)
def check_grey(coords):
    """
    Function to check if a particular cluster corresponds to grey matter.

    Note: this function uses the CA_N27_GW atlas. Other metrics could be used, but this feature needs
    to be added.

    Runs the external ``whereami`` command and parses the grey matter
    probability from its "Focus point" output line.

    Parameters
    ----------
    coords: tuple or list of floats
        Coordinates, should have length 3

    Returns
    -------
    prob: float
        probability of grey matter
    """
    assert len(coords) == 3
    atlas = "CA_N27_GW"

    # where am I command.
    # NOTE(review): %d truncates the coordinates to integers before they
    # reach whereami -- confirm this is intended.
    waicmd = "whereami -atlas %s -space MNI %d %d %d 2>/dev/null" % (
        (atlas, ) + tuple(coords))
    # universal_newlines=True makes communicate() return text rather than
    # bytes, so the string split below also works on Python 3.
    proc = subprocess.Popen(waicmd, stdout=subprocess.PIPE, shell=True,
                            universal_newlines=True)
    (out, err) = proc.communicate()

    # raw string: the backslashes belong to the regex, not to Python
    patt = re.compile(r" Focus point: grey   \(p = ([0-9]\.[0-9]*)\)")
    matches = [patt.match(line) for line in out.split("\n")]
    prob = double([m.group(1) for m in matches if m])

    # exactly one focus-point line is expected in the output
    assert len(prob) == 1
    return prob[0]
def estimate_q_values(PV, m=None, pi=1):
    """estimate q values from a list of P-values
    this algorithm is taken from Storey, significance testing for genomic ...

    PV: array of p-values (flattened internally; the result is 1-D)
    m:  number of tests, (if not len(PV))
    pi: fraction of expected true null (1 is a conservative estimate);
        pass None to estimate it from the p-value distribution

    originally written by Oliver Stegel from MPI and edited by Vipin
    """
    PV = PV.ravel()
    lPV = len(PV)
    if m is None:
        m = lPV
    # float arithmetic throughout; integer m / lPV would truncate under
    # Python 2 division
    m = float(m)
    if lPV == 0:
        return sp.zeros_like(PV)

    #1. sort pvalues
    IPV = PV.argsort()
    PV = PV[IPV]

    #2. estimate lambda
    if pi is None:
        # the number of grid points has to be an integer
        lrange = sp.linspace(0.05, 0.95, int(max(lPV // 100, 10)))
        pil = sp.double((PV[:, None] > lrange).sum(axis=0)) / lPV
        pilr = pil / (1 - lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi = 1
        #if there is something useful in there use the something close to 1
        if pilr[-1] < 1:
            pi = pilr[-1]

    #3. initialise q values
    QV_ = pi * m / lPV * PV
    QV_[-1] = min(QV_[-1], 1.0)
    #4. update estimate, from the second largest p-value down to the
    #   smallest one (index 0 included) to enforce monotonicity
    for i in range(lPV - 2, -1, -1):
        QV_[i] = min(pi * m * PV[i] / (i + 1), QV_[i + 1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_
    return QV
def stacked_classifier(splt, eps=15):
    """Train per-site solvers and a logistic-regression stacker on top.

    splt: dict with a 'test' entry (features, labels) and a 'train' entry
        holding one (features, labels) pair per site.
    eps: forwarded to dpsvmsolve as keyword ``eps``.

    Every site is solved with dpsvmsolve. For each kind of solution
    reported by the first site, a LogisticRegression is fitted on the
    last site's data transformed through all but the last solution
    (data2data) and scored on the test set. The error is the percentage
    of test samples whose prediction times label is negative.

    Returns three lists (for 'obj', 'svm' and 'out'): the per-site values
    taken from the solutions followed by the stacked error.
    """
    test = splt['test']
    test_x, test_y = test[0], test[1]

    # compute SVM solution at every site
    sols = []
    for site in splt['train']:
        sols.append(dpsvmsolve(site[0], site[1], test_x, test_y, eps=eps))

    last_site = splt['train'][-1]
    e = {}
    for kind in sols[0][0]:
        # train
        features = data2data(last_site[0], sols[:-1], kind=kind)
        clf = LogisticRegression()
        clf.fit(features, last_site[1])
        # test
        features = data2data(test_x, sols[:-1], kind=kind)
        signed = clf.predict(features) * test_y
        wrong = abs(sum(min(0, x) for x in signed))
        e[kind] = 100 * wrong / double(len(test_y))

    def _column(name):
        # per-site value followed by the stacked error for one kind
        return [v[1][name] for v in sols] + [e[name]]

    return (_column('obj'), _column('svm'), _column('out'))
def user_format(userPath,userSavePath):
    '''Label the follower / following / post counts with the user's gender.

    userPath:     CSV of users; column 1 is the gender, columns 4.. are
                  the numeric features.
    userSavePath: output file written with dump_svmlight_file.

    The label is 0 when the gender field equals '女' and 1 otherwise.
    '''
    print('读取用户列表')
    # the context manager closes the file even if parsing fails
    with codecs.open(userPath) as userf:
        userlist = [line.strip().split(',') for line in userf]
    userlist = np.array(userlist)

    # feature columns and gender column of every user
    data = userlist.T[4:].T
    genderlabel = userlist.T[1].T

    gender = [0 if entry == '女' else 1 for entry in genderlabel]
    features = [[double(tk) for tk in row] for row in data]

    data = np.array(features)
    genderlabel = np.array(gender)
    print(len(data[0]))
    print(len(genderlabel))
    dump_svmlight_file(data, genderlabel,userSavePath,zero_based=False)
def check_grey(coords):
    """
    Function to check if a particular cluster corresponds to grey matter.

    Note: this function uses the CA_N27_GW atlas. Other metrics could be used, but this feature needs
    to be added.

    The external ``whereami`` program is invoked and the grey matter
    probability is read from its "Focus point" output line.

    Parameters
    ----------
    coords: tuple or list of floats
        Coordinates, should have length 3

    Returns
    -------
    prob: float
        probability of grey matter
    """
    assert len(coords) == 3
    atlas = "CA_N27_GW"

    # where am I command.
    # NOTE(review): the %d conversions drop the fractional part of the
    # coordinates -- confirm this is intended.
    waicmd = "whereami -atlas %s -space MNI %d %d %d 2>/dev/null" % ((atlas, ) + tuple(coords))
    # Ask for text output; without universal_newlines the pipe yields
    # bytes on Python 3 and splitting on "\n" raises TypeError.
    proc = subprocess.Popen(waicmd, stdout=subprocess.PIPE, shell=True,
                            universal_newlines=True)
    (out,err) = proc.communicate()

    # raw string so that \( and \. reach the regex engine unchanged
    patt = re.compile(r" Focus point: grey   \(p = ([0-9]\.[0-9]*)\)")
    found = []
    for line in out.split("\n"):
        m = patt.match(line)
        if m:
            found.append(m.group(1))
    prob = double(found)

    # exactly one focus-point line is expected
    assert len(prob) == 1
    return prob[0]
def estimate_q_values(PV,m=None,pi=1):
    """estimate q values from a list of P-values
    this algorithm is taken from Storey, significance testing for genomic ...

    PV: array of p-values (flattened internally; the result is 1-D)
    m:  number of tests, (if not len(PV))
    pi: fraction of expected true null (1 is a conservative estimate);
        None requests an estimate from the p-value distribution

    originally written by Oliver Stegel from MPI and edited by Vipin
    """
    PV = PV.ravel()
    lPV = len(PV)
    # work in floats: with integer m and lPV, Python 2 would truncate
    # the ratio m / lPV
    m = float(lPV if m is None else m)
    if lPV == 0:
        return sp.zeros_like(PV)

    #1. sort pvalues
    IPV = PV.argsort()
    PV = PV[IPV]

    #2. estimate lambda
    if pi is None:
        # linspace needs an integer number of points
        npoints = int(max(lPV // 100, 10))
        lrange = sp.linspace(0.05,0.95,npoints)
        pil = sp.double((PV[:,None]>lrange).sum(axis=0))/lPV
        pilr = pil/(1-lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        #if there is something useful in there use the something close to 1
        pi = pilr[-1] if pilr[-1]<1 else 1

    #3. initialise q values
    QV_ = pi * m/lPV* PV
    QV_[-1] = min(QV_[-1],1.0)
    #4. update estimate; the loop has to reach index 0, otherwise the
    #   smallest p-value keeps its unadjusted initial value
    for i in range(lPV-2,-1,-1):
        QV_[i] = min(pi*m*PV[i]/(i+1),QV_[i+1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_
    return QV
def afCalc(M):
    """Return the per-column allele frequency of a genotype matrix.

    M is indexed individuals x variants. Entries equal to 0 contribute
    two alleles and entries equal to 1 contribute one allele; the count
    is divided by twice the number of rows.
    """
    n_alleles = sp.double(2 * M.shape[0])
    two_copies = (M == 0).sum(axis=0)
    one_copy = (M == 1).sum(axis=0)
    return (2 * two_copies + one_copy) / n_alleles
def load_sentiment(negative='SentiWS_v1.8c/SentiWS_v1.8c_Negative.txt',
                   positive='SentiWS_v1.8c/SentiWS_v1.8c_Positive.txt'):
    """Load a word -> sentiment score dictionary from two lexicon files.

    Each line is tab separated: ``word|tag``, the score and optionally a
    comma separated list of inflections. The base word (the part before
    '|') and every inflection are mapped to the score. The negative file
    is read first; entries from the positive file overwrite duplicates.
    Lines without a score column (e.g. blank lines) are skipped.
    """
    words = dict()
    for path in (negative, positive):
        # the context manager makes sure the file handle is released
        with open(path) as handle:
            for line in handle:
                parts = line.strip('\n').split('\t')
                if len(parts) < 2:
                    continue
                score = double(parts[1])
                words[parts[0].split('|')[0]] = score
                if len(parts) > 2:
                    for inflection in parts[2].strip('\n').split(','):
                        words[inflection] = score
    return words
def lnL1(x,params):
    """L1 type prior defined on the non-log weights

    params[0]: prior cost

    Returns [value, derivative] with value = -cost / x and
    derivative = cost / x**2.

    Note: this prior only works if the parameter is constrained to be
    strictly positive
    """
    cost = SP.double(params[0])
    inverse = 1./x
    return [-cost * inverse, + cost*inverse**2]
def lib_size_factors(data):
    """calculate library size correction factors

    data: mapping with the count arrays 'counts_res' and 'counts_sus'.

    Prints both totals and returns [res_sum / sus_sum, 1.0].
    """
    res_sum = data['counts_res'].sum()
    sus_sum = data['counts_sus'].sum()
    print(res_sum)
    print(sus_sum)
    #L = [1.0, SP.double(res_sum/sus_sum)]
    #corrected the direction for normalisation (Norman and Anza, jan 2020)
    # Convert before dividing so integer totals can never be floor-divided.
    L = [SP.double(res_sum)/sus_sum, 1.0]
    #print(L)
    return L
def lnL1(x, params):
    """L1 type prior defined on the non-log weights

    params[0]: prior cost

    Evaluates -cost / x together with its derivative cost / x**2 and
    returns them as the list [lng, dlng].

    Note: this prior only works if the parameter is constrained to be
    strictly positive
    """
    strength = SP.double(params[0])
    reciprocal = 1. / x
    log_prior = -strength * reciprocal
    gradient = +strength * reciprocal**2
    return [log_prior, gradient]
def term_label_format(imagePath,termPath,termWritePath,contentPath=None):
    '''Attach a terminal (device) label to image feature rows and dump
    them in svmlight format.

    imagePath:     CSV of features; column 1 is the image name, columns
                   2.. are numeric features.
    termPath:      CSV mapping a terminal name (column 0) to an integer
                   label (column 1).
    termWritePath: output file written with dump_svmlight_file.
    contentPath:   CSV of posts; column 1 is the image name, column 6 the
                   terminal name. When omitted, a module-level name
                   ``contentPath`` is used, which is what the body relied
                   on before this parameter existed.

    For every image the first post with that image name is used, and
    within the terminal list the first entry with that post's terminal.
    Images without a label are reported and left out of the output.

    Raises NameError if contentPath is neither passed nor defined at
    module level.
    '''
    if contentPath is None:
        try:
            contentPath = globals()['contentPath']
        except KeyError:
            raise NameError("name 'contentPath' is not defined")

    imagename = []
    data = []
    with codecs.open(imagePath) as img:
        for line in img:
            datatemp = line.strip().split(',')
            imagename.append(datatemp[1])
            data.append([double(tk) for tk in datatemp[2:]])
    data = np.array(data)

    with codecs.open(termPath) as comf:
        termlist = [line.strip().split(",") for line in comf]

    with codecs.open(contentPath) as contentf:
        contentlist = [line.strip().split(',') for line in contentf]

    # first occurrence wins, like a scan that stops at the first hit
    terminal_of_image = {}
    for li in contentlist:
        if len(li) > 6:
            terminal_of_image.setdefault(li[1], li[6])
    label_of_terminal = {}
    for term in termlist:
        if len(term) > 1:
            label_of_terminal.setdefault(term[0], term[1])

    print('填入标签')
    termlabel = []
    keep = []
    for i, name in enumerate(imagename):
        terminal = terminal_of_image.get(name)
        if terminal is None or terminal not in label_of_terminal:
            print(i)
            print(name+"没有对应的标签")
            keep.append(False)
            continue
        raw_label = label_of_terminal[terminal]
        termlabel.append(int(raw_label))
        print("%s %s %s %s" % (i, name, terminal, raw_label))
        keep.append(True)

    # ndarray rows cannot be removed with "del"; select the labeled rows
    # with a boolean mask instead.
    data = data[np.array(keep, dtype=bool)]
    termlabel = np.array(termlabel)
    print(termlabel)
    print(len(data))
    print(len(termlabel))
    dump_svmlight_file(data, termlabel,termWritePath,zero_based=False)
def lnGamma(x,params):
    """
    Returns the ``log gamma (x,k,t)`` distribution and its derivative with::

        lngamma     = (k-1)*log(x) - x/t -gammaln(k) - k*log(t)
        dlngamma    = (k-1)/x - 1/t

    **Parameters:**

    x : [double]
        the point(s) at which the distribution shall be computed.

    params : [k, t]
        the distribution parameters k and t.
    """
    #explicitly convert to double to avoid int trouble :-)
    k, t = SP.double(params[0]), SP.double(params[1])
    shape_minus_one = k-1
    lng = shape_minus_one*SP.log(x) - x/t -SPs.gammaln(k) - k*SP.log(t)
    dlng = shape_minus_one/x - 1/t
    return [lng,dlng]
def lnGamma(x, params):
    """
    Returns the ``log gamma (x,k,t)`` distribution and its derivative with::

        lngamma     = (k-1)*log(x) - x/t -gammaln(k) - k*log(t)
        dlngamma    = (k-1)/x - 1/t

    **Parameters:**

    x : [double]
        the point(s) at which the distribution shall be computed.

    params : [k, t]
        the distribution parameters k and t.
    """
    # explicit doubles, so integer parameters cannot trigger int arithmetic
    k = SP.double(params[0])
    t = SP.double(params[1])
    log_density = (k - 1) * SP.log(x) - x / t - SPs.gammaln(k) - k * SP.log(t)
    gradient = (k - 1) / x - 1 / t
    return [log_density, gradient]
def lib_size_factors(data):
    """calculate library size correction factors

    data: mapping with the count arrays 'counts_res' and 'counts_sus'.

    Prints and logs both totals and the result, and returns
    [1.0, res_sum / sus_sum].
    """
    print('lib_size_factors values printed:')
    logging.info('lib_size_factors values printed:')
    res_sum = data['counts_res'].sum()
    sus_sum = data['counts_sus'].sum()
    print('res_sum: %s'%res_sum)
    logging.info('res_sum: %s' %res_sum)
    print('sus_sum:%s' %sus_sum)
    # this record carries sus_sum, so it must not be labeled res_sum
    logging.info('sus_sum: %s' %sus_sum)
    # convert before dividing so integer totals are never floor-divided
    L = [1.0, SP.double(res_sum)/sus_sum]
    print("L is equal to %s"%L)
    logging.info("L is equal to %s"%L)
    return L
def lnGauss(x,params):
    """
    Returns the logarithm of the normal density and its derivative
    with respect to x::

        [ln N(x | mu, sigma), d/dx ln N(x | mu, sigma)]

    **Note**: ``sigma`` is squared inside the formula, so the code treats
    it as a standard deviation. The result is logarithmic.

    **Parameters:**

    x : [double]
        the point(s) at which the log density shall be computed.

    params : [mu, sigma]
        the distribution parameters mean and spread.
    """
    mu = SP.double(params[0])
    sigma = SP.double(params[1])
    halfLog2Pi = 0.91893853320467267 # =.5*(log(2*pi))
    # log(exp(a)/sigma) written as a - log(sigma): the exponential
    # underflows to 0 for points far from mu and its log becomes -inf.
    exponent = -((x-mu)**2)/(2*(sigma**2))
    N = exponent - SP.log(sigma) - halfLog2Pi
    dN = -(x-mu)/(sigma**2)
    return [N,dN]
def accumulator(acc, curr):
    """Reduction step that keeps the threshold with the best F1 score.

    acc: best result so far, a dict with the keys 'epsilon' and 'F1'.
    curr: candidate threshold; entries of p_val below it are predicted
        as 1.

    p_val and Y_val are read from the enclosing scope. Returns a new
    dict for curr if its F1 is strictly larger than acc['F1'],
    otherwise acc unchanged.
    """
    predictions = sp.double(p_val < curr)
    flagged = predictions == 1
    passed = predictions == 0
    actual_pos = sp.asarray(Y_val == 1).ravel()
    actual_neg = sp.asarray(Y_val == 0).ravel()

    tp = sp.sum(sp.logical_and(flagged, actual_pos))
    fp = sp.sum(sp.logical_and(flagged, actual_neg))
    fn = sp.sum(sp.logical_and(passed, actual_pos))

    prec = tp / (tp + fp)
    rec = tp / (tp + fn)
    F1 = 2 * prec * rec / (prec + rec)

    if F1 > acc['F1']:
        return {'epsilon': curr, 'F1': F1}
    return acc
def fit(self, X):
    '''
    fits a topic model

    INPUT
    X   list of strings

    Stores the index -> word table in self.idx2word and one statistics
    dict per topic ('assignments', 'clusterid', 'words') in
    self.topicstats, prints a summary per topic and writes the
    statistics as JSON into self.folder.

    Raises ValueError for an unknown self.modeltype.
    '''
    # transform list of strings into sparse BoW matrix
    X = self.bow['tfidf_transformer'].fit_transform(
        self.bow['count_vectorizer'].fit_transform(X))

    # transform word to BoW index into reverse lookup table
    vocabulary = self.bow['count_vectorizer'].vocabulary_
    self.idx2word = dict(zip(vocabulary.values(), vocabulary.keys()))

    # depending on the model, train
    # (strings are compared with ==; "is" tests object identity and is
    # not reliable for string values)
    if self.modeltype == 'kmeans':
        Xc = self.model.fit_predict(X)
    elif self.modeltype == 'kpcakmeans':
        Xc = self.model['kpca'].fit_transform(X)
        Xc = self.model['kmeans'].fit_predict(Xc)
    elif self.modeltype == 'nmf':
        # one topic per sample: argmax over the component axis
        Xc = self.model.fit_transform(X).argmax(axis=1)
    else:
        raise ValueError('unknown modeltype %r' % (self.modeltype,))

    # for each cluster/topic compute covariance of word with cluster label
    # this measure is indicative of the importance of the word for the topic
    self.topicstats = []
    for cluster in range(self.topics):
        # this is a binary vector, true if a data point was in this cluster
        y = double(Xc == cluster)
        # this is the covariance of the data with the cluster label
        Xcov = X.T.dot(y)
        # find the most strongly covarying (with the cluster label) words
        wordidx = reversed(Xcov.argsort()[-self.topwords:])
        topicwords = dict([(self.idx2word[idx], Xcov[idx]) for idx in wordidx])
        self.topicstats.append({'assignments':y.sum(),'clusterid':cluster,
                                'words': topicwords})
        # list() is needed because dict views cannot be sliced on Python 3
        print('Topic %d: %3d Assignments '%(cluster,y.sum())
              + 'Topwords: ' + ' '.join(list(topicwords.keys())[:10]))

    datestr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    fn = self.folder + '/topicmodel-%s-' % self.modeltype + datestr + '.json'
    print("Saving model stats to " + fn)
    # json.dumps returns text, so the file is opened in text mode and
    # closed deterministically
    with open(fn, 'w') as handle:
        handle.write(json.dumps(self.topicstats))
def fit(self,X):
    '''
    fits a topic model

    INPUT
    X   list of strings

    Side effects: sets self.idx2word (index -> word) and self.topicstats
    (one dict per topic with 'assignments', 'clusterid' and 'words'),
    prints a per-topic summary and saves the statistics as a JSON file
    in self.folder.

    Raises ValueError for an unknown self.modeltype.
    '''
    # transform list of strings into sparse BoW matrix
    counts = self.bow['count_vectorizer'].fit_transform(X)
    X = self.bow['tfidf_transformer'].fit_transform(counts)

    # transform word to BoW index into reverse lookup table
    words = self.bow['count_vectorizer'].vocabulary_.values()
    wordidx = self.bow['count_vectorizer'].vocabulary_.keys()
    self.idx2word = dict(zip(words,wordidx))

    # depending on the model, train; "==" compares the string value,
    # whereas "is" only compares object identity
    modeltype = self.modeltype
    if modeltype == 'kmeans':
        Xc = self.model.fit_predict(X)
    elif modeltype == 'kpcakmeans':
        Xc = self.model['kmeans'].fit_predict(self.model['kpca'].fit_transform(X))
    elif modeltype == 'nmf':
        # strongest component per sample, hence the argmax over axis 1
        Xc = self.model.fit_transform(X).argmax(axis=1)
    else:
        raise ValueError('unknown modeltype %r' % (modeltype,))

    # for each cluster/topic compute covariance of word with cluster label
    # this measure is indicative of the importance of the word for the topic
    self.topicstats = []
    for cluster in range(self.topics):
        # this is a binary vector, true if a data point was in this cluster
        y = double(Xc==cluster)
        # this is the covariance of the data with the cluster label
        Xcov = X.T.dot(y)
        # find the most strongly covarying (with the cluster label) words
        strongest = reversed(Xcov.argsort()[-self.topwords:])
        topicwords = dict([(self.idx2word[idx],Xcov[idx]) for idx in strongest])
        self.topicstats.append({'assignments':y.sum(),'clusterid':cluster,
                                'words': topicwords})
        # dict views are not sliceable on Python 3, hence the list()
        shown = list(topicwords.keys())[:10]
        print('Topic %d: %3d Assignments '%(cluster,y.sum())
              + 'Topwords: ' + ' '.join(shown))

    datestr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    fn = self.folder+'/topicmodel-%s-'%self.modeltype +datestr+'.json'
    print("Saving model stats to "+fn)
    # text mode matches the str returned by json.dumps; the with block
    # closes the file
    with open(fn,'w') as handle:
        handle.write(json.dumps(self.topicstats))
def qvalues1(PV,m=None,pi=1.0):
    """estimate q values from a list of P-values
    this algorithm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)),
    pi: fraction of expected true null (1.0 is a conservative estimate)
    @param PV: pvalues (any shape; the result has the same shape)
    @param m:  total number of tests if PV is not the entire array.
    @param pi: fraction of true null; None estimates it from PV
    """
    S = PV.shape
    PV = PV.flatten()
    lPV = len(PV)
    if m is None:
        m = lPV * 1.0
    else:
        m = m * 1.0
    if lPV == 0:
        # nothing to adjust
        return sp.zeros_like(PV).reshape(S)

    #1. sort pvalues
    IPV = PV.argsort()
    PV = PV[IPV]

    #2. estimate lambda
    if pi is None:
        # linspace requires an integer number of grid points
        lrange = sp.linspace(0.05,0.95,int(max(lPV/100.0,10)))
        pil = sp.double((PV[:,None]>lrange).sum(axis=0))/lPV
        pilr = pil/(1.0-lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi = 1.0
        #if there is something useful in there use the something close to 1
        if pilr[-1]<1.0:
            pi = pilr[-1]

    #3. initialise q values
    QV_ = pi * m/lPV* PV
    QV_[-1] = min(QV_[-1],1.0)
    #4. update estimate (range works on Python 2 and 3)
    for i in range(lPV-2,-1,-1):
        QV_[i] = min(pi*m*PV[i]/(i+1.0),QV_[i+1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_

    QV = QV.reshape(S)
    return QV
def qvalues1(PV, m=None, pi=1.0):
    """estimate q values from a list of P-values
    this algorithm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)),
    pi: fraction of expected true null (1.0 is a conservative estimate)
    @param PV: pvalues (any shape; the result has the same shape)
    @param m:  total number of tests if PV is not the entire array.
    @param pi: fraction of true null; None estimates it from PV
    """
    S = PV.shape
    PV = PV.flatten()
    lPV = len(PV)
    m = (lPV if m is None else m) * 1.0
    if lPV == 0:
        # empty input: there is no largest p-value to anchor the update
        return sp.zeros_like(PV).reshape(S)

    # 1. sort pvalues
    IPV = PV.argsort()
    PV = PV[IPV]

    # 2. estimate lambda
    if pi is None:
        # the grid size must be an int; a float count is rejected by
        # linspace
        n_grid = int(max(lPV / 100.0, 10))
        lrange = sp.linspace(0.05, 0.95, n_grid)
        pil = sp.double((PV[:, None] > lrange).sum(axis=0)) / lPV
        pilr = pil / (1.0 - lrange)
        # ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi = 1.0
        # if there is something useful in there use the something close to 1
        if pilr[-1] < 1.0:
            pi = pilr[-1]

    # 3. initialise q values
    QV_ = pi * m / lPV * PV
    QV_[-1] = min(QV_[-1], 1.0)
    # 4. update estimate
    for i in range(lPV - 2, -1, -1):
        QV_[i] = min(pi * m * PV[i] / (i + 1.0), QV_[i + 1])
    # 5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_

    QV = QV.reshape(S)
    return QV
def mean_std_rand(labels_all):
    """Agreement of each subject's labels with the per-vertex mode.

    labels_all is an nvert x nsub matrix. Subjects whose column sums to
    zero (parcellation not done) are dropped, the remaining labels are
    passed through reorder_labels, and the adjusted Rand score between
    the per-vertex modal label and every subject is computed.

    Returns (mean score, std of the scores, fraction of subjects that
    carry the modal label at each vertex, modal labels).
    """
    # delete subjects for which parcellation is not done
    has_parcellation = sp.sum(labels_all, axis=0) != 0
    labs = reorder_labels(labels_all[:, has_parcellation])
    n_subjects = labs.shape[1]

    labs_mode, freq = sp.stats.mode(labs, axis=1)
    freq1 = sp.double(freq.squeeze())
    freq1 /= n_subjects

    ars = sp.zeros(n_subjects)
    for subject in range(n_subjects):
        ars[subject] = adjusted_rand_score(labs_mode.squeeze(),
                                           labs[:, subject])

    return ars.mean(), ars.std(), freq1, labs_mode
def getParameters(self,key="name",parse=True):
    """Return the parameters of an xml model structure as a dict.

    key: attribute whose value becomes the dictionary key.
    parse: True/False; if True the 'value' attribute is parsed: a
        non-empty 'eval' attribute evaluates it as a Python expression,
        otherwise it is converted according to 'type' ('matrix' via
        self.parseMatrixParameter, 'double', 'int', or 'str' which is
        left untouched).

    Raises Exception when parse is True and a param has neither a known
    type nor an eval attribute.
    """
    params = self.getElementsByTagName('param',1)
    rv = {}
    for param in params:
        value = param.getAttribute('value')
        if parse:
            ptype = param.getAttribute('type')
            if(param.getAttribute('eval')):
                # SECURITY NOTE(review): eval() runs arbitrary code from
                # the model file; only use with trusted input.
                value = eval(value)
            elif(ptype=='matrix'):
                value = self.parseMatrixParameter(value)
            elif(ptype=='double'):
                value = S.double(value)
            elif(ptype=='int'):
                value = S.int32(value)
            elif(ptype=='str'):
                #no action for string
                pass
            else:
                # name the offending parameter instead of printing the
                # node object
                offender = param.getAttribute(key)
                raise Exception("Invalid Attribute exception attribute %s has no type or eval!" % offender)
        rv[str(param.getAttribute(key))]=value
    return rv
def wechat_fomat(dataPath,labelPath,writeGenderPath,writeLocPath):
    '''Format the WeChat data as two svmlight files (gender and location).

    dataPath:        space separated features; column 0 is the image
                     name, the remaining columns are numeric features.
    labelPath:       space separated labels; column 1 is the gender,
                     column 3 the image name, column 5 the location code.
    writeGenderPath: svmlight output with the gender label
                     (0 if gender == '女', else 1).
    writeLocPath:    svmlight output with the location label
                     (0 if location code == '2', else 1).

    Each image takes the labels of the first label row with its name.
    Images without a label row are reported and left out, so features
    and labels always have the same number of rows.

    Note on encodings: the files contain Chinese text; they must be
    UTF-8 for the gender comparison to work.
    '''
    # read the data
    imagename = []  # image name of every data row
    data = []       # feature matrix
    with codecs.open(dataPath) as f:
        for line in f:
            tokens = line.strip().split(' ')
            imagename.append(tokens[0])
            data.append([double(tk) for tk in tokens[1:]])
    imagename = np.array(imagename)
    data = np.array(data)
    print(imagename)

    # read the labels
    with codecs.open(labelPath) as labelf:
        labelfile = [line.strip().split(' ') for line in labelf]

    # image name -> (gender, location); the first row per name wins so
    # that every image yields exactly one label
    labels_of_image = {}
    for li in labelfile:
        if len(li) > 5:
            labels_of_image.setdefault(li[3], (li[1], li[5]))

    # fill in the labels
    genderlabel = []
    loclabel = []
    keep = []
    for i, name in enumerate(imagename):
        found = labels_of_image.get(name)
        if found is None:
            print(i)
            print(name+"没有对应的标签")
            keep.append(False)
            continue
        genderlabel.append(0 if found[0] == '女' else 1)
        loclabel.append(0 if found[1] == '2' else 1)
        keep.append(True)

    # np.delete returns a new array; drop the unlabeled rows by
    # reassigning with a boolean mask
    data = data[np.array(keep, dtype=bool)]
    genderlabel = np.array(genderlabel)
    loclabel = np.array(loclabel)

    # the three counts below must agree, otherwise labels and data do
    # not match
    print(data.shape[0])
    print(genderlabel.shape[0])
    print(loclabel.shape[0])

    # write the data in libsvm format
    dump_svmlight_file(data, genderlabel,writeGenderPath,zero_based=False)
    dump_svmlight_file(data, loclabel,writeLocPath,zero_based=False)
    print ("Wechat format End!")
def plot_pairwise_velocities_mass(cases,color):
    """Plot pairwise velocity statistics against central halo mass.

    For the two separations R = 1 and R = 5 (bin widths 1 and 0.2) and
    for each central halo mass, the stored pairwise velocities whose
    radial distance lies in (R - dR/2, R + dR/2) are selected. Their RMS
    (sigma) goes to subplots 221/222 and minus their mean (-v12) to
    subplots 223/224.

    cases: not used by the body.
    color: matplotlib colour of the curves.

    Returns (all_radial_distances, all_pairwise_velocities): the
    concatenation over all masses of the loaded arrays.

    NOTE(review): the body reads a name ``case`` that is not a parameter;
    it has to exist at module level. It looks like it should be derived
    from ``cases`` -- confirm against the caller.
    """
    #central_halo_masses = ['3.5e11']
    #central_halo_masses = ['3.50e+11','9.98e+11','5.00e+12','2.50e+13','5.20e+14']
    central_halo_masses = ['2.00e+11','1.08e+12','6.50e+12','8.00e+13','5.75e+14']
    double_central_halo_masses = [sp.double(mass) for mass in central_halo_masses]

    path = '../cases/'+case

    Rs = [1,5]
    dRs = [1,0.2]
    subplots = [221,222]

    # arrays loaded during the first pass; concatenated once at the end
    loaded_velocities = []
    loaded_distances = []

    for pass_index, (R, dR, subplot) in enumerate(zip(Rs, dRs, subplots)):
        v12_of_masses = []
        sigma_pp_of_masses = []
        # 2.0 forces float division: with the integer dR = 1 a Python 2
        # "dR/2" is 0 and the R = 1 bin would be empty
        Rmin, Rmax = R-dR/2.0, R+dR/2.0

        for central_halo_mass in central_halo_masses:
            pairwise_velocities = sp.load(
                path+'pairwise_velocities_'+central_halo_mass+'.npy')
            radial_distances = sp.load(
                path+'radial_distances_'+central_halo_mass+'.npy')

            # every mass is loaded again on each pass; collect only once
            if pass_index == 0:
                loaded_velocities.append(pairwise_velocities)
                loaded_distances.append(radial_distances)

            pairwise_velocities_R = sp.array(
                [pairwise_velocity
                 for pairwise_velocity, radial_distance
                 in zip(pairwise_velocities, radial_distances)
                 if (Rmin < radial_distance) & (radial_distance < Rmax)])
            print("len(pairwise_velocities_R) =  %d" % len(pairwise_velocities_R))

            v12_of_masses.append(-sp.mean(pairwise_velocities_R))
            sigma_pp_of_masses.append(sp.sqrt(sp.mean(pairwise_velocities_R**2)))

        # upper row: velocity dispersion
        plt.subplot(subplot)
        plt.plot(double_central_halo_masses,sigma_pp_of_masses,'.-',color=color,label=case)
        if subplot == 221:
            plt.ylabel(r'$\sigma_{||}$ [km/s]')
            plt.title('R=1Mpc/h')
            plt.legend(loc=2,prop={'size':8})
        if subplot == 222:
            plt.title('R=5Mpc/h')
        plt.axis([1e11,1e15,0,600])
        plt.xscale('log')

        # lower row: mean pairwise velocity
        plt.subplot(subplot+2)
        plt.plot(double_central_halo_masses,v12_of_masses,'.-',color=color,label=case)
        if (subplot == 221) | (subplot == 222):
            plt.xlabel('$M_{200}$ [$M_{sun}$/h]')
        if subplot == 221:
            plt.ylabel('$-v_{12}$ [km/s]')
        plt.axis([1e11,1e15,0,600])
        plt.xscale('log')

    all_pairwise_velocities = sp.hstack(loaded_velocities)
    all_radial_distances = sp.hstack(loaded_distances)
    return all_radial_distances, all_pairwise_velocities
# Figure 1 shows the raw data; figure 2 repeats the scatter and adds the
# decision boundary.
for _figure_number in (1, 2):
    plt.figure(_figure_number)
    plt.xlabel('x1')
    plt.ylabel('x2')
    pos = sp.where(Y == 1)[0]
    neg = sp.where(Y == 0)[0]
    plt.plot(X[pos, 1], X[pos, 2], 'k+', linewidth=2, markersize=7)
    plt.plot(X[neg, 1], X[neg, 2], 'ko', markerfacecolor='y', markersize=7)

# Plot figure 2 (decision boundary); figure 2 is still the current figure
if X.shape[0] >= 3:
    # two x positions just outside the data range define the line
    plot_x = sp.array([sp.amin(X[:, 1]) - 2, sp.amax(X[:, 1]) + 2])
    plot_y = (-1 / theta[2, 0]) * (theta[0, 0] + theta[1, 0] * plot_x)
    plt.plot(plot_x, plot_y)
plt.savefig('1.png')

# share of training samples predicted correctly, in percent
p = predict(theta, X)
r = sp.mean(sp.double(p == Y)) * 100
print("Train Accuracy: {r}%".format(r=r))
def feature_format(sinadataPath,userPath,contentPath,sinaGenderPath):
    '''Attach gender labels to the image features and dump them as svmlight.

    sinadataPath:   CSV of features; column 1 is the image name, columns
                    2.. are numeric features.
    userPath:       CSV of users; column 0 is the user id, column 1 the
                    gender.
    contentPath:    CSV of posts; column 0 is the user id, column 1 the
                    image name.
    sinaGenderPath: output file written with dump_svmlight_file.

    The label is 0 when the gender field equals '女' and 1 otherwise.
    For every image the first post with that image name is used, and
    within the user list the first entry with that post's user id.
    Images without a label are reported and left out of the output, so
    features and labels always have the same length.

    Note on encodings: the files contain Chinese text; they must be
    UTF-8 for the gender comparison to work.
    '''
    # read the data
    imagename = []  # image name of every data row
    data = []       # feature matrix
    with codecs.open(sinadataPath) as f:
        for line in f:
            datatemp = line.strip().split(',')
            imagename.append(datatemp[1])
            data.append([double(tk) for tk in datatemp[2:]])
    data = np.array(data)

    print('读取发图微博')
    with codecs.open(contentPath) as contentf:
        contentlist = [line.strip().split(',') for line in contentf]

    print('读取用户列表')
    with codecs.open(userPath) as userf:
        userlist = [line.strip().split(',') for line in userf]

    # Dictionaries replace the nested linear scans; setdefault keeps the
    # first occurrence, like a scan that stops at the first hit.
    owner_of_image = {}
    for li in contentlist:
        if len(li) > 1:
            owner_of_image.setdefault(li[1], li[0])
    gender_of_user = {}
    for user in userlist:
        if len(user) > 1:
            gender_of_user.setdefault(user[0], user[1])

    print('填入标签')
    genderlabel = []
    keep = []
    for i, name in enumerate(imagename):
        print(name)
        gender = None
        if name in owner_of_image:
            gender = gender_of_user.get(owner_of_image[name])
        if gender is None:
            print(i)
            print(name+"没有对应的标签")
            keep.append(False)
            continue
        genderlabel.append(0 if gender == '女' else 1)
        keep.append(True)

    # np.delete returns a copy; remove the unlabeled rows by reassigning
    # with a boolean mask
    data = data[np.array(keep, dtype=bool)]
    genderlabel = np.array(genderlabel)
    print(genderlabel)
    print(data.shape[0])
    print(genderlabel.shape[0])
    print("'构建libsvm数据")
    dump_svmlight_file(data, genderlabel,sinaGenderPath,zero_based=False)
window='boxcar', pass_zero=True) ''' boxcar, triang, blackman, hamming, hann, bartlett, flattop, parzen, bohman, blackmanharris, nuttall, barthann, kaiser (needs beta), gaussian (needs std), general_gaussian (needs power, width), slepian (needs width), chebwin (needs attenuation) ''' #%% HPF #f = ss.firwin(numtaps=N, cutoff=fc/(Fs/2.), window='blackman', pass_zero=False) #%% BPF #e = ss.firwin(numtaps=N, cutoff=scipy.array([fc/(Fs/2.), fc2/(Fs/2.)]), window='blackman', pass_zero=False) #%% BEF #h = ss.firwin(numtaps=N, cutoff=scipy.array([fc/(Fs/2.), fc2/(Fs/2.)]), window='blackman', pass_zero=True) #%% 表示 f = scipy.array(range(0, N)) * Fs / scipy.double(N) tf = scipy.fft(h) mag = scipy.absolute(tf) phase = scipy.unwrap(scipy.angle(tf)) * 180. / scipy.pi f2 = scipy.array(range(0, N2)) * Fs / scipy.double(N2) tf2 = scipy.fft(h2) mag2 = scipy.absolute(tf2) phase2 = scipy.unwrap(scipy.angle(tf2)) * 180. / scipy.pi f3 = scipy.array(range(0, N3)) * Fs / scipy.double(N3) tf3 = scipy.fft(h3) mag3 = scipy.absolute(tf3) phase3 = scipy.unwrap(scipy.angle(tf3)) * 180. / scipy.pi figure(1)
prediction3 = df['prediction3']
label4 = df['label4']
prediction4 = df['prediction4']
label5 = df['label5']
prediction5 = df['prediction5']

n_classes = 34
y_test = []
y_score = []

# One row per sample: the five (label, confidence) guesses fill the score
# vector, the ground truth becomes a one-hot vector.
for gt, l1, p1, l2, p2, l3, p3, l4, p4, l5, p5 in zip(ground_truth,
                                                       label1, prediction1,
                                                       label2, prediction2,
                                                       label3, prediction3,
                                                       label4, prediction4,
                                                       label5, prediction5):
    y_score_aux = np.double(np.zeros(n_classes))
    y_test_aux = np.int64(np.zeros(n_classes))

    # confidences are divided by 100; guesses are written in rank order,
    # so a later guess overwrites an earlier one with the same label
    for guess_label, guess_conf in ((l1, p1), (l2, p2), (l3, p3),
                                    (l4, p4), (l5, p5)):
        y_score_aux[guess_label] = double(guess_conf) / 100
    y_score.append(y_score_aux)

    y_test_aux[gt] = 1
    y_test.append(y_test_aux)

y_score = np.array(y_score)
y_test = np.array(y_test)

###############################################################################
# Compute the average precision score
# ...................................
from sklearn.metrics import average_precision_score
def f3(x):
    """Return ``warping_function.pLML(x, C, gp.y)`` converted with ``SP.double``."""
    plml_value = warping_function.pLML(x, C, gp.y)
    return SP.double(plml_value)
plt.ylabel('x2')

# Scatter the two classes with different markers.
pos = sp.where(Y == 1)[0]
neg = sp.where(Y == 0)[0]
plt.plot(X[pos, 1], X[pos, 2], 'k+', linewidth=2, markersize=7)
plt.plot(X[neg, 1], X[neg, 2], 'ko', markerfacecolor='y', markersize=7)

# Two x-values (slightly beyond the data range) are enough to draw the line
# defined by theta.
x_low = sp.amin(X[:, 1]) - 2
x_high = sp.amax(X[:, 1]) + 2
plot_x = sp.array([x_low, x_high])
plot_y = (-1 / theta[2, 0]) * (theta[0, 0] + theta[1, 0] * plot_x)
plt.plot(plot_x, plot_y)
plt.savefig('1.png')

# Estimate performance
p = predict(theta, X)
hit_rate = sp.mean(sp.double(p == Y))
r = sp.around(hit_rate * 100, 1)
print("Train Accuracy: {r}%".format(r=r))

# Regularize
# Load data from data source 2
data = sp.matrix(sp.loadtxt("ex2data2.txt", delimiter=','))
X = data[:, 0:2]
Y = data[:, 2]
m, n = X.shape

# Compute regularized cost and gradients
# Initialize
X = map_feature(X[:, 0], X[:, 1])
# theta = sp.zeros(X.shape[1])
def f3(x):
    """Evaluate ``warping_function.pLML`` at ``x`` (with ``C`` and ``gp.y``) as an ``SP.double``."""
    raw = warping_function.pLML(x, C, gp.y)
    return SP.double(raw)
data = loadmat('ex3data1.mat')
X = sp.matrix(data['X'])
Y = sp.matrix(data['y'])
m = X.shape[0]

# Show 100 rows drawn at random (with replacement).
rand_indices = sp.random.randint(0, m, size=100)
sel = X[rand_indices, :]
display_data(sel, save=True)

# Logistic regression
_lambda = 0.1
all_theta = one_vs_all(X, Y, num_labels, _lambda,
                       cost_function_reg, gradients_reg)
p = predict_one_vs_all(all_theta, X)
# Predictions are compared against the labels taken modulo 10.
matches = sp.double(p == (Y % 10))
r = sp.around(sp.mean(matches) * 100, 1)
print("Train Accuracy (Logistic Regression): {r}%".format(r=r))

# Neural network
hidden_layer_size = 25

# Load calculated weights
weights = loadmat('ex3weights.mat')
theta_1 = sp.matrix(weights['Theta1'])
theta_2 = sp.matrix(weights['Theta2'])

prep = forward_prop(X)(theta_1, theta_2)
matches = sp.double(prep == (Y % 10))
r = sp.around(sp.mean(matches) * 100, 1)
print("Train Accuracy (Neural Network): {r}%".format(r=r))
if True:
    PL.figure()

    # 0. theoretical curve around xc
    pos_range = SP.linspace(pos.min(), pos.max(), 1000)
    D_range = SP.absolute(pos_range - (xc + 0.01E7))
    podd = rm._podd(D_range)
    Spodd = SP.sqrt(rm._Vpodd(D_range, options.n_res))

    # 1. theory: the curve itself (solid) and the curve shifted by
    #    +Spodd / -Spodd (dashed)
    rt = 0.98
    PL.plot(pos_range, rt - podd, 'k-')
    for offset in (Spodd, -Spodd):
        PL.plot(pos_range, (rt - podd) + offset, 'k--')

    # 2. raw data: first count column as a fraction of the row total
    PL.plot(pos, SP.double(counts_res[:, 0]) / counts_res.sum(axis=1), 'b.')
    PL.savefig(os.path.join(out_dir, 'fit.pdf'))

if False:
    # Disabled: one panel each for the sus, res and combined count tables.
    PL.figure()
    PL.subplot(311)
    PL.plot(pos, SP.double(counts_sus[:, 0]) / counts_sus.sum(axis=1), 'b.')
    PL.subplot(312)
    PL.plot(pos, SP.double(counts_res[:, 0]) / counts_res.sum(axis=1), 'b.')
    PL.subplot(313)
    PL.plot(pos, SP.double(counts_both[:, 0]) / counts_both.sum(axis=1), 'r.')
def parse(self):
    """Read the .msh file once, applying node rules and then element rules.

    The file ``self.mshfilename`` is (re)opened and scanned up to the
    ``$Nodes`` header. The node count stored in the file must equal
    ``self.Nnodes``. Every node line ``tag x y z`` is then offered to each
    ``(condition, action)`` pair in ``self.nodes_rules``:
    ``action(tag, x, y, z)`` is called when
    ``condition(tag, x, y, z, physgroups)`` is truthy, where ``physgroups``
    lists the entries ``grp`` of ``self.physical_groups`` for which
    ``self.nodes_in_physical_groups[grp][tag] == 1``.

    The ``$Elements`` section is processed the same way with
    ``self.elements_rules``; conditions and actions receive
    ``(eletag, eletype, physgrp, nodes)``. An element line with fewer than
    two tags is reported through ``self.__error__`` and skipped.

    When a rule list is empty the corresponding section is skipped without
    parsing its lines.

    The open handle is stored in ``self.mshfid`` while parsing and is closed
    when the method returns or aborts.

    Raises:
        SystemExit: with code -1 when ``$Nodes`` is never found, when a
            count in the file disagrees with ``self.Nnodes`` /
            ``self.Nelem``, or when ``$Elements`` is not where expected.
    """
    import sys

    with open(self.mshfilename, 'r') as fid:
        self.mshfid = fid

        # Advance to nodes. readline() returns '' forever at end of file,
        # so an explicit EOF check is needed to avoid spinning endlessly.
        line = fid.readline()
        while line.find("$Nodes") < 0:
            if not line:
                self.__error__("Reached end of file before $Nodes. Aborting.")
                sys.exit(-1)
            line = fid.readline()

        # This line should contain the number of nodes; check that it is
        # still the number of nodes held in memory.
        line = fid.readline()
        if int(line) != self.Nnodes:
            self.__error__("Something wrong. Aborting.")
            sys.exit(-1)

        self.__inform__("Parsing nodes")

        if len(self.nodes_rules) == 0:
            self.__inform__("No rules for nodes... skipping nodes.")
            for _ in range(self.Nnodes):
                fid.readline()
        else:
            for _ in range(self.Nnodes):
                # Node line layout: tag x y z
                sl = fid.readline().split()
                tag = sp.int32(sl[0])
                x = sp.double(sl[1])
                y = sp.double(sl[2])
                z = sp.double(sl[3])

                # Figure out the groups to which this node belongs
                physgroups = [
                    grp for grp in self.physical_groups
                    if self.nodes_in_physical_groups[grp][tag] == 1
                ]

                for condition, action in self.nodes_rules:
                    if condition(tag, x, y, z, physgroups):
                        action(tag, x, y, z)

        # Two more lines follow the nodes; the second should be $Elements.
        fid.readline()
        line = fid.readline()
        if line.find("$Elements") == 0:
            self.__inform__("Parsing elements")
        else:
            self.__error__("Something wrong reading elements. ")
            sys.exit(-1)

        # This line should contain the number of elements; check that it is
        # still the number of elements held in memory.
        line = fid.readline()
        if int(line) != self.Nelem:
            self.__error__("Something wrong. Aborting.")
            sys.exit(-1)

        if len(self.elements_rules) == 0:
            self.__inform__("No rules for elements... skipping elements.")
            for _ in range(self.Nelem):
                fid.readline()
        else:
            for _ in range(self.Nelem):
                # Element line layout: eletag eletype ntags <tags...> <nodes...>
                sl = fid.readline().split()
                eletag = sp.int32(sl[0])
                eletype = sp.int32(sl[1])
                ntags = sp.int32(sl[2])

                if ntags >= 2:
                    # Only touch the tag columns once we know they exist.
                    physgrp = sp.int32(sl[3])
                    nodes = sp.array(sl[(3 + ntags):], dtype=sp.int32)

                    for condition, action in self.elements_rules:
                        if condition(eletag, eletype, physgrp, nodes):
                            action(eletag, eletype, physgrp, nodes)
                else:
                    self.__error__(".msh file has < 2 tags element with tag "
                                   + str(eletag))
# Each array below is forced to a Fortran-ordered copy of the exact dtype and
# then exposed as a raw C pointer (``*_r``) for the ctypes call.

# w: vector of `no` ones (presumably per-observation weights -- confirm
# against the callee's signature)
w = scipy.ones([no], dtype=scipy.float64)
w = w.astype(dtype=scipy.float64, order='F', copy=True)
w_r = w.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# jd
jd = scipy.ones([1], dtype=scipy.int32)
jd = jd.astype(dtype=scipy.int32, order='F', copy=True)
jd_r = jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# vp: vector of `ni` ones
vp = scipy.ones([ni], dtype=scipy.float64)
vp = vp.astype(dtype=scipy.float64, order='F', copy=True)
vp_r = vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# cl: row 0 = lower limits, row 1 = upper limits; infinite limits are
# replaced by -/+ inparms['big'].
options = glmnetSet()
inparms = glmnetControl()
cl = options['cl']
cl[0, cl[0, :] == scipy.double('-inf')] = -1.0 * inparms['big']
cl[1, cl[1, :] == scipy.double('inf')] = 1.0 * inparms['big']
if cl.shape[1] < ni:
    if cl.shape[1] == 1:
        # A single pair of limits is broadcast to all ni variables.
        cl = cl * scipy.ones([1, ni], dtype=scipy.float64)
    else:
        raise ValueError(
            'ERROR: Require length 1 or nvars lower and upper limits')
else:
    # Keep one column per variable. The end of a Python slice is exclusive,
    # so 0:ni selects exactly ni columns (0:ni - 1 would silently drop the
    # limits of the last variable).
    cl = cl[:, 0:ni]
cl = cl.astype(dtype=scipy.float64, order='F', copy=True)
cl_r = cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# ne
ne = ni + 1
ne_r = ctypes.c_int(ne)
# nx
# Probability of an odd (Podd) / even (Peven) number of recombination events
Podd = 0.5 * (1 - SP.exp(-2.0 * DX * Rc))
Peven = 1 - Podd

n_positions = Podd.shape[0]
SAMPr = SP.zeros([n_positions, Nsamp])
SAMPk = SP.zeros([n_positions, Nsamp])

# Two-stage sampling per position: first draw a pool count out of
# samples_res, then draw a read count out of Nread at the pool's fraction.
for i in xrange(n_positions):
    # theoretical rate
    r = Peven[i]
    binR = st.binom(samples_res, r)
    for s in xrange(Nsamp):
        rs = binR.rvs(1)
        pool_fraction = SP.double(rs) / samples_res
        binSeq = st.binom(Nread, pool_fraction)
        kSeq = binSeq.rvs(1)
        SAMPr[i, s] = rs
        SAMPk[i, s] = kSeq

# 1. theoretical curve
PL.figure()
PL.subplot(411)
PL.plot(DX, Peven)

if True:
    # 2. samples within pool, as a fraction of samples_res
    PL.subplot(412)
    PL.plot(DX, SAMPr / samples_res)

if True:
    # 3. sampled read counts, as a fraction of Nread
    PL.subplot(413)
    PL.plot(DX, SAMPk / Nread)
def score(self, start_pos=None, stop_pos=None, step_size=100E3,
          window_size=None, opt_recombination=False, opt_eps=False):
    """Stepwise scoring function.

    Moves a candidate position ``p`` from ``start_pos`` towards ``stop_pos``
    in increments of ``step_size`` and scores each position on the SNPs that
    fall inside the analysis window.

    start_pos: start position for sliding window (default: ``self.pos.min()``)
    stop_pos : stop position for sliding window (default: ``self.pos.max()``);
               the loop runs while ``p < stop_pos``
    step_size: step size
    window_size: analysis window size; SNPs with ``|p - pos| < window_size``
               are used. If None, all genome-wide SNPs are jointly analyzed
    opt_recombination: optimize recombination rate
    opt_eps  : optimize missphenotyping rate

    Returns ``[P, S, S0]`` as arrays: the evaluated positions, the score at
    each position, and the matching background score. Without optimisation
    the score comes from ``self._LL`` and the background is the sum of the
    ``self._LL0`` entries 1 and 2 over the window; with either ``opt_*`` flag
    set both values are taken from ``self._LLopt`` instead.
    """
    if start_pos is None:
        start_pos = self.pos.min()
    if stop_pos is None:
        stop_pos = self.pos.max()

    # background likelihood assuming 50:50
    LL0 = self._LL0(SP.arange(self.res.shape[0]))

    # Scores are reported as plain sums; the per-window normaliser is fixed
    # at 1.
    NI = 1
    optimize = opt_eps or opt_recombination

    # Without a window every SNP is used at every step, so build the index
    # once instead of comparing distances against None.
    all_snps = SP.arange(self.pos.shape[0]) if window_size is None else None

    p = start_pos
    S = []
    S0 = []
    P = []
    while p < stop_pos:
        if window_size is None:
            I = all_snps
        else:
            # position based windowing
            dd = SP.absolute(p - self.pos)
            I = SP.nonzero(dd < window_size)[0]

        [score, LL_res, LL_sus] = self._LL(p, I, eps=self.eps)
        score0 = LL0[1][I].sum() + LL0[2][I].sum()
        if optimize:
            [score0, score] = self._LLopt(
                p, I, eps=self.eps, opt_eps=opt_eps,
                opt_recombination=opt_recombination)

        S.append(score / NI)
        S0.append(score0 / NI)
        P.append(p)

        # move on
        p += step_size

    S = SP.array(S)
    S0 = SP.array(S0)
    P = SP.array(P)
    return [P, S, S0]