def adjust(iters):
    """Background-adjust one sample's PMIntense values and pickle the result.

    The sample is selected by position among the ``*.CEL.gz`` entries that
    ``os.listdir`` reports for the CurrentCel directory.  The matching
    ``PMIntense<name>.pickle`` is loaded, every entry is replaced by its
    adjusted value, and the container is saved as ``BAIntense<name>.pickle``.

    ``mode``, ``leftOf``, ``rightOf``, ``normpdf`` and ``normcdf`` are helpers
    defined outside this block.
    NOTE(review): the sigma/alpha estimates look like the RMA
    normal-plus-exponential background model; confirm against those helpers.

    Args:
        iters: Index into the list of CEL sample names (listing order).

    Returns:
        None.  The adjusted values are written with ``Catche.spickle``.
    """
    names = [entry[:-7]
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry.endswith('.CEL.gz')]
    geoMat = Catche.opickle(
        r'.\Gene Expressions\CurrentCel\PMIntense' + names[iters] + r'.pickle')
    vector = list(geoMat)

    m1 = mode(vector)
    m2 = mode(leftOf(vector, m1))

    # estimate sigma from the values to the left of m2
    leftZ = leftOf(vector, m2)
    n = len(leftZ)
    zSum = sum((lZ - m2) ** 2 for lZ in leftZ)
    if n > 1:
        # float() keeps Python 2 from truncating when the inputs are ints.
        sigma = math.sqrt(float(zSum) / (n - 1)) * math.sqrt(2.0)
    else:
        sigma = math.sqrt(zSum) * math.sqrt(2.0)

    # estimate alpha from the values to the right of m2; 1.0 avoids the
    # Python 2 integer division that would turn 1 / <int mode> into 0.
    alpha = 1.0 / mode(rightOf(vector, m2))

    offset = alpha * (sigma ** 2)
    a = [val - m2 - offset for val in vector]
    print('pass 1')

    if sigma == 0:
        # Degenerate spread: every entry gets the placeholder value 1.
        y = [1] * len(a)
    else:
        y = []
        for val in a:
            z = val / sigma
            cdf = normcdf(z)
            if cdf == 0:
                # Placeholder value 2 where the ratio below is undefined.
                y.append(2)
            else:
                y.append(val + sigma * normpdf(z) / cdf)
    print('pass 2')

    # Write back element by element so geoMat keeps its container type.
    for i, col in enumerate(y):
        geoMat[i] = col
    print('pass 3')
    Catche.spickle(
        r'.\Gene Expressions\CurrentCel/BAIntense' + names[iters] + r'.pickle',
        geoMat)
def adjust(iters):
    """Background-adjust one sample's PMIntense values and pickle the result.

    The sample is selected by position among the ``*.CEL.gz`` entries that
    ``os.listdir`` reports for the CurrentCel directory.  The matching
    ``PMIntense<name>.pickle`` is loaded, every entry is replaced by its
    adjusted value, and the container is saved as ``BAIntense<name>.pickle``.

    ``mode``, ``leftOf``, ``rightOf``, ``normpdf`` and ``normcdf`` are helpers
    defined outside this block.
    NOTE(review): the sigma/alpha estimates look like the RMA
    normal-plus-exponential background model; confirm against those helpers.

    Args:
        iters: Index into the list of CEL sample names (listing order).

    Returns:
        None.  The adjusted values are written with ``Catche.spickle``.
    """
    names = [entry[:-7]
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry.endswith('.CEL.gz')]
    geoMat = Catche.opickle(
        r'.\Gene Expressions\CurrentCel\PMIntense' + names[iters] + r'.pickle')
    vector = list(geoMat)

    m1 = mode(vector)
    m2 = mode(leftOf(vector, m1))

    # estimate sigma from the values to the left of m2
    leftZ = leftOf(vector, m2)
    n = len(leftZ)
    zSum = sum((lZ - m2) ** 2 for lZ in leftZ)
    if n > 1:
        # float() keeps Python 2 from truncating when the inputs are ints.
        sigma = math.sqrt(float(zSum) / (n - 1)) * math.sqrt(2.0)
    else:
        sigma = math.sqrt(zSum) * math.sqrt(2.0)

    # estimate alpha from the values to the right of m2; 1.0 avoids the
    # Python 2 integer division that would turn 1 / <int mode> into 0.
    alpha = 1.0 / mode(rightOf(vector, m2))

    offset = alpha * (sigma ** 2)
    a = [val - m2 - offset for val in vector]
    print('pass 1')

    if sigma == 0:
        # Degenerate spread: every entry gets the placeholder value 1.
        y = [1] * len(a)
    else:
        y = []
        for val in a:
            z = val / sigma
            cdf = normcdf(z)
            if cdf == 0:
                # Placeholder value 2 where the ratio below is undefined.
                y.append(2)
            else:
                y.append(val + sigma * normpdf(z) / cdf)
    print('pass 2')

    # Write back element by element so geoMat keeps its container type.
    for i, col in enumerate(y):
        geoMat[i] = col
    print('pass 3')
    Catche.spickle(
        r'.\Gene Expressions\CurrentCel/BAIntense' + names[iters] + r'.pickle',
        geoMat)
def GetExec():
    """List the FASTA/GenBank records found in the current directory.

    Reads the previous scan time from ``lastChecked.txt`` and immediately
    replaces it with the current time.  Every directory entry whose extension
    starts with ``.fa`` or ``.gb`` is then either parsed with ``SeqIO`` (when
    its ``<name>.pickle`` cache is missing or the file is newer than the
    previous scan) or restored from that cache.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds ``[record, id]`` pairs
        and ``listdata`` maps a running index to ``(id, length, filename)``.

    Raises:
        IOError / ValueError: if ``lastChecked.txt`` is missing or does not
        start with a number.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0

    with open('lastChecked.txt', 'r') as ftime:
        prevTime = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    # The first three characters of the extension select the parser, so
    # ".fa", ".fas", ".fasta", ".gb", ".gbk", ... are all recognised.
    parsers = {".fa": 'fasta', ".gb": 'genbank'}

    for rec in Recs:
        (name, ext) = os.path.splitext(rec)
        if len(ext) <= 2 or ext == '.pickle':
            continue
        seqFormat = parsers.get(ext[:3])
        if seqFormat is None:
            continue

        ListFile = name + ".pickle"
        if not os.path.isfile(ListFile) or float(fmt.filemtime(rec)) > prevTime:
            if seqFormat == 'fasta' and name == "my_seq":
                # "my_seq" FASTA files are never (re)parsed.
                continue
            for v in list(SeqIO.parse(rec, seqFormat)):
                newList.append([v, v.id])
                listdata[j] = str(v.id), len(v.seq), str(name) + str(ext)
                rHoward = [str(v.id), len(v.seq), str(name) + str(ext), v]
                # NOTE(review): every record overwrites the same pickle, so
                # only the last record of a multi-record file stays cached.
                mP.spickle(ListFile, rHoward)
                j += 1
        else:
            rHoward = mP.opickle(ListFile)
            listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            newList.append([rHoward[3], rHoward[0]])
            j += 1
    return [newList, listdata]
def GetExec():
    """List the FASTA/GenBank records found in the current directory.

    Reads the previous scan time from ``lastChecked.txt`` and immediately
    replaces it with the current time.  Every directory entry whose extension
    starts with ``.fa`` or ``.gb`` is then either parsed with ``SeqIO`` (when
    its ``<name>.pickle`` cache is missing or the file is newer than the
    previous scan) or restored from that cache.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds ``[record, id]`` pairs
        and ``listdata`` maps a running index to ``(id, length, filename)``.

    Raises:
        IOError / ValueError: if ``lastChecked.txt`` is missing or does not
        start with a number.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0

    with open('lastChecked.txt', 'r') as ftime:
        prevTime = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    # The first three characters of the extension select the parser, so
    # ".fa", ".fas", ".fasta", ".gb", ".gbk", ... are all recognised.
    parsers = {".fa": 'fasta', ".gb": 'genbank'}

    for rec in Recs:
        (name, ext) = os.path.splitext(rec)
        if len(ext) <= 2 or ext == '.pickle':
            continue
        seqFormat = parsers.get(ext[:3])
        if seqFormat is None:
            continue

        ListFile = name + ".pickle"
        if not os.path.isfile(ListFile) or float(fmt.filemtime(rec)) > prevTime:
            if seqFormat == 'fasta' and name == "my_seq":
                # "my_seq" FASTA files are never (re)parsed.
                continue
            for v in list(SeqIO.parse(rec, seqFormat)):
                newList.append([v, v.id])
                listdata[j] = str(v.id), len(v.seq), str(name) + str(ext)
                rHoward = [str(v.id), len(v.seq), str(name) + str(ext), v]
                # NOTE(review): every record overwrites the same pickle, so
                # only the last record of a multi-record file stays cached.
                mP.spickle(ListFile, rHoward)
                j += 1
        else:
            rHoward = mP.opickle(ListFile)
            listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            newList.append([rHoward[3], rHoward[0]])
            j += 1
    return [newList, listdata]
def funccall(x):
    """Extract the intensities at the PMLoc positions of one CEL file.

    The CEL file is chosen by position among the ``*.CEL.gz`` entries that
    ``os.listdir`` reports for the CurrentCel directory.  It is read through
    ``CelFileReader``, the intensity at every ``(column, row)`` pair stored in
    ``PMLoc.pickle`` is collected, and the list is pickled as
    ``PMIntense<name>.pickle``.

    Args:
        x: Index into the list of ``*.CEL.gz`` file names (listing order).

    Returns:
        ``x`` unchanged, so a caller mapping over indices can tell which
        sample finished.
    """
    names = [entry
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry.endswith('.CEL.gz')]
    PMLoc = None
    h = gzip.GzipFile(r'.\Gene Expressions\CurrentCel/' + names[x])
    try:
        b = CelFileReader.read(h)
        g = b.intensities
        PMLoc = Catche.opickle(r'.\Gene Expressions\CurrentCel/PMLoc.pickle')
        # loc is (column, row); the intensity grid is indexed row first.
        PMIntense = [g[loc[1]][loc[0]] for loc in PMLoc]
    finally:
        # The original never closed the gzip handle; release it even when
        # reading or indexing fails.
        h.close()
    Catche.spickle(
        r'.\Gene Expressions\CurrentCel/PMIntense' + names[x][:-7] + r'.pickle',
        PMIntense)
    return x
def _countSortWithRanks(values):
    """Bucket-sort ``values`` by ``int(value)``; return (keys, positions).

    ``keys`` are the integer-truncated values in ascending order and
    ``positions`` the original index of each.  Equal keys are emitted with the
    highest original index first, matching the order the previous
    implementation produced.
    """
    buckets = {}
    for position, value in enumerate(values):
        buckets.setdefault(int(value), []).append(position)
    sortedKeys = []
    sourcePositions = []
    for key in sorted(buckets):
        for position in reversed(buckets[key]):
            sortedKeys.append(key)
            sourcePositions.append(position)
    return sortedKeys, sourcePositions


def countSort(xe):
    """Sort one sample's BAIntense values and record where each came from.

    The sample is chosen by position among the ``*.CEL.gz`` entries that
    ``os.listdir`` reports for the CurrentCel directory.  Its
    ``BAIntense<name>.pickle`` is loaded and ``[final, finRank]`` is pickled
    as ``SortAndRank<name>.pickle``, where ``final`` is the ascending list of
    integer-truncated values and ``finRank`` the original index of each.

    The previous version sized its count table by the number of elements, so
    a value greater than or equal to that count raised IndexError and a
    negative value landed in the wrong bucket; buckets are now keyed by the
    value itself.

    NOTE(review): the input is read from ``.\\Gene Expressions\\...`` while
    the output goes to ``.\\GeneExpressions\\...`` (no space); the paths are
    kept exactly as they were.

    Args:
        xe: Index into the list of CEL sample names (listing order).

    Returns:
        None.  The result is written with ``Catche.spickle``.
    """
    names = [entry[:-7]
             for entry in os.listdir(r'.\Gene Expressions\CurrentCel')
             if entry.endswith('.CEL.gz')]
    arrayX = Catche.opickle(
        r'.\Gene Expressions\CurrentCel/BAIntense' + names[xe] + r'.pickle')
    final, finRank = _countSortWithRanks(arrayX)
    Catche.spickle(
        r'.\GeneExpressions\CurrentCel/SortAndRank' + names[xe] + r'.pickle',
        [final, finRank])
def GetExec():
    """List the PDB structures found in the current directory.

    Reads the previous scan time from ``lastChecked.txt`` and immediately
    replaces it with the current time.  For every ``.pdb`` entry the model
    count is either computed with ``PDBParser`` (when ``<name>.pickle`` is
    missing or the file is newer than the previous scan) or restored from
    that cache.  An ``IOError`` on one entry is printed and the scan
    continues.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds
        ``[filename, directory]`` pairs and ``listdata`` maps a running index
        to ``(name, model count, path)``.  The previous version built both
        collections but returned nothing; the return shape follows the other
        ``GetExec`` implementations.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)

    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext != ".pdb":
                continue
            pickleName = name + ".pickle"
            newList.append([rec, os.getcwd()])
            if not os.path.isfile(pickleName) or float(fmt.filemtime(rec)) > pT:
                with warnings.catch_warnings():
                    # Parser warnings are deliberately silenced.
                    warnings.simplefilter("ignore")
                    pdbRec = p.get_structure(name, rec)
                models = pdbRec.get_list()
                # NOTE(review): the fresh entry carries the full path while
                # the cached one below only has the bare file name.
                listdata[j] = (str(name), len(models),
                               os.getcwd() + '/' + str(name) + str(ext))
                rHoward = [str(name), len(models), str(name) + str(ext)]
                mP.spickle(pickleName, rHoward)
            else:
                rHoward = mP.opickle(pickleName)
                listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            j += 1
        except IOError as e:
            print(e)
    return [newList, listdata]
def GetExec():
    """List the PDB structures found in the current directory.

    Reads the previous scan time from ``lastChecked.txt`` and immediately
    replaces it with the current time.  For every ``.pdb`` entry the model
    count is either computed with ``PDBParser`` (when ``<name>.pickle`` is
    missing or the file is newer than the previous scan) or restored from
    that cache.  An ``IOError`` on one entry is printed and the scan
    continues.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds
        ``[filename, directory]`` pairs and ``listdata`` maps a running index
        to ``(name, model count, path)``.  The previous version built both
        collections but returned nothing; the return shape follows the other
        ``GetExec`` implementations.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)

    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext != ".pdb":
                continue
            pickleName = name + ".pickle"
            newList.append([rec, os.getcwd()])
            if not os.path.isfile(pickleName) or float(fmt.filemtime(rec)) > pT:
                with warnings.catch_warnings():
                    # Parser warnings are deliberately silenced.
                    warnings.simplefilter("ignore")
                    pdbRec = p.get_structure(name, rec)
                models = pdbRec.get_list()
                # NOTE(review): the fresh entry carries the full path while
                # the cached one below only has the bare file name.
                listdata[j] = (str(name), len(models),
                               os.getcwd() + '/' + str(name) + str(ext))
                rHoward = [str(name), len(models), str(name) + str(ext)]
                mP.spickle(pickleName, rHoward)
            else:
                rHoward = mP.opickle(pickleName)
                listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            j += 1
        except IOError as e:
            print(e)
    return [newList, listdata]
def GetExec(self, frame, BigPanel, Rec, lenX, colorList):
    """Run ``pluginEXE`` and return the matrix it leaves in RMAFinal.pickle.

    ``frame``, ``BigPanel`` and ``colorList`` are accepted but not used here.
    The loaded matrix is also stored on ``self.geoMat``.
    """
    pluginEXE(Rec, lenX)
    resultPath = r'.\GeneExpressions\CurrentCel\RMAFinal.pickle'
    loaded = Catche.opickle(resultPath)
    self.geoMat = loaded
    return self.geoMat
def GetExec():
    """List the GEO ``.tgz`` archives found in the current directory.

    First removes a leftover ``CurrentCel`` directory on a best-effort basis.
    Then reads the previous scan time from ``lastChecked.txt`` and replaces it
    with the current time.  For each ``.tgz`` entry the title and platform
    are either extracted from the archive (when ``<name>.pickle`` is missing
    or the file is newer than the previous scan) or restored from that cache.

    Returns:
        ``[newList, listdata]`` where ``newList`` holds one ``[filename]``
        list per ``.tgz`` archive and ``listdata`` maps a running index to
        ``(name, title, platform, member count - 2)``.
    """
    # Best-effort cleanup: a missing or locked directory must not abort the
    # scan.  The previous bare ``except`` evaluated the name ``errno`` in its
    # handler, which raises NameError when that module is not imported.
    try:
        for names in os.listdir(r'.\CurrentCel/'):
            os.remove(r'.\CurrentCel/' + names)
        os.removedirs(r'.\CurrentCel')
    except OSError:
        pass

    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0

    with open('lastChecked.txt', 'r') as ftime:
        prevTime = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    for i in Recs:
        (nameLeft, ext) = os.path.splitext(i)
        if ext == '.tgz':
            newList.append([i])
            geoListFile = nameLeft + ".pickle"
            if not os.path.isfile(geoListFile) or float(fmt.filemtime(i)) > prevTime:
                filelib = tarfile.TarFile.gzopen(i)
                try:
                    nameHolder = filelib.getnames()
                    # Member 1's name minus its last 10 characters is used as
                    # the platform id; member 0 is parsed as the XML summary.
                    PForm = nameHolder[1][:-10]
                    f = filelib.extractfile(nameHolder[0])
                    minimal = minidom.parse(f).childNodes[0]
                    titleText = minimal.childNodes[-2].childNodes[3].childNodes[0].toxml()
                finally:
                    # The archive handle used to stay open for the whole scan.
                    filelib.close()
                listdata[j] = str(nameLeft[:-4]), titleText, PForm, len(nameHolder) - 2
                rHoward = [nameLeft, titleText, PForm, len(nameHolder) - 2]
                mP.spickle(geoListFile, rHoward)
                j += 1
            else:
                rHoward = mP.opickle(geoListFile)
                listdata[j] = str(rHoward[0][:-4]), rHoward[1], rHoward[2], rHoward[3]
                j += 1
        elif ext == r'.tar':
            # The code that turned ``.tar`` archives into list entries was
            # disabled in the original (kept only inside a string literal),
            # so nothing is recorded for them.  The archive is still opened
            # and listed so an unreadable file raises exactly as before.
            filelib = tarfile.TarFile.taropen(i)
            try:
                filelib.getnames()
            finally:
                filelib.close()
    return [newList, listdata]
def doSearch(self, event):
    """Plot the two selected fingerprints side by side and show their overlap.

    The names typed into ``box1`` and ``box2`` select the ``<name> sig`` and
    ``<name> updown`` pickles in the DatabaseFingerprints directory.  Each
    fingerprint becomes a 17300-entry column (default value 2) drawn in its
    own panel, and the fraction of positions where both columns agree is
    written to ``box3``.

    Args:
        event: The triggering GUI event; not used.

    Returns:
        None.

    NOTE(review): the old panels are only hidden, never destroyed, so a new
    pair of panels accumulates on every search.
    """
    fingerprintDir = r"C:\Users\francis\Documents\DatabaseFingerprints/"

    def buildColumn(label):
        # Load one fingerprint (kept on self.mat2save, as before) and spread
        # its up/down values over a fixed-length column.
        self.mat2save = [[], []]
        self.mat2save[0] = Catche.opickle(fingerprintDir + label + ' sig')
        self.mat2save[1] = Catche.opickle(fingerprintDir + label + ' updown')
        column = [2] * 17300
        for idx, pos in enumerate(self.mat2save[0]):
            # Positions are shifted by 700 before being used as an index.
            column[int(pos) + 700] = self.mat2save[1][idx]
        return np.array([column])

    def plotColumn(panel, column):
        # Draw one column as a colour strip inside the given panel.
        panel.SetBackgroundColour('NAVY')
        plotter = PlotNotebook(panel)
        axes = plotter.add('figure 1').gca()
        plt.spectral()
        axes.pcolor(column.transpose(), vmin=-.1, vmax=1.1)
        axes.set_ylim(0, 18000)
        axes.set_xlim(0, 1)
        return plotter, axes

    big = self.BigPanel
    big.plotPanel2.Show(False)
    big.plotPanel.Show(False)
    f1 = self.box1.GetValue()
    print(f1)
    f2 = self.box2.GetValue()
    print(f2)

    mat2plot1 = buildColumn(f1)
    big.plotPanel = wx.Panel(big, -1, style=wx.RAISED_BORDER,
                             pos=(98, 39), size=(80, 770))
    big.plotter, big.axes1 = plotColumn(big.plotPanel, mat2plot1)

    mat2plot = buildColumn(f2)
    big.plotPanel2 = wx.Panel(big, -1, style=wx.RAISED_BORDER,
                              pos=(200, 39), size=(80, 770))
    big.plotter2, big.axes2 = plotColumn(big.plotPanel2, mat2plot)

    # Fraction of positions at which the two columns hold the same value.
    match = sum((1 for a, b in zip(mat2plot[0], mat2plot1[0]) if a == b), 0.0)
    self.box3.Clear()
    self.box3.write(str(match / (1.0 * len(mat2plot[0]))))
print i pods = [i,i+1,i+2] pool.map(cS.countSort,pods) i+=3 diff = lenX - i baggage = [] if diff > 0: while diff > 0: baggage.append(i) i += 1 diff -= 1 pool.map(cS.countSort,baggage) i = 0 while i < lenX: sorts = Catche.opickle(r'.\GeneExpressions\CurrentCel/SortAndRank' + names[i] + r'.pickle') sortedVals.append(sorts[0]) rankerVals.append(sorts[1]) print 'ranked and sorted' i += 1 sortedVals = nP.array(sortedVals) meanVals = nP.mean(sortedVals, axis=0) for i,n in enumerate(names): out.append([]) for mV in meanVals: out[i+1].append(0) for j,mV in enumerate(meanVals): out[i+1][rankerVals[i][j]] = mV print 'done qn' i = 0
print i pods = [i, i + 1, i + 2] pool.map(cS.countSort, pods) i += 3 diff = lenX - i baggage = [] if diff > 0: while diff > 0: baggage.append(i) i += 1 diff -= 1 pool.map(cS.countSort, baggage) i = 0 while i < lenX: sorts = Catche.opickle(r'.\GeneExpressions\CurrentCel/SortAndRank' + names[i] + r'.pickle') sortedVals.append(sorts[0]) rankerVals.append(sorts[1]) print 'ranked and sorted' i += 1 sortedVals = nP.array(sortedVals) meanVals = nP.mean(sortedVals, axis=0) for i, n in enumerate(names): out.append([]) for mV in meanVals: out[i + 1].append(0) for j, mV in enumerate(meanVals): out[i + 1][rankerVals[i][j]] = mV print 'done qn' i = 0
def doSearch(self, event):
    """Plot the two selected fingerprints side by side and show their overlap.

    The names typed into ``box1`` and ``box2`` select the ``<name> sig`` and
    ``<name> updown`` pickles in the DatabaseFingerprints directory.  Each
    fingerprint becomes a 17300-entry column (default value 2) drawn in its
    own panel, and the fraction of positions where both columns agree is
    written to ``box3``.

    Args:
        event: The triggering GUI event; not used.

    Returns:
        None.

    NOTE(review): the old panels are only hidden, never destroyed, so a new
    pair of panels accumulates on every search.
    """
    fingerprintDir = r"C:\Users\francis\Documents\DatabaseFingerprints/"

    def buildColumn(label):
        # Load one fingerprint (kept on self.mat2save, as before) and spread
        # its up/down values over a fixed-length column.
        self.mat2save = [[], []]
        self.mat2save[0] = Catche.opickle(fingerprintDir + label + ' sig')
        self.mat2save[1] = Catche.opickle(fingerprintDir + label + ' updown')
        column = [2] * 17300
        for idx, pos in enumerate(self.mat2save[0]):
            # Positions are shifted by 700 before being used as an index.
            column[int(pos) + 700] = self.mat2save[1][idx]
        return np.array([column])

    def plotColumn(panel, column):
        # Draw one column as a colour strip inside the given panel.
        panel.SetBackgroundColour('NAVY')
        plotter = PlotNotebook(panel)
        axes = plotter.add('figure 1').gca()
        plt.spectral()
        axes.pcolor(column.transpose(), vmin=-.1, vmax=1.1)
        axes.set_ylim(0, 18000)
        axes.set_xlim(0, 1)
        return plotter, axes

    big = self.BigPanel
    big.plotPanel2.Show(False)
    big.plotPanel.Show(False)
    f1 = self.box1.GetValue()
    print(f1)
    f2 = self.box2.GetValue()
    print(f2)

    mat2plot1 = buildColumn(f1)
    big.plotPanel = wx.Panel(big, -1, style=wx.RAISED_BORDER,
                             pos=(98, 39), size=(80, 770))
    big.plotter, big.axes1 = plotColumn(big.plotPanel, mat2plot1)

    mat2plot = buildColumn(f2)
    big.plotPanel2 = wx.Panel(big, -1, style=wx.RAISED_BORDER,
                              pos=(200, 39), size=(80, 770))
    big.plotter2, big.axes2 = plotColumn(big.plotPanel2, mat2plot)

    # Fraction of positions at which the two columns hold the same value.
    match = sum((1 for a, b in zip(mat2plot[0], mat2plot1[0]) if a == b), 0.0)
    self.box3.Clear()
    self.box3.write(str(match / (1.0 * len(mat2plot[0]))))
temp = numpy.median(rowFx) ret = [] for c in colFx: ret.append(c + temp) return ret names = [] geoMat = [] Xes = os.listdir(r'.\Gene Expressions\CurrentCel') for x in Xes: if x[-7:] == r'.CEL.gz': names.append(x[:-7]) geoMat.append([x[:-7]]) i = 0 ProbeSet = Catche.opickle(r'.\Gene Expressions\CurrentCel/ProbeSets.pickle') rollingSum = 0 valMat = [] for n in names: valMat.append( Catche.opickle(r'.\Gene Expressions\CurrentCel\RMAPreSum' + n + r'.pickle')) print 'begin sum' while i < len(ProbeSet): print i rHoMat = [] for n, na in enumerate(names): k = rollingSum rHoMat.append([]) while k < ProbeSet[i][1]: rHoMat[-1].append(valMat[n][k])
temp = numpy.median(rowFx) ret = [] for c in colFx: ret.append(c + temp) return ret names = [] geoMat = [] Xes = os.listdir(r'.\Gene Expressions\CurrentCel') for x in Xes: if x[-7:] == r'.CEL.gz': names.append(x[:-7]) geoMat.append([x[:-7]]) i = 0 ProbeSet = Catche.opickle(r'.\Gene Expressions\CurrentCel/ProbeSets.pickle') rollingSum = 0 valMat = [] for n in names: valMat.append(Catche.opickle(r'.\Gene Expressions\CurrentCel\RMAPreSum' + n + r'.pickle')) print 'begin sum' while i < len(ProbeSet): print i rHoMat = [] for n,na in enumerate(names): k = rollingSum rHoMat.append([]) while k < ProbeSet[i][1]: rHoMat[-1].append(valMat[n][k]) k += 1 temp = getMedPol(rHoMat)