def save_tables_show_graph(self, desk):
    """Summarise the HMI curve of every species and optionally plot/save it.

    For each bias-correction setting (only ``desk.withCorrection`` when
    ``desk.each_all == 'each'``, otherwise ``False`` then ``True``) every
    entry of ``self.list_species_name_hmi`` is processed in order:

    * the MI and SE lists are read from the entry's ``hmi`` object
      (``arrayMI``/``arraySE`` or their ``...corr`` counterparts),
    * they are scaled by 1000 when ``desk.mnat`` is set and divided by
      ``desk.numOfLetters`` when ``desk.norm`` is set,
    * mean, median, std, max and min are rounded and stored on ``self``
      (``meanY``, ``medianY``, ``stdY``, ``maxY``, ``minY``),
    * graphs are drawn when ``desk.showGraph`` or ``desk.saveGraph`` is set.

    After the species loop, when ``desk.saveData`` is set, the collected
    values are handed to ``hmi.ent.calc_anova`` and
    ``hmi.ent.print_data_summary`` of the last processed species.

    Side effects: ``desk.withCorrection`` is overwritten with the setting
    currently being processed; ``self.failure`` and ``self.error_msg`` are
    set before returning.

    Args:
        desk: settings object; the attributes read are those referenced
            in the body (``each_all``, ``minmax``, ``dicParams``, ``frame``,
            ``mnat``, ``norm``, ...).

    Returns:
        Always ``True``.
    """
    if desk.each_all == 'each':
        bias_corr_list = [desk.withCorrection]
    else:
        bias_corr_list = [False, True]

    # The rounding precision depends only on the unit, so it is fixed once
    # here instead of leaking out of the species loop.
    roundVal = 1 if desk.mnat else 4

    print("-----------------------------------------------------------")

    for bias_corr in bias_corr_list:
        if bias_corr:
            str_correction = ' (bias corr.)'
            filename_correction = '_bias_corr'
        else:
            str_correction = ''
            filename_correction = ''

        speciesParams = []
        listMI_Anova = []
        # Stays None when there is no species at all; the summary section
        # below needs the ``hmi`` of the last processed species.
        hmi = None

        for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
            _, name, species, hmi, sufix = mat

            params = desk.dicParams[species]
            if desk.minmax == 'mincut':
                numOfSeqs = params[2]
                L = params[5]
            else:
                numOfSeqs = params[3]
                L = params[6]

            print("%i) %s - %s, %s" % (iLoop, name, desk.minmax,
                                       "bias corr." if bias_corr else 'no corr.'))

            if desk.organism == 'Drosophila':
                sp = self.dr.mnemonic(species.replace('Drosophila ', ''))
            else:
                sp = species

            desk.withCorrection = bias_corr

            sFrame = '' if desk.frame == 0 else ', frame=' + str(desk.frame)
            std_rand = "" if (desk.label_random == "") else " " + desk.label_random

            title = 'HMI: %s %s, %s %s\n%s%s%s; %i seqs; len=%i%s; #letter=%i' % \
                (desk.organism, species, desk.seqType, desk.gene_title,
                 desk.minmax, str_correction, std_rand, numOfSeqs, L,
                 sFrame, desk.numOfLetters)

            # HMI values and standard errors for each k.
            # dtype=float: the in-place "/=" below raises a casting error on
            # an integer array (e.g. lists filled with integer zeros).
            if bias_corr:
                arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                arraySE = np.array(hmi.arraySEcorr, dtype=float)
            else:
                arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                arraySE = np.array(hmi.arraySE, dtype=float)

            if desk.mnat:
                arrayMIthis *= 1000
                arraySE *= 1000

            if desk.norm:
                arrayMIthis /= desk.numOfLetters
                arraySE /= desk.numOfLetters

            nPoints = len(arrayMIthis)
            if desk.frame < 2:
                # single curve: abscissa is 3, 4, 5, ...
                xSeq = list(range(3, nPoints + 3))
                arrayVal = arrayMIthis
                ySeqSEFinal = arraySE
            else:
                # framed curves: abscissa is 3, 6, 9, ... and every value is
                # repeated three times
                xSeq = list(range(3, 3 * nPoints + 1, 3))
                arrayVal = [val for val in arrayMIthis for _ in range(3)]
                ySeqSEFinal = [se for se in arraySE for _ in range(3)]

            self.meanY = np.round(np.mean(arrayVal), roundVal)
            self.medianY = np.round(np.median(arrayVal), roundVal)
            self.stdY = np.round(np.std(arrayVal), roundVal)
            self.maxY = np.round(np.max(arrayVal), roundVal)
            self.minY = np.round(np.min(arrayVal), roundVal)

            speciesParams.append([species, sp, numOfSeqs, self.meanY,
                                  self.medianY, self.stdY, self.maxY,
                                  self.minY])
            listMI_Anova.append(arrayVal)

            if desk.showGraph or desk.saveGraph:
                if desk.frame < 2:
                    gHist = graphPac.Histogram_FreqDistribution(desk, title)
                    gHist.meanY = self.meanY
                    gHist.medianY = self.medianY
                    gHist.stdY = self.stdY
                    gHist.maxY = self.maxY
                    gHist.minY = self.minY

                    gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal,
                                    ySE=arraySE, showError=True)
                    gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal,
                                    ySE=ySeqSEFinal, showError=True)
                    gHist.densityBar("H", seq=arrayVal)
                else:
                    # NOTE(review): gHist is only created on the frame < 2
                    # path, and desk.frame does not change inside this
                    # method, so this branch fails on an unbound gHist.
                    # Confirm which histogram the extra curves belong to.
                    gHist.sameBar(
                        xSeq=xSeq, arrayMIthis=arrayVal,
                        linestyleCode=self.arrLinestyleCode[desk.frame],
                        color=self.arrColor[desk.frame])

                if (desk.frame == 0) or (desk.frame == 3):
                    pictureName = 'HMI_%s%s' % (sufix, filename_correction)
                    gHist.myPlot.print_graph(desk, gHist.fig, pictureName,
                                             frame=desk.tk_root)

        if desk.saveData:
            if hmi is None:
                # Nothing was processed: there is no hmi object to compute
                # or print a summary with.
                print("No species available; HMI summary not written.")
                continue

            summary_sufix = '%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i' % \
                (desk.organism, desk.minmax, desk.seqType, desk.gene_title,
                 desk.frame, desk.numOfLetters, desk.cutoffLength,
                 desk.cutoffNumSeq)

            std_rand = "" if (desk.label_random == "") else desk.stri_random
            filename = 'HMI_summary_%s%s%s.txt' % (
                summary_sufix, filename_correction, std_rand)

            stri = hmi.ent.calc_anova(listMI_Anova, summary_sufix)
            hmi.ent.print_data_summary(desk, speciesParams,
                                       roundVal=roundVal, filename=filename,
                                       stri=stri)

    self.failure = False
    self.error_msg = 'Task ended. All right.'
    return True
def save_tables_show_graph(self, cmp_file=""):
    """Summarise the HMI curve of every species and optionally plot/save it.

    All settings are read from ``self``. Every entry of
    ``self.list_species_name_hmi`` is processed in order:

    * the MI and SE lists are read from the entry's ``hmi`` object
      (``arrayMI``/``arraySE``, or the ``...corr`` counterparts when
      ``self.withCorrection`` is set),
    * they are scaled by 1000 when ``self.mnat`` is set and divided by
      ``self.numOfLetters`` when ``self.norm`` is set,
    * mean, median, std, max and min are rounded and stored on ``self``
      (``meanY``, ``medianY``, ``stdY``, ``maxY``, ``minY``),
    * graphs are drawn when ``self.showGraph`` or ``self.saveGraph`` is set.

    After the loop, when ``self.saveData`` is set, the collected values are
    handed to ``hmi.ent.calc_anova`` and ``hmi.ent.print_data_summary`` of
    the last processed species.

    Side effects: sets ``self.unit``, ``self.failure`` and
    ``self.error_msg``; deletes ``self.list_species_name_hmi`` and triggers
    a garbage collection before returning.

    Args:
        cmp_file: text appended to the picture name and to the summary
            file name.

    Returns:
        Always ``True``.
    """
    # Function-scope import: the collection at the end must not depend on
    # (or silently skip because of) a module-level import.
    import gc

    bias_corr = self.withCorrection

    if bias_corr:
        str_correction = ' (bias corr.)'
        filename_correction = '_bias_corr'
    else:
        str_correction = ''
        filename_correction = ''

    # Unit and rounding precision depend only on self.mnat, so they are
    # fixed once instead of leaking out of the species loop.
    if self.mnat:
        roundVal = 1
        self.unit = 'mnat'
    else:
        roundVal = 4
        self.unit = 'nat'

    print("-----------------------------------------------------------")

    speciesParams = []
    listMI_Anova = []
    # Stays None when there is no species at all; the summary section below
    # needs the ``hmi`` of the last processed species.
    hmi = None

    for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
        _, name, species, hmi, sufix = mat

        numOfSeqs = self.sampleElems
        L = self.LH0

        print("%i) %s - %s, %s" % (iLoop, name, self.minmax,
                                   "bias corr." if bias_corr else 'no corr.'))

        sp = self.which_sp(self.organism, species)

        sFrame = '' if self.frame == 0 else ', frame=' + str(self.frame)

        title = 'HMI: %s %s, %s %s\n%s%s; %i seqs; len=%i%s; #letter=%i' % \
            (self.organism, species, self.seqType, self.gene, self.minmax,
             str_correction, numOfSeqs, L, sFrame, self.numOfLetters)

        # HMI values and standard errors for each k.
        # dtype=float: the in-place "/=" below raises a casting error on an
        # integer array (e.g. lists filled with integer zeros).
        if bias_corr:
            arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
            arraySE = np.array(hmi.arraySEcorr, dtype=float)
        else:
            arrayMIthis = np.array(hmi.arrayMI, dtype=float)
            arraySE = np.array(hmi.arraySE, dtype=float)

        if self.mnat:
            arrayMIthis *= 1000
            arraySE *= 1000

        if self.norm:
            arrayMIthis /= self.numOfLetters
            arraySE /= self.numOfLetters

        nPoints = len(arrayMIthis)
        if self.frame < 2:
            # single curve: abscissa is 3, 4, 5, ...
            xSeq = list(range(3, nPoints + 3))
            arrayVal = arrayMIthis
            ySeqSEFinal = arraySE
        else:
            # framed curves: abscissa is 3, 6, 9, ... and every value is
            # repeated three times
            xSeq = list(range(3, 3 * nPoints + 1, 3))
            arrayVal = [val for val in arrayMIthis for _ in range(3)]
            ySeqSEFinal = [se for se in arraySE for _ in range(3)]

        self.meanY = np.round(np.mean(arrayVal), roundVal)
        self.medianY = np.round(np.median(arrayVal), roundVal)
        self.stdY = np.round(np.std(arrayVal), roundVal)
        self.maxY = np.round(np.max(arrayVal), roundVal)
        self.minY = np.round(np.min(arrayVal), roundVal)

        speciesParams.append([species, sp, numOfSeqs, self.meanY,
                              self.medianY, self.stdY, self.maxY, self.minY])
        listMI_Anova.append(arrayVal)

        if self.showGraph or self.saveGraph:
            if self.frame < 2:
                gHist = graphPac.Histogram_FreqDistribution(self, title)
                gHist.meanY = self.meanY
                gHist.medianY = self.medianY
                gHist.stdY = self.stdY
                gHist.maxY = self.maxY
                gHist.minY = self.minY
                gHist.desk = self

                gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE,
                                showError=True, unit=self.unit,
                                roundVal=roundVal)
                gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal,
                                ySE=ySeqSEFinal, showError=True,
                                unit=self.unit, roundVal=roundVal)
                gHist.densityBar("H", seq=arrayVal, unit=self.unit,
                                 roundVal=roundVal)
            else:
                # NOTE(review): gHist is only created on the frame < 2 path,
                # and self.frame does not change inside this method, so this
                # branch fails on an unbound gHist. Confirm which histogram
                # the extra curves belong to.
                gHist.sameBar(xSeq=xSeq, arrayMIthis=arrayVal,
                              linestyleCode=self.arrLinestyleCode[self.frame],
                              color=self.arrColor[self.frame])

            if (self.frame == 0) or (self.frame == 3):
                # picture name carries the cmp_file text
                pictureName = 'HMI_%s%s%s' % (sufix, filename_correction,
                                              cmp_file)
                gHist.myPlot.print_graph(self, gHist.fig, pictureName,
                                         frame=self.tk_root, stay=True)

    if self.saveData:
        if hmi is None:
            # Nothing was processed: there is no hmi object to compute or
            # print a summary with.
            print("No species available; HMI summary not written.")
        else:
            summary_sufix = '%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i' % \
                (self.organism, self.minmax, self.seqType, self.gene,
                 self.frame, self.numOfLetters, self.cutoffLength,
                 self.cutoffNumSeq)

            # summary file name carries the cmp_file text
            filename = 'HMI_summary_%s%s%s.txt' % (
                summary_sufix, filename_correction, cmp_file)

            stri = hmi.ent.calc_anova(listMI_Anova, summary_sufix)
            hmi.ent.print_data_summary(self, speciesParams,
                                       roundVal=roundVal, filename=filename,
                                       stri=stri)

    self.failure = False
    self.error_msg = 'Task ended. All right.'

    # Release the per-species data once everything has been reported.
    del self.list_species_name_hmi
    gc.collect()

    return True
def save_tables_show_graph(self, desk, which_module):
    """Summarise Entropy / VMI values per species and optionally plot them.

    When ``desk.each_all == 'each'`` only ``which_module`` and
    ``desk.withCorrection`` are processed; otherwise both modules
    (``'Entropy'``, ``'VMI'``) are combined with both bias-correction
    settings (``False``, ``True``).

    For every combination each entry of ``self.list_species_name_vmi`` is
    processed in order:

    * values and standard errors are read from the entry's ``vmi`` object
      (``HShannon...`` lists for ``'Entropy'``, ``MI...`` lists otherwise),
    * they are divided by ``desk.numOfLetters`` when ``desk.norm`` is set
      and scaled by 1000 when ``desk.mnat`` is set,
    * a species without any value greater than zero is reported and
      skipped,
    * mean, median, std, max and min are rounded and stored on ``self``
      (``meanY``, ``medianY``, ``stdY``, ``maxY``, ``minY``),
    * graphs are drawn when ``desk.showGraph`` or ``desk.saveGraph`` is set.

    After each species loop, when ``desk.saveData`` is set, the collected
    values are handed to ``vmi.ent.calc_anova`` and
    ``vmi.ent.print_data_summary`` of the last species in the list.

    Side effect: ``desk.withCorrection`` is overwritten with the setting
    currently being processed.

    Args:
        desk: settings object; the attributes read are those referenced
            in the body.
        which_module: ``'Entropy'`` or any other value (treated as VMI);
            only used when ``desk.each_all == 'each'``.

    Returns:
        None (the visible code has no explicit return).
    """
    if desk.each_all == 'each':
        list_modules = [which_module]
        bias_corr_list = [desk.withCorrection]
    else:
        list_modules = ['Entropy', 'VMI']
        bias_corr_list = [False, True]

    # The rounding precision depends only on the unit, so it is fixed once
    # here instead of leaking out of the species loop.
    roundVal = 2 if desk.mnat else 4

    print("-----------------------------------------------------------")

    # "module" instead of re-using the parameter name as the loop variable.
    for module in list_modules:
        is_entropy = module == 'Entropy'
        which_symb = 'VHShannon' if is_entropy else 'VMI'

        for bias_corr in bias_corr_list:
            if bias_corr:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            listMI_Anova = []
            speciesParams = []
            # Stays None when there is no species at all; the summary
            # section below needs the ``vmi`` of the last listed species.
            vmi = None

            for iLoop, mat in enumerate(self.list_species_name_vmi, start=1):
                _, name, species, vmi, sufix = mat

                params = desk.dicParams[species]
                if desk.minmax == 'mincut':
                    numOfSeqs = params[2]
                    L = params[5]
                else:
                    numOfSeqs = params[3]
                    L = params[6]

                print("%i) %s - %s %s %s" % (
                    iLoop, name, desk.minmax,
                    "bias corr." if bias_corr else 'no corr.', module))

                if desk.organism == 'Drosophila':
                    sp = self.dr.mnemonic(species.replace('Drosophila ', ''))
                else:
                    sp = species

                desk.withCorrection = bias_corr

                std_rand = "" if (desk.label_random == "") else " " + desk.label_random

                if is_entropy:
                    title = 'Entropy Distribution %s %s, %s %s\n%s%s%s; %i seqs; len=%i; #letter=%i' % \
                        (desk.organism, species, desk.seqType,
                         desk.gene_title, desk.minmax, str_correction,
                         std_rand, numOfSeqs, L, desk.numOfLetters)
                else:
                    title = 'VMI Heat Map %s %s, %s %s %s%s%s\n%i seqs; len=%i; #letter=%i' % \
                        (desk.organism, species, desk.seqType,
                         desk.gene_title, desk.minmax, str_correction,
                         std_rand, numOfSeqs, L, desk.numOfLetters)

                # dtype=float: the in-place "/=" below raises a casting
                # error on an integer array (e.g. lists of integer zeros),
                # which would hide the zero-MI report further down.
                if is_entropy:
                    if bias_corr:
                        arrayVal = np.array(vmi.HShannonCorrList, dtype=float)
                        arraySE = np.array(vmi.SeHShannonCorrList, dtype=float)
                    else:
                        arrayVal = np.array(vmi.HShannonList, dtype=float)
                        arraySE = np.array(vmi.SeHShannonList, dtype=float)
                else:
                    if bias_corr:
                        arrayVal = np.array(vmi.MIcorrList, dtype=float)
                        arraySE = np.array(vmi.SeMICorrList, dtype=float)
                    else:
                        arrayVal = np.array(vmi.MIlist, dtype=float)
                        arraySE = np.array(vmi.SeMIList, dtype=float)

                # normalization dividing by numOfLetters
                if desk.norm:
                    arrayVal /= desk.numOfLetters
                    arraySE /= desk.numOfLetters

                # milli nats
                if desk.mnat:
                    arrayVal *= 1000
                    arraySE *= 1000

                # First position holding the largest value above zero,
                # recorded as [i, j, value, SE]; None when nothing is > 0.
                maxMI = 0
                maxiPos = None
                for pos, value in enumerate(arrayVal):
                    if value > maxMI:
                        maxMI = value
                        i, j = vmi.ijList[pos]
                        maxiPos = [i, j, maxMI, arraySE[pos]]

                if maxiPos is None:
                    stri = '### Species %s has MI = ZERO. Too conserved data sequences. Impossible to include in analysis.' % (species)
                    print(stri)
                    continue

                xSeq = list(range(len(arrayVal)))

                self.meanY = np.round(np.mean(arrayVal), roundVal)
                self.medianY = np.round(np.median(arrayVal), roundVal)
                self.stdY = np.round(np.std(arrayVal), roundVal)
                self.maxY = np.round(np.max(arrayVal), roundVal)
                self.minY = np.round(np.min(arrayVal), roundVal)

                speciesParams.append([species, sp, numOfSeqs, self.meanY,
                                      self.medianY, self.stdY, self.maxY,
                                      self.minY])

                if desk.saveData:
                    listMI_Anova.append(arrayVal)

                if desk.showGraph or desk.saveGraph:
                    if is_entropy:
                        gHist = graphPac.Histogram_FreqDistribution(desk, title)
                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY

                        gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal,
                                        ySE=arraySE, showError=True)
                        gHist.densityBar('V', seq=arrayVal)

                        pictureName = 'VHShannon_%s%s%s' % (
                            sufix, filename_correction, desk.stri_random)
                        gHist.myPlot.print_graph(desk, gHist.fig, pictureName,
                                                 frame=desk.tk_root)
                    else:
                        # The 3D plot has no ceiling; the 2D heat map may
                        # use a shared ceiling, unless the data exceed it.
                        limSup = self.maxY
                        if not desk.is3D and desk.heatmap_ceil:
                            ceil = desk.heatmap_ceil_value
                            if self.maxY <= ceil:
                                limSup = ceil

                        gMI = vmi.plotHeatMap(
                            desk, desk.is3D, arrayVal, vmi.ijList, L,
                            maxiPos, title, species=species, limSup=limSup,
                            roundVal=roundVal, str_correction=str_correction)

                        if desk.is3D:
                            pictureName = 'HeatMap_3D_VMI_%s%s%s' % (
                                sufix, filename_correction, desk.stri_random)
                        else:
                            gMI.densityHeatmapBar(arrayVal, limSup)
                            pictureName = 'HeatMap_2D_VMI_%s%s%s' % (
                                sufix, filename_correction, desk.stri_random)

                        gMI.myPlot.print_graph(desk, gMI.fig, pictureName,
                                               frame=desk.tk_root)

            if desk.saveData:
                if vmi is None:
                    # Nothing was listed: there is no vmi object to compute
                    # or print a summary with.
                    print("No species available; %s summary not written."
                          % which_symb)
                    continue

                summary_sufix = '%s_%s_%s_%s_NOL%i_%iL_cutoff%i' % \
                    (desk.organism, desk.minmax, desk.seqType,
                     desk.gene_title, desk.numOfLetters, desk.cutoffLength,
                     desk.cutoffNumSeq)

                filename = '%s_summary_%s%s%s.txt' % (
                    which_symb, summary_sufix, filename_correction,
                    desk.stri_random)

                stri = vmi.ent.calc_anova(listMI_Anova, summary_sufix)
                vmi.ent.print_data_summary(desk, speciesParams,
                                           roundVal=roundVal,
                                           filename=filename, stri=stri)