def descStats(data):
    """Summarise ``data`` and its base-10 logarithm in a dictionary.

    ``data`` is materialised into a list first, so any iterable is accepted.

    The returned dict always holds the keys ``mean``, ``median``, ``logMean``,
    ``logMedian``, ``stdev``, ``sterr``, ``logStdev`` and ``logSterr``:

    * location keys are 0 for empty input, the single value (or its log10)
      for one-element input, and ``mean()`` / ``median()`` otherwise;
    * spread keys are 0 when fewer than three values are present, and
      ``std()`` / ``stderr()`` otherwise.

    ``mean``, ``median``, ``std`` and ``stderr`` are helpers resolved from the
    enclosing module.
    """
    values = list(data)
    logValues = list(N.log10(values))
    count = len(values)

    # Location statistics: (mean, median, logMean, logMedian).
    if count == 0:
        location = (0, 0, 0, 0)
    elif count < 2:
        only, logOnly = values[0], logValues[0]
        location = (only, only, logOnly, logOnly)
    else:
        location = (mean(values), median(values),
                    mean(logValues), median(logValues))

    # Spread statistics: (stdev, sterr, logStdev, logSterr).
    if count < 3:
        spread = (0, 0, 0, 0)
    else:
        spread = (std(values), stderr(values),
                  std(logValues), stderr(logValues))

    keys = ('mean', 'median', 'logMean', 'logMedian',
            'stdev', 'sterr', 'logStdev', 'logSterr')
    return dict(zip(keys, location + spread))
def gStats(self, missingValue=0.0):
    """Return summary statistics for every key of this mapping.

    For each key ``k`` yielded by ``self.items()`` the values returned by
    ``self.gValues(k, missingValue)`` are converted to an array and summarised.

    Args:
        missingValue: passed through unchanged to ``self.gValues``
            (presumably the substitute for absent values -- confirm against
            ``gValues``).

    Returns:
        dict of ``{key: (min, max, mean, median, std, stderr, normaltest,
        shapiro)}`` where ``normaltest`` is the result of
        ``scipy.stats.normaltest`` (D'Agostino and Pearson) and ``shapiro``
        the result of ``scipy.stats.shapiro`` (Shapiro-Wilk).  Either entry
        is ``None`` when the corresponding test raised an exception.

    Raises:
        Any exception raised while building the summary tuple; the key and
        its values are printed before the exception is re-raised.

    NOTE(review): ``S.array``, ``SS.median``, ``SS.std`` and ``SS.stderr``
    belong to legacy SciPy namespaces -- confirm the SciPy version this
    module is expected to run against.
    """
    import scipy as S
    import scipy.stats as SS

    rv = {}
    for k, _unused in self.items():
        va = S.array(self.gValues(k, missingValue))

        # The normality tests are best-effort: a failure is recorded as None.
        # Catch Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate.
        try:
            normaltest = SS.normaltest(va)
        except Exception:
            normaltest = None
        try:
            shapiro = SS.shapiro(va)
        except Exception:
            shapiro = None

        try:
            rv[k] = (va.min(), va.max(), va.mean(), SS.median(va),
                     SS.std(va), SS.stderr(va), normaltest, shapiro)
        except:
            # Diagnostic only: nothing is swallowed, the error is re-raised.
            print("%s %s" % (k, va))
            raise
    return rv
def analyse(all_results):
    """Reduce each parser's results to a ``(mean, stderr)`` pair of scores.

    ``all_results`` maps a parser name to an iterable of result records; the
    ``'score'`` entry of every record is collected and passed to the
    module-level ``mean`` and ``stderr`` helpers.

    Returns a dict keyed by parser name holding ``(mean(scores),
    stderr(scores))``.
    """
    summary = {}
    for name, entries in all_results.items():
        values = [entry['score'] for entry in entries]
        summary[name] = (mean(values), stderr(values))
    return summary
def test_stderr(self):
    """Compare ``stats.stderr(self.testcase)`` with a hand-derived reference.

    The original author notes that R offers no equivalent, so the reference
    value was obtained as sqrt(var(testcase))/sqrt(4).
    """
    # Disabled companion check on self.shoes[0], kept for reference:
    ## y = stats.stderr(self.shoes[0])
    ## assert_approx_equal(y,0.775177399)
    expected = 0.6454972244
    observed = stats.stderr(self.testcase)
    assert_approx_equal(observed, expected)
def test_stderr(self):
    """Compare ``stats.stderr(self.testcase)`` with a hand-derived reference.

    The original author notes that R offers no equivalent, so the reference
    value was obtained as sqrt(var(testcase))/sqrt(4).
    """
    # Disabled companion check on self.shoes[0], kept for reference:
    ## y = stats.stderr(self.shoes[0])
    ## assert_approx_equal(y,0.775177399)
    expected = 0.6454972244
    observed = stats.stderr(self.testcase)
    assert_approx_equal(observed, expected)
def mean_confidence_interval(data, confidence=0.90):
    """Return the full width of the Student-t confidence interval of the mean.

    The half-width is ``se * t.ppf((1 + confidence) / 2, n - 1)``, where
    ``se`` is the standard error of the mean of ``data`` and ``n`` the number
    of observations.  The returned value is twice that half-width, i.e. the
    distance between the upper and lower bound of the interval (not the
    bounds themselves).

    Args:
        data: sequence of numeric observations.
        confidence: two-sided confidence level, ``0.90`` by default.

    Returns:
        The interval width ``2 * h``.  With fewer than two observations the
        standard error is undefined, so the result is not meaningful
        (expect NaN).
    """
    # Bind exactly the names used below; the previous body imported ``numpy``
    # but referred to ``np`` and ``sp``, neither of which the function defined.
    import numpy as np
    from scipy import stats

    samples = np.array(data)
    n = len(samples)

    # stats.sem (sample std with ddof=1, divided by sqrt(n)) stands in for
    # stats.stderr, which current SciPy releases no longer provide.
    se = stats.sem(samples)

    # Public ppf instead of the private _ppf hook.
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return h * 2
def eval_error_metrics(epoch, model, dataset, log_filename=None):
    """Report mean PSNR and SSIM (with standard errors) over ``dataset``.

    Every item of ``dataset`` is fed to ``model.set_input`` followed by
    ``model.test()``.  For each entry of the item's ``'B_paths'`` the matching
    slice of ``data['B']`` and of ``model.fake_B`` is passed through
    ``tensor2im`` and converted to image mode ``'L'``; the pair is then scored
    with ``compare_ssim`` and ``compare_psnr``.  When the path is not ``None``
    the converted prediction is written there with ``save_image``.

    The summary line is printed and, if ``log_filename`` is given, appended to
    that file.  Nothing is returned.
    """

    def _as_mode_l(tensor):
        # Re-add the leading axis before handing the slice to tensor2im.
        picture = tensor2im(torch.unsqueeze(tensor, 0))
        return np.array(Image.fromarray(picture).convert('L'))

    ssim_scores = []
    psnr_scores = []
    for batch in dataset:
        model.set_input(batch)
        model.test()
        for idx, out_path in enumerate(batch['B_paths']):
            target = _as_mode_l(batch['B'][idx, ...])
            predicted = _as_mode_l(model.fake_B[idx, ...])
            ssim_scores.append(compare_ssim(predicted, target))
            psnr_scores.append(compare_psnr(predicted, target))
            if out_path is not None:
                save_image(predicted, out_path)

    ssim_scores = np.array(ssim_scores)
    ssim_mean = np.mean(ssim_scores)
    ssim_err = stderr(ssim_scores)

    psnr_scores = np.array(psnr_scores)
    psnr_mean = np.mean(psnr_scores)
    psnr_err = stderr(psnr_scores)

    message = '(epoch: %d) PSNR=%s±%s, SSIM=%s±%s' % (
        epoch, psnr_mean, psnr_err, ssim_mean, ssim_err)
    print(message)

    if log_filename is None:
        return
    with open(log_filename, "a") as log_file:
        log_file.write('%s\n' % message)
data['y'] = [] data['xci'] = [] data['yci'] = [] data['label'] = [] for run in sorted(valuemap.keys()): rundata = valuemap[run] row = {} ci = {} # Compute mean of all runs in all buckets for bucket, bucketdata in rundata.iteritems(): bucketarray = np.array(bucketdata) row[bucket] = bucketarray.mean() # determine coinfidence interval if desired if options.ci > 0: ci[bucket] = stats.stderr(bucketarray) * stats.t._ppf((1+options.ci)/2., len(bucketarray)) if xParRegex.pattern not in row: print "Error: x scalar \"" + args[0] + "\" not found, or all runs ignored due to the given include and exclude regexp!" exit(-1) if yParRegex.pattern not in row: print "Error: y scalar \"" + args[1] + "\" not found, or all runs ignored due to the given include and exclude regexp!" exit(-1) data['x'].append(row[xParRegex.pattern]) data['y'].append(row[yParRegex.pattern]) if options.ci > 0: data['xci'].append(ci[xParRegex.pattern]) data['yci'].append(ci[yParRegex.pattern]) else:
row={} ci={} if options.outfile: if options.ci > 0: outcol = np.zeros((3, len(bucketlist))) else: outcol = np.zeros((1, len(bucketlist))) # Compute mean of all runs in all buckets for bucket, bucketdata in rundata.iteritems(): bucketarray = np.array(bucketdata) bucketmean = bucketarray.mean() # determine coinfidence interval if desired if options.ci > 0: bucketci = stats.stderr(bucketarray) * stats.t._ppf((1+options.ci)/2., len(bucketarray)) * options.scale ci[bucket] = bucketci bucketmean*=options.scale row[bucket] = bucketmean if options.outfile: outcol[0, bucketlist.index(bucket)] = bucketmean if options.ci > 0: outcol[1, bucketlist.index(bucket)] = bucketmean - bucketci outcol[2, bucketlist.index(bucket)] = bucketmean + bucketci if options.outfile: outarray = np.vstack((outarray, outcol)) else: # Plot row if options.ci == 0:
row = {} ci = {} if options.outfile: if options.ci > 0: outcol = np.zeros((3, options.range[1] - options.range[0] + 1)) else: outcol = np.zeros((1, options.range[1] - options.range[0] + 1)) # Compute mean of all runs in all buckets for bucket, bucketdata in rundata.iteritems(): bucketarray = np.array(bucketdata) bucketmean = bucketarray.mean() # determine coinfidence interval if desired if options.ci > 0: bucketci = sp.stderr(bucketarray) * sp.t._ppf( (1 + options.ci) / 2., len(bucketarray)) * options.scale ci[bucket] = bucketci bucketmean *= options.scale row[bucket] = bucketmean if options.outfile: outcol[0, bucket - options.range[0]] = bucketmean if options.ci > 0: outcol[1, bucket - options.range[0]] = bucketmean - bucketci outcol[2, bucket - options.range[0]] = bucketmean + bucketci if options.outfile: outarray = np.vstack((outarray, outcol)) else: # Plot row
row = {} ci = {} if options.outfile: if options.ci > 0: outcol = np.zeros((3, len(bucketlist))) else: outcol = np.zeros((1, len(bucketlist))) # Compute mean of all runs in all buckets for bucket, bucketdata in rundata.iteritems(): bucketarray = np.array(bucketdata) bucketmean = bucketarray.mean() # determine coinfidence interval if desired if options.ci > 0: bucketci = stats.stderr(bucketarray) * stats.t._ppf( (1 + options.ci) / 2., len(bucketarray)) * options.scale ci[bucket] = bucketci bucketmean *= options.scale row[bucket] = bucketmean if options.outfile: outcol[0, bucketlist.index(bucket)] = bucketmean if options.ci > 0: outcol[1, bucketlist.index(bucket)] = bucketmean - bucketci outcol[2, bucketlist.index(bucket)] = bucketmean + bucketci if options.outfile: outarray = np.vstack((outarray, outcol)) else: # Plot row
data['y'] = [] data['xci'] = [] data['yci'] = [] data['label'] = [] for run in sorted(valuemap.keys()): rundata = valuemap[run] row = {} ci = {} # Compute mean of all runs in all buckets for bucket, bucketdata in rundata.iteritems(): bucketarray = np.array(bucketdata) row[bucket] = bucketarray.mean() # determine coinfidence interval if desired if options.ci > 0: ci[bucket] = stats.stderr(bucketarray) * stats.t._ppf( (1 + options.ci) / 2., len(bucketarray)) if xParRegex.pattern not in row: print "Error: x scalar \"" + args[ 0] + "\" not found, or all runs ignored due to the given include and exclude regexp!" exit(-1) if yParRegex.pattern not in row: print "Error: y scalar \"" + args[ 1] + "\" not found, or all runs ignored due to the given include and exclude regexp!" exit(-1) data['x'].append(row[xParRegex.pattern]) data['y'].append(row[yParRegex.pattern]) if options.ci > 0: