def update(self, arr):
    """Update the accumulators of the StatsCollector given a complete
    matrix; assume that all observations have the same weight.
    Properly handle missing values.

    'arr' is a two-dimensional array with one row per observation and
    one column per field; its number of columns must equal
    self.width().  Entries for which isnan() is true are treated as
    missing.

    The per-column counters and sums ignore only the missing entries
    themselves, whereas the cross-product accumulators (sum_xxt,
    sum_nomi, nxxt) are built from the rows selected by
    ~normal_sometrue(missings, 1).
    """
    assert self.width() == arr.shape[1]

    ## Update number of elements counters
    (length, width) = arr.shape
    # Keep a copy of the old n: it is the row offset of this batch and is
    # needed below to turn batch-local argmin/argmax into global positions.
    initial_n = self.n.copy()
    n = zeros(width) + length
    missings = isnan(arr)
    nnan = sum(missings, 0)
    self.n += n
    self.nnan += nnan
    self.nnonnan += n - nnan

    ## Create masked version of arr and update accumulators
    ma = masked_array(arr, mask=missings)   # Here, mask missings only
    # Here, strip missing rows (presumably normal_sometrue is an
    # any-along-axis reduction -- it is defined outside this block).
    arr_nomissings = arr[~normal_sometrue(missings, 1)]
    self.sum = self.sum + sum(ma, 0)             # += does not work...
    self.sum_ssq = self.sum_ssq + sum(ma * ma, 0)  # += does not work...
    self.sum_xxt = self.sum_xxt + matrixmultiply(transpose(arr_nomissings),
                                                 arr_nomissings)
    self.sum_nomi = self.sum_nomi + sum(arr_nomissings, 0)
    self.nxxt += arr_nomissings.shape[0]

    ## Update (arg)min / make sure old argmin is kept if not updated
    ma_argmin = argmin(ma, 0)
    ma_min = ma[ma_argmin, range(width)]
    # Stack the old and the batch minimum: argmin along axis 0 is 1
    # exactly for the columns where the batch value replaces the old one.
    min_newpos = argmin(array([self.min, ma_min]), 0).astype(bool)
    self.min[min_newpos] = ma_min[min_newpos]
    # XXX Argmin computation needs to be revised!  Does not work, at least
    # when passing array of shape (1,1).
    self.argmin[min_newpos] = ma_argmin[min_newpos] + initial_n[min_newpos]

    ## Update (arg)max / make sure old argmax is kept if not updated
    ma_argmax = argmax(ma, 0)
    ma_max = ma[ma_argmax, range(width)]
    max_newpos = argmax(array([self.max, ma_max]), 0).astype(bool)
    self.max[max_newpos] = ma_max[max_newpos]
    # XXX Argmax computation needs to be revised!  Does not work, at least
    # when passing array of shape (1,1).
    # The offset must be selected with max_newpos (not min_newpos): the
    # two masks generally select a different number of columns.
    self.argmax[max_newpos] = ma_argmax[max_newpos] + initial_n[max_newpos]
def printStats(self, os=sys.stdout, pretty=True):
    """Print a nice report with the statistics.

    The report is written to the file-like object 'os' (anything with a
    write() method).  If 'pretty' is set to False, the output will not
    be so nice, but at least will not cause test failures due to a zero
    test blank tolerance.

    If at least one column has no non-missing observation, only a
    one-line notice is written and nothing else is reported.
    """
    if len(nonzero(self.nnonnan)[0]) != len(self.nnonnan):
        os.write("One or more columns in StatsCollector "
                 "does not contain any data\n")
        return                          # Nothing accumulated yet

    stats = self.getStats()
    # Row order of the per-column summary table.
    sk = [
        "N", "NMISSING", "NNONMISSING", "E", "V", "STDDEV", "STDERR",
        "SUM", "SUMSQ", "MIN", "ARGMIN", "MAX", "ARGMAX",
    ]
    m = array([[stats[k][i] for i in range(self.width())] for k in sk])
    _printMatrix(m, sk, self.fieldnames, os, pretty)

    # The section headers go to 'os' as well, so that the whole report
    # ends up in the same stream as the matrices.
    os.write("\nCovariance Matrix:\n")
    _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)

    os.write("\nCorrelation Matrix:\n")
    _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)
def update(self, arr):
    """Update the accumulators of the StatsCollector given a complete
    matrix; assume that all observations have the same weight.
    Properly handle missing values.

    'arr' is a two-dimensional array with one row per observation and
    one column per field; its number of columns must equal
    self.width().  Entries for which isnan() is true are treated as
    missing.

    The per-column counters and sums ignore only the missing entries
    themselves, whereas the cross-product accumulators (sum_xxt,
    sum_nomi, nxxt) are built from the rows selected by
    ~normal_sometrue(missings, 1).
    """
    assert self.width() == arr.shape[1]

    ## Update number of elements counters
    (length, width) = arr.shape
    # Keep a copy of the old n: it is the row offset of this batch and is
    # needed below to turn batch-local argmin/argmax into global positions.
    initial_n = self.n.copy()
    n = zeros(width) + length
    missings = isnan(arr)
    nnan = sum(missings, 0)
    self.n += n
    self.nnan += nnan
    self.nnonnan += n - nnan

    ## Create masked version of arr and update accumulators
    ma = masked_array(arr, mask=missings)   # Here, mask missings only
    # Here, strip missing rows (presumably normal_sometrue is an
    # any-along-axis reduction -- it is defined outside this block).
    arr_nomissings = arr[~normal_sometrue(missings, 1)]
    self.sum = self.sum + sum(ma, 0)             # += does not work...
    self.sum_ssq = self.sum_ssq + sum(ma * ma, 0)  # += does not work...
    self.sum_xxt = self.sum_xxt + matrixmultiply(transpose(arr_nomissings),
                                                 arr_nomissings)
    self.sum_nomi = self.sum_nomi + sum(arr_nomissings, 0)
    self.nxxt += arr_nomissings.shape[0]

    ## Update (arg)min / make sure old argmin is kept if not updated
    ma_argmin = argmin(ma, 0)
    ma_min = ma[ma_argmin, range(width)]
    # Stack the old and the batch minimum: argmin along axis 0 is 1
    # exactly for the columns where the batch value replaces the old one.
    min_newpos = argmin(array([self.min, ma_min]), 0).astype(bool)
    self.min[min_newpos] = ma_min[min_newpos]
    # XXX Argmin computation needs to be revised!  Does not work, at least
    # when passing array of shape (1,1).
    self.argmin[min_newpos] = ma_argmin[min_newpos] + initial_n[min_newpos]

    ## Update (arg)max / make sure old argmax is kept if not updated
    ma_argmax = argmax(ma, 0)
    ma_max = ma[ma_argmax, range(width)]
    max_newpos = argmax(array([self.max, ma_max]), 0).astype(bool)
    self.max[max_newpos] = ma_max[max_newpos]
    # XXX Argmax computation needs to be revised!  Does not work, at least
    # when passing array of shape (1,1).
    # The offset must be selected with max_newpos (not min_newpos): the
    # two masks generally select a different number of columns.
    self.argmax[max_newpos] = ma_argmax[max_newpos] + initial_n[max_newpos]
def printStats(self, os=sys.stdout, pretty=True):
    """Print a nice report with the statistics.

    The report is written to the file-like object 'os' (anything with a
    write() method).  If 'pretty' is set to False, the output will not
    be so nice, but at least will not cause test failures due to a zero
    test blank tolerance.

    If at least one column has no non-missing observation, only a
    one-line notice is written and nothing else is reported.
    """
    if len(nonzero(self.nnonnan)[0]) != len(self.nnonnan):
        os.write("One or more columns in StatsCollector "
                 "does not contain any data\n")
        return                          # Nothing accumulated yet

    stats = self.getStats()
    # Row order of the per-column summary table.
    sk = [
        "N", "NMISSING", "NNONMISSING", "E", "V", "STDDEV", "STDERR",
        "SUM", "SUMSQ", "MIN", "ARGMIN", "MAX", "ARGMAX",
    ]
    m = array([[stats[k][i] for i in range(self.width())] for k in sk])
    _printMatrix(m, sk, self.fieldnames, os, pretty)

    # The section headers go to 'os' as well, so that the whole report
    # ends up in the same stream as the matrices.
    os.write("\nCovariance Matrix:\n")
    _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)

    os.write("\nCorrelation Matrix:\n")
    _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)