def getSumsMatrixAndNames(self, condvari, condvarj, summarize_min_prob=1e10):
    """Return the tuple (mat, rownames, colnames) for the pair of variables.

    The cube is looked up under (condvari, condvarj); when only the
    (condvarj, condvari) entry exists, its first two axes are swapped.

    The rows of the result follow the string domain of condvari followed by
    its number domain (the latter obtained through numberDomain() with
    summarize_min_prob); the columns follow the ids returned by
    domain_descr_id() for condvarj.

    mat has shape (1+n, 1+w, d):
      - row 0 holds the sums over all rows of the cube, column 0 holds the
        sums over all selected columns;
      - row 1+i accumulates the cube rows listed in the i-th domain entry.

    Both name lists start with the label '/*/' for the summary row/column.
    Note that the description list returned by domain_descr_id() is
    modified in place when that label is inserted.
    """
    try:
        cube = self.cubes[(condvari, condvarj)]
    except KeyError:
        cube = transpose(self.cubes[(condvarj, condvari)], (1, 0, 2))

    row_domain = (self.stringDomain(condvari)
                  + self.numberDomain(condvari, condvarj, summarize_min_prob))
    row_names = ['/*/'] + [str(interval) for interval, _ in row_domain]
    col_names, col_ids = self.var_domains[condvarj].domain_descr_id()

    # Keep only (and reorder) the columns belonging to condvarj's domain.
    values = take(cube[:, :, :], col_ids, axis=1)
    _, w, d = values.shape
    n = len(row_domain)

    mat = zeros((1 + n, 1 + w, d), numarray.typefrom(values))

    # Summary row: per-column totals, then the grand total in the corner.
    column_totals = sum(values, 0)
    mat[0, 1:1 + w, :] = column_totals
    mat[0, 0, :] = sum(column_totals)

    for i, (_, member_ids) in enumerate(row_domain):
        target = 1 + i
        for member in member_ids:
            cube_row = values[member]
            mat[target, 1:, :] += cube_row
            mat[target, 0, :] += sum(cube_row)

    col_names.insert(0, '/*/')
    return mat, row_names, col_names
def to_diagonal(a):
    """Return a square matrix carrying the elements of 'a' on its diagonal.

    'a' must be one-dimensional (checked with an assert).  The result has
    shape (len(a), len(a)), is zero everywhere off the diagonal, and is
    created with the type reported by numarray.typefrom(a).
    """
    assert len(a.shape) == 1
    size = len(a)
    diagonal = zeros(shape=(size, size), type=numarray.typefrom(a))
    for position, entry in enumerate(a):
        diagonal[position, position] = entry
    return diagonal
def to_diagonal(a):
    """Expand the 1-D array 'a' into a diagonal matrix.

    An assert rejects input that is not one-dimensional.  The returned
    (n, n) array, n = len(a), is zero-filled except for entry [i, i],
    which is a[i]; its type comes from numarray.typefrom(a).
    """
    assert len(a.shape) == 1
    order = len(a)
    result = zeros(shape=(order, order), type=numarray.typefrom(a))
    for idx, value in zip(range(order), a):
        result[idx, idx] = value
    return result
def _check_input(array):
    """Classify 'array' by its shape and element type.

    Returns:
        RGB when the array is 3-dimensional, its last dimension is 3 and
        its type is UInt8; otherwise, for a 2-dimensional array, the entry
        of _inverse_typecodes registered for its type.

    Raises:
        ValueError: for any other shape, or for a 2-dimensional array whose
        type has no entry in _inverse_typecodes.
    """
    shape = array.shape
    typecode = n.typefrom(array)
    rank = len(shape)

    if rank == 3 and shape[2] == 3 and typecode == n.UInt8:
        return RGB
    # 'in' replaces the deprecated dict.has_key(), which no longer exists
    # in Python 3; the lookup semantics are the same.
    if rank == 2 and typecode in _inverse_typecodes:
        return _inverse_typecodes[typecode]
    raise ValueError('Array is not one of the acceptable types (UInt8 * 3, UInt8, UInt16, UInt32, Float64, Complex64)')
def numberDomain(self, varname, condvar, summarize_min_prob=-1.):
    """Return an ordered list of (interval, indexes) for the numerical part
    of the domain of varname.

    interval is an instance of Interval; indexes is the list of cube-indexes
    of varname whose individual intervals, taken together, make up interval
    (so the cube slices at those indexes must be summed to obtain the values
    corresponding to interval).

    When summarize_min_prob <= 0 every interval is returned on its own.
    Otherwise consecutive intervals are merged until the largest conditional
    probability accumulated within the group reaches summarize_min_prob;
    whatever is left when the domain is exhausted forms the last group.

    An empty numerical domain is returned unchanged.
    """
    numeric_part = self.var_domains[varname].number_domain()
    if not numeric_part:
        return numeric_part
    if summarize_min_prob <= 0.:
        return [(interval, [index]) for interval, index in numeric_part]

    try:
        cube = self.cubes[(varname, condvar)]
    except KeyError:
        cube = transpose(self.cubes[(condvar, varname)], (1, 0, 2))

    numeric_ids = [index for _, index in numeric_part]

    # Only the "counts" (value index 0) are used.  The 1e-6 offset keeps the
    # division below safe when a column sums to zero.
    counts = cube[:, :, 0]
    column_totals = abs(sum(counts, 0)) + 1e-6
    # Restrict the conditional probabilities to the numerical domain.
    condprobs = take(counts / column_totals, numeric_ids)
    nrows, ncols = condprobs.shape

    groups = []
    pending_interval = None
    pending_ids = []
    pending_probs = zeros(ncols, numarray.typefrom(condprobs))
    for i in range(nrows):
        interval, index = numeric_part[i]
        if pending_interval:
            # Extend the pending interval up to the upper bound of this one.
            pending_interval = Interval(pending_interval.include_low,
                                        pending_interval.low,
                                        interval.high,
                                        interval.include_high)
        else:
            pending_interval = interval
        pending_ids.append(index)
        pending_probs += condprobs[i]
        if max(pending_probs) >= summarize_min_prob or i == nrows - 1:
            groups.append((pending_interval, pending_ids))
            pending_interval = None
            pending_ids = []
            pending_probs[:] = 0.
    return groups
def enlarge(self, idx):
    """Reallocate self.data so that position idx fits along the leading axes.

    idx may be a single int or a sequence with one entry per leading axis.
    Each covered axis grows to at least idx[k]+1 and never shrinks.  A new
    zero-filled array is always allocated (with the type reported by
    numarray.typefrom(self.data)) and the existing contents are copied into
    its low-index corner before self.data is rebound to it.
    """
    if isinstance(idx, int):
        idx = [idx]  # a lone int addresses the first axis only

    current_shape = self.data.shape
    target_shape = list(current_shape)
    for axis, position in enumerate(idx):
        target_shape[axis] = max(target_shape[axis], position + 1)

    grown = zeros(target_shape, numarray.typefrom(self.data))
    # Region of the new array that coincides with the old one.
    corner = [slice(0, extent) for extent in current_shape]
    grown[corner] = self.data
    self.data = grown
def trimmedNumberDomain(self, varname, condvar, summarize_remove_n, summarize_min_prob):
    """Return an ordered list of (interval, indexes) for the numerical part
    of the domain of varname, after merging away the least probable intervals.

    interval is an instance of Interval; indexes is a tuple of the
    cube-indexes of varname whose individual intervals, taken together, make
    up interval (so the cube slices at those indexes must be summed to
    obtain the values corresponding to interval).

    Merging procedure, repeated while at least two intervals remain:
      - find the interval whose largest conditional probability is smallest;
      - stop if summarize_remove_n merges have already been done, or if
        that probability is >= summarize_min_prob;
      - otherwise merge it with the adjacent interval having the smaller
        largest-probability (the lower neighbour on a tie; the only
        neighbour at either end), summing their conditional probabilities;
      - stop if the union of the two intervals is not an interval
        (Interval addition raises ValueError).

    The domain is returned untouched when it is empty, or when
    summarize_remove_n <= 0 and summarize_min_prob >= 1.

    NOTE(review): an earlier description of this method said intervals
    below summarize_min_prob are removed *in addition to* the
    summarize_remove_n smallest ones; the code stops as soon as either
    limit is hit.  Confirm which behaviour is intended.
    """
    num_domain = self.var_domains[varname].number_domain()
    summarized_domain = [(interval, (index,)) for interval, index in num_domain]
    if (summarize_remove_n <= 0 and summarize_min_prob >= 1.) or len(summarized_domain) == 0:
        return summarized_domain

    try:
        cube = self.cubes[(varname, condvar)]
    except KeyError:
        cube = self.cubes[(condvar, varname)]
        cube = transpose(cube, (1, 0, 2))

    # The loop variable used to be called 'xrange', shadowing the builtin.
    num_ids = [index for _interval, index in num_domain]

    # Only the "counts" (value index 0) are used.  The 1e-6 offset keeps the
    # division below safe when a column sums to zero.
    counts = cube[:, :, 0]
    count_sums = abs(sum(counts, 0)) + 1e-6
    condprobs = counts / count_sums
    # Restrict the conditional probabilities to the numerical domain.
    condprobs = take(condprobs, num_ids)
    nrows, ncols = condprobs.shape

    nremoved = 0
    # A merge needs two neighbours: with a single interval left the original
    # 'while l>0' loop indexed summarized_domain[1] and raised IndexError.
    while nrows > 1:
        maxprobs = array([max(p) for p in condprobs])
        k = argmin(maxprobs)
        minprob = maxprobs[k]
        if nremoved >= summarize_remove_n or minprob >= summarize_min_prob:
            break

        # Choose the pair (k_a, k_b = k_a + 1) of adjacent rows to merge.
        if k == 0:
            k_a, k_b = 0, 1
        elif k == nrows - 1:
            k_a, k_b = nrows - 2, nrows - 1
        elif maxprobs[k - 1] <= maxprobs[k + 1]:
            k_a, k_b = k - 1, k
        else:
            k_a, k_b = k, k + 1

        interval_a, ids_a = summarized_domain[k_a]
        interval_b, ids_b = summarized_domain[k_b]
        try:
            union_interval = interval_a + interval_b
        except ValueError:
            # The union of the 2 intervals is not an interval: give up.
            break
        summarized_domain[k_a] = (union_interval, ids_a + ids_b)
        del summarized_domain[k_b]

        # Rebuild the probability table with one row fewer: rows up to k_a
        # are copied, rows from k_b on are shifted up by one, so row k_a
        # ends up holding the sum of the two merged rows.
        newcondprobs = zeros((nrows - 1, ncols), numarray.typefrom(condprobs))
        newcondprobs[0:k_b] += condprobs[0:k_b]
        newcondprobs[k_a:] += condprobs[k_b:]
        condprobs = newcondprobs
        nrows -= 1
        nremoved += 1
    return summarized_domain