def __call__(self, input, error=False):
    """
    Evaluate the wrapped function on `input` with a constant 1 appended.

    A rank-1 input is extended with the single element 1; any other
    input is extended by joining the nested sequence [[1]] onto it.
    The augmented input is handed to self._fn together with the
    `error` flag, and whatever self._fn returns is returned unchanged.
    """
    # Pick the shape of the appended constant to match the input's rank.
    constant = [1] if rank(input) == 1 else [[1]]
    return self._fn(join((input, constant)), error=error)
def learn_step(self, input, output):
    """
    Run one learning step of the wrapped function on an augmented input.

    A rank-1 input is first reshaped into a column of shape
    (len(input), 1).  The nested sequence [[1]] is then joined onto the
    input, and the result is passed with `output` to
    self._fn.learn_step.  Nothing is returned.
    """
    as_column = reshape(input, (len(input), 1)) if rank(input) == 1 else input
    augmented = join((as_column, [[1]]))
    self._fn.learn_step(augmented, output)
def learn_batch(self, data):
    """
    Fit the weights to a batch of examples by linear least squares.

    data is a sequence of (x, y) pairs.  The x values are stacked into
    one array and the y values into another.  When self.use_bias is
    set, a column of ones is appended to the inputs.  The least-squares
    solution from linalg.lstsq is stored in self.w.
    """
    inputs = array([x for x, _ in data])
    targets = array([y for _, y in data])

    if self.use_bias:
        bias_column = ones((len(inputs), 1))
        inputs = join((inputs, bias_column), axis=1)

    # lstsq returns (solution, residuals, rank, singular values);
    # only the solution is kept.
    solution = linalg.lstsq(inputs, targets)
    self.w = solution[0]
def plot_avg(self, x=None, y=None, title=None, replot=False, step=1,
             errorbars='conf', lw=1):
    """
    Plot the average over a set of Y values with error bars.

    By default the bars indicate the 95% confidence interval of the
    sample mean at each point (i.e. stderr * 1.96).

    y = A sequence of sequences of Y values to average.  If not all
        sequences are of equal length, the length of the shortest
        sequence is used for all.
    x = (optional) A single sequence of X values corresponding to the
        Ys.  When omitted or empty, 0, 1, 2, ... is used.
    title = The title of the average plot.
    replot = Keep the old contents of the plot window.  default = False
    step = Plot the average at every Nth point.  (default = 1)
    errorbars = What statistic to use for error bars, one of:
        'conf'   -> 95% confidence interval (stderr * 1.96)
        'stderr' -> Standard error
        'stddev' -> Standard deviation
        'var'    -> Variance
    lw = Line width used in the gnuplot style strings.  (default = 1)

    Raises ValueError if errorbars is not one of the names above.
    """
    from numpy import concatenate as join

    # Truncate every series to the shortest one and stack them as rows.
    N = min(map(len, y))
    mean, var, stderr = utils.stats(join([array([a[:N]]) for a in y], axis=0))

    # Choose the error-bar statistic before drawing anything, so that a
    # bad `errorbars` value does not leave a half-drawn plot behind.
    if errorbars == 'conf':
        bars = stderr * 1.96
    elif errorbars == 'stderr':
        bars = stderr
    elif errorbars == 'stddev':
        bars = sqrt(var)
    elif errorbars == 'var':
        bars = var
    else:
        # String exceptions are not valid Python; raise a real exception.
        raise ValueError('Unknown error bar type: "%s"' % errorbars)

    # A replot advances to the next line type; a fresh plot restarts at 1.
    if replot:
        self.current_style += 1
    else:
        self.current_style = 1

    # `with` is a reserved word, so the option has to travel in a dict.
    # NOTE(review): newer Gnuplot.py releases may spell this option
    # 'with_' -- confirm against the installed version.
    self.plot(x=x, y=mean,  # title=title,
              step=step, replot=replot,
              **{'with': 'lines lt %d lw %d' % (self.current_style, lw)})

    # `not x` is ambiguous for array-like x; test for "missing" explicitly.
    if x is None or len(x) == 0:
        x = range(len(mean))

    # Materialise the points: zip() is lazy on Python 3.
    self.plot(pts=list(zip(x, mean, bars)),
              title=title,
              step=step, replot=1,
              **{'with': 'errorbars lt %d lw %d' % (self.current_style, lw)})
def del_unit(self, x):
    """
    Remove unit x from the network and renumber the remaining units.

    Calls self.shrink_callback(x) first if a callback is set.  The
    connection table of unit x is deleted, every connection key larger
    than x in the remaining tables is decremented by one, and row x is
    dropped from self.weights, self.error and self.dists.

    No remaining unit may still be connected to x (checked with an
    assert).
    """
    self.verbose("Deleting unit", x)
    if self.shrink_callback:
        self.shrink_callback(x)

    # Drop unit x's own connection table; later tables shift down by one.
    del self.connections[x]

    # Rebuild every remaining table with neighbour indices above x
    # shifted down to match the new numbering.
    for unit, old_links in enumerate(self.connections):
        assert all(x != key for key in old_links)
        renumbered = dict((key - 1 if key > x else key, age)
                          for key, age in old_links.items())
        self.verbose("old connections for unit", unit, "=", old_links)
        self.verbose("new connections for unit", unit, "=", renumbered)
        self.connections[unit] = renumbered

    # The upper bound is fixed from the weights *before* any array is
    # replaced, and is used for all three arrays.
    count = len(self.weights)

    def without_row(rows):
        # Everything before x joined with everything after x.
        return join((rows[:x], rows[x + 1:count]))

    self.weights = without_row(self.weights)  # weights for unit x
    self.error = without_row(self.error)      # error accumulator for unit x
    self.dists = without_row(self.dists)      # distance value for unit x
def _combine(self, q, Xs, Ys, weights):
    """
    Estimate the output at query point q by weighted least squares.

    Xs and Ys are the stored inputs and outputs and `weights` their
    weights.  Each row of X and Y is scaled by its weight and a linear
    model is fitted with linalg.lstsq.  When self.ridge_range is set,
    one extra row per input dimension is appended: a diagonal entry
    drawn uniformly from [0, ridge_range) with a zero target.

    Returns (estimate, stderr).  Falls back to
    weighted_average(Ys, weights) when there are fewer points than
    input dimensions, or when lstsq reports no residuals.

    Assumes `weights` supports element-wise `**` (e.g. a numpy array)
    -- TODO confirm against callers.
    """
    def fall_back():
        self.verbose("Falling back to weighted averaging.")
        return weighted_average(Ys, weights)

    query = array(q)
    inputs = array(Xs)
    n_points, n_dims = inputs.shape
    if n_points < n_dims:
        return fall_back()

    targets = array(Ys)

    # Diagonal weight matrix: scales row i of X and Y by weights[i].
    weight_matrix = numpy.identity(len(weights)) * weights
    lhs = numpy.dot(weight_matrix, inputs)
    rhs = numpy.dot(weight_matrix, targets)

    if self.ridge_range:
        penalties = rand.uniform(0, self.ridge_range, (n_dims, 1))
        lhs = join((lhs, numpy.identity(n_dims) * penalties))
        rhs = join((rhs, numpy.zeros((n_dims, 1))))

    coeffs, residuals, _rank, _singular = linalg.lstsq(lhs, rhs)
    if len(residuals) == 0:
        return fall_back()

    estimate = numpy.dot(query, coeffs)

    # The variance is estimated as the residuals divided by the sum of
    # the squared weights.
    variance = residuals / sum(weights ** 2)
    stderr = numpy.sqrt(variance) / numpy.sqrt(sum(weights))
    return estimate, stderr
def gini_coeff(X, pad_len=0, pad_value=0):
    """
    Computes the Gini coefficient of a set of values contained in the
    1-d array X.

    If pad_len > len(X), X is padded out to length pad_len with the
    value pad_value (default 0) before the coefficient is computed.

    With n = len(X) after padding, the value returned is

        sum_i sum_j |x_i - x_j| / (2 * n * n * mmean(X)) * n / (n - 1)

    Raises ZeroDivisionError when exactly one value remains (n - 1 == 0).
    """
    # from http://mathworld.wolfram.com/GiniCoefficient.html
    from numpy import arange, asarray, sort, zeros, concatenate as join

    if pad_len > len(X):
        X = join((X, zeros(pad_len - len(X)) + pad_value))
    n = len(X)

    # O(n log n) form of the pairwise sum.  With the values sorted
    # ascending, the k-th value (0-based) is the larger element of k
    # pairs and the smaller element of n-1-k pairs, so
    #     sum_i sum_j |x_i - x_j| = 2 * sum_k (2k - n + 1) * x_(k)
    ordered = sort(asarray(X, dtype=float))
    G = 2.0 * ((2 * arange(n) - n + 1) * ordered).sum()

    # float(n): with plain ints, n / (n - 1) truncates to an integer
    # under Python 2 division, which silently drops the correction.
    # NOTE(review): mmean is defined elsewhere in this file; presumably
    # the arithmetic mean of X -- confirm.
    return G / (2 * n * n * mmean(X)) * (float(n) / (n - 1))
def gini_coeff(X, pad_len=0, pad_value=0):
    """
    Computes the Gini coefficient of a set of values contained in the
    1-d array X.

    If pad_len > len(X), X is padded out to length pad_len with the
    value pad_value (default 0) before the coefficient is computed.

    With n = len(X) after padding, the value returned is

        sum_i sum_j |x_i - x_j| / (2 * n * n * mmean(X)) * n / (n - 1)

    Raises ZeroDivisionError when exactly one value remains (n - 1 == 0).
    """
    # from http://mathworld.wolfram.com/GiniCoefficient.html
    from numpy import arange, asarray, sort, zeros, concatenate as join

    if pad_len > len(X):
        X = join((X, zeros(pad_len - len(X)) + pad_value))
    n = len(X)

    # O(n log n) form of the pairwise sum.  With the values sorted
    # ascending, the k-th value (0-based) is the larger element of k
    # pairs and the smaller element of n-1-k pairs, so
    #     sum_i sum_j |x_i - x_j| = 2 * sum_k (2k - n + 1) * x_(k)
    ordered = sort(asarray(X, dtype=float))
    G = 2.0 * ((2 * arange(n) - n + 1) * ordered).sum()

    # float(n): with plain ints, n / (n - 1) truncates to an integer
    # under Python 2 division, which silently drops the correction.
    # NOTE(review): mmean is defined elsewhere in this file; presumably
    # the arithmetic mean of X -- confirm.
    return G / (2 * n * n * mmean(X)) * (float(n) / (n - 1))
class GPlot(Gp.Gnuplot):
    """
    A subclass of Gnuplot.Gnuplot that makes some common kinds of
    plotting easier.

    Notably, it's easy to plot a single y variable against the
    integers, without having to explicitly specify the data as a
    sequence of (x,y) pairs.  It also allows the plotting of multiple
    curves in a single call, automatic plotting of averages with error
    bars, and plotting only every Nth point in a data set.  See the
    individual method docs for more details.

    Using GPlot, it's not necessary to create separate GPlot.Data
    objects for data.

    The gnuplot style option is named 'with', which is a reserved word
    in Python, so the methods here pass it through a dict
    (**{'with': ...}) instead of writing it as a keyword argument.
    NOTE(review): newer Gnuplot.py releases may spell this option
    'with_' -- confirm against the installed version.
    """

    def __init__(self, *args, **kw):
        Gp.Gnuplot.__init__(self, *args, **kw)
        # Line type used by plot_avg; advanced on each replot.
        self.current_style = 0

    @staticmethod
    def _has_data(seq):
        """
        True if seq is a non-empty data argument.

        Plain truth testing is ambiguous for array-like objects (e.g.
        numpy arrays with more than one element), so the length is
        checked instead.  Objects without a length are treated as
        present, which is what truth testing did for them.
        """
        if seq is None:
            return False
        try:
            return len(seq) > 0
        except TypeError:
            return True

    def plot(self, x=None, y=None, pts=None, cmd=None,
             replot=False,
             queue_only=False,
             clear_queue=False,
             step=1,
             inline=1,
             **params):
        """
        General data plotting.  Uses keywords to specify data:

        y   -- The Y values to plot
        x   -- The X values for the plotted points. By default,
               x = range(0,len(y)).
        pts -- A sequence of points to plot, similar to the standard
               arg to Gnuplot.Data(), overrides x and y.
        cmd -- Something to plot as-is when neither pts nor y is given.
        replot -- Keep the old plot contents.  Default = False.
        queue_only -- adds data to PlotItem queue, but doesn't plot it;
                      overrides replot=True.  Default = False.
        clear_queue -- Clear the PlotItem queue first.  Default = False.
        step -- Only plot every nth data point, default = 1.
        inline -- Passed to Gnuplot.Data().  Default = 1.

        **params -- Keyword arguments that will be passed to
                    Gnuplot.Data(), e.g. **{'with': 'lines'}

        With no pts, y or cmd, the current plot is simply redrawn.
        """
        if clear_queue:
            self._clear_queue()

        if queue_only:
            plot_fn = lambda s, d: Gp.Gnuplot._add_to_queue(s, [d])
        elif replot:
            plot_fn = Gp.Gnuplot.replot
        else:
            plot_fn = Gp.Gnuplot.plot

        if self._has_data(pts):
            if step > 1:
                pts = step_select(pts, step)
            data = Gp.Data(pts, inline=inline, **params)
        elif self._has_data(y):
            if not self._has_data(x):
                x = range(len(y))
            # Materialise the pairs: zip() is lazy on Python 3.
            pts = step_select(list(zip(x, y)), step)
            data = Gp.Data(pts, inline=inline, **params)
        elif cmd:
            data = cmd
        else:
            # Nothing new to draw.  Return here: there is no `data` to
            # hand to plot_fn below.
            return Gp.Gnuplot.replot(self)

        return plot_fn(self, data)

    def plot_multi(self, x=None, y=None, pts=None, title=None,
                   replot=False, **params):
        """
        Plot several data sets.

        y   -- A sequence of sequences of Y coordinates to plot.
        x   -- A *single* sequence of X coordinates to match the Ys,
               (can be blank)
        pts -- A sequence of sequences of (x,y) pairs to plot.
               (overrides x and y).
        replot -- Keep the old plot contents.  Default False
        title  -- A sequence of strings as titles for the respective
                  data sets.
        **params -- keyword args to pass to Gnuplot.Data()

        Raises ValueError if neither pts nor y is given.
        """
        if self._has_data(pts):
            if not title:
                title = [''] * len(pts)
            # The first set honours `replot`; the rest are added on top.
            # Start the loop at the second set so the first one is not
            # drawn twice.
            self.plot(pts=pts[0], title=title[0], replot=replot, **params)
            for p, t in zip(pts[1:], title[1:]):
                self.plot(pts=p, title=t, replot=1, **params)
        elif self._has_data(y):
            if not title:
                title = [''] * len(y)
            self.plot(x=x, y=y[0], title=title[0], replot=replot, **params)
            for p, t in zip(y[1:], title[1:]):
                self.plot(x=x, y=p, title=t, replot=1, **params)
        else:
            # String exceptions are not valid Python; raise a real one.
            raise ValueError("plot_multi requires either pts, or y as an argument")

    def plot_avg(self, x=None, y=None, title=None, replot=False, step=1,
                 errorbars='conf', lw=1):
        """
        Plot the average over a set of Y values with error bars.

        By default the bars indicate the 95% confidence interval of the
        sample mean at each point (i.e. stderr * 1.96).

        y = A sequence of sequences of Y values to average.  If not all
            sequences are of equal length, the length of the shortest
            sequence is used for all.
        x = (optional) A single sequence of X values corresponding to
            the Ys.  When omitted or empty, 0, 1, 2, ... is used.
        title = The title of the average plot.
        replot = Keep the old contents of the plot window.
                 default = False
        step = Plot the average at every Nth point.  (default = 1)
        errorbars = What statistic to use for error bars, one of:
            'conf'   -> 95% confidence interval (stderr * 1.96)
            'stderr' -> Standard error
            'stddev' -> Standard deviation
            'var'    -> Variance
        lw = Line width used in the gnuplot style strings.
             (default = 1)

        Raises ValueError if errorbars is not one of the names above.
        """
        from numpy import concatenate as join

        # Truncate every series to the shortest one and stack as rows.
        N = min(map(len, y))
        mean, var, stderr = utils.stats(join([array([a[:N]]) for a in y], axis=0))

        # Choose the statistic before drawing anything, so that a bad
        # `errorbars` value does not leave a half-drawn plot behind.
        if errorbars == 'conf':
            bars = stderr * 1.96
        elif errorbars == 'stderr':
            bars = stderr
        elif errorbars == 'stddev':
            bars = sqrt(var)
        elif errorbars == 'var':
            bars = var
        else:
            raise ValueError('Unknown error bar type: "%s"' % errorbars)

        # A replot advances to the next line type; a fresh plot
        # restarts at 1.
        if replot:
            self.current_style += 1
        else:
            self.current_style = 1

        self.plot(x=x, y=mean,  # title=title,
                  step=step, replot=replot,
                  **{'with': 'lines lt %d lw %d' % (self.current_style, lw)})

        if not self._has_data(x):
            x = range(len(mean))

        self.plot(pts=list(zip(x, mean, bars)),
                  title=title,
                  step=step, replot=1,
                  **{'with': 'errorbars lt %d lw %d' % (self.current_style, lw)})

    def plot_stddev(self, x=None, y=None, title=None, replot=False, step=1,
                    with_='lines', **kw):
        """
        Plot the standard deviation over a set of Y values.

        y, x, title, replot and step are as for plot_avg.

        with_ -- The gnuplot style to draw with.  Default = 'lines'.
                 The option was originally named `with`, which is a
                 reserved word; it is still accepted under that name
                 via **{'with': ...}, and positional callers are
                 unaffected.

        Raises TypeError for any other unexpected keyword argument.
        """
        style = kw.pop('with', with_)
        if kw:
            raise TypeError("plot_stddev() got unexpected keyword arguments: %s"
                            % ', '.join(sorted(kw)))

        from numpy import concatenate as join

        N = min(map(len, y))
        mean, var, stderr = utils.stats(join([array([a[:N]]) for a in y], axis=0))

        self.plot(x=x, y=sqrt(var), title=title, replot=replot, step=step,
                  **{'with': style})

    def plot_hist(self, bins=None, counts=None, title=None, style=None,
                  replot=False, fill='solid 0.5'):
        """
        Plot a histogram as boxes.

        bins   -- Bin positions; each box is drawn at its bin value
                  plus half the spacing between the first two bins.
        counts -- The height of each box.
        title  -- The title of the plot.
        style  -- (optional) gnuplot line type for the boxes.
        replot -- Keep the old plot contents.  Default = False.
        fill   -- gnuplot fill style.  Default = 'solid 0.5'.
        """
        if style:
            withstr = 'boxes lt %d fs %s' % (style, fill)
        else:
            withstr = 'boxes fs %s' % fill

        bins = array(bins)
        self.plot(x=bins + (bins[1] - bins[0]) / 2.0,
                  y=counts,
                  replot=replot,
                  title=title,
                  **{'with': withstr})

    def replot(self, **params):
        """
        Same as plot(replot=True, **params).
        """
        self.plot(replot=True, **params)
def train(self, X, error=None):
    """
    Present one input X to the network and run one adaptation step.

    The step moves the winning unit and its neighbours toward X, ages
    and prunes edges, removes units left without edges, and inserts a
    new unit whenever self.time_to_grow() says so.

    X     -- The input vector.
    error -- (optional) The error to accumulate on the winning unit.
             When None, the squared distance self.dists[s_1]**2 is
             used (normalised if self.normalize_error is set).
    """
    self.debug("Training on input:", X)

    self.present_input(X)
    self.count += 1

    # (roman numeral comments from fritzke's algorithm in
    # B. Fritzke, Unsupervised ontogenetic networks, in Handbook
    # of Neural Computation, IOP Publishing and Oxford University
    # Press, 1996)  [ replacing \zeta with X ]

    # (iii) Determine units s_1 and s_2 (s_1,s_2 \in A) such that
    #       |w_{s_1} - X| <= |w_c - X| (\forall c \in A)
    #     and
    #       |w_{s_2} - X| <= |w_c - X| (\forall c \in A\\s_1)
    s_1, s_2 = self.winners(2)

    # (iv) If it does not already exist, insert a connection between
    #      s1 and s2; in any case, set the age of the connection to zero
    self.add_connection(s_1, s_2)

    # (v) Add the squared distance between the input signal and the
    # nearest unit in input space to a local error variable
    #
    # `is None`, not `== None`: equality is element-wise for arrays.
    # NOTE(review): the normalisation is applied only to the
    # internally computed error, not to a caller-supplied one --
    # confirm this nesting is what was intended.
    if error is None:
        error = self.dists[s_1] ** 2
        if self.normalize_error:
            error = sqrt(error) / norm(X)
    self.error[s_1] += error

    # (vi) Move s_1 and its direct topological neighbors towards
    # X by fractions e_b and e_n, respectively, of the total
    # distance.
    self.weights[s_1] += self.e_b * (X - self.weights[s_1])
    for n in self.connections[s_1]:
        self.weights[n] += self.e_n * (X - self.weights[n])

    # (vii) Increment the age of all edges emanating from s_1
    for n in self.connections[s_1]:
        self.connections[n][s_1] += 1
        self.connections[s_1][n] += 1

    # (viii) Remove edges with an age larger than max_age....
    for a, connection_dict in enumerate(self.connections):
        # Iterate over a snapshot: del_connection presumably removes
        # entries from these dicts, and a dict must not change size
        # while it is being iterated.
        for b, age in list(connection_dict.items()):
            if age > self.max_age:
                self.del_connection(a, b)

    # (viii) ... If this results in units having no emanating
    # edges, remove them as well.
    to_be_deleted = [a for a, d in enumerate(self.connections) if not d]
    # sort the list in descending order, so deleting lower numbered
    # units doesn't screw up the connections
    to_be_deleted.sort(reverse=True)
    if to_be_deleted:
        self.verbose("Deleting units", to_be_deleted)
    for a in to_be_deleted:
        self.del_unit(a)

    # (ix) if the number of input signals so far is an integer
    # multiple of a parameter \lambda, insert a new unit as
    # follows.
    if self.time_to_grow():
        # o Determine the unit q with the maximum accumulated error.
        # o Interpolate a new unit r from q and its neighbor f with
        #   the largest error variable
        q, f = self.growth_pair()
        r = len(self.weights)

        new_weights = 0.5 * (self.weights[q] + self.weights[f])
        new_weights.shape = (1, self.dim)
        self.weights = join((self.weights, new_weights), axis=0)

        # NOTE(review): join needs new_distance to be at least 1-d;
        # whether it is depends on what norm returns here -- confirm.
        new_distance = norm(X - new_weights)
        self.dists = join((self.dists, new_distance), axis=0)

        self.connections.append({})

        # o Insert edges connecting the new unit r with units q and f
        #   and remove the original edge between q and f.
        self.verbose("Adding unit", r, "between", q, "and", f,
                     "--- count =", self.count)
        self.add_connection(q, r)
        self.add_connection(r, f)
        self.del_connection(q, f)

        # o Decrease the error variables of q and f
        self.error[q] += -self.alpha * self.error[q]
        self.error[f] += -self.alpha * self.error[f]

        # o Interpolate the error variable of r from q and f
        new_error = array(0.5 * (self.error[q] + self.error[f]))
        new_error.shape = (1, 1)
        self.error = join((self.error, new_error))

        if self.grow_callback:
            self.grow_callback(q, f)

    # (x) Decrease the error variables of all units
    self.error += -self.beta * self.error

    return
def __call__(self, X):
    """
    Return the dot product of X with the weights self.w.

    When self.use_bias is set, a constant input of 1 is appended to X
    before the product is taken.
    """
    inputs = join((X, [1])) if self.use_bias else X
    return dot(inputs, self.w)