Exemple #1
0
    def __call__(self, input, error=False):
        """
        Evaluate the wrapped function on `input` with a constant 1
        appended.

        A rank-1 input gets a plain [1] appended; any other input gets
        a [[1]] row appended.  The augmented input is handed to
        self._fn together with the `error` flag, and whatever self._fn
        returns is returned unchanged.
        """
        # choose a bias term whose nesting matches the input's rank
        bias = [[1]]
        if rank(input) == 1:
            bias = [1]

        return self._fn(join((input, bias)), error=error)
Exemple #2
0
    def learn_step(self, input, output):
        """
        Run one learning step of the wrapped function on a
        bias-augmented input.

        A rank-1 input is first reshaped into a column of shape
        (len(input), 1).  A [[1]] row is then appended and the result
        is passed, with `output`, to self._fn.learn_step.
        """
        features = input
        if rank(features) == 1:
            n_inputs = len(features)
            features = reshape(features, (n_inputs, 1))

        # append the constant bias entry as one extra row
        augmented = join((features, [[1]]))
        self._fn.learn_step(augmented, output)
Exemple #3
0
    def learn_batch(self, data):
        """
        Fit self.w to a batch of (x, y) pairs by linear least squares.

        data = A sequence of (x, y) pairs; the xs become the rows of
               the design matrix and the ys the targets.

        When self.use_bias is true, a column of ones is appended to
        the design matrix before solving.  The solution returned by
        linalg.lstsq is stored in self.w.
        """
        inputs = array([sample for sample, _ in data])
        targets = array([target for _, target in data])

        if self.use_bias:
            bias_column = ones((len(inputs), 1))
            inputs = join((inputs, bias_column), axis=1)

        # lstsq returns (solution, residuals, rank, singular values);
        # only the solution is kept
        solution = linalg.lstsq(inputs, targets)
        self.w = solution[0]
Exemple #4
0
    def plot_avg(self,x=None,y=None,title=None,replot=False,step=1,
                 errorbars='conf',lw=1):

        """
        Plot the average over a set of Y values with error bars
        indicating the 95% confidence interval of the sample mean at
        each point. (i.e. stderr * 1.96)

        y = A sequence of sequences of Y values to average.
            If not all sequences are of equal length, the length
            of the shortest sequence is used for all.
        x = (optional) A single sequence of X values corresponding to
            the Ys.
        title = The title of the average plot.
        replot = Keep the old contents of the plot window.
                 default = False
        step = Plot the average at every Nth point. (default = 1)
        errorbars = What statistic to use for error bars, one of:
                    'conf'   -> 95% confidence interval (stderr * 1.96)
                    'stderr' -> Standard error
                    'stddev' -> Standard deviation
                    'var'    -> Variance
        lw = gnuplot line width used for both the mean line and the
             error bars. (default = 1)

        Raises ValueError if errorbars is not one of the names above.
        """
        from numpy import concatenate as join
        N = min(map(len,y))
        mean,var,stderr = utils.stats(join([array([a[:N]]) for a in y],axis=0))

        # Work out the error bars before anything is drawn, so that an
        # unknown errorbars name does not leave a half-finished plot.
        if errorbars == 'conf':
            bars = stderr * 1.96
        elif errorbars == 'stderr':
            bars = stderr
        elif errorbars == 'stddev':
            bars = sqrt(var)
        elif errorbars == 'var':
            bars = var
        else:
            # string exceptions are rejected by Python >= 2.6
            raise ValueError('Unknown error bar type: "%s"' % errorbars)

        if replot:
            self.current_style += 1
        else:
            self.current_style = 1

        # 'with' is a reserved word from Python 2.6 on, so the gnuplot
        # 'with' option has to be passed through a dict.
        line_style = {'with': 'lines lt %d lw %d'%(self.current_style,lw)}
        bar_style = {'with': 'errorbars lt %d lw %d'%(self.current_style,lw)}

        self.plot(x=x,y=mean,
                  #title=title,
                  step=step,replot=replot,
                  **line_style)

        # explicit emptiness test: "not x" is ambiguous for numpy arrays
        if x is None or len(x) == 0:
            x = range(len(mean))

        # list() keeps the points a real sequence on Python 3, where
        # zip() is a lazy iterator
        self.plot(pts=list(zip(x,mean,bars)),
                  title=title,
                  step=step,replot=1,
                  **bar_style)
Exemple #5
0
    def del_unit(self, x):
        """
        Remove unit number x and renumber the units that follow it.

        Calls self.shrink_callback(x) first if a callback is set.
        Then drops x's entry from self.connections, shifts every
        connection key greater than x down by one, and removes row x
        from self.weights, self.error and self.dists.

        No remaining unit may still be connected to x (this is
        asserted while renumbering).
        """

        self.verbose("Deleting unit", x)

        callback = self.shrink_callback
        if callback:
            callback(x)

        # drop the connection dictionary belonging to unit x
        del self.connections[x]

        # rebuild every remaining dictionary so that unit numbers
        # above x move down by one
        for unit, old_edges in enumerate(self.connections):
            renumbered = {}
            for neighbor, age in old_edges.items():
                assert x != neighbor
                new_key = neighbor
                if neighbor > x:
                    new_key = neighbor - 1
                renumbered[new_key] = age
            self.verbose("old connections for unit", unit, "=", old_edges)
            self.verbose("new connections for unit", unit, "=", renumbered)
            self.connections[unit] = renumbered

        # the upper bound is taken from the weights *before* they are
        # shortened and is reused for all three arrays
        n_units = len(self.weights)

        def drop_row(rows):
            # everything before x joined with everything after x
            return join((rows[0:x], rows[x + 1:n_units]))

        self.weights = drop_row(self.weights)
        self.error = drop_row(self.error)
        self.dists = drop_row(self.dists)
Exemple #6
0
    def _combine(self,q,Xs,Ys,weights):
        """
        Weighted least-squares estimate at the query point q.

        The rows of Xs and Ys are scaled by `weights`, a linear model
        is fitted with linalg.lstsq, and the model is evaluated at q.
        Returns (estimate, stderr).

        Falls back to weighted_average(Ys, weights) when there are
        fewer rows than columns in Xs, or when lstsq reports no
        residuals.

        If self.ridge_range is set, extra rows are added to the system:
        an identity matrix scaled by random values drawn uniformly from
        [0, self.ridge_range), paired with zero targets.
        (assumes Ys is 2-d with one column and weights is a 1-d array
        -- TODO confirm against callers)
        """
        query = array(q)
        design = array(Xs)

        n_rows,n_cols = design.shape

        if n_rows < n_cols:
            self.verbose("Falling back to weighted averaging.")
            return weighted_average(Ys,weights)

        targets = array(Ys)

        # scale both sides of the system by the weights
        weight_matrix = numpy.identity(len(weights))*weights
        lhs = numpy.dot(weight_matrix,design)
        rhs = numpy.dot(weight_matrix,targets)

        if self.ridge_range:
            penalties = rand.uniform(0,self.ridge_range,(n_cols,1))
            lhs = join((lhs,numpy.identity(n_cols) * penalties))
            rhs = join((rhs,numpy.zeros((n_cols,1))))

        # lstsq returns (solution, residuals, rank, singular values)
        coeffs,residuals = linalg.lstsq(lhs,rhs)[:2]

        if len(residuals) == 0:
            self.verbose("Falling back to weighted averaging.")
            return weighted_average(Ys,weights)

        estimate = numpy.dot(query,coeffs)

        # the variance is estimated as the sum of the residuals over
        # the sum of the squared weights
        variance = residuals/sum(weights**2)

        return estimate,numpy.sqrt(variance)/numpy.sqrt(sum(weights))
def gini_coeff(X,pad_len=0,pad_value=0):
    """
    Computes the Gini coefficient of a set of values contained in the
    1-d array X.

    If pad_len > len(X), X is padded out to length pad_len with
    the value pad_value (default 0).

    The result includes the n/(n-1) small-sample correction, so at
    least two values are needed (a single value raises
    ZeroDivisionError, as does data whose mean is zero).
    """

    # from http://mathworld.wolfram.com/GiniCoefficient.html
    #
    # The double sum over all pairs, sum_i sum_j |x_i - x_j|, is
    # computed from the sorted values: with x_(0) <= ... <= x_(n-1),
    # it equals 2 * sum_k (2k - n + 1) * x_(k).  That is O(n log n)
    # instead of the O(n^2) pair loop.

    from numpy import arange,asarray,sort,zeros,concatenate as join
    if pad_len > len(X):
        X = join((X,zeros(pad_len-len(X))+pad_value))
    n = len(X)
    ordered = sort(asarray(X,dtype=float))
    G = 2.0 * ((2*arange(n) - n + 1) * ordered).sum()
    # float(n) keeps the correction factor from being truncated by
    # integer division under Python 2
    return G/(2*n*n*mmean(X)) * (float(n)/(n-1))
Exemple #8
0
def gini_coeff(X, pad_len=0, pad_value=0):
    """
    Computes the Gini coefficient of a set of values contained in the
    1-d array X.

    If pad_len > len(X), X is padded out to length pad_len with
    the value pad_value (default 0).

    The result includes the n/(n-1) small-sample correction, so at
    least two values are needed (a single value raises
    ZeroDivisionError, as does data whose mean is zero).
    """

    # from http://mathworld.wolfram.com/GiniCoefficient.html
    #
    # The double sum over all pairs, sum_i sum_j |x_i - x_j|, is
    # computed from the sorted values: with x_(0) <= ... <= x_(n-1),
    # it equals 2 * sum_k (2k - n + 1) * x_(k).  That is O(n log n)
    # instead of the O(n^2) pair loop.

    from numpy import arange, asarray, sort, zeros, concatenate as join
    if pad_len > len(X):
        X = join((X, zeros(pad_len - len(X)) + pad_value))
    n = len(X)
    ordered = sort(asarray(X, dtype=float))
    G = 2.0 * ((2 * arange(n) - n + 1) * ordered).sum()
    # float(n) keeps the correction factor from being truncated by
    # integer division under Python 2
    return G / (2 * n * n * mmean(X)) * (float(n) / (n - 1))
Exemple #9
0
class GPlot(Gp.Gnuplot):
    """
    A subclass of Gnuplot.Gnuplot that makes some common kinds of
    plotting easier.

    Notably, it's easy to plot a single y variable against the
    integers, without having to explicitly specify the data as a
    sequence of (x,y) pairs.  It also allows the plotting of multiple
    curves in a single call, automatic plotting of averages with error
    bars, and plotting only every Nth point in a data set.  See the
    individual method docs for more details.

    Using GPlot, it's not necessary to create separate GPlot.Data
    objects for data.

    Note: gnuplot's 'with' option is forwarded internally as
    **{'with': ...} because 'with' is a reserved word from Python 2.6
    onwards and cannot be written as a literal keyword argument.
    Callers pass it the same way.
    """

    def __init__(self,*args,**kw):
        Gp.Gnuplot.__init__(self,*args,**kw)
        # gnuplot line type used by plot_avg; advanced on every replot
        self.current_style = 0

    @staticmethod
    def _has_data(seq):
        """
        True if seq is a non-empty sequence.

        Used instead of plain truth testing, which is ambiguous for
        numpy arrays.  Objects without a length fall back to bool().
        """
        if seq is None:
            return False
        try:
            return len(seq) > 0
        except TypeError:
            return bool(seq)

    def plot(self,x=None, y=None, pts=None, cmd=None,
             replot=False, queue_only=False, clear_queue=False,
             step=1, inline=1,
             **params):
        """
        General data plotting.  Uses keywords to specify data:

        y -- The Y values to plot
        x -- The X values for the plotted points.  By
             default, x = range(0,len(y)).

        pts -- A sequence of points to plot, similar to the
               standard arg to Gnuplot.Data(), overrides x and y.
        cmd -- Used as the plot item itself when neither pts nor y
               is given.

        replot -- Keep the old plot contents.  Default = False.
        queue_only -- adds data to PlotItem queue, but doesn't plot it
                      overrides replot=True.  Default=False.
        clear_queue -- clear the PlotItem queue before doing anything
                       else.  Default=False.
        step -- Only plot every nth data point, default = 1.
        inline -- passed through to Gnuplot.Data().  Default = 1.

        **params -- Keyword arguments that will be passed to
                    Gnuplot.Data(), e.g. **{'with': 'lines'}

        With no pts, y or cmd at all, the current plot is simply
        redrawn.
        """

        if clear_queue:
            self._clear_queue()
        if queue_only:
            plot_fn = lambda s,d:Gp.Gnuplot._add_to_queue(s,[d])
        elif replot:
            plot_fn = Gp.Gnuplot.replot
        else:
            plot_fn = Gp.Gnuplot.plot

        if self._has_data(pts):
            if step > 1:
                pts = step_select(pts,step)
            data = Gp.Data(pts,inline=inline,**params)
        elif self._has_data(y):
            if not self._has_data(x):
                x = range(len(y))
            # list() keeps the points a real sequence on Python 3,
            # where zip() is a lazy iterator
            pts = step_select(list(zip(x,y)),step)
            data = Gp.Data(pts,inline=inline,**params)
        elif cmd:
            data = cmd
        else:
            # nothing new to draw: redraw what is already there
            Gp.Gnuplot.replot(self)
            return

        plot_fn(self,data)

    def plot_multi(self,x=None,y=None,pts=None,title=None,
                   replot=False,**params):
        """
        Plot several data sets.

        y -- A sequence of sequences of Y coordinates to plot.
        x -- A *single* sequence of X coordinates to match the Ys,
            (can be blank)
        pts -- A sequence of sequences of (x,y) pairs to
        plot. (overrides x and y).
        replot -- Keep the old plot contents. Default False
        title -- A sequence of strings as titles for the respective
                 data sets.
        **params -- keyword args to pass on to plot() and from there
                    to Gnuplot.Data()

        Raises ValueError if neither pts nor y is given.
        """
        # The first data set honours `replot`; every later one is
        # added to the same plot.  Each data set is drawn exactly once.
        if self._has_data(pts):
            if not title:
                title = [''] * len(pts)
            self.plot(pts=pts[0],title=title[0],replot=replot,**params)
            for p,t in zip(pts[1:],title[1:]):
                self.plot(pts=p,title=t,replot=1,**params)
        elif self._has_data(y):
            if not title:
                title = [''] * len(y)
            self.plot(x=x,y=y[0],title=title[0],replot=replot,**params)
            for p,t in zip(y[1:],title[1:]):
                self.plot(x=x,y=p,title=t,replot=1,**params)
        else:
            # string exceptions are rejected by Python >= 2.6
            raise ValueError("plot_multi requires either pts, or y as an argument")


    def plot_avg(self,x=None,y=None,title=None,replot=False,step=1,
                 errorbars='conf',lw=1):

        """
        Plot the average over a set of Y values with error bars
        indicating the 95% confidence interval of the sample mean at
        each point. (i.e. stderr * 1.96)

        y = A sequence of sequences of Y values to average.
            If not all sequences are of equal length, the length
            of the shortest sequence is used for all.
        x = (optional) A single sequence of X values corresponding to
            the Ys.
        title = The title of the average plot.
        replot = Keep the old contents of the plot window.
                 default = False
        step = Plot the average at every Nth point. (default = 1)
        errorbars = What statistic to use for error bars, one of:
                    'conf'   -> 95% confidence interval (stderr * 1.96)
                    'stderr' -> Standard error
                    'stddev' -> Standard deviation
                    'var'    -> Variance
        lw = gnuplot line width used for both the mean line and the
             error bars. (default = 1)

        Raises ValueError if errorbars is not one of the names above.
        """
        from numpy import concatenate as join
        N = min(map(len,y))
        mean,var,stderr = utils.stats(join([array([a[:N]]) for a in y],axis=0))

        # Work out the error bars before anything is drawn, so that an
        # unknown errorbars name does not leave a half-finished plot.
        if errorbars == 'conf':
            bars = stderr * 1.96
        elif errorbars == 'stderr':
            bars = stderr
        elif errorbars == 'stddev':
            bars = sqrt(var)
        elif errorbars == 'var':
            bars = var
        else:
            raise ValueError('Unknown error bar type: "%s"' % errorbars)

        if replot:
            self.current_style += 1
        else:
            self.current_style = 1

        line_style = {'with': 'lines lt %d lw %d'%(self.current_style,lw)}
        bar_style = {'with': 'errorbars lt %d lw %d'%(self.current_style,lw)}

        self.plot(x=x,y=mean,
                  #title=title,
                  step=step,replot=replot,
                  **line_style)
        if not self._has_data(x):
            x = range(len(mean))

        self.plot(pts=list(zip(x,mean,bars)),
                  title=title,
                  step=step,replot=1,
                  **bar_style)

    def plot_stddev(self,x=None,y=None,title=None,replot=False,step=1,
                    with_='lines',**params):
        """
        Plot the standard deviation over a set of Y sequences.

        y = A sequence of sequences of Y values; all are cut to the
            length of the shortest one.
        x = (optional) A single sequence of X values.
        title, replot, step = as for plot().
        with_ = gnuplot 'with' style (default 'lines').  The style may
                also be supplied as **{'with': ...}, which takes
                precedence over with_.

        Raises TypeError for any other unexpected keyword argument.
        """
        from numpy import concatenate as join
        # accept the historical keyword spelling through **params
        style = params.pop('with',with_)
        if params:
            raise TypeError("plot_stddev() got unexpected keyword arguments: %s"
                            % ', '.join(sorted(params)))
        N = min(map(len,y))
        mean,var,stderr = utils.stats(join([array([a[:N]]) for a in y],axis=0))
        self.plot(x=x,y=sqrt(var),title=title,replot=replot,step=step,
                  **{'with': style})

    def plot_hist(self,bins=None,counts=None,title=None,style=None,replot=False,
                  fill='solid 0.5'):
        """
        Plot a histogram as gnuplot boxes.

        bins = Bin positions; each box is drawn half the width of the
               first bin to the right of its position, so at least
               two positions are needed.
        counts = The value for each bin.
        title = The title of the plot.
        style = (optional) gnuplot line type for the boxes.
        replot = Keep the old plot contents.  Default = False.
        fill = gnuplot fill style.  Default = 'solid 0.5'.
        """

        if style:
            withstr = 'boxes lt %d fs %s'%(style,fill)
        else:
            withstr = 'boxes fs %s'%fill

        bins = array(bins)
        self.plot(x = bins+(bins[1]-bins[0])/2.0,
                  y = counts,
                  replot = replot,
                  title=title,
                  **{'with': withstr})

    def replot(self,**params):
        """
        Same as plot(replot=True, **params): add to, or with no data
        just redraw, the current plot.
        """
        self.plot(replot=True,**params)
Exemple #10
0
    def train(self, X, error=None):
        """
        Present one input X to the network and adapt it.

        X = The input signal.
        error = (optional) The error to add to the winning unit's
                accumulator.  When None, the squared distance between
                X and the winner is used (or, if self.normalize_error
                is set, that distance divided by norm(X)).

        Moves the winner and its neighbors towards X, ages and prunes
        edges, deletes units left without edges, inserts a new unit
        when self.time_to_grow() says so, and finally decays all
        error accumulators by self.beta.  Returns None.
        """

        self.debug("Training on input:", X)
        self.present_input(X)
        self.count += 1

        # (roman numeral comments from fritzke's algorithm in
        # B. Fritzke, Unsupervised ontogenetic networks, in Handbook
        # of Neural Computation, IOP Publishing and Oxford University
        # Press, 1996)  [ replacing \zeta with X ]

        # (iii) Determine units s_1 and s_2 (s_1,s_2 \in A) such that
        #       |w_{s_1} - X| <= |w_c - X| (\forall c \in A)
        #   and
        #       |w_{s_2} - X| <= |w_c - X| (\forall c \in A\\s_1)

        s_1, s_2 = self.winners(2)

        # (iv) If it does not already exist, insert a connection between s1 and s2
        #   in any case, set the age of the connection to zero

        self.add_connection(s_1, s_2)

        # (v) Add the squared distance between the input signal and the
        # nearest unit in input space to a local error variable

        # identity test: "== None" is evaluated elementwise when the
        # caller passes a numpy array
        if error is None:
            error = self.dists[s_1]**2
            if self.normalize_error:
                error = sqrt(error) / norm(X)

        self.error[s_1] += error

        # (vi) Move s_i and its direct topological neighbors towards
        # X by fractions e_b and e_n, respectively, of the total
        # distance.

        self.weights[s_1] += self.e_b * (X - self.weights[s_1])
        for n in self.connections[s_1]:
            self.weights[n] += self.e_n * (X - self.weights[n])

        # (vii) Increment the age of all edges emanating from s_1
        for n in self.connections[s_1]:
            self.connections[n][s_1] += 1
            self.connections[s_1][n] += 1

        # (viii) Remove edges with an age larger than max_age....
        for a, connection_dict in enumerate(self.connections):
            # Iterate over a snapshot: del_connection presumably
            # removes entries from these dictionaries, which would
            # invalidate a live items() view on Python 3.
            for b, age in list(connection_dict.items()):
                if age > self.max_age:
                    self.del_connection(a, b)

        # (viii) ... If this results in units having no emanating
        # edges, remove them as well.
        to_be_deleted = [a for a, d in enumerate(self.connections) if not d]
        #   sort the list in descending order, so deleting lower numbered
        #   units doesn't screw up the connections
        to_be_deleted.sort(reverse=True)
        if to_be_deleted:
            self.verbose("Deleting units", to_be_deleted)
        for a in to_be_deleted:
            self.del_unit(a)

        # (ix) if the number of input signals so far is an integer
        # multiple of a parameter \lambda, insert a new unit as
        # follows.
        if self.time_to_grow():
            # o Determine the unit q with the maximum accumulated error.
            # o Interpolate a new unit r from q and its neighbor f with the largest
            #   error variable

            q, f = self.growth_pair()
            r = len(self.weights)

            new_weights = 0.5 * (self.weights[q] + self.weights[f])
            new_weights.shape = (1, self.dim)
            self.weights = join((self.weights, new_weights), axis=0)

            # NOTE(review): this needs norm() to return something at
            # least 1-d, since numpy.concatenate rejects 0-d operands
            # -- confirm which norm() is in scope here.
            new_distance = norm(X - new_weights)
            self.dists = join((self.dists, new_distance), axis=0)

            self.connections.append({})

            # o Insert edges connecting the new unit r with units q and f and
            #   remove the original edge between q and f.
            self.verbose("Adding unit", r, "between", q, "and", f,
                         "--- count =", self.count)
            self.add_connection(q, r)
            self.add_connection(r, f)
            self.del_connection(q, f)

            # o Decrease the error variables of q and f
            self.error[q] += -self.alpha * self.error[q]
            self.error[f] += -self.alpha * self.error[f]

            # o Interpolate the error variable of r from q and f
            new_error = array(0.5 * (self.error[q] + self.error[f]))
            new_error.shape = (1, 1)
            self.error = join((self.error, new_error))

            if self.grow_callback:
                self.grow_callback(q, f)

        # (x) Decrease the error variables of all units
        self.error += -self.beta * self.error
        return
Exemple #11
0
    def __call__(self, X):
        """
        Return the dot product of X with the weights self.w.

        When self.use_bias is true, a constant 1 is appended to X
        first.
        """
        features = X
        if self.use_bias:
            features = join((features, [1]))
        return dot(features, self.w)