Example #1
0
  def momentum_train(self, train_x:np.ndarray, train_y:np.ndarray, alpha=1e-01, beta=5e-02, epsilon=0, tresh=1e-02, max_epochs=300, reset=True):
    """
      Trains the network using classical momentum.
      alpha: learning rate
      beta: acceleration (momentum) coefficient
      epsilon: regularization coefficient
      tresh: threshold on the relative gradient norm used to exit the main training loop
      max_epochs: maximum number of epochs to perform
      reset: if True, reset the momentum term to zero every 50 epochs
    """
    # bookkeeping for statistics (timings, gradient norms, losses)
    init_time = time.perf_counter()
    grad_norms = []
    errors = []
    times = []
    def statistics(gradient_norm):
      now = time.perf_counter()
      times.append( now - init_time )
      grad_norms.append( gradient_norm )
      e = self.test_loss(train_x,train_y,epsilon)
      errors.append( e )
      print(gradient_norm, e )
      clear_output(wait=True)
    statistics(0)

    # number of training epochs executed so far
    epoch = 0

    # functions for gradient computation
    compute_gradient = self.compute_gradient

    # previous velocity (v_t) for the momentum update (placeholder)
    old_d = np.array( [ np.zeros(self.w[i].shape) for i in range(self.Nl+1) ] ,dtype=object) 

    # main loop: compute velocity and update weights
    gradient_norm = np.inf # placeholder for gradient norm
    init_gradient_norm = np.linalg.norm( myflatten( compute_gradient( train_x,train_y, epsilon ) ) ) # initial gradient norm, used in the stopping criterion
    while( ( gradient_norm / init_gradient_norm ) > tresh and epoch < max_epochs ):

      if reset and epoch % 50 == 0:
        print('reset')
        old_d = np.array( [ np.zeros(self.w[i].shape) for i in range(self.Nl+1) ] ,dtype=object) 

      # compute gradient ( \nabla loss(w_t) ) and its norm
      g = compute_gradient( train_x, train_y, epsilon )
      gradient_norm = np.linalg.norm( myflatten(g) ) # the gradient is an array of matrices, so it is flattened into a vector before taking its norm

      # compute velocity d
      d = -alpha*g + beta*old_d

      # update weights and previous velocity
      self.w = self.w + d; self.w = set_zeros(self.w)

      old_d = d

      # update epochs counter and collect statistics
      epoch +=1; statistics(gradient_norm / init_gradient_norm)
    
    return grad_norms, errors, times, epoch
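
A minimal stand-alone sketch (not part of the source) of the same classical-momentum update, d_t = -alpha*g_t + beta*d_{t-1}, applied to a toy quadratic loss so the update and the stopping test on the relative gradient norm can be seen in isolation:

import numpy as np

# toy quadratic loss: loss(w) = ||w||^2, so the gradient is 2*w
grad = lambda w: 2.0*w

w = np.array([5.0, -3.0])
alpha, beta, tresh = 1e-1, 5e-2, 1e-2        # same defaults as momentum_train
old_d = np.zeros_like(w)                     # previous velocity
init_gradient_norm = np.linalg.norm(grad(w))

for epoch in range(300):
  g = grad(w)
  if np.linalg.norm(g) / init_gradient_norm <= tresh:
    break
  d = -alpha*g + beta*old_d                  # velocity d_t = -alpha*g_t + beta*d_{t-1}
  w = w + d                                  # weight update
  old_d = d

print(epoch, w)                              # w ends up close to the minimiser [0, 0]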
Example #2
0
    def dump(self, rows=None, cols=None):
        """show table
    """
        if rows is None:
            rows = ones(len(self._row_names), dtype=bool)
        elif isinstance(rows, str):
            rows = self.pattern(rows)
        rows = where(rows)[0]

        if cols is None:
            colsn = list(myflatten(self._col_groups))
            cols = [getattr(self, n) for n in colsn]
        if isinstance(cols, str):
            colsn = cols.split()
            cols = [self._eval(n) for n in cols.split()]

        out = []
        rowfmt = ['%%-%d.%ds' % (self._name_char, self._name_char)]
        rowfmt += ['%%-%d.%ds' %
                   (self._entry_char, self._name_char)] * len(colsn)
        rowfmt = ' '.join(rowfmt)
        out.append(rowfmt % tuple(['name'] + colsn))
        if len(rows) == 1:
            v = [self._row_names[0]
                 ] + [_mystr(c, self._entry_char) for c in cols]
            out.append(rowfmt % tuple(v))
        else:
            for i in rows:
                v = [self._row_names[i]
                     ] + [_mystr(c[i], self._entry_char) for c in cols]
                out.append(rowfmt % tuple(v))
        return '\n'.join(out)
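
A hedged usage sketch (not from the source) for dump, mirroring the doctest in sbend_to_rbend further down: rows may be a regexp pattern string and cols a whitespace-separated string of column names.

t = trtable('q b f f b q')        # constructor as used in the sbend_to_rbend doctest
t.b.l = 1; t.b.kn0l = 1
print(t.dump())                   # all rows, all column groups
print(t.dump('.', 'l kn0l'))      # rows matching '.', columns l and kn0l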
Example #3
0
  def dump(self,rows=None,cols=None):
    """show table
    """
    if rows is None:
      rows=ones(len(self._row_names),dtype=bool)
    elif isinstance(rows,str):
      rows=self.pattern(rows)
    rows=where(rows)[0]

    if cols is None:
      colsn=list(myflatten(self._col_groups))
      cols=[getattr(self,n) for n in colsn]
    if isinstance(cols,str):
      colsn=cols.split()
      cols=[self._eval(n) for n in cols.split()]

    out=[]
    rowfmt=['%%-%d.%ds' % (self._name_char,self._name_char)]
    rowfmt+=['%%-%d.%ds' % (self._entry_char,self._name_char)] * len(colsn)
    rowfmt=' '.join(rowfmt)
    out.append(rowfmt % tuple(['name'] + colsn  ) )
    if len(rows)==1:
      v=[ self._row_names[0] ]+ [ _mystr(c,self._entry_char) for c in cols ]
      out.append(rowfmt %  tuple(v))
    else:
      for i in rows:
        v=[ self._row_names[i] ]+ [ _mystr(c[i],self._entry_char) for c in cols ]
        out.append(rowfmt %  tuple(v))
    return '\n'.join(out)
Example #4
0
 def sbend_to_rbend(self,sel):
   """Add edgefocusing to selection
   >>> t=trtable('q b f f b q')
   >>> t.b.l=1; t.b.kn0l=1; t.l[2]=0
   >>> t2=t.sbend_to_rbend(t//'b')
   >>> print(t2.dump('.','l kn1l'))
   name         l            kn1l        
   start           0.           0.       
   q               0.           0.       
   b_edge          0.           0.       
   b               0.           0.       
   b_edge          0.           0.       
   f               0.           0.       
   f               0.           0.       
   b_edge          0.         546.302e-03
   b               1.000        0.       
   b_edge          0.         546.302e-03
   q               0.           0.       
   end             0.           0.       
   """
   sel=(where(sel)[0]).tolist()
   names=self._row_names
   new_names=names[:]
   for i in sel:
     new_names[i]=[names[i]+'_edge',names[i],names[i]+'_edge']
   new_names=list(myflatten(new_names))
   edges=zeros(len(new_names),dtype=bool)
   sbends=zeros(len(new_names),dtype=bool)
   for i in range(len(sel)):
     edges [ sel[i]+i*2   ]= True
     sbends[ sel[i]+i*2+1 ]= True
     edges [ sel[i]+i*2+2 ]= True
   new_t=trtable(new_names,inc_start=False,inc_stop=False)
   for cname in myflatten(self._col_groups):
     col=getattr(new_t,cname)
     col[~edges]=getattr(self,cname)   # ~ (logical not) selects the original, non-edge rows
   angle=new_t.kn0l[sbends]
   l=new_t.l[sbends]
   pos=l>0
   kick=( (angle/l)*tan(angle/2) )[pos]
   idx=where(edges)[0][0::2][pos]
   new_t.kn1l[idx]=kick
   new_t.kn1l[idx+2]=kick
   return new_t
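
A quick numeric check (not part of the source) of the edge-focusing kick computed above, kick = (angle/l)*tan(angle/2): for the doctest's bend with kn0l = 1 and l = 1 it gives tan(0.5) ≈ 0.546302, i.e. the 546.302e-03 entries in the dumped kn1l column.

from numpy import tan
angle, l = 1.0, 1.0                   # kn0l and l of the bend in the doctest
print( (angle/l)*tan(angle/2) )       # 0.5463024898..., shown above as 546.302e-03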
Example #5
0
 def split(self,n):
   """Split every element into n equal slices, dividing the length and the
   integrated strengths (kn0l, kn1l, ks0l, ks1l) by n."""
   oldnames=array(self._row_names[1:-1]) # drop the start/end rows
   r=len(oldnames)
   newnames=zeros(r*n,dtype=oldnames.dtype)
   for i in range(n):
     newnames[i::n]=oldnames
   t=trtable(newnames.tolist(),full=self._full)
   for c in myflatten(self._col_groups):
     for i in range(n):
       getattr(t,c)[i+1:-1:n]=getattr(self,c)[1:-1]
     getattr(t,c)[0]=getattr(self,c)[0]
     getattr(t,c)[-1]=getattr(self,c)[-1]
   t.l=t.l/n
   t.kn1l=t.kn1l/n
   t.ks1l=t.ks1l/n
   t.kn0l=t.kn0l/n
   t.ks0l=t.ks0l/n
   return t
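
A hedged usage sketch (not from the source): split replicates every element n times and divides the length and the integrated strengths by n, so integrated quantities are preserved.

t = trtable('q b f f b q')        # constructor as used in the sbend_to_rbend doctest
t.b.l = 1; t.b.kn0l = 1
t2 = t.split(4)                   # every element becomes 4 identical thin slices
print( t2.l[t2//'b'] )            # each b slice now has l = 0.25 (and kn0l = 0.25)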
Example #6
0
 def add_rows(self, row_names, idx=None):
     """Insert zero-filled rows named row_names at position idx (at the end if idx is None)."""
     if not hasattr(row_names, '__iter__'): row_names = row_names.split()
     if idx is None:
         idx = len(self)
     lrow = len(row_names)
     if lrow > 0:
         self._row_names.insert(idx, row_names)
         self._row_names = list(myflatten(self._row_names))
     new_data = []
     for d in self._data:
         newd = zeros((lrow, d.shape[1]), dtype=d.dtype)
         new_data.append(r_[d[:idx], newd, d[idx:]])
     self._data = new_data
     for cnames, d in zip(self._col_groups, self._data):
         for cname in cnames:
             self.__dict__[cname].data = d
     self._row_index = _mkidx(self._row_names)
     if self._full:
         self._mkfull()
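
A hedged usage sketch (not from the source): add_rows inserts zero-filled rows, taking the names either as a list or as a whitespace-separated string.

t = trtable('q b q')              # constructor as in the doctests above
t.add_rows('m1 m2', idx=2)        # insert two zero-filled rows at position 2
print(t.dump())                   # m1 and m2 appear with all columns equal to 0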
Example #7
0
 def add_rows(self,row_names,idx=None):
   if not hasattr(row_names,'__iter__'):  row_names=row_names.split()
   if idx is None:
     idx=len(self)
   lrow=len(row_names)
   if lrow>0:
     self._row_names.insert(idx,row_names)
     self._row_names=list(myflatten(self._row_names))
   new_data=[]
   for d in self._data:
     newd=zeros( (lrow,d.shape[1]), dtype=d.dtype)
     new_data.append(r_[ d[:idx], newd,  d[idx:] ])
   self._data=new_data
   for cnames,d in zip(self._col_groups,self._data):
     for cname in cnames:
       self.__dict__[cname].data=d
   self._row_index=_mkidx(self._row_names)
   if self._full:
     self._mkfull()
Example #8
0
import time

# activation functions
from numpy import tanh
ide = lambda x : np.copy(x)
relu = lambda x: x*(x > 0)
from scipy.special import softmax

# loss functions:
squared_error = lambda y,d:  np.linalg.norm( (y - d).flatten() ) ** 2 
cross_entropy = lambda y,d: -np.sum( d * np.log( y + np.finfo(float).eps ) ) 
MSE = lambda y,d: np.mean( np.square( y-d ) )

# norm regularization: a naive rewrite of numpy's l1 norm so that it accepts an array of weight matrices of (possibly) different shapes; the matrices are flattened into a single vector and one l1 norm is returned
#l1 = lambda x: np.array( [ np.max(np.sum(np.abs(w), axis=0)) for w in x ] )
l1 = lambda x: np.linalg.norm( myflatten(x).reshape(-1), ord=1)
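
A tiny numeric check (not part of the source) relating the losses above: squared_error sums the squared residuals while MSE averages them, so squared_error(y, d) equals y.size * MSE(y, d).

_y = np.array([[0.2, 0.8], [0.6, 0.4]])
_d = np.array([[0.0, 1.0], [1.0, 0.0]])
print( squared_error(_y, _d) )    # 0.04 + 0.04 + 0.16 + 0.16 = 0.4
print( MSE(_y, _d)*_y.size )      # same value, 0.4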

def derivative(f):
  """
  When f is an activation function, returns its derivative w.r.t. the unit's potential.
  When f is a loss, returns its derivative w.r.t. the activations of the last layer's units.
  When f is a norm, returns its derivative w.r.t. the weights.
  (When f is cross_entropy and the activation of the output units is softmax, the chain rule simplifies so the softmax derivative factor can be taken as one.)
  """
  if f == tanh:
    return lambda x: 1.0 - tanh(x)**2
  elif f == relu:
    return lambda x: 1*(x>=0)
  elif f == ide or f == softmax:
    return lambda x : x-x+1
  elif f == squared_error or f==MSE or f == cross_entropy: