Example #1
def bfgs(inverse_hessian, weight_delta, gradient_delta, maxrho=1e4):
    # assumes: import theano.tensor as T; from theano.ifelse import ifelse
    ident_matrix = cast_float(T.eye(inverse_hessian.shape[0]))
    maxrho = cast_float(maxrho)
    # rho = 1 / (y^T s); cap it when the curvature denominator vanishes
    rho = cast_float(1.) / gradient_delta.dot(weight_delta)
    rho = ifelse(T.isinf(rho), maxrho * T.sgn(rho), rho)
    param1 = ident_matrix - T.outer(weight_delta, gradient_delta) * rho
    param2 = ident_matrix - T.outer(gradient_delta, weight_delta) * rho
    param3 = rho * T.outer(weight_delta, weight_delta)
    return param1.dot(inverse_hessian).dot(param2) + param3
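For reference, writing s for weight_delta and y for gradient_delta, this snippet computes the textbook BFGS update of the inverse Hessian approximation H:

\rho = \frac{1}{y^\top s}, \qquad
H_{k+1} = \left(I - \rho\, s y^\top\right) H_k \left(I - \rho\, y s^\top\right) + \rho\, s s^\top

The ifelse guard only replaces rho by a signed cap (maxrho) when the curvature product y^T s underflows to zero, which would otherwise make rho infinite.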
Example #2
 def predict(self, input_data):
     batch_size = self.batch_size
     n_samples = input_data.shape[0]
     # number of batches, rounded up so a final partial batch is included
     n_batches = n_samples // batch_size
     if n_batches * batch_size < n_samples: n_batches += 1
     pred = cast_float(np.zeros((0, 1)))
     input_data = cast_float(input_data)
     for batch_index in xrange(n_batches):
         idx = slice(batch_index * batch_size,
                     (batch_index + 1) * batch_size)
         predx = self.methods['predict'](input_data[idx])
         pred = np.concatenate([pred, predx])
     return pred
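The two-step round-up can also be written as a single ceiling division; an equivalent one-liner, assuming n_samples and batch_size are positive integers:

n_batches = (n_samples + batch_size - 1) // batch_size  # ceil(n_samples / batch_size)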
Example #3
 def init_layers(self):
     super(MomentumNet, self).init_layers()
     # attach a zero-initialized shared "delta" buffer to every parameter;
     # it accumulates the momentum term between updates
     for layer in self.layers:
         for parameter in layer.parameters:
             value = cast_float(np.zeros(T.shape(parameter).eval()))
             parameter.delta = theano.shared(name="delta_" + parameter.name,
                                             value=value)
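These buffers typically hold the running update of classical momentum; with learning rate \eta and momentum coefficient \mu (both assumed here; the snippet only allocates the buffers), the standard rule is:

\Delta w_t = \mu\, \Delta w_{t-1} - \eta\, \nabla E(w_{t-1}), \qquad w_t = w_{t-1} + \Delta w_t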
Example #4
 def create_shared(value, name, shape, isbias):
     # pass pre-built Theano variables through unchanged
     if isinstance(value, (T.sharedvar.SharedVariable, T.Variable)):
         return value
     # default to Xavier (Glorot) normal initialization
     if value is None:
         value = ParameterLayer.xavier_normal(shape, isbias)
     value = cast_float(value)
     return theano.shared(value=value, name=name, borrow=True)
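A hypothetical call creating the weight matrix and bias vector of a 10-to-5 dense layer (the names and shapes here are purely illustrative):

w = create_shared(None, 'layer1_w', (10, 5), isbias=False)  # Xavier-initialized
b = create_shared(None, 'layer1_b', (5,), isbias=True)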
Example #5
    def train(self):
        scale = (self.emax - self.emin) * self.energy_factor
        if self.orig_and_diff == 0 or not self.diff:
            self.net_eval.net.train(*self.fit_data[0:4],
                                    epochs=self.epochs,
                                    scale=scale)
        else:
            # alternate training between the difference net and the
            # original net, switching every dk epochs
            dk = self.orig_and_diff
            self.net_eval.net.current_epoch = 0
            self.net_ori_eval.net.current_epoch = 0
            # keep only the test inputs whose target is exactly zero
            fil = np.array([
                x for x, y in zip(self.fit_data[2], self.fit_data[3])
                if y == 0.0
            ])
            self.filtered_test_data = [
                cast_float(fil),
                cast_float(np.zeros((fil.shape[0], 1)))
            ]
            while self.net_eval.net.current_epoch < self.epochs:
                self.fit_data[0:4] = self.net_eval.net.train(
                    *self.fit_data[0:4],
                    epochs=self.epochs,
                    scale=scale,
                    start_epoch=self.net_eval.net.current_epoch,
                    do_epochs=self.net_eval.net.current_epoch + dk)
                erx = np.sqrt(self.net_eval.net.methods['predict_error'](
                    *self.filtered_test_data))
                print(
                    '== filtered permutation error: %13.8f, scaled = %13.8f' %
                    (erx, erx * scale))
                # sync: copy the freshly trained parameters to the other net
                param = self.net_eval.net.get_parameters()
                self.net_ori_eval.net.set_parameters(param)
                self.net_eval.net.saved_parameters = param

                self.ori_fit_data[0:4] = self.net_ori_eval.net.train(
                    *self.ori_fit_data[0:4],
                    epochs=self.epochs,
                    scale=scale,
                    start_epoch=self.net_ori_eval.net.current_epoch,
                    do_epochs=self.net_ori_eval.net.current_epoch + dk)
                # sync back before the next leg of the loop
                param = self.net_ori_eval.net.get_parameters()
                self.net_eval.net.set_parameters(param)
                self.net_ori_eval.net.saved_parameters = param
Example #6
 def set_parameter_value(self, vector):
     # scatter a flat parameter vector back into the network's individual
     # parameter tensors, in iteration order
     vector = cast_float(vector)
     parameters = list(iter_parameters(self))
     start_position = 0
     for p in parameters:
         end_position = start_position + int(p.size.eval())
         pp = np.reshape(vector[start_position:end_position],
                         p.shape.eval())
         p.set_value(pp)
         start_position = end_position
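The matching gather operation is not shown in this example; a minimal sketch of what it might look like (get_parameter_vector is a hypothetical name, assuming the same iter_parameters helper and NumPy):

import numpy as np

def get_parameter_vector(net):
    # concatenate every parameter tensor into one flat vector
    return np.concatenate([p.get_value().ravel()
                           for p in iter_parameters(net)])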
Example #7
 def init_derivatives(self):
     print('initialize derivative functions ...')
     n = self.an
     num = self.net_keep.degree
     expl = self.net_keep.expl
     scale_l = self.net_keep.scale_l
     d_order = self.net_keep.d_order
     le = np.array(
         [list(g) for g in itertools.combinations(range(0, n), num)])
     x = cast_float(T.dvector())
     xr = x.reshape((n, 3))
     lx = get_length(num)
     lena = le.shape[0]  # number of all selections
     lenb = lx.shape[1]  # for each selection, number of lengths
     yl = le[:, lx.T]  # atom-index pairs to select for the lengths
     lt = get_length(n)
     lenc = lt.shape[1]  # for all atoms, number of lengths
     lti = np.zeros((n, n), dtype=int)  # pair (j, k) -> index into mt
     for i, (j, k) in enumerate(lt.T):
         lti[j, k] = lti[k, j] = i
     if d_order:
         raise NotImplementedError(
             'second order is not supported yet!')
     mt = (xr[lt[0]] - xr[lt[1]]).norm(2, axis=1)  # all pair lengths
     if expl != 0: mt = T.exp(-mt / expl)  # mt is symbolic, so use T.exp
     if scale_l: mt = trans_forward(mt, *self.net_keep.max_min[0:2])
     ylt = lti[yl[:, :, 0],
               yl[:, :, 1]]  # map atom-pair indices to indices into mt
     y = mt[ylt]  # all selected length values; this is the net input
     ylf = yl.reshape((lena * lenb, 2))
     yltf = lti[ylf[:, 0], ylf[:, 1]]  # flattened ylt
     mtdl = []  # gradients of all lengths with respect to x
     for i in range(lenc):
         mtdl.append(T.grad(mt[i], x).reshape((1, n * 3)))
     mtd = T.concatenate(mtdl, axis=0)  # gradient of mt w.r.t. flat x
     yd = mtd[yltf]  # gradients at the selected (flattened) indices
     xip = self.net_eval.net.variables['net_input']
     xop = self.net_eval.net.variables['prediction'][0][0]
     xop = trans_backward(xop, *self.net_keep.max_min[2:4])
     print('## F: lengths -> energy ...')
     xener = theano.function([xip], xop)  # from input lengths to energy
     print('## dF: lengths -> energy ...')
     xenerd = theano.function([xip],
                              T.grad(xop, xip).reshape(
                                  (lena * lenb, )))  # energy gradient
     print('## F: coords -> lengths ...')
     xprim = theano.function([x], y.reshape(
         (1, lena, lenb)))  # from atomic to input
     print('## dF: coords -> lengths ...')
     xprimd = theano.function([x], yd)  # from atomic to input, gradient
     self.opt_eval = lambda x, xener=xener, xprim=xprim: xener(xprim(x))
     self.opt_evald = (
         lambda x, xprim=xprim, xprimd=xprimd, xenerd=xenerd: np.tensordot(
             xenerd(xprim(x)), xprimd(x), axes=(0, 0)))
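Both callables take a flat coordinate vector and return plain NumPy values, so they can be handed to any generic optimizer; a minimal usage sketch, where coords is a hypothetical flat array of length n * 3:

energy = self.opt_eval(coords)       # predicted energy
gradient = self.opt_evald(coords)    # its gradient w.r.t. coords, shape (n * 3,)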
Example #8
def dfp(inverse_hessian, weight_delta, gradient_delta, maxnum=1e5):
    maxnum = cast_float(maxnum)
    quasi_dot_gradient = inverse_hessian.dot(gradient_delta)
    # first correction term: s s^T / (y^T s)
    param1 = T.outer(weight_delta, weight_delta) / T.dot(
        gradient_delta, weight_delta)
    # second correction term, clipped to avoid numerical overflow
    param2_numerator = T.clip(
        T.outer(quasi_dot_gradient, gradient_delta) * inverse_hessian, -maxnum,
        maxnum)
    param2_denominator = gradient_delta.dot(quasi_dot_gradient)
    param2 = param2_numerator / param2_denominator
    return inverse_hessian + param1 - param2
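For reference, the classical DFP update of the inverse Hessian H that these terms correspond to (with s = weight_delta and y = gradient_delta) is:

H_{k+1} = H_k + \frac{s s^\top}{y^\top s} - \frac{H_k\, y\, y^\top H_k}{y^\top H_k\, y}

Note that the snippet forms the second term's numerator as an elementwise product with inverse_hessian rather than the literal matrix product, and clips it to [-maxnum, maxnum] for numerical stability.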
Example #9
 def train(self,
           input_train,
           output_train,
           input_test,
           output_test,
           epochs=100,
           scale=None,
           start_epoch=0,
           do_epochs=None):
     if do_epochs is None or do_epochs > epochs: do_epochs = epochs
     holder = 52 if scale is None else 68
     train_ind = np.arange(input_train.shape[0])
     if start_epoch == 0:
         self.train_errors = []
         self.test_errors = []
         self.train_times = []
         input_train = cast_float(input_train)
         output_train = cast_float(output_train)
         input_test = cast_float(input_test)
         output_test = cast_float(output_test)
         if len(output_test.shape) == 1:
             output_test = output_test.reshape((output_test.shape[0], 1))
         if len(output_train.shape) == 1:
             output_train = output_train.reshape((output_train.shape[0], 1))
         if (input_train.shape[0] != output_train.shape[0]
                 or input_test.shape[0] != output_test.shape[0]):
             raise RuntimeError('train/test data first dimension mismatch!')
         if (input_train.shape[1:] != self.input_size
                 or input_test.shape[1:] != self.input_size
                 or output_train.shape[1:] != self.output_size
                 or output_test.shape[1:] != self.output_size):
             raise RuntimeError(
                 'train/test data size is not the size of network!\n' +
                 'input shape: {}, net input shape: {}\n'.format(
                     input_train.shape[1:], self.input_size) +
                     'output shape: {}, net output shape: {}'.format(
                     output_train.shape[1:], self.output_size))
         print('pre-training working ...')
         self.pre_train(input_train, output_train)
         self.pre_test(input_test, output_test)
         print(('%%%ds' % (holder)) % ('-' * holder, ))
         if scale is None:
             print('%10s %15s %15s %7s  ' %
                   ('epoch #', 'train error', 'test error', 'time'))
         else:
             print('%10s %15s %15s %15s %7s  ' %
                   ('epoch #', 'train error', 'test error', 'scaled error',
                    'time'))
         self.min_error = None
     print(('%%%ds' % (holder)) % ('-' * holder, ))
     start_time_each = start_time = init_time = time.time()
     for epoch in xrange(start_epoch, do_epochs):
         self.current_epoch = epoch
         if self.shuffle_per != 0:
             # reshuffle the training set every shuffle_per epochs
             self.shuffle = (epoch % self.shuffle_per == 0)
         if self.shuffle:
             np.random.shuffle(train_ind)
             input_train = input_train[train_ind]
             output_train = output_train[train_ind]
         self.pre_epoch(input_train, output_train)
         mid_time = time.time()
         train_error = self.train_epoch(input_train, output_train)
         test_error = self.test_epoch(input_test, output_test)
         finish_time = time.time()
         train_time_each = finish_time - start_time_each
         start_time_each = finish_time
         self.train_errors.append(train_error)
         self.test_errors.append(test_error)
         self.train_times.append(train_time_each)
         is_min = False
         if self.param_save != 0 and epoch % self.param_save == 0:
             # checkpoint the parameters whenever the test error improves
             if self.min_error is None or test_error < self.min_error:
                 self.min_error = test_error
                 self.saved_parameters = self.get_parameters()
                 is_min = True
         if epoch == 0 or (epoch + 1) % self.show_epoch == 0:
             train_time = finish_time - start_time
             start_time = finish_time
             if scale is None:
                 print('%10d %15.8f %15.8f %7.2f%s' %
                       (epoch + 1, train_error, test_error, train_time,
                        ' *' if is_min else '  '))
             else:
                 print('%10d %15.8f %15.8f %15.8f %7.2f%s' %
                       (epoch + 1, train_error, test_error,
                        np.sqrt(test_error) * scale, train_time,
                        ' *' if is_min else '  '))
             if epoch == 0:
                 if self.shuffle_per == 0:
                     total_time = train_time * epochs
                 else:
                     shu_epochs = epochs // self.shuffle_per
                     total_time = (finish_time - mid_time) * (
                         epochs - shu_epochs) + train_time * shu_epochs
                 print(('%%%ds' % (holder)) %
                       ('estimated total time %d:%02d:%02d' %
                        (total_time / 3600,
                         (total_time / 60) % 60, total_time % 60), ))
                 print(('%%%ds' % (holder)) % ('-' * holder, ))
     print(('%%%ds' % (holder)) % ('-' * holder, ))
     total_time = finish_time - init_time
     print(('%%%ds' % (holder)) %
           ('total time used: %d:%02d:%02d' %
            (total_time / 3600, (total_time / 60) % 60, total_time % 60), ))
     print(('%%%ds' % (holder)) % ('-' * holder, ))
     self.current_epoch = do_epochs
     self.post_train()
     return input_train, output_train, input_test, output_test
Example #10
 def create_shared(name, shape):
     # shared variable initialized with zeros of the requested shape
     value = cast_float(np.zeros(shape))
     return theano.shared(value=value, name=name, borrow=True)
Example #11
 def predict(self, input_data):
     return self.methods['predict'](cast_float(input_data))
Example #12
 def init_variables(self):
     super(LevenbergMarquardtNet, self).init_variables()
     # mu is the Levenberg-Marquardt damping factor; last_error stores the
     # previous error so that mu can be adapted between updates
     self.variables['mu'] = theano.shared(name="mu",
                                          value=cast_float(self.mu))
     self.variables['last_error'] = theano.shared(name="last_error",
                                                  value=cast_float(np.nan))
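For context, mu parameterizes the standard damped Gauss-Newton step that Levenberg-Marquardt takes (J is the Jacobian of the residuals r; both come from the general algorithm, not this snippet):

\left(J^\top J + \mu I\right) \delta = J^\top r, \qquad w \leftarrow w - \delta

Increasing mu pushes the step toward plain gradient descent; decreasing it moves toward the pure Gauss-Newton step.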