def bfgs(inverse_hessian, weight_delta, gradient_delta, maxrho=1e4):
    ident_matrix = cast_float(T.eye(inverse_hessian.shape[0]))
    maxrho = cast_float(maxrho)
    # rho = 1 / (y^T s); cap it when the curvature denominator vanishes
    rho = cast_float(1.) / gradient_delta.dot(weight_delta)
    rho = ifelse(T.isinf(rho), maxrho * T.sgn(rho), rho)
    # H+ = (I - rho s y^T) H (I - rho y s^T) + rho s s^T
    param1 = ident_matrix - T.outer(weight_delta, gradient_delta) * rho
    param2 = ident_matrix - T.outer(gradient_delta, weight_delta) * rho
    param3 = rho * T.outer(weight_delta, weight_delta)
    return param1.dot(inverse_hessian).dot(param2) + param3
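# A minimal NumPy sketch of the same BFGS inverse-Hessian update, useful for
# sanity-checking the Theano graph above on concrete arrays. The names
# `bfgs_numpy_sketch`, `h`, `s`, `y` are hypothetical stand-ins for
# inverse_hessian, weight_delta, gradient_delta; this is not the actual
# implementation.
import numpy as np

def bfgs_numpy_sketch(h, s, y):
    ident = np.eye(h.shape[0])
    rho = 1.0 / y.dot(s)
    left = ident - rho * np.outer(s, y)
    right = ident - rho * np.outer(y, s)
    return left.dot(h).dot(right) + rho * np.outer(s, s)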
def predict(self, input_data):
    batch_size = self.batch_size
    n_samples = input_data.shape[0]
    # ceil(n_samples / batch_size) without importing math
    n_batches = n_samples // batch_size
    if n_batches * batch_size < n_samples:
        n_batches += 1
    pred = cast_float(np.zeros((0, 1)))
    input_data = cast_float(input_data)
    for batch_index in xrange(n_batches):
        idx = slice(batch_index * batch_size, (batch_index + 1) * batch_size)
        predx = self.methods['predict'](input_data[idx])
        pred = np.concatenate([pred, predx])
    return pred
def init_layers(self):
    super(MomentumNet, self).init_layers()
    # attach a zero-initialized momentum buffer to every trainable parameter
    for layer in self.layers:
        for parameter in layer.parameters:
            value = cast_float(np.zeros(T.shape(parameter).eval()))
            parameter.delta = theano.shared(name="delta_" + parameter.name,
                                            value=value)
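# Hedged sketch of how the zero-initialized `delta` buffers above are usually
# consumed: the classical momentum rule. The actual update lives in this
# class's training step (not shown here), so the rule and the `momentum` and
# `step` names below are illustrative assumptions, not the implementation.
def momentum_update_sketch(param_value, delta_value, grad_value,
                           momentum=0.9, step=0.01):
    new_delta = momentum * delta_value - step * grad_value
    return param_value + new_delta, new_delta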
def create_shared(value, name, shape, isbias):
    # pass through anything that is already a Theano variable
    if isinstance(value, (T.sharedvar.SharedVariable, T.Variable)):
        return value
    if value is None:
        value = ParameterLayer.xavier_normal(shape, isbias)
    value = cast_float(value)
    return theano.shared(value=value, name=name, borrow=True)
def train(self):
    scale = (self.emax - self.emin) * self.energy_factor
    if self.orig_and_diff == 0 or not self.diff:
        self.net_eval.net.train(*self.fit_data[0:4], epochs=self.epochs,
                                scale=scale)
    else:
        # alternate between the difference net and the original net every
        # `dk` epochs, copying parameters between them at each switch
        dk = self.orig_and_diff
        self.net_eval.net.current_epoch = 0
        self.net_ori_eval.net.current_epoch = 0
        # keep only the test samples whose target difference is exactly zero
        fil = np.array([
            x for x, y in zip(self.fit_data[2], self.fit_data[3])
            if y == 0.0
        ])
        self.filtered_test_data = [
            cast_float(fil),
            cast_float(np.zeros((fil.shape[0], 1)))
        ]
        while self.net_eval.net.current_epoch < self.epochs:
            self.fit_data[0:4] = self.net_eval.net.train(
                *self.fit_data[0:4], epochs=self.epochs, scale=scale,
                start_epoch=self.net_eval.net.current_epoch,
                do_epochs=self.net_eval.net.current_epoch + dk)
            erx = np.sqrt(self.net_eval.net.methods['predict_error'](
                *self.filtered_test_data))
            print('== filtered permutation error: %13.8f, scaled = %13.8f' %
                  (erx, erx * scale))
            # copy parameters: difference net -> original net
            param = self.net_eval.net.get_parameters()
            self.net_ori_eval.net.set_parameters(param)
            self.net_eval.net.saved_parameters = param
            self.ori_fit_data[0:4] = self.net_ori_eval.net.train(
                *self.ori_fit_data[0:4], epochs=self.epochs, scale=scale,
                start_epoch=self.net_ori_eval.net.current_epoch,
                do_epochs=self.net_ori_eval.net.current_epoch + dk)
            # copy parameters back: original net -> difference net
            param = self.net_ori_eval.net.get_parameters()
            self.net_eval.net.set_parameters(param)
            self.net_ori_eval.net.saved_parameters = param
def set_parameter_value(self, vector):
    # scatter a flat vector back into the individual parameter tensors
    vector = cast_float(vector)
    parameters = list(iter_parameters(self))
    start_position = 0
    for p in parameters:
        end_position = start_position + int(p.size.eval())
        pp = np.reshape(vector[start_position:end_position],
                        p.shape.eval())
        p.set_value(pp)
        start_position = end_position
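# Companion sketch: flattening the parameter tensors back into one vector,
# the inverse of set_parameter_value above. A matching getter presumably
# exists in this class; this NumPy version only illustrates the
# flatten/scatter round trip and is not the actual implementation.
import numpy as np

def flatten_parameters_sketch(param_arrays):
    # walking the parameters in the same order as set_parameter_value makes
    # scatter(flatten(params)) reproduce every tensor exactly
    return np.concatenate([np.asarray(p).ravel() for p in param_arrays])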
def init_derivatives(self):
    print('initialize derivative functions ...')
    n = self.an
    num = self.net_keep.degree
    expl = self.net_keep.expl
    scale_l = self.net_keep.scale_l
    d_order = self.net_keep.d_order
    # all selections of `num` atoms out of n
    le = np.array(
        [list(g) for g in itertools.combinations(range(0, n), num)])
    x = cast_float(T.dvector())
    xr = x.reshape((n, 3))
    lx = get_length(num)
    lena = le.shape[0]  # number of all selections
    lenb = lx.shape[1]  # for each selection, number of lengths
    yl = le[:, lx.T]  # atom-index pairs to be selected for lengths
    lt = get_length(n)
    lenc = lt.shape[1]  # for all atoms, number of lengths
    lti = np.zeros((n, n), dtype=np.int)
    for i, (j, k) in enumerate(lt.T):
        lti[j, k] = lti[k, j] = i
    if d_order:
        raise NotImplementedError(
            'second order has not been supported yet!')
    mt = (xr[lt[0]] - xr[lt[1]]).norm(2, axis=1)  # all lengths
    if expl != 0:
        mt = np.exp(-mt / expl)
    if scale_l:
        mt = trans_forward(mt, *self.net_keep.max_min[0:2])
    # transfer the length indices to indices in mt
    ylt = lti[yl[:, :, 0], yl[:, :, 1]]
    # all length values after selection; this is the input of the net
    y = mt[ylt]
    ylf = yl.reshape((lena * lenb, 2))
    yltf = lti[ylf[:, 0], ylf[:, 1]]  # flattened ylt
    # calculate the grad of all lengths wrt x
    mtdl = []
    for i in range(lenc):
        mtdl.append(T.grad(mt[i], x).reshape((1, n * 3)))
    mtd = T.concatenate(mtdl, axis=0)  # the grad of mt wrt flat x
    yd = mtd[yltf]  # the grad with sel-permu (flattened) indices
    xip = self.net_eval.net.variables['net_input']
    xop = self.net_eval.net.variables['prediction'][0][0]
    xop = trans_backward(xop, *self.net_keep.max_min[2:4])
    print('## F: lengths -> energy ...')
    xener = theano.function([xip], xop)  # from input lengths to energy
    print('## dF: lengths -> energy ...')
    xenerd = theano.function(
        [xip], T.grad(xop, xip).reshape((lena * lenb, )))  # energy gradient
    print('## F: coords -> lengths ...')
    xprim = theano.function(
        [x], y.reshape((1, lena, lenb)))  # from atomic coords to input
    print('## dF: coords -> lengths ...')
    xprimd = theano.function([x], yd)  # from atomic coords to input, gradient
    # energy and its gradient as functions of the flat atomic coordinates
    self.opt_eval = lambda x, xener=xener, xprim=xprim: xener(xprim(x))
    self.opt_evald = (
        lambda x, xprim=xprim, xprimd=xprimd, xenerd=xenerd: np.tensordot(
            xenerd(xprim(x)), xprimd(x), axes=(0, 0)))
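# What `opt_evald` above computes, written out in plain NumPy: the chain rule
# dE/dx = sum_i (dE/dy_i)(dy_i/dx), with y the net-input lengths and x the
# flat atomic coordinates. `chain_rule_sketch` and its argument names are
# hypothetical stand-ins for xenerd(...) and xprimd(...).
import numpy as np

def chain_rule_sketch(denergy_dlengths, dlengths_dcoords):
    # denergy_dlengths: shape (lena * lenb,)
    # dlengths_dcoords: shape (lena * lenb, n * 3)
    # result: the energy gradient wrt coordinates, shape (n * 3,)
    return np.tensordot(denergy_dlengths, dlengths_dcoords, axes=(0, 0))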
def dfp(inverse_hessian, weight_delta, gradient_delta, maxnum=1e5):
    maxnum = cast_float(maxnum)
    quasi_dot_gradient = inverse_hessian.dot(gradient_delta)  # H y
    # rank-one term s s^T / (y^T s)
    param1 = T.outer(weight_delta, weight_delta) / T.dot(
        gradient_delta, weight_delta)
    # correction term, clipped elementwise to keep it numerically bounded
    param2_numerator = T.clip(
        T.outer(quasi_dot_gradient, gradient_delta) * inverse_hessian,
        -maxnum, maxnum)
    param2_denominator = gradient_delta.dot(quasi_dot_gradient)  # y^T H y
    param2 = param2_numerator / param2_denominator
    return inverse_hessian + param1 - param2
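# For reference, the textbook DFP rank-two update in NumPy:
# H+ = H + s s^T / (y^T s) - (H y)(H y)^T / (y^T H y).
# Note the Theano version above clips its correction term elementwise before
# dividing. `dfp_numpy_sketch`, `h`, `s`, `y` are hypothetical names; this is
# a comparison sketch, not the implementation.
import numpy as np

def dfp_numpy_sketch(h, s, y):
    hy = h.dot(y)
    return h + np.outer(s, s) / y.dot(s) - np.outer(hy, hy) / y.dot(hy)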
def train(self, input_train, output_train, input_test, output_test,
          epochs=100, scale=None, start_epoch=0, do_epochs=None):
    if do_epochs is None or do_epochs > epochs:
        do_epochs = epochs
    holder = 52 if scale is None else 68  # width of the log table
    train_ind = np.arange(input_train.shape[0])
    if start_epoch == 0:
        self.train_errors = []
        self.test_errors = []
        self.train_times = []
    input_train = cast_float(input_train)
    output_train = cast_float(output_train)
    input_test = cast_float(input_test)
    output_test = cast_float(output_test)
    if len(output_test.shape) == 1:
        output_test = output_test.reshape((output_test.shape[0], 1))
    if len(output_train.shape) == 1:
        output_train = output_train.reshape((output_train.shape[0], 1))
    if (input_train.shape[0] != output_train.shape[0]
            or input_test.shape[0] != output_test.shape[0]):
        raise RuntimeError('train/test data first dimension mismatch!')
    if (input_train.shape[1:] != self.input_size
            or input_test.shape[1:] != self.input_size
            or output_train.shape[1:] != self.output_size
            or output_test.shape[1:] != self.output_size):
        raise RuntimeError(
            'train/test data size is not the size of network!\n' +
            'input shape: {}, net input shape: {}\n'.format(
                input_train.shape[1:], self.input_size) +
            'output shape: {}, net output shape: {}'.format(
                output_train.shape[1:], self.output_size))
    print('pre-training working ...')
    self.pre_train(input_train, output_train)
    self.pre_test(input_test, output_test)
    print(('%%%ds' % (holder)) % ('-' * holder, ))
    if scale is None:
        print('%10s %15s %15s %7s ' %
              ('epoch #', 'train error', 'test error', 'time'))
    else:
        print('%10s %15s %15s %15s %7s ' %
              ('epoch #', 'train error', 'test error', 'scaled error',
               'time'))
    self.min_error = None
    print(('%%%ds' % (holder)) % ('-' * holder, ))
    start_time_each = start_time = init_time = time.time()
    for epoch in xrange(start_epoch, do_epochs):
        self.current_epoch = epoch
        if self.shuffle_per != 0:
            if epoch % self.shuffle_per == 0:
                self.shuffle = True
            else:
                self.shuffle = False
        if self.shuffle:
            np.random.shuffle(train_ind)
            input_train = input_train[train_ind]
            output_train = output_train[train_ind]
        self.pre_epoch(input_train, output_train)
        mid_time = time.time()
        train_error = self.train_epoch(input_train, output_train)
        test_error = self.test_epoch(input_test, output_test)
        finish_time = time.time()
        train_time_each = finish_time - start_time_each
        start_time_each = finish_time
        self.train_errors.append(train_error)
        self.test_errors.append(test_error)
        self.train_times.append(train_time_each)
        # mark and save the parameters with the lowest test error so far
        is_min = False
        if self.param_save != 0 and epoch % self.param_save == 0:
            if self.min_error is None or test_error < self.min_error:
                self.min_error = test_error
                self.saved_parameters = self.get_parameters()
                is_min = True
        if epoch == 0 or (epoch + 1) % self.show_epoch == 0:
            train_time = finish_time - start_time
            start_time = finish_time
            if scale is None:
                print('%10d %15.8f %15.8f %7.2f%s' %
                      (epoch + 1, train_error, test_error, train_time,
                       ' *' if is_min else ' '))
            else:
                print('%10d %15.8f %15.8f %15.8f %7.2f%s' %
                      (epoch + 1, train_error, test_error,
                       np.sqrt(test_error) * scale, train_time,
                       ' *' if is_min else ' '))
            if epoch == 0:
                # estimate the total run time from the first epoch
                if self.shuffle_per == 0:
                    total_time = train_time * epochs
                else:
                    shu_epochs = epochs / self.shuffle_per
                    total_time = (finish_time - mid_time) * (
                        epochs - shu_epochs) + train_time * shu_epochs
                print(('%%%ds' % (holder)) %
                      ('estimated total time %d:%02d:%02d' %
                       (total_time / 3600, (total_time / 60) % 60,
                        total_time % 60), ))
                print(('%%%ds' % (holder)) % ('-' * holder, ))
    print(('%%%ds' % (holder)) % ('-' * holder, ))
    total_time = finish_time - init_time
    print(('%%%ds' % (holder)) %
          ('total time used: %d:%02d:%02d' %
           (total_time / 3600, (total_time / 60) % 60, total_time % 60), ))
    print(('%%%ds' % (holder)) % ('-' * holder, ))
    self.current_epoch = do_epochs
    self.post_train()
    return input_train, output_train, input_test, output_test
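# Hedged usage sketch for train() above; `net`, the data arrays, and the
# keyword values are hypothetical. train() returns the (possibly shuffled)
# data, so a caller can resume training with start_epoch/do_epochs:
#
# x_tr, y_tr, x_te, y_te = net.train(x_tr, y_tr, x_te, y_te,
#                                    epochs=200, scale=None,
#                                    start_epoch=0, do_epochs=50)
# # ...inspect net.test_errors, then continue from where it stopped:
# x_tr, y_tr, x_te, y_te = net.train(x_tr, y_tr, x_te, y_te,
#                                    epochs=200, start_epoch=50)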
def create_shared(name, shape):
    value = cast_float(np.zeros(shape))
    return theano.shared(value=value, name=name, borrow=True)
def predict(self, input_data):
    return self.methods['predict'](cast_float(input_data))
def init_variables(self):
    super(LevenbergMarquardtNet, self).init_variables()
    self.variables['mu'] = theano.shared(name="mu",
                                         value=cast_float(self.mu))
    self.variables['last_error'] = theano.shared(
        name="last_error", value=cast_float(np.nan))
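# Hedged sketch of the role of the `mu` and `last_error` variables above: in
# Levenberg-Marquardt the damping mu is typically shrunk after a step that
# lowers the error and grown after one that raises it. The rule and the
# `mu_update` factor below are illustrative assumptions, not this class's
# actual update.
def lm_mu_update_sketch(mu, error, last_error, mu_update=1.2):
    if error < last_error:
        return mu / mu_update  # step accepted: lean toward Gauss-Newton
    return mu * mu_update      # step rejected: lean toward gradient descent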