# collect gradients and logs
gradients.append(grads_and_vars)
loss_totals.append(loss_total)

# prepare for next segment
x_connector_current = x_connector
h_connector_current = h_connector
print('.')

# updating with back-tracking
## collect statistics before updating
Wxx, Wxxu, Wxu, h_parameters = isess.run(
    [dr.Wxx, dr.Wxxu[0], dr.Wxu, dr.h_parameters])
y_hat_original = regenerate_data(du, Wxx, Wxxu, Wxu, h_parameters)
loss_prediction_original = tb.mse(y_hat_original, du.get('y'))

## sum gradients
grads_and_vars = gradients[-1]
for idx, grad_and_var in enumerate(grads_and_vars):
    grads_and_vars[idx] = (sum([gv[idx][0] for gv in gradients]),
                           grads_and_vars[idx][1])

step_size = STEP_SIZE
count = 0
loss_prediction = float('inf')
dr.update_variables_in_graph(
    isess, dr.trainable_variables_nodes,
    [-val[0] * step_size + val[1] for val in grads_and_vars])
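
# The three assignments above (step_size, count, loss_prediction) set up a
# back-tracking line search. A minimal sketch of the loop they prepare for,
# kept inactive here; MAX_BACK_TRACK_STEPS is an assumed name, and the update
# rule reuses the same "-gradient * step_size + old_value" form used above.
'''
while loss_prediction > loss_prediction_original and count < MAX_BACK_TRACK_STEPS:
    count += 1
    step_size /= 2.  # shrink the step until the prediction loss improves
    dr.update_variables_in_graph(
        isess, dr.trainable_variables_nodes,
        [-val[0] * step_size + val[1] for val in grads_and_vars])
    Wxx, Wxxu, Wxu, h_parameters = isess.run(
        [dr.Wxx, dr.Wxxu[0], dr.Wxu, dr.h_parameters])
    y_hat = regenerate_data(du, Wxx, Wxxu, Wxu, h_parameters)
    loss_prediction = tb.mse(y_hat, du.get('y'))
'''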
    grads_and_vars[idx] = (grads_and_vars[idx][0] * dr.support_masks[variable_name],
                           grads_and_vars[idx][1])

# logs
gradients.append(grads_and_vars)
y_before_train.append(y_predicted_before_training)
x_connectors.append(x_connector_current)
h_connectors.append(h_connector_current)
loss_totals.append(loss_total)

# record the loss before applying the gradient
du_hat.update_trainable_variables(grads_and_vars, step_size=0)
du_hat.regenerate_data(data['u'][i], x_connector, h_connector)
y_hat_original = du_hat.get('y')
loss_prediction_original = tb.mse(y_hat_original, data['y'][i])

# adaptive step size: cap the largest single-parameter change per iteration
# at dr.max_parameter_change_per_iteration
max_gradient = max(
    [np.max(np.abs(np.array(g[0])).flatten()) for g in grads_and_vars])
# STEP_SIZE = 0.001
STEP_SIZE = dr.max_parameter_change_per_iteration / max_gradient
print('max gradient: ' + str(max_gradient))
print('STEP_SIZE: ' + str(STEP_SIZE))

# try to find a proper step size
step_size = STEP_SIZE
count = 0
du_hat.update_trainable_variables(grads_and_vars, step_size)
Wxx = du_hat.get('Wxx')
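
# Sanity sketch of the adaptive rule above (hypothetical numbers): scaling by
# dr.max_parameter_change_per_iteration / max_gradient makes the largest
# single-parameter update exactly equal to that cap.
'''
g = np.array([[0.5, -4.0], [1.0, 2.0]])    # stand-in summed gradient
max_g = np.max(np.abs(g))                  # 4.0
step = 0.004 / max_g                       # cap of 0.004 -> step 0.001
assert np.isclose(np.max(np.abs(step * g)), 0.004)
'''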
    prior=np.transpose(prior)))
print(W[:, :3])
print(du.get('Wxx'))
print(W[:, 3:6])
print(du.get('Wxxu')[0])
print(W[:, 6].reshape(3, 1))
print(du.get('Wxu'))

w_hat = [W[:, :3], [W[:, 3:6]], W[:, 6].reshape(3, 1)]
w_true = [du.get('Wxx'), du.get('Wxxu'), du.get('Wxu')]
x_hat_rep, x_true_rep = reproduce_x(w_hat, w_true, if_plot=True)
plt.plot(data['x_hat_merged'].data[index_range])
plt.plot(x_hat_rep, '--')
print('mse x_hat vs x_hat_reproduced: '
      + str(tb.mse(data['x_hat_merged'].data[index_range], x_hat_rep)))
print('mse x_true vs x_hat_reproduced: '
      + str(tb.mse(data['x_true_merged'][index_range], x_hat_rep)))

'''
Y = np.transpose(x[1:])
X = np.transpose(np.concatenate([x[:-1], xu[:-1], u[:-1]], axis=1))

# fitting
clf = linear_model.Lasso(alpha=0.000005)
clf.fit(X, Y)
np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)
print(clf.coef_[:, :3] * w_xx_f)
print(du.get('Wxx'))
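
# The regression above treats the bilinear neural model as one linear system:
# x[t+1] is predicted from [x[t], x[t]*u[t], u[t]], so the coefficient matrix
# splits into the blocks [Wxx | Wxxu | Wxu]. A sketch of recovering them,
# assuming 3 nodes and a single stimulus (hypothetical slicing, matching the
# prints of W above):
W_fit = clf.coef_                      # shape (3, 7) under these assumptions
Wxx_fit = W_fit[:, :3]                 # endogenous connectivity
Wxxu_fit = [W_fit[:, 3:6]]             # one bilinear (input-modulated) block
Wxu_fit = W_fit[:, 6].reshape(3, 1)    # direct input weights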
    dr.u_placeholder: batch['u'],
    dr.x_state_initial: batch['x_initial'],
    dr.h_state_initial: batch['h_initial'],
    dr.y_true: batch['y']
})

# collect gradients and logs
gradients.append(grads_and_vars)

# updating with back-tracking
## collect statistics before updating
du_hat.update_trainable_variables(grads_and_vars, step_size=0)
du_hat.regenerate_data()
y_hat_original = du_hat.get('y')
loss_prediction_original = tb.mse(y_hat_original, y_target)

## sum gradients
grads_and_vars = gradients[-1]
for idx, grad_and_var in enumerate(grads_and_vars):
    grads_and_vars[idx] = (sum([gv[idx][0] for gv in gradients]),
                           grads_and_vars[idx][1])

## apply mask to gradients
variable_names = [v.name for v in tf.trainable_variables()]
for idx in range(len(grads_and_vars)):
    variable_name = variable_names[idx]
    grads_and_vars[idx] = (grads_and_vars[idx][0] * dr.support_masks[variable_name],
                           grads_and_vars[idx][1])
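
# Sketch of what the support masks do (hypothetical 3-node values): gradient
# entries at structurally absent connections are zeroed, so those weights stay
# at their prior value no matter what the data say.
# mask = np.array([[1., 1., 0.],
#                  [1., 1., 1.],
#                  [0., 1., 1.]])
# grad = np.ones((3, 3))
# grad * mask        # descent only moves the supported connections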
x_true = du.scan_x(package)
plt.plot(x_true)
plt.plot(x_hat, '--')
print('mse x_hat vs x_true: ' + str(tb.mse(x_hat, x_true)))
plt.show()
'''

du_hat._secured_data['Wxx'] = Wxx
du_hat._secured_data['Wxxu'] = Wxxu
du_hat._secured_data['Wxu'] = Wxu
parameter_package = du_hat.collect_parameter_for_x_scan()
du_hat._secured_data['x'] = du_hat.scan_x(parameter_package)
parameter_package = du_hat.collect_parameter_for_h_scan()
du_hat._secured_data['h'] = du_hat.scan_h(parameter_package)
parameter_package = du_hat.collect_parameter_for_y_scan()
du_hat._secured_data['y'] = du_hat.scan_y(parameter_package)

plt.plot(du.get('y')[index_range])
plt.plot(du_hat.get('y')[index_range], '--')
plt.plot(tb.merge(data['y'], N_RECURRENT_STEP, DATA_SHIFT)[index_range], '*', alpha=0.7)
print('mse y_true vs y_hat: '
      + str(tb.mse(du.get('y')[index_range], du_hat.get('y')[index_range])))

# plt.plot([val[1, 2] for val in W['Wxx']])
plt.plot(np.asarray([val[0][0].flatten() for val in gradients]))
plt.plot(loss_prediction_accumulated_list)
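
# The three scans above must run in this order because each stage feeds the
# next: x (neural state) -> h (hemodynamic states) -> y (BOLD signal). A
# hypothetical helper that bundles the cascade after new weights are written:
'''
def resimulate(du_hat):
    for name in ['x', 'h', 'y']:
        collect = getattr(du_hat, 'collect_parameter_for_' + name + '_scan')
        scan = getattr(du_hat, 'scan_' + name)
        du_hat._secured_data[name] = scan(collect())
    return du_hat.get('y')
'''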
print("Optimization Finished!") print(loss_total_accumulated_list) print(loss_prediction_accumulated_list) print(Wxx) print(Wxxu[0]) print(Wxu) signal_length = tb.merge(data['u'], N_RECURRENT_STEP, DATA_SHIFT).shape[0] index_range = list(range(signal_length)) package = {} package["Wxx"] = Wxx package["Wxxu"] = Wxxu package["Wxu"] = Wxu package['initial_x_state'] = np.array([0, 0, 0]) package['u'] = tb.merge(data['u'], N_RECURRENT_STEP, DATA_SHIFT) x_hat = du.scan_x(package) package["Wxx"] = du.get('Wxx') package["Wxxu"] = du.get('Wxxu') package["Wxu"] = du.get('Wxu') package['initial_x_state'] = np.array([0, 0, 0]) package['u'] = tb.merge(data['u'], N_RECURRENT_STEP, DATA_SHIFT) x_true = du.scan_x(package) plt.plot(x_true) plt.plot(x_hat, '--') print('mse x_hat vs x_hat_reproduced:' + str(tb.mse(x_hat, x_true))) plt.plot([val[1, 2] for val in W['Wxx']])