def forward(self, state: np.ndarray, goal: np.ndarray, action: np.ndarray) -> torch.Tensor:
    if self.has_goal:
        state, goal, action = get_tensor(state), get_tensor(goal), get_tensor(action)
        total_input = torch.cat([state, goal, action], dim=-1)  # Concatenate to format [states | goals | actions]
    else:
        state, action = get_tensor(state), get_tensor(action)
        total_input = torch.cat([state, action], dim=-1)  # Concatenate to format [states | actions]
    x = self.fc1(total_input)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.fc3(x)
    x = F.relu(x)
    x = self.fc4(x)
    if self.has_q_bound:
        return self.q_bound * torch.sigmoid(x)
    else:
        return x
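# Hedged sketch (not part of the original source): the network snippets here assume a
# `get_tensor` helper that converts numpy arrays to float tensors and passes torch
# Tensors through unchanged; the DEVICE constant below is an assumption for illustration.
# (Note that the NILM code further below uses a different, unrelated get_tensor(df, start, stop).)
import numpy as np
import torch

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # assumed

def get_tensor(x):
    """Convert array-like input to a float32 tensor on DEVICE; leave tensors as-is."""
    if isinstance(x, torch.Tensor):
        return x.to(DEVICE)
    return torch.as_tensor(np.asarray(x), dtype=torch.float32, device=DEVICE)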
def forward(self, state: np.ndarray, goal: np.ndarray, deterministic=False,
            compute_log_prob=True) -> Tuple[torch.Tensor, torch.Tensor]:
    """ Returns the actions and their log probs as torch Tensors (gradients can be computed)"""
    if self.has_goal:
        state, goal = get_tensor(state), get_tensor(goal)
        total_input = torch.cat([state, goal], dim=-1)  # Concatenate to format [states | goals]
    else:
        total_input = get_tensor(state)
    hidden_state = self.layers.forward(total_input)
    mu = self.mu_layer(hidden_state)
    log_std = self.sigma_layer(hidden_state)
    # Squash log_std smoothly into [LOG_SIGMA_MIN, LOG_SIGMA_MAX] rather than hard-clamping
    log_std = LOG_SIGMA_MIN + (LOG_SIGMA_MAX - LOG_SIGMA_MIN) * (torch.tanh(log_std) + 1) / 2.0
    # log_std = torch.clamp(log_std, LOG_SIGMA_MIN, LOG_SIGMA_MAX)
    std = torch.exp(log_std)
    policy_distribution = Normal(mu, std)
    actions = mu if deterministic else policy_distribution.rsample()
    if compute_log_prob:
        # Exact source: https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/sac/core.py#L54
        # "Compute logprob from Gaussian, and then apply correction for Tanh squashing.
        # NOTE: The correction formula is a little bit magic. To get an understanding
        # of where it comes from, check out the original SAC paper (arXiv 1801.01290)
        # and look in appendix C. This is a more numerically-stable equivalent to Eq 21.
        # Try deriving it yourself as a (very difficult) exercise. :)"
        log_prob = policy_distribution.log_prob(actions).sum(axis=-1)
        try:
            log_prob -= (2 * (np.log(2) - actions - F.softplus(-2 * actions))).sum(axis=1)
        except IndexError:
            # Unbatched 1-D input: sum over all elements instead of a batch axis
            log_prob -= (2 * (np.log(2) - actions - F.softplus(-2 * actions))).sum()
    else:
        log_prob = None
    actions = torch.tanh(actions)  # The log_prob above already accounts for this tanh squashing
    action_center = (self.action_high + self.action_low) / 2
    action_range = (self.action_high - self.action_low) / 2
    actions_in_range = action_center + actions * action_range
    return actions_in_range, log_prob
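# Quick self-contained check (added, not from the original source) of the Spinning Up
# correction used above: 2 * (log 2 - a - softplus(-2a)) equals log(1 - tanh(a)^2),
# the log-Jacobian of the tanh squashing, while avoiding the cancellation that makes
# computing 1 - tanh(a)^2 directly underflow for large |a|.
import numpy as np
import torch
import torch.nn.functional as F

a = torch.linspace(-5, 5, steps=101, dtype=torch.float64)
stable = 2 * (np.log(2) - a - F.softplus(-2 * a))
naive = torch.log(1 - torch.tanh(a) ** 2)
print(torch.allclose(stable, naive))  # True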
def forward(self, state: np.ndarray, goal: np.ndarray) -> torch.Tensor:
    state, goal = get_tensor(state), get_tensor(goal)
    total_input = torch.cat([state, goal], dim=-1)  # Concatenate to format [states | goals]
    x = self.fc1(total_input)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.fc3(x)
    x = F.relu(x)
    x = self.fc4(x)
    # Map the tanh output from [-1, 1] into [action_low, action_high]
    return self.action_center + self.action_range * torch.tanh(x)
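# Worked example (illustrative values, not from the source) of the rescaling above:
# with bounds [0, 4], action_center = 2 and action_range = 2, so the tanh output in
# [-1, 1] is mapped affinely onto [0, 4].
import torch

action_low, action_high = torch.tensor([0.0]), torch.tensor([4.0])
action_center = (action_high + action_low) / 2  # tensor([2.])
action_range = (action_high - action_low) / 2   # tensor([2.])
squashed = torch.tensor([-1.0, 0.0, 1.0])       # extreme and middle tanh outputs
print(action_center + action_range * squashed)  # tensor([0., 2., 4.])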
def forward(self, state: np.ndarray, goal: np.ndarray, action: np.ndarray) -> torch.Tensor:
    """ Returns the Q-values as a torch Tensor (gradients can be computed)"""
    if self.has_goal:
        state, goal, action = get_tensor(state), get_tensor(goal), get_tensor(action)
        total_input = torch.cat([state, goal, action], dim=-1)  # Concatenate to format [states | goals | actions]
    else:
        state, action = get_tensor(state), get_tensor(action)
        total_input = torch.cat([state, action], dim=-1)  # Concatenate to format [states | actions]
    # Tensors are concatenated over the last dimension (i.e. the feature values, not the batch rows)
    x = self.layers.forward(total_input)
    if self.has_q_bound:
        return self.center + self.range * torch.tanh(x)
    else:
        return x
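# Minimal shape illustration (assumed sizes, added for clarity): concatenating over
# dim=-1 joins the feature dimensions while leaving the batch dimension untouched,
# so (B, S), (B, G) and (B, A) become a single (B, S + G + A) critic input.
import torch

B, S, G, A = 4, 10, 3, 2
state, goal, action = torch.randn(B, S), torch.randn(B, G), torch.randn(B, A)
total_input = torch.cat([state, goal, action], dim=-1)
print(total_input.shape)  # torch.Size([4, 15])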
def forward(self, state: np.ndarray, goal: np.ndarray, action: np.ndarray) -> torch.Tensor:
    state, goal, action = get_tensor(state), get_tensor(goal), get_tensor(action)
    total_input = torch.cat([state, goal, action], dim=-1)  # Concatenate to format [states | goals | actions]
    x = self.fc1(total_input)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.fc3(x)
    x = F.relu(x)
    x = self.fc4(x)
    # TODO: self.q_init is a value that by default leads to a good initialization,
    # with the sigmoid output near 0. It is not used in this code; check whether it is needed.
    return self.q_bound * torch.sigmoid(x)  # e.g. q_bound = -5 maps sigmoid's [0, 1] into [-5, 0]
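# Worked example (illustrative): with q_bound = -5, q_bound * sigmoid(x) maps the raw
# critic output into (-5, 0); large negative x lands near 0 and large positive x near
# -5, which suits settings where Q-values are known to be non-positive and bounded.
import torch

q_bound = -5.0
x = torch.tensor([-10.0, 0.0, 10.0])  # raw critic outputs
print(q_bound * torch.sigmoid(x))     # ~tensor([-0.0002, -2.5000, -4.9998])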
def compute_inner_error(overall_df_inner, learning_rate_cv, num_iterations_cv,
                        num_season_factors_cv, num_home_factors_cv, lam_cv, A_source):
    print(num_iterations_cv, num_season_factors_cv, num_home_factors_cv, lam_cv)
    inner_kf = KFold(n_splits=2)
    pred_inner = {}
    for train_inner, test_inner in inner_kf.split(overall_df_inner):
        train_ix_inner = overall_df_inner.index[train_inner]
        test_ix_inner = overall_df_inner.index[test_inner]
        train_test_ix_inner = np.concatenate([test_ix_inner, train_ix_inner])
        df_t_inner, dfc_t_inner = target_df.loc[train_test_ix_inner], target_dfc.loc[train_test_ix_inner]
        tensor_inner = get_tensor(df_t_inner, start, stop)
        tensor_copy_inner = tensor_inner.copy()
        # Hide the appliance-level readings (indices 1:) of the first len(test_ix_inner)
        # homes so the factorization only sees their aggregate readings
        tensor_copy_inner[:len(test_ix_inner), 1:, :] = np.nan
        L_inner = target_L[np.ix_(np.concatenate([test_inner, train_inner]),
                                  np.concatenate([test_inner, train_inner]))]
        if setting == "transfer":
            A_source = A_store[learning_rate_cv][num_season_factors_cv][num_home_factors_cv][lam_cv][num_iterations_cv]
        else:
            A_source = None
        H, A, T, Hs, As, Ts, HATs, costs = learn_HAT_adagrad_graph(
            case, tensor_copy_inner, L_inner, num_home_factors_cv, num_season_factors_cv,
            num_iter=num_iterations_cv, lr=learning_rate_cv, dis=False, lam=lam_cv,
            A_known=A_source, T_known=T_constant)
        HAT = multiply_case(H, A, T, case)
        for appliance in APPLIANCES_ORDER:
            if appliance not in pred_inner:
                pred_inner[appliance] = []
            pred_inner[appliance].append(pd.DataFrame(
                HAT[:len(test_ix_inner), appliance_index[appliance], :], index=test_ix_inner))
    err = {}
    appliance_to_weight = []
    for appliance in APPLIANCES_ORDER[1:]:
        pred_inner[appliance] = pd.DataFrame(pd.concat(pred_inner[appliance]))
        try:
            if appliance == "hvac":
                err[appliance] = compute_rmse_fraction(appliance,
                                                       pred_inner[appliance][range(5 - start, 11 - start)],
                                                       target, start, stop)[2]
            else:
                err[appliance] = compute_rmse_fraction(appliance, pred_inner[appliance],
                                                       target, start, stop)[2]
            appliance_to_weight.append(appliance)
        except Exception as e:
            # This appliance does not have enough samples and will not be weighed
            print(e)
            print(appliance)
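# Small sketch (assumed shapes, added for illustration) of the masking trick above:
# the tensor appears to be (homes, appliances, months) with the aggregate reading at
# appliance index 0, so setting indices 1: to NaN for the test homes forces the
# factorization to predict their appliance-level readings.
import numpy as np

tensor = np.random.rand(5, 3, 12)          # 5 homes, aggregate + 2 appliances, 12 months
masked = tensor.copy()
masked[:2, 1:, :] = np.nan                 # hide appliance readings for the 2 "test" homes
print(np.isnan(masked[:2, 1:, :]).all())   # True: only their aggregates remain visible
print(np.isnan(masked[:, 0, :]).any())     # False: the aggregate column is untouched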
    continue
for lam_cv in lambda_cv_range:
    pred_inner = {}
    for train_inner, test_inner in inner_kf.split(overall_df_inner):
        train_ix_inner = overall_df_inner.index[train_inner]
        test_ix_inner = overall_df_inner.index[test_inner]
        train_test_ix_inner = np.concatenate([test_ix_inner, train_ix_inner])
        df_t_inner, dfc_t_inner = target_df.loc[train_test_ix_inner], target_dfc.loc[train_test_ix_inner]
        tensor_inner = get_tensor(df_t_inner, start, stop)
        tensor_copy_inner = tensor_inner.copy()
        # Hide appliance-level readings of the inner test homes (same masking as above)
        tensor_copy_inner[:len(test_ix_inner), 1:, :] = np.nan
        L_inner = target_L[np.ix_(np.concatenate([test_inner, train_inner]),
                                  np.concatenate([test_inner, train_inner]))]
        if setting == "transfer":
            A_source = A_store[learning_rate_cv][num_season_factors_cv][num_home_factors_cv][lam_cv][num_iterations_cv]
        else:
            A_source = None
} print("******* BEST PARAMS *******") print(best_params_global[outer_loop_iteration]) print("******* BEST PARAMS *******") sys.stdout.flush() # Now we will be using the best parameter set obtained to compute the predictions if setting == "transfer": A_source = A_store[best_learning_rate][best_num_season_factors][ best_num_home_factors][best_lam][best_num_iterations] else: A_source = None num_test = len(test_ix) train_test_ix = np.concatenate([test_ix, train_ix]) df_t, dfc_t = target_df.loc[train_test_ix], target_dfc.loc[train_test_ix] tensor = get_tensor(df_t, start, stop) tensor_copy = tensor.copy() # First n tensor_copy[:num_test, 1:, :] = np.NaN L = target_L[np.ix_(np.concatenate([test, train]), np.concatenate([test, train]))] H, A, T, Hs, As, Ts, HATs, costs = learn_HAT_adagrad_graph( case, tensor_copy, L, best_num_home_factors, best_num_season_factors, num_iter=best_num_iterations, lr=best_learning_rate,
    lambda_cv_range = [0, 0.001, 0.01, 0.1]
else:
    lambda_cv_range = [0]

# A_store = pickle.load(open(os.path.expanduser('~/git/scalable-nilm/aaai18/predictions/case-{}-graph_{}_{}_all_As.pkl'.format(case, source, constant_use)), 'r'))

source_df, source_dfc, source_tensor, source_static = create_region_df_dfc_static(source, year)
target_df, target_dfc, target_tensor, target_static = create_region_df_dfc_static(target, year)

# Keep only homes for which all static features are present
static_df = pd.DataFrame(target_static, index=target_df.index)
idx = static_df.dropna(how='any').index
target_df = target_df.loc[idx]
target_dfc = target_dfc.loc[idx]
target_tensor = get_tensor(target_df, start, stop)
static_df = static_df.loc[idx]
target_static = static_df.values

# Use cosine similarity of the static features to compute the graph Laplacian L
source_L = get_L(source_static)
target_L = get_L(target_static)

if setting == "transfer":
    name = "{}-{}-{}-{}".format(source, target, random_seed, train_percentage)
else:
    name = "{}-{}-{}".format(target, random_seed, train_percentage)

# Seasonal constant constraints
if constant_use == 'True':
    T_constant = np.ones(12).reshape(-1, 1)
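# Hedged sketch of the `get_L` helper referenced above (its real implementation is not
# shown in this section): a plausible version builds a similarity matrix W from pairwise
# cosine similarity of the static home features and returns the unnormalized graph
# Laplacian L = D - W used by learn_HAT_adagrad_graph; get_L_sketch is a hypothetical name.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_L_sketch(static_features):
    W = cosine_similarity(static_features)  # pairwise similarity between homes
    np.fill_diagonal(W, 0)                  # no self-loops
    D = np.diag(W.sum(axis=1))              # degree matrix
    return D - W                            # unnormalized graph Laplacian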