                  update_theta_every=UPDATE_EVERY,
                  steps_per_theta_update=None,
                  verbose=1,
                  norm_value=NORM_VALUE,
                  independent=INDEPENDENT)
                  # term_condition=lambda v1, v2:
                  #     increment_base_termination(v1, v2, 2, tol=1e-2))
                  # term_condition=lambda v1, v2: terminal_evaluation(v1, v2, 1e-1))


def tmetric(theta):
    # Per-epoch metric: apply the current operator approximation STEPS_AHEAD times
    # to theta and return the resulting k parameter.
    t = pbo.apply_bo(theta[0], n_times=STEPS_AHEAD)
    return q_regressor.get_k(t)


state, actions, reward, next_states, absorbing = split_dataset(
    dataset, state_dim=state_dim, action_dim=action_dim, reward_dim=reward_dim)

theta0 = np.array([6., 10.001], dtype='float32').reshape(1, -1)
# theta0 = np.array([16., 10.001], dtype='float32').reshape(1, -1)

history = pbo.fit(state, actions, next_states, reward, absorbing, theta0,
                  batch_size=10, nb_epoch=EPOCH,
                  theta_metrics={'k': tmetric})

##########################################
# Evaluate the final solution
initial_states = np.array([[1, 2, 5, 7, 10]]).T
values = evaluation.evaluate_policy(mdp, pbo, initial_states=initial_states)
print('Learned theta: {}'.format(pbo.learned_theta_value))
print('Final performance of PBO: {}'.format(values))
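# --- Optional post-training check (sketch, not part of the original script) ---------
# It can be informative to see how k evolves when the learned operator is applied a
# few more times to the learned theta. This reuses only names defined above
# (pbo.apply_bo, q_regressor.get_k, pbo.learned_theta_value, STEPS_AHEAD); the reshape
# assumes learned_theta_value is a flat parameter vector.
theta_star = np.array(pbo.learned_theta_value, dtype='float32').reshape(1, -1)
for n in range(1, STEPS_AHEAD + 1):
    k_n = q_regressor.get_k(pbo.apply_bo(theta_star[0], n_times=n))
    print('k after {} application(s) of the learned operator: {}'.format(n, k_n))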
# dataset: s, a, r, s'
# dataset = evaluation.collect_episodes(mdp, n_episodes=50)
dataset = np.loadtxt('encoded_dataset.csv', skiprows=1, delimiter=',')
# check_dataset(dataset, state_dim, action_dim, reward_dim)

estimator = ExtraTreesRegressor(n_estimators=50, n_jobs=-1,
                                importance_criterion="gini")
# estimator = DecisionTreeRegressor(importance_criterion="gini")
selector = IFS(estimator=estimator, scale=True, verbose=1)
features_names = ['S%s' % i for i in xrange(state_dim)] + \
                 ['A%s' % i for i in xrange(action_dim)]
fs = RFS(feature_selector=selector,
         # features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         features_names=np.array(features_names),
         verbose=1)

state, actions, reward, next_states = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)
state = dataset[:, 0:state_dim]
actions = dataset[:, state_dim:state_dim + action_dim]
reward = dataset[:, state_dim + action_dim]
# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)
print(fs.get_support())
# these are the selected features; the expected selection is [S0, S2, A0]
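# Mapping the output of get_support() back onto features_names makes the printout
# readable (a small sketch mirroring the read-out used in the synthetic-toy example
# below); it adds nothing to the selection itself.
selected = np.array(features_names)[fs.get_support()]
print('Selected features: {}'.format(selected))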
# np.random.seed(3452)
mdp = env.SyntheticToyFS()
state_dim, action_dim, reward_dim = get_space_info(mdp)
nextstate_idx = state_dim + action_dim + reward_dim
reward_idx = action_dim + state_dim

# dataset: s, a, r, s'
dataset = evaluation.collect_episodes(mdp, n_episodes=50)
check_dataset(dataset, state_dim, action_dim, reward_dim)

selector = IFS(estimator=ExtraTreesRegressor(n_estimators=50),
               scale=True, verbose=1)
fs = RFS(feature_selector=selector,
         features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         verbose=1)

state, actions, reward, next_states, absorbing = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)
# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)
selected_features = fs.features_names[fs.get_support()]
print('selected features: {}'.format(selected_features))
# these are the selected features; they should be [S0, S2, A0]
assert np.all(selected_features == ['S0', 'S2', 'A0'])

print(fs.nodes)
g = fs.export_graphviz()
g.view()
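# A quick cross-check of the column layout that reward_idx / nextstate_idx encode
# (assumed to be s | a | r | s' [| absorbing], as in the comment above). This only
# prints block shapes and does not alter the data used by RFS.
print('state block:      {}'.format(dataset[:, 0:state_dim].shape))
print('action block:     {}'.format(dataset[:, state_dim:reward_idx].shape))
print('reward block:     {}'.format(dataset[:, reward_idx:nextstate_idx].shape))
print('next-state block: {}'.format(dataset[:, nextstate_idx:nextstate_idx + state_dim].shape))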
                        'n_jobs': args.njobs}
ifs_params = {'estimator': ExtraTreesRegressor(**ifs_regressor_params),
              'n_features_step': 1,
              'cv': None,
              'scale': True,
              'verbose': 1,
              'significance': args.significance}
selector = IFS(**ifs_params)
features_names = np.array(['S%s' % i for i in xrange(state_dim)] +
                          ['A%s' % i for i in xrange(action_dim)])
rfs_params = {'feature_selector': selector,
              'features_names': features_names,
              'verbose': 1}
fs = RFS(**rfs_params)

# Split dataset for RFS
state, actions, reward, next_states = split_dataset(dataset, state_dim,
                                                    action_dim, reward_dim)

# Run RFS
fs.fit(state, actions, next_states, reward)

# Reduce the dataset for FQI
selected_states = []
selected_actions = []
for f in features_names[np.where(fs.get_support())]:
    if f.startswith('S'):
        selected_states.append(f)
    elif f.startswith('A'):
        selected_actions.append(f)

# TODO remove this once everything works
assert len(selected_states) > 0, '### RFS fail ###'
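# Sketch of the actual reduction step (hypothetical; the original stops at the assert
# above). The surviving 'S<i>' / 'A<i>' names are mapped back to column indices so
# that only the selected columns are handed to FQI.
state_idx = [int(f[1:]) for f in selected_states]      # e.g. ['S0', 'S2'] -> [0, 2]
action_idx = [int(f[1:]) for f in selected_actions]    # e.g. ['A0'] -> [0]
reduced_states = state[:, state_idx]
reduced_actions = actions[:, action_idx]
reduced_next_states = next_states[:, state_idx]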