# NOTE(review): fragment — this chunk begins inside a dict literal
# (presumably `rfs_params`, opened on a line before this view); the first
# two entries below are its trailing keys. Do not assume it parses standalone.
    'features_names': features_names,
    'verbose': 1
}
fs = RFS(**rfs_params)

# Split dataset for RFS
state, actions, reward, next_states = split_dataset(
    dataset, state_dim, action_dim, reward_dim)

# Run RFS
fs.fit(state, actions, next_states, reward)

# Reduce the dataset for FQI: keep only the feature names RFS selected.
# get_support() yields a boolean mask; np.where turns it into index arrays.
selected_states = []
selected_actions = []
for f in features_names[np.where(fs.get_support())]:
    if f.startswith('S'):
        selected_states.append(f)
    if f.startswith('A'):
        selected_actions.append(f)

# TODO remove this once everything works
assert len(selected_states) > 0, '### RFS fail ###'

got_actions = len(selected_actions) > 0
if not got_actions:
    # No action feature survived selection: keep A0 so FQI still has one.
    selected_actions = ['A0']

if args.onehot:
    # 'A3' -> 3. NOTE(review): lstrip('A') strips a *set* of characters, so
    # this is only safe while names have exactly one leading 'A' — confirm
    # the naming scheme upstream.
    selected_actions_values = [
        int(a.lstrip('A')) for a in selected_actions
    ]
# dataset: s, a, r, s' # dataset = evaluation.collect_episodes(mdp, n_episodes=50) dataset = np.loadtxt('encoded_dataset.csv', skiprows=1, delimiter=',') # check_dataset(dataset, state_dim, action_dim, reward_dim) estimator = ExtraTreesRegressor(n_estimators=50, n_jobs=-1, importance_criterion="gini") # estimator = DecisionTreeRegressor(importance_criterion="gini") selector = IFS(estimator=estimator, scale=True, verbose=1) features_names = ['S%s' % i for i in xrange(state_dim)] + ['A%s' % i for i in xrange(action_dim)] fs = RFS(feature_selector=selector, # features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']), features_names=np.array(features_names), verbose=1) state, actions, reward, next_states = \ split_dataset(dataset, state_dim, action_dim, reward_dim) state = dataset[:,0:state_dim] actions = dataset[:,state_dim:state_dim+action_dim] reward = dataset[:,state_dim+action_dim] # print(dataset[:10, :]) fs.fit(state, actions, next_states, reward) print( fs.get_support()) # this are the selected features, it should be [s0, s2, a0]
# One 'S<i>' name per state dimension, one 'A<i>' per action dimension.
features_names = np.array(['S%s' % i for i in xrange(state_dim)] +
                          ['A%s' % i for i in xrange(action_dim)])
# Collect the RFS constructor arguments in one place.
rfs_params = {'feature_selector': selector,
              'features_names': features_names,
              'verbose': 1}
fs = RFS(**rfs_params)

# Split dataset for RFS
state, actions, reward, next_states = split_dataset(dataset, state_dim,
                                                    action_dim, reward_dim)

# Run RFS
fs.fit(state, actions, next_states, reward)

# Reduce the dataset for FQI: keep only the feature names RFS selected.
selected_states = []
selected_actions = []
for f in features_names[np.where(fs.get_support())]:
    if f.startswith('S'):
        selected_states.append(f)
    if f.startswith('A'):
        selected_actions.append(f)

# TODO remove this once everything works
assert len(selected_states) > 0, '### RFS fail ###'
got_actions = len(selected_actions) > 0
if not got_actions:
    # No action feature survived selection: keep A0 so FQI still has one.
    selected_actions = ['A0']

if args.onehot:
    # One-hot encoding: recover the integer action ids from 'A<i>' names.
    selected_actions_values = [int(a.lstrip('A')) for a in selected_actions]
    assert len(selected_actions_values) >= 2, 'Not enough actions selected (try to decrease significance)'
# NOTE(review): the `else:` suite continues beyond this chunk — incomplete here.
else:
# Smoke-test RFS on the synthetic toy MDP: collect episodes, run recursive
# feature selection, verify it recovers the known-relevant features
# [S0, S2, A0], then render the selection tree with graphviz.
# np.random.seed(3452)  # uncomment for a reproducible rollout
mdp = env.SyntheticToyFS()
state_dim, action_dim, reward_dim = get_space_info(mdp)

# dataset: s, a, r, s'
dataset = evaluation.collect_episodes(mdp, n_episodes=50)
check_dataset(dataset, state_dim, action_dim, reward_dim)

# IFS scores candidate features with an extra-trees ensemble; RFS applies
# it recursively over the state/action features.
selector = IFS(estimator=ExtraTreesRegressor(n_estimators=50),
               scale=True,
               verbose=1)
fs = RFS(feature_selector=selector,
         features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         verbose=1)

state, actions, reward, next_states, absorbing = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)
# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)

selected_features = fs.features_names[fs.get_support()]
print('selected features: {}'.format(selected_features))
# Expected selection for this toy problem (S1, S3, A1 are irrelevant).
assert np.all(selected_features == ['S0', 'S2', 'A0'])

print(fs.nodes)
g = fs.export_graphviz()
g.view()
# Run RFS feature selection on a pre-encoded dataset loaded from CSV.
# dataset = evaluation.collect_episodes(mdp, n_episodes=50)
dataset = np.loadtxt('encoded_dataset.csv', skiprows=1, delimiter=',')
# check_dataset(dataset, state_dim, action_dim, reward_dim)

# NOTE(review): `importance_criterion` is not a constructor parameter of
# stock scikit-learn ExtraTreesRegressor — presumably this targets a
# patched fork; confirm the installed estimator accepts it.
estimator = ExtraTreesRegressor(n_estimators=50, n_jobs=-1,
                                importance_criterion="gini")
# estimator = DecisionTreeRegressor(importance_criterion="gini")
selector = IFS(estimator=estimator, scale=True, verbose=1)

# One 'S<i>' name per state dimension, one 'A<i>' per action dimension.
features_names = (['S%s' % i for i in xrange(state_dim)] +
                  ['A%s' % i for i in xrange(action_dim)])
fs = RFS(feature_selector=selector,
         # features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         features_names=np.array(features_names),
         verbose=1)

# Only next_states is taken from split_dataset; state/actions/reward are
# sliced straight from the raw array (the previous unpack targets were
# dead assignments, immediately overwritten).
_, _, _, next_states = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)
state = dataset[:, 0:state_dim]
actions = dataset[:, state_dim:state_dim + action_dim]
reward = dataset[:, state_dim + action_dim]
# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)

# this are the selected features, it should be [s0, s2, a0]
print(fs.get_support())