# Consolidated imports for the snippets below; module paths follow rlpy's
# package layout but may differ slightly across rlpy versions.
from typing import Optional

import numpy as np

import rlpy
from rlpy import Domains
from rlpy.Agents import LSPI, Q_Learning, SARSA
from rlpy.Agents.Agent import Agent
from rlpy.Domains import IntruderMonitoring
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import (RBF, IndependentDiscretization,
                                  IndependentDiscretizationCompactBinary,
                                  KernelizediFDD, Tabular, iFDD)
from rlpy.Representations.kernels import gaussian_kernel


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
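# Example driver for make_experiment, following rlpy's usual example-script
# convention (Experiment.run / plot / save). A minimal sketch: the exp_id and
# visualization flag values are assumptions, not taken from the original script.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,     # render each learning step?
                   visualize_learning=False,  # render the learned value function?
                   visualize_performance=0)   # number of performance runs to render
    experiment.plot()
    experiment.save()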
def random_test(discovery_threshold):
    TRIALS = 200
    K = 5  # number of randomly activated features (indices may repeat, so <= K distinct)
    domain = Domains.PST()
    np.random.seed(999999999)
    initialRep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(domain, discovery_threshold, initialRep,
               debug=0, useCache=1, iFDDPlus=0)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10
    n = rep.features_num
    for i in range(TRIALS):
        phi = np.zeros(n, 'bool')
        for j in range(K):
            # randint's upper bound is exclusive, so feature n - 1 is never picked
            ind = np.random.randint(0, n - 1)
            phi[ind] = 1
        threshold = np.random.rand() * 2 - 1
        print('%d: %0.2f >> %s' % (i + 1, threshold, str(phi.nonzero()[0])))
        rep.discover(phi, threshold)
    rep.show()
def deterministic_test():
    discovery_threshold = 1
    sparsify = True
    domain = rlpy.Domains.SystemAdministrator()
    initialRep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(domain, discovery_threshold, initialRep,
               debug=0, useCache=1, sparsify=sparsify)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10

    print('Initial [0,1] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 1]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([0, 1]))
    # rep.show()

    print(rep.inspectPair(0, 1, discovery_threshold + 1))
    # rep.show()
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 1]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([21]))

    print('Initial [2,3] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([2, 3]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([2, 3]))
    # rep.showCache()
    # rep.showFeatures()
    # rep.showCache()

    print('Initial [0,20] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 20]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([0, 20]))

    print('Initial [0,1,20] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 1, 20]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([20, 21]))
    rep.showCache()

    # Change the weight for new feature 40
    rep.theta[40] = -100
    print('Initial [0,20] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 20]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([0, 20]))

    print('discover 0,1,20')
    rep.inspectPair(20, rep.features_num - 1, discovery_threshold + 1)
    # rep.showFeatures()
    # rep.showCache()
    print('Initial [0,1,20] => ', end=' ')
    ANSWER = np.sort(rep.findFinalActiveFeatures([0, 1, 20]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([22]))
    rep.showCache()

    print('Initial [0,1,2,3,4,5,6,7,8,20] => ', end=' ')
    ANSWER = np.sort(
        rep.findFinalActiveFeatures([0, 1, 2, 3, 4, 5, 6, 7, 8, 20]))
    print(ANSWER)
    assert np.array_equal(ANSWER, np.array([2, 3, 4, 5, 6, 7, 8, 22]))
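# Smoke-test runner (a sketch): deterministic_test exercises iFDD's cached
# feature lookup against hand-checked answers, while random_test stresses
# discovery with random activations. The discovery_threshold value passed to
# random_test here is an assumed illustrative choice, not from the source.
if __name__ == '__main__':
    deterministic_test()
    random_test(discovery_threshold=1.0)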
def select_agent(name: Optional[str], seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'tabular-lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'tabular-q':
        policy = eGreedy(tabular, epsilon=0.1)
        return Q_Learning(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)
    elif name == 'tabular-sarsa':
        policy = eGreedy(tabular, epsilon=0.1)
        return SARSA(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)
    elif name == 'ifdd-q':
        lambda_, boyan_N0 = 0.42, 202
        discretization = 18
        initial_rep = IndependentDiscretization(
            DOMAIN, discretization=discretization)
        ifdd = iFDD(
            DOMAIN,
            discovery_threshold=8.63917,
            initial_representation=initial_rep,
            useCache=True,
            iFDDPlus=True,
        )
        return Q_Learning(
            eGreedy(ifdd, epsilon=0.1),
            ifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.7422,
            learn_rate_decay_mode='boyan',
            boyan_N0=boyan_N0,
        )
    elif name == 'kifdd-q':
        lambda_, boyan_N0 = 0.52738, 389.56
        kernel_resolution = 8.567677
        kernel_width = (DOMAIN.statespace_limits[:, 1]
                        - DOMAIN.statespace_limits[:, 0]) / kernel_resolution
        kifdd = KernelizediFDD(
            DOMAIN,
            sparsify=True,
            kernel=gaussian_kernel,
            kernel_args=[kernel_width],
            active_threshold=0.01,
            discover_threshold=0.0807,
            normalization=True,
            max_active_base_feat=10,
            max_base_feat_sim=0.5,
        )
        policy = eGreedy(kifdd, epsilon=0.1)
        return Q_Learning(
            policy,
            kifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.4244,
            learn_rate_decay_mode='boyan',
            boyan_N0=boyan_N0,
        )
    elif name == 'rbfs-q':
        rbf = RBF(
            DOMAIN,
            num_rbfs=96,
            resolution_max=21.0,
            resolution_min=21.0,
            const_feature=False,
            normalize=True,
            seed=seed,
        )
        policy = eGreedy(rbf, epsilon=0.1)
        return Q_Learning(
            policy,
            rbf,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.1953,
            initial_learn_rate=0.6633,
            learn_rate_decay_mode='boyan',
            boyan_N0=13444.0,
        )
    else:
        raise NotImplementedError()
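# Hypothetical driver for select_agent. DOMAIN and MAX_STEPS are module-level
# globals assumed to be defined elsewhere in the original script; the Experiment
# wiring below is a sketch using rlpy's standard Experiment API and may need
# adjusting for the rlpy version in use.
if __name__ == '__main__':
    agent = select_agent('ifdd-q', seed=1)
    experiment = Experiment(agent=agent, domain=DOMAIN, exp_id=1,
                            max_steps=MAX_STEPS,
                            num_policy_checks=10, checks_per_policy=1)
    experiment.run()
    experiment.save()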