Ejemplo n.º 1
0
def random_test():
    """Smoke-test iFDD discovery on randomly activated feature vectors.

    Builds an iFDD representation on top of a compact-binary independent
    discretization of the PST domain, seeds NumPy's global RNG, and then
    calls ``rep.discover`` for 200 random trials.  Each trial activates up
    to five randomly chosen features (duplicate picks collapse) and pairs
    them with a random scalar drawn from [-1, 1).  Every trial is printed
    and the final representation is displayed via ``rep.show()``.

    Nothing is asserted; the function only exercises the discovery path.
    The body is written so that it runs unchanged on Python 2 and Python 3.
    """
    # NOTE(review): the original body merely *named* ``discovery_threshold``
    # without assigning it, which raises NameError unless a module-level
    # global of that name exists.  1 is assumed here -- confirm the value.
    discovery_threshold = 1
    trials = 200
    num_active = 5  # number of randomly activated features per trial
    domain = Domains.PST()
    np.random.seed(999999999)
    initial_rep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(
        domain,
        discovery_threshold,
        initial_rep,
        debug=0,
        useCache=1,
        iFDDPlus=0)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10
    n = rep.features_num
    for trial in range(trials):
        phi = np.zeros(n, 'bool')
        # randint's upper bound is exclusive, so ``n`` (not ``n - 1``) is
        # required for the last feature to be selectable.
        phi[np.random.randint(0, n, size=num_active)] = True
        # One draw per trial; the original redrew this inside the inner loop
        # and silently discarded all but the last value.
        threshold = np.random.rand() * 2 - 1
        print('%d: %0.2f >> %s'
              % (trial + 1, threshold, str(phi.nonzero()[0])))
        rep.discover(phi, threshold)
    rep.show()
Ejemplo n.º 2
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    """Assemble a SARSA + iFDD experiment on the IntruderMonitoring domain.

    The agent follows an epsilon-greedy policy (epsilon=0.1) over an iFDD
    representation that is seeded with an IndependentDiscretization of the
    domain.  The experiment is configured for 100000 steps with 10 policy
    checks of 1 evaluation each.

    :param exp_id: forwarded to ``Experiment`` as ``exp_id``.
    :param path: forwarded to ``Experiment`` as ``path``.
    :param discover_threshold: second positional argument of ``iFDD``.
    :param lambda_: forwarded to ``SARSA`` as ``lambda_``.
    :param boyan_N0: forwarded to ``SARSA`` as ``boyan_N0`` (used with the
        "boyan" learn-rate decay mode).
    :param initial_learn_rate: forwarded to ``SARSA``.
    :return: the constructed ``Experiment``.
    """
    # iFDD receives a value just below 1 for its ``iFDDPlus`` argument.
    ifdd_plus = 1 - 1e-7

    domain = IntruderMonitoring()
    base_features = IndependentDiscretization(domain)
    features = iFDD(domain,
                    discover_threshold,
                    base_features,
                    sparsify=1,
                    useCache=True,
                    iFDDPlus=ifdd_plus)
    behaviour = eGreedy(features, epsilon=0.1)
    learner = SARSA(behaviour,
                    features,
                    discount_factor=domain.discount_factor,
                    lambda_=lambda_,
                    initial_learn_rate=initial_learn_rate,
                    learn_rate_decay_mode="boyan",
                    boyan_N0=boyan_N0)

    return Experiment(path=path,
                      exp_id=exp_id,
                      max_steps=100000,
                      num_policy_checks=10,
                      checks_per_policy=1,
                      domain=domain,
                      agent=learner)
Ejemplo n.º 3
0
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=1.0,
        lambda_=0.,
        boyan_N0=20.1,
        initial_learn_rate=0.330):
    """Create an ``Experiment`` that runs SARSA with iFDD on IntruderMonitoring.

    ``exp_id`` and ``path`` are handed to ``Experiment`` unchanged,
    ``discover_threshold`` goes to ``iFDD``, and ``lambda_``, ``boyan_N0``
    and ``initial_learn_rate`` go to ``SARSA`` (which uses the "boyan"
    learn-rate decay mode).  The remaining settings are fixed: 100000 steps,
    10 policy checks, 1 check per policy, epsilon-greedy with epsilon=0.1.
    """
    domain = IntruderMonitoring()

    # iFDD grows on top of a plain independent discretization; ``iFDDPlus``
    # is passed a value marginally smaller than 1.
    representation = iFDD(
        domain, discover_threshold, IndependentDiscretization(domain),
        sparsify=1, useCache=True, iFDDPlus=1 - 1e-7)

    agent = SARSA(
        eGreedy(representation, epsilon=0.1),
        representation,
        discount_factor=domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0)

    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 10,
        "checks_per_policy": 1,
        "domain": domain,
        "agent": agent,
    }
    return Experiment(**settings)
Ejemplo n.º 4
0
def random_test():
    """Smoke-test iFDD discovery on randomly activated feature vectors.

    Builds an iFDD representation on top of a compact-binary independent
    discretization of the PST domain, seeds NumPy's global RNG, and then
    calls ``rep.discover`` for 200 random trials.  Each trial activates up
    to five randomly chosen features (duplicate picks collapse) and pairs
    them with a random scalar drawn from [-1, 1).  Every trial is printed
    and the final representation is displayed via ``rep.show()``.

    Nothing is asserted; the function only exercises the discovery path.
    """
    # NOTE(review): the original body merely *named* ``discovery_threshold``
    # without assigning it, which raises NameError unless a module-level
    # global of that name exists.  1 is assumed here -- confirm the value.
    discovery_threshold = 1
    trials = 200
    num_active = 5  # number of randomly activated features per trial
    domain = Domains.PST()
    np.random.seed(999999999)
    initial_rep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(
        domain,
        discovery_threshold,
        initial_rep,
        debug=0,
        useCache=1,
        iFDDPlus=0)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10
    n = rep.features_num
    for trial in range(trials):
        phi = np.zeros(n, 'bool')
        # randint's upper bound is exclusive, so ``n`` (not ``n - 1``) is
        # required for the last feature to be selectable.
        chosen = np.random.randint(0, n, size=num_active)
        phi[chosen] = True
        # One draw per trial; the original redrew this inside the inner loop
        # and silently discarded all but the last value.
        threshold = np.random.rand() * 2 - 1
        print('%d: %0.2f >> %s'
              % (trial + 1, threshold, str(phi.nonzero()[0])))
        rep.discover(phi, threshold)
    rep.show()
Ejemplo n.º 5
0
def deterministic_test():
    """Check iFDD's active-feature resolution through a fixed scenario.

    An iFDD representation (threshold 1, sparsify on, cache on) is built
    over a compact-binary independent discretization of the
    SystemAdministrator domain.  The test then alternates between querying
    ``findFinalActiveFeatures`` for a set of initially active features and
    forcing pair inspections with ``inspectPair``, asserting the exact
    sorted feature indices returned at every step.

    The original used Python 2 ``print`` statements, which are a syntax
    error on Python 3.  Output is now produced in a form that is valid on
    both interpreters and emits the same text.

    Raises:
        AssertionError: if any query returns unexpected feature indices.
    """
    import sys  # local: only needed to emit a label without a newline

    discovery_threshold = 1
    sparsify = True
    domain = rlpy.Domains.SystemAdministrator()
    initialRep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(domain, discovery_threshold, initialRep,
               debug=0, useCache=1, sparsify=sparsify)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10

    def check(initial, expected, label=None):
        """Query the final active features for ``initial``, print and verify."""
        if label is not None:
            # Same bytes as the old ``print label,`` followed by a print:
            # the label, one separating space, then the answer on that line.
            sys.stdout.write(label + ' ')
        answer = np.sort(rep.findFinalActiveFeatures(initial))
        print(answer)
        assert np.array_equal(answer, np.array(expected))

    check([0, 1], [0, 1], 'Initial [0,1] => ')

    # After inspecting the pair (0, 1) above the discovery threshold, the
    # same query is expected to resolve to the single feature 21.
    print(rep.inspectPair(0, 1, discovery_threshold + 1))
    check([0, 1], [21])

    check([2, 3], [2, 3], 'Initial [2,3] => ')
    check([0, 20], [0, 20], 'Initial [0,20] => ')
    check([0, 1, 20], [20, 21], 'Initial [0,1,20] => ')
    rep.showCache()

    # Change the weight for new feature 40
    rep.theta[40] = -100
    check([0, 20], [0, 20], 'Initial [0,20] => ')

    print('discover 0,1,20')
    # Pair feature 20 with the most recently added feature.
    rep.inspectPair(20, rep.features_num - 1, discovery_threshold + 1)
    check([0, 1, 20], [22], 'Initial [0,1,20] => ')

    rep.showCache()
    check([0, 1, 2, 3, 4, 5, 6, 7, 8, 20],
          [2, 3, 4, 5, 6, 7, 8, 22],
          'Initial [0,1,2,3,4,5,6,7,8,20] => ')
Ejemplo n.º 6
0
def deterministic_test():
    """Walk iFDD through a fixed scenario and verify its active features.

    An iFDD representation (threshold 1, sparsify on, cache on) is built
    over a compact-binary independent discretization of the
    SystemAdministrator domain.  The test alternates between querying
    ``findFinalActiveFeatures`` and forcing pair inspections with
    ``inspectPair``, asserting the exact sorted indices after each step.

    Raises:
        AssertionError: if any query returns unexpected feature indices.
    """
    discovery_threshold = 1
    domain = rlpy.Domains.SystemAdministrator()
    base_rep = IndependentDiscretizationCompactBinary(domain)
    rep = iFDD(domain, discovery_threshold, base_rep,
               debug=0, useCache=1, sparsify=True)
    rep.theta = np.arange(rep.features_num * domain.actions_num) * 10

    def verify(initial, expected, label=None):
        """Print (optionally labelled) and assert the resolved features."""
        if label is not None:
            print(label, end=' ')
        resolved = np.sort(rep.findFinalActiveFeatures(initial))
        print(resolved)
        assert np.array_equal(resolved, np.array(expected))

    inspect_at = discovery_threshold + 1

    verify([0, 1], [0, 1], 'Initial [0,1] => ')

    # Once the pair (0, 1) has been inspected, the same query is expected
    # to resolve to the single feature 21.
    print(rep.inspectPair(0, 1, inspect_at))
    verify([0, 1], [21])

    verify([2, 3], [2, 3], 'Initial [2,3] => ')
    verify([0, 20], [0, 20], 'Initial [0,20] => ')
    verify([0, 1, 20], [20, 21], 'Initial [0,1,20] => ')
    rep.showCache()

    # Change the weight for new feature 40
    rep.theta[40] = -100
    verify([0, 20], [0, 20], 'Initial [0,20] => ')

    print('discover 0,1,20')
    # Pair feature 20 with the most recently added feature.
    rep.inspectPair(20, rep.features_num - 1, inspect_at)
    verify([0, 1, 20], [22], 'Initial [0,1,20] => ')

    rep.showCache()
    verify([0, 1, 2, 3, 4, 5, 6, 7, 8, 20],
           [2, 3, 4, 5, 6, 7, 8, 22],
           'Initial [0,1,2,3,4,5,6,7,8,20] => ')
Ejemplo n.º 7
0
def select_agent(name: Optional[str], seed: int) -> Agent:
    """Build the learning agent identified by ``name`` for ``DOMAIN``.

    Supported names:

    * ``None`` or ``'tabular-lspi'`` -- LSPI on a tabular representation
    * ``'tabular-q'`` -- Q-learning on a tabular representation
    * ``'tabular-sarsa'`` -- SARSA on a tabular representation
    * ``'ifdd-q'`` -- Q-learning on iFDD over an independent discretization
    * ``'kifdd-q'`` -- Q-learning on kernelized iFDD with a Gaussian kernel
    * ``'rbfs-q'`` -- Q-learning on radial basis functions

    Every agent acts with an epsilon-greedy policy (epsilon=0.1).

    Args:
        name: Agent identifier; ``None`` selects ``'tabular-lspi'``.
        seed: Forwarded to ``RBF``; only the ``'rbfs-q'`` agent uses it.

    Returns:
        The configured agent.

    Raises:
        NotImplementedError: If ``name`` is not one of the supported names.
    """
    # Built up front for every request, as in the original, even though only
    # the ``tabular-*`` agents consume it.
    tabular = Tabular(DOMAIN, discretization=20)

    if name is None or name == 'tabular-lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)

    if name == 'tabular-q':
        policy = eGreedy(tabular, epsilon=0.1)
        return Q_Learning(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)

    if name == 'tabular-sarsa':
        policy = eGreedy(tabular, epsilon=0.1)
        return SARSA(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)

    if name == 'ifdd-q':
        initial_rep = IndependentDiscretization(DOMAIN, discretization=18)
        ifdd = iFDD(
            DOMAIN,
            discovery_threshold=8.63917,
            initial_representation=initial_rep,
            useCache=True,
            iFDDPlus=True,
        )
        return Q_Learning(
            eGreedy(ifdd, epsilon=0.1),
            ifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.42,
            initial_learn_rate=0.7422,
            learn_rate_decay_mode='boyan',
            boyan_N0=202,
        )

    if name == 'kifdd-q':
        kernel_resolution = 8.567677
        # Column 1 minus column 0 of the state-space limits (presumably
        # upper minus lower bound per dimension), scaled by the resolution.
        limits = DOMAIN.statespace_limits
        kernel_width = (limits[:, 1] - limits[:, 0]) / kernel_resolution
        kifdd = KernelizediFDD(
            DOMAIN,
            sparsify=True,
            kernel=gaussian_kernel,
            kernel_args=[kernel_width],
            active_threshold=0.01,
            discover_threshold=0.0807,
            normalization=True,
            max_active_base_feat=10,
            max_base_feat_sim=0.5
        )
        policy = eGreedy(kifdd, epsilon=0.1)
        return Q_Learning(
            policy,
            kifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.52738,
            initial_learn_rate=0.4244,
            learn_rate_decay_mode='boyan',
            boyan_N0=389.56,
        )

    if name == 'rbfs-q':
        rbf = RBF(
            DOMAIN,
            num_rbfs=96,
            resolution_max=21.0,
            resolution_min=21.0,
            const_feature=False,
            normalize=True,
            seed=seed,
        )
        policy = eGreedy(rbf, epsilon=0.1)
        return Q_Learning(
            policy,
            rbf,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.1953,
            initial_learn_rate=0.6633,
            learn_rate_decay_mode='boyan',
            boyan_N0=13444.0,
        )

    # Same exception type as before, but now says which name was rejected.
    raise NotImplementedError('Unknown agent name: {!r}'.format(name))