예제 #1
0
def test_reward():
    """Smoke-check ``calc_reward`` on a small hand-written input.

    Builds a default ``mfg_ac2.actor_critic``, prints a 3x3 matrix and a
    3-element vector, passes both (plus the literal 3) to ``calc_reward``
    and prints whatever comes back.  Nothing is asserted or returned.
    """
    agent = mfg_ac2.actor_critic()

    # Inputs are echoed before the call so the printed reward can be
    # compared against them by eye.
    mat_p = np.array([[1, 3, 3], [4, 5, 6], [7, 8, 9]])
    print(mat_p)
    vec_pi = np.array([0.1, 0.2, 0.7])
    print(vec_pi)

    # NOTE(review): the trailing 3 presumably is the problem dimension
    # (it matches the input sizes) -- confirm against calc_reward.
    print("Reward", agent.calc_reward(mat_p, vec_pi, 3))
예제 #2
0
def time_value(n):
    """Time ``n`` consecutive calls to ``calc_value`` and print the result.

    An ``actor_critic`` with ``d=47`` is created, its weight vector ``w`` is
    overwritten with ones, and ``calc_value`` is called ``n`` times on one
    random vector of length 47.  The elapsed time and the value from the
    last call are printed; nothing is returned.

    Args:
        n: Number of calls to time.  When ``n <= 0`` no call is made and
            the printed value is ``None`` (previously this case raised
            ``UnboundLocalError``).
    """
    d = 47
    ac = mfg_ac2.actor_critic(d=d)
    # Weight count is d*(d+1)/2 + d + 1; d*(d+1) is always even, so integer
    # division gives the same number as the float formula.
    # NOTE(review): presumably quadratic + linear + bias terms -- confirm
    # against the feature construction in mfg_ac2.
    num_features = (d + 1) * d // 2 + d + 1
    ac.w = np.ones(num_features)
    pi = np.random.rand(d)

    # Defined up front so the final print works even when the loop body
    # never runs.
    value = None

    # perf_counter is monotonic and high-resolution, so the measurement is
    # not disturbed by system clock adjustments.
    t_start = time.perf_counter()
    for _ in range(n):
        value = ac.calc_value(pi)
    t_end = time.perf_counter()

    print("Time taken for %d runs of calc_value" % n, (t_end - t_start))
    print("Value", value)
예제 #3
0
def time_reward(n):
    """Time ``n`` consecutive calls to ``calc_reward`` and print the result.

    A default ``actor_critic`` is created and ``calc_reward`` is called
    ``n`` times with a 47x47 matrix of ones, a length-47 vector whose first
    entry is 1 and the rest 0, and the literal 47.  The elapsed time and the
    reward from the last call are printed; nothing is returned.

    Args:
        n: Number of calls to time.  When ``n <= 0`` no call is made and
            the printed reward is ``None`` (previously this case raised
            ``UnboundLocalError``).
    """
    ac = mfg_ac2.actor_critic()
    P = np.ones([47, 47])
    pi = np.zeros(47)
    pi[0] = 1

    # Defined up front so the final print works even when the loop body
    # never runs.
    reward = None

    # perf_counter is monotonic and high-resolution, so the measurement is
    # not disturbed by system clock adjustments.
    t_start = time.perf_counter()
    for _ in range(n):
        reward = ac.calc_reward(P, pi, 47)
    t_end = time.perf_counter()

    print("Time taken for %d runs of calc_reward" % n, (t_end - t_start))
    print("Reward", reward)
예제 #4
0
def test_value():
    """Smoke-check ``calc_value`` with all-ones weights on a 3-element input.

    Creates an ``actor_critic`` with ``d=3``, replaces its weight vector
    ``w`` with ones, and prints the weights, the input vector and the
    result of ``calc_value``.  Nothing is asserted or returned.
    """
    dim = 3
    agent = mfg_ac2.actor_critic(d=dim)

    # Weight count is d*(d+1)/2 + d + 1 (10 for d=3); d*(d+1) is always
    # even, so floor division is exact.
    n_weights = dim * (dim + 1) // 2 + dim + 1
    agent.w = np.ones(n_weights)
    print("weights")
    print(agent.w)

    vec_pi = np.array([0.1, 0.2, 0.7])
    print("pi")
    print(vec_pi)

    # NOTE(review): an earlier variant computed this as
    # calc_features(pi).dot(w); presumably calc_value is equivalent --
    # confirm against mfg_ac2.
    print("value", agent.calc_value(vec_pi))
예제 #5
0
def test_pi0(indir):
    """Run ``init_pi0`` on a directory and print the resulting ``mat_pi0``.

    Args:
        indir: Directory name as a string; it is prefixed with ``'./'``
            before being passed as ``path_to_dir``.
    """
    agent = mfg_ac2.actor_critic(d=20)
    source_dir = './' + indir
    # NOTE(review): presumably init_pi0 fills mat_pi0 from files under
    # source_dir -- confirm against mfg_ac2.
    agent.init_pi0(path_to_dir=source_dir)
    print(agent.mat_pi0)
예제 #6
0
import numpy as np
import mfg_ac2
import time
import os

# Module-level instance for checks that do not build their own; test_action
# calls sample_action on it with a 4-element pi, which matches d=4.
# NOTE(review): the meaning of theta and shift is defined in mfg_ac2 and is
# not visible from this file -- confirm before changing them.
ac = mfg_ac2.actor_critic(theta=10, shift=0.4, d=4)


def test_pi0(indir):
    """Run ``init_pi0`` on a directory and print the resulting ``mat_pi0``.

    A fresh ``actor_critic`` with ``d=20`` is created locally, so the
    module-level ``ac`` is not touched.

    Args:
        indir: Directory name as a string; it is prefixed with ``'./'``
            before being passed as ``path_to_dir``.
    """
    local_ac = mfg_ac2.actor_critic(d=20)
    data_dir = './' + indir
    # NOTE(review): presumably init_pi0 fills mat_pi0 from files under
    # data_dir -- confirm against mfg_ac2.
    local_ac.init_pi0(path_to_dir=data_dir)
    print(local_ac.mat_pi0)


def test_action():

    pi = np.array([0.7, 0.09, 0.01, 0.2])

    print("pi")
    print(pi)
    print("Sum of pi", np.sum(pi))

    P = ac.sample_action(pi)

    print("P matrix")
    print(P)
    print("Row 1 sum", np.sum(P[1, :]))
    print("Col 3 sum", np.sum(P[:, 3]))

    print("New pi")
    pi_next = np.transpose(P).dot(pi)