コード例 #1
0
ファイル: avgSG.py プロジェクト: lisarah/mdp
import util as ut
# Build the MDP handed to the game value-iteration solver.
# P : S x SA transition matrix, column stochastic
N = 3  # columns
M = 5  # rows
S = N * M
A = 4
p = 0.7

# Partition the state indices: multiples of 3 go to SMin, all others to SMax.
# NOTE(review): the literal 3 equals N here -- confirm whether `i % N` is meant.
SMin = [state for state in range(S) if state % 3 == 0]
SMax = [state for state in range(S) if state % 3 != 0]

P = ut.rectangleMDP(M, N, p)
# Random costs, one per (state, action) pair, scaled into [0, 100).
C = 100. * np.random.rand(S, A)

#-------------------------- value iteration -----------------------------------#
avgCost = pI.game_VI(P, C, SMin, SMax)

#------------ checking with cvx -----------------------------------#
# Set up a convex program over y, an S x A matrix of non-negative variables
# (one entry per state-action pair).
y = cvx.Variable((S, A))
constraints = []
ones = np.ones(A)  # no longer needed below; kept in case later code uses it

# One equality per state i:
#   sum_a y[i, a] == sum_{s, a} P[i, s*A + a] * y[s, a]
for i in range(S):
    # cvx.sum replaces the original `ones * y[i, :]`, which depended on `*`
    # acting as matrix multiplication -- deprecated since CVXPY 1.1.
    constraints.append(
        cvx.sum(y[i, :])
        == sum(P[i, s * A + a] * y[s, a]
               for a in range(A)
               for s in range(S))
    )
constraints.append(y >= 0)
コード例 #2
0
Created on Sat Nov  2 09:48:50 2019

@author: sarahli
"""
import util as ut
import numpy as np
import matplotlib.pyplot as plt
import dynamicProg as dp
import cvxpy as cvx

plt.close('all')

# Problem sizes: S and A give the shape of the (S, A) cost arrays below.
N = 3
M = 3
S = N * M
A = 4
gamma = 0.5
P = ut.rectangleMDP(N, M, 0.7)
"""
    Cost model:
        player x: C = C1 + C2.dot(y)
        player y: C = C1 + C2.dot(x)
"""
# Random cost terms; C2 is scaled down by 0.3.
C1 = np.random.rand(S, A)
C2 = np.random.rand(S, A) * 0.3

# Horizon length, number of sampled runs, and the time axis 0..T-1.
T = 100
Samples = 10
timeLine = np.arange(T)

# Zero-initialised storage indexed by (state, time step, sample).
Vx = np.zeros((S, T, Samples))
Vy_varyingGamma = np.zeros((S, T, Samples))
コード例 #3
0
ファイル: td0.py プロジェクト: lisarah/mdp
import matplotlib.pyplot as plt
import numpy as np
import dynamicProg as dP

plt.close('all')

# Problem sizes.
N, M = 2, 2
A = 4
S = N * M

gamma = 0.4
alpha = 1.0  # step size of algorithm
eps = 0.2  # for the epsilon greedy algorithm

# State labels 0, 1, ..., S-1, stored as floats.
stateVec = np.linspace(0, S, num=S, endpoint=False)
# NOTE(review): `ut` is not imported in the visible lines -- presumably
# `import util as ut` exists elsewhere in the file; confirm.
P = ut.rectangleMDP(M, N, 0.7)

# Random cost for every (state, action) pair.
C = np.random.rand(S, A)

# SARSA implementation
T = 100000
# One S x A table per time step.
Q = np.zeros((S, A, T))
# Random initial state and action.
s = np.random.randint(S)
curA = np.random.randint(A)
# Main simulation loop over time steps 0 .. T-2.
# NOTE(review): the loop body presumably continues past the visible lines.
for t in range(T - 1):
    # Step size decays as 1/(t+1), overriding the initial value of alpha.
    alpha = 1. / (t + 1)
    # transition
    # Column s*A + curA of P is used as the probability vector over stateVec.
    transition = P[:, s * A + curA]
    # Sample the next state; cast to int because stateVec holds floats.
    nextS = int(np.random.choice(stateVec, 1, p=transition)[0])
コード例 #4
0
# -*- coding: utf-8 -*-
"""
Created on Sat Jan  4 17:17:25 2020

@author: craba
"""

import util as ut
import numpy as np
import dynamicProg as dp
# Problem setup: transition matrix from rectangleMDP and a random
# (row*col, A) cost array.
row, col, A = 5, 3, 4
P = ut.rectangleMDP(row, col, p=0.6)
C = np.random.rand(row * col, A)
gamma = 0.7

# Run value iteration and report its value function.
print("----------------Value iteration ---------------")
v_VI = dp.discounted_valueIteration(P, C, True, gamma)
print("value function = ", v_VI)

# Run policy iteration on the same (P, C, gamma) and report its value function.
print("----------------Policy iteration ---------------")
pi_PI, v_PI = dp.policyIteration(P, C, gamma)
print("value function = ", v_PI)