Example #1
0
                        default=0.99,
                        type=float,
                        help="discount_factor. Default = 0.99",
                        dest="discount_factor")
    args = parser.parse_args()

    # Build the states, the actions and the dictionary of states from the
    # parsed command-line settings.
    States = environment.CreateStates(args.LT_f, args.LT_s, args.Inv_Max,
                                      args.Inv_Min, args.OrderFast,
                                      args.OrderSlow)
    Actions = environment.CreateActions(args.OrderFast, args.OrderSlow)
    dict_states = environment.CreateDictStates(States)

    # Load the policy from CSV: each line is a ';'-separated row of which only
    # the first `policy_width` fields are kept and parsed as floats.
    # NOTE(review): 82 presumably matches the number of entries per row written
    # by the policy exporter -- confirm against the code that produces the file.
    policy_width = 82
    with open('./A3C_policy.csv') as f:
        # Blank lines (e.g. a trailing empty line at the end of the file) are
        # skipped; float() would otherwise raise ValueError on them.
        policy = [
            [float(value) for value in line.split(sep=';')[:policy_width]]
            for line in f
            if line.strip()
        ]

    # Compute the transition probabilities for the configured problem, then
    # report the cost of following the loaded policy.
    P = PI.TransitionProbs(States, Actions, args.Demand_Max, args.LT_s,
                           args.LT_f, args.h, args.b, args.C_s, args.C_f,
                           args.Inv_Max, args.Inv_Min, args.cap_fast,
                           args.cap_slow, dict_states)
    print('Cost: ', MarkovChain.TestPolicy(States, P, policy))