예제 #1
0
파일: PI.py 프로젝트: RLapplications/A3C
def cost_per_period(States, Actions, dict_states, args, index_LT):
    """Solve the model by policy improvement and return its steady-state cost.

    The transition structure is built with ``TransitionProbs`` from the
    settings in ``args``, wrapped in a ``PI_env`` and handed to
    ``policy_improvement``. The resulting policy and value function, together
    with ``States`` and ``Actions``, are dumped to ``;``-delimited CSV files
    whose names carry ``index_LT`` and ``args.cap_fast``. The files are
    opened by relative name (i.e. relative to the working directory).

    Args:
        States: State collection; passed through to the helpers and saved
            with ``np.savetxt``.
        Actions: Action collection; passed through to the helpers and saved
            with ``np.savetxt``.
        dict_states: Forwarded unchanged to ``TransitionProbs``.
        args: Settings object; the attributes read here are ``Demand_Max``,
            ``LT_s``, ``LT_f``, ``h``, ``b``, ``C_s``, ``C_f``, ``Inv_Max``,
            ``Inv_Min``, ``cap_fast``, ``cap_slow`` and ``discount_factor``.
        index_LT: Integer tag used (with ``%i``) in the output file names.

    Returns:
        Whatever ``MarkovChain.cost_steady_state`` returns for the computed
        policy.
    """
    transition_probs = TransitionProbs(
        States, Actions,
        args.Demand_Max, args.LT_s, args.LT_f,
        args.h, args.b, args.C_s, args.C_f,
        args.Inv_Max, args.Inv_Min,
        args.cap_fast, args.cap_slow,
        dict_states,
    )
    # Timestamp taken after the transition structure is built; nothing in
    # this function reads it.
    start = time.time()

    environment = PI_env(States, Actions, transition_probs)
    policy, value_function = policy_improvement(environment,
                                                args.discount_factor)

    # Persist every artefact under the same "-LT<index>-cap<cap>" tag.
    name_tag = (index_LT, args.cap_fast)
    artefacts = (
        ("policy-LT%i-cap%i.csv", policy),
        ("valuefunction-LT%i-cap%i.csv", value_function),
        ("States-LT%i-cap%i.csv", States),
        ("Actions-LT%i-cap%i.csv", Actions),
    )
    for name_pattern, data in artefacts:
        np.savetxt(name_pattern % name_tag, data, delimiter=";")

    # Evaluate the policy through the Markov chain it induces.
    chain, chain_rewards = MarkovChain.MC(States, transition_probs, policy)
    stationary = MarkovChain.steady_state(States, policy, chain)
    return MarkovChain.cost_steady_state(stationary, policy, chain,
                                         chain_rewards)
예제 #2
0
def _report_preamble(args, k, u, m, distribution, demand_values,
                     optimal_cost, share_expedited, n_state_columns):
    """Return the report text that precedes the per-state rows."""
    parts = [
        'OPTIMAL COST;' + str(optimal_cost) + '\n',
        'Share expedited;' + str(share_expedited) + '\n',
        'Share regular;' + str(1 - share_expedited) + '\n\n',
        'PARAMETERS USED:\n',
        'Demand;Prob\n',
    ]
    for index, item in enumerate(distribution):
        parts.append(str(demand_values[index]) + ';' + str(item) + '\n')
    parts.append('\n')

    parameters = (
        ('l_r', args.LT_s),
        ('h', args.h),
        ('b', args.b),
        ('k', k),
        ('u', u),
        ('m', m),
        ('c_r', args.C_s),
    )
    for label, value in parameters:
        parts.append(label + ';' + str(value) + '\n')
    parts.append('\n')

    # Column header: one "State" column per component of a state, followed
    # by the fixed columns (the trailing ';' is part of the existing format).
    parts.append('State;' * n_state_columns)
    parts.append('optimal local;optimal offshore;prob state;'
                 'cost state;weighted cost state;\n')
    return ''.join(parts)


def _policy_rows(States, Actions, policy, steady_state, optimal_cost_array):
    """Yield one report line for every (state, action) the policy selects."""
    for index, action_probs in enumerate(policy):
        for index2, prob in enumerate(action_probs):
            # Only actions chosen with probability exactly 1 are reported.
            if prob == 1:
                prob_state = steady_state[index]
                fields = [str(item) for item in States[index]]
                fields.extend(str(item) for item in Actions[index2])
                fields.append(str(prob_state))
                if prob_state > 0:
                    # Cost of the state itself: undo the probability weight.
                    fields.append(
                        str(optimal_cost_array[index] / prob_state))
                else:
                    # State has zero steady-state probability: avoid the
                    # division and report 0.
                    fields.append(str(0))
                fields.append(str(optimal_cost_array[index]))
                yield ';'.join(fields) + '\n'


def cost_per_period(States, Actions, dict_states, args, k, u, m, distribution,
                    identifier, demand_values):
    """Solve the model by value iteration and write a policy report.

    Steps performed:

    1. Build the transition structure with ``TransitionProbs`` and wrap it in
       a ``VI.VI_env``; the elapsed time is printed.
    2. Run ``VI.value_iteration`` (``theta=0.000001``) and store the value
       function with ``np.save`` as ``v_<identifier>_<LT_s>_<k>.npy``.
    3. Derive the Markov chain, its steady state and the per-state cost
       array via ``MarkovChain``.
    4. Write ``optimal_policy-l_e<LT_f>-l_r<LT_s>-k<k>-Distribution
       <identifier>.csv``: a ``;``-separated report with the total cost, the
       expedited/regular shares, the parameters used and one row per state
       with its selected action, steady-state probability and costs.

    Both files are opened by relative name.

    Args:
        States: State collection; ``States[0]`` determines how many "State"
            columns the report header has.
        Actions: Action collection, indexed by the policy's action index.
        dict_states: Forwarded unchanged to ``TransitionProbs``.
        args: Settings object; the attributes read here are ``Demand_Max``,
            ``LT_s``, ``LT_f``, ``h``, ``b``, ``C_s``, ``C_f``, ``Inv_Max``,
            ``Inv_Min``, ``cap_fast``, ``cap_slow`` and ``discount_factor``.
        k, u, m: Forwarded to ``TransitionProbs`` and echoed in the report;
            ``k`` is also used in both output file names.
        distribution: Iterable of probabilities, reported next to
            ``demand_values`` and forwarded to ``TransitionProbs``.
        identifier: Label used in both output file names.
        demand_values: Demand values, indexed in step with ``distribution``.

    Returns:
        Tuple ``(optimal_cost, share_expedited)`` where ``optimal_cost`` is
        ``np.sum`` of the cost array and ``share_expedited`` is the second
        value returned by ``MarkovChain.cost_steady_state``.
    """
    start = time.time()
    P = TransitionProbs(States, Actions, args.Demand_Max, args.LT_s, args.LT_f,
                        args.h, args.b, args.C_s, args.C_f, args.Inv_Max,
                        args.Inv_Min, args.cap_fast, args.cap_slow,
                        dict_states, k, u, m, distribution, demand_values)

    env = VI.VI_env(States, Actions, P)
    print('environment created', time.time() - start)

    policy, v = VI.value_iteration(env,
                                   theta=0.000001,
                                   discount_factor=args.discount_factor)
    np.save('v_%s_%s_%s.npy' % (identifier, args.LT_s, k), v)

    MC, MC_R = MarkovChain.MC(States, P, policy)
    steady_state = MarkovChain.steady_state(States, policy, MC)
    optimal_cost_array, share_expedited = MarkovChain.cost_steady_state(
        steady_state, Actions, policy, MC, MC_R)
    optimal_cost = np.sum(optimal_cost_array)

    report_name = ('optimal_policy-l_e%i-l_r%i-k%i-Distribution %s.csv' %
                   (args.LT_f, args.LT_s, k, identifier))
    with open(report_name, 'w') as f:
        f.write(_report_preamble(args, k, u, m, distribution, demand_values,
                                 optimal_cost, share_expedited,
                                 len(States[0])))
        f.writelines(_policy_rows(States, Actions, policy, steady_state,
                                  optimal_cost_array))
    return optimal_cost, share_expedited