def cost_per_period(States, Actions, dict_states, args, index_LT):
    """Solve the model by policy improvement and return its steady-state cost.

    The function builds transition data with ``TransitionProbs``, wraps it in
    a ``PI_env``, runs ``policy_improvement``, dumps the policy, the value
    function, the states and the actions to ';'-delimited CSV files, and then
    evaluates the resulting policy through the ``MarkovChain`` helpers.

    NOTE(review): another ``cost_per_period`` with a different signature
    appears later in this source; if both live in the same module the later
    definition replaces this one -- confirm which one callers expect.

    Args:
        States: Collection of states; forwarded to the helpers and written
            with ``np.savetxt`` (so it must be a 1-D or 2-D array-like).
        Actions: Collection of actions; handled the same way as ``States``.
        dict_states: Forwarded unchanged to ``TransitionProbs``
            (presumably a state -> index lookup; verify against that helper).
        args: Object exposing ``Demand_Max``, ``LT_s``, ``LT_f``, ``h``,
            ``b``, ``C_s``, ``C_f``, ``Inv_Max``, ``Inv_Min``, ``cap_fast``,
            ``cap_slow`` and ``discount_factor``.
        index_LT: Integer tag used (with ``args.cap_fast``) in the names of
            the CSV files that are written.

    Returns:
        The value returned by ``MarkovChain.cost_steady_state`` for the
        computed policy.
    """
    P = TransitionProbs(
        States, Actions, args.Demand_Max, args.LT_s, args.LT_f, args.h,
        args.b, args.C_s, args.C_f, args.Inv_Max, args.Inv_Min,
        args.cap_fast, args.cap_slow, dict_states)
    env = PI_env(States, Actions, P)
    policy, v = policy_improvement(env, args.discount_factor)

    # All four dumps share one suffix so runs with different lead-time /
    # capacity settings do not overwrite each other's files.
    suffix = "-LT%i-cap%i.csv" % (index_LT, args.cap_fast)
    dumps = (
        ("policy", policy),
        ("valuefunction", v),
        ("States", States),
        ("Actions", Actions),
    )
    for prefix, data in dumps:
        np.savetxt(prefix + suffix, data, delimiter=";")

    MC, MC_R = MarkovChain.MC(States, P, policy)
    steady_state = MarkovChain.steady_state(States, policy, MC)
    return MarkovChain.cost_steady_state(steady_state, policy, MC, MC_R)
def cost_per_period(States, Actions, dict_states, args, k, u, m,
                    distribution, identifier, demand_values):
    """Solve the model by value iteration, write a report, return the cost.

    Steps, in order:
      1. Build transition data with ``TransitionProbs`` and wrap it in
         ``VI.VI_env`` (the elapsed time is printed to stdout).
      2. Run ``VI.value_iteration`` and save the value function with
         ``np.save`` to ``v_<identifier>_<args.LT_s>_<k>.npy``.
      3. Evaluate the policy through ``MarkovChain.MC``,
         ``MarkovChain.steady_state`` and ``MarkovChain.cost_steady_state``.
      4. Write a ';'-separated report named
         ``optimal_policy-l_e<LT_f>-l_r<LT_s>-k<k>-Distribution <identifier>.csv``
         containing the totals, the parameters used and one row per
         (state, action) pair whose policy entry equals 1.

    Args:
        States: Indexable collection of states; ``States[0]`` determines how
            many ``State`` header columns are written.
        Actions: Indexable collection of actions; each action is iterable.
        dict_states: Forwarded unchanged to ``TransitionProbs``
            (presumably a state -> index lookup; verify against that helper).
        args: Object exposing ``Demand_Max``, ``LT_s``, ``LT_f``, ``h``,
            ``b``, ``C_s``, ``C_f``, ``Inv_Max``, ``Inv_Min``, ``cap_fast``,
            ``cap_slow`` and ``discount_factor``.
        k, u, m: Model parameters forwarded to ``TransitionProbs`` and echoed
            in the report; ``k`` is also formatted with ``%i`` in the report
            file name.
        distribution: Iterable of probabilities, reported next to
            ``demand_values`` and forwarded to ``TransitionProbs``.
        identifier: Label used in both output file names.
        demand_values: Sequence indexed in step with ``distribution``.

    Returns:
        Tuple ``(optimal_cost, share_expedited)`` where ``optimal_cost`` is
        ``np.sum`` of the per-state cost array returned by
        ``MarkovChain.cost_steady_state`` and ``share_expedited`` is that
        helper's second return value.
    """
    start = time.time()
    P = TransitionProbs(
        States, Actions, args.Demand_Max, args.LT_s, args.LT_f, args.h,
        args.b, args.C_s, args.C_f, args.Inv_Max, args.Inv_Min,
        args.cap_fast, args.cap_slow, dict_states, k, u, m, distribution,
        demand_values)
    env = VI.VI_env(States, Actions, P)
    print('environment created', time.time() - start)

    policy, v = VI.value_iteration(
        env, theta=0.000001, discount_factor=args.discount_factor)
    np.save('v_%s_%s_%s.npy' % (identifier, args.LT_s, k), v)

    MC, MC_R = MarkovChain.MC(States, P, policy)
    steady_state = MarkovChain.steady_state(States, policy, MC)
    optimal_cost_array, share_expedited = MarkovChain.cost_steady_state(
        steady_state, Actions, policy, MC, MC_R)
    optimal_cost = np.sum(optimal_cost_array)

    report_name = 'optimal_policy-l_e%i-l_r%i-k%i-Distribution %s.csv' % (
        args.LT_f, args.LT_s, k, identifier)
    with open(report_name, 'w') as f:
        f.write('OPTIMAL COST;' + str(optimal_cost) + '\n')
        f.write('Share expedited;' + str(share_expedited) + '\n')
        f.write('Share regular;' + str(1 - share_expedited) + '\n\n')
        f.writelines(
            _parameter_lines(args, k, u, m, distribution, demand_values))
        f.writelines(
            _policy_table_lines(States, Actions, policy, steady_state,
                                optimal_cost_array))

    return optimal_cost, share_expedited


def _parameter_lines(args, k, u, m, distribution, demand_values):
    """Yield the 'PARAMETERS USED' section of the report, chunk by chunk."""
    yield 'PARAMETERS USED:\n'
    yield 'Demand;Prob\n'
    # Indexing (rather than zip) keeps the failure loud if demand_values is
    # shorter than distribution.
    for index, prob in enumerate(distribution):
        yield str(demand_values[index]) + ';' + str(prob) + '\n'
    yield '\n'

    labelled_values = (
        ('l_r', args.LT_s),
        ('h', args.h),
        ('b', args.b),
        ('k', k),
        ('u', u),
        ('m', m),
        ('c_r', args.C_s),
    )
    for label, value in labelled_values:
        yield label + ';' + str(value) + '\n'
    yield '\n'


def _policy_table_lines(States, Actions, policy, steady_state,
                        optimal_cost_array):
    """Yield the policy table: a header, then one row per chosen action."""
    # One 'State' header cell per component of a state.
    yield ''.join('State;' for _ in States[0])
    yield ('optimal local;optimal offshore;prob state;cost state;'
           'weighted cost state;\n')

    for state_index, action_probs in enumerate(policy):
        for action_index, prob in enumerate(action_probs):
            # Only actions the policy selects (entry exactly 1) are reported.
            if prob != 1:
                continue

            state_prob = steady_state[state_index]
            weighted_cost = optimal_cost_array[state_index]

            cells = [str(part) for part in States[state_index]]
            cells.extend(str(part) for part in Actions[action_index])
            cells.append(str(state_prob))
            # The 'cost state' column is the weighted cost divided by the
            # state's steady-state probability; states with non-positive
            # probability report 0 instead of dividing by zero.
            if state_prob > 0:
                cells.append(str(weighted_cost / state_prob))
            else:
                cells.append(str(0))

            yield ';'.join(cells) + ';' + str(weighted_cost) + '\n'