Example #1
def main():
    # Smoke-test each registered environment: build it, sample an action, step, and reset.
    # `parse_arguments`, `ENV_LIST`, and `or_gym` are assumed to be defined/imported elsewhere in the module.
    args = parse_arguments()
    for env_name in ENV_LIST:
        print('\nTesting functionality for {}'.format(env_name))
        try:
            env = or_gym.make(env_name)
            print('{} initialized successfully'.format(env_name))
            try:
                action = env.action_space.sample()
                print('Action {} selected'.format(action))
            except Exception as e:
                print('Error sampling action for env = {}: {}'.format(env_name, e))
                continue
            try:
                _ = env.step(action)
                print('Step successful')
            except Exception as e:
                print('Error encountered during step for action {}: {}'.format(action, e))
            try:
                env.reset()
                print('Reset successful for env = {}'.format(env_name))
            except Exception as e:
                print('Reset error encountered for env = {}: {}'.format(env_name, e))
        except Exception as e:
            print('Error encountered initializing env = {}: {}'.format(env_name, e))
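A minimal entry-point sketch for invoking this test loop (it assumes the surrounding module defines ENV_LIST and parse_arguments as above):

if __name__ == '__main__':
    main()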
Example #2
def check_config(env_name, model_name=None, *args, **kwargs):
    # Build an RLlib/Tune hyperparameter search config for a given or_gym environment.
    # Assumes `or_gym` and `from ray import tune` are imported at module level.
    if model_name is None:
        model_name = 'or_gym_tune'
    env = or_gym.make(env_name)
    try:
        vf_clip_param = env._max_rewards
    except AttributeError:
        vf_clip_param = 10

    # TODO: Add grid search capabilities
    rl_config = {
        "env": env_name,
        "num_workers": 2,
        "env_config": {
            'mask': True
        },
        # "lr": 1e-5,
        # "entropy_coeff": 1e-4,
        "vf_clip_param": vf_clip_param,
        "lr": tune.grid_search([1e-4, 1e-5]),  #1e-6, 1e-7]),
        "entropy_coeff": tune.grid_search([1e-2]),  #, 1e-4]),
        # "critic_lr": tune.grid_search([1e-3, 1e-4, 1e-5]),
        # "actor_lr": tune.grid_search([1e-3, 1e-4, 1e-5]),
        # "lambda": tune.grid_search([0.95, 0.9]),
        "kl_target": tune.grid_search([0.01]),
        # "sgd_minibatch_size": tune.grid_search([128, 512, 1024]),
        # "train_batch_size": tune.grid_search([])
        "model": {
            "vf_share_layers": False,
            # "custom_model": model_name,
            "fcnet_activation": "elu",
            "fcnet_hiddens": [128, 128, 128]
        }
    }

    return rl_config
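A minimal sketch of how the returned config could be fed to Ray Tune. The environment registration, the choice of PPO as the trainable, and the stopping criterion are illustrative assumptions, not part of the original example:

import or_gym
import ray
from ray import tune
from ray.tune.registry import register_env

env_name = 'Knapsack-v0'  # illustrative choice
# Register the or_gym environment so RLlib can construct it by name.
register_env(env_name, lambda config: or_gym.make(env_name, env_config=config))

ray.init(ignore_reinit_error=True)
rl_config = check_config(env_name)
# Launch a PPO run over the grid-searched hyperparameters defined in check_config.
analysis = tune.run('PPO', config=rl_config, stop={'training_iteration': 50})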
Example #3
# The `actor` network class is defined elsewhere in the original script.
import os
import sys

import numpy as np
import or_gym
import torch.nn as nn

if __name__ == '__main__':

    N = 10
    # print(net)
    # param = list(net.parameters())
    # print(param[0].size())

    env_config = {
        'N': N,
        'max_weight': 200,
        'current_weight': 0,
        'mask': True,
        'randomize_params_on_reset': True,
    }

    env = or_gym.make('Knapsack-v0', env_config=env_config)
    # set seed for reproducibility
    # right now take fresh entropy from the computer
    env.set_seed(int.from_bytes(os.urandom(4), sys.byteorder))

    net = actor(N)
    print(net)

    K = 7
    nb_episodes = 1000
    criterion = nn.CrossEntropyLoss()
    directory = 'linear_only_actor,CrossEntropy,oracletrainer,batch_updates'

    LR = np.array([10**i for i in np.arange(-2, 3, 1, dtype='float')])

    # create the output directory for results (portable alternative to shelling out to `mkdir`)
    os.makedirs('./results/' + directory, exist_ok=True)
Example #4
import or_gym

# Quick sanity check of the networked inventory management environment.
env_name = 'InvManagement-v2'
env = or_gym.make(env_name)
print(env.step(env.action_space.sample()))  # one random step: (state, reward, done, info)
print(env.state.shape[0] == env.obs_dim)    # state length should match the declared observation dimension
env.plot_network()                          # visualize the supply network
Example #5
def im_dfo_model(x, env, online):
    '''
    Compute the negative of the expected profit for a sample path.
    This function is used as the objective in an unconstrained optimization
    algorithm (scipy.optimize.minimize).

    x = [integer list; dimension |Stages| - 1] total inventory levels at each node.
    env = [InvManagementEnv] current simulation environment.
    online = [Boolean] should the optimization be run online?
    '''

    # assert env.spec.id == 'InvManagement-v0', \
    # '{} received. Heuristic designed for InvManagement-v0.'.format(env.spec.id)

    x = np.array(x)  #inventory level at each node
    z = np.cumsum(x)  #base stock levels

    m = env.num_stages
    try:
        dimz = len(z)
    except TypeError:  # z has no length (scalar input)
        dimz = 1
    assert dimz == m - 1, "Wrong dimension on base stock level vector. Should be #Stages - 1."

    #create simulation environment (copy it if in offline mode)
    sim_kwargs = {
        'I0': x,  #set initial inventory to full base stock
        'p': env.p,  #extract all other parameters from env
        'r': env.r,
        'k': env.k,
        'h': env.h,
        'c': env.c,
        'L': env.L,
        'backlog': env.backlog,
        'dist_param': env.dist_param,
        'alpha': env.alpha,
        'seed_int': env.seed_int
    }

    demand_dist = env.demand_dist  #extract demand distribution function from env

    if online:
        # extract args to pass to re-simulation
        sim_kwargs['periods'] = env.period  # simulation goes up until the current period in online mode
        sim_kwargs['dist'] = 5  # set distribution to manual mode
        sim_kwargs['user_D'] = env.D[:env.period]  # copy historical demands from env
    else:
        sim_kwargs['periods'] = env.num_periods  # copy num_periods from env
        sim_kwargs['dist'] = env.dist  # copy dist from env

    #build simulation environment (this is just clean copy if in offline mode)
    if env.backlog:
        sim = or_gym.make("InvManagement-v0", env_config=sim_kwargs)
    else:
        sim = or_gym.make("InvManagement-v1", env_config=sim_kwargs)

    #run simulation
    for t in range(sim.num_periods):
        #take a step in the simulation using critical ratio base stock
        sim.step(action=sim.base_stock_action(z=z))

    #probability for demand at each period
    prob = demand_dist.pmf(sim.D, **sim.dist_param)

    #expected profit
    return -1 / sim.num_periods * np.sum(prob * sim.P)
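A minimal offline usage sketch, since the docstring points at scipy.optimize.minimize; the initial guess and the Powell method below are illustrative assumptions:

import numpy as np
import or_gym
from scipy.optimize import minimize

env = or_gym.make('InvManagement-v0')
x0 = 20 * np.ones(env.num_stages - 1)  # illustrative starting inventory levels, one per non-retail stage
res = minimize(im_dfo_model, x0, args=(env, False), method='Powell')  # offline mode (online=False)
print(res.x, -res.fun)  # optimized inventory levels and the resulting expected profit estimate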
Example #6
model = "DLP"
mode = "SH"
window = 30
solver = "glpk"

#create file names
filename = path+"in"+ver+".pkl"
D = pickle.load(open(filename,'rb'))
filesave = path+model+"_"+mode+"_"+ver+"/"

# solve shrinking horizon model
for i in range(100):
    # create environment with user-specified demand
    user_D = {(1, 0): D[:, i]}  # assign scenario to the retail/market link
    sample_path = {(1, 0): True}  # the specified demand is sampled from the probability distribution
    env = or_gym.make("InvManagement-" + ver, env_config={'user_D': user_D,
                                                          'sample_path': sample_path})
    # loop through each period in the simulation, optimizing along the way
    for t in range(env.num_periods):
        # create model
        m = net_im_lp_model(env, window_size=window, use_expectation=True)
        # select solver
        s = SolverFactory(solver)
        # solve model
        res = s.solve(m, tee=False)
        # check that the result is optimal
        if res['Solver'][0]['Termination condition'][:] != 'optimal':
            print("ERROR: NOT OPTIMAL")
            break
        # extract reorder quantities
        Ropt = m.R.get_values()
        # pass action for the current timestep only (i.e. t=0)
Example #7
    def _build_env(self, env_name):
        # Helper method (from a larger class): construct an or_gym environment by name.
        env = or_gym.make(env_name)
        return env
Example #8
def knapsack_env():
    # Stub: wrap the or_gym knapsack environment (assumes `or_gym` is imported at module level).
    env = or_gym.make('Knapsack-v0')

    # TODO: take the constraints from the or_gym environment created above and
    #  put them into the local knapsack environment class.
    raise NotImplementedError('This still has to be done!')
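A hypothetical sketch of the TODO above: read the knapsack constraints off the created environment. The attribute names (item_weights, item_values, max_weight) are assumptions about Knapsack-v0's internals, not confirmed by this example:

import or_gym

env = or_gym.make('Knapsack-v0')
weights = env.item_weights   # per-item weights (assumed attribute)
values = env.item_values     # per-item values (assumed attribute)
capacity = env.max_weight    # knapsack capacity constraint (assumed attribute)
print(len(weights), len(values), capacity)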
Example #9
import or_gym
import numpy as np
import pandas as pd
from pyomo.opt import SolverFactory
from or_gym.algos.supply_network.math_prog import *
from or_gym.algos.supply_network.stoch_prog import *

#solve perfect information model
env1 = or_gym.make("InvManagement-v2")
m1 = net_im_lp_model(env1, perfect_information=True)
s1 = SolverFactory('glpk')
res1 = s1.solve(m1)
print(np.sum(list(m1.P.get_values().values())))

#solve shrinking horizon model at t=0
env3 = or_gym.make("InvManagement-v2")
m3 = net_im_lp_model(env3)
s3 = SolverFactory('glpk')
res3 = s3.solve(m3)
print(np.sum(list(m3.P.get_values().values())))

#solve perfect information model with average demand
D = 20 * np.ones(30)
env4 = or_gym.make("InvManagement-v2", env_config={'user_D': {(1, 0): D}})
# env4.graph.edges[(1,0)]['demand_dist']=[20 for i in range(env4.num_periods)]
m4 = net_im_lp_model(env4, perfect_information=True)
s4 = SolverFactory('glpk')
res4 = s4.solve(m4)
print(np.sum(list(m4.P.get_values().values())))

#solve shrinking horizon model