def run_policy_net(X_train, Y_train, X_test, Y_test, params, is_nonlinear=False):
    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [params['n'], 200, 200, 1]
        layers = reduce(
            operator.add,
            [[nn.Linear(a, b), nn.BatchNorm1d(b), nn.ReLU(),
              nn.Dropout(p=0.2)]  # TODO: Why is this 0.2? (others are 0.5)
             for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1])]
        model = nn.Sequential(*layers)
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(nn.Linear(params['n'], 1))
        step_size = 1e-2

    if USE_GPU:
        model = model.cuda()

    X_train_t = torch.tensor(X_train, dtype=torch.float, device=DEVICE)
    Y_train_t = torch.tensor(Y_train, dtype=torch.float, device=DEVICE)
    X_test_t = torch.tensor(X_test, dtype=torch.float, device=DEVICE)
    Y_test_t = torch.tensor(Y_test, dtype=torch.float, device=DEVICE)
    d_ = torch.tensor(params['d'], dtype=torch.float, device=DEVICE)

    # Expected inventory cost
    cost = lambda Z, Y: (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
        params['b_lin'] * (Y.mv(d_).view(-1, 1) - Z).clamp(min=0) +
        0.5 * params['b_quad'] * (Y.mv(d_).view(-1, 1) - Z).clamp(min=0)**2 +
        params['h_lin'] * (Z - Y.mv(d_).view(-1, 1)).clamp(min=0) +
        0.5 * params['h_quad'] * (Z - Y.mv(d_).view(-1, 1)).clamp(min=0)**2) \
        .mean()

    opt = optim.Adam(model.parameters(), lr=step_size)

    for i in range(1000):
        model.eval()
        test_cost = batch.get_cost(100, i, model, X_test_t, Y_test_t, cost)

        model.train()
        train_cost = batch_train(150, i, X_train_t, Y_train_t, model, opt, cost)

        print(train_cost.item(), test_cost.item())

    return test_cost.item()
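
# NOTE: `batch_train` and `batch.get_cost` are helpers defined elsewhere in this
# repo and are not shown in this section. The function below is only a minimal
# sketch, under assumed shapes and interfaces, of what the policy-net training
# step could look like: shuffle the training set, slice mini-batches, and take
# one Adam step per batch on the task cost. The name `_sketch_batch_train_policy`
# is hypothetical and is not part of the original code.
def _sketch_batch_train_policy(batch_size, X, Y, model, opt, cost):
    perm = torch.randperm(X.size(0), device=X.device)
    last_loss = None
    for k in range(0, X.size(0), batch_size):
        idx = perm[k:k + batch_size]
        opt.zero_grad()
        loss = cost(model(X[idx]), Y[idx])  # expected inventory cost on the batch
        loss.backward()
        opt.step()
        last_loss = loss.detach()
    return last_loss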
def run_task_net(X, Y, X_test, Y_test, params, is_nonlinear=False):
    # Training/validation split
    th_frac = 0.8
    inds = np.random.permutation(X.shape[0])
    train_inds = inds[:int(X.shape[0]*th_frac)]
    hold_inds = inds[int(X.shape[0]*th_frac):]
    X_train, X_hold = X[train_inds, :], X[hold_inds, :]
    Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :]

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_hold_t = torch.Tensor(X_hold).cuda()
    Y_hold_t = torch.Tensor(Y_hold).cuda()
    X_test_t = torch.Tensor(X_test).cuda()
    Y_test_t = torch.Tensor(Y_test).cuda()
    Y_train_int_t = torch.LongTensor(
        np.where(Y_train_t.cpu().numpy())[1]).cuda()
    Y_hold_int_t = torch.LongTensor(
        np.where(Y_hold_t.cpu().numpy())[1]).cuda()
    Y_test_int_t = torch.LongTensor(
        np.where(Y_test_t.cpu().numpy())[1]).cuda()
    d_ = Variable(torch.Tensor(params['d'])).cuda()

    # Expected inventory cost and solver for newsvendor scheduling problem
    cost = lambda Z, Y: (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
        params['b_lin'] * (Y.mv(d_) - Z).clamp(min=0) +
        0.5 * params['b_quad'] * (Y.mv(d_) - Z).clamp(min=0)**2 +
        params['h_lin'] * (Z - Y.mv(d_)).clamp(min=0) +
        0.5 * params['h_quad'] * (Z - Y.mv(d_)).clamp(min=0)**2) \
        .mean()
    newsvendor_solve = SolveNewsvendor(params).cuda()
    cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y)
    nll = nn.NLLLoss().cuda()
    lam = 10.0  # regularization

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]]
        layers = reduce(
            operator.add,
            [[nn.Linear(a, b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.5)]
             for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax()]
        model = nn.Sequential(*layers).cuda()
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(
            nn.Linear(X_train.shape[1], Y_train.shape[1]), nn.Softmax()).cuda()
        step_size = 1e-2
    opt = optim.Adam(model.parameters(), lr=step_size)

    # For early stopping
    hold_costs, test_costs = [], []
    num_stop_rounds = 20

    for i in range(1000):
        model.eval()
        test_cost = batch.get_cost(
            100, i, model, X_test_t, Y_test_t, cost_news_fn)
        test_nll = batch.get_cost_nll(
            100, i, model, X_test_t, Y_test_int_t, nll)
        hold_cost = batch.get_cost(
            100, i, model, X_hold_t, Y_hold_t, cost_news_fn)
        hold_nll = batch.get_cost_nll(
            100, i, model, X_hold_t, Y_hold_int_t, nll)

        model.train()
        train_cost, train_nll = batch_train(
            150, i, X_train_t, Y_train_t, Y_train_int_t, model, cost_news_fn,
            nll, opt, lam)

        print(i, train_cost.data[0], train_nll.data[0],
              test_cost.data[0], test_nll.data[0],
              hold_cost.data[0], hold_nll.data[0])

        # Early stopping
        test_costs.append(test_cost.data[0])
        hold_costs.append(hold_cost.data[0])
        if i > 0 and i % num_stop_rounds == 0:
            idx = hold_costs.index(min(hold_costs))
            # Stop if current cost is worst in num_stop_rounds rounds
            if max(hold_costs) == hold_cost.data[0]:
                print(test_costs[idx])
                return test_costs[idx]
            else:
                # Keep only "best" round
                hold_costs = [hold_costs[idx]]
                test_costs = [test_costs[idx]]

    # In case of no early stopping, return best run so far
    idx = hold_costs.index(min(hold_costs))
    return test_costs[idx]
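
# NOTE: a minimal sketch (hypothetical name, not part of the original code) of a
# single task-loss evaluation, showing how `cost_news_fn` above composes the
# differentiable newsvendor solver with the expected inventory cost. `model`,
# `newsvendor_solve`, and `cost` are as constructed in run_task_net; because the
# solver is an nn.Module, gradients flow back through the stocking decision into
# the predictive model, which is what distinguishes run_task_net from run_mle_net.
def _sketch_task_loss(model, newsvendor_solve, cost, x_batch, y_batch):
    probs = model(x_batch)        # predicted distribution over the demand values d_
    z = newsvendor_solve(probs)   # stocking decision from the stochastic program
    return cost(z, y_batch)       # expected inventory cost of that decision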
def run_mle_net(X, Y, X_test, Y_test, params, is_nonlinear=False):
    # Training/validation split
    th_frac = 0.8
    inds = np.random.permutation(X.shape[0])
    train_inds = inds[:int(X.shape[0] * th_frac)]
    hold_inds = inds[int(X.shape[0] * th_frac):]
    X_train, X_hold = X[train_inds, :], X[hold_inds, :]
    Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :]

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_hold_t = torch.Tensor(X_hold).cuda()
    Y_hold_t = torch.Tensor(Y_hold).cuda()
    X_test_t = torch.Tensor(X_test).cuda()
    Y_test_t = torch.Tensor(Y_test).cuda()
    Y_train_int_t = torch.LongTensor(
        np.where(Y_train_t.cpu().numpy())[1]).cuda()
    Y_hold_int_t = torch.LongTensor(np.where(Y_hold_t.cpu().numpy())[1]).cuda()
    Y_test_int_t = torch.LongTensor(np.where(Y_test_t.cpu().numpy())[1]).cuda()
    d_ = Variable(torch.Tensor(params['d'])).cuda()

    # Expected inventory cost and solver for newsvendor scheduling problem
    cost = lambda Z, Y: (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
        params['b_lin'] * (Y.mv(d_).view(-1, 1) - Z).clamp(min=0) +
        0.5 * params['b_quad'] * (Y.mv(d_).view(-1, 1) - Z).clamp(min=0)**2 +
        params['h_lin'] * (Z - Y.mv(d_).view(-1, 1)).clamp(min=0) +
        0.5 * params['h_quad'] * (Z - Y.mv(d_).view(-1, 1)).clamp(min=0)**2) \
        .mean()
    newsvendor_solve = SolveNewsvendor(params).cuda()
    cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y)

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]]
        layers = reduce(
            operator.add,
            [[nn.Linear(a, b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.5)]
             for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax()]
        model = nn.Sequential(*layers).cuda()
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(
            nn.Linear(X_train.shape[1], Y_train.shape[1]), nn.Softmax()).cuda()
        step_size = 1e-2
    opt = optim.Adam(model.parameters(), lr=step_size)

    # For early stopping
    hold_costs, test_costs = [], []
    model_states = []
    num_stop_rounds = 20

    for i in range(1000):
        # model.eval()
        test_cost = batch.get_cost_nll(
            100, i, model, X_test_t, Y_test_int_t, nn.NLLLoss())
        hold_cost = batch.get_cost_nll(
            100, i, model, X_hold_t, Y_hold_int_t, nn.NLLLoss())

        model.train()
        train_cost = batch_train(
            150, i, X_train_t, Y_train_t, Y_train_int_t, model, nn.NLLLoss(),
            opt)

        print(i, train_cost.data[0], test_cost.data[0], hold_cost.data[0])

        # Early stopping
        # See https://github.com/locuslab/e2e-model-learning-staging/commit/d183c65d0cd53d611a77a4508da65c25cf88c93d
        test_costs.append(test_cost.data[0])
        hold_costs.append(hold_cost.data[0])
        model_states.append(model.state_dict().copy())
        if i > 0 and i % num_stop_rounds == 0:
            idx = hold_costs.index(min(hold_costs))
            # Stop if current cost is worst in num_stop_rounds rounds
            if max(hold_costs) == hold_cost.data[0]:
                model.eval()
                best_model = get_model(
                    X_train, Y_train, X_test, Y_test, params, is_nonlinear)
                best_model.load_state_dict(model_states[idx])
                best_model.cuda()
                test_cost_news = batch.get_cost(
                    100, i, best_model, X_test_t, Y_test_t, cost_news_fn)
                return test_cost_news.data[0]
            else:
                # Keep only "best" round
                hold_costs = [hold_costs[idx]]
                test_costs = [test_costs[idx]]
                model_states = [model_states[idx]]

    # In case of no early stopping, return best run so far
    idx = hold_costs.index(min(hold_costs))
    best_model = get_model(X, Y, X_test, Y_test, params, is_nonlinear)
    best_model.load_state_dict(model_states[idx])
    best_model.cuda()
    test_cost_news = batch.get_cost(
        100, i, best_model, X_test_t, Y_test_t, cost_news_fn)
    return test_cost_news.data[0]
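
# NOTE: a hypothetical driver sketch, illustrating how the three routines above
# might be invoked to compare approaches on one problem instance. The data and
# `params` are assumed to be generated elsewhere in this repo; the function name
# and return structure below are placeholders, not part of the original code.
def _sketch_compare(X_train, Y_train, X_test, Y_test, params, is_nonlinear=False):
    results = {}
    # Maximum-likelihood baseline: fit the demand model with NLL, then evaluate
    # the resulting newsvendor decisions on the test set
    results['mle'] = run_mle_net(X_train, Y_train, X_test, Y_test, params,
                                 is_nonlinear)
    # End-to-end policy optimization: map features directly to a stocking decision
    results['policy'] = run_policy_net(X_train, Y_train, X_test, Y_test, params,
                                       is_nonlinear)
    # Task-based model learning: train the demand model through the solver
    results['task'] = run_task_net(X_train, Y_train, X_test, Y_test, params,
                                   is_nonlinear)
    return results  # each entry is a test-set inventory (task) cost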