# Shared imports for the examples below. Repo-local helpers used here
# (batch, batch_train, SolveNewsvendor, get_model, USE_GPU, DEVICE) are
# defined elsewhere in the repository.
import copy
import operator
from functools import reduce

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def run_policy_net(X_train,
                   Y_train,
                   X_test,
                   Y_test,
                   params,
                   is_nonlinear=False):

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [params['n'], 200, 200, 1]
        layers = reduce(
            operator.add,
            [
                [
                    nn.Linear(a, b),
                    nn.BatchNorm1d(b),
                    nn.ReLU(),
                    nn.Dropout(p=0.2)
                ]  # TODO: Why is this 0.2? (others are 0.5)
                for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])
            ])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1])]
        model = nn.Sequential(*layers)
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(nn.Linear(params['n'], 1))
        step_size = 1e-2

    if USE_GPU:
        model = model.cuda()

    X_train_t = torch.tensor(X_train, dtype=torch.float, device=DEVICE)
    Y_train_t = torch.tensor(Y_train, dtype=torch.float, device=DEVICE)
    X_test_t = torch.tensor(X_test, dtype=torch.float, device=DEVICE)
    Y_test_t = torch.tensor(Y_test, dtype=torch.float, device=DEVICE)
    d_ = torch.tensor(params['d'], dtype=torch.float, device=DEVICE)

    # Expected inventory cost
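    # With y := Y.mv(d_) the demand encoded by a label row, this computes
    #   f(z; y) = c_lin*z + 0.5*c_quad*z^2
    #           + b_lin*(y - z)_+ + 0.5*b_quad*(y - z)_+^2   (backorder)
    #           + h_lin*(z - y)_+ + 0.5*h_quad*(z - y)_+^2   (holding)
    # averaged over the batch.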
    cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
                      params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) +
                      0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 +
                      params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) +
                      0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \
                    .mean()

    opt = optim.Adam(model.parameters(), lr=step_size)

    for i in range(1000):

        model.eval()
        test_cost = batch.get_cost(100, i, model, X_test_t, Y_test_t, cost)

        model.train()
        train_cost = batch_train(150, i, X_train_t, Y_train_t, model, opt,
                                 cost)

        print(train_cost.item(), test_cost.item())

    return test_cost.item()
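
# `batch_train` and `batch.get_cost` are repo-local minibatch helpers that
# these examples do not show. Below is a minimal sketch consistent with the
# call sites above; the signatures and behavior here are assumptions, not
# the repository's implementation.

def batch_train_sketch(batch_size, epoch, X, Y, model, opt, cost_fn):
    # One pass over a shuffled training set in minibatches; `epoch` is
    # presumably used only for logging, so it is unused in this sketch.
    perm = torch.randperm(X.size(0), device=X.device)
    for start in range(0, X.size(0), batch_size):
        idx = perm[start:start + batch_size]
        if idx.numel() < 2:
            continue  # BatchNorm1d needs more than one sample in train mode
        train_cost = cost_fn(model(X[idx]), Y[idx])
        opt.zero_grad()
        train_cost.backward()
        opt.step()
    return train_cost  # cost on the last minibatch

def get_cost_sketch(batch_size, epoch, model, X, Y, cost_fn):
    # Average the cost over the whole set in minibatches, without gradients.
    with torch.no_grad():
        costs = [cost_fn(model(X[s:s + batch_size]), Y[s:s + batch_size])
                 for s in range(0, X.size(0), batch_size)]
    return torch.stack(costs).mean()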
def run_policy_net(X_train, Y_train, X_test, Y_test, params):

    # Set up non-linear network of 
    # Linear -> BatchNorm -> ReLU -> Dropout layers
    layer_sizes = [params['n'], 200, 200, 1]
    layers = reduce(operator.add, 
                    [[nn.Linear(a,b), nn.BatchNorm1d(b), nn.ReLU(),
                        nn.Dropout(p=0.2)]
                    for a,b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
    layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1])]
    model = nn.Sequential(*layers).cuda()

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_test_t  = torch.Tensor(X_test).cuda()
    Y_test_t  = torch.Tensor(Y_test).cuda()
    d_ = torch.Tensor(params['d']).cuda()  # Variable wrapper is deprecated

    # Expected inventory cost
    # Z has shape (batch, 1), so reshape Y.mv(d_) to (batch, 1) to avoid an
    # unintended (batch, batch) broadcast.
    cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
                      params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) +
                      0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 +
                      params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) +
                      0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \
                    .mean()

    opt = optim.Adam(model.parameters(), lr=1e-3)

    for i in range(1000):
        model.eval()
        test_cost = batch.get_cost(100, i, model, X_test_t, Y_test_t, cost)

        model.train()
        train_cost = batch_train(150, i, X_train_t, Y_train_t, model, opt, cost)

        print(train_cost.item(), test_cost.item())

    return test_cost.item()
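
# Hypothetical invocation with synthetic data; the shapes and params keys
# are the ones run_policy_net actually reads, but every value is made up:
n, k, m = 20, 10, 5000                     # features, demand levels, samples
rng = np.random.RandomState(0)
params = {'n': n, 'd': rng.rand(k),
          'c_lin': 0.1, 'c_quad': 0.01,
          'b_lin': 1.0, 'b_quad': 0.1,
          'h_lin': 0.5, 'h_quad': 0.05}
X = rng.randn(m, n)
Y = np.eye(k)[rng.randint(k, size=m)]      # one-hot demand labels
test_cost = run_policy_net(X[:4000], Y[:4000], X[4000:], Y[4000:], params)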
Example #3
def run_task_net(X, Y, X_test, Y_test, params, is_nonlinear=False):

    # Training/validation split
    th_frac = 0.8
    inds = np.random.permutation(X.shape[0])
    train_inds = inds[:int(X.shape[0]*th_frac)]
    hold_inds =  inds[int(X.shape[0]*th_frac):]
    X_train, X_hold = X[train_inds, :], X[hold_inds, :]
    Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :]

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_hold_t = torch.Tensor(X_hold).cuda()
    Y_hold_t = torch.Tensor(Y_hold).cuda()
    X_test_t = torch.Tensor(X_test).cuda()
    Y_test_t = torch.Tensor(Y_test).cuda()

    # Convert one-hot demand labels to integer class indices for NLLLoss
    Y_train_int_t = torch.LongTensor(
        np.where(Y_train_t.cpu().numpy())[1]).cuda()
    Y_hold_int_t = torch.LongTensor(
        np.where(Y_hold_t.cpu().numpy())[1]).cuda()
    Y_test_int_t = torch.LongTensor(
        np.where(Y_test_t.cpu().numpy())[1]).cuda()

    d_ = torch.Tensor(params['d']).cuda()  # Variable wrapper is deprecated

    # Expected inventory cost and solver for newsvendor scheduling problem
    # SolveNewsvendor returns Z of shape (batch, 1) (as in run_mle_net below),
    # so reshape Y.mv(d_) to match and avoid a (batch, batch) broadcast.
    cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
                          params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) +
                          0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 +
                          params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) +
                          0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \
                        .mean()
    newsvendor_solve = SolveNewsvendor(params).cuda()
    cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y)

    nll = nn.NLLLoss().cuda()
    lam = 10.0  # regularization

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]]
        layers = reduce(operator.add, [[nn.Linear(a,b), nn.BatchNorm1d(b), 
                                        nn.ReLU(), nn.Dropout(p=0.5)]
                          for a,b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax(dim=1)]
        model = nn.Sequential(*layers).cuda()
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(
            nn.Linear(X_train.shape[1], Y_train.shape[1]),
            nn.Softmax(dim=1)
        ).cuda()
        step_size = 1e-2

    opt = optim.Adam(model.parameters(), lr=step_size)

    # For early stopping
    hold_costs, test_costs = [], []
    num_stop_rounds = 20

    for i in range(1000):
        model.eval()
        test_cost = batch.get_cost(
            100, i, model, X_test_t, Y_test_t, cost_news_fn)
        test_nll = batch.get_cost_nll(
            100, i, model, X_test_t, Y_test_int_t, nll)

        hold_cost = batch.get_cost(
            100, i, model, X_hold_t, Y_hold_t, cost_news_fn)
        hold_nll  = batch.get_cost_nll(
            100, i, model, X_hold_t, Y_hold_int_t, nll)

        model.train()
        train_cost, train_nll = batch_train(150, i, X_train_t, Y_train_t, 
            Y_train_int_t, model, cost_news_fn, nll, opt, lam)

        print(i, train_cost.item(), train_nll.item(), test_cost.item(),
              test_nll.item(), hold_cost.item(), hold_nll.item())

        # Early stopping
        test_costs.append(test_cost.item())
        hold_costs.append(hold_cost.item())
        if i > 0 and i % num_stop_rounds == 0:
            idx = hold_costs.index(min(hold_costs))
            # Stop if the current cost is the worst of the last num_stop_rounds rounds
            if max(hold_costs) == hold_cost.item():
                print(test_costs[idx])
                return test_costs[idx]
            else:
                # Keep only the "best" round
                hold_costs = [hold_costs[idx]]
                test_costs = [test_costs[idx]]

    # In case of no early stopping, return best run so far
    idx = hold_costs.index(min(hold_costs))
    return test_costs[idx]
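
# `SolveNewsvendor` (used above) is defined elsewhere in the repo as a
# differentiable layer mapping predicted demand probabilities p to
#   z*(p) = argmin_z  sum_j p_j * f(z; d_j),
# the stock level minimizing expected newsvendor cost. Below is a crude,
# evaluation-only stand-in (assumption: plain grid search over z, which is
# NOT differentiable, unlike the repository's implementation that
# run_task_net needs for gradients).

class GridNewsvendorSketch(nn.Module):
    def __init__(self, params, num_z=1001):
        super().__init__()
        d = torch.tensor(params['d'], dtype=torch.float)
        z = torch.linspace(0.0, 1.5 * float(d.max()), num_z)
        y, zz = d.view(-1, 1), z.view(1, -1)
        # f[j, k] = newsvendor cost of stocking z_k when demand is d_j
        f = (params['c_lin'] * zz + 0.5 * params['c_quad'] * zz**2 +
             params['b_lin'] * (y - zz).clamp(min=0) +
             0.5 * params['b_quad'] * (y - zz).clamp(min=0)**2 +
             params['h_lin'] * (zz - y).clamp(min=0) +
             0.5 * params['h_quad'] * (zz - y).clamp(min=0)**2)
        self.register_buffer('z', z)   # candidate stock levels, (num_z,)
        self.register_buffer('f', f)   # cost table, (len(d), num_z)

    def forward(self, p):
        # p: (batch, len(d)) demand probabilities from the model
        expected = p @ self.f          # expected cost per candidate z
        return self.z[expected.argmin(dim=1)].view(-1, 1)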
def run_mle_net(X, Y, X_test, Y_test, params, is_nonlinear=False):

    # Training/validation split
    th_frac = 0.8
    inds = np.random.permutation(X.shape[0])
    train_inds = inds[:int(X.shape[0] * th_frac)]
    hold_inds = inds[int(X.shape[0] * th_frac):]
    X_train, X_hold = X[train_inds, :], X[hold_inds, :]
    Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :]

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_hold_t = torch.Tensor(X_hold).cuda()
    Y_hold_t = torch.Tensor(Y_hold).cuda()
    X_test_t = torch.Tensor(X_test).cuda()
    Y_test_t = torch.Tensor(Y_test).cuda()

    # Convert one-hot demand labels to integer class indices for NLLLoss
    Y_train_int_t = torch.LongTensor(np.where(
        Y_train_t.cpu().numpy())[1]).cuda()
    Y_hold_int_t = torch.LongTensor(np.where(Y_hold_t.cpu().numpy())[1]).cuda()
    Y_test_int_t = torch.LongTensor(np.where(Y_test_t.cpu().numpy())[1]).cuda()

    d_ = torch.Tensor(params['d']).cuda()  # Variable wrapper is deprecated

    # Expected inventory cost and solver for newsvendor scheduling problem
    cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
                          params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) +
                          0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 +
                          params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) +
                          0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \
                        .mean()
    newsvendor_solve = SolveNewsvendor(params).cuda()
    cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y)

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]]
        layers = reduce(operator.add, [[
            nn.Linear(a, b),
            nn.BatchNorm1d(b),
            nn.ReLU(),
            nn.Dropout(p=0.5)
        ] for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax(dim=1)]
        model = nn.Sequential(*layers).cuda()
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(nn.Linear(X_train.shape[1], Y_train.shape[1]),
                              nn.Softmax(dim=1)).cuda()
        step_size = 1e-2

    opt = optim.Adam(model.parameters(), lr=step_size)

    # For early stopping
    hold_costs, test_costs = [], []
    model_states = []
    num_stop_rounds = 20

    for i in range(1000):
        model.eval()

        test_cost = batch.get_cost_nll(100, i, model, X_test_t, Y_test_int_t,
                                       nn.NLLLoss())

        hold_cost = batch.get_cost_nll(100, i, model, X_hold_t, Y_hold_int_t,
                                       nn.NLLLoss())

        model.train()
        train_cost = batch_train(150, i, X_train_t, Y_train_t, Y_train_int_t,
                                 model, nn.NLLLoss(), opt)

        print(i, train_cost.item(), test_cost.item(), hold_cost.item())

        # Early stopping
        # See https://github.com/locuslab/e2e-model-learning-staging/commit/d183c65d0cd53d611a77a4508da65c25cf88c93d
        test_costs.append(test_cost.item())
        hold_costs.append(hold_cost.item())
        # Deep-copy the weights: state_dict().copy() is shallow and would
        # alias tensors that the optimizer keeps updating in place.
        model_states.append(copy.deepcopy(model.state_dict()))
        if i > 0 and i % num_stop_rounds == 0:
            idx = hold_costs.index(min(hold_costs))
            # Stop if the current cost is the worst of the last num_stop_rounds rounds
            if max(hold_costs) == hold_cost.item():
                model.eval()
                best_model = get_model(X_train, Y_train, X_test, Y_test,
                                       params, is_nonlinear)
                best_model.load_state_dict(model_states[idx])
                best_model.cuda()
                test_cost_news = batch.get_cost(100, i, best_model, X_test_t,
                                                Y_test_t, cost_news_fn)
                return test_cost_news.item()
            else:
                # Keep only "best" round
                hold_costs = [hold_costs[idx]]
                test_costs = [test_costs[idx]]
                model_states = [model_states[idx]]

    # In case of no early stopping, return best run so far
    idx = hold_costs.index(min(hold_costs))
    best_model = get_model(X, Y, X_test, Y_test, params, is_nonlinear)
    best_model.load_state_dict(model_states[idx])
    best_model.cuda()
    test_cost_news = batch.get_cost(100, i, best_model, X_test_t, Y_test_t,
                                    cost_news_fn)
    return test_cost_news.item()
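
# Taken together, the examples span three training regimes:
#   - run_policy_net: trains a stock decision z = g(x) end-to-end on the
#                     inventory cost itself.
#   - run_mle_net:    trains p(y|x) by maximum likelihood (NLL) and only
#                     evaluates the task cost through the newsvendor solver.
#   - run_task_net:   trains p(y|x) through the solver on the task cost,
#                     presumably combined with lam * NLL inside the repo's
#                     batch_train helper.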