import logging
import os

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from torch.optim import Adam

# Project-local helpers (set_seed, set_logging, the data samplers, the tree
# constructors, DIST_SOFT/DIST_HARD, TEM, etc.) are assumed importable from
# the surrounding repository.
EPS = 1e-8  # assumed value; guards torch.log against zero probabilities


# Synthetic-data classification experiment.
def run(name, height, r_dim, temperature, batch_size, all_train, r_sparse,
        dist_func_name, x_dim, prior_factor, n_test, min_n_train, max_n_train):
    problem_name = '{0}_{1}_{2}_{3}'.format(x_dim, prior_factor, min_n_train, max_n_train)
    problem_name = os.path.join('syn', problem_name)
    model_name = '{0}height({1})_rdim({2})_temp({3})_rsparse({4})_dist({5})_batch_size({6})'.format(
        name, height, r_dim, temperature, r_sparse, dist_func_name, batch_size)
    log_path = os.path.join('models', problem_name)
    os.makedirs(log_path, exist_ok=True)
    set_logging(log_path, model_name)
    set_seed(0)
    sampler = lambda: get_synthetic_varied(batch_size, min_n_train, max_n_train, n_test,
                                           x_dim, prior_factor, cuda=True, with_train=all_train)
    dist_func = get_dist_func(dist_func_name)
    # height == 0 selects the dynamically grown tree (maximum depth 5);
    # otherwise the tree has the requested fixed height.
    if height == 0:
        model = get_classification_tree_dyn(x_dim, 2, r_dim, 5, dist_func, temperature).cuda()
    else:
        model = get_classification_tree(x_dim, 2, r_dim, height, dist_func, temperature).cuda()
    model = train_model(model, sampler, sparse_norm, model_name, log_path, r_sparse=r_sparse)
    logging.info('Evaluating on test')
    # Evaluate on freshly sampled episodes whose test points are held out.
    sampler = lambda: get_synthetic_varied(64, min_n_train, max_n_train, n_test,
                                           x_dim, prior_factor, cuda=True, with_train=False)
    # Report accuracy for every combination of soft/hard distance function
    # and soft/hard routing.
    model.set_dist_func(DIST_SOFT)
    accuracy = estimate_model_accuracy(model, sampler)
    logging.info('Accuracy {0} (soft, soft)'.format(accuracy))
    model.set_dist_func(DIST_HARD)
    accuracy = estimate_model_accuracy(model, sampler)
    logging.info('Accuracy {0} (hard, soft)'.format(accuracy))
    model.make_hard_routing()
    model.set_dist_func(DIST_SOFT)
    accuracy = estimate_model_accuracy(model, sampler)
    logging.info('Accuracy {0} (soft, hard)'.format(accuracy))
    model.set_dist_func(DIST_HARD)
    accuracy = estimate_model_accuracy(model, sampler)
    logging.info('Accuracy {0} (hard, hard)'.format(accuracy))
# Episodic trainer for the classification experiment: checkpoints the best
# validation accuracy and stops early once it has not improved for `patience`
# evaluation rounds.
def train_model(model, sampler, norm, name, path, r_sparse=0.1, iterations=50000,
                seed=0, patience=10):
    filename = os.path.join(path, name)
    set_seed(seed)
    y_dim = 2
    lr = 3e-4
    best_acc = 0
    rounds = 0
    optimizer = Adam(model.parameters(), lr)
    criterion = nn.CrossEntropyLoss()
    for i in range(iterations):
        x_train, y_train, x_test, y_test = sampler()
        y_pred = model(x_train, y_train, x_test)
        # The model outputs probabilities and the targets are one-hot, so feed
        # log(p + EPS) to CrossEntropyLoss as the scores and argmax as the labels.
        loss = criterion(torch.log(y_pred.reshape(-1, y_dim) + EPS),
                         y_test.reshape(-1, y_dim).argmax(1))
        reg = norm(model, x_train, y_train)
        loss += r_sparse * reg
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 500 == 0:
            acc = estimate_model_accuracy(model, sampler)
            if acc > best_acc:
                best_acc = acc
                rounds = 0
                torch.save(model.state_dict(), filename)
            else:
                rounds += 1
            logging.info('iteration {0}: {1}, patience rounds {2}'.format(i, acc, rounds))
            if rounds > patience:
                break
    # Restore the best checkpoint before the final evaluation.
    model.load_state_dict(torch.load(filename))
    acc = estimate_model_accuracy(model, sampler)
    logging.info('Final: {0}'.format(acc))
    return model
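# estimate_model_accuracy is defined elsewhere in the repository. A minimal
# sketch of the behavior the trainer above relies on, assuming the sampler
# yields (x_train, y_train, x_test, y_test) episodes with one-hot targets;
# the episode count and all names here are illustrative, not the actual
# implementation.
def estimate_model_accuracy(model, sampler, n_episodes=100):
    correct, total = 0, 0
    with torch.no_grad():
        for _ in range(n_episodes):
            x_train, y_train, x_test, y_test = sampler()
            y_pred = model(x_train, y_train, x_test)
            pred = y_pred.reshape(-1, y_pred.shape[-1]).argmax(1)
            true = y_test.reshape(-1, y_test.shape[-1]).argmax(1)
            correct += (pred == true).sum().item()
            total += true.shape[0]
    return correct / total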
# Regression variant of the episodic trainer (apparently from a separate
# experiment script; it shares the name of the classification trainer above).
def train_model(model, sampler, r_sparse, name, path, lr=3e-4, iterations=50000,
                seed=0, patience=5):
    pathname = os.path.join('models', path)
    filename = os.path.join(pathname, name)
    os.makedirs(pathname, exist_ok=True)
    set_seed(seed)
    best_rmse = np.inf
    rounds = 0
    optimizer = Adam(model.parameters(), lr)
    criterion = nn.MSELoss()
    for i in range(1, iterations + 1):
        x_train, y_train, x_test, y_test = sampler()
        y_pred = model(x_train, y_train, x_test)
        loss = criterion(y_pred.reshape(-1), y_test.reshape(-1))
        sparse = sparse_norm(model, x_train, y_train)
        loss += r_sparse * sparse
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 500 == 0:
            rmse = estimate_model_rmse(model, sampler)
            if rmse < best_rmse:
                best_rmse = rmse
                rounds = 0
                torch.save(model.state_dict(), filename)
            else:
                rounds += 1
            logging.info('iteration {0}: {1}, patience rounds {2}'.format(i, rmse, rounds))
            if rounds > patience:
                break
    # Restore the best checkpoint before the final evaluation.
    model.load_state_dict(torch.load(filename))
    rmse = estimate_model_rmse(model, sampler)
    logging.info('Final: {0}'.format(rmse))
    return model
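# estimate_model_rmse and sparse_norm are likewise defined elsewhere. Hedged
# sketches under the same assumptions: the RMSE estimate averages per-episode
# squared error, and sparse_norm stands in for the routing-sparsity
# regularizer, modeled here as a plain L1 penalty over the parameters (the
# repository's term likely depends on the episode's routing probabilities
# instead).
def estimate_model_rmse(model, sampler, n_episodes=100):
    errors = []
    with torch.no_grad():
        for _ in range(n_episodes):
            x_train, y_train, x_test, y_test = sampler()
            y_pred = model(x_train, y_train, x_test)
            errors.append(((y_pred.reshape(-1) - y_test.reshape(-1)) ** 2).mean().item())
    return float(np.sqrt(np.mean(errors)))


def sparse_norm(model, x_train, y_train):
    return sum(p.abs().sum() for p in model.parameters())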
# Recommendation-data regression experiment.
def run(name, batch_size, r_dim, height, all_train, n_test, data_name,
        dist_func_name, r_sparse, lr, temperature):
    problem_name = 'train({0})_test({1})'.format(all_train, n_test)
    problem_name = os.path.join(data_name, problem_name)
    model_name = '{0}height({1})_rdim({2})_temp({3})_rsparse({4})_dist({5})_batch_size({6})_lr({7})'.format(
        name, height, r_dim, temperature, r_sparse, dist_func_name, batch_size, lr)
    log_path = os.path.join('models', problem_name)
    set_logging(log_path, model_name)
    set_seed(0)
    logging.info('running {0} using {1}'.format(problem_name, model_name))
    train, test = get_data(data_name)
    group_parameter = 'user_id'
    target = 'rating'
    x_train, y_train = to_x_y(train, group_parameter, target)
    preprocessor = StandardScaler()
    preprocessor.fit(x_train.astype('float'))
    x_dim = x_train.shape[1]
    sampler = Sampler(batch_size, train, preprocessor, n_test, group_parameter,
                      target, cuda=True, with_train=all_train, regression=True)
    # Bound the tree's predictions to the observed rating range.
    min_rating = y_train.min()
    max_rating = y_train.max()
    dist_func = get_dist_func(dist_func_name)
    if height == 0:
        model = get_regression_tree_dyn(x_dim, min_rating, max_rating, r_dim, 5,
                                        dist_func, temperature).cuda()
    else:
        model = get_regression_tree(x_dim, min_rating, max_rating, r_dim, height,
                                    dist_func, temperature).cuda()
    model = train_model(model, sampler, r_sparse, model_name, log_path, lr)
    logging.info('Evaluating on test')
    # Report RMSE/MAE for every combination of soft/hard distance function
    # and soft/hard routing.
    model.set_dist_func(DIST_SOFT)
    rmse, mae = get_regression_performance(train, test, model, preprocessor,
                                           group_parameter, target)
    logging.info('RMSE {0}, MAE {1} (soft, soft)'.format(rmse, mae))
    model.set_dist_func(DIST_HARD)
    rmse, mae = get_regression_performance(train, test, model, preprocessor,
                                           group_parameter, target)
    logging.info('RMSE {0}, MAE {1} (hard, soft)'.format(rmse, mae))
    model.make_hard_routing()
    model.set_dist_func(DIST_SOFT)
    rmse, mae = get_regression_performance(train, test, model, preprocessor,
                                           group_parameter, target)
    logging.info('RMSE {0}, MAE {1} (soft, hard)'.format(rmse, mae))
    model.set_dist_func(DIST_HARD)
    rmse, mae = get_regression_performance(train, test, model, preprocessor,
                                           group_parameter, target)
    logging.info('RMSE {0}, MAE {1} (hard, hard)'.format(rmse, mae))
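# get_regression_performance is also defined elsewhere. A sketch assuming test
# users are scored one at a time with their own training rows as the context
# set; the DataFrame handling (dropping exactly the group and target columns,
# float casting, batch dimension) is an assumption for illustration.
def get_regression_performance(train, test, model, preprocessor, group_parameter, target):
    squared, absolute, n = 0.0, 0.0, 0
    with torch.no_grad():
        for user, test_rows in test.groupby(group_parameter):
            train_rows = train[train[group_parameter] == user]
            x_tr = torch.tensor(preprocessor.transform(
                train_rows.drop(columns=[group_parameter, target]).values.astype('float')),
                dtype=torch.float32).cuda().unsqueeze(0)
            y_tr = torch.tensor(train_rows[target].values,
                                dtype=torch.float32).cuda().unsqueeze(0)
            x_te = torch.tensor(preprocessor.transform(
                test_rows.drop(columns=[group_parameter, target]).values.astype('float')),
                dtype=torch.float32).cuda().unsqueeze(0)
            y_te = test_rows[target].values.astype('float')
            y_hat = model(x_tr, y_tr, x_te).reshape(-1).cpu().numpy()
            squared += ((y_hat - y_te) ** 2).sum()
            absolute += np.abs(y_hat - y_te).sum()
            n += len(y_te)
    return float(np.sqrt(squared / n)), float(absolute / n)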
# TEM baseline (presumably the Tree-enhanced Embedding Model): a GBDT extracts
# cross features, then a neural model is trained on top. Note that train_model
# here is the TEM-specific trainer, with a different signature from the
# episodic trainers above.
def run(data_name, n_trees, max_depth, k, a, batch_size, lr, epochs, patience):
    set_seed(0)
    log_path = 'tem_logs'
    filename = 'data={0}_ntrees={1}_maxdepth={2}_k={3}_a={4}_lr={5}'.format(
        data_name, n_trees, max_depth, k, a, lr)
    set_logging(log_path, filename)
    logging.info('Loading data {0}'.format(data_name))
    train, test = get_data(data_name, False)
    items_train, users_train, x_train, y_train = prepare_data(train)
    items_test, users_test, x_test, y_test = prepare_data(test)
    logging.info('Normalizing data')
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    logging.info('Extracting features using GBDT')
    gbdt = GradientBoostingRegressor(n_estimators=n_trees, max_depth=max_depth)
    gbdt = gbdt.fit(x_train, y_train)
    x_dim = x_train.shape[1]
    n_users = int(users_train.max() + 1)
    n_items = int(items_train.max() + 1)
    train_set = get_dataset(items_train, users_train, x_train, y_train)
    test_set = get_dataset(items_test, users_test, x_test, y_test)
    train_loader = data.DataLoader(train_set, batch_size=batch_size)
    # using test as validation for now
    val_loader = data.DataLoader(test_set, batch_size=batch_size)
    model = TEM(a, k, x_dim, n_users, n_items, gbdt).cuda()
    logging.info('Training TEM')
    train_model(model, lr, epochs, train_loader, val_loader, patience)
    logging.info('Done')
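# Each run(...) above reads like a per-script CLI entry point; a minimal
# sketch of how one such script might be launched, assuming the python-fire
# dependency (the repository may wire its CLI differently):
if __name__ == '__main__':
    import fire

    # Exposes the function's keyword arguments as command-line flags, e.g.
    # python tem.py --data_name=<dataset> --n_trees=100 --max_depth=6 \
    #               --k=16 --a=16 --batch_size=256 --lr=3e-4 --epochs=50 --patience=5
    fire.Fire(run)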