Exemple #1
0
def prepare_model(opt):
    """Build a neural decision forest for the dataset selected in ``opt``.

    Args:
        opt: Options object.  Reads ``dataset``, ``feat_dropout``,
            ``n_tree``, ``tree_depth``, ``tree_feature_rate``, ``n_class``,
            ``jointly_training`` and ``cuda``.

    Returns:
        The ``ndf.NeuralDecisionForest`` instance, after ``.cuda()`` when
        ``opt.cuda`` is truthy and after ``.cpu()`` otherwise.

    Raises:
        NotImplementedError: If ``opt.dataset`` is not a supported dataset
            name.
    """
    # Dataset name -> name of the feature-layer class inside ``ndf``.  The
    # class is looked up lazily so only the selected one has to exist.
    feature_layer_names = {
        'mnist': 'MNISTFeatureLayer',
        'adult': 'UCIAdultFeatureLayer',
        'letter': 'UCILetterFeatureLayer',
        'yeast': 'UCIYeastFeatureLayer',
    }

    try:
        layer_name = feature_layer_names[opt.dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original equality
        # chain also treated as "unsupported".
        layer_name = None
    if layer_name is None:
        raise NotImplementedError(
            "unsupported dataset %r; expected one of: %s"
            % (opt.dataset, ", ".join(sorted(feature_layer_names))))

    feat_layer = getattr(ndf, layer_name)(opt.feat_dropout)
    forest = ndf.Forest(n_tree=opt.n_tree,
                        tree_depth=opt.tree_depth,
                        n_in_feature=feat_layer.get_out_feature_size(),
                        tree_feature_rate=opt.tree_feature_rate,
                        n_class=opt.n_class,
                        jointly_training=opt.jointly_training)
    model = ndf.NeuralDecisionForest(feat_layer, forest)

    return model.cuda() if opt.cuda else model.cpu()
Exemple #2
0
def prepare_model(opt):
    """Build a neural decision forest for the dataset selected in ``opt``.

    Args:
        opt: Options object.  Reads ``dataset``, ``feat_dropout``,
            ``n_tree``, ``tree_depth``, ``tree_feature_rate``, ``n_class``,
            ``jointly_training`` and ``cuda``.

    Returns:
        The ``ndf.NeuralDecisionForest`` instance, after ``.cuda()`` when
        ``opt.cuda`` is truthy and after ``.cpu()`` otherwise.

    Raises:
        NotImplementedError: If ``opt.dataset`` is not a supported dataset
            name.
    """
    # Dataset name -> name of the feature-layer class inside ``ndf``.  The
    # class is looked up lazily so only the selected one has to exist.
    # Keys (including the 'mutiplefeatures' spelling) are the accepted
    # values of ``opt.dataset`` and must not be altered.
    feature_layer_names = {
        'mnist': 'MNISTFeatureLayer',
        'adult': 'UCIAdultFeatureLayer',
        'letter': 'UCILetterFeatureLayer',
        'yeast': 'UCIYeastFeatureLayer',
        'gisette': 'UCIGisetteFeatureLayer',
        'arrhythmia': 'UCIArrhythmiaFeatureLayer',
        'cardiotocography': 'UCICardiotocographyFeatureLayer',
        'breastcancer': 'UCIBreastcancerFeatureLayer',
        'nomao': 'UCINomaoFeatureLayer',
        'mutiplefeatures': 'UCIMutiplefeaturesFeatureLayer',
        'madelon': 'UCIMadelonFeatureLayer',
        'secom': 'UCISecomFeatureLayer',
        'isolet5': 'UCIIsolet5FeatureLayer',
    }

    try:
        layer_name = feature_layer_names[opt.dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original equality
        # chain also treated as "unsupported".
        layer_name = None
    if layer_name is None:
        raise NotImplementedError(
            "unsupported dataset %r; expected one of: %s"
            % (opt.dataset, ", ".join(sorted(feature_layer_names))))

    feat_layer = getattr(ndf, layer_name)(opt.feat_dropout)
    forest = ndf.Forest(n_tree=opt.n_tree,
                        tree_depth=opt.tree_depth,
                        n_in_feature=feat_layer.get_out_feature_size(),
                        tree_feature_rate=opt.tree_feature_rate,
                        n_class=opt.n_class,
                        jointly_training=opt.jointly_training)
    model = ndf.NeuralDecisionForest(feat_layer, forest)

    return model.cuda() if opt.cuda else model.cpu()
def main():
    """Train and evaluate GraphRfi jointly with a neural decision forest.

    Parses the command-line options, loads ``./data/yelp_final.txt`` through
    ``read_file``, builds the user/item aggregators and encoders, the
    ``GraphRfi`` model and an ``ndf.NeuralDecisionForest``, then alternates
    ``train`` and ``test`` for up to ``--epochs`` epochs.  The loop stops
    early once the RMSE returned by ``test`` has failed to improve for more
    than five consecutive epochs.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='GraphRfi model')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--embed_dim', type=int, default=100, metavar='N',
                        help='embedding size')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate')
    parser.add_argument('--test_batch_size', type=int, default=1000,
                        metavar='N', help='input batch size for testing')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    # Decision-forest hyper-parameters (single-dash long options).
    parser.add_argument('-n_tree', type=int, default=80)
    parser.add_argument('-tree_depth', type=int, default=10)
    parser.add_argument('-n_class', type=int, default=2)
    parser.add_argument('-tree_feature_rate', type=float, default=0.5)
    # NOTE(review): ``store_true`` together with ``default=True`` means this
    # option is True whether or not the flag is given.  Left unchanged to
    # keep the command-line interface stable.
    parser.add_argument('-jointly_training', action='store_true', default=True)
    parser.add_argument('-feat_dropout', type=float, default=0.3)
    args = parser.parse_args()
    # NOTE(review): forced on regardless of CUDA availability; ``args`` is
    # forwarded to ``train`` below -- confirm how ``train`` uses it.
    args.cuda = True
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    embed_dim = args.embed_dim
    dir_data = './data/yelp_final.txt'
    # The adjacency matrix returned last by ``read_file`` is not used in
    # this function, so it is not converted to a tensor.
    (history_u_lists, history_ur_lists, history_v_lists, history_vr_lists,
     train_u, train_v, train_r, test_u, test_v, test_r, ratings_list,
     feature_new, features_num, label_new, _adj_matrix) = read_file(dir_data)
    label_tensor = torch.Tensor(label_new)

    trainset = torch.utils.data.TensorDataset(torch.LongTensor(train_u),
                                              torch.LongTensor(train_v),
                                              torch.FloatTensor(train_r))
    testset = torch.utils.data.TensorDataset(torch.LongTensor(test_u),
                                             torch.LongTensor(test_v),
                                             torch.FloatTensor(test_r))
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=args.test_batch_size,
                                              shuffle=True)

    num_users = len(history_u_lists)
    num_items = len(history_v_lists)
    num_ratings = len(ratings_list)

    # User embedding: weights are replaced by the fixed (non-trainable)
    # features from ``read_file`` before the module is moved to the device.
    u2e = nn.Embedding(num_users, features_num)
    u2e.weight = nn.Parameter(torch.FloatTensor(feature_new),
                              requires_grad=False)
    u2e.to(device)
    v2e = nn.Embedding(num_items, features_num).to(device)
    r2e = nn.Embedding(num_ratings, embed_dim).to(device)

    # user feature
    # features: item * rating
    agg_u_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, features_num,
                                  cuda=device, uv=True)
    enc_u_history = UV_Encoder(u2e, embed_dim, features_num,
                               history_u_lists, history_ur_lists,
                               agg_u_history, cuda=device, uv=True)
    # item feature: user * rating
    agg_v_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, features_num,
                                  cuda=device, uv=False)
    enc_v_history = UV_Encoder(v2e, embed_dim, features_num,
                               history_v_lists, history_vr_lists,
                               agg_v_history, cuda=device, uv=False)
    graphrfi = GraphRfi(enc_u_history, enc_v_history, r2e,
                        features_num).to(device)

    feat_layer = ndf.UCIAdultFeatureLayer(args.feat_dropout)
    forest = ndf.Forest(n_tree=args.n_tree,
                        tree_depth=args.tree_depth,
                        n_in_feature=feat_layer.get_out_feature_size(),
                        tree_feature_rate=args.tree_feature_rate,
                        n_class=args.n_class,
                        jointly_training=args.jointly_training)
    neuralforest = ndf.NeuralDecisionForest(feat_layer, forest).to(device)

    # One optimizer updates both models.
    optimizer = torch.optim.Adam(list(graphrfi.parameters()) +
                                 list(neuralforest.parameters()),
                                 lr=args.lr)

    # Sentinel starting values; the first epoch passes them to ``train``.
    best_rmse = 9999.0
    best_mae = 9999.0
    test_loss = 9999.0
    correct = 9999.0
    correctness = 9999.0
    endure_count = 0

    for epoch in range(1, args.epochs + 1):
        train(graphrfi, device, train_loader, optimizer, epoch, best_rmse,
              best_mae, test_loss, correct, correctness, label_tensor,
              neuralforest, args, history_u_lists, history_ur_lists)
        expected_rmse, mae, test_loss, correct, correctness = test(
            graphrfi, device, test_loader, label_tensor, neuralforest)
        # please add the validation set to tune the hyper-parameters based on your datasets.
        if best_rmse > expected_rmse:
            best_rmse = expected_rmse
            best_mae = mae
            endure_count = 0
        else:
            endure_count += 1
        print(
            "rmse: %.4f, mae:%.4f, loss:%.4f, correct:%.4f, correctness:%.4f "
            % (expected_rmse, mae, test_loss, correct, correctness))

        # Early stopping: give up after more than five epochs in a row
        # without an RMSE improvement.
        if endure_count > 5:
            break