def prepare_model(opt):
    """Build a neural decision forest model for the dataset in ``opt.dataset``.

    NOTE(review): another ``prepare_model`` that supports more datasets is
    defined later in this source; if both live in the same module, the later
    definition replaces this one.

    Args:
        opt: options object. Reads ``dataset``, ``feat_dropout``, ``n_tree``,
            ``tree_depth``, ``tree_feature_rate``, ``n_class``,
            ``jointly_training`` and ``cuda``.

    Returns:
        The ``ndf.NeuralDecisionForest`` instance, after ``.cuda()`` when
        ``opt.cuda`` is truthy and after ``.cpu()`` otherwise.

    Raises:
        NotImplementedError: if ``opt.dataset`` is not one of the supported
            dataset names. The message names the rejected value and lists
            the accepted ones.
    """
    # Dataset name -> name of the feature-layer class inside ``ndf``.
    # Only the selected class is looked up, and only after validation.
    feature_layer_names = {
        'mnist': 'MNISTFeatureLayer',
        'adult': 'UCIAdultFeatureLayer',
        'letter': 'UCILetterFeatureLayer',
        'yeast': 'UCIYeastFeatureLayer',
    }

    try:
        layer_name = feature_layer_names[opt.dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original ``==``
        # chain also rejected with NotImplementedError.
        layer_name = None
    if layer_name is None:
        raise NotImplementedError(
            'unsupported dataset %r; expected one of: %s'
            % (opt.dataset, ', '.join(sorted(feature_layer_names)))
        )

    feat_layer = getattr(ndf, layer_name)(opt.feat_dropout)
    forest = ndf.Forest(
        n_tree=opt.n_tree,
        tree_depth=opt.tree_depth,
        n_in_feature=feat_layer.get_out_feature_size(),
        tree_feature_rate=opt.tree_feature_rate,
        n_class=opt.n_class,
        jointly_training=opt.jointly_training,
    )
    model = ndf.NeuralDecisionForest(feat_layer, forest)
    return model.cuda() if opt.cuda else model.cpu()
def prepare_model(opt):
    """Build a neural decision forest model for the dataset in ``opt.dataset``.

    NOTE(review): an earlier ``prepare_model`` with fewer datasets also
    appears in this source; when both live in the same module, this later
    definition is the one that takes effect.

    Args:
        opt: options object. Reads ``dataset``, ``feat_dropout``, ``n_tree``,
            ``tree_depth``, ``tree_feature_rate``, ``n_class``,
            ``jointly_training`` and ``cuda``.

    Returns:
        The ``ndf.NeuralDecisionForest`` instance, after ``.cuda()`` when
        ``opt.cuda`` is truthy and after ``.cpu()`` otherwise.

    Raises:
        NotImplementedError: if ``opt.dataset`` is not one of the supported
            dataset names. The message names the rejected value and lists
            the accepted ones.
    """
    # Dataset name -> name of the feature-layer class inside ``ndf``.
    # Only the selected class is looked up, and only after validation.
    feature_layer_names = {
        'mnist': 'MNISTFeatureLayer',
        'adult': 'UCIAdultFeatureLayer',
        'letter': 'UCILetterFeatureLayer',
        'yeast': 'UCIYeastFeatureLayer',
        'gisette': 'UCIGisetteFeatureLayer',
        'arrhythmia': 'UCIArrhythmiaFeatureLayer',
        'cardiotocography': 'UCICardiotocographyFeatureLayer',
        'breastcancer': 'UCIBreastcancerFeatureLayer',
        'nomao': 'UCINomaoFeatureLayer',
        # Spelling kept as-is: it is both the accepted option value and
        # part of the class name referenced in ``ndf``.
        'mutiplefeatures': 'UCIMutiplefeaturesFeatureLayer',
        'madelon': 'UCIMadelonFeatureLayer',
        'secom': 'UCISecomFeatureLayer',
        'isolet5': 'UCIIsolet5FeatureLayer',
    }

    try:
        layer_name = feature_layer_names[opt.dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original ``==``
        # chain also rejected with NotImplementedError.
        layer_name = None
    if layer_name is None:
        raise NotImplementedError(
            'unsupported dataset %r; expected one of: %s'
            % (opt.dataset, ', '.join(sorted(feature_layer_names)))
        )

    feat_layer = getattr(ndf, layer_name)(opt.feat_dropout)
    forest = ndf.Forest(
        n_tree=opt.n_tree,
        tree_depth=opt.tree_depth,
        n_in_feature=feat_layer.get_out_feature_size(),
        tree_feature_rate=opt.tree_feature_rate,
        n_class=opt.n_class,
        jointly_training=opt.jointly_training,
    )
    model = ndf.NeuralDecisionForest(feat_layer, forest)
    return model.cuda() if opt.cuda else model.cpu()
def _parse_args():
    """Parse the command-line options for GraphRfi training."""
    parser = argparse.ArgumentParser(description='GraphRfi model')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--embed_dim', type=int, default=100, metavar='N',
                        help='embedding size')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate')
    parser.add_argument('--test_batch_size', type=int, default=1000, metavar='N',
                        help='input batch size for testing')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    # Previously hard-coded inside main(); the default keeps the old path.
    parser.add_argument('--data_path', type=str, default='./data/yelp_final.txt',
                        help='path to the input data file')
    parser.add_argument('-n_tree', type=int, default=80)
    parser.add_argument('-tree_depth', type=int, default=10)
    parser.add_argument('-n_class', type=int, default=2)
    parser.add_argument('-tree_feature_rate', type=float, default=0.5)
    # NOTE(review): ``store_true`` combined with ``default=True`` means this
    # value is True whether or not the flag is passed.
    parser.add_argument('-jointly_training', action='store_true', default=True)
    parser.add_argument('-feat_dropout', type=float, default=0.3)
    return parser.parse_args()


def main():
    """Train GraphRfi together with a neural decision forest.

    In order: parse the CLI options, select the device, load the data via
    ``read_file``, build the user/item encoders, the ``GraphRfi`` model and
    the ``ndf.NeuralDecisionForest``, then run ``train`` and ``test`` once
    per epoch. Training stops early once the RMSE returned by ``test`` has
    failed to improve for six consecutive epochs.
    """
    args = _parse_args()

    # NOTE(review): this overrides any CUDA_VISIBLE_DEVICES the caller set.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    use_cuda = torch.cuda.is_available()
    # Keep the flag handed to train() consistent with the device actually
    # selected, instead of hard-coding True on CPU-only machines.
    args.cuda = use_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    embed_dim = args.embed_dim
    # The last element returned by read_file (an adjacency matrix, judging by
    # its original name) is not used in this function, so it is no longer
    # converted to a tensor.
    (history_u_lists, history_ur_lists, history_v_lists, history_vr_lists,
     train_u, train_v, train_r, test_u, test_v, test_r, ratings_list,
     feature_new, features_num, label_new, _adj_matrix) = read_file(args.data_path)

    # The deprecated ``Variable`` wrapper is dropped; a plain tensor is used.
    label_tensor = torch.Tensor(label_new)

    trainset = torch.utils.data.TensorDataset(
        torch.LongTensor(train_u), torch.LongTensor(train_v), torch.FloatTensor(train_r))
    testset = torch.utils.data.TensorDataset(
        torch.LongTensor(test_u), torch.LongTensor(test_v), torch.FloatTensor(test_r))
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=args.test_batch_size, shuffle=True)

    num_users = len(history_u_lists)
    num_items = len(history_v_lists)
    num_ratings = len(ratings_list)

    # User embeddings are fixed to the loaded feature matrix (no gradient).
    u2e = nn.Embedding(num_users, features_num).to(device)
    u2e.weight = nn.Parameter(torch.FloatTensor(feature_new), requires_grad=False)
    # The replacement weight was built after the first .to(device), so move
    # the module again.
    u2e.to(device)
    v2e = nn.Embedding(num_items, features_num).to(device)
    r2e = nn.Embedding(num_ratings, embed_dim).to(device)

    # user feature
    # features: item * rating
    agg_u_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, features_num,
                                  cuda=device, uv=True)
    enc_u_history = UV_Encoder(u2e, embed_dim, features_num, history_u_lists,
                               history_ur_lists, agg_u_history, cuda=device, uv=True)

    # item feature: user * rating
    agg_v_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, features_num,
                                  cuda=device, uv=False)
    enc_v_history = UV_Encoder(v2e, embed_dim, features_num, history_v_lists,
                               history_vr_lists, agg_v_history, cuda=device, uv=False)

    graphrfi = GraphRfi(enc_u_history, enc_v_history, r2e, features_num).to(device)

    # NOTE(review): the feature layer is fixed to UCIAdultFeatureLayer here.
    feat_layer = ndf.UCIAdultFeatureLayer(args.feat_dropout)
    forest = ndf.Forest(
        n_tree=args.n_tree,
        tree_depth=args.tree_depth,
        n_in_feature=feat_layer.get_out_feature_size(),
        tree_feature_rate=args.tree_feature_rate,
        n_class=args.n_class,
        jointly_training=args.jointly_training,
    )
    neuralforest = ndf.NeuralDecisionForest(feat_layer, forest).to(device)

    # A single optimizer updates both models.
    optimizer = torch.optim.Adam(
        list(graphrfi.parameters()) + list(neuralforest.parameters()), lr=args.lr)

    best_rmse = 9999.0
    best_mae = 9999.0
    test_loss = 9999.0
    correct = 9999.0
    correctness = 9999.0
    endure_count = 0

    for epoch in range(1, args.epochs + 1):
        train(graphrfi, device, train_loader, optimizer, epoch, best_rmse, best_mae,
              test_loss, correct, correctness, label_tensor, neuralforest, args,
              history_u_lists, history_ur_lists)
        expected_rmse, mae, test_loss, correct, correctness = test(
            graphrfi, device, test_loader, label_tensor, neuralforest)

        # please add the validation set to tune the hyper-parameters based on
        # your datasets.
        if best_rmse > expected_rmse:
            best_rmse = expected_rmse
            best_mae = mae
            endure_count = 0
        else:
            endure_count += 1

        print(
            "rmse: %.4f, mae:%.4f, loss:%.4f, correct:%.4f, correctness:%.4f "
            % (expected_rmse, mae, test_loss, correct, correctness))

        # Early stopping: give up after six epochs in a row without a new
        # best RMSE.
        if endure_count > 5:
            break