import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

# CriteoDataset, DeepFM, and set_args are defined in the project's local modules

if __name__ == '__main__':
    args = set_args()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # load data
    train_data = CriteoDataset('data', train=True)
    train_iter = DataLoader(train_data, batch_size=32, shuffle=True)
    eval_data = CriteoDataset('data', train=True)
    eval_iter = DataLoader(eval_data, batch_size=32, shuffle=True)

    feature_sizes = np.loadtxt('./data/feature_size.txt', delimiter=',')
    feature_sizes = [int(x) for x in feature_sizes]

    model = DeepFM(feature_sizes)
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0)
    model.to(device)
    # criterion = F.binary_cross_entropy_with_logits

    for epoch in range(args.epochs):
        for step, (xi, xv, y) in enumerate(train_iter):
            xi, xv, y = xi.to(device, dtype=torch.long), xv.to(device), y.to(device)
            output = model(xi, xv)  # shape: torch.Size([32])
            loss = F.binary_cross_entropy(output, y.float())
            print('epoch: {}, step: {}, loss: {}'.format(epoch, step, loss.item()))
            # zero_grad() must come before backward(); the original called it
            # between backward() and step(), so step() saw all-zero gradients
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
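Note that the script builds eval_iter but never consumes it. A minimal sketch of an evaluation pass that could be called at the end of each epoch, assuming the model outputs probabilities in [0, 1] as the binary_cross_entropy call above implies; the evaluate helper and the 0.5 cutoff are assumptions, not part of the original code:

def evaluate(model, eval_iter, device):
    """Hypothetical helper: average BCE loss and accuracy over eval_iter."""
    model.eval()
    total_loss, correct, count = 0.0, 0, 0
    with torch.no_grad():
        for xi, xv, y in eval_iter:
            xi = xi.to(device, dtype=torch.long)
            xv, y = xv.to(device), y.to(device).float()
            output = model(xi, xv)
            total_loss += F.binary_cross_entropy(output, y, reduction='sum').item()
            correct += ((output > 0.5).float() == y).sum().item()
            count += y.numel()
    model.train()
    return total_loss / count, correct / count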
import tensorflow as tf

# read train data (the head of this call was truncated in the original;
# reconstructed from the parallel call for the validation set below)
train_label, train_id, train_value = read_batch("../data/train.tfrecords",
                                                FLAGS.max_epoch, FLAGS.batch_size,
                                                FLAGS.thread_num, FLAGS.min_after_dequeue)
# read validate data
valid_label, valid_id, valid_value = read_batch("../data/valid.tfrecords",
                                                FLAGS.max_epoch, 100000,
                                                FLAGS.thread_num, FLAGS.min_after_dequeue)

feature_size = 1000000
field_size = 39
factor_size = 10
label_num = 1

model = DeepFM(feature_size, factor_size, field_size)

# define loss
train_softmax = model.forward(train_id, train_value)
train_label = tf.to_int64(train_label)
loss = tf.losses.log_loss(train_label, train_softmax)
_, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, train_label)

# define optimizer
print("Optimization algorithm: {}".format(FLAGS.optimizer))
if FLAGS.optimizer == "sgd":
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
elif FLAGS.optimizer == "adadelta":
    optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate)
elif FLAGS.optimizer == "adagrad":
    optimizer = tf.train.AdagradOptimizer(FLAGS.learning_rate)
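The snippet only defines the graph ops; nothing runs them. A minimal sketch of the missing session loop under TF1 queue-runner semantics (train_op, global_step, and the logging cadence are assumptions, not the original code; the local-variables initializer is required because streaming_auc keeps its counters in local variables):

global_step = tf.train.get_or_create_global_step()
train_op = optimizer.minimize(loss, global_step=global_step)

with tf.Session() as sess:
    # streaming_auc stores its accumulators in local variables
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            _, loss_val, auc_val, step = sess.run([train_op, loss, train_auc, global_step])
            if step % 100 == 0:
                print("step: {}, loss: {}, auc: {}".format(step, loss_val, auc_val))
    except tf.errors.OutOfRangeError:
        # raised once the input queues have served max_epoch epochs
        print("training finished")
    finally:
        coord.request_stop()
    coord.join(threads)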
n_class = 1
threshold = 0.6
step_size = 1
sparse = False
pos_weight = 9

train_dataset = custom_dataset(train_cat, train_dense, train_y, if_y=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_dataset = custom_dataset(test_cat, test_dense, if_y=False)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=2)

model = DeepFM(cat_fields=cat_fields, num_contns=num_contns, k=k,
               hidden_dims=hidden_dims, dropout=p, n_class=n_class,
               sparse=sparse).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight, device=device))
print('model created.')
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.3, verbose=True)

'''training phase'''
for epoch in range(n_epoch):
    model.train()
    train_loss = 0
    train_score = 0
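The loop body is cut off right after the counters are initialized. A minimal sketch of how one epoch might continue, assuming each batch yields (cat, dense, y) matching the custom_dataset construction above and that the model returns raw logits, which is what pairing it with BCEWithLogitsLoss implies; the batch-mean accuracy using the threshold defined earlier is an assumption, not the original code:

    for cat, dense, y in train_loader:
        cat, dense, y = cat.to(device), dense.to(device), y.to(device).float()
        optimizer.zero_grad()
        logits = model(cat, dense).squeeze(-1)  # raw logits; criterion applies the sigmoid
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # thresholded accuracy on the sigmoid of the logits
        preds = (torch.sigmoid(logits) > threshold).float()
        train_score += (preds == y).float().mean().item()
    print('epoch: {}, loss: {:.4f}, acc: {:.4f}'.format(
        epoch, train_loss / len(train_loader), train_score / len(train_loader)))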