예제 #1
0
def main():
    """Run the full training loop with periodic evaluation and checkpointing.

    Seeds the Python, NumPy and PyTorch RNGs from ``args.seed``, builds the
    model, optimizer and criterion, then trains for ``args.epoch`` epochs.
    While an epoch is running, the data loader for the next epoch is built in
    a single background worker process.

    Every ``args.eval_freq`` steps (except step 0) the model is evaluated; a
    checkpoint is written under ``args.output_dir`` only when both metrics
    returned by ``evaluate`` strictly beat the values of the last saved
    checkpoint. Every ``args.log_freq`` steps (except step 0) the loss, the
    learning rate of the first param group and the time elapsed since the
    previous log line are printed.
    """
    # Local import: only needed to clear a stale checkpoint directory.
    import shutil

    args = parse_arguments()
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    worker_init = WorkerInitObj(args.seed)
    device, args = setup_training(args)
    test_data = prepare_test_data(args)
    model, optimizer, criterion = prepare_model_and_optimizer(args, device)

    # One set of keyword arguments for both the initial loader and the
    # background prefetch, so the two can never be built differently.
    loader_kwargs = dict(path=args.train_path,
                         num_negs=args.num_negs,
                         batch_size=args.train_batch_size,
                         seed=args.seed,
                         worker_init=worker_init)
    train_iter = ml_1mTrainDataLoader(**loader_kwargs)

    print('-' * 50 + 'args' + '-' * 50)
    for arg_name, arg_value in vars(args).items():
        print('{0}: {1}'.format(arg_name, arg_value))
    print('-' * 30)
    print(model)
    print('-' * 50 + 'args' + '-' * 50)

    global_step = 0
    global_HR = 0.0
    global_NDCG = 0.0

    s_time_train = time.time()
    # The context manager guarantees the worker process is shut down, even
    # when training is interrupted by an exception.
    with ProcessPoolExecutor(1) as pool:
        for epoch in range(args.epoch):

            # Prefetch the next epoch's loader in the background; the last
            # epoch has no successor, so nothing is submitted for it.
            dataset_future = None
            if epoch + 1 < args.epoch:
                dataset_future = pool.submit(ml_1mTrainDataLoader,
                                             **loader_kwargs)

            for batch in train_iter:

                # Re-enter train mode every step because evaluation below
                # switches the model to eval mode.
                model.train()
                batch = [t.to(device) for t in batch]
                users, items, labels = batch

                logits = model(users, items)
                loss = criterion(logits, labels.float())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # evaluate
                if global_step != 0 and global_step % args.eval_freq == 0:
                    s_time_eval = time.time()
                    model.eval()
                    hits, ndcgs = evaluate(model, test_data, device,
                                           args.topk)
                    e_time_eval = time.time()
                    print('-' * 68)
                    print(
                        'Epoch:[{0}] Step:[{1}] HR:[{2}] NDCG:[{3}] time:[{4}s]'
                        .format(epoch, global_step, format(hits, '.4f'),
                                format(ndcgs, '.4f'),
                                format(e_time_eval - s_time_eval, '.4f')))

                    if hits > global_HR and ndcgs > global_NDCG:
                        # Unwrap DataParallel-style wrappers that expose the
                        # real model as `.module`.
                        model_to_save = model.module if hasattr(
                            model, 'module') else model
                        output_save_file = os.path.join(
                            args.output_dir,
                            "{}_hr_{}_ndcg_{}_step_{}_ckpt.pt".format(
                                args.model_name, format(hits, '.4f'),
                                format(ndcgs, '.4f'), global_step))

                        # Clear whatever already sits at the target path
                        # without spawning a shell: the path is built from
                        # command-line values and must not be interpolated
                        # into a shell command.
                        if (os.path.isdir(output_save_file)
                                and not os.path.islink(output_save_file)):
                            shutil.rmtree(output_save_file)
                        elif os.path.exists(output_save_file):
                            os.remove(output_save_file)
                        torch.save(
                            {
                                'model': model_to_save.state_dict(),
                                'name': args.model_name
                            }, output_save_file)
                        print('Epoch:[{0}] Step:[{1}] SavePath:[{2}]'.format(
                            epoch, global_step, output_save_file))
                        global_HR = hits
                        global_NDCG = ndcgs
                    print('-' * 68)

                # log
                if global_step != 0 and global_step % args.log_freq == 0:
                    e_time_train = time.time()
                    print(
                        'Epoch:[{0}] Step:[{1}] Loss:[{2}] Lr:[{3}] time:[{4}s]'
                        .format(epoch, global_step,
                                format(loss.item(), '.4f'),
                                format(optimizer.param_groups[0]['lr'], '.6'),
                                format(e_time_train - s_time_train, '.4f')))
                    s_time_train = time.time()

                global_step += 1

            if dataset_future is not None:
                # Drop the exhausted loader before receiving the new one so
                # both are not kept alive at the same time.
                del train_iter
                train_iter = dataset_future.result(timeout=None)
예제 #2
0
파일: train.py 프로젝트: HELL-TO-HEAVEN/CTR
def main():
    """Run the full training loop with periodic AUC evaluation and checkpoints.

    Seeds the Python, NumPy and PyTorch RNGs from ``args.seed``, configures
    cuDNN for deterministic execution, builds the model, optimizer and
    criterion, then trains for ``args.epoch`` epochs. While an epoch is
    running, the data loader for the next epoch is built in a single
    background worker process.

    Every ``args.eval_freq`` steps (except step 0) the model is evaluated on
    the loader built from ``args.val_path``; a checkpoint is written under
    ``args.output_dir`` only when the value returned by ``evaluate`` strictly
    beats the best one seen so far. Every ``args.log_freq`` steps (except
    step 0) the loss, the learning rate of the first param group and the time
    elapsed since the previous log line are printed.
    """
    # Local import: only needed to clear a stale checkpoint directory.
    import shutil

    args = parse_arguments()
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    os.environ['PYTHONHASHSEED'] = str(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    worker_init = WorkerInitObj(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True
    device, args = setup_training(args)
    model, optimizer, criterion = prepare_model_and_optimizer(args, device)

    # One set of keyword arguments for both the initial training loader and
    # the background prefetch, so the two can never be built differently.
    train_loader_kwargs = dict(path=args.train_path,
                               batch_size=args.batch_size,
                               worker_init=worker_init)
    train_iter = subsetDataloader(**train_loader_kwargs)
    test_iter = subsetDataloader(path=args.val_path,
                                 batch_size=args.batch_size,
                                 worker_init=worker_init)

    print('-' * 50 + 'args' + '-' * 50)
    for arg_name, arg_value in vars(args).items():
        print('{0}: {1}'.format(arg_name, arg_value))
    print('-' * 30)
    print(model)
    print('-' * 50 + 'args' + '-' * 50)

    global_step = 0
    global_auc = 0

    s_time_train = time.time()
    # The context manager guarantees the worker process is shut down, even
    # when training is interrupted by an exception.
    with ProcessPoolExecutor(1) as pool:
        for epoch in range(args.epoch):

            # Prefetch the next epoch's loader in the background; the last
            # epoch has no successor, so nothing is submitted for it.
            dataset_future = None
            if epoch + 1 < args.epoch:
                dataset_future = pool.submit(subsetDataloader,
                                             **train_loader_kwargs)

            for batch in train_iter:

                # Re-enter train mode every step because evaluation below
                # switches the model to eval mode.
                model.train()
                labels = batch['label'].to(device).float()
                # Keep only the dict-valued entries of the batch and move
                # their tensors to the target device; this drops 'label',
                # which was extracted above.
                batch = {
                    t: {k: v.to(device)
                        for k, v in d.items()}
                    for t, d in batch.items() if isinstance(d, dict)
                }

                optimizer.zero_grad()
                logits = model(batch)
                loss = criterion(logits, labels)

                loss.backward()
                optimizer.step()

                # evaluate
                if global_step != 0 and global_step % args.eval_freq == 0:
                    s_time_eval = time.time()
                    model.eval()
                    auc = evaluate(model, test_iter, device)
                    e_time_eval = time.time()
                    print('-' * 68)
                    print(
                        'Epoch:[{0}] Step:[{1}] AUC:[{2}] time:[{3}s]'.format(
                            epoch, global_step, format(auc, '.4f'),
                            format(e_time_eval - s_time_eval, '.4f')))

                    if auc > global_auc:
                        # Unwrap DataParallel-style wrappers that expose the
                        # real model as `.module`.
                        model_to_save = model.module if hasattr(
                            model, 'module') else model
                        output_save_file = os.path.join(
                            args.output_dir,
                            "{}_auc_{}_step_{}_ckpt.pt".format(
                                args.model_name, format(auc, '.4f'),
                                global_step))

                        # Clear whatever already sits at the target path
                        # without spawning a shell: the path is built from
                        # command-line values and must not be interpolated
                        # into a shell command.
                        if (os.path.isdir(output_save_file)
                                and not os.path.islink(output_save_file)):
                            shutil.rmtree(output_save_file)
                        elif os.path.exists(output_save_file):
                            os.remove(output_save_file)
                        torch.save(
                            {
                                'model': model_to_save.state_dict(),
                                'name': args.model_name
                            }, output_save_file)
                        print('Epoch:[{0}] Step:[{1}] SavePath:[{2}]'.format(
                            epoch, global_step, output_save_file))
                        global_auc = auc
                    print('-' * 68)

                # log
                if global_step != 0 and global_step % args.log_freq == 0:
                    e_time_train = time.time()
                    print(
                        'Epoch:[{0}] Step:[{1}] Loss:[{2}] Lr:[{3}] time:[{4}s]'
                        .format(epoch, global_step,
                                format(loss.item(), '.4f'),
                                format(optimizer.param_groups[0]['lr'], '.6'),
                                format(e_time_train - s_time_train, '.4f')))
                    s_time_train = time.time()

                global_step += 1

            if dataset_future is not None:
                # Drop the exhausted loader before receiving the new one so
                # both are not kept alive at the same time.
                del train_iter
                train_iter = dataset_future.result(timeout=None)