Example 1
def main(args):
    G = nx.read_edgelist(args.classifydir + '_edgelist.txt', nodetype=int)
    model = Model(nx.number_of_nodes(G), args.num_parts)
    adj = Variable(torch.FloatTensor(nx.adjacency_matrix(G).toarray()),
                   requires_grad=False)

    if torch.cuda.is_available():
        model = model.cuda()
        adj = adj.cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in tqdm(range(args.num_epochs)):
        model.zero_grad()

        super_adj = model(adj, temp=args.temp, hard=args.hard, beta=args.beta)
        loss = model.loss(super_adj,
                          balance_node=args.balance_node,
                          lam=args.lam)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        if epoch % 50 == 0:
            print("loss:", loss.item())

            vectors = embed_arr_2_dict(model.params.cpu().detach().numpy(), G)
            accs = classify(vectors, args)
            print("micro:", accs['micro'], "macro:", accs['macro'])
Example 2
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # load data
    print('==> Preparing dataset %s' % args.dataset)
    features, landmarks, labels = pickle_2_img_and_landmark(args.dataset_path)
    num_classes = 6

    # Model
    print("==> creating model '{}'".format(args.arch))
    # model = ResNetAndGCN(20, num_classes=num_classes)
    model = Model(36, 6, {}, False, dropout=0.3)
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    # print('    resnet params: %.2fM' % (sum(p.numel() for p in model.resnet.parameters())/1000000.0))
    # print('    stgcn params: %.2fM' % (sum(p.numel() for p in model.st_gcn.parameters())/1000000.0))
    criterion = nn.CrossEntropyLoss()

    # layer-wise optimization: separate learning rates per layer group
    # resnet_para = [model.conv1.parameters(), model.layer1.parameters(), model.layer2.parameters(), model.layer3.parameters(), model.layer4.parameters()]
    # optimizer = optim.SGD([
    #     {'params': model.gcn11.parameters()},
    #     {'params': model.gcn12.parameters()},
    #     {'params': model.gcn21.parameters()},
    #     {'params': model.gcn22.parameters()},
    #     {'params': model.gcn31.parameters()},
    #     {'params': model.gcn32.parameters()},
    #     {'params': model.fc.parameters()},
    #     {'params': model.conv1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.bn1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer2.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer3.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer4.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # Resume
    title = 'ckp-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint file found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'),
                        title=title)
        logger.set_names([
            'fold_num', 'Learning Rate', 'Train Loss', 'Valid Loss',
            'Train Acc.', 'Valid Acc.'
        ])

    # logging
    logging.basicConfig(level=logging.DEBUG,
                        filename=os.path.join(args.checkpoint, 'log_info.log'),
                        filemode='a+',
                        format="%(asctime)-15s %(levelname)-8s  %(message)s")
    # log configuration
    logging.info('-' * 10 + 'configuration' + '*' * 10)
    for arg in vars(args):
        logging.info((arg, str(getattr(args, arg))))

    acc_fold = []
    reset_lr = state['lr']
    for f_num in range(args.folds):
        state['lr'] = reset_lr
        model.reset_all_weights()
        # optimizer = optim.SGD([
        # {'params': model.gcn11.parameters()},
        # {'params': model.gcn12.parameters()},
        # {'params': model.gcn21.parameters()},
        # {'params': model.gcn22.parameters()},
        # {'params': model.gcn31.parameters()},
        # {'params': model.gcn32.parameters()},
        # {'params': model.fc.parameters()},
        # {'params': model.conv1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.bn1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer2.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer3.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer4.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
        # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        print(args.lr)
        # save each fold's acc and reset configuration
        average_acc = 0
        best_acc = 0

        # 10-fold cross validation
        train_x, train_lm, train_y = [], [], []
        test_x, test_lm, test_y = [], [], []
        for id_fold in range(args.folds):
            if id_fold == f_num:
                test_x = features[id_fold]
                test_lm = landmarks[id_fold]
                test_y = labels[id_fold]
            else:
                train_x = train_x + features[id_fold]
                train_lm = train_lm + landmarks[id_fold]
                train_y = train_y + labels[id_fold]
        # convert array to tensor
        train_x = torch.tensor(train_x,
                               dtype=torch.float) / 255.0  #(b_s, 128, 128)
        train_x = train_x.unsqueeze(1)  #(b_s, 1, 128, 128)

        train_lm = np.stack(train_lm)
        # only the raw coordinates are needed; no normalization
        # train_lm = (train_lm - np.mean(train_lm, axis=0)) / np.std(train_lm, axis=0)
        train_lm = torch.tensor(train_lm, dtype=torch.float)
        # train_lm = train_lm.unsqueeze(2)

        test_x = torch.tensor(test_x, dtype=torch.float) / 255.0
        test_x = test_x.unsqueeze(1)
        # only the raw coordinates are needed; no normalization
        # test_lm = (test_lm - np.mean(test_lm, axis=0)) / np.std(test_lm, axis=0)
        test_lm = torch.tensor(test_lm, dtype=torch.float)
        # test_lm = test_lm.unsqueeze(2)
        train_y, test_y = torch.tensor(train_y), torch.tensor(test_y)

        train_dataset = torch.utils.data.TensorDataset(train_x, train_lm,
                                                       train_y)
        train_iter = torch.utils.data.DataLoader(dataset=train_dataset,
                                                 batch_size=args.train_batch,
                                                 shuffle=True)

        test_dataset = torch.utils.data.TensorDataset(test_x, test_lm, test_y)
        test_iter = torch.utils.data.DataLoader(dataset=test_dataset,
                                                batch_size=args.test_batch,
                                                shuffle=False)

        # test for fold order
        print(len(test_dataset))

        if args.evaluate:
            print('\nEvaluation only')
            test_loss, test_acc = test(torch.cat((train_x, test_x)),
                                       torch.cat((train_y, test_y)), model,
                                       criterion, start_epoch, use_cuda)
            print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
            continue

        # show plt
        # plt.show(block=False)

        # Train and val
        for epoch in range(start_epoch, args.epochs):

            # adjust the learning rate at specific epochs
            adjust_learning_rate(optimizer, epoch)
            # print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

            train_loss, train_acc = train(train_iter, model, criterion,
                                          optimizer, epoch, use_cuda)
            test_loss, test_acc = test(test_iter, model, criterion, epoch,
                                       use_cuda)

            # append logger file
            logger.append([
                f_num, state['lr'], train_loss, test_loss, train_acc, test_acc
            ])

            # save model
            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                f_num,
                checkpoint=args.checkpoint)

        # compute average acc
        acc_fold.append(best_acc)
        average_acc = sum(acc_fold) / len(acc_fold)

        logging.info('fold: %d, best_acc: %.2f, average_acc: %.2f' %
                     (f_num, best_acc, average_acc))
    logger.close()
    # logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    logging.info('acc_fold' + str(acc_fold))
    print('average acc:')
    print(average_acc)
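adjust_learning_rate and the shared state dict come from the surrounding training template and are not shown. A minimal sketch of the usual implementation in this style (an assumption; args.schedule and args.gamma are assumed names for the decay milestones and factor):

def adjust_learning_rate(optimizer, epoch):
    # decay the shared state['lr'] when the epoch hits a milestone
    global state
    if epoch in args.schedule:
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']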
Example 3
def train(**kwargs):

    if 'dataset' not in kwargs:
        opt = getattr(config, 'Office_Products_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)

    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)
    # 2 model
    model = Model(opt, getattr(methods, opt.model))
    if opt.use_gpu:
        model.cuda()
        if len(opt.gpu_ids) > 0:
            model = nn.DataParallel(model, device_ids=opt.gpu_ids)

    if opt.load_ckp:
        assert len(opt.ckp_path) > 0
        model.load(opt.ckp_path)

    # 3 data
    train_data = AmazonData(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, collate_fn=collate_fn)
    test_data = AmazonData(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, collate_fn=collate_fn)
    print('{}: train data: {}; test data: {}'.format(now(), len(train_data), len(test_data)))

    # 4 optimizer
    # optimizer = optim.Adadelta(model.parameters(), rho=0.95, eps=1e-6, weight_decay=opt.weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay)
    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    # optimizer = optim.RMSprop(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)

    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)
    # scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=8, total_epoch=3, after_scheduler=scheduler)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)
    # scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=opt.lr, max_lr=opt.lr * 10)

    # training
    print("start training....")
    min_loss = 1e+20
    best_res = 1e+10
    mse_func = nn.MSELoss()
    mae_func = nn.L1Loss()
    smooth_mae_func = nn.SmoothL1Loss()
    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        total_maeloss = 0.0
        model.train()
        print("{} Epoch {}: start".format(now(), epoch))
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            if opt.use_gpu:
                scores = torch.FloatTensor(scores).cuda()
            else:
                scores = torch.FloatTensor(scores)
            train_datas = unpack_input(opt, train_datas)
            optimizer.zero_grad()
            output = model(train_datas)
            mse_loss = mse_func(output, scores)
            total_loss += mse_loss.item() * len(scores)

            mae_loss = mae_func(output, scores)
            total_maeloss += mae_loss.item()

            smooth_mae_loss = smooth_mae_func(output, scores)

            if opt.update_method == 'mse':
                loss = mse_loss
            elif opt.update_method == 'rmse':
                loss = torch.sqrt(mse_loss) / 2.0
            elif opt.update_method == 'mae':
                loss = mae_loss
            elif opt.update_method == 'smooth_mae':
                loss = smooth_mae_loss

            loss.backward()
            optimizer.step()

            if opt.fine_step:
                if idx % opt.print_step == 0 and idx > 0:
                    print("\t{}, {} step finised;".format(now(), idx))
                    predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
                    if predict_loss < min_loss:
                        model.save(name=opt.dataset, opt=opt.print_opt)
                        min_loss = predict_loss
                        print("\tmodel save")
                    if test_mse < best_res:
                        best_res = test_mse

        scheduler.step(epoch)
        print("{};epoch:{};total_loss:{}".format(now(), epoch, total_loss))
        mse = total_loss * 1.0 / len(train_data)
        mae = total_maeloss * 1.0 / len(train_data)
        print("{};train reslut: mse: {}; rmse: {}; mae: {}".format(now(), mse, math.sqrt(mse), mae))
        predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
        if predict_loss < min_loss:
            # model.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = predict_loss
            print("model save")
        if test_mse < best_res:
            best_res = test_mse

    print("----"*20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res:  {best_res}")
    print("----"*20)
Example 4
def train_model(args):
    model = Model(node_embeddings, args.node_out_dim)

    if CUDA:
        model.cuda()

    if args.is_test:
        model.load_state_dict(
            torch.load('./checkpoints/{0}/trained_{1}.pth'.format(
                args.data, args.test_check)))
        get_test_score(model)
        return

    # NN = getL()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=1000,
                                                gamma=0.5,
                                                last_epoch=-1)

    # gat_loss_func = torch.nn.BCEWithLogitsLoss()
    gat_loss_func = torch.nn.MSELoss()

    epoch_losses = []  # losses of all epochs
    print("Number of epochs {}".format(args.epochs))

    model.train()

    for epoch in range(args.epochs + 1):
        # print("\nepoch-> ", epoch)
        # print("Training set shuffled, length is ", Corpus_.train_indices.shape)

        random.shuffle(Corpus_.train_edge_data)
        random.shuffle(Corpus_.train_neg_data)

        Corpus_.train_indices = np.array(list(Corpus_.train_edge_data)).astype(
            np.int32)
        Corpus_.train_neg_indices = np.array(list(
            Corpus_.train_neg_data)).astype(np.int32)

        start_time = time.time()
        epoch_loss = []

        if Corpus_.num_nodes % 500 == 0:
            num_iters_per_epoch = Corpus_.num_nodes // 500
        else:
            num_iters_per_epoch = (Corpus_.num_nodes // 500) + 1

        for iters in range(num_iters_per_epoch):
            start_time_iter = time.time()
            train_indices, train_indices_neg = Corpus_.get_iteration_batch(0)

            if CUDA:
                train_indices = Variable(
                    torch.LongTensor(train_indices)).cuda()
                train_indices_neg = Variable(
                    torch.LongTensor(train_indices_neg)).cuda()
            else:
                train_indices = Variable(torch.LongTensor(train_indices))
                train_indices_neg = Variable(
                    torch.LongTensor(train_indices_neg))

            optimizer.zero_grad()

            node_embeds = model()

            loss = batch_gat_loss(gat_loss_func, train_indices,
                                  train_indices_neg, node_embeds)

            if SP_LOSS:
                neighbor_spectrum_loss = get_neighbor_spectrum_loss(iters, Corpus_.neighbors, \
                       Corpus_.neighbors_count, node_embeds, num_iters_per_epoch)
                (loss +
                 float(args.regterm) * neighbor_spectrum_loss).backward()
            else:
                loss.backward()

            optimizer.step()

            epoch_loss.append(loss.data.item())

            end_time_iter = time.time()

            # print("Iteration-> {0}  , Iteration_time-> {1:.4f} , Iteration_loss {2:.4f}".format(
            # 	iters, end_time_iter - start_time_iter, loss.data.item()))

        scheduler.step()
        # if epoch % 100 == 0:
        print("Epoch {} , average loss {} , epoch_time {}\n".format(
            epoch,
            sum(epoch_loss) / len(epoch_loss),
            time.time() - start_time))
        epoch_losses.append(sum(epoch_loss) / len(epoch_loss))

        if epoch > 0 and epoch % 100 == 0:
            save_model(model, epoch, args.data)

    model.load_state_dict(
        torch.load('./checkpoints/{0}/trained_{1}.pth'.format(
            args.data, args.epochs)))
    get_test_score(model)
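save_model is not shown; a minimal sketch (an assumption) that matches the checkpoint path loaded back at the end of train_model:

import os
import torch

def save_model(model, epoch, data_name):
    ckpt_dir = './checkpoints/{0}'.format(data_name)
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir, 'trained_{0}.pth'.format(epoch)))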
Example 5
def train_eval(args):
    logging_config(folder=args.save_dir,
                   name='log{:d}'.format(args.save_id),
                   no_console=False)
    logging.info(args)

    ### check context
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)

    ### load data
    dataset = DataLoader(data_name=args.data_name, seed=args.seed)
    print(dataset)
    model = Model(use_KG=True,
                  input_node_dim=args.entity_embed_dim,
                  gnn_model=args.gnn_model,
                  num_gnn_layers=args.gnn_num_layer,
                  n_hidden=args.gnn_hidden_size,
                  dropout=args.dropout_rate,
                  n_entities=dataset.n_KG_entity,
                  n_relations=dataset.n_KG_relation,
                  relation_dim=args.relation_embed_dim,
                  reg_lambda_kg=args.regs,
                  reg_lambda_gnn=args.regs)
    if use_cuda:
        model.cuda()
    logging.info(model)
    ### optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    valid_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg', 'is_best'], ['%d', '%.5f', '%.5f', '%d'],
        os.path.join(args.save_dir, 'valid{:d}.csv'.format(args.save_id)))
    test_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg'], ['%d', '%.5f', '%.5f'],
        os.path.join(args.save_dir, 'test{:d}.csv'.format(args.save_id)))
    best_epoch = -1
    best_recall = 0.0

    train_g = dataset.train_g
    nid_th = th.LongTensor(train_g.ndata["id"])
    etype_th = th.LongTensor(train_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    train_g.ndata['id'] = nid_th
    train_g.edata['type'] = etype_th

    test_g = dataset.test_g
    nid_th = th.LongTensor(test_g.ndata["id"])
    etype_th = th.LongTensor(test_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    test_g.ndata['id'] = nid_th
    test_g.edata['type'] = etype_th

    item_id_range = th.LongTensor(dataset.item_id_range).cuda() if use_cuda \
        else th.LongTensor(dataset.item_id_range)

    for epoch in range(1, args.max_epoch + 1):
        ### train kg
        time1 = time()
        kg_sampler = dataset.KG_sampler(batch_size=args.batch_size_kg)
        iter = 0
        total_loss = 0.0
        for h, r, pos_t, neg_t, _ in kg_sampler:
            iter += 1
            model.train()
            h_th = th.LongTensor(h)
            r_th = th.LongTensor(r)
            pos_t_th = th.LongTensor(pos_t)
            neg_t_th = th.LongTensor(neg_t)
            if use_cuda:
                h_th, r_th, pos_t_th, neg_t_th = \
                    h_th.cuda(), r_th.cuda(), pos_t_th.cuda(), neg_t_th.cuda()
            loss = model.transR(h_th, r_th, pos_t_th, neg_t_th)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for KGE: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        ### train GNN
        if args.use_attention:
            time1 = time()
            print("Compute attention weight in train ...")
            with th.no_grad():
                A_w = model.compute_attention(train_g)
            train_g.edata['w'] = A_w
            print("Time: {:.2f}s".format(time() - time1))
        time1 = time()
        cf_sampler = dataset.CF_pair_sampler(batch_size=args.batch_size)
        iter = 0
        total_loss = 0.0
        for user_ids, item_pos_ids, item_neg_ids, _ in cf_sampler:
            iter += 1
            model.train()
            user_ids_th = th.LongTensor(user_ids)
            item_pos_ids_th = th.LongTensor(item_pos_ids)
            item_neg_ids_th = th.LongTensor(item_neg_ids)
            if use_cuda:
                user_ids_th, item_pos_ids_th, item_neg_ids_th = \
                    user_ids_th.cuda(), item_pos_ids_th.cuda(), item_neg_ids_th.cuda()
            embedding = model.gnn(train_g, train_g.ndata['id'])
            loss = model.get_loss(embedding, user_ids_th, item_pos_ids_th,
                                  item_neg_ids_th)
            loss.backward()
            # th.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)  # clip gradients
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for GNN: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        if epoch % args.evaluate_every == 0:
            time1 = time()
            val_recall, val_ndcg = eval(model, train_g,
                                        dataset.train_user_dict,
                                        dataset.valid_user_dict, item_id_range,
                                        use_cuda, args.use_attention)

            info = "Epoch{}, [{:.1f}s] val recall:{:.5f}, val ndcg:{:.5f}".format(
                epoch,
                time() - time1, val_recall, val_ndcg)
            # save best model
            if val_recall > best_recall:
                valid_metric_logger.log(epoch=epoch,
                                        recall=val_recall,
                                        ndcg=val_ndcg,
                                        is_best=1)
                best_recall = val_recall
                #best_ndcg = val_ndcg
                best_epoch = epoch
                time1 = time()
                test_recall, test_ndcg = eval(model, test_g,
                                              dataset.train_valid_user_dict,
                                              dataset.test_user_dict,
                                              item_id_range, use_cuda,
                                              args.use_attention)
                test_metric_logger.log(epoch=epoch,
                                       recall=test_recall,
                                       ndcg=test_ndcg)

                info += "\t[{:.1f}s] test recall:{:.5f}, test ndcg:{:.5f}".format(
                    time() - time1, test_recall, test_ndcg)
                #th.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)
            else:
                valid_metric_logger.log(epoch=epoch,
                                        recall=val_recall,
                                        ndcg=val_ndcg,
                                        is_best=0)
                recall, ndcg = eval(model, test_g,
                                    dataset.train_valid_user_dict,
                                    dataset.test_user_dict, item_id_range,
                                    use_cuda, args.use_attention)
                print("test recall:{}, test_ndcg: {}".format(recall, ndcg))
            logging.info(info)

    logging.info(
        "Final test recall:{:.5f}, test ndcg:{:.5f}, best epoch:{}".format(
            test_recall, test_ndcg, best_epoch))
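model.get_loss belongs to the Model class and is not shown. For reference, KGAT-style collaborative-filtering training typically scores user/item pairs by dot product of their embeddings and optimizes a BPR pairwise loss; a minimal sketch (an assumption, not this repo's exact loss):

import torch
import torch.nn.functional as F

def bpr_loss(embedding, user_ids, item_pos_ids, item_neg_ids):
    u = embedding[user_ids]
    pos = embedding[item_pos_ids]
    neg = embedding[item_neg_ids]
    pos_score = (u * pos).sum(dim=1)
    neg_score = (u * neg).sum(dim=1)
    # prefer observed items over sampled negatives
    return -F.logsigmoid(pos_score - neg_score).mean()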
Example 6
def main(args):

    # get dataframe
    df = get_df(args.groups)

    # get adaptive margin
    tmp = np.sqrt(
        1 / np.sqrt(df['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size,
                                                      args.stage)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold].copy() if not args.full else df.copy()
    df_train['label_group'] = LabelEncoder().fit_transform(
        df_train.label_group)

    df_valid = df[df['fold'] == args.fold]

    out_dim = df_train.label_group.nunique()
    print(f"out_dim = {out_dim}")

    dataset_train = ShoppeDataset(df_train,
                                  'train',
                                  transform=transforms_train)
    dataset_valid = ShoppeDataset(df_valid, 'val', transform=transforms_val)

    print(
        f'Train on {len(df_train)} images, validate on {len(df_valid)} images')

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               shuffle=True,
                                               drop_last=True)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)

    loss_config = decode_config(args.loss_config)
    # model
    if args.enet_type == 'resnest50':
        model = Resnest50(out_dim=out_dim, loss_config=loss_config, args=args)
    else:
        model = Model(args.enet_type,
                      out_dim=out_dim,
                      loss_config=loss_config,
                      args=args)
    model = model.cuda()

    # loss func
    criterion = get_criterion(args, out_dim, margins)

    # optimizer
    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr)

    # load pretrained
    if args.load_from and args.load_from != 'none':
        checkpoint = torch.load(args.load_from, map_location='cuda:0')
        state_dict = checkpoint['model_state_dict']
        state_dict = {
            k[7:] if k.startswith('module.') else k: state_dict[k]
            for k in state_dict.keys()
        }
        model.load_state_dict(state_dict, strict=True)
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        gc.collect()
        print(f"Loaded weight from {args.load_from}")

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    warmup_epochs = args.warmup_epochs if args.stage == 1 else 1
    print(warmup_epochs)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=warmup_epochs,
        after_scheduler=scheduler_cosine)

    # train & valid loop
    best_score = -1
    model_file = os.path.join(
        args.model_dir,
        weight_file(args.kernel_type, args.fold, args.stage,
                    loss_config.loss_type, out_dim))
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):

        print(time.ctime(), f'Epoch: {epoch}/{args.n_epochs}')
        scheduler_warmup.step(epoch - 1)

        train_loss, acc_list = train_epoch(model, train_loader, optimizer,
                                           criterion)
        f1score = val_epoch(model, valid_loader, criterion, df_valid, args)

        content = time.ctime() + ' ' + \
            (
                f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f},'
                f' train acc {np.mean(acc_list):.5f}, f1score: {(f1score):.6f}.')

        print(content)
        with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        if f1score > best_score:
            print('best f1 score ({:.6f} --> {:.6f}). Saving model ...'.format(
                best_score, f1score))
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, model_file)
            best_score = f1score

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_file)
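weight_file is not shown; a minimal sketch (an assumption, purely illustrative) producing a descriptive checkpoint name from the arguments passed above:

def weight_file(kernel_type, fold, stage, loss_type, out_dim):
    # hypothetical naming scheme; the real helper may differ
    return f'{kernel_type}_fold{fold}_stage{stage}_{loss_type}_dim{out_dim}.pth'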
Example 7
    pretrained_dict = torch.load(model_path)
    model_dict = model.state_dict()
    # only load parameters in dynamics_predictor
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items() \
        if 'dynamics_predictor' in k and k in model_dict}
    model.load_state_dict(pretrained_dict, strict=False)

else:
    AssertionError("Unsupported stage %s, using other evaluation scripts" %
                   args.stage)

model.eval()

if use_gpu:
    model = model.cuda()

infos = np.arange(10)

for idx_episode in range(len(infos)):

    print("Rollout %d / %d" % (idx_episode, len(infos)))

    B = 1
    n_particle, n_shape = 0, 0

    # ground truth
    datas = []
    p_gt = []
    s_gt = []
    for step in range(args.time_step):

Example 8
def predict(args):
    testset = IC15TestDataset()
    testloader = torch.utils.data.DataLoader(dataset=testset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             drop_last=True)
    if args.backbone == 'res50':
        model = resnet50(pretrained=True, num_classes=6)
    elif args.backbone == 'res18':
        model = Model()
    else:
        raise NotImplementedError

    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.exists(args.resume):
            print('Load from', args.resume)
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict']) is not used directly
            # because the model was trained on multiple GPUs, so every parameter
            # name carries a 'module.' prefix that must be stripped first
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
        else:
            print('No such checkpoint file at', args.resume)

    model.eval()

    for idx, (img, original_img) in tqdm(enumerate(testloader)):
        img = Variable(img.cuda())
        original_img = original_img.numpy().astype('uint8')[0]
        original_img = original_img.copy()

        outputs = model(img)

        bboxes = generate_result_PAN(outputs, original_img, threshold=0.7)

        for i in range(len(bboxes)):
            bboxes[i] = bboxes[i].reshape(4, 2)[:, [1, 0]].reshape(-1)

        for bbox in bboxes:
            cv2.drawContours(original_img, [bbox.reshape(4, 2)], -1,
                             (0, 255, 0), 1)

        image_name = testset.img_paths[idx].split('/')[-1].split('.')[0]
        generate_txt_result_PAN(
            bboxes, image_name,
            'outputs/result_ic15_txt_PAN_baseline600_v4_4_90_7')
        generate_img_result(
            original_img, image_name,
            'outputs/result_ic15_img_PAN_baseline600_v4_4_90_7')

    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'submit_ic15.zip',
                                    'result_txt_ic15_PAN_baseline')
    print(cmd)
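The prefix stripping above is the standard fix for checkpoints saved from an nn.DataParallel model, whose parameter names gain a 'module.' prefix. A reusable sketch of the same idea (this variant checks the prefix instead of slicing key[7:] unconditionally):

import collections

def strip_module_prefix(state_dict):
    # drop the 'module.' prefix added by nn.DataParallel, leave other keys intact
    return collections.OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())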
Example 9
def train(**kwargs):

    if 'dataset' not in kwargs:
        opt = getattr(config, 'Toys_and_Games_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)

    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)
    # 2 model
    model = Model(opt, getattr(methods, opt.model))
    if opt.use_gpu:
        model.cuda()
        if len(opt.gpu_ids) > 0:
            print("use multi gpu")
            model = nn.DataParallel(model, device_ids=opt.gpu_ids)

    if opt.load_ckp:
        assert len(opt.ckp_path) > 0
        model.load(opt.ckp_path)

    # 3 data
    train_data = AmazonData(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers, collate_fn=collate_fn)
    test_data = AmazonData(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, collate_fn=collate_fn)
    print('{}: train data: {}; test data: {}'.format(now(), len(train_data), len(test_data)))

    # 4 optimizer
    # optimizer = optim.Adadelta(model.parameters(), rho=0.95, eps=1e-6, weight_decay=opt.weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay)
    if opt.fine_tune:
        word_char = get_word_para()
        all_weights = dict(model.named_parameters())
        word_paras = []
        others = []
        for name in all_weights:
            if name in word_char:
                word_paras.append(all_weights[name])
            else:
                others.append(all_weights[name])

        optimizer = optim.Adam([{'params': others}, {'params': word_paras, 'lr': opt.lr * 0.3}],
                               lr=opt.lr, weight_decay=opt.weight_decay)
    else:
        optimizer = optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.999), weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.8)

    # training
    print("start training....")
    min_loss = 1e+20
    best_res = 1e+10
    mse_func = nn.MSELoss()
    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        model.train()
        scheduler.step(epoch)
        # train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers, collate_fn=collate_fn)
        print("{} Epoch {}: start".format(now(), epoch))
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            if opt.use_gpu:
                scores = torch.FloatTensor(scores).cuda()
            else:
                scores = torch.FloatTensor(scores)
            train_datas = unpack_input(opt, train_datas)
            optimizer.zero_grad()
            output = model(train_datas)
            loss = mse_func(output, scores)
            total_loss += loss.item() * len(scores)
            # loss = loss / 2.0  # tf.nn.l2loss
            loss.backward()
            optimizer.step()
            if idx % opt.print_step == 0 and idx > 0:
                print("\t{}, {} step finised;".format(now(), idx))
                predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
                if predict_loss < min_loss:
                    # model.save(name=opt.dataset, opt=opt.print_opt)
                    min_loss = predict_loss
                    print("\tmodel save")
                if test_mse < best_res:
                    best_res = test_mse

        print("{};epoch:{};total_loss:{}".format(now(), epoch, total_loss))
        mse = total_loss * 1.0 / len(train_data)
        print("{};train reslut: mse: {}; rmse: {}".format(now(), mse, math.sqrt(mse)))
        predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
        if predict_loss < min_loss:
            # model.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = predict_loss
            print("model save")
        if test_mse < best_res:
            best_res = test_mse

    print("----"*20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res:  {best_res}")
    print("----"*20)
Example 10
def crossover(fir_edge, sec_edge, adj, changes):
    co_list = []
    fitness_list = []
    co_list.append(fir_edge)
    co_list.append(sec_edge)
    fir_x, fir_y, fir_signal = fir_edge
    sec_x, sec_y, sec_signal = sec_edge
    signal = adj[fir_x, sec_y]
    if signal > 0:
        third_signal = 0
    else:
        third_signal = 1
    third_edge = (fir_x, sec_y, third_signal)
    signal = adj[sec_x, fir_y]
    if signal > 0:
        four_signal = 0
    else:
        four_signal = 1
    four_edge = (sec_x, fir_y, four_signal)
    co_list.append(third_edge)
    co_list.append(four_edge)

    for i in range(len(co_list)):
        x, y, signal = co_list[i]
        new_adj = adj.clone()
        if (x, y) in changes or (y, x) in changes:
            fitness_list.append(sys.maxsize)
            continue
        else:
            if signal == 1:
                new_adj[x, y] = 1.0
                new_adj[y, x] = 1.0
            if signal == 0:
                new_adj[x, y] = 0.0
                new_adj[y, x] = 0.0

            adj_selfloops = torch.add(new_adj, torch.eye(_N).cuda())
            inv_degrees = torch.pow(
                torch.sum(adj_selfloops, dim=0, keepdim=True), -0.5)
            adj_norm_tensor_cuda = adj_selfloops * inv_degrees * inv_degrees.transpose(
                0, 1)

            new_model = Model(_F, args.tar_hidden, _K)
            if args.cuda:
                new_model.cuda()
            new_optimizer = optim.Adam(new_model.parameters(),
                                       lr=args.tar_lr,
                                       weight_decay=args.tar_weight_decay)
            new_model.model_train(new_optimizer,
                                  args.tar_epochs,
                                  _X_cuda,
                                  adj_norm_tensor_cuda,
                                  _z_cuda,
                                  idx_train_cuda,
                                  idx_val_cuda,
                                  use_relu=False,
                                  drop_rate=args.drop_rate)
            new_model.model_test(_X_cuda,
                                 adj_norm_tensor_cuda,
                                 pre_all_labels_cuda,
                                 extra_idx_cuda,
                                 use_relu=False)
            loss_test = -new_model.loss_test
            fitness_list.append(loss_test)

    fitness_idx = sorted(range(len(fitness_list)),
                         key=lambda k: fitness_list[k])
    index = fitness_idx[0]
    return co_list[index]
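The three lines that build adj_norm_tensor_cuda implement the GCN symmetric normalization D^{-1/2}(A + I)D^{-1/2}, recomputed for every candidate edge flip. Factored out as a standalone sketch of the same computation:

import torch

def normalize_adj(adj):
    # D^{-1/2} (A + I) D^{-1/2}, with D the degree matrix of A + I
    adj_selfloops = adj + torch.eye(adj.size(0), device=adj.device)
    inv_degrees = torch.pow(adj_selfloops.sum(dim=0, keepdim=True), -0.5)
    return adj_selfloops * inv_degrees * inv_degrees.transpose(0, 1)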
Example 11
def get_greedy_list(ori_adj_cuda, Greedy_edges, change_edges):
    new_adj_tensor_cuda = ori_adj_cuda.clone()
    adj_selfloops = torch.add(new_adj_tensor_cuda, torch.eye(_N).cuda())
    inv_degrees = torch.pow(torch.sum(adj_selfloops, dim=0, keepdim=True),
                            -0.5)
    new_adj_norm_tensor_cuda = adj_selfloops * inv_degrees * inv_degrees.transpose(
        0, 1)
    #new_adj_norm_tensor_cuda.requires_grad = True

    new_surrogate_model = Model(_F, args.tar_hidden, _K)
    if args.cuda:
        new_surrogate_model.cuda()
    new_surrogate_optimizer = optim.Adam(new_surrogate_model.parameters(),
                                         lr=args.tar_lr,
                                         weight_decay=args.tar_weight_decay)
    new_surrogate_model.model_train(new_surrogate_optimizer,
                                    args.tar_epochs,
                                    _X_cuda,
                                    new_adj_norm_tensor_cuda,
                                    _z_cuda,
                                    idx_train_cuda,
                                    idx_val_cuda,
                                    use_relu=False,
                                    drop_rate=args.drop_rate)

    new_surrogate_model.zero_grad()
    new_adj_norm_tensor_cuda.requires_grad = True

    outputs = new_surrogate_model(_X_cuda,
                                  new_adj_norm_tensor_cuda,
                                  False,
                                  drop_rate=args.drop_rate)
    loss = F.nll_loss(outputs[idx_train_cuda], _z_cuda[idx_train_cuda])

    loss = -loss
    loss.backward()

    grad = -(new_adj_norm_tensor_cuda.grad.data.cpu().numpy().flatten())
    grad_abs = -(np.abs(grad))

    idxes = np.argsort(grad_abs)
    find = 0
    acc = None

    for p in idxes:
        if (len(Greedy_edges) < args.greedy_edges):
            x = p // _N
            y = p % _N
            if (x, y) in change_edges or (y, x) in change_edges:
                continue

            # add edge
            if grad[p] > 0:
                signal = 1
                if x == y or x in onehops_dict[y] or y in onehops_dict[x]:
                    continue
                else:
                    find, acc = method_add(x, y, new_adj_tensor_cuda,
                                           new_surrogate_model)
                    # ori_adj_cuda = new_adj_tensor_cuda.clone()
            # delete edge
            else:
                signal = 0
                if x == y or x not in onehops_dict[y] or y not in onehops_dict[x]:
                    continue
                else:
                    find, acc = method_del(x, y, new_adj_tensor_cuda,
                                           new_surrogate_model)
            if find == 1:
                edge_oper = (x, y, signal)
                acc = acc.item()
                Greedy_edges[edge_oper] = acc
                print('Greedy edge number', len(Greedy_edges))
        else:
            break
    Greedy_list = sorted(Greedy_edges.items(), key=lambda x: x[1])

    return Greedy_list
Example 12
    adj_selfloops = torch.add(ori_adj_tensor_cuda, torch.eye(_N).cuda())
    target_inv_degrees = torch.pow(
        torch.sum(adj_selfloops, dim=0, keepdim=True), -0.5)
    target_adj_norm_tensor_cuda = adj_selfloops * target_inv_degrees * target_inv_degrees.transpose(
        0, 1)

    _X_cuda, _z_cuda, idx_train_cuda, idx_val_cuda, idx_test_cuda = convert_to_Tensor(
        [_X_obs, _Z_obs, split_train, split_val, split_test])

    all_idx_cuda = torch.cat((idx_train_cuda, idx_val_cuda, idx_test_cuda))
    extra_idx_cuda = torch.cat((idx_val_cuda, idx_test_cuda))

    surrogate_model = Model(_F, args.tar_hidden, _K)
    if args.cuda:
        surrogate_model.cuda()

    surrogate_optimizer = optim.Adam(surrogate_model.parameters(),
                                     lr=args.tar_lr,
                                     weight_decay=args.tar_weight_decay)
    surrogate_model.model_train(surrogate_optimizer,
                                args.tar_epochs,
                                _X_cuda,
                                target_adj_norm_tensor_cuda,
                                _z_cuda,
                                idx_train_cuda,
                                idx_val_cuda,
                                use_relu=False,
                                drop_rate=args.drop_rate)

    target_model = Model(_F, args.tar_hidden, _K)
Example 13
def main():
    model_name = '3dgnn_enet'
    current_path = os.getcwd()
    logger = logging.getLogger(model_name)
    log_path = current_path + '/artifacts/'+ str(datetime.datetime.now().strftime('%Y-%m-%d-%H')).replace(' ', '/') + '/'
    print('log path is:',log_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
        os.makedirs(log_path + 'save/')
    hdlr = logging.FileHandler(log_path + model_name + '.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)
    logger.info("Loading data...")
    print("Loading data...")

    label_to_idx = {'<UNK>': 0, 'beam': 1, 'board': 2, 'bookcase': 3, 'ceiling': 4, 'chair': 5, 'clutter': 6,
                    'column': 7,
                    'door': 8, 'floor': 9, 'sofa': 10, 'table': 11, 'wall': 12, 'window': 13}
    idx_to_label = {0: '<UNK>', 1: 'beam', 2: 'board', 3: 'bookcase', 4: 'ceiling', 5: 'chair', 6: 'clutter',
                    7: 'column',
                    8: 'door', 9: 'floor', 10: 'sofa', 11: 'table', 12: 'wall', 13: 'window'}

    '''Data Loader parameter'''
    # Batch size
    batch_size_tr = 4
    batch_size_va = 4
    # Multiple threads loading data
    workers_tr = 4
    workers_va = 4
    # Data augmentation
    flip_prob = 0.5
    crop_size = 0

    dataset_tr = nyudv2.Dataset(flip_prob=flip_prob,crop_type='Random',crop_size=crop_size)
    dataloader_tr = DataLoader(dataset_tr, batch_size=batch_size_tr, shuffle=True,
                               num_workers=workers_tr, drop_last=False, pin_memory=True)

    dataset_va = nyudv2.Dataset(flip_prob=0.0,crop_type='Center',crop_size=crop_size)
    dataloader_va = DataLoader(dataset_va, batch_size=batch_size_va, shuffle=False,
                               num_workers=workers_va, drop_last=False, pin_memory=True)
    cv2.setNumThreads(workers_tr)

    class_weights = [0.0]+[1.0 for i in range(13)]
    nclasses = len(class_weights)
    num_epochs = 50

    '''GNN parameter'''
    use_gnn = True
    gnn_iterations = 3
    gnn_k = 64
    mlp_num_layers = 1

    '''Model parameter'''
    use_bootstrap_loss = False
    bootstrap_rate = 0.25
    use_gpu = True

    logger.info("Preparing model...")
    print("Preparing model...")
    model = Model(nclasses, mlp_num_layers,use_gpu)
    loss = nn.NLLLoss(reduce=not use_bootstrap_loss, weight=torch.FloatTensor(class_weights))
    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    '''Optimizer parameter'''
    base_initial_lr = 5e-4
    gnn_initial_lr = 1e-3
    betas = [0.9, 0.999]
    eps = 1e-08
    weight_decay = 1e-4
    lr_schedule_type = 'exp'
    lr_decay = 0.9
    lr_patience = 10

    optimizer = torch.optim.Adam([{'params': model.decoder.parameters()},
                                  {'params': model.gnn.parameters(), 'lr': gnn_initial_lr}],
                                 lr=base_initial_lr, betas=betas, eps=eps, weight_decay=weight_decay)

    if lr_schedule_type == 'exp':
        lambda1 = lambda epoch: pow((1 - ((epoch - 1) / num_epochs)), lr_decay)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    elif lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=lr_decay, patience=lr_patience)
    else:
        print('bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()

        with torch.no_grad():
            loss_sum = 0.0
            confusion_matrix = torch.cuda.FloatTensor(np.zeros(14 ** 2))

            start_time = time.time()

            for batch_idx, rgbd_label_xy in enumerate(dataloader):

                sys.stdout.write('\rEvaluating test set... {}/{}'.format(batch_idx + 1, len(dataloader)))
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()

                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()

                output = model(input, gnn_iterations=gnn_iterations, k=gnn_k, xy=xy, use_gnn=use_gnn)

                if use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()), target)
                    topk, indices = torch.topk(loss_per_pixel.view(output.size()[0], -1),
                                               int((crop_size ** 2) * bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                pairs = target.view(-1) * 14 + pred_arg_max.view(-1)
                for i in range(14 ** 2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(time.time() - start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape((14, 14))
            class_iou = np.zeros(14)
            # we ignore void values
            confusion_matrix[0, :] = np.zeros(14)
            confusion_matrix[:, 0] = np.zeros(14)
            for i in range(1, 14):
                class_iou[i] = confusion_matrix[i, i] / (
                        np.sum(confusion_matrix[i, :]) + np.sum(confusion_matrix[:, i]) - confusion_matrix[i, i])

        return loss_sum.item(), class_iou, confusion_matrix

    '''Training parameter'''
    model_to_load = None
    logger.info("num_epochs: %d", num_epochs)
    print("Number of epochs: %d"%num_epochs)
    interval_to_show = 100

    train_losses = []
    eval_losses = []

    if model_to_load:
        logger.info("Loading old model...")
        print("Loading old model...")
        model.load_state_dict(torch.load(model_to_load))
    else:
        logger.info("Starting training from scratch...")
        print("Starting training from scratch...")

    '''Training'''
    for epoch in range(1, num_epochs + 1):
        batch_loss_avg = 0
        if lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in enumerate(dataloader_tr):

            sys.stdout.write('\rTraining data set... {}/{}'.format(batch_idx + 1, len(dataloader_tr)))

            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            input = input.type(torch.FloatTensor)

            if use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()

            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            output = model(input, gnn_iterations=gnn_iterations, k=gnn_k, xy=xy, use_gnn=use_gnn)

            if use_bootstrap_loss:
                loss_per_pixel = loss.forward(log_softmax(output.float()), target)
                topk, indices = torch.topk(loss_per_pixel.view(output.size()[0], -1),
                                           int((crop_size ** 2) * bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()

            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx, batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)

        torch.save(model.state_dict(),log_path +'/save/'+'checkpoint_'+str(epoch)+'.pth')

        '''Evaluation'''
        eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        eval_losses.append(eval_loss)

        if lr_schedule_type == 'plateau':
            scheduler.step(eval_loss)
        print('Learning ...')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx, get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(epoch, get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx, get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(epoch, get_current_learning_rates()[1]))
        logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        print('Epoch{} Class IoU:'.format(epoch))
        for cl in range(14):
            logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
            print('{}:{}'.format(idx_to_label[cl], class_iou[cl]))
        logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        print("Mean IoU: %.2f"%np.mean(class_iou[1:]))
        logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        logger.info(confusion_matrix)


    logger.info("Finished training!")
    logger.info("Saving model...")
    print('Saving final model...')
    torch.save(model.state_dict(), log_path + '/save/3dgnn_enet_finish.pth')
    eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
    logger.info("Eval loss: %s", eval_loss)
    logger.info("Class IoU:")
    for cl in range(14):
        logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
    logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
Example 14
def train_nn(dataset_path,
             hha_dir,
             save_models_dir,
             num_epochs=50,
             batch_size=4,
             from_last_check_point=False,
             check_point_prefix='checkpoint',
             start_epoch=0,
             pre_train_model='',
             notebook=False):
    progress = tqdm_notebook if notebook else tqdm
    logger.info('Loading data...')

    dataset_tr = nyudv2.Dataset(dataset_path,
                                hha_dir,
                                flip_prob=config.flip_prob,
                                crop_type='Random',
                                crop_size=config.crop_size)
    dataloader_tr = DataLoader(dataset_tr,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=config.workers_tr,
                               drop_last=False,
                               pin_memory=True)

    dataset_va = nyudv2.Dataset(dataset_path,
                                hha_dir,
                                flip_prob=0.0,
                                crop_type='Center',
                                crop_size=config.crop_size)
    dataloader_va = DataLoader(dataset_va,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=config.workers_va,
                               drop_last=False,
                               pin_memory=True)

    if from_last_check_point:
        start_epoch, pre_train_model = find_last_check_point(
            save_models_dir, check_point_prefix)

    cv2.setNumThreads(config.workers_tr)

    logger.info('Preparing model...')
    model = Model(config.nclasses, config.mlp_num_layers, config.use_gpu)
    # `reduce` is deprecated; `reduction` expresses the same intent
    loss = nn.NLLLoss(reduction='none' if config.use_bootstrap_loss else 'mean',
                      weight=torch.FloatTensor(config.class_weights))
    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if config.use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    optimizer = torch.optim.Adam([{
        'params': model.decoder.parameters()
    }, {
        'params': model.gnn.parameters(),
        'lr': config.gnn_initial_lr
    }],
                                 lr=config.base_initial_lr,
                                 betas=config.betas,
                                 eps=config.eps,
                                 weight_decay=config.weight_decay)

    if config.lr_schedule_type == 'exp':

        def lambda_1(lambda_epoch):
            return pow((1 - ((lambda_epoch - 1) / num_epochs)),
                       config.lr_decay)

        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      lr_lambda=lambda_1)
    elif config.lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=config.lr_decay, patience=config.lr_patience)
    else:
        logger.error('Bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()

        with torch.no_grad():
            loss_sum = 0.0
            init_tensor_value = np.zeros(14**2)
            if config.use_gpu:
                confusion_matrix = torch.cuda.FloatTensor(init_tensor_value)
            else:
                confusion_matrix = torch.FloatTensor(init_tensor_value)

            start_time = time.time()

            for batch_idx, rgbd_label_xy in progress(enumerate(dataloader),
                                                     total=len(dataloader),
                                                     desc='Eval set'):
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()

                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if config.use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()

                output = model(input,
                               gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k,
                               xy=xy,
                               use_gnn=config.use_gnn)

                if config.use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()),
                                                  target)
                    topk, indices = torch.topk(
                        loss_per_pixel.view(output.size()[0], -1),
                        int((config.crop_size**2) * config.bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, config.nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                pairs = target.view(-1) * 14 + pred_arg_max.view(-1)
                for i in range(14**2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(time.time() -
                                                               start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape((14, 14))
            class_iou = np.zeros(14)
            confusion_matrix[0, :] = np.zeros(14)
            confusion_matrix[:, 0] = np.zeros(14)
            for i in range(1, 14):
                union = (np.sum(confusion_matrix[i, :]) +
                         np.sum(confusion_matrix[:, i]) -
                         confusion_matrix[i, i])
                # guard empty classes to avoid division by zero
                class_iou[i] = confusion_matrix[i, i] / union if union > 0 else 0.0

        return loss_sum.item(), class_iou, confusion_matrix

    # Training parameter
    logger.info(f'Num_epochs: {num_epochs}')
    interval_to_show = 100

    train_losses = []
    eval_losses = []

    if pre_train_model:
        logger.info(f'Loading pre-train model {pre_train_model}... ')
        model.load_state_dict(torch.load(pre_train_model))
    else:
        logger.info('Starting training from scratch...')

    # Training
    for epoch in progress(range(start_epoch, num_epochs + 1), desc='Training'):
        batch_loss_avg = 0
        if config.lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in progress(enumerate(dataloader_tr),
                                                 total=len(dataloader_tr),
                                                 desc=f'Epoch {epoch}'):
            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            # redundant cast: x.float() above already produced a CPU FloatTensor
            input = input.type(torch.FloatTensor)

            if config.use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()

            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            output = model(input,
                           gnn_iterations=config.gnn_iterations,
                           k=config.gnn_k,
                           xy=xy,
                           use_gnn=config.use_gnn)

            if config.use_bootstrap_loss:
                loss_per_pixel = loss.forward(log_softmax(output.float()),
                                              target)
                topk, indices = torch.topk(
                    loss_per_pixel.view(output.size()[0], -1),
                    int((config.crop_size**2) * config.bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()

            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx,
                            batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(
                    epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)

        torch.save(
            model.state_dict(),
            os.path.join(
                save_models_dir,
                f'{check_point_prefix}{CHECK_POINT_SEP}{epoch!s}{MODELS_EXT}'))

        # Evaluation
        eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        eval_losses.append(eval_loss)

        if config.lr_schedule_type == 'plateau':
            scheduler.step(eval_loss)
        print('Learning rates:')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(
            epoch,
            get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(
            epoch,
            get_current_learning_rates()[1]))
        logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        print('Epoch{} Class IoU:'.format(epoch))
        for cl in range(14):
            logger.info("%+10s: %-10s" % (IDX_LABEL[cl], class_iou[cl]))
            print('{}:{}'.format(IDX_LABEL[cl], class_iou[cl]))
        logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        print("Mean IoU: %.2f" % np.mean(class_iou[1:]))
        logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        logger.info(confusion_matrix)

    logger.info('Finished training!')
    logger.info('Saving trained model...')
    torch.save(model.state_dict(),
               os.path.join(save_models_dir, f'finish{MODELS_EXT}'))
    eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
    logger.info('Eval loss: %s', eval_loss)
    logger.info('Class IoU:')
    for cl in range(14):
        logger.info("%+10s: %-10s" % (IDX_LABEL[cl], class_iou[cl]))
    logger.info(f'Mean IoU: {np.mean(class_iou[1:])}')
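eval_set above reduces a flattened confusion matrix to per-class IoU, treating class 0 as void. A minimal sketch of just that arithmetic on a tiny placeholder matrix (the guard against empty classes mirrors the fix applied in the code):

import numpy as np

def class_iou_from_confusion(confusion_matrix):
    # rows = target class, cols = predicted class
    c = confusion_matrix.shape[0]
    cm = confusion_matrix.copy()
    cm[0, :] = 0  # class 0 is void and excluded from the score
    cm[:, 0] = 0
    iou = np.zeros(c)
    for i in range(1, c):
        union = cm[i, :].sum() + cm[:, i].sum() - cm[i, i]
        iou[i] = cm[i, i] / union if union > 0 else 0.0
    return iou

cm = np.array([[5, 0, 0],
               [0, 3, 1],
               [0, 2, 4]], dtype=float)
print(class_iou_from_confusion(cm))  # index 0 stays 0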
Esempio n. 15
0
model = Model(  # NOTE: assumed opening; this call is truncated at the top of the snippet
              nfeat=args.embed_size,
              nhid=args.hidden,
              gat_hidden_dim=args.gat_hidden_dim,
              joint_dim=args.joint_dim,
              features_index=features_index,
              tweet_word_adj=tweet_word_adj,
              user_tweet_adj=user_tweet_adj,
              nclass=labels.max().item() + 1,
              dropout=args.dropout,
              alpha=args.alpha)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    # features = features.cuda()
    # adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    train_idx = train_idx.cuda()
    dev_idx = dev_idx.cuda()
    test_idx = test_idx.cuda()


def train(epoch, best_acc, patience):
    # t = time.time()
    model.train()
    total_iters = len(idx_train) // args.batch_size + 1
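The truncated train() above computes total_iters = len(idx_train) // args.batch_size + 1, which suggests it walks the index tensor in fixed-size slices. A minimal sketch of that batching arithmetic under that assumption; idx_train and batch_size here are placeholders:

import torch

idx_train = torch.arange(103)  # placeholder training indices
batch_size = 16
total_iters = len(idx_train) // batch_size + 1

for it in range(total_iters):
    batch = idx_train[it * batch_size:(it + 1) * batch_size]
    if len(batch) == 0:  # the "+ 1" yields an empty last slice when len divides evenly
        continue
    # forward/backward on this batch would go here
    print(it, len(batch))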
Esempio n. 16
0
def main(args):
    G = nx.read_edgelist(args.classifydir + '_edgelist.txt', nodetype=int)
    model = Model(nx.number_of_nodes(G), args.num_parts)
    adj = Variable(torch.FloatTensor(nx.adjacency_matrix(G).toarray()),
                   requires_grad=False)

    print("NUMBER OF NODES:", len(G.nodes()))
    print("NUMBER OF Edges:", len(G.edges()))

    if torch.cuda.is_available():
        model = model.cuda()
        adj = adj.cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    micros = []
    macros = []
    accuracies = []
    losses = []

    for epoch in tqdm(range(args.num_epochs)):
        model.zero_grad()

        super_adj = model(adj, temp=args.temp, hard=args.hard, beta=args.beta)
        loss = model.loss(super_adj,
                          balance_node=args.balance_node,
                          lam=args.lam)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        if epoch % 50 == 0:

            vectors = embed_arr_2_dict(model.params.cpu().detach().numpy(), G)
            accs = classify(vectors, args)
            print("micro:", accs['micro'], "macro:", accs['macro'], "loss",
                  loss.item(), "accuracy", accs["acc"])
            micros.append(accs['micro'])
            macros.append(accs['macro'])
            accuracies.append(accs['acc'])
            losses.append(loss.item())

    print("Final Micro:", micros[-1], "Final Macro:", macros[-1])

    plt.plot(micros, label='micros')
    plt.plot(macros, label='macros')
    plt.plot(accuracies, label='accuracy')  # was plt.plot(test_x, ...); test_x is undefined here
    plt.legend()
    plt.title("Dataset: " + str(args.classifydir.split("/")[-1]) +
              ", training percentage: " + str(args.train_percent) +
              ", learning rate: " + str(args.lr) + ", epochs: " +
              str(args.num_epochs))
    plt.show()

    plt.plot(losses, label='loss')
    plt.title("Dataset: " + str(args.classifydir.split("/")[-1]) +
              ", training percentage: " + str(args.train_percent) +
              ", learning rate: " + str(args.lr) + ", epochs: " +
              str(args.num_epochs))
    plt.show()
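The loop above calls nn.utils.clip_grad_norm_ between loss.backward() and optimizer.step(). A minimal sketch isolating that ordering on a throwaway linear model; the model, data, and max_norm=1.0 are placeholders:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

x, y = torch.randn(8, 4), torch.randn(8, 2)
loss = nn.MSELoss()(model(x), y)

optimizer.zero_grad()
loss.backward()
# clip after backward (gradients must exist) and before step
# (so the clipped gradients are the ones applied)
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()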
Esempio n. 17
0
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logger = logging.getLogger('3dgnn')
    # strftime('%Y-%m-%d-%H') contains no spaces, so the old
    # str(...).replace(' ', '/') wrapper was a no-op and is dropped
    log_path = './experiment/' + datetime.datetime.now().strftime(
        '%Y-%m-%d-%H') + '/'
    print('log path is:', log_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
        os.makedirs(log_path + 'save/')
    hdlr = logging.FileHandler(log_path + 'log.txt')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)
    logger.info("Loading data...")
    print("Loading data...")
    '''idx_to_label = {0: '<UNK>', 1: 'beam', 2: 'board', 3: 'bookcase', 4: 'ceiling', 5: 'chair', 6: 'clutter',
                    7: 'column',
                    8: 'door', 9: 'floor', 10: 'sofa', 11: 'table', 12: 'wall', 13: 'window'}'''

    if args.is_2_headed:
        dataset_tr = nyud2headed.Dataset(flip_prob=config.flip_prob,
                                         crop_type='Random',
                                         crop_size=config.crop_size)
    else:
        dataset_tr = nyudv2.Dataset(flip_prob=config.flip_prob,
                                    crop_type='Random',
                                    crop_size=config.crop_size)
    idx_to_label = dataset_tr.label_names
    if args.is_2_headed:
        idx_to_label2 = dataset_tr.label2_names

    dataloader_tr = DataLoader(dataset_tr,
                               batch_size=args.batchsize,
                               shuffle=True,
                               num_workers=config.workers_tr,
                               drop_last=False,
                               pin_memory=True)

    if args.is_2_headed:
        dataset_va = nyud2headed.Dataset(flip_prob=0.0,
                                         crop_type='Center',
                                         crop_size=config.crop_size)
    else:
        dataset_va = nyudv2.Dataset(flip_prob=0.0,
                                    crop_type='Center',
                                    crop_size=config.crop_size)
    dataloader_va = DataLoader(dataset_va,
                               batch_size=args.batchsize,
                               shuffle=False,
                               num_workers=config.workers_va,
                               drop_last=False,
                               pin_memory=True)
    cv2.setNumThreads(config.workers_tr)

    logger.info("Preparing model...")
    print("Preparing model...")

    class_weights = [0.0] + [1.0 for i in range(1, len(idx_to_label))]
    nclasses = len(class_weights)
    if args.is_2_headed:
        nclasses1 = nclasses
        class2_weights = [0.0] + [1.0 for i in range(1, len(idx_to_label2))]
        nclasses2 = len(class2_weights)
        model = Model2Headed(nclasses1, nclasses2, config.mlp_num_layers,
                             config.use_gpu)
        loss2 = nn.NLLLoss(reduction='none' if config.use_bootstrap_loss else 'mean',
                           weight=torch.FloatTensor(class2_weights))
    else:
        model = Model(nclasses, config.mlp_num_layers, config.use_gpu)
    # `reduce` is deprecated; `reduction` expresses the same intent
    loss = nn.NLLLoss(reduction='none' if config.use_bootstrap_loss else 'mean',
                      weight=torch.FloatTensor(class_weights))

    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if config.use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        if args.is_2_headed:
            loss2 = loss2.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    optimizer = torch.optim.Adam([{
        'params': model.decoder.parameters()
    }, {
        'params': model.gnn.parameters(),
        'lr': config.gnn_initial_lr
    }],
                                 lr=config.base_initial_lr,
                                 betas=config.betas,
                                 eps=config.eps,
                                 weight_decay=config.weight_decay)

    if config.lr_schedule_type == 'exp':
        lambda1 = lambda epoch: pow(
            (1 - ((epoch - 1) / args.num_epochs)), config.lr_decay)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      lr_lambda=lambda1)
    elif config.lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=config.lr_decay, patience=config.lr_patience)
    else:
        print('bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()

        with torch.no_grad():
            loss_sum = 0.0
            if config.use_gpu:
                confusion_matrix = torch.cuda.FloatTensor(
                    np.zeros(len(idx_to_label)**2))
            else:
                confusion_matrix = torch.FloatTensor(
                    np.zeros(len(idx_to_label)**2))

            start_time = time.time()

            for batch_idx, rgbd_label_xy in tqdm(enumerate(dataloader),
                                                 total=len(dataloader),
                                                 smoothing=0.9):
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()

                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if config.use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()

                output = model(input,
                               gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k,
                               xy=xy,
                               use_gnn=config.use_gnn)
                # if args.is_2_headed:
                #     output1, output2 = model(input, gnn_iterations=config.gnn_iterations, k=config.gnn_k, xy=xy,
                #                              use_gnn=config.use_gnn)

                if config.use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()),
                                                  target)
                    topk, indices = torch.topk(
                        loss_per_pixel.view(output.size()[0], -1),
                        int((config.crop_size**2) * config.bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                pairs = target.view(-1) * len(
                    idx_to_label) + pred_arg_max.view(-1)
                for i in range(len(idx_to_label)**2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(time.time() -
                                                               start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape(
                (len(idx_to_label), len(idx_to_label)))
            class_iou = np.zeros(len(idx_to_label))
            confusion_matrix[0, :] = np.zeros(len(idx_to_label))
            confusion_matrix[:, 0] = np.zeros(len(idx_to_label))
            for i in range(1, len(idx_to_label)):
                tot = np.sum(confusion_matrix[i, :]) + np.sum(
                    confusion_matrix[:, i]) - confusion_matrix[i, i]
                if tot == 0:
                    class_iou[i] = 0
                else:
                    class_iou[i] = confusion_matrix[i, i] / tot

        return loss_sum.item(), class_iou, confusion_matrix

    '''Training parameter'''
    model_to_load = args.pretrain
    logger.info("num_epochs: %d", args.num_epochs)
    print("Number of epochs: %d" % args.num_epochs)
    interval_to_show = 100

    train_losses = []
    eval_losses = []

    if model_to_load:
        logger.info("Loading old model...")
        print("Loading old model...")
        model.load_state_dict(torch.load(model_to_load))
    else:
        # print("here")
        # exit(0)
        logger.info("Starting training from scratch...")
        print("Starting training from scratch...")
    '''Training'''
    for epoch in range(1, args.num_epochs + 1):
        print("epoch", epoch)
        batch_loss_avg = 0
        if config.lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in tqdm(enumerate(dataloader_tr),
                                             total=len(dataloader_tr),
                                             smoothing=0.9):
            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            if args.is_2_headed:
                target2 = rgbd_label_xy[3].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            input = input.type(torch.FloatTensor)

            if config.use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()
                if args.is_2_headed:
                    target2 = target2.cuda()

            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            if args.is_2_headed:
                output1, output2 = model(input,
                                         gnn_iterations=config.gnn_iterations,
                                         k=config.gnn_k,
                                         xy=xy,
                                         use_gnn=config.use_gnn)
            else:
                output = model(input,
                               gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k,
                               xy=xy,
                               use_gnn=config.use_gnn)

            if config.use_bootstrap_loss:
                # NOTE: this branch assumes the single-headed model; with
                # args.is_2_headed set, `output` is not defined here
                loss_per_pixel = loss.forward(log_softmax(output.float()),
                                              target)
                topk, indices = torch.topk(
                    loss_per_pixel.view(output.size()[0], -1),
                    int((config.crop_size**2) * config.bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                if args.is_2_headed:
                    loss_ = loss.forward(log_softmax(
                        output1.float()), target) + loss2.forward(
                            log_softmax(output2.float()), target2)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()

            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx,
                            batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(
                    epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)

        torch.save(model.state_dict(),
                   log_path + '/save/' + 'checkpoint_' + str(epoch) + '.pth')
        '''Evaluation'''
        # eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        # eval_losses.append(eval_loss)
        #
        # if config.lr_schedule_type == 'plateau':
        #     scheduler.step(eval_loss)
        print('Learning rates:')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(
            epoch,
            get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(
            epoch,
            get_current_learning_rates()[1]))
        # logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        # print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        # logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        # print('Epoch{} Class IoU:'.format(epoch))
        # for cl in range(len(idx_to_label)):
        #     logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
        #     print('{}:{}'.format(idx_to_label[cl], class_iou[cl]))
        # logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        # print("Mean IoU: %.2f" % np.mean(class_iou[1:]))
        # logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        # logger.info(confusion_matrix)

    logger.info("Finished training!")
    logger.info("Saving model...")
    print('Saving final model...')
    torch.save(model.state_dict(), log_path + '/save/3dgnn_finish.pth')
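In the two-headed branch above, each head contributes its own NLL term and the summed loss is backpropagated once. A minimal sketch with made-up head sizes (14 and 5 classes) standing in for nclasses1 and nclasses2:

import torch
import torch.nn as nn

log_softmax = nn.LogSoftmax(dim=1)
loss1, loss2 = nn.NLLLoss(), nn.NLLLoss()

B, C1, C2, H = 2, 14, 5, 8
output1 = torch.randn(B, C1, H, H, requires_grad=True)
output2 = torch.randn(B, C2, H, H, requires_grad=True)
target1 = torch.randint(0, C1, (B, H, H))
target2 = torch.randint(0, C2, (B, H, H))

loss_ = (loss1(log_softmax(output1), target1) +
         loss2(log_softmax(output2), target2))
loss_.backward()  # one backward pass covers both heads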
Esempio n. 18
0
def train():

    # for watching in tensorboard
    tb = SummaryWriter()

    # load data
    transform = set_transform()

    train_loader = get_loader(train_corpus, batch_size=8, transform=transform)
    valid_loader = get_loader(valid_corpus, batch_size=8, transform=transform)

    ## Define Model and print

    model = Model(vocab)
    print(model)

    batch = next(iter(valid_loader))

    # Adding Tensorboard

    grid = torchvision.utils.make_grid(batch[0])
    tb.add_image('images', grid, 0)
    tb.add_graph(model, batch[0])

    # Defining Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          momentum=momentum)

    best_train_loss, best_valid_loss = 100000, 100000
    train_loss, valid_loss = [], []
    not_improved = 0
    show_after_iter = 10
    # Checking cuda is available or not

    gpu_available = torch.cuda.is_available()
    if gpu_available:
        #print("Found GPU. Model Shifting to GPU")
        model.cuda()

    print("*" * 30 + " Training Start " + "*" * 30)
    for e in range(1, epoch + 1):  # range(1, epoch) skipped the final epoch

        ## Training Start ##
        model.train()
        for i, (image, classes) in enumerate(train_loader):

            if gpu_available:
                image = image.cuda()
                classes = classes.cuda()

            output = model(image)
            #_,pred = torch.max(output.data,1)

            loss = criterion(output, classes)

            # backprop (zero stale gradients first; the original omitted
            # optimizer.zero_grad(), so gradients accumulated across batches)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss move to cpu
            loss = loss.cpu().detach().numpy()
            train_loss.append(loss)

            if i % show_after_iter == 0:
                avg_loss = sum(train_loss) / len(train_loss)
                # NOTE: "accuracy" below is computed as 100 - loss, a rough
                # proxy rather than a true classification accuracy
                print(
                    f"Epoch: ({e}/{epoch}) Loss: {loss} Avg Loss: {avg_loss} Accuracy: {100-loss} Avg Acc: {100-avg_loss}"
                )

        del image, loss, classes

        # train_loss is never reset, so this averages over all batches seen so far
        avg_train_loss = sum(train_loss) / len(train_loss)

        # Adding value in tensorboard
        tb.add_scalar("Training_Loss", avg_train_loss, e)
        tb.add_scalar("Training_Accuracy", 100 - avg_train_loss, e)

        ## Validation Start ##
        model.eval()
        with torch.no_grad():  # no gradients needed during validation
            for i, (image, classes) in enumerate(valid_loader):

                if gpu_available:
                    image = image.cuda()
                    classes = classes.cuda()

                output = model(image)
                loss = criterion(output, classes)
                # move loss to cpu
                loss = loss.cpu().numpy()
                valid_loss.append(loss)
                #print(f"Loss: {loss}")

        # valid_loss is likewise never reset, so this averages over all epochs
        avg_valid_loss = sum(valid_loss) / len(valid_loss)
        # save if model loss is improved
        # save if model loss improved
        if avg_valid_loss < best_valid_loss:
            best_valid_loss = avg_valid_loss  # was best_train_loss, so the comparison never updated
            model_save = save_path + "/best_model.th"
            torch.save(model.state_dict(), model_save)
            not_improved = 0
        else:
            not_improved += 1

        if not_improved >= 6:
            break

        print(
            f"\n\t Epoch: {e} Training Loss: {avg_train_loss} Training Accuracy: {100-avg_train_loss}"
        )

        print(
            f"\t Epoch: {e} Validation Loss: {avg_valid_loss} Validation Accuracy: {100-avg_valid_loss} \n"
        )

        # Adding value in tensorboard
        tb.add_scalar("Validation_Loss", avg_valid_loss, e)
        tb.add_scalar("Validation_Accuracy", 100 - avg_valid_loss, e)

    # Saving training and validation losses so that further graphs can be generated
    save_loss = {"train": train_loss, "valid": valid_loss}
    with open(save_path + "/losses.pickle", "wb") as files:
        pickle.dump(save_loss, files)

    tb.close()
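The checkpoint/patience logic above (with the best_valid_loss fix applied) is a standard early-stopping pattern. A minimal sketch with a fabricated loss curve; 'best_model.th' and the patience of 6 mirror the code above:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)          # placeholder model
best_valid_loss = float('inf')
not_improved = 0

for e, avg_valid_loss in enumerate([0.9, 0.7, 0.8, 0.8, 0.8,
                                    0.8, 0.8, 0.8, 0.8]):
    if avg_valid_loss < best_valid_loss:
        best_valid_loss = avg_valid_loss
        torch.save(model.state_dict(), 'best_model.th')
        not_improved = 0
    else:
        not_improved += 1
    if not_improved >= 6:
        print(f'early stop at epoch {e}')
        break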