Example #1
import torch
import torch.nn as nn
import torch.optim as optim

# GCN, Config, depart and evaluate come from the surrounding project

def main():
    from pre_process import preprocess
    feature, a_hat, labels = preprocess()
    print("loaded")

    selected, unselected = depart(len(labels), 1 - Config.test_ratio)
    labels_selected = labels[selected]
    labels_unselected = labels[unselected]

    feature = torch.from_numpy(feature).float().cuda()
    tensor_selected = torch.tensor(labels_selected).long().cuda()
    a_hat = torch.tensor(a_hat).float().cuda()
    net = GCN(a_hat, feature.shape[1], Config.num_classes, Config.hidden_size,
              Config.n_hidden_layer).cuda()

    print(net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=Config.lr)
    net.train()
    for e in range(Config.num_epochs):
        optimizer.zero_grad()
        output = net(feature)
        loss = criterion(output[selected], tensor_selected)
        loss.backward()
        optimizer.step()

        trained_accuracy = evaluate(output[selected], labels_selected)
        untrained_accuracy = evaluate(output[unselected], labels_unselected)
        print(
            "[Epoch %d]: trained acc: %.7f, untrained acc: %.7f, loss: %.7f" %
            (e, trained_accuracy, untrained_accuracy,
             loss.detach().cpu().numpy()))
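Example #1 calls two project-local helpers, depart and evaluate, that the snippet does not include. A minimal sketch of what they might look like (hypothetical, not the original project's code):

import numpy as np

def depart(n, ratio):
    # hypothetical: shuffle range(n) and split it into (selected, unselected) index arrays
    perm = np.random.permutation(n)
    cut = int(n * ratio)
    return perm[:cut], perm[cut:]

def evaluate(logits, labels):
    # hypothetical: accuracy of argmax predictions against integer labels
    preds = logits.argmax(dim=1).detach().cpu().numpy()
    return float((preds == np.asarray(labels)).mean())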
Example #2
def main():
    
    # Load data
    start = time.time()
    N, _adj, _feats, _labels, train_adj, train_feats, train_nodes, val_nodes, test_nodes, y_train, y_val, y_test, val_mask, test_mask = utils.load_data(args.dataset)
    print('Loaded data in {:.2f} seconds!'.format(time.time() - start))
    
    # Prepare Train Data
    start = time.time()
    _, parts = utils.partition_graph(train_adj, train_nodes, args.num_clusters_train)
    parts = [np.array(pt) for pt in parts]
    train_features, train_support, y_train = utils.preprocess_multicluster(train_adj, parts, train_feats, y_train, args.num_clusters_train, args.batch_size)    
    print('Train Data pre-processed in {:.2f} seconds!'.format(time.time() - start))
    
    # Prepare Test Data
    if args.test == 1:    
        y_test, test_mask = y_val, val_mask
        start = time.time()
        _, test_features, test_support, y_test, test_mask = utils.preprocess(_adj, _feats, y_test, np.arange(N), args.num_clusters_test, test_mask) 
        print('Test Data pre-processed in {:.2f} seconds!'.format(time.time() - start))
    
    # Shuffle Batches
    batch_idxs = list(range(len(train_features)))
    
    # model
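    # note: _in and _out (the input feature dimension and the number of classes)
    # are not defined in this snippet; they are presumably computed from the
    # loaded data elsewhere in the project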
    model = GCN(fan_in=_in, fan_out=_out, layers=args.layers, dropout=args.dropout, normalize=True, bias=False).float()
    model.cuda()

    # Loss Function
    criterion = torch.nn.CrossEntropyLoss()
    
    # Optimization Algorithm
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        
    # Learning Rate Schedule    
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=args.lr, steps_per_epoch=int(args.num_clusters_train/args.batch_size), epochs=args.epochs+1, anneal_strategy='linear')
    model.train()

    
    # Train
    for epoch in range(args.epochs + 1):
        np.random.shuffle(batch_idxs)
        avg_loss = 0
        start = time.time()
        for batch in batch_idxs:
            loss = train(model.cuda(), criterion, optimizer, train_features[batch], train_support[batch], y_train[batch], dataset=args.dataset)
            if args.lr_scheduler == 1:
                scheduler.step()
            avg_loss += loss.item()
        
        # Write Train stats to tensorboard
        writer.add_scalar('time/train', time.time() - start, epoch)
        writer.add_scalar('loss/train', avg_loss/len(train_features), epoch)
        
    if args.test == 1:    
        # Test on cpu
        f1 = test(model.cpu(), test_features, test_support, y_test, test_mask, device='cpu')
        print('f1: {:.4f}'.format(f1))
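The per-batch train helper used in Example #2 is not shown. A rough, hypothetical sketch of a single step, assuming each batch's features and support arrive as dense numpy arrays and the labels as one-hot rows:

import numpy as np
import torch

def train(model, criterion, optimizer, features, support, labels, dataset=None):
    # hypothetical single-batch step for the Cluster-GCN style loop above
    device = next(model.parameters()).device
    x = torch.as_tensor(np.asarray(features), dtype=torch.float32, device=device)
    a = torch.as_tensor(np.asarray(support), dtype=torch.float32, device=device)
    y = torch.as_tensor(np.asarray(labels)).argmax(dim=1).to(device)  # one-hot -> class ids
    model.train()
    optimizer.zero_grad()
    loss = criterion(model(x, a), y)
    loss.backward()
    optimizer.step()
    return loss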
Example #3
    def test_memorize_minibatch(self):
        for db_name in self.db_names:
            db_info = get_db_info(db_name)
            train_data, val_data, _ = get_train_val_test_datasets(
                dataset_name=db_name,
                train_test_split='use_full_train',
                encoders=dict(CATEGORICAL='CategoricalOrdinalEnc',
                              SCALAR='ScalarRobustScalerEnc',
                              DATETIME='DatetimeScalarEnc',
                              LATLONG='LatLongScalarEnc',
                              TEXT='TextSummaryScalarEnc'),
            )
            train_loader = get_dataloader(
                dataset=train_data,
                batch_size=256,
                sampler_class_name='SequentialSampler',
                num_workers=0,
                max_nodes_per_graph=False)

            writer = DummyWriter()
            model = GCN(writer,
                        db_info=db_info,
                        hidden_dim=256,
                        n_init_layers=3,
                        activation_class_name='SELU',
                        activation_class_kwargs={},
                        loss_class_kwargs={},
                        loss_class_name='CrossEntropyLoss',
                        p_dropout=0.0,
                        drop_whole_embeddings=True,
                        n_layers=3,
                        readout_class_name='AvgPooling',
                        readout_kwargs={})
            if torch.cuda.is_available():
                model.cuda()
                model.device = torch.device('cuda:0')
            else:
                model.device = torch.device('cpu')
            model.train()
            optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.0)

            bdgl, features, label = next(iter(train_loader))
            recursive_to((bdgl, features, label), model.device)
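            # for/else: the else branch below runs only if the loop finishes
            # without break, i.e. the loss never dropped below 1e-4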
            for _ in tqdm(range(200)):
                optimizer.zero_grad()
                output = model(bdgl, features)
                loss = model.loss_fxn(output, label)
                if loss < 1e-4:
                    break
                loss.backward()
                optimizer.step()
            else:
                tqdm.write(f'Loss: {loss}')
                self.fail("Didn't memorize minibatch")
Example #4
def execute(params, budget=None, max_epoch=243, device='cpu', seed=42):

    np.random.seed(seed)
    torch.manual_seed(seed)
    if device == "cuda":
        torch.cuda.manual_seed(seed)

    # Load data
    if params['dataset'] == "cora":
        adj, features, labels, idx_train, idx_val, idx_test = load_data(
            dataset=params['dataset'], train_percent=0.052)
    elif params['dataset'] == "citeseer":
        adj, features, labels, idx_train, idx_val, idx_test = load_citeseer(
            train_percent=0.036)

    # Model and optimizer
    model = GCN(nfeat=features.shape[1],
                nhid=params['hidden'],
                nclass=labels.max().item() + 1,
                dropout=params['dropout'])
    optimizer = optim.Adam(model.parameters(),
                           lr=params['lr'],
                           weight_decay=params['weight_decay'])

    if device == "cuda":
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    # train model
    if device == "cuda":
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
    else:
        t1 = time.time_ns()

    model.train()
    num_epoch = int(budget) if budget is not None else max_epoch
    for epoch in range(num_epoch):
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

    # evaluation
    model.eval()
    output = model(features, adj)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])

    if device == "cuda":
        end.record()
        torch.cuda.synchronize()
        total_time = start.elapsed_time(end) / 1e3
        sys.stdout.flush()
    else:
        t2 = time.time_ns()
        total_time = (t2 - t1) / 1e9
    acc_val = acc_val.item()

    print()
    print(
        f"dataset={params['dataset']}, num_epoch={num_epoch}, device={next(model.parameters()).device}"
    )
    print("Validation results:", "loss= {:.4f}".format(loss_val.item()),
          "accuracy= {:.4f}".format(acc_val))
    print("Total training time: {:.4f} sec".format(total_time))

    return 1 - acc_val
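Examples #4 and #5 call an accuracy helper that is not shown; a minimal sketch in the spirit of the usual pygcn-style helper (hypothetical here):

def accuracy(output, labels):
    # fraction of nodes whose argmax prediction matches the label
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)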
Example #5
        adj = adj.to(device)
        t = time.time()

        output = model(features, adj)
        loss_train = criterion(output, batch_labels)
        acc_train = accuracy(output, batch_labels)

        optimizer.zero_grad()
        loss_train.backward()

        optimizer.step()

        loss_val = F.nll_loss(output, batch_labels)
        acc_val = accuracy(output, batch_labels)
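        # note: in this fragment the "val" loss/accuracy are computed on the same
        # training batch rather than on a held-out validation set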

        model.train()

        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

# Train model
t_total = time.time()

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - start_time))

# accuracy check
Example #6
def main():
    # Make dir
    temp = "./tmp"
    os.makedirs(temp, exist_ok=True)
    # Load data
    start = time.time()
    (train_adj, full_adj, train_feats, test_feats, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, _, val_nodes, test_nodes,
     num_data, visible_data) = utils.load_data(args.dataset)
    print('Loaded data in {:.2f} seconds!'.format(time.time() - start))

    start = time.time()
    # Prepare Train Data
    if args.batch_size > 1:
        start = time.time()
        _, parts = utils.partition_graph(train_adj, visible_data, args.num_clusters_train)
        print('Partition graph in {:.2f} seconds!'.format(time.time() - start))
        parts = [np.array(pt) for pt in parts]
    else:
        start = time.time()
        (parts, features_batches, support_batches, y_train_batches, train_mask_batches) = utils.preprocess(
            train_adj, train_feats, y_train, train_mask, visible_data, args.num_clusters_train, diag_lambda=args.diag_lambda)
        print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    # Prepare valid Data
    start = time.time()
    (_, val_features_batches, val_support_batches, y_val_batches, val_mask_batches) = utils.preprocess(
        full_adj, test_feats, y_val, val_mask, np.arange(num_data), args.num_clusters_val, diag_lambda=args.diag_lambda)
    print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Test Data
    start = time.time()
    (_, test_features_batches, test_support_batches, y_test_batches, test_mask_batches) = utils.preprocess(
        full_adj, test_feats, y_test, test_mask, np.arange(num_data), args.num_clusters_test, diag_lambda=args.diag_lambda)
    print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    idx_parts = list(range(len(parts)))

    # model
    model = GCN(
        fan_in=_in, fan_out=_out, layers=args.layers, dropout=args.dropout, normalize=True, bias=False, precalc=True).float()
    model.to(torch.device('cuda'))
    print(model)

    # Loss Function
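    # BCEWithLogitsLoss handles multi-label targets (several labels per node);
    # CrossEntropyLoss covers the single-label case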
    if args.multilabel:
        criterion = torch.nn.BCEWithLogitsLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # Optimization Algorithm
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Learning Rate Schedule
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(
    #     optimizer, max_lr=args.lr, steps_per_epoch=int(args.num_clusters_train/args.batch_size), epochs=args.epochs+1,
    #     anneal_strategy='linear')

    pbar = tqdm.tqdm(total=args.epochs, dynamic_ncols=True)
    for epoch in range(args.epochs + 1):
        # Train
        np.random.shuffle(idx_parts)
        start = time.time()
        avg_loss = 0
        total_correct = 0
        n_nodes = 0
        if args.batch_size > 1:
            (features_batches, support_batches, y_train_batches, train_mask_batches) = utils.preprocess_multicluster(
                train_adj, parts, train_feats, y_train, train_mask,
                args.num_clusters_train, args.batch_size, args.diag_lambda)
            for pid in range(len(features_batches)):
                # Use preprocessed batch data
                features_b = features_batches[pid]
                support_b = support_batches[pid]
                y_train_b = y_train_batches[pid]
                train_mask_b = train_mask_batches[pid]
                loss, pred, labels = train(
                    model.train(), criterion, optimizer,
                    features_b, support_b, y_train_b, train_mask_b, torch.device('cuda'))
                avg_loss += loss.item()
                n_nodes += pred.squeeze().numel()
                total_correct += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
        else:
            np.random.shuffle(idx_parts)
            for pid in idx_parts:
                # use preprocessed batch data
                features_b = features_batches[pid]
                support_b = support_batches[pid]
                y_train_b = y_train_batches[pid]
                train_mask_b = train_mask_batches[pid]
                loss, pred, labels = train(
                    model.train(), criterion, optimizer,
                    features_b, support_b, y_train_b, train_mask_b, torch.device('cuda'))
                avg_loss += loss.item()
                n_nodes += pred.squeeze().numel()
                total_correct += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
        train_acc = total_correct / n_nodes
        # Write Train stats to tensorboard
        writer.add_scalar('time/train', time.time() - start, epoch)
        writer.add_scalar('loss/train', avg_loss/len(features_batches), epoch)
        writer.add_scalar('acc/train', train_acc, epoch)

        # Validation
        cost, acc, micro, macro = evaluate(
            model.eval(), criterion, val_features_batches, val_support_batches, y_val_batches, val_mask_batches,
            val_nodes, torch.device("cuda"))

        # Write Valid stats to tensorboard
        writer.add_scalar('acc/valid', acc, epoch)
        writer.add_scalar('mi_F1/valid', micro, epoch)
        writer.add_scalar('ma_F1/valid', macro, epoch)
        writer.add_scalar('loss/valid', cost, epoch)
        pbar.set_postfix({"t": avg_loss/len(features_batches),"t_acc": train_acc, "v": cost, "v_acc": acc})
        pbar.update()
    pbar.close()

    # Test
    if args.test == 1:
        # Test on cpu
        cost, acc, micro, macro = test(
            model.eval(), criterion, test_features_batches, test_support_batches, y_test_batches, test_mask_batches,
            torch.device("cpu"))
        writer.add_scalar('acc/test', acc, epoch)
        writer.add_scalar('mi_F1/test', micro, epoch)
        writer.add_scalar('ma_F1/test', macro, epoch)
        writer.add_scalar('loss/test', cost, epoch)
        print('test: acc: {:.4f}'.format(acc))
        print('test: mi_f1: {:.4f}, ma_f1: {:.4f}'.format(micro, macro))
Example #7
def train(**kwargs):
    """
    GCN training
    ---
    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN_prediction/label
        - data/GCN_prediction/logit
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, [0, 1, 2, 3]))
    t_start = time.time()
    # update the config from the command-line arguments
    args.parse(**kwargs)
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)
    # append the overridden parameters to the tensorboard run name
    if kwargs["debug"] is False:
        comment_init = ''
        for k, v in kwargs.items():
            comment_init += '|{} '.format(v)
        writer = SummaryWriter(comment=comment_init)

    # === set evaluate object for evaluate later
    IoU = IOUMetric(args.num_class)
    IoU_CRF = IOUMetric(args.num_class)

    # === dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # === for each image, do training and testing in the same graph
    # for ii, (adj_t, features_t, labels_t, rgbxy_t, img_name, label_fg_t,
    #          label_bg_t) in enumerate(train_dataloader):
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue
        # === use RGBXY as feature
        # if args.use_RGBXY:
        #     data["rgbxy_t"] = normalize_rgbxy(data["rgbxy_t"])
        #     features_t = data["rgbxy_t"].clone()
        # === only RGB as feature
        t_be = time.time()
        if args.use_lap:
            """ is constructing................ """
            H, W, C = data["rgbxy_t"].shape
            A = torch.zeros([H * W, H * W], dtype=torch.float64)

            def find_neibor(card_x, card_y, H, W, radius=2):
                """
                Return the neighbors of (card_x, card_y) within `radius` as a
                list of (flat_index, x, y) tuples.
                """
                neibors_idx = []
                for idx_x in np.arange(card_x - radius, card_x + radius + 1):
                    for idx_y in np.arange(card_y - radius,
                                           card_y + radius + 1):
                        if (0 <= idx_x < H) and (0 <= idx_y < W):
                            neibors_idx.append(
                                (idx_x * W + idx_y, idx_x, idx_y))
                return neibors_idx

            neibors = dict()
            for node_idx in range(H * W):
                card_x, card_y = node_idx // W, node_idx % W
                neibors = find_neibor(card_x, card_y, H, W, radius=1)
                # print("H:{} W:{} | {} -> ({},{})".format(
                # H, W, node_idx, card_x, card_y))
                for nei in neibors:
                    # print("nei: ", nei)
                    diff_rgb = data["rgbxy_t"][
                        card_x, card_y, :3] - data["rgbxy_t"][nei[1],
                                                              nei[2], :3]
                    diff_xy = data["rgbxy_t"][card_x, card_y,
                                              3:] - data["rgbxy_t"][nei[1],
                                                                    nei[2], 3:]
                    A[node_idx, nei[0]] = torch.exp(
                        -torch.pow(torch.norm(diff_rgb), 2) /
                        (2. * args.CRF_deeplab["bi_rgb_std"])) + torch.exp(
                            -torch.pow(torch.norm(diff_xy), 2) /
                            (2. * args.CRF_deeplab["bi_xy_std"]))
            # print("{:3.1f}s".format(time.time() - t_start))
            D = torch.diag(A.sum(dim=1))
            L_mat = D - A
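            # unnormalized graph Laplacian L = D - A, used below in the smoothness
            # regularizer trace(output^T L output) / (H * W)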
        print("time for Laplacian {:3f} s".format(time.time() - t_be))
        # === Model and optimizer
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        num_class = np.max(img_class) + 1
        # debug("num_class: {}  {}".format(num_class + 1, type(num_class + 1)),
        #       line=290)
        model = GCN(
            nfeat=data["features_t"].shape[1],
            nhid=args.num_hid_unit,
            # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
            # image label don't have BG
            # adaptive num_class should have better performance
            nclass=args.num_class,  # args.num_class| num_class
            # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
            dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # ==== moving tensor to GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)
            # L_mat = L_mat.to(device)

        # === save the prediction before training
        if args.save_mask_before_train:
            model.eval()
            postprocess_image_save(img_name=data["img_name"],
                                   model_output=model(data["features_t"],
                                                      data["adj_t"]).detach(),
                                   epoch=0)

        # ==== Train model
        # t4epoch = time.time()
        criterion_ent = HLoss()
        # criterion_sym = symmetricLoss()

        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # === seperate FB/BG label
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            # F.log_softmax(label_fg_t, dim=1)
            # loss_sym = criterion_sym(output, labels_t, ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output),
                                      output))) / (H * W)
                gamma = 1e-2
                loss += gamma * loss_lap
            # loss = F.nll_loss(output, labels_t, ignore_index=255)

            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            # === for normalize cut
            # lamda = args.lamda
            # n_cut = 0.
            # if args.use_regular_NCut:
            #     W = gaussian_propagator(output)
            #     d = torch.sum(W, dim=1)
            #     for k in range(output.shape[1]):
            #         s = output[idx_test_t, k]
            #         n_cut = n_cut + torch.mm(
            #             torch.mm(torch.unsqueeze(s, 0), W),
            #             torch.unsqueeze(1 - s, 1)) / (torch.dot(d, s))

            # === calculus loss & updated parameters
            # loss_train = loss.cuda() + lamda * n_cut
            loss_train = loss.cuda()
            loss_train.backward()
            optimizer.step()

            # === save predcit mask at max epoch & IoU of img
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                if not kwargs["debug"]:
                    evaluate_IoU(model=model,
                                 features=data["features_t"],
                                 adj=data["adj_t"],
                                 img_name=data["img_name"],
                                 epoch=args.max_epoch,
                                 img_idx=ii + 1,
                                 writer=writer,
                                 IoU=IoU,
                                 IoU_CRF=IoU_CRF,
                                 use_CRF=False,
                                 save_prediction_np=True)
                print("[{}/{}] time: {:.4f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
        # end for epoch
        # print(
        #     "loss: {} | loss_fg: {} | loss_bg:{} | loss_entmin: {} | loss_lap: {}"
        #     .format(loss.data.item(), loss_fg.data.item(), loss_bg.data.item(),
        #             loss_entmin.data.item(), loss_lap.data.item()))
    # end for dataloader
    if kwargs["debug"] is False:
        writer.close()
    print("training was Finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600, (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))
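Examples #7 and #8 both rely on an HLoss entropy criterion that is not shown. A rough hypothetical sketch, assuming the model output is log-softmax (as the nll_loss calls suggest) and that 255 marks pixels to ignore:

import torch

class HLoss(torch.nn.Module):
    # hypothetical entropy-minimization loss over non-ignored pixels
    def forward(self, log_probs, labels, ignore_index=255):
        keep = labels != ignore_index
        probs = log_probs.exp()
        entropy = -(probs * log_probs).sum(dim=1)
        return entropy[keep].mean()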
Example #8
File: train.py  Project: Xavier-Pan/WSGCN
def gcn_train(**kwargs):
    """
    GCN training
    ---
    - the folder you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    - these folder would be created:
        - data/GCN4DeepLab/Label
        - data/GCN4DeepLab/Logit
    """
    t_start = time.time()
    # update config
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # tensorboard
    if args.use_TB:
        time_now = datetime.datetime.today()
        time_now = "{}-{}-{}|{}-{}".format(time_now.year, time_now.month,
                                           time_now.day, time_now.hour,
                                           time_now.minute // 30)

        keys_ignore = ["start_index", "GPU"]
        comment_init = ''
        for k, v in kwargs.items():
            if k not in keys_ignore:
                comment_init += '|{} '.format(v)
        writer = SummaryWriter(
            logdir='runs/{}/{}'.format(time_now, comment_init))
    else:
        writer = None

    # initial IoUMetric object for evaluation
    IoU = IOUMetric(args.num_class)

    # initial dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # train a separate GCN for each image
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        num_class = np.max(img_class) + 1
        model = GCN(nfeat=data["features_t"].shape[1],
                    nhid=args.num_hid_unit,
                    nclass=args.num_class,
                    dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # put data into GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)

        t_be = time.time()

        H, W, C = data["rgbxy_t"].shape
        N = H * W
        # laplacian
        if args.use_lap:
            L_mat = compute_lap_test(data, device, radius=2).to(device)
            print("Time for laplacian {:3.1f} s".format(time.time() - t_be))

        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # foreground and background loss
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output,
                                            data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output), output))) / N

                gamma = 1e-2
                loss += gamma * loss_lap

            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            loss_train = loss.cuda()
            loss_train.backward()
            optimizer.step()

            # save predicted mask and IoU at max epoch
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                evaluate_IoU(model=model,
                             features=data["features_t"],
                             adj=data["adj_t"],
                             img_name=data["img_name"],
                             img_idx=ii + 1,
                             writer=writer,
                             IoU=IoU,
                             save_prediction_np=True)
                print("evaluate time: {:3.1f} s".format(time.time() - t_now))
                print("[{}/{}] time: {:.1f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
                print("======================================")

    if writer is not None:
        writer.close()
    print("training was Finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600, (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))
Example #9
def train_gcn(dataset,
              test_ratio=0.5,
              val_ratio=0.2,
              seed=1,
              n_hidden=64,
              n_epochs=200,
              lr=1e-2,
              weight_decay=5e-4,
              dropout=0.5,
              use_embs=False,
              verbose=True,
              cuda=False):
    data = dataset.get_data()
    # train text embs
    if use_embs:
        pad_ix, n_tokens, matrix, pretrained_embs = data['features']
        if pretrained_embs is not None:
            pretrained_embs = torch.FloatTensor(pretrained_embs)
        features = torch.LongTensor(matrix)
    else:
        pad_ix = None
        n_tokens = None
        pretrained_embs = None
        features = torch.FloatTensor(data['features'])

    labels = torch.LongTensor(data['labels'])
    n = len(data['ids'])
    train_mask, val_mask, test_mask = get_masks(n,
                                                data['main_ids'],
                                                data['main_labels'],
                                                test_ratio=test_ratio,
                                                val_ratio=val_ratio,
                                                seed=seed)

    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = DGLGraph(data['graph'])
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        norm = norm.cuda()

    g.ndata['norm'] = norm.unsqueeze(1)

    if use_embs:
        if pretrained_embs is not None:
            in_feats = 100
        else:
            in_feats = 64
    else:
        in_feats = features.shape[1]

    # + 1 for unknown class
    n_classes = data['n_classes'] + 1
    model = GCN(g,
                in_feats=in_feats,
                n_hidden=n_hidden,
                n_classes=n_classes,
                activation=F.relu,
                dropout=dropout,
                use_embs=use_embs,
                pretrained_embs=pretrained_embs,
                pad_ix=pad_ix,
                n_tokens=n_tokens)

    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.9,
                                                           patience=20,
                                                           min_lr=1e-10)

    best_f1 = -100
    # initialize graph
    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        mask_probs = torch.empty(features.shape).uniform_(0, 1)
        if cuda:
            mask_probs = mask_probs.cuda()

        mask_features = torch.where(mask_probs > 0.2, features,
                                    torch.zeros_like(features))
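        # randomly zero out roughly 20% of the input features each epoch as a
        # simple feature-dropout augmentation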
        logits = model(mask_features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, val_mask)
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')

        if verbose:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                                loss.item(), f1,
                                                n_edges / np.mean(dur) / 1000))

    model.load_state_dict(torch.load('best_model.pt'))
    f1 = evaluate(model, features, labels, test_mask)

    if verbose:
        print()
        print("Test F1 {:.2}".format(f1))

    return f1
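The evaluate function used in Example #9 (returning an F1 score) is project-specific; a hypothetical version might look like:

import torch
from sklearn.metrics import f1_score

def evaluate(model, features, labels, mask):
    # hypothetical: micro-F1 of argmax predictions on the masked nodes
    model.eval()
    with torch.no_grad():
        logits = model(features)
        preds = logits[mask].argmax(dim=1)
        return f1_score(labels[mask].cpu().numpy(), preds.cpu().numpy(), average='micro')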
Example #10
def main(args):
    # convert boolean type for args
    assert args.use_ist in ['True', 'False'], f"use_ist must be 'True' or 'False', got {args.use_ist}"
    assert args.split_input in ['True', 'False'], f"split_input must be 'True' or 'False', got {args.split_input}"
    assert args.split_output in ['True', 'False'], f"split_output must be 'True' or 'False', got {args.split_output}"
    assert args.self_loop in ['True', 'False'], f"self_loop must be 'True' or 'False', got {args.self_loop}"
    assert args.use_layernorm in ['True', 'False'], f"use_layernorm must be 'True' or 'False', got {args.use_layernorm}"
    assert args.use_random_proj in ['True', 'False'], f"use_random_proj must be 'True' or 'False', got {args.use_random_proj}"
    use_ist = (args.use_ist == 'True')
    split_input = (args.split_input == 'True')
    split_output = (args.split_output == 'True')
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    use_random_proj = (args.use_random_proj == 'True')

    # make sure hidden layer is the correct shape
    assert (args.n_hidden % args.num_subnet) == 0

    # load and preprocess dataset
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')

    # randomly project the input to make it dense
    if use_random_proj:
        # densify input features with random projection
        from sklearn import random_projection

        # make sure input features are divisible by number of subnets
        # otherwise some parameters of the last subnet will be handled improperly
        n_components = int(data.features.shape[-1] / args.num_subnet) * args.num_subnet
        transformer = random_projection.GaussianRandomProjection(n_components=n_components)
        new_feature = transformer.fit_transform(data.features)
        features = torch.FloatTensor(new_feature)
    else:
        assert (data.features.shape[-1] % args.num_subnet) == 0.
        features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()
    
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(
            g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
            args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # initialize graph
    dur = []
    record = []
    sub_models = []
    opt_list = []
    sub_dict_list = []
    main_dict = None
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        if use_ist:
            model.eval()
            # IST training:
            # Distribute parameter to sub networks
            num_subnet = args.num_subnet
            if (epoch % args.iter_per_site) == 0.:
                main_dict = model.state_dict()
                feats_idx = [] # store all layer indices within a single list
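                # feats_idx[l] will hold, for layer l, one chunk of a random
                # permutation of that layer's feature indices per subnetwork
                # (or None when that layer is not split)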

                # create input partition
                if split_input:
                    feats_idx.append(torch.chunk(torch.randperm(in_feats), num_subnet))
                else:
                    feats_idx.append(None)

                # create hidden layer partitions
                for i in range(1, args.n_layers):
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))

                # create output layer partitions
                if split_output:
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))
                else:
                    feats_idx.append(None)

            for subnet_id in range(args.num_subnet):
                if (epoch % args.iter_per_site) == 0.:
                    # create the sub model to train
                    sub_model = GCN(
                            g, in_feats, args.n_hidden, n_classes,
                            args.n_layers, F.relu, args.dropout, use_layernorm,
                            split_input, split_output, args.num_subnet) 
                    sub_model = sub_model.to(device)
                    sub_dict = main_dict.copy()

                    # split input params
                    if split_input:
                        idx = feats_idx[0][subnet_id]
                        sub_dict['layers.0.weight'] = main_dict['layers.0.weight'][idx, :]

                    # split hidden params (and output params)
                    for i in range(1, args.n_layers + 1):
                        if i == args.n_layers and not split_output:
                            pass # params stay the same 
                        else:
                            idx = feats_idx[i][subnet_id]
                            sub_dict[f'layers.{i - 1}.weight'] = sub_dict[f'layers.{i - 1}.weight'][:, idx]
                            sub_dict[f'layers.{i - 1}.bias'] = main_dict[f'layers.{i - 1}.bias'][idx]
                            sub_dict[f'layers.{i}.weight'] = main_dict[f'layers.{i}.weight'][idx, :]

                    # use a lr scheduler
                    curr_lr = args.lr
                    if epoch >= int(args.n_epochs*0.5):
                        curr_lr /= 10
                    if epoch >= int(args.n_epochs*0.75):
                        curr_lr /= 10

                    # import params into subnet for training
                    sub_model.load_state_dict(sub_dict)
                    sub_models.append(sub_model)
                    sub_models = sub_models[-num_subnet:]
                    optimizer = torch.optim.Adam(
                            sub_model.parameters(), lr=curr_lr,
                            weight_decay=args.weight_decay)
                    opt_list.append(optimizer)
                    opt_list = opt_list[-num_subnet:]
                else:
                    sub_model = sub_models[subnet_id]
                    optimizer = opt_list[subnet_id]

                # train a sub network
                optimizer.zero_grad()
                sub_model.train()
                if split_input:
                    model_input = features[:, feats_idx[0][subnet_id]]
                else:
                    model_input = features
                logits = sub_model(model_input)
                loss = loss_fcn(logits[train_mask], labels[train_mask])

                # reset optimization for every sub training
                loss.backward()
                optimizer.step()

                # save sub model parameter
                if (
                        ((epoch + 1) % args.iter_per_site == 0.)
                        or (epoch == args.n_epochs - 1)):
                    sub_dict = sub_model.state_dict()
                    sub_dict_list.append(sub_dict)
                    sub_dict_list = sub_dict_list[-num_subnet:]

            # Merge parameter to main network:
            # force aggregation if training about to end
            if (
                    ((epoch + 1) % args.iter_per_site == 0.)
                    or (epoch == args.n_epochs - 1)):
                #keys = main_dict.keys()
                update_dict = main_dict.copy()

                # copy in the input parameters
                if split_input:
                    if args.n_layers <= 1 and not split_output:
                        for idx, sub_dict in zip(feats_idx[0], sub_dict_list):
                            update_dict['layers.0.weight'][idx, :] = sub_dict['layers.0.weight']
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            curr_idx = feats_idx[0][i]
                            next_idx = feats_idx[1][i]
                            correct_rows = update_dict['layers.0.weight'][curr_idx, :]
                            correct_rows[:, next_idx] = sub_dict['layers.0.weight']
                            update_dict['layers.0.weight'][curr_idx, :] = correct_rows
                else:
                    if args.n_layers <= 1 and not split_output:
                        update_dict['layers.0.weight'] = sum(sub_dict['layers.0.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            next_idx = feats_idx[1][i]
                            update_dict['layers.0.weight'][:, next_idx] = sub_dict['layers.0.weight']

                # copy the rest of the parameters
                for i in range(1, args.n_layers + 1):
                    if i == args.n_layers:
                        if not split_output:
                            update_dict[f'layers.{i-1}.bias'] = sum(sub_dict[f'layers.{i-1}.bias'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                            update_dict[f'layers.{i}.weight'] = sum(sub_dict[f'layers.{i}.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                        else:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                    else:
                        if i >= args.n_layers - 1 and not split_output:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                        else:
                            for idx, sub_dict in enumerate(sub_dict_list):
                                curr_idx = feats_idx[i][idx]
                                next_idx = feats_idx[i+1][idx]
                                update_dict[f'layers.{i-1}.bias'][curr_idx] = sub_dict[f'layers.{i-1}.bias']
                                correct_rows = update_dict[f'layers.{i}.weight'][curr_idx, :]
                                correct_rows[:, next_idx] = sub_dict[f'layers.{i}.weight']
                                update_dict[f'layers.{i}.weight'][curr_idx, :] = correct_rows 
                model.load_state_dict(update_dict)

        else:
            raise NotImplementedError('Should train with IST')

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Val Accuracy {:.4f} | Test Accuracy {:.4f} |"
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc_val, acc_test, n_edges / np.mean(dur) / 1000))
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
Example #11
def main():
    net = GCN(num_classes=num_classes,
              input_size=train_args['input_size']).cuda()
    if len(train_args['snapshot']) == 0:
        curr_epoch = 0
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1])
        train_record['best_val_loss'] = float(split_snapshot[3])
        train_record['corr_mean_iu'] = float(split_snapshot[6])
        train_record['corr_epoch'] = curr_epoch

    net.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.RandomCrop(train_args['input_size']),
        simul_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.CenterCrop(train_args['input_size'])
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        expanded_transforms.MaskToTensor(),
        expanded_transforms.ChangeLabel(ignored_label, num_classes - 1)
    ])
    restore_transform = standard_transforms.Compose([
        expanded_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train',
                           simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set,
                              batch_size=train_args['batch_size'],
                              num_workers=16,
                              shuffle=True)
    val_set = CityScapes('val',
                         simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set,
                            batch_size=val_args['batch_size'],
                            num_workers=16,
                            shuffle=False)

    weight = torch.ones(num_classes)
    weight[num_classes - 1] = 0
    criterion = CrossEntropyLoss2d(weight).cuda()

    # don't use weight_decay for bias
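    # four parameter groups: biases get twice the base learning rate and no weight
    # decay; the new 'gcm'/'brm' modules train at new_lr, the pretrained layers
    # at pretrained_lr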
    optimizer = optim.SGD([{
        'params': [param for name, param in net.named_parameters()
                   if name[-4:] == 'bias' and ('gcm' in name or 'brm' in name)],
        'lr': 2 * train_args['new_lr']
    }, {
        'params': [param for name, param in net.named_parameters()
                   if name[-4:] != 'bias' and ('gcm' in name or 'brm' in name)],
        'lr': train_args['new_lr'],
        'weight_decay': train_args['weight_decay']
    }, {
        'params': [param for name, param in net.named_parameters()
                   if name[-4:] == 'bias' and not ('gcm' in name or 'brm' in name)],
        'lr': 2 * train_args['pretrained_lr']
    }, {
        'params': [param for name, param in net.named_parameters()
                   if name[-4:] != 'bias' and not ('gcm' in name or 'brm' in name)],
        'lr': train_args['pretrained_lr'],
        'weight_decay': train_args['weight_decay']
    }], momentum=0.9, nesterov=True)

    if len(train_args['snapshot']) > 0:
        optimizer.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name,
                             'opt_' + train_args['snapshot'])))
        optimizer.param_groups[0]['lr'] = 2 * train_args['new_lr']
        optimizer.param_groups[1]['lr'] = train_args['new_lr']
        optimizer.param_groups[2]['lr'] = 2 * train_args['pretrained_lr']
        optimizer.param_groups[3]['lr'] = train_args['pretrained_lr']

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    if not os.path.exists(os.path.join(ckpt_path, exp_name)):
        os.mkdir(os.path.join(ckpt_path, exp_name))

    for epoch in range(curr_epoch, train_args['epoch_num']):
        train(train_loader, net, criterion, optimizer, epoch)
        validate(val_loader, net, criterion, optimizer, epoch,
                 restore_transform)