Example #1
def test(model_name: str,
         dataset_folder: str,
         save_folder: str,
         hypers: HyperParameters,
         batch_size: Optional[int],
         max_num_batches: Optional[int],
         series: DataSeries = DataSeries.TEST):
    # Create the dataset
    dataset = get_dataset(hypers.dataset_type, dataset_folder)

    # Build model and restore trainable parameters
    model = get_model(hypers, save_folder=save_folder, is_train=False)
    model.restore(name=model_name, is_train=False, is_frozen=False)

    # Test the model
    print('Starting evaluation on {0} set...'.format(series.name.capitalize()))
    test_results = model.predict(dataset=dataset,
                                 test_batch_size=batch_size,
                                 max_num_batches=max_num_batches,
                                 series=series)
    # Close the dataset
    dataset.close()

    if series == DataSeries.TRAIN:
        result_file = os.path.join(save_folder,
                                   FINAL_TRAIN_LOG_PATH.format(model_name))
    elif series == DataSeries.VALID:
        result_file = os.path.join(save_folder,
                                   FINAL_VALID_LOG_PATH.format(model_name))
    else:
        result_file = os.path.join(save_folder,
                                   TEST_LOG_PATH.format(model_name))

    save_by_file_suffix([test_results], result_file)
    print('Completed evaluation.')
Example #2
def main():
    args = get_args()
    cfg = Config.fromfile(args.config)
    cfg.fold = args.fold
    global device
    cfg.device = device
    log.info(cfg)

    # torch.cuda.set_device(cfg.gpu)
    util.set_seed(cfg.seed)
    log.info(f'setting seed = {cfg.seed}')

    # setup -------------------------------------
    for f in ['checkpoint', 'train', 'valid', 'test', 'backup']:
        os.makedirs(cfg.workdir + '/' + f, exist_ok=True)
    if 0:  # does not work perfectly
        file.backup_project_as_zip(
            PROJECT_PATH,
            cfg.workdir + '/backup/code.train.%s.zip' % IDENTIFIER)

    ## model ------------------------------------
    model = model_factory.get_model(cfg)

    # multi-gpu----------------------------------
    if torch.cuda.device_count() > 1 and len(cfg.gpu) > 1:
        log.info(f"Let's use {torch.cuda.device_count()} GPUs!")
        model = nn.DataParallel(model)
    model.to(device)

    ## train model-------------------------------
    do_train(cfg, model)
Example #3
File: train.py Project: hhliu79/TWP
def main(args):

    torch.cuda.set_device(args.gpu)
    dataset = load_corafull_amazon_dataset(args)
    continuum_data = continuum_corafull_amazon_dataset(dataset, args)

    task_manager = semi_task_manager(continuum_data.dataset_info())
    g, features, labels, train_mask, val_mask, test_mask = dataset
    task_manager.add_g(g)

    model = get_model(dataset, args, task_manager).cuda()
    life_model = importlib.import_module(f'LifeModel.{args.method}_model')
    life_model_ins = life_model.NET(model, task_manager, args)

    acc_matrix = np.zeros([args.n_tasks, args.n_tasks])
    meanas = []
    prev_model = None
    for task_i, dataset_i in enumerate(continuum_data):
        current_task, (g, features, labels, train_mask, val_mask,
                       test_mask) = dataset_i
        task_manager.add_task(current_task, {"test_mask": test_mask})
        label_offset1, label_offset2 = task_manager.get_label_offset(
            current_task)

        dur = []
        for epoch in range(args.epochs):
            if args.method == 'lwf':
                life_model_ins.observe(features, labels, task_i, train_mask,
                                       prev_model)
            else:
                life_model_ins.observe(features, labels, task_i, train_mask)

        acc_mean = []
        for t in range(task_i + 1):
            test_mask = task_manager.retrieve_task(t)['test_mask']
            label_offset1, label_offset2 = task_manager.get_label_offset(t)

            acc = evaluate(model, features, labels, test_mask, label_offset1,
                           label_offset2)
            acc_matrix[task_i][t] = round(acc * 100, 2)
            acc_mean.append(acc)
            print(f"T{t:02d} {acc*100:.2f}|", end="")

        accs = acc_mean[:task_i + 1]
        meana = round(np.mean(accs) * 100, 2)
        meanas.append(meana)

        acc_mean = round(np.mean(acc_mean) * 100, 2)
        print(f"acc_mean: {acc_mean}", end="")
        print()
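        # keep a snapshot of the trained model; the 'lwf' method passes it as prev_model when learning the next task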
        prev_model = copy.deepcopy(life_model_ins).cuda()

    print('AP: ', acc_mean)
    backward = []
    forward = []
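    # AF: for each task, accuracy after training on all tasks minus accuracy right after learning that task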
    for t in range(args.n_tasks - 1):
        b = acc_matrix[args.n_tasks - 1][t] - acc_matrix[t][t]
        backward.append(round(b, 2))
    mean_backward = round(np.mean(backward), 2)
    print('AF: ', mean_backward)
Example #4
def make_submission(cfg):
    predictions = []
    # setting dataset ---------------------------
    loader_test = dataset_factory.get_dataloader(cfg.data.test)

    ## model ------------------------------------
    model = model_factory.get_model(cfg)
    util.load_model(model_paths[0], model)
    model.to(device)
    model.eval()

    train_df = pd.read_csv(cfg.train_csv)
    regr_model = kaggle.get_regr_model(train_df)

    for img, _, _ in tqdm(loader_test):
        with torch.no_grad():
            output = model(img.to(device))
        output = output.data.cpu().numpy()
        for out in output:
            coords = kaggle.extract_coords(out, regr_model)
            s = kaggle.coords2str(coords)
            predictions.append(s)

    test = pd.read_csv(cfg.data.test.dataframe)
    test['PredictionString'] = predictions
    test.to_csv('predictions.csv', index=False)
    log.info(test.head())
Example #5
def run(config):
    model = get_model(config).cuda()
    criterion = get_loss(config)
    optimizer = get_optimizer(config, model.parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score = utils.checkpoint.load_checkpoint(config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score = -1, -1

    print('last epoch:{} score:{:.4f}'.format(last_epoch, score))

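    # schedulers created with last_epoch != -1 require 'initial_lr' in each param group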
    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    scheduler = get_scheduler(config, optimizer, last_epoch)
    if last_epoch != -1:
        scheduler.step()

    writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))

    train_loader = get_dataloader(config, 'train', transform=transforms.Compose([Albu(),
                                                                                 Normalize(),
                                                                                 ToTensor()]))
    test_loader = get_dataloader(config, 'val', transform=transforms.Compose([Normalize(),
                                                                               ToTensor()]))

    train(config, model, train_loader, test_loader, criterion, optimizer, scheduler, writer, last_epoch+1, score)
Example #6
def _infer(model, data):
    start = time.time()
    ################################################################################
    print('test preprocessing start!')
    # # data: [a, b, c,...]
    data_bc = []
    bc_func, _ = preprocess_dict["ben_clahe"]

    for d in data:
        d = cv2.resize(d, (704, 544))
        data_bc.append(bc_func(d))
        # del d

    # del data
    elapsed = time.time() - start
    print('test preprocessing time: %d hours %d minutes %d seconds' %
          (elapsed // 3600, (elapsed % 3600) // 60, (elapsed % 3600) % 60))
    print('test preprocessing ended!')
    del data

    ################################################################################

    # n_ensemble = len(ensemble_checkpoints)
    final = []

    for sess, ckpt, config_path in ensemble_checkpoints:
        config = utils.config.load(config_path)

        model = get_model(config).cuda()
        bind_model(model)

        nsml.load(checkpoint=ckpt, session=sess)

        # data_processed = []
        # _func, _ = preprocess_dict[config.DATA.PREPROCESS]
        # for d in data:
        #     d = cv2.resize(d, (config.DATA.IMG_W, config.DATA.IMG_H))
        #     data_processed.append(_func(d))

        out = run(model, data_bc, config)
        final.append(out)

        del model

    # final = sum(final) / float(n_ensemble)
    final = sum(final)

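    # argmax over the summed per-model scores gives the ensemble prediction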
    final = np.argmax(final, axis=1)
    print(final.shape)
    print(final)

    elapsed = time.time() - start
    print('Total inference time: %d hours %d minutes %d seconds' %
          (elapsed // 3600, (elapsed % 3600) // 60, (elapsed % 3600) % 60))

    return final
Example #7
def run(config, num_checkpoint, epoch_end, output_filename):
    dataloader = get_dataloader(config, split='val', transform=None)

    model = get_model(config).cuda()
    checkpoints = get_checkpoints(config, num_checkpoint, epoch_end)

    utils.checkpoint.load_checkpoint(config, model, checkpoints[0])
    for i, checkpoint in enumerate(checkpoints[1:]):
        model2 = get_model(config).cuda()
        last_epoch, _, _ = utils.checkpoint.load_checkpoint(config, model2, checkpoint)
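        # fold the next checkpoint into the running average with weight 1 / (i + 2)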
        swa.moving_average(model, model2, 1. / (i + 2))

    with torch.no_grad():
        swa.bn_update(dataloader, model)

    # output_name = '{}.{}.{:03d}'.format(output_filename, num_checkpoint, last_epoch)
    # print('save {}'.format(output_name))
    utils.checkpoint.save_checkpoint(config, model, None, None, epoch_end,
                                     weights_dict={'state_dict': model.state_dict()},
                                     name=output_filename)
Example #8
def _get_model(config, inp, label, bsize, is_training, name_scope, reuse):
    """Builds models."""
    model_cls = 'resnet'
    trn_kwargs = {
        'is_training': is_training,
        'inp': inp,
        'label': label,
        'batch_size': bsize,
    }
    with tf.name_scope(name_scope):
        with tf.variable_scope('Model', reuse=reuse):
            m = get_model(model_cls, config, **trn_kwargs)
    return m
Example #9
def run(config):

    model = get_model(config).to(device)
    criterion = get_loss(config.LOSS.NAME)
    optimizer = get_optimizer(config, model.parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(
            config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')

    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
        last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    scheduler = get_scheduler(config, optimizer, last_epoch)

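    # when resuming, re-apply the gamma decay once for every milestone already passed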
    if config.SCHEDULER.NAME == 'multi_step':
        milestones = scheduler.state_dict()['milestones']
        step_count = len([i for i in milestones if i < last_epoch])
        optimizer.param_groups[0]['lr'] *= scheduler.state_dict(
        )['gamma']**step_count

    if last_epoch != -1:
        scheduler.step()

    writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))

    train_loader = get_dataloader(config,
                                  'train',
                                  transform=transforms.Compose([
                                      Albu(),
                                      CV2_Resize(config.DATA.IMG_W,
                                                 config.DATA.IMG_H),
                                      Normalize(),
                                      ToTensor()
                                  ]))
    val_loader = get_dataloader(config,
                                'val',
                                transform=transforms.Compose([
                                    CV2_Resize(config.DATA.IMG_W,
                                               config.DATA.IMG_H),
                                    Normalize(),
                                    ToTensor()
                                ]))

    train(config, model, train_loader, val_loader, criterion, optimizer,
          scheduler, writer, last_epoch + 1, score, loss)
Example #10
def forward_model(best_model, method):
    args = best_model['args']

    torch.cuda.set_device(args.gpu)
    set_seed(args)

    # load and preprocess dataset
    all_data = load_dataset(args)
    training = all_data[:int(len(all_data) * 0.7)]
    validation = all_data[int(len(all_data) * 0.7):int(len(all_data) * 0.8)]
    testing = all_data[int(len(all_data) * 0.8):]

    train_loader = DataLoader(training,
                              batch_size=1000,
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(validation,
                            batch_size=1000,
                            shuffle=True,
                            collate_fn=collate)
    test_loader = DataLoader(testing,
                             batch_size=4000,
                             shuffle=False,
                             collate_fn=collate)

    dataset = (None, np.zeros((15, 15)), np.zeros(
        (1, args.num_factors)), None, None, None, None)
    # create model
    model = get_model(dataset, args, mode='multilabel').cuda()

    for step, (g, labels, gt_adjs) in enumerate(test_loader):
        model.load_state_dict(best_model['model_state_dict'])
        model.eval()

        # update the new graph
        model.g = g

        features = g.ndata['feat'].float().cuda()
        labels = labels.cuda()
        logits = model(features)  #.view(-1, n_class, n_latent)

        hidden = model.get_hidden_feature()
        matrix = hidden[0]  # #sample x dim
        correlation = np.zeros((matrix.shape[1], matrix.shape[1]))
        for i in range(matrix.shape[1]):
            for j in range(matrix.shape[1]):
                cof = scipy.stats.pearsonr(matrix[:, i], matrix[:, j])[0]
                correlation[i][j] = cof

        plot_corr(np.abs(correlation), save=f'{method}.png')
Example #11
def evaluate(model_enum = Models.Vgg16GAP, dataset_enum = Datasets.cityscapes, loader_split = LoaderSplit.val):
    # Set dataset
    dataset = get_loader(dataset_enum, LoaderType.classification, loader_split)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)

    # Set up model
    model = get_model(dataset_enum, model_enum)
    model.train()
    model.load()
    model.to(device)

    output_name = model_enum.name + '_' + dataset_enum.name + '_' + loader_split.name

    evaluate_model(model, dataloader, output_name)
Example #12
def _get_assign_weighted_model(config, inp, label, weights, weights_dict,
                               bsize, is_training, name_scope, reuse):
    """Builds models."""
    model_cls = 'assign-wts-resnet'
    trn_kwargs = {
        'is_training': is_training,
        'inp': inp,
        'label': label,
        'ex_wts': weights,
        'batch_size': bsize
    }
    with tf.name_scope(name_scope):
        with tf.variable_scope('Model', reuse=reuse):
            m = get_model(model_cls, config, weights_dict, **trn_kwargs)
    return m
Example #13
def main():
    seed_everything()

    pprint.pprint(config, indent=2)

    model = get_model(config).cuda()
    bind_model(model)

    args = get_args()
    if args.pause:  ## when in test mode
        print('Inferring Start...')
        nsml.paused(scope=locals())

    if args.mode == 'train':  ### when in training mode
        print('Training Start...')

        nsml.load(checkpoint='18', session='team146/KHD2019_FUNDUS/20')
        nsml.save(0)
        exit()
Example #14
def main():
    seed_everything()

    config = utils.config.load(ensemble_checkpoints[0][2])
    model = get_model(config).cuda()
    bind_model(model)

    args = get_args()
    if args.pause:  ## when in test mode
        print('Inferring Start...')
        nsml.paused(scope=locals())

    if args.mode == 'train':  ### when in training mode
        print('Training Start...')

        nsml.load(session=ensemble_checkpoints[0][0],
                  checkpoint=ensemble_checkpoints[0][1])
        nsml.save(0)
        exit()
Example #15
    def __init__(self, device, model_name, logger):
        """
        Creates and opens the Neural Compute device and allocates a graph that can execute inferences on it.
        """
        if device is None:
            raise Exception("No devices found.")
        else:
            self.device = device

        self.logger = logger

        # Init model
        self.logger.info("Initializing %s model.", model_name)
        self.model = model_factory.get_model(model_name.lower())

        graph_file_path = self.model.graph_path
        # Load graph file
        start_time = time.time()
        try:
            with open(graph_file_path, mode="rb") as graph_file:
                in_memory_graph = graph_file.read()
        except Exception:
            self.logger.error("Error reading graph file: %s.", graph_file_path)
            raise

        self.graph = None
        self.input_fifo = None
        self.output_fifo = None

        self.initialization_queue.put(0)
        self.graph = mvnc2.Graph("mvnc2 graph")
        self.input_fifo, self.output_fifo = self.graph.allocate_with_fifos(self.device, in_memory_graph,
                                                                           input_fifo_data_type=mvnc2.FifoDataType.FP16,
                                                                           output_fifo_data_type=mvnc2.FifoDataType.FP16)
        _ = self.initialization_queue.get()
        graph_alloc_time = (time.time() - start_time) * 1000
        self.logger.info("Graph allocated in %f ms.", graph_alloc_time)

        if self.graph is None or self.input_fifo is None or self.output_fifo is None:
            raise Exception("Could not initialize device.")

        self.inference_results = 0
Example #16
def train(data_folder: str,
          save_folder: str,
          hypers: HyperParameters,
          should_print: bool,
          max_epochs: Optional[int] = None) -> str:
    model = get_model(hypers, save_folder=save_folder, is_train=True)

    # Create dataset
    dataset = get_dataset(hypers.dataset_type, data_folder)

    if max_epochs is not None:
        hypers.epochs = max_epochs

    # Train the model
    train_label = model.train(dataset=dataset, should_print=should_print)

    # Close the dataset files
    dataset.close()

    return train_label
Example #17
def run(config):
    model = get_model(config).to(device)
    # model_params = [{'params': model.encoder.parameters(), 'lr': config.OPTIMIZER.ENCODER_LR},
    #                 {'params': model.decoder.parameters(), 'lr': config.OPTIMIZER.DECODER_LR}]

    optimizer = get_optimizer(config, model.parameters())
    # optimizer = get_optimizer(config, model_params)

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    # optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.ENCODER_LR
    # optimizer.param_groups[1]['initial_lr'] = config.OPTIMIZER.DECODER_LR

    scheduler = get_scheduler(config, optimizer, last_epoch)

    if config.SCHEDULER.NAME == 'multi_step':
        milestones = scheduler.state_dict()['milestones']
        step_count = len([i for i in milestones if i < last_epoch])
        optimizer.param_groups[0]['lr'] *= scheduler.state_dict()['gamma'] ** step_count
        # optimizer.param_groups[0]['lr'] *= scheduler.state_dict()['gamma'] ** step_count
        # optimizer.param_groups[1]['lr'] *= scheduler.state_dict()['gamma'] ** step_count

    if last_epoch != -1:
        scheduler.step()

    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    train_loader = get_dataloader(config, 'train', transform=Albu(config.ALBU))
    val_loader = get_dataloader(config, 'val')

    train(config, model, train_loader, val_loader, optimizer, scheduler, log_train, log_val, last_epoch+1, score, loss)
Example #18
    def __init__(self, kind, device=None):
        """
        Args:
            kind (str): one of {'sk', 'sw'}
            device (torch.device): device to use for inference
        """
        self._kind = kind
        self._transform = Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.732], std=[0.129])
        ])

        if device:
            self._device = device
        else:
            self._device = torch.device('cpu')

        self._model = get_model(kind=kind, device=self._device, cache=True)
        self._model.eval()

        self._radius = config['DATA INGESTION'].getint('radius')
Example #19
def run(config):
    model = get_model(config).to(device)
    optimizer = get_optimizer(config, model.parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(
            config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
        last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR

    scheduler = get_scheduler(config, optimizer, last_epoch)

    if config.SCHEDULER.NAME == 'multi_step':
        milestones = scheduler.state_dict()['milestones']
        step_count = len([i for i in milestones if i < last_epoch])
        optimizer.param_groups[0]['lr'] *= scheduler.state_dict(
        )['gamma']**step_count

    if last_epoch != -1:
        scheduler.step()

    # writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))
    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    augmentation = Albu_Seg() if config.TASK == 'seg' else Albu_Cls()
    train_loader = get_dataloader(config, 'train', transform=augmentation)
    val_loader = get_dataloader(config, 'val')

    train(config, model, train_loader, val_loader, optimizer, scheduler,
          log_train, log_val, last_epoch + 1, score, loss)
Example #20
    # resnest
    # for model_name in model_names:
    #     print('model_name',model_name)
    #     model = get_model(model_name=model_name,input_shape=input_shape,n_classes=n_classes,
    #                 verbose=True,fc_activation=fc_activation,using_cb=True)
    #     print('-'*10)

    #RegNetY600 set
    # model = get_model(model_name="RegNet",input_shape=input_shape,n_classes=n_classes,
    #             verbose=True,fc_activation=fc_activation,stage_depth=[1,3,7,4],
    #             stage_width=[48,112,256,608],stage_G=16,SEstyle_atten="SE",active='mish')
    # print('-'*10)

    #DETR
    # model_name = 'res34_DETR'
    # print('model_name',model_name)
    # model = get_model(model_name=model_name,input_shape=input_shape,
    #                   n_classes=n_classes,verbose=True,training=None,
    #                   fc_activation=fc_activation)
    # print('-'*10)

    model_names = ['ResNest50_DETR', 'res34_DETR']
    for model_name in model_names:
        print('model_name', model_name)
        model = get_model(model_name=model_name,
                          input_shape=input_shape,
                          n_classes=n_classes,
                          verbose=True,
                          fc_activation=fc_activation,
                          using_cb=True)
Example #21
def main(args):
    torch.cuda.set_device(args.gpu)
    set_seed(args)

    log_dir = make_log_dir(args.model_name, args.dataset, args.log_subdir)

    log_file = os.path.join(log_dir, "log.txt")
    sys.stdout = open(log_file, 'w')
    backup_model = f"cp -r ./models {log_dir}"
    os.system(backup_model)

    # load and preprocess dataset
    zinc_data = load_dataset(args)

    train_loader = DataLoader(zinc_data.train,
                              batch_size=1000,
                              shuffle=True,
                              collate_fn=zinc_data.collate,
                              num_workers=4)
    val_loader = DataLoader(zinc_data.val,
                            batch_size=1000,
                            shuffle=False,
                            collate_fn=zinc_data.collate)
    test_loader = DataLoader(zinc_data.test,
                             batch_size=1000,
                             shuffle=False,
                             collate_fn=zinc_data.collate)

    # placeholder of dataset
    dataset = (None, None, None, None, None, None, None)
    # create model
    model = get_model(dataset, args, mode='zinc').cuda()

    print(model)
    # define loss func
    loss_fcn = torch.nn.L1Loss()

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=0.5,
                                                     patience=50,
                                                     verbose=True)

    best_val_loss = sys.maxsize
    best_test_mae = sys.maxsize
    dur = []
    for epoch in range(args.epochs):
        model.train()
        epoch_loss = 0
        epoch_train_mae = 0
        t0 = time.time()
        for iter, (batch_graphs, batch_targets, batch_snorm_n,
                   batch_snorm_e) in enumerate(train_loader):
            batch_x = batch_graphs.ndata['feat'].cuda()  # num x feat
            batch_e = batch_graphs.edata['feat'].cuda()
            batch_snorm_e = batch_snorm_e.cuda()
            batch_targets = batch_targets.cuda()
            batch_snorm_n = batch_snorm_n.cuda()  # num x 1

            optimizer.zero_grad()

            model.g = batch_graphs
            batch_scores = model.forward(batch_x, batch_e, batch_snorm_n,
                                         batch_snorm_e)

            loss = loss_fcn(batch_scores, batch_targets)

            if args.model_name == "FactorGNN" and args.dis_weight > 0.0:
                losses = model.compute_disentangle_loss()
                dis_loss = model.merge_loss(losses) * args.dis_weight
                loss = loss + dis_loss

            loss.backward()
            optimizer.step()

            iter_loss = loss.item()
            iter_mae = F.l1_loss(batch_scores, batch_targets).item()
            epoch_loss += iter_loss
            epoch_train_mae += iter_mae

        dur.append(time.time() - t0)
        epoch_loss /= (iter + 1)
        epoch_train_mae /= (iter + 1)
        # print(f"loss {epoch_loss:.4f}, mae {epoch_train_mae:.4f}")
        val_loss, val_mae = test(model, val_loader)
        test_loss, test_mae = test(model, test_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_test_mae = test_mae
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_test_mae,
                    'args': args
                }, os.path.join(log_dir, 'best_model.pt'))

        print(f"time {np.mean(dur):.2f} epoch {epoch:03d} | " +
              f"train ({epoch_loss:.4f}, {epoch_train_mae:.4f}) | " +
              f"val ({val_loss:.4f}, {val_mae:.4f}) | " +
              f"test ({test_loss:.4f}, {test_mae:.4f}) | " +
              f"best: {best_test_mae:.4f}")

        sys.stdout.flush()

        if optimizer.param_groups[0]['lr'] > 1e-5:
            scheduler.step(val_loss)
Example #22
def main():
    seed_everything()

    # yml = 'configs/base.yml'
    # config = utils.config.load(yml)
    # pprint.pprint(config, indent=2)

    model = get_model(config).cuda()
    bind_model(model)

    args = get_args()
    if args.pause:  ## when in test mode
        print('Inferring Start...')
        nsml.paused(scope=locals())

    if args.mode == 'train':  ### when in training mode
        print('Training Start...')

        # no bias decay
        if config.OPTIMIZER.NO_BIAS_DECAY:
            group_decay, group_no_decay = group_weight(model)
            params = [{'params': group_decay},
                      {'params': group_no_decay, 'weight_decay': 0.0}]
        else:
            params = model.parameters()

        optimizer = get_optimizer(config, params)
        optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
        if config.OPTIMIZER.NO_BIAS_DECAY:
            optimizer.param_groups[1]['initial_lr'] = config.OPTIMIZER.LR

        ###############################################################################################

        if IS_LOCAL:
            prepare_train_directories(config)
            utils.config.save_config(yml, config.LOCAL_TRAIN_DIR)

            checkpoint = utils.checkpoint.get_initial_checkpoint(config)
            if checkpoint is not None:
                last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
            else:
                print('[*] no checkpoint found')
                last_epoch, score, loss = -1, -1, float('inf')
            print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))

        else:
            last_epoch = -1
        ###############################################################################################

        scheduler = get_scheduler(config, optimizer, last_epoch=last_epoch)

        ###############################################################################################
        if IS_LOCAL:
            if config.SCHEDULER.NAME == 'multi_step':
                if config.SCHEDULER.WARMUP:
                    scheduler_dict = scheduler.state_dict()['after_scheduler'].state_dict()
                else:
                    scheduler_dict = scheduler.state_dict()

                milestones = scheduler_dict['milestones']
                step_count = len([i for i in milestones if i < last_epoch])
                optimizer.param_groups[0]['lr'] *= scheduler_dict['gamma'] ** step_count
                if config.OPTIMIZER.NO_BIAS_DECAY:
                    optimizer.param_groups[1]['initial_lr'] *= scheduler_dict['gamma'] ** step_count

            if last_epoch != -1:
                scheduler.step()
        ###############################################################################################
        # for dirname, _, filenames in os.walk(DATASET_PATH):
        #     for filename in filenames:
        #         print(os.path.join(dirname, filename))

        # if preprocessing possible
        preprocess_type = config.DATA.PREPROCESS
        cv2_size = (config.DATA.IMG_W, config.DATA.IMG_H)
        if not IS_LOCAL:
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'NOR'), os.path.join(preprocess_type, 'NOR'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'AMD'), os.path.join(preprocess_type, 'AMD'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'RVO'), os.path.join(preprocess_type, 'RVO'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'DMR'), os.path.join(preprocess_type, 'DMR'), preprocess_type, cv2_size)
            data_dir = preprocess_type
            # data_dir = os.path.join(DATASET_PATH, 'train/train_data')
        else:  # IS_LOCAL
            data_dir = os.path.join(DATASET_PATH, preprocess_type)

        # eda
        # train_std(data_dir, preprocess_type, cv2_size)

        fold_df = split_cv(data_dir, n_splits=config.NUM_FOLDS)
        val_fold_idx = config.IDX_FOLD

        ###############################################################################################

        train_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx, 'train', transform=Albu())
        val_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx, 'val')

        postfix = dict()
        num_epochs = config.TRAIN.NUM_EPOCHS

        val_acc_list = []
        for epoch in range(last_epoch+1, num_epochs):

            if epoch >= config.LOSS.FINETUNE_EPOCH:
                criterion = get_loss(config.LOSS.FINETUNE_LOSS)
            else:
                criterion = get_loss(config.LOSS.NAME)

            train_values = train_single_epoch(config, model, train_loader, criterion, optimizer, scheduler, epoch)
            val_values = evaluate_single_epoch(config, model, val_loader, criterion, epoch)
            val_acc_list.append((epoch, val_values[2]))

            if config.SCHEDULER.NAME != 'one_cyle_lr':
                scheduler.step()

            if IS_LOCAL:
                utils.checkpoint.save_checkpoint(config, model, epoch, val_values[1], val_values[0])

            else:
                postfix['train_loss'] = train_values[0]
                postfix['train_res'] = train_values[1]
                postfix['train_acc'] = train_values[2]
                postfix['train_sens'] = train_values[3]
                postfix['train_spec'] = train_values[4]

                postfix['val_loss'] = val_values[0]
                postfix['val_res'] = val_values[1]
                postfix['val_acc'] = val_values[2]
                postfix['val_sens'] = val_values[3]
                postfix['val_spec'] = val_values[4]

                nsml.report(**postfix, summary=True, step=epoch)

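                # format the validation score as a dotted string for the checkpoint file name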
                val_res = '%.10f' % val_values[1]
                val_res = val_res.replace(".", "")
                val_res = val_res[:4] + '.' + val_res[4:8] + '.' + val_res[8:]
                save_name = 'epoch_%02d_score%s_loss%.4f.pth' % (epoch, val_res, val_values[0])
                # nsml.save(save_name)
                nsml.save(epoch)

        for e, val_acc in val_acc_list:
            print('%02d %s' % (e, val_acc))
Example #23
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args = get_args()
    cfg = Config.fromfile(args.config)
    cfg.device = device

    train = pd.read_csv(cfg.train_csv)
    camera_matrix_inv = np.linalg.inv(kaggle.camera_matrix)

    if 0:
        points_df = pd.DataFrame()
        for col in ['x', 'y', 'z', 'yaw', 'pitch', 'roll']:
            arr = []
            for ps in train['PredictionString']:
                coords = kaggle.str2coords(ps)
                arr += [c[col] for c in coords]
            points_df[col] = arr

        log.info(f'len(points_df): {len(points_df)}')
        log.info(points_df.head())

        img = imread(opj(cfg.train_images, train.iloc[0]['ImageId'] + '.jpg'))
        # plt.figure(figsize=(15,8))
        # plt.imshow(img)
        # plt.show()

        # log.info(train.head())
        # log.info(kaggle.camera_matrix)
        pred_string = train.iloc[0]['PredictionString']
        coords = kaggle.str2coords(pred_string)
        # log.info(coords)

        lens = [len(kaggle.str2coords(s)) for s in train['PredictionString']]

        ############
        plt.figure(figsize=(15, 6))
        sns.countplot(lens)
        # plt.xlabel('Number of cars in image')
        # plt.show()
        plt.savefig('eda/number_cars_in_image.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['x'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        # sns.distplot([kaggle.str2coords(s)[0]['x'] for s in train['PredictionString']]);
        plt.xlabel('x')
        # plt.show()
        plt.savefig('eda/x.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['y'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        plt.xlabel('y')
        # plt.show()
        plt.savefig('eda/y.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(functools.reduce(lambda a, b: a + b,
                                      [[c['z'] for c in kaggle.str2coords(s)]
                                       for s in train['PredictionString']]),
                     bins=500)
        plt.xlabel('z')
        # plt.show()
        plt.savefig('eda/z.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['yaw'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('yaw')
        # plt.show()
        plt.savefig('eda/yaw.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['roll'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('roll')
        # plt.show()
        plt.savefig('eda/roll.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b,
                             [[c['pitch'] for c in kaggle.str2coords(s)]
                              for s in train['PredictionString']]))
        plt.xlabel('pitch')
        # plt.show()
        plt.savefig('eda/pitch.png')

        ############
        plt.figure(figsize=(15, 6))
        sns.distplot(
            functools.reduce(lambda a, b: a + b, [[
                kaggle.rotate(c['roll'], np.pi) for c in kaggle.str2coords(s)
            ] for s in train['PredictionString']]))
        plt.xlabel('roll rotated by pi')
        # plt.show()
        plt.savefig('eda/roll_rotated_by_pi.png')

        plt.figure(figsize=(14, 14))
        plt.imshow(
            imread(opj(cfg.train_images,
                       train.iloc[2217]['ImageId'] + '.jpg')))
        plt.scatter(*kaggle.get_img_coords(
            train.iloc[2217]['PredictionString']),
                    color='red',
                    s=100)
        # plt.show()
        # log.info(kaggle.get_img_coords(train.iloc[2217]['PredictionString']))

        ############
        xs, ys = [], []

        for ps in train['PredictionString']:
            x, y = kaggle.get_img_coords(ps)
            xs += list(x)
            ys += list(y)

        plt.figure(figsize=(18, 18))
        plt.imshow(imread(
            opj(cfg.train_images, train.iloc[2217]['ImageId'] + '.jpg')),
                   alpha=0.3)
        plt.scatter(xs, ys, color='red', s=10, alpha=0.2)
        # plt.show()
        plt.savefig('eda/xs-ys_distribution.png')

        ############
        # view distribution from the sky
        road_width = 3
        road_xs = [
            -road_width, road_width, road_width, -road_width, -road_width
        ]
        road_ys = [0, 0, 500, 500, 0]

        plt.figure(figsize=(16, 16))
        plt.axes().set_aspect(1)
        plt.xlim(-50, 50)
        plt.ylim(0, 100)

        # View road
        plt.fill(road_xs, road_ys, alpha=0.2, color='gray')
        plt.plot([road_width / 2, road_width / 2], [0, 100],
                 alpha=0.4,
                 linewidth=4,
                 color='white',
                 ls='--')
        plt.plot([-road_width / 2, -road_width / 2], [0, 100],
                 alpha=0.4,
                 linewidth=4,
                 color='white',
                 ls='--')

        # View cars
        # plt.scatter(points_df['x'], np.sqrt(points_df['z']**2 + points_df['y']**2), color='red', s=10, alpha=0.1)
        # plt.savefig('eda/view_from_sky.png')

        ############
        fig = px.scatter_3d(points_df,
                            x='x',
                            y='y',
                            z='z',
                            color='pitch',
                            range_x=(-50, 50),
                            range_y=(0, 50),
                            range_z=(0, 250),
                            opacity=0.1)
        # fig.show()

        zy_slope = LinearRegression()
        X = points_df[['z']]
        y = points_df[['y']]
        zy_slope.fit(X, y)
        print('MAE without x:', mean_absolute_error(y, zy_slope.predict(X)))

        # Will use this model later
        xzy_slope = LinearRegression()
        X = points_df[['x', 'z']]
        y = points_df['y']
        xzy_slope.fit(X, y)
        print('MAE with x:', mean_absolute_error(y, xzy_slope.predict(X)))
        print('\ndy/dx = {:.3f} \ndy/dz = {:.3f}'.format(*xzy_slope.coef_))

        plt.figure(figsize=(16, 16))
        plt.xlim(0, 500)
        plt.ylim(0, 100)
        plt.scatter(points_df['z'], points_df['y'], label='Real points')
        X_line = np.linspace(0, 500, 10)
        plt.plot(X_line,
                 zy_slope.predict(X_line.reshape(-1, 1)),
                 color='orange',
                 label='Regression')
        plt.legend()
        plt.xlabel('z coordinate')
        plt.ylabel('y coordinate')
        plt.savefig('eda/linear_regression.png')

        # 3d view
        n_rows = 6
        for idx in range(n_rows):
            fig, axes = plt.subplots(1, 2, figsize=(20, 20))
            img = imread(
                opj(cfg.train_images, train['ImageId'].iloc[idx] + '.jpg'))
            axes[0].imshow(img)
            img_vis = kaggle.visualize(
                img, kaggle.str2coords(train['PredictionString'].iloc[idx]))
            axes[1].imshow(img_vis)
            # plt.show()
            plt.savefig(f'eda/img-view_coords_{idx}.png')

    if 0:
        img0 = imread(opj(cfg.train_images, train.iloc[0]['ImageId'] + '.jpg'))
        img = kaggle.preprocess_image(img0)

        print(train.iloc[0]['PredictionString'])
        mask, regr = kaggle.get_mask_and_regr(
            img0, train.iloc[0]['PredictionString'])
        # print('img.shape', img.shape, 'std:', np.std(img))
        # print('mask.shape', mask.shape, 'std:', np.std(mask))
        # print('regr.shape', regr.shape, 'std:', np.std(regr))

        plt.figure(figsize=(16, 16))
        plt.title('Processed image')
        plt.imshow(img)
        # plt.show()
        plt.savefig('eda/processed_image.png')

        plt.figure(figsize=(16, 16))
        plt.title('Detection Mask')
        plt.imshow(mask)
        # plt.show()
        plt.savefig('eda/detection_mask.png')

        plt.figure(figsize=(16, 16))
        plt.title('Yaw values')
        plt.imshow(regr[:, :, -2])
        # plt.show()
        plt.savefig('eda/yaw_values.png')

    #############
    if 0:
        regr_model = kaggle.get_regr_model(train)

        for idx in range(2):
            fig, axes = plt.subplots(1, 2, figsize=(20, 20))

            for ax_i in range(2):
                img0 = imread(
                    opj(cfg.train_images, train['ImageId'].iloc[idx] + '.jpg'))
                if ax_i == 1:
                    img0 = img0[:, ::-1]
                img = kaggle.preprocess_image(img0, ax_i == 1)
                mask, regr = kaggle.get_mask_and_regr(
                    img0, train['PredictionString'][idx], ax_i == 1)
                regr = np.rollaxis(regr, 2, 0)
                coords = kaggle.extract_coords(
                    np.concatenate([mask[None], regr], 0), regr_model,
                    ax_i == 1)

                axes[ax_i].set_title('Flip = {}'.format(ax_i == 1))
                axes[ax_i].imshow(kaggle.visualize(img0, coords))
            # plt.show()
            plt.savefig(f'eda/{idx}_{ax_i}.png')

    if 0:
        dataset = dataset_factory.CarDataset(cfg.data.train)
        img, mask, regr = dataset[0]

        plt.figure(figsize=(16, 16))
        plt.imshow(np.rollaxis(img, 0, 3))
        # plt.show()
        plt.savefig(f'eda/img.png')

        plt.figure(figsize=(16, 16))
        plt.imshow(mask)
        # plt.show()
        plt.savefig(f'eda/mask.png')

        plt.figure(figsize=(16, 16))
        plt.imshow(regr[:, :, -2])
        # plt.show()
        plt.savefig(f'eda/regr.png')

    #########
    if 1:
        # initial -----------------------------------
        best = {
            'loss': float('inf'),
            'score': 0.0,
            'epoch': -1,
        }

        train_loader = dataset_factory.get_dataloader(cfg.data.train)
        valid_loader = dataset_factory.get_dataloader(cfg.data.valid)
        test_loader = dataset_factory.get_dataloader(cfg.data.test)
        for i, (img, mask, regr) in enumerate(tqdm(test_loader)):
            print(i)
            if i == 3:
                break

        model = model_factory.get_model(cfg)
        optimizer = optimizer_factory.get_optimizer(model, cfg)
        scheduler = scheduler_factory.get_scheduler(cfg, optimizer,
                                                    best['epoch'])
Example #24
def main():
    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    print("=> creating model '{}'".format(args.model))
    if 'sn' in args.model:
        model = model_factory.get_model(
            args.model, using_moving_average=args.using_moving_average)
    else:
        model = model_factory.get_model(args.model)

    if not args.distributed:
        if args.model.startswith('alexnet') or args.model.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # auto resume from a checkpoint
    model_dir = args.model_dir
    start_epoch = 0
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if args.evaluate:
        util.load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = util.load_state(model_dir,
                                                  model,
                                                  optimizer=optimizer)
    writer = SummaryWriter(model_dir)

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

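    # standard ImageNet channel mean/std for normalization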
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer)
        return

    train_dataset_multi_scale = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader_multi_scale = torch.utils.data.DataLoader(
        train_dataset_multi_scale,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    if not args.using_moving_average:
        train_dataset_snhelper = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader_snhelper = torch.utils.data.DataLoader(
            train_dataset_snhelper,
            batch_size=args.batch_size * torch.cuda.device_count(),
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
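        # use RandomResizedCrop augmentation for the first 95 epochs, then fixed Resize + CenterCrop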
        if epoch < 95:
            train(train_loader_multi_scale, model, criterion, optimizer, epoch,
                  writer)
        else:
            train(train_loader, model, criterion, optimizer, epoch, writer)

        if not args.using_moving_average:
            sn_helper(train_loader_snhelper, model)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        util.save_checkpoint(
            model_dir, {
                'epoch': epoch + 1,
                'model': args.model,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Example #25
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=WORKERS,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_ds,
                                               batch_size=BATCH_SIZE,
                                               num_workers=WORKERS,
                                               pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    model = get_model('se_resnext50')

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.7,
                                                           patience=10,
                                                           verbose=True)

    # the only tricky part
    n_epochs = 120
    # logdir = "/tmp/runs/"
    logdir = "/tmp/runs_se_resnext50/"

    callbacks = collections.OrderedDict()

    callbacks['f1_score'] = F1ScoreCallback()
Example #26
def main(args):
    torch.cuda.set_device(args.gpu)
    set_seed(args)

    if args.log_subdir != "":
        log_dir = make_log_dir(args.model_name, args.dataset, args.log_subdir)

        log_file = os.path.join(log_dir, "log.txt")
        sys.stdout = open(log_file, 'w')
        backup_model = f"cp -r ./models {log_dir}"
        os.system(backup_model)

    # load and preprocess dataset
    train_loader, val_loader = load_gin_dataset(args)

    # num_feats = features.shape[1]
    # n_classes = torch.max(labels).item() + 1)

    # create model
    sample = next(iter(train_loader))
    n_class_dict = {'MUTAG': 2, "IMDBBINARY": 2, "COLLAB": 3}

    in_dim = sample[0].ndata['attr'].shape[1]
    if args.dataset == "IMDBBINARY": in_dim = 150
    if args.dataset == "COLLAB": in_dim = 500
    feat = torch.ones(1, in_dim)
    dataset = (None, feat, torch.tensor([n_class_dict[args.dataset] - 1]),
               None, None, None, None)
    model = get_model(dataset, args).cuda()

    # define loss func
    loss_fcn = torch.nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=100,
                                                gamma=0.5)

    dur = []
    best_acc = 0
    for epoch in range(args.epochs):
        total_loss = []
        # total_edges = 0
        # total_nodes = 0
        for step, (g, labels) in enumerate(train_loader):

            # total_nodes += g.number_of_nodes()
            # total_edges += g.number_of_edges()
            # continue
            model.train()
            # update the new graph
            model.g = g
            # print(max(g.in_degrees()))
            t0 = time.time()

            if args.dataset in ["IMDBBINARY", "COLLAB"]:
                if args.dataset == "IMDBBINARY": in_dim = 150
                if args.dataset == "COLLAB": in_dim = 500
                y = g.in_degrees().long().unsqueeze(-1)
                y_onehot = torch.FloatTensor(g.number_of_nodes(), in_dim)
                y_onehot.zero_()
                y_onehot.scatter_(1, y, 1)
                features = y_onehot.float().cuda()
            else:
                features = g.ndata['attr'].float().cuda()

            labels = labels.cuda()
            logits = model(features)  #.view(-1, n_class, n_latent)
            loss = loss_fcn(logits, labels)

            if args.model_name == 'FactorGNN' and args.dis_weight > 0.0:
                losses = model.compute_disentangle_loss()
                dis_loss = model.merge_loss(losses) * args.dis_weight
                loss = loss + dis_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss.append(loss.item())

            dur.append(time.time() - t0)

        loss, acc = eval_net(args, model, train_loader, loss_fcn)
        val_loss, val_acc = eval_net(args, model, val_loader, loss_fcn)
        if val_acc > best_acc:
            best_acc = val_acc
            if args.log_subdir != "":
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': best_acc,
                        'args': args
                    }, os.path.join(log_dir, 'best_model.pt'))
        print(
            f"epoch {epoch:03d} | train_loss {np.mean(total_loss):.3f} | train acc {acc:.3f} | val acc {val_acc:.3f} | best {best_acc:.3f}"
        )
        if args.log_subdir != "":
            sys.stdout.flush()

        scheduler.step()
Example #27
def make_model(model_name: str, hypers: HyperParameters, save_folder: str) -> Model:
    model = get_model(hypers, save_folder, is_train=False)
    model.restore(name=model_name, is_train=False, is_frozen=False)
    return model
Example #28
def forward_model(best_model, method):
    args = best_model['args']

    torch.cuda.set_device(args.gpu)
    set_seed(args)

    # load and preprocess dataset
    all_data = load_dataset(args)
    training = all_data[:int(len(all_data) * 0.7)]
    validation = all_data[int(len(all_data) * 0.7):int(len(all_data) * 0.8)]
    testing = all_data[int(len(all_data) * 0.8):]

    train_loader = DataLoader(training,
                              batch_size=1000,
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(validation,
                            batch_size=1000,
                            shuffle=True,
                            collate_fn=collate)
    test_loader = DataLoader(testing,
                             batch_size=4000,
                             shuffle=False,
                             collate_fn=collate)

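    # placeholder dataset; only the adjacency and factor shapes are filled in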
    dataset = (None, np.zeros((15, 15)), np.zeros(
        (1, args.num_factors)), None, None, None, None)
    # create model
    model = get_model(dataset, args, mode='multilabel').cuda()

    g, labels, gt_adjs = next(iter(test_loader))
    model.load_state_dict(best_model['model_state_dict'])
    model.eval()

    # update the new graph
    model.g = g

    features = g.ndata['feat'].float().cuda()
    labels = labels.cuda()
    logits = model(features)  #.view(-1, n_class, n_latent)
    factors = model.get_factor()

    batch_g = factors[0]
    unbatch_g = dgl.unbatch(batch_g)

    ged_ins = compute_GED()

    total_ged = []
    total_factor_map = collections.defaultdict(list)
    sample_n = 0
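    # match each predicted factor graph to the ground-truth factors and accumulate the normalized edit distance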
    for gt_list, pred_g in tqdm.tqdm(zip(gt_adjs, unbatch_g)):
        # dgl graph to adj
        pred_list = generate_adj_factor_graph(pred_g)
        ged, factor_map = ged_ins.hungarian_match(gt_list, pred_list, sample_n)

        for edge_id in factor_map.keys():
            total_factor_map[
                edge_id] = total_factor_map[edge_id] + factor_map[edge_id]

        total_ged.append(ged / len(gt_list))
        sample_n += 1

    c_score = compute_consistant(total_factor_map)

    print(
        f" c_score {c_score:.3f} | ged: {np.mean(total_ged):.3f} $\pm$ {np.std(total_ged):.3f}"
    )
Example #29
        train_data = get_dataset(args, args.train_img_transformers, args.train_tnsr_transformers, 'train')
        dev_data = get_dataset(args, args.test_img_transformers, args.test_tnsr_transformers, 'dev')

    if args.test_phase:
        if not args.train_phase:
            print("\nLoading dev data...")
            dev_data = get_dataset(args, args.test_img_transformers, args.test_tnsr_transformers, 'dev')
        print("\nLoading test data...")
        test_data = get_dataset(args, args.test_img_transformers, args.test_tnsr_transformers, 'test')
    
    if args.forward_thru_convs:
        # Get model output dimensions, before classification
        args.rolled_size = get_rolled_out_size(args)

    # Load model
    model, optimizer = get_model(args)
    if args.snapshot_path is not None:
        try:
            model, optimizer, args.lr, args.epoch_stats = load_model(args.snapshot_path, model, optimizer, args)
        except:
            print("\n Error loading snapshot...Starting run from scratch.")
    else:
        args.epoch_stats = None
    print(model)
    
    args.run_time = time.strftime(RESULTS_DATE_FORMAT, time.localtime())

    # Train model and get statistics
    model_stats = {}
    if args.train_phase:
        print("\nBeginning Training Phase:")
Example #30
def main(args):
    torch.cuda.set_device(args.gpu)
    set_seed(args)

    log_dir = make_log_dir(args.model_name, args.dataset, args.log_subdir)

    log_file = os.path.join(log_dir, "log.txt")
    sys.stdout = open(log_file, 'w')
    backup_model = f"cp -r ./models {log_dir}"
    os.system(backup_model)

    # load and preprocess dataset
    all_data = load_dataset(args)
    training = all_data[:int(len(all_data) * 0.7)]
    validation = all_data[int(len(all_data) * 0.7):int(len(all_data) * 0.8)]
    testing = all_data[int(len(all_data) * 0.8):]

    train_loader = DataLoader(training,
                              batch_size=1000,
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(validation,
                            batch_size=1000,
                            shuffle=True,
                            collate_fn=collate)
    test_loader = DataLoader(testing,
                             batch_size=1000,
                             shuffle=False,
                             collate_fn=collate)

    dataset = (None, np.zeros((15, 15)), np.zeros(
        (1, args.num_factors)), None, None, None, None)
    # create model
    model = get_model(dataset, args, mode='multilabel').cuda()

    print(model)

    # define loss func
    loss_fcn = torch.nn.BCEWithLogitsLoss()

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    best_val_f1 = 0
    best_test_f1 = 0
    dur = []
    for epoch in range(args.epochs):
        for step, (g, labels, gt_adjs) in enumerate(train_loader):
            model.train()

            # update the new graph
            model.g = g

            t0 = time.time()
            features = g.ndata['feat'].float().cuda()
            labels = labels.cuda()
            logits = model(features)  #.view(-1, n_class, n_latent)
            loss = loss_fcn(logits, labels)

            if args.model_name == 'FactorGNN' and args.dis_weight > 0.0:
                losses = model.compute_disentangle_loss()
                dis_loss = model.merge_loss(losses) * args.dis_weight
                loss = loss + dis_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            dur.append(time.time() - t0)

        val_micro_f1 = test(model, val_loader)
        test_micro_f1 = test(model, test_loader)

        if val_micro_f1 > best_val_f1:
            best_val_f1 = val_micro_f1
            best_test_f1 = test_micro_f1
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_test_f1,
                    'args': args
                }, os.path.join(log_dir, 'best_model.pt'))

        print(f"time {np.mean(dur):.2f} epoch {epoch:03d} | " +
              f"val ({val_micro_f1:.4f}) | " +
              f"test ({test_micro_f1:.4f}) | " + f"best: {best_test_f1:.4f}")

        sys.stdout.flush()