Example #1
    def calculate_flops_lut(self, file_name, input_size):
        shared_config = torch.zeros(18, len(self.tasks))
        model = BranchMobileNetV2(tasks=['semseg'],
                                  branch_config=shared_config)
        in_shape = (1, 3, input_size[0], input_size[1])
        n_blocks = len(model.encoder)
        flops = torch.zeros(n_blocks, device='cpu')

        model.eval()
        with torch.no_grad():
            for idx, m in enumerate(model.encoder):
                m = resources.add_flops_counting_methods(m)
                m.start_flops_count()
                cache_inputs = torch.rand(in_shape)
                _ = model(cache_inputs)
                block_flops = m.compute_average_flops_cost()
                m.stop_flops_count()
                flops[idx] = block_flops
        flops_dict = {'per_block_flops': flops.numpy().tolist()}
        del model

        # save the per-block FLOPs to the LUT file
        utils.write_json(flops_dict, file_name)

        return flops_dict
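Assuming `utils.write_json` writes plain JSON, the LUT produced above can be read back with the standard library. A minimal sketch of loading it and summing the per-block cost (`flops_lut.json` is a placeholder for whatever `file_name` was passed in):

import json

# hypothetical read-back of the LUT written by calculate_flops_lut
with open('flops_lut.json') as f:
    lut = json.load(f)

per_block = lut['per_block_flops']        # one float per encoder block
total_gflops = sum(per_block) / 1e9
print(f'{len(per_block)} blocks, {total_gflops:.2f} GFLOPs')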
Example #2
 def handle(self):
     """
     Stores job inside data_dir directory.
     """
     data_dir = os.path.join(
         self.job.work_dir,
         ExaParserConfig.get("disk_data_handler", "data_dir"))
     if not os.path.exists(data_dir):
         os.makedirs(data_dir)
     write_json(os.path.join(data_dir, "job.json"), self.job.to_json())
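Note that the `write_json` helpers in these examples do not all share the same signature: Example #1 passes the data first (`utils.write_json(flops_dict, file_name)`), while this example passes the path first. A minimal sketch of the path-first variant assumed here (the actual helper may add encoding or indentation options):

import json

def write_json(path, content, indent=4):
    # path-first variant: serialize `content` as JSON and write it to `path`
    with open(path, 'w') as f:
        json.dump(content, f, indent=indent)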
Example #3
def main(config):
    """
    Main function for training LSTMs.
    After training, results on validation & test sets are recorded in the specified log_path.
    """
    dataset, train_loader, subgraph_loader = get_data(config)

    # define logger
    Path(config['log_path']).mkdir(parents=True, exist_ok=True)
    logger = loggers.TensorBoardLogger(config['log_path'],
                                       version=config['version'])
    logger.log_hyperparams(params=config)

    # define model
    model = Model(config, dataset, train_loader, subgraph_loader)
    chkpt = None if config['load'] is None else get_checkpoint_path(
        config['load'])

    trainer = pl.Trainer(gpus=config['gpus'],
                         logger=logger,
                         max_epochs=config['epochs'],
                         distributed_backend='dp',
                         precision=16 if config['use_amp'] else 32,
                         default_root_dir=config['log_path'],
                         deterministic=True,
                         resume_from_checkpoint=chkpt,
                         auto_lr_find=config['auto_lr'],
                         auto_scale_batch_size=config['auto_bsz'])
    trainer.fit(model)

    for phase in ['test', 'valid']:
        if phase == 'valid':
            trainer.eval_split = 'val'
            trainer.eval_mask = dataset.data.val_mask
            print(phase, trainer.eval_split)
        ret = trainer.test()
        if isinstance(ret, list):
            ret = ret[0]

        per_node = ret.pop('per_node')
        test_results = ret
        res_dir = Path(config['log_path']) / 'default'
        if config['version'] is not None:
            res_dir = res_dir / config['version']
        else:
            res_dir = res_dir / ('results_' + str(config['seed']))
        print(phase, ':', test_results)
        Path(res_dir).mkdir(parents=True, exist_ok=True)
        write_json(test_results,
                   res_dir / f'{phase}_results.json',
                   sort_keys=True,
                   verbose=True)
        write_pkl(per_node, res_dir / f'{phase}_per_node.pkl')

        path_results = Path(config['log_path']) / f'all_{phase}_results.csv'
        record_results(path_results, config, test_results)
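The `record_results` call at the end is not defined in this snippet; a plausible minimal sketch is an append-only CSV writer that stores the run's identifiers next to its metrics (hypothetical implementation, the real helper may record different columns):

import csv
from pathlib import Path

def record_results(csv_path, config, results):
    # hypothetical sketch: append one row of identifiers + metrics to a shared CSV
    row = {'version': config.get('version'), 'seed': config.get('seed'), **results}
    csv_path = Path(csv_path)
    write_header = not csv_path.exists()
    with open(csv_path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(row))
        if write_header:
            writer.writeheader()
        writer.writerow(row)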
Example #4
def test_branched(device, tasks, testloader, model, metrics_dict, exp_dir):

    model.eval()

    # get resources
    sample = next(iter(testloader))
    height, width = sample['image'].shape[-2:]
    gflops = resources.compute_gflops(model,
                                      device=device,
                                      in_shape=(1, 3, height, width))
    params = resources.count_parameters(model)
    results = {'gmadds': gflops / 2.0, 'mparams': params / 1e6}

    for idx, samples in enumerate(testloader):

        inputs = samples['image'].to(device, non_blocking=True)
        target = {
            task: samples[task].to(device, non_blocking=True)
            for task in tasks
        }
        im_size = tuple(x.item() for x in samples['meta']['im_size'])
        im_name = samples['meta']['image'][0]

        output = model(inputs)

        for task in tasks:

            uniq = torch.unique(target[task])
            if len(uniq) == 1 and uniq[0] == 255:
                continue

            ground_truth = torch.squeeze(target[task], dim=0).cpu().numpy()
            prediction = torch.squeeze(output[task], dim=0).cpu().numpy()

            # metrics want numpy array of format (H x W x C)
            ground_truth = ground_truth.transpose(1, 2, 0)
            prediction = prediction.transpose(1, 2, 0)

            metrics_dict[task].update(prediction, ground_truth, im_size,
                                      im_name)

        if (idx + 1) % 100 == 0:
            print('{} / {} images done.'.format(idx + 1, len(testloader)))

    for task in tasks:
        key = '_'.join([task, metrics_dict[task].__class__.__name__])
        results[key] = metrics_dict[task].get_score()
    utils.write_json(results, Path(exp_dir) / 'eval.json')
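The evaluation loop only requires each entry of `metrics_dict` to expose `update(prediction, ground_truth, im_size, im_name)` and `get_score()`. A minimal placeholder metric satisfying that interface (illustrative only, not one of the project's actual metric classes):

import numpy as np

class MeanAbsError:
    # illustrative metric: mean absolute error averaged over all test images
    def __init__(self):
        self.errors = []

    def update(self, prediction, ground_truth, im_size=None, im_name=None):
        # prediction and ground_truth are H x W x C numpy arrays
        diff = prediction.astype(np.float64) - ground_truth.astype(np.float64)
        self.errors.append(float(np.abs(diff).mean()))

    def get_score(self):
        return float(np.mean(self.errors))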
Example #5
def convert_timeseries_into_mmap(data_dir, save_dir, n_rows=100000):
    """
    Read the per-split CSV files and convert the time series data into a memory-mapped file.
    """
    save_path = Path(save_dir) / 'ts.dat'
    shape = (n_rows, 24, 34)
    write_file = np.memmap(save_path, dtype=np.float32, mode='w+', shape=shape)
    ids = []
    n = 0
    info = {}
    info['name'] = 'ts'

    for split in ['train', 'val', 'test']:
        print('split: ', split)
        csv_path = Path(data_dir) / split / 'timeseries.csv'
        df = pd.read_csv(csv_path)
        arr = df.values
        new = np.reshape(arr, (-1, 24, 35))
        pos_to_id = new[:, 0, 0]
        ids.append(pos_to_id)
        new = new[:, :, 1:]  # no patient column
        write_file[n:n + len(new), :, :] = new
        info[split + '_len'] = len(new)
        n += len(new)
        del new, arr

    info['total'] = n
    info['shape'] = shape
    info['columns'] = list(df)[1:]
    del df

    ids = np.concatenate(ids)
    id2pos = {pid: pos for pos, pid in enumerate(ids)}
    pos2id = {pos: pid for pos, pid in enumerate(ids)}

    assert len(set(ids)) == len(ids)

    print('saving..')
    write_pkl(id2pos, Path(save_dir) / 'id2pos.pkl')
    write_pkl(pos2id, Path(save_dir) / 'pos2id.pkl')
    write_json(info, Path(save_dir) / 'ts_info.json')
    print(info)
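Reading the converted series back requires the same dtype and shape used when writing; a short sketch that uses the saved `ts_info.json` and `id2pos.pkl` (assuming `write_json` and `write_pkl` produce plain JSON and pickle files, and that `save_dir` is the directory used above):

import json
import pickle
import numpy as np
from pathlib import Path

save_dir = Path('mmap_data')                      # placeholder for the save_dir used above
with open(save_dir / 'ts_info.json') as f:
    info = json.load(f)

ts = np.memmap(save_dir / 'ts.dat', dtype=np.float32, mode='r',
               shape=tuple(info['shape']))        # (n_rows, 24, 34); only the first info['total'] rows are valid

with open(save_dir / 'id2pos.pkl', 'rb') as f:
    id2pos = pickle.load(f)

patient_id = next(iter(id2pos))                   # any patient id
patient_ts = ts[id2pos[patient_id]]               # its 24 x 34 time series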
Example #6
    def __init__(self, config, resume=None, modification=None):
        """
        Class to parse the configuration JSON file. Handles hyper-parameters for training,
        initialization of modules, checkpoint saving and the logging module.
        :param config: Dict containing configurations and hyper-parameters for training,
            e.g. the contents of `config.json`.
        :param resume: String, path to the checkpoint being loaded.
        :param modification: Dict keychain:value, specifying config entries to be replaced.
        """
        # load config file and apply modification
        self._config = _update_config(config, modification)
        self.resume = resume
        # convert 'distributed' to bool; it may arrive as a string from a modification or from the default json file
        self.update_config('distributed', (self.config['distributed'] == 'true') or self.config['distributed'])

        if self.config['local_rank'] == 0:  # only the local master process creates the output dirs
            # set save_dir where trained model and log will be saved.
            save_dir = Path(self.config['trainer']['save_dir'])

            experiment_name = self.config['name']

            self.save_dir = save_dir / 'models' / experiment_name
            self.log_dir = save_dir / 'log' / experiment_name
            self.tensorboard_dir = save_dir / 'tensorboard' / experiment_name

            # make directory for saving checkpoints and log.
            self.save_dir.mkdir(parents=True, exist_ok=False)
            self.log_dir.mkdir(parents=True, exist_ok=False)
            self.tensorboard_dir.mkdir(parents=True, exist_ok=False)

            # save the updated config file to the checkpoint dir (only the local master writes it)
            write_json(self.config, self.save_dir / 'config.json')

            # configure the logging module (only the local master sets up logging)
            setup_logging(self.log_dir)
            self.log_levels = {
                0: logging.WARNING,
                1: logging.INFO,
                2: logging.DEBUG
            }
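`_update_config` is referenced but not shown; it applies `modification` entries of the form keychain -> value onto the nested config dict. A minimal sketch under the assumption that keychains are ';'-separated nested keys (the real helper may use a different separator or semantics):

from functools import reduce

def _update_config(config, modification):
    # hypothetical sketch: apply {keychain: value} overrides to a nested config dict
    if modification is None:
        return config
    for keychain, value in modification.items():
        if value is None:
            continue
        keys = keychain.split(';')
        parent = reduce(lambda d, k: d[k], keys[:-1], config)
        parent[keys[-1]] = value
    return config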
Example #7
def main_test(hparams, path_results=None):
    """
    Main function to load and evaluate a trained model.
    """
    assert (hparams['load'] is not None) and (hparams['phase'] is not None)
    phase = hparams['phase']
    log_dir = hparams['load']

    # Load trained model
    print(f'Loading from {log_dir} to evaluate {phase} data.')

    model, config, dataset, train_loader, subgraph_loader = Model.load_model(
        log_dir,
        multi_gpu=hparams['multi_gpu'],
        num_workers=hparams['num_workers'])
    trainer = pl.Trainer(gpus=hparams['gpus'],
                         logger=None,
                         max_epochs=hparams['epochs'],
                         default_root_dir=hparams['log_path'],
                         deterministic=True)
    # Evaluate the model
    if phase == 'valid':
        trainer.eval_split = 'val'
        trainer.eval_mask = dataset.data.val_mask
        print(phase, trainer.eval_split)

    test_results = trainer.test(model)
    if isinstance(test_results, list):
        test_results = test_results[0]
    per_node = test_results.pop('per_node')
    print(phase, ':', test_results)
    # Save evaluation results
    results_path = Path(log_dir) / f'{phase}_results.json'
    write_json(test_results, results_path, sort_keys=True, verbose=True)
    write_pkl(per_node, Path(log_dir) / f'{phase}_per_node.pkl')

    if path_results is None:
        path_results = Path(log_dir).parent / 'results.csv'
    tmp = {'version': hparams['version']}
    tmp = {**tmp, **config}
    record_results(path_results, tmp, test_results)
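A minimal invocation only needs the hparams keys read above; for example (all values are placeholders):

hparams = {
    'load': 'logs/version_0',    # directory of the trained model to evaluate
    'phase': 'test',             # or 'valid'
    'multi_gpu': False,
    'num_workers': 4,
    'gpus': 1,
    'epochs': 1,                 # not used for testing, but passed to the Trainer
    'log_path': 'logs',
    'version': 'version_0',
}
main_test(hparams)               # results are written next to the loaded checkpoint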
Example #8
def convert_into_mmap(data_dir,
                      save_dir,
                      csv_name,
                      n_cols=None,
                      n_rows=100000):
    """
    Read the per-split CSV files and convert the flat data into a memory-mapped file.
    """
    csv_to_cols = {
        'diagnoses': 520,
        'diagnoses_1033': 1034,
        'labels': 5,
        'flat': 58
    }  # including patient column
    n_cols = (csv_to_cols[csv_name] - 1) if n_cols is None else n_cols
    shape = (n_rows, n_cols)

    save_path = Path(save_dir) / f'{csv_name}.dat'
    write_file = np.memmap(save_path, dtype=np.float32, mode='w+', shape=shape)

    info = {'name': csv_name, 'shape': shape}

    n = 0

    for split in ['train', 'val', 'test']:
        print('split: ', split)
        csv_path = Path(data_dir) / split / f'{csv_name}.csv'
        df = pd.read_csv(csv_path)
        arr = df.values[:, 1:]  # cut out patient column
        arr_len = len(arr)
        write_file[n:n + arr_len, :] = arr  # write into mmap
        info[split + '_len'] = arr_len
        n += arr_len
        del arr

    info['total'] = n
    info['columns'] = list(df)[1:]

    write_json(info, Path(save_dir) / f'{csv_name}_info.json')
    print(info)
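Since the three splits are written back-to-back into the memmap, their row ranges can be recovered from the saved `*_info.json`. A small read-back sketch for the 'flat' table (file locations assumed to mirror the writer above):

import json
import numpy as np
from pathlib import Path

save_dir = Path('mmap_data')                      # placeholder for the save_dir used above
with open(save_dir / 'flat_info.json') as f:
    info = json.load(f)

data = np.memmap(save_dir / 'flat.dat', dtype=np.float32, mode='r',
                 shape=tuple(info['shape']))

offsets, start = {}, 0
for split in ['train', 'val', 'test']:
    end = start + info[split + '_len']
    offsets[split] = (start, end)
    start = end

val_rows = data[slice(*offsets['val'])]           # rows belonging to the 'val' split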
Example #9
def main_test(hparams, path_results=None):
    """
    Main function to load and evaluate a trained model.
    """
    assert (hparams['load'] is not None) and (hparams['phase'] is not None)
    phase = hparams['phase']
    log_dir = hparams['load']

    # Load trained model
    print(f'Loading from {log_dir} to evaluate {phase} data.')

    model, config, loaderDict, collate = DynamicGraphModel.load_model(
        log_dir,
        data_dir=hparams['data_dir'],
        multi_gpu=hparams['multi_gpu'],
        num_workers=hparams['num_workers'])
    trainer = pl.Trainer(
        gpus=hparams['gpus'],
        logger=None,
        max_epochs=hparams['epochs'],
        default_root_dir=hparams['log_path'],
        deterministic=True
    )
    # Evaluate the model
    test_dataloader = DataLoader(loaderDict[phase],
                                 collate_fn=collate,
                                 batch_size=config['batch_size'],
                                 num_workers=config['num_workers'],
                                 shuffle=False)
    test_results = trainer.test(model, test_dataloaders=test_dataloader)
    if isinstance(test_results, list):
        test_results = test_results[0]
    per_node = test_results.pop('per_node')
    print(phase, ':', test_results)
    # Save evaluation results
    results_path = Path(log_dir) / f'{phase}_results.json'
    write_json(test_results, results_path, sort_keys=True, verbose=True)
    write_pkl(per_node, Path(log_dir) / f'{phase}_per_node.pkl')

    if path_results is None:
        path_results = Path(log_dir).parent / 'results.csv'
    tmp = {'version': hparams['version']}
    tmp = {**tmp, **config}
    record_results(path_results, tmp, test_results)
Example #10
def train_search(device, start_epoch, max_epochs, tasks, trainloader_weight,
                 trainloader_arch, model, loss, optimizer_weight,
                 optimizer_arch, exp_dir):

    writer = SummaryWriter(log_dir=exp_dir)

    iter_per_epoch = len(
        trainloader_weight.dataset) // trainloader_weight.batch_size
    total_iter = iter_per_epoch * max_epochs
    delay_epochs = max_epochs // 20

    model.train()
    for epoch in range(start_epoch, max_epochs + 1):

        model.warmup_flag = (epoch <= delay_epochs)
        # set the gumbel temperature according to a linear schedule
        model.gumbel_temp = min(
            5.0 - (epoch - delay_epochs - 1) /
            (max_epochs - delay_epochs - 1) * (5.0 - 0.1), 5.0)

        arch_loss = 0
        arch_counter = 0

        if epoch > delay_epochs:
            print('modifying architecture...')

            # we reset the arch optimizer state
            optimizer_arch.state = defaultdict(dict)

            # we use current batch statistics in search period
            model.freeze_encoder_bn_running_stats()

            for samples_search in trainloader_arch:

                inputs_search = samples_search['image'].to(device,
                                                           non_blocking=True)
                target_search = {
                    task: samples_search[task].to(device, non_blocking=True)
                    for task in tasks
                }

                optimizer_arch.zero_grad()

                for task in tasks:
                    # many images don't have human parts annotations, skip those
                    uniq = torch.unique(target_search[task])
                    if len(uniq) == 1 and uniq[0] == 255:
                        continue

                    output = model(inputs_search, task=task)
                    tot_loss = loss(output, target_search, task=task)
                    tot_loss.backward()

                    arch_loss += tot_loss.item()
                    arch_counter += 1

                optimizer_arch.step()

            # we reset the main optimizer state because arch has changed
            optimizer_weight.state = defaultdict(dict)

            # reset BN running stats since the architecture has changed
            model.unfreeze_encoder_bn_running_stats()
            model.reset_encoder_bn_running_stats()

        for batch_idx, samples in enumerate(trainloader_weight):

            inputs = samples['image'].to(device, non_blocking=True)
            target = {
                task: samples[task].to(device, non_blocking=True)
                for task in tasks
            }

            current_loss = 0
            counter = 0

            for task in tasks:
                # many images don't have human parts annotations, skip those
                uniq = torch.unique(target[task])
                if len(uniq) == 1 and uniq[0] == 255:
                    continue

                optimizer_weight.zero_grad()

                output = model(inputs, task=task)
                tot_loss = loss(output, target, task=task, omit_resource=True)
                tot_loss.backward()

                optimizer_weight.step()

                current_loss += tot_loss.item()
                counter += 1

            if (batch_idx + 1) % 100 == 0:
                n_iter = (epoch - 1) * iter_per_epoch + batch_idx + 1
                print('Train Iterations: {}, Loss: {:.4f}'.format(
                    utils.progress(n_iter, total_iter),
                    current_loss / counter))
                writer.add_scalar('loss_current', current_loss / counter,
                                  n_iter)
                writer.add_scalar('arch_loss',
                                  arch_loss / max(1, arch_counter), n_iter)
                writer.add_scalar('gumbel_temp', model.gumbel_temp, n_iter)
                for name, param in model.named_arch_parameters():
                    writer.add_image(name,
                                     torch.nn.functional.softmax(param.data,
                                                                 dim=-1),
                                     n_iter,
                                     dataformats='HW')

        # save model
        state = {
            'state_dict': model.state_dict(),
            'tasks': tasks,
            'epoch': epoch,
            'optimizer_weight': optimizer_weight.state_dict(),
            'optimizer_arch': optimizer_arch.state_dict(),
        }
        torch.save(state, Path(exp_dir) / 'checkpoint.pth')

    branch_config = model.get_branch_config()
    utils.write_json({'config': branch_config},
                     Path(exp_dir) / 'branch_config.json')
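The Gumbel temperature used during the search decays linearly from 5.0 to 0.1 once the warm-up (`delay_epochs`) is over. Pulling the schedule out into its own function makes it easier to inspect; a small sketch with the same constants as the loop above:

def gumbel_temperature(epoch, delay_epochs, max_epochs, start=5.0, end=0.1):
    # linear decay from `start` to `end` between epoch delay_epochs + 1 and max_epochs
    if epoch <= delay_epochs:
        return start
    frac = (epoch - delay_epochs - 1) / (max_epochs - delay_epochs - 1)
    return min(start - frac * (start - end), start)

# e.g. with max_epochs=100 (so delay_epochs=5): epoch 6 -> 5.0, epoch 53 -> ~2.55, epoch 100 -> 0.1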
Example #11
                stats_log = [
                    '%s: %.4f' % (v, np.mean(stats[k])) for k, v in stats_str
                    if len(stats[k]) > 0
                ]
                stats_log.append('%i samples/s' % int(n_words_proc /
                                                      (time.time() - tic)))
                logger.info(('%06i - ' % n_iter) + ' - '.join(stats_log))

                # reset
                tic = time.time()
                n_words_proc = 0
                for k, _ in stats_str:
                    del stats[k][:]

        if params.print_grads:
            write_json(
                os.path.join(params.exp_path,
                             'weights_{}.json'.format(n_epoch)), weights)
            write_json(
                os.path.join(params.exp_path, 'grads_{}.json'.format(n_epoch)),
                grads)

        # embeddings / discriminator evaluation
        to_log = OrderedDict({'n_epoch': n_epoch})
        evaluator.all_eval(to_log)
        evaluator.eval_dis(to_log)

        # JSON log / save best model / end of epoch
        logger.info("__log__:%s" % json.dumps(to_log))
        trainer.save_best(to_log, VALIDATION_METRIC)
        logger.info('End of epoch %i.\n\n' % n_epoch)