Example #1
def test_build_triplet():
    config = read_yaml("./tests/config/triplet.yaml")
    dset_config = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(config, dset_config, mode="train")
    valid_dset = get_dataset(config, dset_config, mode="valid")
    model = get_model(config, dset_config)

    trainer = get_trainer(config, dset_config)
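
The `get_trainer(config, dset_config)` factory itself is not shown in any of these examples. A minimal sketch of what such a config-keyed factory might look like; the `TripletTrainer` class and the `config["trainer"]` key are illustrative assumptions, not taken from the snippet above.

# Hypothetical sketch only: the registry contents and the "trainer" config key
# are assumptions.
class TripletTrainer:
    def __init__(self, config, dset_config):
        self.config = config
        self.dset_config = dset_config

    def train(self, dataset=None, valid_dataset=None, model=None):
        pass  # placeholder training loop

_TRAINERS = {"triplet": TripletTrainer}

def get_trainer(config, dset_config):
    """Instantiate the trainer class registered under config["trainer"]."""
    return _TRAINERS[config["trainer"]](config, dset_config)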
Example #2
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed_backend)

    # Load configuration
    config = load_config(args.config)

    # Prepare output directory
    output_dir = os.path.expandvars(args.output_dir if args.output_dir is not None
                                    else config['output_dir'])
    os.makedirs(output_dir, exist_ok=True)

    # Setup logging
    log_file = os.path.join(output_dir, 'out_%i.log' % rank)
    config_logging(verbose=args.verbose, log_file=log_file)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if rank == 0:
        logging.info('Configuration: %s', config)

    # Load the datasets
    is_distributed = args.distributed_backend is not None
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=is_distributed, **config['data_config'])

    # Load the trainer
    gpu = (rank % args.ranks_per_node) if args.rank_gpu else args.gpu
    if gpu is not None:
        logging.info('Using GPU %i', gpu)
    trainer = get_trainer(name=config['trainer'], distributed=is_distributed,
                          rank=rank, output_dir=output_dir, gpu=gpu)
    # Build the model
    trainer.build_model(**config['model_config'])
    if rank == 0:
        trainer.print_model_summary()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['train_config'])
    trainer.write_summaries()

    # Print some conclusions
    logging.info('Finished training')
    logging.info('Train samples %g time %g s rate %g samples/s',
                 np.mean(summary['train_samples']),
                 np.mean(summary['train_time']),
                 np.mean(summary['train_rate']))
    if valid_data_loader is not None:
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     np.mean(summary['valid_samples']),
                     np.mean(summary['valid_time']),
                     np.mean(summary['valid_rate']))

    logging.info('All done!')
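
`init_workers` is shared by several of these scripts and returns `(rank, n_ranks)`. A plausible sketch, assuming a PyTorch process group and treating `backend=None` as single-process mode:

import torch.distributed as dist

def init_workers(backend=None):
    """Initialize distributed workers if a backend is given; return (rank, n_ranks)."""
    if backend is None:
        return 0, 1
    dist.init_process_group(backend=backend)
    return dist.get_rank(), dist.get_world_size()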
Example #3
def test_build_arcface():
    config = read_yaml("./tests/config/arcface.yaml")
    dset_config = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(config, dset_config, mode="train")
    valid_dset = get_dataset(config, dset_config, mode="valid")
    model = get_model(config, dset_config)

    trainer = get_trainer(config, dset_config)
    trainer.train(dataset=dset, valid_dataset=valid_dset, model=model)
Example #4
def main(cfg: DictConfig) -> None:
    """Main function
    
    Builds the model, loads the data, trains, and evaluates.

    Args:
        cfg: Config.
    
    Returns:
        None.

    """

    trainer = get_trainer(cfg)
    trainer.execute(eval=cfg.train.eval)
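
The `cfg: DictConfig` annotation suggests a Hydra entry point. A sketch of how such a `main` is usually wired up, assuming Hydra >= 1.2; the config path and name are assumptions:

import hydra
from omegaconf import DictConfig

@hydra.main(config_path="configs", config_name="train", version_base=None)
def main(cfg: DictConfig) -> None:
    print(cfg)  # the real body would build and run the trainer as in the example above

if __name__ == "__main__":
    main()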
Example #5
def get_edge_scores(result_dir, n_tasks, task):
    """
    - Takes config info for triplet training dataset (different from doublet training dataset),
    - Runs the dataset through the trained doublet network,
    - Returns edge scores with same indices as edge network input
    """

    # Load configs
    config = load_config_dir(result_dir)
    logging.info('Inferring triplets on model configuration:')
    logging.info(config)

    # Find the best epoch
    summaries = load_summaries(config)
    best_idx = summaries.valid_loss.idxmin()
    logging.info('Best epoch summary:\n%s', summaries.loc[[best_idx]])

    # Build the trainer and load best checkpoint
    trainer = get_trainer(output_dir=config['output_dir'],
                          gpu=0,
                          **config['trainer'])
    trainer.build_model(optimizer_config=config['optimizer'],
                        **config['model'])

    best_epoch = summaries.epoch.loc[best_idx]
    trainer.load_checkpoint(checkpoint_id=best_epoch)

    logging.info("With weight system:")
    logging.info(trainer.model)
    logging.info("On device:")
    logging.info(trainer.device)

    # Load the test dataset
    test_loader, filelist = get_seed_data_loader(config, n_tasks, task)

    # Apply the model
    test_preds, test_targets = trainer.device_predict(test_loader)
    logging.info('Graph prediction complete')

    # Get hit ID data and graph names
    graph_dataset, graph_names = load_triplets(test_loader, filelist)

    return test_preds, graph_dataset, graph_names
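
The best-epoch lookup in this example leans on a pandas summaries frame. A self-contained illustration of the same `idxmin`-based selection; the column names mirror the snippet, the numbers are made up:

import pandas as pd

summaries = pd.DataFrame({
    "epoch": [0, 1, 2, 3],
    "valid_loss": [0.90, 0.50, 0.62, 0.55],
})
best_idx = summaries.valid_loss.idxmin()    # row label with the smallest valid loss
best_epoch = summaries.epoch.loc[best_idx]  # -> 1, the checkpoint to reload
print(summaries.loc[[best_idx]])            # the full best row, as a one-row frame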
Example #6
def main(config, dset_config):

    root_dir = Path(increment_path(os.path.join(config.result_dir, "runs")))
    Path(os.path.join(root_dir, "weights")).mkdir(parents=True, exist_ok=True)
    config.result_dir = root_dir
    log = setup_logger.setFileHandler(
        filename=os.path.join(root_dir, "log.txt"))
    save_yaml(config)
    save_hostname(config)
    dt_now = datetime.datetime.now()
    logger.info(f"\n Start: {dt_now.strftime('%Y年%m月%d日 %H:%M:%S')}")

    dset = get_dataset(config, dset_config, mode="train")
    valid_dset = get_dataset(config, dset_config, mode="valid")
    model = get_model(config, dset_config)

    trainer = get_trainer(config, dset_config)
    trainer.train(dataset=dset, valid_dataset=valid_dset, model=model)

    trainer.save()
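
`increment_path` is not defined here; by convention (e.g., YOLOv5-style run directories) it returns the first unused variant of a path. A minimal sketch under that assumption:

from pathlib import Path

def increment_path(path):
    """Return the first of path, path2, path3, ... that does not exist yet."""
    path = Path(path)
    candidate, n = path, 2
    while candidate.exists():
        candidate = path.with_name(f"{path.name}{n}")
        n += 1
    return candidate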
Example #7
def main(cfg):
    print(cfg)
    print()

    # setup logdir, writer and logger
    logdir = os.path.join(cfg['root'], cfg['logdir'])

    os.makedirs(logdir, exist_ok=True)

    writer = SummaryWriter(log_dir=logdir)

    trainer_name = cfg['trainer']

    with open(os.path.join(logdir, trainer_name + '.yml'), 'w') as fp:
        yaml.dump(cfg, fp)

    logger = get_logger(logdir)

    trainer = get_trainer(trainer_name)(cfg, writer, logger)
    print()

    # start training
    trainer.train()
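
Note that here `get_trainer(trainer_name)` returns a class which is then instantiated with `(cfg, writer, logger)`. A sketch of that two-step, name-keyed factory; the registry entries are assumptions:

class BaseTrainer:
    def __init__(self, cfg, writer, logger):
        self.cfg, self.writer, self.logger = cfg, writer, logger

    def train(self):
        pass  # placeholder training loop

_TRAINER_REGISTRY = {"base": BaseTrainer}  # assumed contents

def get_trainer(name):
    """Return the trainer *class* registered under name; the caller instantiates it."""
    return _TRAINER_REGISTRY[name]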
Example #8
def main():
    """Main function"""

    # Parse the command line
    args = parse_args()

    # Initialize distributed workers
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config, output_dir=args.output_dir,
                         n_ranks=n_ranks, crayai=args.crayai)
    config = update_config(config, args)
    os.makedirs(config['output_dir'], exist_ok=True)

    # Setup logging
    config_logging(verbose=args.verbose, output_dir=config['output_dir'],
                   append=args.resume, rank=rank)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config and (rank == 0):
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Configuration: %s', config)
        logging.info('Saving job outputs to %s', config['output_dir'])
        if args.distributed is not None:
            logging.info('Using distributed mode: %s', args.distributed)

    # Reproducible training
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(args.seed + 10)

    # Save configuration in the output directory
    if rank == 0:
        save_config(config)

    # Load the datasets
    is_distributed = (args.distributed is not None)
    # Workaround because multi-process I/O not working with MPI backend
    if args.distributed in ['ddp-mpi', 'cray']:
        if rank == 0:
            logging.info('Disabling I/O workers because of MPI issue')
        config['data']['n_workers'] = 0
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=is_distributed, rank=rank, n_ranks=n_ranks, **config['data'])
    logging.info('Loaded %g training samples', len(train_data_loader.dataset))
    if valid_data_loader is not None:
        logging.info('Loaded %g validation samples', len(valid_data_loader.dataset))

    # Load the trainer
    gpu = (rank % args.ranks_per_node) if args.rank_gpu else args.gpu
    if gpu is not None:
        logging.info('Choosing GPU %s', gpu)
    trainer = get_trainer(distributed_mode=args.distributed,
                          output_dir=config['output_dir'],
                          rank=rank, n_ranks=n_ranks,
                          gpu=gpu, pbt_checkpoint=args.pbt_checkpoint,
                          **config['trainer'])

    # Build the model and optimizer
    model_config = config.get('model', {})
    optimizer_config = config.get('optimizer', {})
    logging.debug("Building model")
    trainer.build_model(optimizer_config=optimizer_config, **model_config)
    if rank == 0:
        trainer.print_model_summary()

    # Checkpoint resume
    if args.resume:
        trainer.load_checkpoint()

    # Run the training
    logging.debug("Training")
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['training'])

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = summary.train_time.mean()
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = summary.valid_time.mean()
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        if args.crayai:
            print("FoM: %e" % summary['valid_loss'][0])
        logging.info('All done!')
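
`save_config` is not shown in this example. A plausible sketch that dumps the config dict to YAML inside the output directory; the filename is an assumption:

import os
import yaml

def save_config(config):
    """Write the resolved config to the output directory (hypothetical filename)."""
    out_file = os.path.join(config['output_dir'], 'config.yaml')
    with open(out_file, 'w') as f:
        yaml.dump(config, f, default_flow_style=False)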
Example #9
def main():
    """Main function"""

    # Parse the command line
    args = parse_args()

    # Setup logging
    log_format = '%(asctime)s %(levelname)s %(message)s'
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level, format=log_format)
    logging.info('Initializing')
    if args.show_config:
        logging.info('Command line config: %s', args)

    # Initialize MPI
    if args.distributed:
        dist.init_process_group(backend='mpi')
        logging.info('MPI rank %i out of %i', dist.get_rank(),
                     dist.get_world_size())

    # Load configuration
    with open(args.config) as f:
        config = yaml.safe_load(f)
    if not args.distributed or (dist.get_rank() == 0):
        logging.info('Configuration: %s', config)
    data_config = config['data_config']
    model_config = config.get('model_config', {})
    train_config = config['train_config']

    # Load the datasets
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=args.distributed, **data_config)
    logging.info('Loaded %g training samples', len(train_data_loader.dataset))
    if valid_data_loader is not None:
        logging.info('Loaded %g validation samples',
                     len(valid_data_loader.dataset))
    print('train_data')
    print(train_data_loader.dataset)
    if valid_data_loader is not None:
        print('valid_data')
        print(valid_data_loader.dataset)

    # Load the trainer
    experiment_config = config['experiment_config']
    output_dir = experiment_config.pop('output_dir', None)
    if args.distributed and dist.get_rank() != 0:
        output_dir = None
    trainer = get_trainer(distributed=args.distributed,
                          output_dir=output_dir,
                          device=args.device,
                          **experiment_config)
    # Build the model
    trainer.build_model(**model_config)
    if not args.distributed or (dist.get_rank() == 0):
        trainer.print_model_summary()
    print('model')
    print(trainer)

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **train_config)
    if not args.distributed or (dist.get_rank() == 0):
        trainer.write_summaries()
    print('summary')
    print(summary)

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %gs rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive:
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    logging.info('All done!')
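
`get_data_loaders` appears in most of these scripts with roughly this signature. A hedged sketch using a stand-in dataset and a `DistributedSampler` when running distributed; the defaults and dataset contents are assumptions:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

def get_data_loaders(batch_size=32, n_workers=0, distributed=False,
                     rank=0, n_ranks=1, **kwargs):
    train_dataset = TensorDataset(torch.randn(1024, 8))  # stand-in data
    valid_dataset = TensorDataset(torch.randn(128, 8))
    train_sampler = (DistributedSampler(train_dataset, num_replicas=n_ranks, rank=rank)
                     if distributed else None)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              sampler=train_sampler, shuffle=(train_sampler is None),
                              num_workers=n_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                              num_workers=n_workers)
    return train_loader, valid_loader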
Example #10
            valid_data_loader = DataLoader(valid_dataset,
                                           batch_size=batch_size,
                                           cores=args.cores)
            logging.info('Loaded %g validation samples', len(valid_dataset))
        else:
            valid_data_loader = None
        if len(test_dataset) > 0:
            test_data_loader = DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          cores=args.cores)
            logging.info('Loaded %g test samples', len(test_dataset))
        else:
            test_data_loader = None

        # Load the trainer
        trainer = get_trainer(output_dir=output_dir, **experiment_config)

        # Build the model
        trainer.build_model(**model_config)
        trainer.print_model_summary()

        # Run the training
        summary = trainer.train(train_data_loader=train_data_loader,
                                valid_data_loader=valid_data_loader,
                                test_data_loader=test_data_loader,
                                **train_config)
        trainer.write_summary(kfold_i=kfold_i)

        # Print some conclusions
        tf.keras.backend.clear_session()
        logging.info('All done!')
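
This snippet sits inside a k-fold loop (it references `kfold_i` and clears the Keras session per fold). A sketch of a plausible enclosing structure, with the split helper as an assumption:

import numpy as np
from sklearn.model_selection import KFold

samples = np.arange(100)  # stand-in sample indices
for kfold_i, (train_idx, valid_idx) in enumerate(KFold(n_splits=5).split(samples)):
    # build the train/valid/test datasets for this fold here, then run the
    # trainer block shown above and write per-fold summaries
    pass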
Example #11

###################################################################################################
# Step 5: Setting up the neural network
###################################################################################################

import torch.distributed as dist
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

# Locals
from datasets import get_data_loaders
from trainers import get_trainer

trainer = get_trainer(distributed=args.distributed,
                      output_dir=output_dir,
                      device=args.device,
                      **experiment_config)

# Build the model
trainer.build_model(**model_config)
if not args.distributed or (dist.get_rank() == 0):
    trainer.print_model_summary()

###################################################################################################
# Step 6: Training and evaluating the network
###################################################################################################

summary = trainer.train(train_data_loader=train_data_loader,
                        valid_data_loader=valid_data_loader,
                        **train_config)
if not args.distributed or (dist.get_rank() == 0):
    trainer.write_summaries()
Example #12
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)
    data_config = config['data_config']
    model_config = config.get('model_config', {})
    train_config = config['train_config']

    # Prepare output directory
    output_dir = config.get('output_dir', None)
    if output_dir is not None:
        output_dir = os.path.expandvars(output_dir)
        os.makedirs(output_dir, exist_ok=True)

    # Setup logging
    log_file = (os.path.join(output_dir, 'out_%i.log' % rank)
                if output_dir is not None else None)
    config_logging(verbose=args.verbose, log_file=log_file)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if rank == 0:
        logging.info('Configuration: %s', config)

    # Load the datasets
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=args.distributed, **data_config)

    # Load the trainer
    trainer = get_trainer(name=config['trainer'],
                          distributed=args.distributed,
                          rank=rank,
                          output_dir=output_dir,
                          device=args.device)
    # Build the model
    trainer.build_model(**model_config)
    if rank == 0:
        trainer.print_model_summary()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **train_config)
    if output_dir is not None:
        trainer.write_summaries()

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive and rank == 0:
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    logging.info('All done!')
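
`config_logging` is another shared helper. A minimal sketch that mirrors the call sites (verbose flag, optional log file, optional append), reusing the log format Example #9 sets up by hand; the exact behavior is an assumption:

import logging

def config_logging(verbose=False, log_file=None, append=False):
    """Configure the root logger with an optional file handler."""
    log_format = '%(asctime)s %(levelname)s %(message)s'
    log_level = logging.DEBUG if verbose else logging.INFO
    handlers = [logging.StreamHandler()]
    if log_file is not None:
        handlers.append(logging.FileHandler(log_file, mode='a' if append else 'w'))
    logging.basicConfig(level=log_level, format=log_format, handlers=handlers)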
Example #13
def main():
    """Main function"""

    # Initialization
    args = parse_args()
    rank, n_ranks = init_workers(args.distributed_backend)

    # Load configuration
    config = load_config(args)

    # Prepare output directory
    output_dir = config.get('output_dir', None)
    if output_dir is not None:
        output_dir = os.path.expandvars(output_dir)
        os.makedirs(output_dir, exist_ok=True)

    # Setup logging
    log_file = (os.path.join(output_dir, 'out_%i.log' % rank)
                if output_dir is not None else None)
    config_logging(verbose=args.verbose, log_file=log_file, append=args.resume)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    try_barrier()
    if rank == 0:
        logging.info('Configuration: %s', config)

    # Load the datasets
    distributed = args.distributed_backend is not None
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=distributed, **config['data'])

    # Load the trainer
    gpu = (rank % args.ranks_per_node) if args.rank_gpu else args.gpu
    if gpu is not None:
        logging.info('Using GPU %i', gpu)
    trainer = get_trainer(name=config['trainer'],
                          distributed=distributed,
                          rank=rank,
                          output_dir=output_dir,
                          gpu=gpu)

    # Build the model and optimizer
    trainer.build(config)

    # Resume from checkpoint
    if args.resume:
        trainer.load_checkpoint()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['train'])

    # Print some conclusions
    try_barrier()
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    logging.info('All done!')
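
`try_barrier` presumably synchronizes all ranks but tolerates single-process runs. A sketch under that assumption:

import torch.distributed as dist

def try_barrier():
    """Synchronize workers; no-op when no process group is initialized."""
    if dist.is_available() and dist.is_initialized():
        dist.barrier()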
Example #14
def main():
    """Main function"""

    # Parse the command line
    args = parse_args()
    # Initialize MPI
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    config = load_config(args.config)
    output_dir = os.path.expandvars(config['output_dir'])
    if rank == 0:
        os.makedirs(output_dir, exist_ok=True)
    else:
        output_dir = None

    # Setup logging
    config_logging(verbose=args.verbose, output_dir=output_dir)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config and (rank == 0):
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Load the datasets
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=args.distributed, **config['data'])
    logging.info('Loaded %g training samples', len(train_data_loader.dataset))
    if valid_data_loader is not None:
        logging.info('Loaded %g validation samples',
                     len(valid_data_loader.dataset))

    # Load the trainer
    trainer = get_trainer(distributed=args.distributed,
                          output_dir=output_dir,
                          device=args.device,
                          **config['trainer'])
    # Build the model and optimizer
    trainer.build_model(**config.get('model', {}))
    if rank == 0:
        trainer.print_model_summary()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['training'])
    if rank == 0:
        trainer.write_summaries()

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')