Esempio n. 1
0
    # ES stuff
    params.no_copy = args.no_copy

    # set number of gpu
    params.ngpu = comm_size

    # Set up directory
    baseDir = './expts/'
    expDir = os.path.join(baseDir, args.config + '/' + str(run_num) + '/')
    if comm_rank == 0:
        if not os.path.isdir(expDir):
            os.makedirs(expDir, exist_ok=True)
            os.makedirs(expDir + 'training_checkpoints/', exist_ok=True)

        logging_utils.log_to_file(logger_name=None,
                                  log_filename=os.path.join(expDir, 'out.log'))
        params.log()
        #args.tboard_writer = SummaryWriter(log_dir=os.path.join(expDir, 'logs/'))

    params.experiment_dir = os.path.abspath(expDir)
    params.checkpoint_path = os.path.join(params.experiment_dir,
                                          'training_checkpoints/ckpt.tar')
    if os.path.isfile(params.checkpoint_path):
        args.resuming = True

    train(params, args, comm_rank, comm_local_rank)
    #if comm_rank == 0:
    #  args.tboard_writer.flush()
    #  args.tboard_writer.close()
    logging.info('DONE ---- rank %d' % comm_rank)
Esempio n. 2
0

if __name__ == '__main__':
    # Script entry point. Expects exactly two command-line arguments: the
    # path to a YAML configuration file and the name of the configuration to
    # load from it. Sets up the experiment directory, file logging and a
    # TensorBoard writer, optionally seeds the RNGs, then runs training.

    torch.backends.cudnn.benchmark = True
    if len(sys.argv) != 3:
        # logging %-formats the message with the remaining positional
        # arguments, so the message itself must carry the placeholder.
        logging.error("Usage: %s configuration_YAML_file configuration",
                      sys.argv[0])
        # Exit with a non-zero status so callers can detect the usage error.
        sys.exit(1)

    params = YParams(os.path.abspath(sys.argv[1]), sys.argv[2])
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(os.path.abspath(params.experiment_dir), exist_ok=True)

    logging_utils.log_to_file(logger_name=None,
                              log_filename=os.path.join(
                                  params.experiment_dir, 'out.log'))
    params.log()
    tboard_writer = SummaryWriter(
        log_dir=os.path.join(params.experiment_dir, 'logs/'))

    params.experiment_dir = os.path.abspath(params.experiment_dir)
    params.checkpoint_file = os.path.join(params.experiment_dir, 'checkpt.tar')

    # NOTE: a falsy seed (including 0) leaves the RNGs unseeded.
    if params.seed:
        random.seed(params.seed)
        torch.manual_seed(params.seed)

    try:
        train(params, tboard_writer)
    finally:
        # Flush and close the writer even when training raises, so events
        # buffered so far are not lost.
        tboard_writer.flush()
        tboard_writer.close()