def train(conf):
    """Run EM training for the given configuration.

    :param conf: configuration for the training
    :return: the learned translation table
    """
    # Delegate the actual EM loop to the module-level trainer `t`.
    translation_table = t.train(conf)
    return translation_table
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--mode', type=str, required=True, help='either "train" or "test"')
    parser.add_argument('-n', '--model', type=str, help='path to a trained model')
    parser.add_argument('-d', '--data', type=str, help='path to financial data from Yahoo Finance')
    args = parser.parse_args()

    # FIX: input validation previously used bare `assert` (stripped under
    # `python -O`, and AssertionError gives no usage hint) and an unknown
    # --mode value fell through all branches and silently did nothing.
    # `parser.error` prints usage and exits with status 2.
    if args.mode == 'test':
        # evaluation needs both a trained model and a data file
        if args.model is None:
            parser.error('--model is required when --mode test')
        if args.data is None:
            parser.error('--data is required when --mode test')
        test(args.data, args.model)
    elif args.mode == 'train':
        if args.data is None:
            parser.error('--data is required when --mode train')
        train(args.data)
    elif args.mode == 'general':
        # general training takes no data file
        if args.data is not None:
            parser.error('--data must not be given when --mode general')
        train_general()
    else:
        parser.error('invalid --mode {!r}: expected "train", "test" or "general"'.format(args.mode))
from config.classic_control import muzero_config # just using same config as classic_control for now elif args.case == 'classic_control': from config.classic_control import muzero_config else: raise Exception('Invalid --case option') # set config as per arguments exp_path = muzero_config.set_config(args) exp_path, log_base_path = make_results_dir(exp_path, args) # set-up logger init_logger(log_base_path) try: if args.opr == 'train': summary_writer = SummaryWriter(exp_path, flush_secs=10) train(muzero_config, summary_writer) elif args.opr == 'test': assert os.path.exists(muzero_config.model_path), 'model not found at {}'.format(muzero_config.model_path) model = muzero_config.get_uniform_network().to('cpu') model.load_state_dict(torch.load(muzero_config.model_path, map_location=torch.device('cpu'))) test_score = test(muzero_config, model, args.test_episodes, device='cpu', render=args.render, save_video=True) logging.getLogger('test').info('Test Score: {}'.format(test_score)) else: raise Exception('Please select a valid operation(--opr) to be performed') ray.shutdown() except Exception as e: logging.getLogger('root').error(e, exc_info=True)
def bulid_net(cfg):
    # NOTE(review): the name keeps the original typo ("bulid") so external
    # callers are not broken; consider renaming to build_net file-wide.
    """Build the super-resolution network and dispatch to train/resume/test
    according to cfg.NETWORK.PHASE.

    :param cfg: global configuration object with DATA, CONST, DATASET,
                NETWORK, TRAIN and DIR sections — assumed easydict-style;
                TODO confirm against the config module.
    """
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    # Set up data augmentation
    train_transforms = utils.data_transforms.Compose([
        utils.data_transforms.RandomCrop(cfg.DATA.CROP_IMG_SIZE, cfg.CONST.SCALE),
        utils.data_transforms.FlipRotate(),
        utils.data_transforms.BGR2RGB(),
        utils.data_transforms.RandomColorChannel(),
        # utils.data_transforms.ColorJitter(cfg.DATA.COLOR_JITTER),
        # utils.data_transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD),
        # utils.data_transforms.RandomGaussianNoise(cfg.DATA.GAUSSIAN),
        utils.data_transforms.ToTensor()
    ])
    test_transforms = utils.data_transforms.Compose([
        # utils.data_transforms.BorderCrop(cfg.CONST.SCALE),
        utils.data_transforms.BGR2RGB(),
        # utils.data_transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD),
        utils.data_transforms.ToTensor()
    ])

    # Set up data loader
    train_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
        cfg.DATASET.DATASET_TRAIN_NAME](utils.data_loaders.DatasetType.TRAIN)
    test_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
        cfg.DATASET.DATASET_TEST_NAME](utils.data_loaders.DatasetType.TEST)
    if cfg.NETWORK.PHASE in ['train', 'resume']:
        train_data_loader = torch.utils.data.DataLoader(
            dataset=train_dataset_loader.get_dataset(train_transforms),
            batch_size=cfg.CONST.TRAIN_BATCH_SIZE,
            num_workers=cfg.CONST.NUM_WORKER, pin_memory=True, shuffle=True)
        # validation reuses the test dataset with test-time transforms
        val_data_loader = torch.utils.data.DataLoader(
            dataset=test_dataset_loader.get_dataset(test_transforms),
            batch_size=cfg.CONST.VAL_BATCH_SIZE,
            num_workers=cfg.CONST.NUM_WORKER, pin_memory=True, shuffle=False)
    elif cfg.NETWORK.PHASE in ['test']:
        test_data_loader = torch.utils.data.DataLoader(
            dataset=test_dataset_loader.get_dataset(test_transforms),
            batch_size=cfg.CONST.TEST_BATCH_SIZE,
            num_workers=cfg.CONST.NUM_WORKER, pin_memory=True, shuffle=False)

    # Set up networks: look up the architecture class by name and instantiate
    net = models.__dict__[cfg.NETWORK.SRNETARCH].__dict__[
        cfg.NETWORK.SRNETARCH]()
    print('[DEBUG] %s Parameters in %s: %d.' % (dt.now(), cfg.NETWORK.SRNETARCH,
                                                net_utils.count_parameters(net)))

    # Initialize weights of networks (fresh training only; resume/test load
    # weights from a checkpoint below)
    if cfg.NETWORK.PHASE == 'train':
        net_utils.initialize_weights(net, cfg.TRAIN.KAIMING_SCALE)

    # Set up solver — only parameters that require gradients are optimized
    solver = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=cfg.TRAIN.LEARNING_RATE,
                              betas=(cfg.TRAIN.MOMENTUM, cfg.TRAIN.BETA))
    if torch.cuda.is_available():
        net = torch.nn.DataParallel(net, range(cfg.CONST.NUM_GPU)).cuda()

    # Load pretrained model if exists
    Init_Epoch = 0
    Best_Epoch = 0
    Best_PSNR = 0
    if cfg.NETWORK.PHASE in ['test', 'resume']:
        print('[INFO] %s Recovering from %s ...' % (dt.now(), cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        net.load_state_dict(checkpoint['net_state_dict'])
        if cfg.NETWORK.PHASE == 'resume':
            # resume also restores training bookkeeping and, when saved,
            # the optimizer state
            Init_Epoch = checkpoint['epoch_idx']
            Best_PSNR = checkpoint['best_PSNR']
            Best_Epoch = checkpoint['best_epoch']
            if 'solver_state_dict' in checkpoint:
                solver.load_state_dict(checkpoint['solver_state_dict'])
        print('[INFO] {0} Recover complete. Current Epoch #{1}, Best_PSNR = {2} at Epoch #{3}.' \
              .format(dt.now(), Init_Epoch, Best_PSNR, Best_Epoch))

    if cfg.NETWORK.PHASE in ['train', 'resume']:
        # Set up learning rate scheduler to decay learning rates dynamically
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            solver, milestones=cfg.TRAIN.LR_MILESTONES,
            gamma=cfg.TRAIN.LR_DECAY)

        # Summary writer for TensorBoard; output_dir keeps a '%s' slot that
        # is filled with 'logs' / 'checkpoints' below
        output_dir = os.path.join(
            cfg.DIR.OUT_PATH, 'tb_log',
            dt.now().isoformat() + '_' + cfg.NETWORK.SRNETARCH, '%s')
        log_dir = output_dir % 'logs'
        ckpt_dir = output_dir % 'checkpoints'
        train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
        val_writer = SummaryWriter(os.path.join(log_dir, 'val'))

        # train and val
        train(cfg, Init_Epoch, train_data_loader, val_data_loader, net, solver,
              lr_scheduler, ckpt_dir, train_writer, val_writer, Best_PSNR,
              Best_Epoch)
        return
    elif cfg.NETWORK.PHASE in ['test']:
        if cfg.DATASET.DATASET_TEST_NAME == 'Demo':
            # demo data has no ground truth to score against
            test_woGT(cfg, test_data_loader, net)
        else:
            test(cfg, test_data_loader, net, Best_Epoch)
        return
# NOTE(review): this chunk starts inside the `args.opr == 'train'` branch of a
# larger CLI dispatcher and ends mid-way through the 'test' branch; the
# opening `if args.opr == 'train':` lies outside this view, hence the leading
# level of indentation.
    if args.use_wandb:
        # Mirror TensorBoard logs into Weights & Biases
        os.makedirs(args.wandb_dir, exist_ok=True)
        os.environ['WANDB_DIR'] = str(args.wandb_dir)
        import wandb
        wandb.init(job_type='train',
                   dir=args.wandb_dir,
                   group=args.case + ':' + args.env,
                   project="variable-td3",
                   config=run_config.get_hparams(),
                   sync_tensorboard=True)
    summary_writer = SummaryWriter(run_config.exp_path, flush_secs=60 * 1)  # flush every 1 minutes
    train(run_config, summary_writer)
    summary_writer.flush()
    summary_writer.close()
    if args.use_wandb:
        # finish the wandb run after training completes
        wandb.join()
elif args.opr == 'test':
    # restore from wandb
    model_path = run_config.model_path
    if args.restore_model_from_wandb:
        # presumably the checkpoint is fetched from the given wandb run;
        # the download logic continues beyond this view
        assert args.wandb_run_id is not None, 'wandb run id cannot be {}'.format(
            args.wandb_run_id)
        import wandb
        root, name = os.path.split(model_path)
# cnn 'filter_sizes': [3], 'num_filters': 100, # lstm 'hidden_size': 100, 'num_layers': 2, 'bidirectional': True, # word-level attention 'da': 100, 'r': 10, # feature-level attention 'soa_size': 50, } dictionary = Dictionary.load_from_file() train_dset = NARREDataset('train', dictionary) val_dset = NARREDataset('val', dictionary) train_loader = DataLoader(train_dset, batch_size=hyperparas['batch_size'], shuffle=True) val_loader = DataLoader(val_dset, batch_size=hyperparas['batch_size'], shuffle=True) constructor = 'build_' + hyperparas['model'] model = getattr(build_model, constructor)(train_dset, hyperparas) train(model, train_loader, val_loader, hyperparas)
def bulid_net(cfg):
    """Build the deblurring network and run train/resume or test depending on
    cfg.NETWORK.PHASE.

    NOTE(review): the name keeps the original typo ("bulid") so existing
    callers are not broken; consider renaming to build_net file-wide.

    :param cfg: global configuration with DATA, DATASET, NETWORK, TRAIN,
                CONST and DIR sections
    """
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    # Set up data augmentation
    train_transforms = utils.data_transforms.Compose([
        utils.data_transforms.ColorJitter(cfg.DATA.COLOR_JITTER),
        utils.data_transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD),
        utils.data_transforms.RandomCrop(cfg.DATA.CROP_IMG_SIZE),
        utils.data_transforms.RandomVerticalFlip(),
        utils.data_transforms.RandomHorizontalFlip(),
        utils.data_transforms.RandomColorChannel(),
        utils.data_transforms.RandomGaussianNoise(cfg.DATA.GAUSSIAN),
        utils.data_transforms.ToTensor(),
    ])
    test_transforms = utils.data_transforms.Compose([
        utils.data_transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD),
        utils.data_transforms.ToTensor(),
    ])

    # Set up data loader
    dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.DATASET_NAME]()

    # Set up networks: look up the architecture class by name and instantiate
    deblurnet = models.__dict__[cfg.NETWORK.DEBLURNETARCH].__dict__[cfg.NETWORK.DEBLURNETARCH]()
    print('[DEBUG] %s Parameters in %s: %d.' % (dt.now(), cfg.NETWORK.DEBLURNETARCH,
                                                utils.network_utils.count_parameters(deblurnet)))

    # Initialize weights of networks
    deblurnet.apply(utils.network_utils.init_weights_xavier)

    # Set up solver — only parameters that require gradients are optimized.
    # FIX: removed a dead local (`a = filter(lambda p: p.requires_grad, ...)`)
    # that duplicated this expression and was never used.
    deblurnet_solver = torch.optim.Adam(filter(lambda p: p.requires_grad, deblurnet.parameters()),
                                        lr=cfg.TRAIN.LEARNING_RATE,
                                        betas=(cfg.TRAIN.MOMENTUM, cfg.TRAIN.BETA))

    if torch.cuda.is_available():
        deblurnet = torch.nn.DataParallel(deblurnet).cuda()

    # Load pretrained model if exists
    init_epoch = 0
    Best_Epoch = -1
    Best_Img_PSNR = 0
    if cfg.NETWORK.PHASE in ['test', 'resume']:
        print('[INFO] %s Recovering from %s ...' % (dt.now(), cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        deblurnet.load_state_dict(checkpoint['deblurnet_state_dict'])
        # deblurnet_solver.load_state_dict(checkpoint['deblurnet_solver_state_dict'])
        init_epoch = checkpoint['epoch_idx'] + 1
        Best_Img_PSNR = checkpoint['Best_Img_PSNR']
        Best_Epoch = checkpoint['Best_Epoch']
        print('[INFO] {0} Recover complete. Current epoch #{1}, Best_Img_PSNR = {2} at epoch #{3}.' \
              .format(dt.now(), init_epoch, Best_Img_PSNR, Best_Epoch))

    # Set up learning rate scheduler to decay learning rates dynamically
    deblurnet_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(deblurnet_solver,
                                                                  milestones=cfg.TRAIN.LR_MILESTONES,
                                                                  gamma=cfg.TRAIN.LR_DECAY)

    # Summary writer for TensorBoard; output_dir keeps a '%s' slot filled
    # with 'logs' / 'checkpoints' below
    output_dir = os.path.join(cfg.DIR.OUT_PATH,
                              dt.now().isoformat() + '_' + cfg.NETWORK.DEBLURNETARCH, '%s')
    log_dir = output_dir % 'logs'
    ckpt_dir = output_dir % 'checkpoints'
    train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
    test_writer = SummaryWriter(os.path.join(log_dir, 'test'))
    print('[INFO] Output_dir: {0}'.format(output_dir[:-2]))  # strip trailing '%s'

    if cfg.NETWORK.PHASE in ['train', 'resume']:
        train(cfg, init_epoch, dataset_loader, train_transforms, test_transforms,
              deblurnet, deblurnet_solver, deblurnet_lr_scheduler,
              ckpt_dir, train_writer, test_writer, Best_Img_PSNR, Best_Epoch)
    else:
        if os.path.exists(cfg.CONST.WEIGHTS):
            test(cfg, init_epoch, dataset_loader, test_transforms, deblurnet, test_writer)
        else:
            print('[FATAL] %s Please specify the file path of checkpoint.' % (dt.now()))
            sys.exit(2)
# NOTE(review): this chunk begins inside a logging.basicConfig(...) call whose
# earlier arguments lie outside this view.
    level=logging.INFO)
logging.info("Cli app started execution ...")

# Prefer the first GPU when CUDA is available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logging.info("Using a {} device".format(device))

# Program flow parameters
train = args.trainNew  # NOTE(review): this local shadows any `train` name imported at module level
model_path = args.modelPath

# ML parameters
logging.info("Loading training dataset ...")
batch_size = 4
data = CIFAR10_train(data_dir='./data', batch_size=batch_size, augment=False,
                     random_seed=42)
net = model_factory.Net()

# Load or train the model: train when explicitly requested or when no saved
# model path was supplied
if (train or model_path is None):
    logging.info("Starting the model training ...")
    train_parameters = train_parameters_factory.load_parameters(
        args.trainParameters)
    model_training.train(data, net, train_parameters)
else:
    net.load_state_dict(torch.load(model_path))

# Analyze the model performance
logging.info("Analyzing the performance of the model ...")
analysis.model_analysis(net, data, batch_size=batch_size)
#! /usr/bin/python3
# Martino Ferrari
'''
Optional script to generate and store the translation tables,
instead of compute it on the fly every time.
'''
import json
import sys
import time

import core.train as T

if __name__ == "__main__":
    # Default config path; an optional single CLI argument overrides it.
    # FIX: previously the argument was read into `conf` and then immediately
    # clobbered by the unconditional `conf = "conf.json"` assignment, so the
    # command-line path was always ignored.
    conf = "conf.json"
    if len(sys.argv) == 2:
        conf = sys.argv[1]

    with open(conf) as data_file:
        config = json.load(data_file)

    print('Reading initial values and sentences...')
    output = config['training']['translationfile']

    # Time the EM training run
    t = time.time()
    teta = T.train(config)
    t = time.time() - t
    print("time to train {}s".format(t))

    # Persist the learned translation table as JSON
    print('save >> {}'.format(output))
    with open(output, 'w') as f:
        json.dump(teta, f)
def main(args):
    """Entry point: configure the runtime, build datasets from the config
    file and launch training.

    :param args: parsed CLI namespace (seed, cfg, learning_rate, iters,
                 batch_size, data_format, do_eval, save_dir, resume_model,
                 save_interval, log_iters, num_workers, use_vdl,
                 keep_checkpoint_max, fp16, profiler_options)
    """
    # Seed every RNG in use so runs are reproducible when --seed is given
    if args.seed is not None:
        paddle.seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    # Log a banner describing the detected system / Paddle environment
    env_info = get_sys_env()
    info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
    info = '\n'.join(['', format('Environment Information', '-^48s')] + info +
                     ['-' * 48])
    logger.info(info)

    # Use the GPU only when Paddle was compiled with CUDA and GPUs are present
    place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
        'GPUs used'] else 'cpu'
    paddle.set_device(place)

    if not args.cfg:
        raise RuntimeError('No configuration file specified.')

    cfg = Config(
        args.cfg,
        learning_rate=args.learning_rate,
        iters=args.iters,
        batch_size=args.batch_size)

    # Only support for the DeepLabv3+ model
    if args.data_format == 'NHWC':
        if cfg.dic['model']['type'] != 'DeepLabV3P':
            raise ValueError(
                'The "NHWC" data format only support the DeepLabV3P model!')
        # Propagate the layout choice to the model, its backbone and each loss
        cfg.dic['model']['data_format'] = args.data_format
        cfg.dic['model']['backbone']['data_format'] = args.data_format
        loss_len = len(cfg.dic['loss']['types'])
        for i in range(loss_len):
            cfg.dic['loss']['types'][i]['data_format'] = args.data_format

    # Validate the configured datasets before starting
    train_dataset = cfg.train_dataset
    if train_dataset is None:
        raise RuntimeError(
            'The training dataset is not specified in the configuration file.')
    elif len(train_dataset) == 0:
        raise ValueError(
            'The length of train_dataset is 0. Please check if your dataset is valid'
        )
    val_dataset = cfg.val_dataset if args.do_eval else None
    losses = cfg.loss

    msg = '\n---------------Config Information---------------\n'
    msg += str(cfg)
    msg += '------------------------------------------------'
    logger.info(msg)

    config_check(cfg, train_dataset=train_dataset, val_dataset=val_dataset)

    train(
        cfg.model,
        train_dataset,
        val_dataset=val_dataset,
        optimizer=cfg.optimizer,
        save_dir=args.save_dir,
        iters=cfg.iters,
        batch_size=cfg.batch_size,
        resume_model=args.resume_model,
        save_interval=args.save_interval,
        log_iters=args.log_iters,
        num_workers=args.num_workers,
        use_vdl=args.use_vdl,
        losses=losses,
        keep_checkpoint_max=args.keep_checkpoint_max,
        test_config=cfg.test_config,
        fp16=args.fp16,
        profiler_options=args.profiler_options)