def main():
    # spawn theano vars
    xs = [T.imatrix('x%d' % i) for i in range(options['max_src'])]
    y = T.ivector('y')
    learning_rate = T.scalar('learning_rate')
    trng = RandomStreams(4321)

    # use test values
    """
    import numpy as np
    batch_size = 10
    theano.config.compute_test_value = 'raise'
    xl.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    xr.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    y.tag.test_value = np.random.randint(8, size=batch_size).astype(np.int32)
    learning_rate.tag.test_value = 0.5
    """

    # build cgs
    model = build_model(xs, y, learning_rate, trng=trng, **options)

    # compile
    opt = get_optimizer(options['optimizer'])
    f_train = opt(learning_rate, model, xs + [y], return_alpha=True)

    # compile validation/test functions
    f_valid = theano.function(xs + [y], [model.cost, model.acc],
                              on_unused_input='warn')

    # training loop
    train(f_train, f_valid, xs, y, **options)
def __init__(self, opt, logger=None):
    super(Model, self).__init__()
    self.opt = opt
    self.logger = logger

    # pick the config file for YoloV2 vs. YoloV3
    if opt.model == 'Yolo2':
        cfgfile = 'configs/yolo2-voc.cfg'
    elif opt.model == 'Yolo3':
        cfgfile = 'configs/yolo3-coco.cfg'

    # initialize the detector
    self.detector = Darknet(cfgfile, device=opt.device).to(opt.device)
    print_network(self.detector, logger=logger)

    # load a weights file before --load (optional)
    if opt.weights:
        utils.color_print('Load Yolo weights from %s.' % opt.weights, 3)
        self.detector.load_weights(opt.weights)

    self.optimizer = get_optimizer(opt, self.detector)
    self.scheduler = get_scheduler(opt, self.optimizer)

    self.avg_meters = ExponentialMovingAverage(0.95)
    self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
def soft_train(network, args):
    device = torch.device("cuda" if args.gpu_flag else "cpu")
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        network = network.cpu()
        if args.network == "vgg":
            network = soft_prune_vgg_step(network, args.prune_rate[0])
        elif args.network == 'resnet':
            network = soft_prune_resnet_step(network, args.prune_rate)
        network = network.to(device)

        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)

        if scheduler is not None:
            scheduler.step()
    return network
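# NOTE: the PyTorch snippets in this collection call a repo-local
# get_optimizer(network, args) whose definition is not included here. Below is only
# a minimal sketch of what such a factory commonly looks like; the flag names
# (args.optimizer, args.lr, args.momentum, args.weight_decay) and the StepLR settings
# are assumptions for illustration, not the actual implementation of any repository.
import torch


def get_optimizer(network, args):
    # choose the optimizer from a command-line flag (hypothetical flag names)
    if getattr(args, 'optimizer', 'sgd') == 'adam':
        optimizer = torch.optim.Adam(network.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(network.parameters(), lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    # a simple step schedule; several callers above expect (optimizer, scheduler)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    return optimizer, scheduler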
def train(args):
    iters, vocab = get_iterator(args)

    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
def get_optimizer(self, optimizer_name, used_look_head=True):
    return get_optimizer(optimizer_name, self.net, self.args,
                         Trainer.learn_change_func,
                         used_look_head=used_look_head)
def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) prep_experiment(args, parser) writer = None _, _, _, extra_val_loaders, _ = datasets.setup_loaders(args) criterion, criterion_val = loss.get_loss(args) criterion_aux = loss.get_loss_aux(args) net = network.get_net(args, criterion, criterion_aux) optim, scheduler = optimizer.get_optimizer(args, net) net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) net = network.warp_network_in_dataparallel(net, args.local_rank) epoch = 0 i = 0 if args.snapshot: epoch, mean_iu = optimizer.load_weights(net, optim, scheduler, args.snapshot, args.restore_optimizer) print("#### iteration", i) torch.cuda.empty_cache() # Main Loop # for epoch in range(args.start_epoch, args.max_epoch): for dataset, val_loader in extra_val_loaders.items(): print("Extra validating... This won't save pth file") validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False)
def train_network(network, args):
    if network is None:
        return

    device = torch.device("cuda" if args.gpu_flag else "cpu")
    network = network.to(device)
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        if args.pruned and args.alpha < 1:
            network = network.cpu()
            network = soft_prune_step(network, 1 - args.alpha)
            network = network.to(device)

        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)

        if scheduler is not None:
            scheduler.step()
    return network
def train_workers(dataset, workers, epochs, training_steps, cutoff, optimizer,
                  test_size=1000):
    train_step, init_op, reset_opt = get_optimizer(optimizer)
    step = 0
    with tf.Session() as sess:
        sess.run(init_op)
        step_time = Timer()
        for pbt_step in range(1, training_steps + 1):
            for worker in workers:
                step += 1
                print('%d, ' % step, end='')
                print('%d, ' % worker['id'], end='')
                score_value = worker['score_value']
                train_epochs(sess, epochs, worker['hparams'][0], dataset, train_step)
                train, test, valid = test_accuracy.test_graph(sess, test_size, dataset)
                print('%f, %f, %f' % (train, test, valid))
                worker['score_value'] = train * test  # overfit_score.overfit_blended(train, test)
                worker['score'] = (1.0 + worker['score_value']) / (1.0 + score_value)
                pbt.tournament_replace(worker, workers, cutoff, dup_all=False,
                                       explore_fun=pbt.perturb_hparams)
                # pbt.pbt(workers, cutoff, dup_all=False)
            print('# step time %3.1fs, ' % step_time.split())
def train(config):
    # load Vocab
    src_vocab = data_reader.Vocab(vocab_limits=config['src_vocab_size'])
    src_vocab.load_metadata(config['metadata']['src'])
    config['src_vocab_size'] = src_vocab.vocab_size()

    tgt_vocab = data_reader.Vocab(vocab_limits=config['tgt_vocab_size'])
    tgt_vocab.load_metadata(config['metadata']['tgt'])
    config['tgt_vocab_size'] = tgt_vocab.vocab_size()

    tf.logging.info(config)

    initializer = tf.random_uniform_initializer(-config['init_scale'],
                                                config['init_scale'])

    # create models
    with tf.name_scope('Train'):
        opt, lr = optimizer.get_optimizer(config['optimizer'],
                                          config['learning_rate'])
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            train_model = model.Model(is_training=True,
                                      config=config,
                                      seq_length=config['tgt_length'] - 1,
                                      optimizer=opt,
                                      lr=lr)

    with tf.name_scope('Test'):
        with tf.variable_scope("Model", reuse=True):
            test_model = model.Model(is_training=False, config=config, seq_length=1)

    sv = tf.train.Supervisor(logdir=config['logdir'])
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9

    # load Data
    train_data = data_reader.DataReader(src_data=config['train_data']['src'][0],
                                        tgt_data=config['train_data']['tgt'][0],
                                        src_vocab=src_vocab,
                                        tgt_vocab=tgt_vocab,
                                        src_length=config['src_length'],
                                        tgt_length=config['tgt_length'],
                                        batch_size=config['batch_size'])

    tf.logging.info('Start Sess')
    with sv.managed_session(config=sess_config) as sess:
        for i in range(config['n_epoch']):
            lr_decay = config['lr_decay'] ** max(i + 1 - config['decay_epoch'], 0)
            train_model.assign_lr(sess, config['learning_rate'] * lr_decay)

            tf.logging.info('Iter %d Start, Learning_rate: %.4f'
                            % (i, sess.run(train_model.lr)))
            loss = run_epoch(sess, train_model, train_data, is_training=True,
                             t_model=test_model, src_vocab=src_vocab,
                             tgt_vocab=tgt_vocab)
            tf.logging.info('Iter %d: training_loss: %.4f' % (i, np.power(2, loss)))
def train_network(network, args):
    if network is None:
        if args.network == 'vgg':
            network = MyVGG()
        elif args.network == 'resnet':
            network = resnet32()

    device = torch.device("cuda" if args.gpu_flag else "cpu")
    network = network.to(device)
    optimizer, scheduler = get_optimizer(network, args)

    train_data_set = get_data_set(args, train_flag=True)
    test_data_set = get_data_set(args, train_flag=False)
    train_data_loader = torch.utils.data.DataLoader(train_data_set,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_data_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

    print("-*-" * 10 + "\n\t\tTrain network\n" + "-*-" * 10)
    for epoch in range(0, args.epoch):
        train_step(network, train_data_loader, test_data_loader, optimizer,
                   device, epoch)
        if scheduler is not None:
            scheduler.step()
    return network
def retrain_with_pseudo_label(loaded_models, train_ids, valid_ids,
                              TRAIN_IMAGE_DIR, DATAFRAME, config):
    if 'pseudo_dataframe' not in loaded_models[list(loaded_models.keys())[0]]:
        return

    def worker_init_fn(worker_id):
        random.seed(worker_id + random_seed)
        np.random.seed(worker_id + random_seed)

    for key in loaded_models.keys():
        # make dataloader with pseudo label
        model_config = loaded_models[key]['config']
        dataframe_with_pseudo = pd.concat(
            [DATAFRAME.loc[DATAFRAME['image_id'].isin(train_ids), :],
             loaded_models[key]['pseudo_dataframe']], axis=0)
        retrain_dataset = GWDDataset(dataframe_with_pseudo, TRAIN_IMAGE_DIR,
                                     model_config, is_train=True,
                                     do_transform=False)  # dataset for retrain
        retrain_data_loader = DataLoader(retrain_dataset, batch_size=1,
                                         shuffle=True, num_workers=0,
                                         worker_init_fn=worker_init_fn,
                                         collate_fn=collate_fn)

        model = copy.deepcopy(loaded_models[key]['model'])
        model.train()
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = get_optimizer(model_config['train']['optimizer'], trainable_params)

        # retraining
        print("Retraining %s" % key)
        for epoch in range(0, config['epochs']):
            if model_config['general']['kfold'] < 0:
                print("\r[Epoch %d]" % epoch)
            train_epoch(model, retrain_data_loader, None, optimizer)

        model.eval()
        loaded_models[key]['pseudo_model'] = model
    return
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'],
                  model, vocab)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
def __init__(self, opt):
    super(Model, self).__init__()
    self.opt = opt
    self.classifier = Classifier(opt.model)  # .cuda(device=opt.device)

    #####################
    #    Init weights
    #####################
    # self.classifier.apply(weights_init)

    print_network(self.classifier)

    self.optimizer = get_optimizer(opt, self.classifier)
    self.scheduler = get_scheduler(opt, self.optimizer)

    # load networks
    # if opt.load:
    #     pretrained_path = opt.load
    #     self.load_network(self.classifier, 'G', opt.which_epoch, pretrained_path)
    #     if self.training:
    #         self.load_network(self.discriminitor, 'D', opt.which_epoch, pretrained_path)

    self.avg_meters = ExponentialMovingAverage(0.95)
    self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)

    # with open('datasets/class_weight.pkl', 'rb') as f:
    #     class_weight = pickle.load(f, encoding='bytes')
    # class_weight = np.array(class_weight, dtype=np.float32)
    # class_weight = torch.from_numpy(class_weight).to(opt.device)

    # if opt.class_weight:
    #     self.criterionCE = nn.CrossEntropyLoss(weight=class_weight)
    # else:
    self.criterionCE = nn.CrossEntropyLoss()
def __init__(self, config):
    self.cuda = int(config['cuda'])
    # torch.cuda.empty_cache()
    self.train_dataloader = get_dataloader(config, scope='train')
    self.val_dataloader = get_dataloader(config, scope='val')
    self.model = get_model(config)
    try:
        model_weights = 'experiment/' + config['dir'] + '/' + config['weights']
        self.model.load_state_dict(torch.load(model_weights)['model'])
        print('Weights loaded')
    except:
        print('Weights randomized')
    self.optimizer = get_optimizer(config, self.model)
    self.total_epochs = config['epochs']
    self.batches_per_epoch = config['batches_per_epoch']
    self.val_batches_per_epoch = config['val_batches_per_epoch']
    self.final_weights_file = 'experiment/' + config['dir'] + '/weights_last.pth'
    self.best_weights_file = 'experiment/' + config['dir'] + '/weights_best.pth'
    self.log_file = 'experiment/' + config['dir'] + '/logs.csv'
    self.loss_dict = {
        'sample_name': config['sample_name'],
        'output_name': config['output_name'],
        'loss': [get_criterion(x) for x in config['loss_criterion']],
        'weight': config['loss_weight']
    }
    self.train_fe = bool(config['train_feature_extractor'])
def main():
    '''
    Main Function
    '''
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    torch.cuda.empty_cache()

    epoch = 0
    if args.evaluate:
        # Early evaluation for benchmarking
        validate(val_loader, net, criterion_val, optim, epoch, writer)
        evaluate(val_loader, net)
        return

    # Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()
        train(train_loader, net, criterion, optim, epoch, writer)
        validate(val_loader, net, criterion_val, optim, epoch, writer)
def __init__(self, opt, logger=None):
    super(Model, self).__init__()
    self.opt = opt

    # cfgfile = 'yolo-voc.cfg'
    # self.detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
    #
    # # replace the pre-trained head with a new one
    # self.detector.roi_heads.box_predictor = FastRCNNPredictor(in_features, config.DATA.NUM_CLASSESS + 1)

    self.detector = yolov4(inference=True, n_classes=config.DATA.NUM_CLASSESS)

    # """
    # pretrained model
    # """
    # pretrained_dict = torch.load('pretrained/yolov4.pth')
    # self.detector.load_state_dict(pretrained_dict)

    self.yolov4loss = Yolo_loss(device=opt.device, batch=opt.batch_size)

    #####################
    #    Init weights
    #####################
    # normal_init(self.detector)

    if opt.debug:
        print_network(self.detector)

    self.optimizer = get_optimizer(opt, self.detector)
    self.scheduler = get_scheduler(opt, self.optimizer)

    self.avg_meters = ExponentialMovingAverage(0.95)
    self.save_dir = os.path.join('checkpoints', opt.tag)
def __init__(self, log_dir: str, cfg: EasyDict, use_cuda: bool = True,
             use_multi_gpu: bool = True, load_ckpt: str = "", is_train=True):
    self.use_multi_gpu = use_multi_gpu
    self.use_cuda = use_cuda
    self.cfg = cfg
    self.log_dir = Path(log_dir)
    self.timer_start_train = 0
    self.det_best_field_current = 0
    self.det_best_field_best = 0
    self.best_epoch = 0

    # logger / metrics
    self.metrics_fh = None
    if is_train:
        os.makedirs(self.log_dir, exist_ok=True)
        metrics_file = self.log_dir / "train_metrics.csv"
        metric_keys = utils.get_csv_header_keys(cfg.training.compute_clip_retrieval)
        self.metrics_fh = metrics_file.open("wt", encoding="utf8")
        self.metrics_writer = csv.DictWriter(self.metrics_fh, metric_keys)
        self.metrics_writer.writeheader()
        self.metrics_fh.flush()
        utils.dump_config(cfg, self.log_dir / "config.yaml")
    self.logger = utils.get_logger(self.log_dir, "trainer", log_file=is_train)

    # model
    self.model = CootModel(cfg, use_cuda, use_multi_gpu)

    # contrastive loss
    self.loss_f_contr = ContrastiveLoss(use_cuda)

    # cycle consistency loss
    self.loss_f_cyclecons = None
    if cfg.training.loss_cycle_cons != 0:
        self.loss_f_cyclecons = CycleConsistencyLoss(num_samples=1, use_cuda=use_cuda)

    # optimizer
    self.optimizer = get_optimizer(cfg.optimizer, self.model.get_params())

    # scheduler
    self.lr_scheduler = ReduceLROnPlateauWarmup(self.optimizer,
                                                cfg.scheduler.warmup,
                                                mode="max",
                                                patience=cfg.scheduler.patience,
                                                cooldown=cfg.scheduler.cooldown)

    if load_ckpt != "":
        self.logger.info(f"Load checkpoint {load_ckpt}")
        self.model.load_checkpoint(load_ckpt)
def __init__(self, opt, logger=None):
    super(Model, self).__init__()
    self.opt = opt
    self.logger = logger

    kargs = {}
    if opt.scale:
        min_size = opt.scale
        max_size = int(min_size / 3 * 4)
        kargs = {
            'min_size': min_size,
            'max_size': max_size,
        }

    kargs.update({'box_nms_thresh': nms_thresh})

    # define the backbone and the Faster RCNN model
    if opt.backbone is None or opt.backbone.lower() in ['res50', 'resnet50']:
        # default: resnet50 with FPN
        self.detector = fasterrcnn_resnet50_fpn(pretrained=False, **kargs)
        in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        self.detector.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, opt.num_classes + 1)

    elif opt.backbone.lower() in ['vgg16', 'vgg']:
        backbone = vgg16_backbone()
        self.detector = FasterRCNN(backbone, num_classes=opt.num_classes + 1, **kargs)

    elif opt.backbone.lower() in ['res101', 'resnet101']:
        # resnet101 without FPN
        backbone = res101_backbone()
        self.detector = FasterRCNN(backbone, num_classes=opt.num_classes + 1, **kargs)

    elif opt.backbone.lower() in ['res', 'resnet']:
        raise RuntimeError(
            f'backbone "{opt.backbone}" is ambiguous, please specify layers.')

    else:
        raise NotImplementedError(f'no such backbone: {opt.backbone}')

    print_network(self.detector)

    self.optimizer = get_optimizer(opt, self.detector)
    self.scheduler = get_scheduler(opt, self.optimizer)

    self.avg_meters = ExponentialMovingAverage(0.95)
    self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
def __init__(self, opt):
    super(Model, self).__init__()
    self.opt = opt
    self.classifier = Classifier(opt.model)
    # self.classifier.apply(weights_init)  # initialize weights
    print_network(self.classifier)

    self.optimizer = get_optimizer(opt, self.classifier)
    self.scheduler = get_scheduler(opt, self.optimizer)
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    pretrainer = get_pretrainer(args, model, loss_fn, optimizer)
    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @pretrainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Pretraining")

    @pretrainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'pretrain/iter', engine.state, engine.state.iteration)

    @pretrainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)

    """
    @pretrainer.on(Events.COMPLETED)
    def unfreeze_language_model(engine):
        for param in model.module.language_model.base_model.parameters():
            param.requires_grad = True
    """

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        log_results_cmd('valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'],
                  model, vocab)
        evaluate_by_logic_level(args, model, iterator=iters['val'])

    if args.pretrain_epochs > 0:
        pretrainer.run(iters['pretrain'], max_epochs=args.pretrain_epochs)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
def input_stream():
    model = TinyYolo()
    train_loader, test_loader, val_loader = getdatasets('./data/', batch_size=16)
    for i, train_data in enumerate(train_loader):
        opt = get_optimizer(model, [0.00001])
        out, train_map = train(model, train_data, opt=opt, iou_thresh=0.1)
        print(out)
        print(train_map)
        print()
        print()
def train(config):
    # load Vocab
    vocab = data_reader.Vocab(vocab_limits=config['vocab_size'])
    vocab.load_metadata(config['metadata'])
    config['vocab_size'] = vocab.vocab_size()
    tf.logging.info(config)

    initializer = tf.random_uniform_initializer(-config['init_scale'],
                                                config['init_scale'])

    # create models
    with tf.name_scope('Train'):
        opt, lr = optimizer.get_optimizer("sgd", config['learning_rate'])
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            train_model = model.Model(is_training=True,
                                      config=config,
                                      optimizer=opt,
                                      lr=lr)

    with tf.name_scope('Generate'):
        generate_config = copy.deepcopy(config)
        generate_config['batch_size'] = 1
        generate_config['seq_length'] = 1
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            gen_model = model.Model(is_training=False, config=generate_config)

    sv = tf.train.Supervisor(logdir=config['logdir'])
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    tf.logging.info('Start Sess')
    with sv.managed_session(config=sess_config) as sess:
        for i in range(config['n_epoch']):
            lr_decay = config['lr_decay'] ** max(i + 1 - config['decay_epoch'], 0)
            train_model.assign_lr(sess, config['learning_rate'] * lr_decay)

            tf.logging.info('Iter %d Start, Learning_rate: %.4f'
                            % (i, sess.run(train_model.lr)))
            costs, speed = run_epoch(sess, train_model,
                                     datapath=config['train_data'][0],
                                     config=config,
                                     is_training=True,
                                     gen_model=gen_model,
                                     vocab=vocab)
            tf.logging.info('Iter %d: training_loss:%.4f, speed %.4f words/sec'
                            % (i, np.exp(costs), speed))
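# NOTE: the two TensorFlow snippets above call optimizer.get_optimizer(name, learning_rate)
# and expect back an optimizer plus the learning-rate tensor that the model later updates
# via assign_lr. A minimal TF1-style sketch of that contract (an assumption for
# illustration, not the repositories' actual code) could look like this:
import tensorflow as tf


def get_optimizer(name, learning_rate):
    # keep the learning rate in a variable so the caller can decay it between epochs
    lr = tf.Variable(learning_rate, trainable=False, dtype=tf.float32, name='lr')
    if name == 'sgd':
        opt = tf.train.GradientDescentOptimizer(lr)
    elif name == 'adam':
        opt = tf.train.AdamOptimizer(lr)
    else:
        raise ValueError('unknown optimizer: %s' % name)
    return opt, lr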
def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) writer = prep_experiment(args, parser) train_loader, val_loader, train_obj = datasets.setup_loaders(args) criterion, criterion_val = loss.get_loss(args) net = network.get_net(args, criterion) optim, scheduler = optimizer.get_optimizer(args, net) if args.fix_bn: net.apply(set_bn_eval) print("Fix bn for finetuning") if args.fp16: net, optim = amp.initialize(net, optim, opt_level="O1") net = network.wrap_network_in_dataparallel(net, args.apex) if args.snapshot: optimizer.load_weights(net, optim, args.snapshot, args.restore_optimizer) if args.evaluateF: assert args.snapshot is not None, "must load weights for evaluation" evaluate(val_loader, net, args) return torch.cuda.empty_cache() # Main Loop for epoch in range(args.start_epoch, args.max_epoch): # Update EPOCH CTR cfg.immutable(False) cfg.EPOCH = epoch cfg.immutable(True) scheduler.step() train(train_loader, net, optim, epoch, writer) if args.apex: train_loader.sampler.set_epoch(epoch + 1) validate(val_loader, net, criterion_val, optim, epoch, writer) if args.class_uniform_pct: if epoch >= args.max_cu_epoch: train_obj.build_epoch(cut=True) if args.apex: train_loader.sampler.set_num_samples() else: train_obj.build_epoch()
def _initialize(self, batch_steps_per_epoch, output_path):
    self.cost = get_cost(self.net.logits, self.tgt, self.cost_kwargs)

    self.optimizer, self.ema, self.learning_rate_node = get_optimizer(
        self.cost, self.global_step, batch_steps_per_epoch, self.opt_kwargs)

    init = tf.global_variables_initializer()
    if output_path is not None:
        output_path = os.path.abspath(output_path)
        if not os.path.exists(output_path):
            print("Allocating '{:}'".format(output_path))
            os.makedirs(output_path)
    return init
def train_new_model(model, train_queue, valid_queue, test_queue):
    ori_model = model.module if args.distributed else model
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)
    drop_layers = ori_model.drop_layers()
    criterion = get_criterion(args.classes, args.label_smoothing)

    for epoch in range(args.epochs):
        scheduler.step()
        if args.warmup and epoch < args.warmup_epochs:
            lr = args.learning_rate * epoch / args.warmup_epochs + args.warmup_lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            cond_logging('epoch %d lr %e', epoch, lr)
        else:
            lr = scheduler.get_lr()[0]
            cond_logging('epoch %d lr %e', epoch, lr)

        if args.distributed:
            train_queue.sampler.set_epoch(epoch)

        if args.epd:
            drop_rate = args.drop_rate * epoch / args.epochs
        else:
            drop_rate = args.drop_rate
        drop_rates = [drop_rate] * drop_layers
        if args.layerd:
            for i in range(drop_layers):
                drop_rates[i] = drop_rates[i] * (i + 1) / drop_layers
        ori_model.set_drop_rates(drop_rates)
        cond_logging('drop rates:')
        cond_logging(ori_model.drop_rates)

        # training
        train_acc, train_obj = train(train_queue, model, criterion, optimizer, lr,
                                     args.report_freq, args.world_size,
                                     args.distributed, args.local_rank)
        cond_logging('train acc %f', train_acc)

        # validation
        drop_rates = [0] * drop_layers
        ori_model.set_drop_rates(drop_rates)
        valid_acc, valid_obj = infer(valid_queue, model, criterion, args.report_freq,
                                     args.world_size, args.distributed, args.local_rank)
        cond_logging('valid acc %f', valid_acc)

        test_acc, test_obj = infer(test_queue, model, criterion, args.report_freq,
                                   args.world_size, args.distributed, args.local_rank)
        cond_logging('test acc %f', test_acc)
    return model
def train_network(args, network=None, data_set=None):
    device = torch.device("cuda" if args.gpu_no >= 0 else "cpu")

    if network is None:
        if args.data_set == 'CIFAR10':
            if 'vgg' in args.network:
                network = VGG(args.network, args.data_set)
    network = network.to(device)
    print(network)

    if data_set is None:
        data_set = get_data_set(args, train_flag=True)

    loss_calculator = Loss_Calculator()

    optimizer, scheduler = get_optimizer(network, args)

    if args.resume_flag:
        check_point = torch.load(args.load_path)
        network.load_state_dict(check_point['state_dict'])
        loss_calculator.loss_seq = check_point['loss_seq']
        args.start_epoch = check_point['epoch']  # update start epoch

    print("Start at %s" % time.ctime())
    print("-*-" * 10 + "\n\tTrain network\n" + "-*-" * 10)
    for epoch in range(args.start_epoch, args.epoch):
        print("---------- EPOCH %d ----------" % (epoch + 1))
        # make shuffled data loader
        data_loader = torch.utils.data.DataLoader(data_set,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)

        # train one epoch
        train_step(network, data_loader, loss_calculator, optimizer, device,
                   epoch, args.print_freq)

        # adjust learning rate
        if scheduler is not None:
            scheduler.step()

        torch.save({'epoch': epoch + 1,
                    'state_dict': network.state_dict(),
                    'loss_seq': loss_calculator.loss_seq},
                   args.save_path + args.network + '_checkpoint.pth')

    print("End at %s" % time.ctime())
    return network
def load_model(dataset, rc, experiment_name):
    loss = LossCombiner(4, dataset.class_weights, NllLoss)
    transformer = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    model = JointAspectTagger(transformer, rc, 4, 20, dataset.target_names)
    optimizer = get_optimizer(model, rc)
    trainer = Trainer(model, loss, optimizer, rc, dataset, experiment_name,
                      enable_tensorboard=False, verbose=False)
    return trainer
def run_WrappedVOCSBDSegmentation5i_network():
    from torch.utils import data
    from model.head.pgn import PGN
    import torch.nn.functional as F
    from loss import cross_entropy2d
    from optimizer import get_optimizer

    batch_size = 4
    epoch = 1

    train_set = WrappedVOCSBDSegmentation5i(root=roots_path,
                                            fold=1,  # remember to run both train and test set
                                            split='test',
                                            rebuild_mask=False,
                                            img_size=224)
    train_loader = data.DataLoader(train_set, batch_size=batch_size,
                                   shuffle=True, num_workers=8)

    model = PGN()
    optim = get_optimizer()(model.parameters(), lr=0.0025, momentum=0.9,
                            dampening=0, weight_decay=0, nesterov=False)

    for e in range(epoch):
        for i_iter, batch in enumerate(train_loader):
            Is, Ys, Iq, Yq, sample_class, _, _ = batch
            Ys, Yq = Ys.unsqueeze(1).float(), Yq.unsqueeze(1).float()

            pred = model(Is, Ys, Iq)
            pred = F.interpolate(pred, size=Yq.size()[-2:], mode='bilinear',
                                 align_corners=True)
            loss = cross_entropy2d(pred, Yq.long())

            optim.zero_grad()
            loss.backward()
            optim.step()
            print(loss.item(), sample_class)
def __init__(self, opt):
    super(Model, self).__init__()
    self.opt = opt
    self.cleaner = FFA().to(device=opt.device)

    #####################
    #    Init weights
    #####################
    # normal_init(self.cleaner)

    print_network(self.cleaner)

    self.g_optimizer = get_optimizer(opt, self.cleaner)
    self.scheduler = get_scheduler(opt, self.g_optimizer)

    self.avg_meters = ExponentialMovingAverage(0.95)
    self.save_dir = os.path.join(opt.checkpoint_dir, opt.tag)
def search_grid_epochs(dataset, epochs, learnlist=[0.1], optimizer='sgd',
                       start_wid=0, test_size=1000):
    train_step, init_op, reset_opt = get_optimizer(optimizer)
    worker_time = Timer()
    with tf.Session() as sess:
        for wid, learn_rate in enumerate(learnlist):
            step = 0
            sess.run(init_op)
            for e in range(epochs):
                step = train_epochs(sess, wid + start_wid, 1, step, learn_rate,
                                    dataset, test_size, train_step)
            print('# worker time %3.1fs' % worker_time.split())
def main():
    # spawn theano vars
    xl = T.matrix('xl')
    xr = T.matrix('xr')
    y = T.ivector('y')
    learning_rate = T.scalar('learning_rate')
    trng = RandomStreams(4321)

    # use test values
    """
    import numpy as np
    batch_size = 10
    theano.config.compute_test_value = 'raise'
    xl.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    xr.tag.test_value = np.random.randn(batch_size, 392).astype(floatX)
    y.tag.test_value = np.random.randint(8, size=batch_size).astype(np.int32)
    learning_rate.tag.test_value = 0.5
    """

    # build cgs
    model_l, model_r, model_b = build_model(
        xl, xr, y, learning_rate, trng=trng, **options)

    # compile
    opt = get_optimizer(options['optimizer'])
    f_train_l = opt(learning_rate, model_l, [xl, y])
    f_train_r = opt(learning_rate, model_r, [xr, y])
    f_train_b = opt(learning_rate, model_b, [xl, xr, y], return_alpha=True)

    # compile validation/test functions
    f_valid_l = theano.function([xl, y], [model_l.cost, model_l.acc])
    f_valid_r = theano.function([xr, y], [model_r.cost, model_r.acc])
    f_valid_b = theano.function([xl, xr, y], [model_b.cost, model_b.acc])

    # training loop
    train(f_train_l, f_train_r, f_train_b,
          f_valid_l, f_valid_r, f_valid_b,
          xl, xr, y, **options)