def test_partial_trainability():
    # Define some model to start with
    model_keras = model_zoo.get_model("VGG_small_4", 32, 3, 2)
    # model_keras = model_zoo.get_model("LeNet5_do", 32, 3, 2)

    Layer_types_orig = [layer.__class__.__name__ for layer in model_keras.layers]
    Layer_names_orig = [layer.name for layer in model_keras.layers]

    # Find the Dense and Conv layers
    is_dense_or_conv = [layer_type in ["Dense", "Conv2D"] for layer_type in Layer_types_orig]
    index = np.where(np.array(is_dense_or_conv))[0]
    # These are the layers that appear in the table in AID
    Layer_names = np.array(Layer_names_orig)[index]

    # Provide a list of layer names to change and the corresponding trainabilities
    Layer_names = [Layer_names[0], Layer_names[1], Layer_names[2]]
    Layer_trainabilities = [0.2, 0.4, 0.6]
    model_keras_new = partial_trainability(model_keras, Layer_names, Layer_trainabilities)

    shape = list(model_keras_new.layers[0].input_shape)
    shape[0] = 1
    img_rndm = np.random.randint(low=0, high=255, size=shape)
    img_rndm = img_rndm.astype(float) / 255.0

    # Both models should perform identically
    p1 = model_keras.predict(img_rndm)
    p2 = model_keras_new.predict(img_rndm)
    assert np.allclose(p1, p2)

    # Also start a fitting process on 250 random samples
    shape_tr = shape
    shape_tr[0] = 250
    train_x = np.random.randint(low=0, high=255, size=shape_tr)
    train_x = train_x.astype(float) / 255.0
    train_y = np.r_[np.repeat(0, 125), np.repeat(1, 125)]
    train_y_ = to_categorical(train_y, 2)
    model_keras_new.fit(train_x, train_y_, epochs=1)
def __init__(self, prefix, epoch, im_size=128, ctx_id=0):
    print('loading', prefix, epoch)
    if ctx_id >= 0:
        ctx = mx.gpu(ctx_id)
    else:
        ctx = mx.cpu()
    image_size = (im_size, im_size)
    # Load the recognition network and keep only the embedding output
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers['fc1_output']
    self.image_size = image_size
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(for_training=False,
               data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    self.model = model
    # Face detector used to locate faces before computing embeddings
    self.detector = model_zoo.get_model('retinaface_mnet025_v1')
    self.detector.prepare(ctx_id=ctx_id)
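# --- Hedged sketch (not from the original code) ------------------------------
# The constructor above only loads the recognition network and the detector.
# A typical next step is a method on the same class that returns the fc1
# embedding for an aligned face crop. The method name `get_embedding` and the
# BGR-input assumption are illustrative assumptions, not the original API;
# `img` is assumed to already be resized to self.image_size.
def get_embedding(self, img):
    import cv2
    import numpy as np
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)           # BGR -> RGB
    blob = np.transpose(rgb, (2, 0, 1))[np.newaxis, :]    # HWC -> NCHW
    batch = mx.io.DataBatch(data=(mx.nd.array(blob),))
    self.model.forward(batch, is_train=False)             # forward to fc1_output
    return self.model.get_outputs()[0].asnumpy().flatten()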
def init_model(self):
    if isinstance(self.model, str):
        if os.path.isfile(self.model):
            # A path to a saved model file: load it directly
            return Model(file=self.model,
                         binary=self.binary,
                         regularizers=self.regularizers,
                         class_weights=self.class_weights,
                         recompile=getattr(self, 'recompile', False),
                         optimizer=getattr(self, 'optimizer', None),
                         loss=getattr(self, 'loss', None),
                         metrics=getattr(self, 'metrics', None))
        else:
            # Otherwise treat the string as an architecture name from the model zoo
            self.model = get_model(name=self.model,
                                   in_shape=self.in_shape,
                                   n_classes=self.n_classes,
                                   backend=self.backend)
    return Model(keras_model=self.model,
                 binary=self.binary,
                 optimizer=self.optimizer,
                 loss=self.loss,
                 metrics=self.metrics,
                 regularizers=self.regularizers,
                 class_weights=self.class_weights)
def train():
    train_input, train_target, val_input, val_target, test_input, test_target = load_data()
    img_size = train_input.shape[1]
    num_channels = train_input.shape[-1]
    num_classes = train_target.shape[1]
    num_samples = train_input.shape[0]

    if PRETRAIN:
        assert PREPROCESSING == 'auto'
    model, preprocess_input = get_model(img_size, num_channels, num_classes,
                                        model_name=MODEL_NAME, pretrain=PRETRAIN)

    if CHECKPOINT_FILE is not None:
        assert os.path.exists(CHECKPOINT_FILE)
        model.load_weights(CHECKPOINT_FILE)
        epoch = int(CHECKPOINT_FILE.split('_')[-2][1:]) + 1
        lr_init = learning_rate_function(LR_INIT, epoch)
        print("Loaded weights from {}".format(CHECKPOINT_FILE))
    else:
        epoch = 0
        lr_init = LR_INIT

    shape_str = "({0}, {0}, {1})".format(img_size, num_channels)
    print("Finished loading {0} with {1} input shape (pretrained: {2})".format(
        MODEL_NAME, shape_str, PRETRAIN))

    if PREPROCESSING == 'auto':
        train_input = preprocess_input(train_input)
        test_input = preprocess_input(test_input)
        val_input = preprocess_input(val_input)
    elif PREPROCESSING == 'standard':
        # Per-channel standardization using training-set statistics
        row_axis, col_axis, channel_axis = 1, 2, 3
        mean = np.mean(train_input, axis=(0, row_axis, col_axis))
        broadcast_shape = [1, 1, 1]
        broadcast_shape[channel_axis - 1] = train_input.shape[channel_axis]
        mean = np.reshape(mean, broadcast_shape)
        train_input -= mean
        test_input -= mean
        val_input -= mean
        std = np.std(train_input, axis=(0, row_axis, col_axis))
        broadcast_shape = [1, 1, 1]
        broadcast_shape[channel_axis - 1] = train_input.shape[channel_axis]
        std = np.reshape(std, broadcast_shape)
        train_input /= (std + EPSILON)
        test_input /= (std + EPSILON)
        val_input /= (std + EPSILON)
    elif PREPROCESSING == 'simple':
        train_input /= 255.
        test_input /= 255.
        val_input /= 255.
    print("Finished preprocessing data using method: {}".format(PREPROCESSING))

    optimizer = Adam(lr=lr_init, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    print("Constructed and compiled model")

    if not TEST_ONLY:
        ###### Start training ######
        for _ in range(EPOCHS_TRAINING):  # _ is unused; the epoch counter is tracked separately
            print()
            print("Starting training for epoch {}".format(epoch))
            shuffle([train_input, train_target])
            generator = get_data_gen(train_input, train_target, class_weights=None)
            K.set_value(model.optimizer.lr, learning_rate_function(LR_INIT, epoch))
            progbar = generic_utils.Progbar(num_samples, interval=0.0)
            for input_batch, target_batch in generator:
                loss = model.train_on_batch(input_batch, target_batch)
                progbar.add(BATCH_SIZE, values=[("train_loss", loss)])
            print()
            val_auc_scores = test(model, val_input, val_target)
            mean_score = np.mean(val_auc_scores)
            if (epoch + 1) % CHECKPOINT_INTERVAL == 0:
                model_file = os.path.join(OUTDIR, "checkpoint_E%s_F%s.h5" %
                                          (epoch, round(mean_score, 3)))
                model.save_weights(model_file)
                print("Saved weights to {}".format(model_file))
            epoch += 1
        ###### Finished training ######

    print("Test results:")
    test_auc_scores = test(model, test_input, test_target)
    print(test_auc_scores)
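# --- Hypothetical helper (assumption, not from the original code) ------------
# `learning_rate_function` is referenced above but not defined in this snippet.
# A plausible step-decay schedule matching the call signature
# learning_rate_function(LR_INIT, epoch) could look like this:
def learning_rate_function(lr_init, epoch, drop=0.5, epochs_per_drop=10):
    # Halve the learning rate every `epochs_per_drop` epochs
    return lr_init * (drop ** (epoch // epochs_per_drop))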
train_dataset = DistractorDataset(image_path=args.dataset,
                                  csv_data_file=os.path.join(args.csv_path, 'distractor_train.csv'),
                                  transform=train_transforms)
val_dataset = DistractorDataset(image_path=args.dataset,
                                csv_data_file=os.path.join(args.csv_path, 'distractor_test.csv'),
                                transform=val_transforms)

train_dataloader = DataLoader(train_dataset, batch_size=args.bs, shuffle=True,
                              num_workers=args.n_threads)
val_dataloader = DataLoader(val_dataset, batch_size=args.bs, shuffle=False,
                            num_workers=args.n_threads)

net = model_zoo.get_model(args)
net = nn.DataParallel(net)
net.cuda()

optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.wd)
# Scheduler: decay the learning rate by 10x at epochs 50 and 100
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100], gamma=0.1)
# Criterion
criterion = nn.CrossEntropyLoss().cuda()

train_losses = []
val_losses = []
val_acc = []
prev_model = None
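# --- Hedged sketch (not from the original code) -------------------------------
# The snippet stops after setting up the optimizer, scheduler, and criterion.
# A conventional PyTorch epoch loop that would typically follow is sketched
# below; `args.epochs`, `import torch`, and the accuracy bookkeeping are
# assumptions, not taken from the original.
import torch

for epoch in range(args.epochs):
    net.train()
    running_loss = 0.0
    for images, labels in train_dataloader:
        images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        loss = criterion(net(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()  # apply the MultiStepLR decay once per epoch
    train_losses.append(running_loss / len(train_dataloader))

    net.eval()
    correct, total, val_loss = 0, 0, 0.0
    with torch.no_grad():
        for images, labels in val_dataloader:
            images, labels = images.cuda(), labels.cuda()
            outputs = net(images)
            val_loss += criterion(outputs, labels).item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    val_losses.append(val_loss / len(val_dataloader))
    val_acc.append(correct / total)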
def main():
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5, verbose=False)

    if opt.kvstore is not None:
        kv = mx.kvstore.create(opt.kvstore)
        logger.info('Distributed training with %d workers and current rank is %d' %
                    (kv.num_workers, kv.rank))
    if opt.use_amp:
        amp.init()

    batch_size = opt.batch_size
    classes = opt.num_classes

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    logger.info('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]

    if opt.slowfast:
        optimizer = 'nag'
    else:
        optimizer = 'sgd'

    if opt.clip_grad > 0:
        optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd,
                            'momentum': opt.momentum, 'clip_gradient': opt.clip_grad}
    else:
        optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd,
                            'momentum': opt.momentum}

    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    model_name = opt.model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag

    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    use_tsn=opt.use_tsn, num_segments=opt.num_segments,
                    partial_bn=opt.partial_bn, input_channel=opt.input_channel)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    logger.info(net)

    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    if opt.kvstore is not None:
        train_data, val_data, batch_fn = get_data_loader(opt, batch_size, num_workers,
                                                         logger, kv)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt, batch_size, num_workers,
                                                         logger)

    num_batches = len(train_data)
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=opt.warmup_lr, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])
    optimizer_params['lr_scheduler'] = lr_scheduler

    train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    def get_diff(input_data, new_length=5):
        # Temporal difference: subtract each frame from the next along axis 3
        assert input_data.shape[3] == new_length + 1
        fron = input_data.slice_axis(axis=3, begin=1, end=new_length + 1).copy()
        last = input_data.slice_axis(axis=3, begin=0, end=new_length)
        fron = fron - last
        return fron

    def test(ctx, val_data, kvstore=None):
        acc_top1.reset()
        acc_top5.reset()
        L = gluon.loss.SoftmaxCrossEntropyLoss()
        num_test_iter = len(val_data)
        val_loss_epoch = 0
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = []
            for _, X in enumerate(data):
                # X = X.reshape((-1,) + X.shape[2:])
                X = get_diff(X, new_length=opt.new_length)
                X = X.reshape((-3, -3, -2))
                pred = net(X.astype(opt.dtype, copy=False))
                outputs.append(pred)
            loss = [L(yhat, y.astype(opt.dtype, copy=False))
                    for yhat, y in zip(outputs, label)]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            val_loss_epoch += sum([l.mean().asscalar() for l in loss]) / len(loss)

            if opt.log_interval and not (i + 1) % opt.log_interval:
                logger.info('Batch [%04d]/[%04d]: evaluated' % (i, num_test_iter))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        val_loss = val_loss_epoch / num_test_iter

        if kvstore is not None:
            # Average the validation metrics across workers via the kvstore
            top1_nd = nd.zeros(1)
            top5_nd = nd.zeros(1)
            val_loss_nd = nd.zeros(1)
            kvstore.push(111111, nd.array(np.array([top1])))
            kvstore.pull(111111, out=top1_nd)
            kvstore.push(555555, nd.array(np.array([top5])))
            kvstore.pull(555555, out=top5_nd)
            kvstore.push(999999, nd.array(np.array([val_loss])))
            kvstore.pull(999999, out=val_loss_nd)
            top1 = top1_nd.asnumpy() / kvstore.num_workers
            top5 = top5_nd.asnumpy() / kvstore.num_workers
            val_loss = val_loss_nd.asnumpy() / kvstore.num_workers

        return (top1, top5, val_loss)

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        if opt.partial_bn:
            train_patterns = None
            if 'inceptionv3' in opt.model:
                train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var'
            else:
                logger.info('Current model does not support partial batch normalization.')

            if opt.kvstore is not None:
                trainer = gluon.Trainer(net.collect_params(train_patterns), optimizer,
                                        optimizer_params, kvstore=kv,
                                        update_on_kvstore=False)
            else:
                trainer = gluon.Trainer(net.collect_params(train_patterns), optimizer,
                                        optimizer_params, update_on_kvstore=False)
        else:
            if opt.kvstore is not None:
                trainer = gluon.Trainer(net.collect_params(), optimizer,
                                        optimizer_params, kvstore=kv,
                                        update_on_kvstore=False)
            else:
                trainer = gluon.Trainer(net.collect_params(), optimizer,
                                        optimizer_params, update_on_kvstore=False)

        if opt.accumulate > 1:
            params = [p for p in net.collect_params().values() if p.grad_req != 'null']
            for p in params:
                p.grad_req = 'add'

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.use_amp:
            amp.init_trainer(trainer)

        L = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 0
        lr_decay_count = 0

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            btic = time.time()
            num_train_iter = len(train_data)
            train_loss_epoch = 0
            train_loss_iter = 0

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                with ag.record():
                    outputs = []
                    for _, X in enumerate(data):
                        # X = X.reshape((-1,) + X.shape[2:])
                        X = get_diff(X, new_length=opt.new_length)
                        X = X.reshape((-3, -3, -2))
                        pred = net(X.astype(opt.dtype, copy=False))
                        outputs.append(pred)
                    loss = [L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)]

                    if opt.use_amp:
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                    else:
                        ag.backward(loss)

                if opt.accumulate > 1 and (i + 1) % opt.accumulate == 0:
                    if opt.kvstore is not None:
                        trainer.step(batch_size * kv.num_workers * opt.accumulate)
                    else:
                        trainer.step(batch_size * opt.accumulate)
                    net.collect_params().zero_grad()
                else:
                    if opt.kvstore is not None:
                        trainer.step(batch_size * kv.num_workers)
                    else:
                        trainer.step(batch_size)

                train_metric.update(label, outputs)
                train_loss_iter = sum([l.mean().asscalar() for l in loss]) / len(loss)
                train_loss_epoch += train_loss_iter

                train_metric_name, train_metric_score = train_metric.get()
                sw.add_scalar(tag='train_acc_top1_iter',
                              value=train_metric_score * 100,
                              global_step=epoch * num_train_iter + i)
                sw.add_scalar(tag='train_loss_iter', value=train_loss_iter,
                              global_step=epoch * num_train_iter + i)
                sw.add_scalar(tag='learning_rate_iter', value=trainer.learning_rate,
                              global_step=epoch * num_train_iter + i)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    logger.info(
                        'Epoch[%03d] Batch [%04d]/[%04d]\tSpeed: %f samples/sec\t %s=%f\t loss=%f\t lr=%f'
                        % (epoch, i, num_train_iter,
                           batch_size * opt.log_interval / (time.time() - btic),
                           train_metric_name, train_metric_score * 100,
                           train_loss_epoch / (i + 1), trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))
            mx.ndarray.waitall()

            if opt.kvstore is not None and epoch == opt.resume_epoch:
                kv.init(111111, nd.zeros(1))
                kv.init(555555, nd.zeros(1))
                kv.init(999999, nd.zeros(1))

            if opt.kvstore is not None:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data, kv)
            else:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data)

            logger.info('[Epoch %03d] training: %s=%f\t loss=%f' %
                        (epoch, train_metric_name, train_metric_score * 100,
                         train_loss_epoch / num_train_iter))
            logger.info('[Epoch %03d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %03d] validation: acc-top1=%f acc-top5=%f loss=%f' %
                        (epoch, acc_top1_val * 100, acc_top5_val * 100, loss_val))

            sw.add_scalar(tag='train_loss_epoch',
                          value=train_loss_epoch / num_train_iter, global_step=epoch)
            sw.add_scalar(tag='val_loss_epoch', value=loss_val, global_step=epoch)
            sw.add_scalar(tag='val_acc_top1_epoch', value=acc_top1_val * 100,
                          global_step=epoch)

            if acc_top1_val > best_val_score:
                best_val_score = acc_top1_val
                net.save_parameters('%s/%.4f-%s-%s-%03d-best.params' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
                trainer.save_states('%s/%.4f-%s-%s-%03d-best.states' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
            else:
                if opt.save_frequency and opt.save_dir and (epoch + 1) % opt.save_frequency == 0:
                    net.save_parameters('%s/%s-%s-%03d.params' %
                                        (opt.save_dir, opt.dataset, model_name, epoch))
                    trainer.save_states('%s/%s-%s-%03d.states' %
                                        (opt.save_dir, opt.dataset, model_name, epoch))

        # save the last model
        net.save_parameters('%s/%s-%s-%03d.params' %
                            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%s-%03d.states' %
                            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    train(context)
    sw.close()
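# --- Hedged illustration (assumed shapes, not from the original) --------------
# What `get_diff` and the (-3, -3, -2) reshape above do to a clip tensor before
# it is fed to the network. The shape (2, 4, 3, 6, 112, 112) is made up purely
# to satisfy get_diff's assertion on axis 3.
import mxnet as mx

new_length = 5
x = mx.nd.random.uniform(shape=(2, 4, 3, new_length + 1, 112, 112))

# Same operation as get_diff: difference between consecutive frames on axis 3
fron = x.slice_axis(axis=3, begin=1, end=new_length + 1).copy()
last = x.slice_axis(axis=3, begin=0, end=new_length)
diff = fron - last
print(diff.shape)    # (2, 4, 3, 5, 112, 112)

# MXNet reshape codes: -3 merges two consecutive axes, -2 copies the rest
merged = diff.reshape((-3, -3, -2))
print(merged.shape)  # (8, 15, 112, 112)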
optimizer_params = {
    'learning_rate': opt.lr,
    'wd': opt.wd,
    'momentum': opt.momentum
}
if opt.dtype != 'float32':
    optimizer_params['multi_precision'] = True

model_name = opt.model
net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                use_tsn=True, num_segments=opt.num_segments,
                partial_bn=opt.partial_bn, input_channel=opt.input_channel)
net.cast(opt.dtype)
net.collect_params().reset_ctx(ctx)
print(net)
net.hybridize(static_alloc=True, static_shape=True)

train_data, val_data, batch_fn = get_data_loader(opt, batch_size, num_workers, logger=None)

# Stochastic gradient descent
optimizer = 'sgd'
# Set parameters