def load_model(model_uri, ctx):
    """Load a Gluon model from a local path or an MLflow run.

    :param model_uri: URI of the MLflow model, e.g. a local path,
        ``s3://...``, ``runs:/<run_id>/path``, or ``models:/<name>/<version>``.
    :param ctx: MXNet context (CPU or GPU) onto which the parameters are loaded.
    :return: A Gluon model instance (``SymbolBlock``).
    """
    artifact_dir = _download_artifact_from_uri(artifact_uri=model_uri)
    save_prefix = os.path.join(artifact_dir, "data", _MODEL_SAVE_PATH)
    arch_file = save_prefix + "-symbol.json"
    params_file = save_prefix + "-0000.params"
    # Rebuild the network from the exported symbol graph, then bind weights.
    graph = sym.load(arch_file)
    data_input = sym.var('data', dtype='float32')
    model = gluon.SymbolBlock(graph, data_input)
    model.collect_params().load(params_file, ctx)
    return model
def install_backbone(net, creator, featnames, fixed, pretrained):
    """Attach a (possibly pretrained) feature-extractor backbone to ``net``.

    Freezes every backbone parameter whose name matches one of the ``fixed``
    patterns, then exposes the requested internal feature maps as a
    ``SymbolBlock`` stored on ``net.backbone`` (sharing the backbone's params).
    """
    with net.name_scope():
        backbone = creator(pretrained=pretrained)
        prefix = backbone.name
        # Freeze (grad_req='null') any parameter matching a fixed pattern.
        for pname, param in backbone.collect_params().items():
            if any(prefix + '_' + pat + '_' in pname for pat in fixed):
                print('fix parameter', pname)
                param.grad_req = 'null'
        # Trace the backbone symbolically and tap the requested internal outputs.
        data = mx.sym.var('data')
        graph_internals = backbone(data).get_internals()
        feats = [
            graph_internals['_'.join([backbone.name, fname, 'output'])]
            for fname in featnames
        ]
        net.backbone = gl.SymbolBlock(feats, data, params=backbone.collect_params())
def getDenseNet(num_classes, ctx):
    """Build a densenet201 feature extractor exposing three internal layers.

    Copies ImageNet-pretrained features into a fresh densenet201 with a new
    ``num_classes``-way head, then wraps the traced graph in a SymbolBlock.
    """
    pretrained = vision.densenet201(pretrained=True, ctx=ctx)
    net = vision.densenet201(classes=num_classes, prefix='densenet0_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True)
        net.output.collect_params().initialize(
            mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
            ctx=ctx)
    net.features = pretrained.features
    net.collect_params().reset_ctx(ctx)
    data = mx.sym.var('data')
    internals = net(data).get_internals()
    taps = [
        internals['densenet0_conv3_fwd_output'],
        internals['densenet0_stage4_concat15_output'],
        internals['densenet0_dense1_fwd_output'],
    ]
    feat_model = gluon.SymbolBlock(taps, data, params=net.collect_params())
    feat_model._prefix = 'densenet0_'
    return feat_model
def getResNet(num_classes, ctx, NoTraining=True):
    """Build a resnet101_v1 feature extractor exposing selected internal layers.

    When ``NoTraining`` is True every parameter is frozen (grad_req='null').
    """
    pretrained = vision.resnet101_v1(pretrained=True, ctx=ctx)
    net = vision.resnet101_v1(classes=num_classes, prefix='resnetv10_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True,
                              in_units=pretrained.output._in_units)
        net.output.collect_params().initialize(
            mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
            ctx=ctx)
    net.features = pretrained.features
    net.collect_params().reset_ctx(ctx)
    data = mx.sym.var('data')
    internals = net(data).get_internals()
    taps = [
        internals['resnetv10_stage3_activation19_output'],
        internals['resnetv10_stage3_activation22_output'],
        internals['resnetv10_stage4_activation2_output'],
        internals['resnetv10_dense1_fwd_output'],
    ]
    feat_model = gluon.SymbolBlock(taps, data, params=net.collect_params())
    feat_model._prefix = 'resnetv10_'
    if NoTraining:
        feat_model.collect_params().setattr('grad_req', 'null')
    return feat_model
def test_sparse_symbol_block():
    # Build a tiny symbolic graph whose weight is stored row-sparse.
    data = mx.sym.var('data')
    weight = mx.sym.var('weight', stype='row_sparse')
    bias = mx.sym.var('bias')
    out = mx.sym.broadcast_add(mx.sym.dot(data, weight), bias)
    # an exception is expected when creating a SparseBlock w/ sparse param
    # NOTE(review): nothing here catches or asserts that exception — if
    # SymbolBlock ever stops raising, the test silently passes. Presumably
    # the surrounding harness uses assert_raises; confirm and wrap if not.
    net = gluon.SymbolBlock(out, data)
def model_fn(model_dir):
    """Load the serialized Gluon model from ``model_dir`` for CPU serving."""
    graph = mx.sym.load('%s/model.json' % model_dir)
    # Append a softmax head named to match the training label layer.
    with_softmax = mx.symbol.softmax(data=graph, name='softmax_label')
    data_in = mx.sym.var('data')
    shared_params = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(with_softmax, data_in, shared_params)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
def model_fn(model_dir):
    # Load an exported Gluon model for serving.
    # NOTE(review): the two prints below look like leftover debugging, and
    # `ctx` is not defined in this function — presumably a module-level
    # global. Verify it is assigned before this handler runs, otherwise
    # print(ctx) raises NameError.
    print('here')
    print(ctx)
    symbol = mx.sym.load('%s/model-symbol.json' % model_dir)
    # Append a softmax head named to match the training label layer.
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    net = gluon.SymbolBlock(outputs, inputs)
    net.load_parameters('%s/model-0000.params' % model_dir, ctx=ctx)
    return net
def load_model(model_uri, ctx, dst_path=None):
    """
    Load a Gluon model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param ctx: Either CPU or GPU.
    :param dst_path: The local filesystem path to which to download the model artifact.
                     This directory must already exist. If unspecified, a local output
                     path will be created.

    :return: A Gluon model instance.

    .. code-block:: python
        :caption: Example

        # Load persisted model as a Gluon model, make inferences against an NDArray
        model = mlflow.gluon.load_model("runs:/" + gluon_random_data_run.info.run_id + "/model")
        model(nd.array(np.random.rand(1000, 1, 32)))
    """
    # Imported lazily so mlflow does not require mxnet unless this flavor is used.
    import mxnet as mx
    from mxnet import gluon
    from mxnet import sym

    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    model_arch_path = os.path.join(local_model_path, "data", _MODEL_SAVE_PATH) + "-symbol.json"
    model_params_path = os.path.join(local_model_path, "data", _MODEL_SAVE_PATH) + "-0000.params"

    if Version(mx.__version__) >= Version("2.0.0"):
        # MXNet >= 2.0 provides a one-shot import API for exported models.
        return gluon.SymbolBlock.imports(
            model_arch_path, input_names=["data"], param_file=model_params_path, ctx=ctx
        )
    else:
        # Older MXNet: rebuild the graph manually and load the parameters.
        symbol = sym.load(model_arch_path)
        inputs = sym.var("data", dtype="float32")
        net = gluon.SymbolBlock(symbol, inputs)
        net.collect_params().load(model_params_path, ctx)
        return net
def model_fn(model_dir):
    """Deserialize the Gluon model stored under ``model_dir`` for CPU inference."""
    with open("{}/model.json".format(model_dir), "r") as model_file:
        model_json = model_file.read()
    graph = mx.sym.load_json(model_json)
    data_in = mx.sym.var("data")
    shared_params = gluon.ParameterDict("model_")
    net = gluon.SymbolBlock(graph, data_in, shared_params)
    # We will serve the model on CPU
    net.load_params("{}/model.params".format(model_dir), ctx=mx.cpu())
    return net
def __init__(self):
    """Build a resnet18 backbone tapping three stage activations, plus a 1x1 conv body."""
    super(Net, self).__init__()
    with self.name_scope():
        backbone = gluon.model_zoo.vision.resnet18_v1()
        data = mx.sym.var('data')
        featnames = ['stage1_activation0', 'stage2_activation0', 'stage3_activation0']
        internals = backbone(data).get_internals()
        # Collect the internal outputs named <backbone>_<feat>_output.
        taps = [
            internals['_'.join([backbone.name, name, 'output'])]
            for name in featnames
        ]
        self.backbone = gluon.SymbolBlock(taps, data, params=backbone.collect_params())
        self.body = nn.Conv2D(3, 1)
def test_symbol_block():
    # Build and init a small hybrid MLP.
    model = nn.HybridSequential()
    model.add(nn.Dense(128, activation='tanh'))
    model.add(nn.Dropout(0.5))
    model.add(nn.Dense(64, activation='tanh'), nn.Dense(32, in_units=64))
    model.add(nn.Activation('relu'))
    model.initialize()
    inputs = mx.sym.var('data')
    outputs = model(inputs).get_internals()
    # SymbolBlock over every internal node, sharing params with `model`.
    smodel = gluon.SymbolBlock(outputs, inputs, params=model.collect_params())
    assert len(smodel(mx.nd.zeros((16, 10)))) == 14
    out = smodel(mx.sym.var('in'))
    assert len(out) == len(outputs.list_outputs())

    class Net(nn.HybridBlock):
        def __init__(self, model):
            super(Net, self).__init__()
            self.model = model

        def hybrid_forward(self, F, x):
            out = self.model(x)
            # Collapse every internal output into a single scalar sum.
            return F.add_n(*[i.sum() for i in out])

    # A SymbolBlock must compose inside another hybridized block.
    net = Net(smodel)
    net.hybridize()
    assert isinstance(net(mx.nd.zeros((16, 10))), mx.nd.NDArray)

    # Repeat with a single-output (non-internals) SymbolBlock.
    inputs = mx.sym.var('data')
    outputs = model(inputs)
    smodel = gluon.SymbolBlock(outputs, inputs, params=model.collect_params())
    net = Net(smodel)
    net.hybridize()
    assert isinstance(net(mx.nd.zeros((16, 10))), mx.nd.NDArray)
def load_export(model_name, epoch_num):
    # Re-import an exported symbol+params checkpoint and re-export it as 'model'.
    sym = mx.sym.load(f'{model_name}-symbol.json')
    net = gluon.SymbolBlock([sym], [mx.sym.var('data')])
    weights_path = f'{model_name}-{epoch_num:04d}.params'
    net.load_parameters(weights_path, ctx=mx.cpu(), cast_dtype=True,
                        allow_missing=True, ignore_extra=True)
    # NOTE(review): calling initialize() after load_parameters() fills any
    # params missed by allow_missing=True with random Normal values —
    # confirm this is intended rather than masking an incomplete checkpoint.
    net.initialize(mx.init.Normal(), ctx=mx.cpu())
    net.collect_params().reset_ctx(mx.cpu())
    # Hybridize so export() can serialize the static graph.
    net.hybridize()
    net.export('model', epoch_num)
def get_pretrained_layer(net_train, layer_name):
    """Return a SymbolBlock emitting the ``layer_name`` output of ``net_train``.

    The returned block shares its parameters with ``net_train``.
    """
    data = mx.sym.var('data')
    internals = net_train(data).get_internals()
    layer_out = internals[layer_name + "_output"]
    return gluon.SymbolBlock(layer_out, data, params=net_train.collect_params())
def model_fn(model_dir):
    """
    Load the gluon model. Called once when hosting service starts.

    :param: model_dir The directory where model files are stored.
    :return: a model (in this case a Gluon network)
    """
    graph = mx.sym.load('%s/model.json' % model_dir)
    # Append a softmax head named to match the training label layer.
    with_softmax = mx.symbol.softmax(data=graph, name='softmax_label')
    data_in = mx.sym.var('data')
    shared_params = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(with_softmax, data_in, shared_params)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
def getResNet(num_classes, ctx, NoTraining=True):
    """Build a resnet101_v1 feature extractor exposing selected internal layers.

    A fresh resnet101_v1 with a ``num_classes``-way head takes its feature
    trunk from the ImageNet-pretrained model, and the traced graph is wrapped
    in a SymbolBlock tapping five internal outputs (batchnorm0, two stage-3
    activations, stage-4 activation, and the final dense layer).

    :param num_classes: Number of output classes for the new Dense head.
    :param ctx: MXNet context the parameters live on.
    :param NoTraining: When True, freeze all parameters (grad_req='null').
    :return: SymbolBlock sharing parameters with the assembled network.
    """
    # Fix: removed an unused `x = nd.random.uniform(...)` probe tensor and the
    # dead commented-out shape-debugging code — the allocation did real work on
    # `ctx` but its result was never used.
    resnet = vision.resnet101_v1(pretrained=True, ctx=ctx)
    net = vision.resnet101_v1(classes=num_classes, prefix='resnetv10_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True,
                              in_units=resnet.output._in_units)
        net.output.collect_params().initialize(
            mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
            ctx=ctx)
    net.features = resnet.features
    net.collect_params().reset_ctx(ctx)
    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [
        internals['resnetv10_batchnorm0_fwd_output'],
        internals['resnetv10_stage3_activation19_output'],
        internals['resnetv10_stage3_activation22_output'],
        internals['resnetv10_stage4_activation2_output'],
        internals['resnetv10_dense1_fwd_output'],
    ]
    feat_model = gluon.SymbolBlock(outputs, inputs, params=net.collect_params())
    feat_model._prefix = 'resnetv10_'
    if NoTraining:
        feat_model.collect_params().setattr('grad_req', 'null')
    return feat_model
def model_fn(model_dir):
    """Loads the Gluon model. Called once when hosting service starts.

    Args:
        model_dir (str): The directory where model files are stored.

    Returns:
        mxnet.gluon.block.Block: a Gluon network, plus its vocabulary.
    """
    graph = mx.sym.load('%s/model.json' % model_dir)
    vocab = vocab_from_json('%s/vocab.json' % model_dir)
    # Append a softmax head named to match the training label layer.
    with_softmax = mx.symbol.softmax(data=graph, name='softmax_label')
    data_in = mx.sym.var('data')
    shared_params = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(with_softmax, data_in, shared_params)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net, vocab
def model_fn(model_dir):
    """Load the gluon model. Called once when hosting service starts.

    Args:
        model_dir: The directory where model files are stored.

    Returns:
        a model (in this case a Gluon network)
    """
    graph = mx.sym.load("%s/model.json" % model_dir)
    # Append a softmax head named to match the training label layer.
    with_softmax = mx.symbol.softmax(data=graph, name="softmax_label")
    data_in = mx.sym.var("data")
    shared_params = gluon.ParameterDict("model_")
    net = gluon.SymbolBlock(with_softmax, data_in, shared_params)
    net.load_params("%s/model.params" % model_dir, ctx=mx.cpu())
    return net
def init_net(self, vgg):
    """Build a SymbolBlock exposing selected VGG conv outputs.

    The returned network shares its parameters with ``vgg``.

    :param vgg: A Gluon VGG block (prefix ``vgg0_``) to trace and tap.
    :return: ``gluon.SymbolBlock`` emitting the six tapped conv outputs.
    """
    # Trace the backbone symbolically so internal layer outputs can be tapped.
    data = mx.sym.var('data')
    internals = vgg(data).get_internals()
    print(internals.list_outputs())
    # Fix: renamed local from `list`, which shadowed the builtin.
    tap_outputs = [
        internals['vgg0_conv9_fwd_output'],
        internals['vgg0_conv0_fwd_output'],
        internals['vgg0_conv2_fwd_output'],
        internals['vgg0_conv4_fwd_output'],
        internals['vgg0_conv8_fwd_output'],
        internals['vgg0_conv12_fwd_output'],
    ]
    # Build the new network on top of the original graph and its weights.
    net = gluon.SymbolBlock(tap_outputs, data, params=vgg.collect_params())
    return net
def model_fn(model_dir):
    """
    Load the Gluon model for hosting.

    Arguments:
        model_dir -- SageMaker model directory.

    Returns:
        Gluon model
    """
    # Load the saved Gluon model and append a sigmoid output head.
    graph = mx.sym.load('%s/model.json' % model_dir)
    with_sigmoid = mx.sym.sigmoid(data=graph, name='sigmoid_label')
    data_in = mx.sym.var('data')
    shared_params = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(with_sigmoid, data_in, shared_params)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net
def test_symbol_block():
    # Small hybrid MLP used to exercise SymbolBlock.
    model = nn.HybridSequential()
    model.add(nn.Dense(128, activation='tanh'))
    model.add(nn.Dropout(0.5))
    model.add(nn.Dense(64, activation='tanh'), nn.Dense(32, in_units=64))
    model.add(nn.Activation('relu'))
    model.initialize()
    inputs = mx.sym.var('data')
    # Expose every internal node of the traced graph as an output.
    outputs = model(inputs).get_internals()
    smodel = gluon.SymbolBlock(outputs, inputs, params=model.collect_params())
    # 14 internal outputs are expected for this architecture.
    assert len(smodel(mx.nd.zeros((16, 10)))) == 14
    # A symbolic call must preserve the internal-output count as well.
    out = smodel(mx.sym.var('in'))
    assert len(out.get_internals().list_outputs()) == len(
        outputs.list_outputs())
def test():
    """Evaluate an MRDM checkpoint on a RecordIO validation set (top-1 accuracy)."""
    opt = parse_args()
    inputs = mx.sym.Variable('data')
    outputs = MRDM(inputs, num_class=opt.num_class, resolution=opt.resolution)
    net = gluon.SymbolBlock(outputs, inputs)
    # One context per requested GPU; weights are replicated across them.
    contex = [mx.gpu(x) for x in range(opt.num_gpus)]
    net.load_parameters(opt.weights_path, ctx=contex)
    # Static alloc/shape enables the fastest cached-graph execution path.
    net.hybridize(static_alloc=True, static_shape=True)
    val_data_ori = mx.io.ImageRecordIter(
        path_imgrec=opt.val_data_dir,
        preprocess_threads=6,
        shuffle=False,
        resize=512,
        batch_size=opt.batch_size,
        data_shape=(3, 512, 512),
    )
    val_data_ori.reset()
    acc_top1 = mx.metric.Accuracy()
    for i, batch in enumerate(val_data_ori):
        # Shard each batch across the available GPUs.
        data = gluon.utils.split_and_load(batch.data[0], ctx_list=contex, batch_axis=0)
        label = gluon.utils.split_and_load(batch.label[0], ctx_list=contex, batch_axis=0)
        logits = [net(X.astype('float32', copy=False)) for X in data]
        acc_top1.update(label, logits)
    name, acc = acc_top1.get()
    print('{}:{}'.format(name, acc))
def extracting_features():
    """Extract and save a feature vector for every image under ``args.db_dir``.

    Features are written into a mirrored directory tree under ``args.ft_dir``;
    images whose feature file already exists are skipped. The extractor is
    either a checkpointed symbol graph tapped at ``args.output_blobs`` or a
    gluon model-zoo classifier's ``features`` head.
    """
    # glob images
    im_paths = glob_files(args.db_dir, args.im_exts)
    # Mirror each image path into the feature tree, swapping the extension.
    feature_paths = [
        im_path.replace(args.db_dir, args.ft_dir).replace(
            os.path.splitext(os.path.basename(im_path))[1], args.ft_ext)
        for im_path in im_paths
    ]
    # generate feature directory
    [
        os.makedirs(os.path.dirname(feature_path))
        for feature_path in feature_paths
        if not os.path.exists(os.path.dirname(feature_path))
    ]
    # loading model
    use_symbol_block = False
    if len(args.symbol_prefix) > 0:
        # Checkpointed symbol graph: tap the requested internal blobs.
        use_symbol_block = True
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            args.symbol_prefix, 0)
        internals = sym.get_internals()
        inputs = internals['data']
        outputs_blobs = [
            internals[layer_name + '_output']
            for layer_name in args.output_blobs.split(',')
        ]
        inference = gluon.SymbolBlock(outputs_blobs, inputs)
    else:
        inference = gluon.model_zoo.vision.get_model(args.arch,
                                                     classes=args.num_classes)
    inference.load_params(args.weights, ctx=ctx)
    # Hybridize only the model-zoo path (SymbolBlock is already symbolic).
    if len(args.symbol_prefix) == 0:
        inference.hybridize()
    # extracting features
    global_start_time = timeit.default_timer()
    valid_counts = 0
    for im_idx, (im_path,
                 feature_path) in enumerate(zip(im_paths, feature_paths)):
        if not os.path.exists(feature_path):
            start_time = timeit.default_timer()
            im = image_processing(im_path)
            # image_processing may fail and return None; only move valid images.
            if im is not None:
                im = im.as_in_context(ctx)
            elapsed_time = timeit.default_timer()
            processing_time = elapsed_time - start_time
            start_time = timeit.default_timer()
            if use_symbol_block:
                features = inference(im)
            else:
                features = inference.features(im)
            feature_extracting_time = timeit.default_timer() - start_time
            features = features.asnumpy().flatten()
            feature = Signature(features)
            feature.save_features(feature_path)
            valid_counts += 1
            print(
                'Presssed [%d/%d]: %s \t Pre-processing time: %.2f ms\t Extracting features time: %.2f ms'
                % (im_idx, len(im_paths), im_path, processing_time * 1000,
                   feature_extracting_time * 1000))
        else:
            print('The feature file exists, skip the file')
    global_elapsed_time = timeit.default_timer() - global_start_time
    # NOTE(review): divides by valid_counts — raises ZeroDivisionError when
    # every feature file already exists. Confirm whether that case can occur.
    print(
        'Total elapsed time: %s \t Processed [%d] images \t Avg. time: %.2f ms'
        % (str(datetime.timedelta(seconds=global_elapsed_time)), valid_counts,
           global_elapsed_time * 1000 / valid_counts))
def debug_net(net):
    """Wrap ``net`` so it also emits the internal nodes listed in params.debug_nodes."""
    flow_in = symbol.var('flow')
    graph = net(flow_in).get_internals()
    debug_outs = [graph[node] for node in params.debug_nodes]
    return gluon.SymbolBlock(debug_outs, flow_in, params=net.collect_params())
def train(config: BasicConfig, data_df: pd.DataFrame) -> None:
    """Train a face-embedding classifier on top of a frozen-architecture
    InsightFace ResNet50 symbol graph.

    Builds the symbolic net (fc1 embeddings + classification FC), trains it
    with the losses enabled in ``all_losses`` (currently center loss only),
    applies LR warmup/step/cooldown scheduling, and exports snapshots.
    """
    res = prepare_experiment(config)
    if res is None:
        return None
    experiment_path, logger = res
    use_gpu = len(config.gpus) > 0
    if use_gpu:
        ctx = [mx.gpu(cur_idx) for cur_idx in config.gpus]
    else:
        ctx = [mx.cpu()]
    # train_val_ratio = 0.9
    # subj_dict = aggregate_subjects(list(data_df.index), data_df['SUBJECT_ID'])
    # train_subj, val_subj = split_data(subj_dict, train_val_ratio)
    # train_df = data_df.iloc[sum(list(train_subj.values()), [])]
    # val_pairs = sample_pairs(val_subj, config.val_num_sample)
    # val_idx, val_labels = unzip(val_pairs)
    # val_path_pairs = [(data_df['img_path'][left], data_df['img_path'][right]) for left, right in val_idx]
    train_df = data_df
    dataset = InfoDataset(train_df,
                          filter_fn=config.filter_fn,
                          augs=config.train_augmentations)
    # dataset = ImgRecDataset(config.extra_rec[0], augs=config.train_augmentations)
    # Uniform-subject sampling and shuffling are mutually exclusive.
    train_data = DataLoader(dataset,
                            batch_size=config.batch_size,
                            shuffle=not config.uniform_subjects,
                            sampler=UniformClassSampler(dataset)
                            if config.uniform_subjects else None,
                            last_batch='discard',
                            num_workers=config.num_workers,
                            pin_memory=use_gpu)
    # val_dataset = PairDataset(val_path_pairs, val_labels, augs=config.test_augmentations)
    # val_data = DataLoader(
    #     val_dataset,
    #     batch_size=config.batch_size,
    #     shuffle=False,
    #     num_workers=config.num_workers,
    #     pin_memory=use_gpu
    # )
    model_name = 'VGG2-ResNet50-Arcface'
    net_name = config.name
    # NOTE(review): weight_path is computed but the load_parameters call that
    # used it is commented out below — the net currently trains from random init.
    weight_path = str(
        Path.home() / f'.insightface/models/{model_name}/model-0000.params')
    sym_path = str(
        Path.home() / f'.insightface/models/{model_name}/model-symbol.json')
    num_subjects = dataset.num_labels
    # Schedule lengths are expressed in iterations, not epochs.
    warmup = config.warmup_epoch * len(train_data)
    lr = config.initial_lr
    cooldown = config.cooldown_epoch * len(train_data)
    lr_factor = config.lr_factor
    num_epoch = config.num_epochs
    momentum = config.momentum
    wd = config.weight_decay
    clip_gradient = config.clip_gradient
    lr_steps = config.steps
    snapshots_path = ensure_path(experiment_path / 'snapshots')
    # Take the fc1 embedding output of the pretrained-architecture graph.
    sym = mx.sym.load(str(sym_path))
    sym = sym.get_internals()['fc1_output']
    if config.normalize:
        # L2-normalize the embeddings and rescale by 32.
        norm_sym = mx.sym.sqrt(
            mx.sym.sum(sym**2, axis=1, keepdims=True) + 1e-6)
        sym = mx.sym.broadcast_div(sym, norm_sym, name='fc_normed') * 32
    embeddings = sym
    # Classification head: one 512-d weight row per training subject.
    fc_weights = mx.sym.Variable('fc_weight',
                                 shape=(num_subjects, 512),
                                 init=mx.initializer.Xavier(
                                     rnd_type='gaussian',
                                     factor_type="in",
                                     magnitude=2),
                                 lr_mult=1)
    if config.weight_normalize:
        fc_weights = mx.sym.L2Normalization(data=fc_weights,
                                            name='norm_fc_weight')
    sym = mx.sym.FullyConnected(sym,
                                weight=fc_weights,
                                num_hidden=num_subjects,
                                name='fc_classification',
                                no_bias=False)
    # Output both the embeddings (index 0) and the class logits (index 1).
    sym = mx.sym.Group([embeddings, sym])
    net = gluon.SymbolBlock([sym], [mx.sym.var('data')])
    # net.load_parameters(str(weight_path), ctx=mx.cpu(), cast_dtype=True,
    #                     allow_missing=True, ignore_extra=True)
    net.initialize(mx.init.Normal(), ctx=mx.cpu())
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    softmax = gluon.loss.SoftmaxCrossEntropyLoss()
    softmax.hybridize()
    center = gluonfr.loss.CenterLoss(num_subjects, 512, 1e2)
    center.initialize(ctx=mx.cpu())
    center.hybridize()
    center.collect_params().reset_ctx(ctx)
    arc = gluonfr.loss.ArcLoss(num_subjects, m=0.7, s=32, easy_margin=False)
    # Each entry: (name, fn(outputs, labels)); ots[0]=embeddings, ots[1]=logits.
    all_losses = [
        # ('softmax', lambda ots, gts: softmax(ots[1], gts)),
        # ('arc', lambda ots, gts: arc(ots[1], gts)),
        ('center', lambda ots, gts: center(ots[1], gts, ots[0]))
    ]
    # all_losses[1][1].initialize(mx.init.Normal(), ctx=mx.cpu())
    # all_losses[1][1].collect_params().reset_ctx(ctx)
    # [cur_loss[1].hybridize() for cur_loss in all_losses]
    if warmup > 0:
        start_lr = 1e-10
    else:
        start_lr = lr
    warmup_iter = 0
    end_iter = num_epoch * len(train_data)
    cooldown_start = end_iter - cooldown
    cooldown_iter = 0
    end_lr = 1e-10
    param_dict = net.collect_params()
    # Scale the classifier head's learning rate independently of the trunk.
    for key, val in param_dict._params.items():
        if key.startswith('fc_classification'):
            val.lr_mult *= config.classifier_mult
    trainer = mx.gluon.Trainer(
        param_dict, 'sgd', {
            'learning_rate': start_lr,
            'momentum': momentum,
            'wd': wd,
            'clip_gradient': clip_gradient
        })
    lr_counter = 0
    num_batch = len(train_data)
    for epoch in range(num_epoch):
        # Step-decay the LR at the configured epoch boundaries.
        if epoch == lr_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1
        tic = time()
        losses = [0] * len(all_losses)
        metric = mx.metric.Accuracy()
        logger.info(f' > training {epoch}')
        for i, batch in tqdm(enumerate(train_data), total=len(train_data)):
            # Linear LR warmup, then (near the end) linear cooldown.
            if warmup_iter < warmup:
                cur_lr = (warmup_iter + 1) * (lr - start_lr) / warmup + start_lr
                trainer.set_learning_rate(cur_lr)
                warmup_iter += 1
            elif cooldown_iter > cooldown_start:
                cur_lr = (end_iter - cooldown_iter) * (
                    trainer.learning_rate - end_lr) / cooldown + end_lr
                trainer.set_learning_rate(cur_lr)
                cooldown_iter += 1
            data = mx.gluon.utils.split_and_load(batch[0],
                                                 ctx_list=ctx,
                                                 even_split=False)
            gts = mx.gluon.utils.split_and_load(batch[1],
                                                ctx_list=ctx,
                                                even_split=False)
            with ag.record():
                outputs = [net(X) for X in data]
                # Abort on NaNs in either the forward outputs or the losses.
                if np.any([
                        np.any(np.isnan(o.asnumpy())) for os in outputs
                        for o in os
                ]):
                    print('OOps!')
                    raise RuntimeError
                cur_losses = [[cur_loss(o, l) for (o, l) in zip(outputs, gts)]
                              for _, cur_loss in all_losses]
                metric.update(gts, [ots[1] for ots in outputs])
                combined_losses = [cur[0] for cur in zip(*cur_losses)]
                if np.any(
                    [np.any(np.isnan(l.asnumpy())) for l in cur_losses[0]]):
                    print('OOps2!')
                    raise RuntimeError
            for combined_loss in combined_losses:
                combined_loss.backward()
            trainer.step(config.batch_size, ignore_stale_grad=True)
            for idx, cur_loss in enumerate(cur_losses):
                losses[idx] += sum([l.mean().asscalar()
                                    for l in cur_loss]) / len(cur_loss)
            # Periodic in-epoch snapshot + progress logging.
            if (i + 1) % 1000 == 0:
                # net.save_parameters(str(snapshots_path / f'{net_name}-{(epoch + 1):04d}.params'))
                net.export(str(snapshots_path / f'{net_name}_{i + 1}'),
                           epoch + 1)
                i_losses = [
                    sum([l.mean().asscalar()
                         for l in cur_loss]) / len(cur_loss)
                    for cur_loss in cur_losses
                ]
                losses_str = [
                    f'{l_name}: {i_losses[idx]:.3f}'
                    for idx, (l_name, _) in enumerate(all_losses)
                ]
                losses_str = '; '.join(losses_str)
                m_name, m_val = metric.get()
                losses_str += f'| {m_name}: {m_val}'
                logger.info(
                    f'[Epoch {epoch:03d}][{i+1}] {losses_str} | time: {time() - tic:.1f}'
                )
        # End-of-epoch snapshot and summary logging.
        if (epoch + 1) % config.save_epoch == 0:
            # net.save_parameters(str(snapshots_path / f'{net_name}-{(epoch + 1):04d}.params'))
            net.export(str(snapshots_path / f'{net_name}'), epoch + 1)
        losses = [l / num_batch for l in losses]
        losses_str = [
            f'{l_name}: {losses[idx]:.3f}'
            for idx, (l_name, _) in enumerate(all_losses)
        ]
        losses_str = '; '.join(losses_str)
        m_name, m_val = metric.get()
        losses_str += f'| {m_name}: {m_val}'
        logger.info(
            f'[Epoch {epoch:03d}] {losses_str} | time: {time() - tic:.1f}')
# Batched loaders over train_data / val_data (defined earlier in the script).
train_loader = mx.gluon.data.DataLoader(train_data,
                                        shuffle=True,
                                        batch_size=batch_size)
val_loader = mx.gluon.data.DataLoader(val_data,
                                      shuffle=False,
                                      batch_size=batch_size)

# create network
# Three-layer symbolic MLP (128 -> 64 -> 10) wrapped as a Gluon SymbolBlock.
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
net = gluon.SymbolBlock(outputs=[fc3], inputs=[data])
net.initialize(ctx=ctx)

# create trainer, metric
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={
        'learning_rate': 0.1,
        'momentum': 0.9,
        'wd': 0.00001
    },
)
metric = mx.metric.Accuracy()

# learn
def test_symbol_block():
    # Small hybrid MLP used throughout this test.
    model = nn.HybridSequential()
    model.add(nn.Dense(128, activation='tanh'))
    model.add(nn.Dropout(0.5))
    model.add(nn.Dense(64, activation='tanh'), nn.Dense(32, in_units=64))
    model.add(nn.Activation('relu'))
    model.initialize()
    inputs = mx.sym.var('data')
    outputs = model(inputs).get_internals()
    # SymbolBlock over every internal node, sharing params with `model`.
    smodel = gluon.SymbolBlock(outputs, inputs, params=model.collect_params())
    assert len(smodel(mx.nd.zeros((16, 10)))) == 14
    out = smodel(mx.sym.var('in'))
    assert len(out) == len(outputs.list_outputs())

    class Net(nn.HybridBlock):
        def __init__(self, model):
            super(Net, self).__init__()
            self.model = model

        def hybrid_forward(self, F, x):
            out = self.model(x)
            # Collapse every internal output into a single scalar sum.
            return F.add_n(*[i.sum() for i in out])

    # A SymbolBlock must compose inside another hybridized block.
    net = Net(smodel)
    net.hybridize()
    assert isinstance(net(mx.nd.zeros((16, 10))), mx.nd.NDArray)

    # Repeat with a single-output (non-internals) SymbolBlock.
    inputs = mx.sym.var('data')
    outputs = model(inputs)
    smodel = gluon.SymbolBlock(outputs, inputs, params=model.collect_params())
    net = Net(smodel)
    net.hybridize()
    assert isinstance(net(mx.nd.zeros((16, 10))), mx.nd.NDArray)

    # Test case to verify if initializing the SymbolBlock from a model with params
    # other than fp32 param dtype.

    # 1. Load a resnet model, cast it to fp64 and export
    tmp = tempfile.mkdtemp()
    tmpfile = os.path.join(tmp, 'resnet34_fp64')
    ctx = mx.cpu(0)

    net_fp32 = mx.gluon.model_zoo.vision.resnet34_v2(pretrained=True,
                                                     ctx=ctx,
                                                     root=tmp)
    net_fp32.cast('float64')
    net_fp32.hybridize()
    data = mx.nd.zeros((1, 3, 224, 224), dtype='float64', ctx=ctx)
    # One forward pass is needed before export() can serialize the graph.
    net_fp32.forward(data)
    net_fp32.export(tmpfile, 0)

    # 2. Load the saved model and verify if all the params are loaded correctly.
    # and choose one of the param to verify the type if fp64.
    sm = mx.sym.load(tmpfile + '-symbol.json')
    inputs = mx.sym.var('data', dtype='float64')
    net_fp64 = mx.gluon.SymbolBlock(sm, inputs)
    net_fp64.collect_params().load(tmpfile + '-0000.params', ctx=ctx)
    # 3. Get a conv layer's weight parameter name. Conv layer's weight param is
    # expected to be of dtype casted, fp64.
    for param_name in net_fp64.params.keys():
        if 'conv' in param_name and 'weight' in param_name:
            break
    assert np.dtype(net_fp64.params[param_name].dtype) == np.dtype(np.float64)

    # Cast the symbol block to FP32 and try to forward a FP32 data.
    # This will verify SymbolBlock.cast() functionality.
    net_fp64.cast('float32')
    fp32_data = mx.nd.zeros((1, 3, 224, 224), dtype='float32', ctx=ctx)
    prediction = net_fp64.forward(fp32_data)
    assert np.dtype(prediction.dtype) == np.dtype(np.float32)
def create_val_net(net):
    """Return an inference-mode SymbolBlock view of ``net`` (shared params)."""
    data_sym = mx.sym.var('data')
    # Trace in predict mode so train-only behavior is excluded from the graph.
    with autograd.predict_mode():
        traced = net(data_sym)
    return gluon.SymbolBlock(traced, data_sym, params=net.collect_params())
# data = g.utils.split_and_load(normalized, ctx_list=ctx1, batch_axis=0) # image = mx.nd.transpose(image, (0, 2, 3, 1)) # image = [normalize_image(im) for im in image] # image = mx.nd.transpose(mx.nd.array(np.array(image)), (0, 3, 1, 2)) return image vgg19 = vision.vgg19(pretrained=True) vgg19.collect_params().reset_ctx(ctx=ctx1) x = mx.sym.var('data') y = vgg19(x) print('\n=== the symbolic program of net===') interals = y.get_internals() print(interals.list_outputs()) vgg19_relu5_4 = g.SymbolBlock([interals['vgg0_conv15_fwd_output']], x, params=vgg19.collect_params()) vgg19_relu5_4.hybridize() # d_net = g.SymbolBlock([interals['discriminator0_d_dense0_fwd_output']], x, params=d_net_sigm.collect_params()) # vgg19_relu5_4.collect_params().reset_ctx(ctx=ctx) enhanced = resnet() enhanced.hybridize() blur_op = blur() blur_op.hybridize() #dont forget about softmax
# SGD with weight decay, momentum and an externally-built LR schedule.
optimizer = 'SGD'
optimizer_params = {
    'wd': opt.wd,
    'momentum': opt.momentum,
    'lr_scheduler': lr_scheduler
}
if opt.dtype != 'float32':
    # Mixed precision: keep an fp32 master copy of the weights.
    optimizer_params['multi_precision'] = True
model_name = opt.model_name
if opt.use_pretrain:
    # Plain ResNet50-v2 classifier initialised from a pretrained checkpoint.
    inputs = mx.sym.Variable('data')
    outputs = ResNet50_V2(inputs, classes=opt.num_classes)
    net = gluon.SymbolBlock(outputs, inputs)
    net.load_parameters(
        r'test_weights/0.8664-imagenet-Resnet50_v2-best-0072.params',
        ctx=context)
elif opt.union:
    from models.attention_net import Att_Master
    # Two-input attention network: image tensor plus a 200-dim auxiliary input.
    inputs0 = mx.sym.Variable('data')
    inputs1 = mx.sym.Variable('data1', shape=(-1, 200))
    outputs = Att_Master([inputs0, inputs1], num_class=opt.num_classes)
    net = gluon.SymbolBlock(outputs, [inputs0, inputs1])
    net.initialize(init.Xavier(), ctx=context)
    net.load_parameters(r'union_weights/resnet50v2-teanet-0000.params',
                        ctx=context,
                        allow_missing=False,
                        ignore_extra=False)