def gluon_model(model_data):
    """Train a small 3-layer MLP (128 -> 64 -> 10) for 3 epochs on the given data.

    Args:
        model_data: tuple of (train_data, train_label, <ignored third element>).

    Returns:
        The trained HybridSequential network.
    """
    features, labels, _ = model_data
    loader = DataLoader(list(zip(features, labels)),
                        batch_size=128,
                        last_batch="discard")

    net = HybridSequential()
    for layer in (Dense(128, activation="relu"),
                  Dense(64, activation="relu"),
                  Dense(10)):
        net.add(layer)
    net.initialize()
    net.hybridize()

    adam_trainer = Trainer(net.collect_params(), "adam",
                           optimizer_params={
                               "learning_rate": 0.001,
                               "epsilon": 1e-07
                           })

    # `metrics` was renamed in mxnet 1.6.0:
    # https://github.com/apache/incubator-mxnet/pull/17048
    if LooseVersion(mx.__version__) < LooseVersion("1.6.0"):
        metric_kwargs = {"metrics": Accuracy()}
    else:
        metric_kwargs = {"train_metrics": Accuracy()}

    est = estimator.Estimator(net=net,
                              loss=SoftmaxCrossEntropyLoss(),
                              trainer=adam_trainer,
                              **metric_kwargs)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(loader, epochs=3)
    return net
def __init__(self):
    """Configure the SST task: single-sentence input, 2 classes, Accuracy metric."""
    super(SST, self).__init__('sst', 2, 'sentence', 'label', [Accuracy()])
def __init__(self):
    """Configure the MRPC task: sentence-pair input, 2 classes, Accuracy and F1 metrics."""
    super(MRPC, self).__init__('mrpc', 2, ['sentence1', 'sentence2'], 'label',
                               [Accuracy(), F1()])
def __init__(self):
    """Configure the MNLI task: sentence-pair input, 3 classes, Accuracy metric."""
    super(MNLI, self).__init__('mnli', 3, ['sentence1', 'sentence2'], 'label',
                               [Accuracy()])
def __init__(self, train_dir=None, eval_dir=None):
    """Configure the SST task (optional train/eval directory overrides).

    Args:
        train_dir: optional path to the training data directory.
        eval_dir: optional path to the evaluation data directory.
    """
    super(SST, self).__init__('sst', 2, 'sentence', 'label', [Accuracy()],
                              train_dir, eval_dir)
def __init__(self, train_dir=None, eval_dir=None):
    """Configure the RTE task (optional train/eval directory overrides).

    Args:
        train_dir: optional path to the training data directory.
        eval_dir: optional path to the evaluation data directory.
    """
    super(RTE, self).__init__('rte', 2, ['sentence1', 'sentence2'], 'label',
                              [Accuracy()], train_dir, eval_dir)
def get_estimator(net, trainer):
    """Build a Gluon Estimator with accuracy metric and softmax CE loss.

    The `metrics` argument was split into `train_metrics` and `val_metrics`
    in mxnet 1.6.0: https://github.com/apache/incubator-mxnet/pull/17048
    so the keyword is chosen to match the installed version.
    """
    kwargs = {
        'net': net,
        'loss': SoftmaxCrossEntropyLoss(),
        'trainer': trainer,
    }
    if is_mxnet_older_than_1_6_0():
        kwargs['metrics'] = Accuracy()
    else:
        kwargs['train_metrics'] = Accuracy()
    return estimator.Estimator(**kwargs)
def get_metric(metric_name):
    """Look up an evaluation metric instance by its configuration name.

    Args:
        metric_name: one of 'Accuracy', 'f1', 'PearsonCorrelation', 'mcc'
            (the mixed casing matches the names used elsewhere in the configs).

    Returns:
        A freshly-constructed metric object.

    Raises:
        NotImplementedError: if `metric_name` is not recognized. The message
            names the offending value so misconfigurations are easy to debug
            (the original raised a bare NotImplementedError with no context).
    """
    if metric_name == 'Accuracy':
        return Accuracy()
    if metric_name == 'f1':
        return F1()
    if metric_name == 'PearsonCorrelation':
        return PearsonCorrelation()
    if metric_name == 'mcc':
        return MCC()
    raise NotImplementedError('unsupported metric: {!r}'.format(metric_name))
def get_metrics():
    """Return Estimator metric kwargs keyed by the version-appropriate name.

    The `metrics` argument was split into `train_metrics` and `val_metrics`
    in mxnet 1.6.0: https://github.com/apache/incubator-mxnet/pull/17048
    """
    if is_mxnet_older_than_1_6_0():
        return {"metrics": Accuracy()}
    return {"train_metrics": Accuracy()}
# --- script-level experiment setup (driven by parsed CLI options `opt`) ---
model_name = opt.model
# number of output classes for each supported dataset
dataset_classes = {
    'mnist': 10,
    'cifar10': 10,
    'caltech101': 101,
    'imagenet': 1000,
    'dummy': 1000
}
batch_size, dataset, classes = opt.batch_size, opt.dataset, dataset_classes[
    opt.dataset]
# one context per requested GPU id; fall back to CPU when --gpus is empty
context = [mx.gpu(int(i))
           for i in opt.gpus.split(',')] if opt.gpus.strip() else [mx.cpu()]
num_gpus = len(context)
# scale the global batch size with the number of devices
batch_size *= max(1, num_gpus)
# epochs at which the learning rate is stepped (comma-separated CLI value)
lr_steps = [int(x) for x in opt.lr_steps.split(',') if x.strip()]
# report both top-1 and top-5 accuracy during training
metric = CompositeEvalMetric([Accuracy(), TopKAccuracy(5)])
kv = mx.kv.create(opt.kvstore)


def get_model(model, ctx, opt):
    """Model initialization."""
    kwargs = {'ctx': ctx, 'pretrained': opt.use_pretrained, 'classes': classes}
    # some model families take extra constructor flags
    if model.startswith('resnet'):
        kwargs['thumbnail'] = opt.use_thumbnail
    elif model.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    net = models.get_model(model, **kwargs)
    if opt.resume:
        # resume training from a saved parameter file
        net.load_parameters(opt.resume)
    elif not opt.use_pretrained:
def main():
    """Train the Go-move model on SGF data across all GPUs, log to MXBoard,
    checkpoint periodically, then run a validation pass.

    NOTE(review): indentation reconstructed from a collapsed source line;
    nesting of the training loop inferred from syntax — confirm against the
    original file.
    """
    data_p = Path('/storage/data/').resolve()
    checkpoint_p = Path('./checkpoints/').resolve()
    checkpoint_p.mkdir(parents=True, exist_ok=True)
    # wipe stale TensorBoard/MXBoard logs from previous runs
    logs_p = Path('./logs/').resolve()
    shutil.rmtree(logs_p, ignore_errors=True)

    encoder = SevenPlaneEncoder((19, 19))
    builder = SGFDatasetBuilder(data_p, encoder=encoder)
    builder.download_and_prepare()
    train_itr = builder.train_dataset(batch_size=BATCH_SIZE,
                                      max_worker=cpu_count(),
                                      factor=FACTOR)
    test_itr = builder.test_dataset(batch_size=BATCH_SIZE,
                                    max_worker=cpu_count(),
                                    factor=FACTOR)
    # build model
    betago = Model()
    # convert to half-presicion floating point FP16
    # NOTE: all NVIDIA GPUs with compute capability 6.1 have a low-rate FP16 performance == FP16 is not the fast path on these GPUs
    # data passed to split_and_load() must be float16 too
    #betago.cast('float16')
    # hybridize for speed
    betago.hybridize(static_alloc=True, static_shape=True)
    # print graph
    shape = (1, ) + encoder.shape()
    mx.viz.print_summary(betago(mx.sym.var('data')), shape={'data': shape})
    # pin GPUs (assumes GPU_COUNT devices are present — TODO confirm)
    ctx = [mx.gpu(i) for i in range(GPU_COUNT)]
    # optimizer
    opt_params = {
        'learning_rate': 0.001,
        'beta1': 0.9,
        'beta2': 0.999,
        'epsilon': 1e-08
    }
    opt = mx.optimizer.create('adam', **opt_params)
    # initialize parameters
    # MXNet initializes the weight matrices uniformly by drawing from [-0.07,0.07], bias parameters are all set to 0
    # 'Xavier': initializer is designed to keep the scale of gradients roughly the same in all layers
    betago.initialize(mx.init.Xavier(magnitude=2.3), ctx=ctx,
                      force_reinit=True)
    # fetch and broadcast parameters
    params = betago.collect_params()
    # trainer
    trainer = Trainer(params=params, optimizer=opt, kvstore='device')
    # loss function
    loss_fn = SoftmaxCrossEntropyLoss()
    # use accuracy as the evaluation metric
    metric = Accuracy()
    with mxb.SummaryWriter(logdir='./logs') as sw:
        # add graph to MXBoard
        #betago.forward(mx.nd.ones(shape, ctx=ctx[0]))
        #betago.forward(mx.nd.ones(shape, ctx=ctx[1]))
        #sw.add_graph(betago)
        # profile only the first epoch (started/stopped inside the loop)
        profiler.set_config(profile_all=True,
                            aggregate_stats=True,
                            continuous_dump=True,
                            filename='profile_output.json')
        start = time.perf_counter()
        # train
        for e in range(EPOCHS):
            if 0 == e:
                profiler.set_state('run')
            tick = time.time()
            # reset the train data iterator.
            train_itr.reset()
            # loop over the train data iterator
            for i, batch in enumerate(train_itr):
                if 0 == i:
                    tick_0 = time.time()
                # splits train data into multiple slices along batch_axis
                # copy each slice into a context
                data = split_and_load(batch.data[0],
                                      ctx_list=ctx,
                                      batch_axis=0,
                                      even_split=False)
                # splits train label into multiple slices along batch_axis
                # copy each slice into a context
                label = split_and_load(batch.label[0],
                                       ctx_list=ctx,
                                       batch_axis=0,
                                       even_split=False)
                outputs = []
                losses = []
                # inside training scope (records graph for autograd)
                with ag.record():
                    for x, y in zip(data, label):
                        z = betago(x)
                        # computes softmax cross entropy loss
                        l = loss_fn(z, y)
                        outputs.append(z)
                        losses.append(l)
                # backpropagate the error for one iteration
                for l in losses:
                    l.backward()
                # make one step of parameter update.
                # trainer needs to know the batch size of data
                # to normalize the gradient by 1/batch_size
                trainer.step(BATCH_SIZE)
                # updates internal evaluation
                metric.update(label, outputs)
                # Print batch metrics
                if 0 == i % PRINT_N and 0 < i:
                    # checkpointing
                    betago.save_parameters(
                        str(checkpoint_p.joinpath(
                            'betago-{}.params'.format(e))))
                    sw.add_scalar(tag='Accuracy',
                                  value={'naive': metric.get()[1]},
                                  global_step=i - PRINT_N)
                    sw.add_scalar(tag='Speed',
                                  value={
                                      'naive':
                                      BATCH_SIZE * (PRINT_N) /
                                      (time.time() - tick)
                                  },
                                  global_step=i - PRINT_N)
                    print(
                        'epoch[{}] batch [{}], accuracy {:.4f}, samples/sec: {:.4f}'
                        .format(e, i,
                                metric.get()[1],
                                BATCH_SIZE * (PRINT_N) /
                                (time.time() - tick)))
                    tick = time.time()
            if 0 == e:
                profiler.set_state('stop')
                profiler.dump()
            # gets the evaluation result
            print('epoch [{}], accuracy {:.4f}, samples/sec: {:.4f}'.format(
                e,
                metric.get()[1],
                BATCH_SIZE * (i + 1) / (time.time() - tick_0)))
            # reset evaluation result to initial state
            metric.reset()
        elapsed = time.perf_counter() - start
        print('elapsed: {:0.3f}'.format(elapsed))

        # use Accuracy as the evaluation metric (fresh instance for validation)
        metric = Accuracy()
        for batch in test_itr:
            data = split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            for x in data:
                outputs.append(betago(x))
            metric.update(label, outputs)
        print('validation %s=%f' % metric.get())