Code Example #1
File: transformer.py  Project: cheesama/transformer
import time
import logging

import mxnet as mx
from mxnet import autograd, nd

logger = logging.getLogger(__name__)


def run_epoch(epoch, data_iter, model, trainer, loss_fn, ctx=mx.cpu()):
    "Standard Training and Logging Function"
    start = time.time()
    total_tokens = 0
    tokens = 0
    total_loss = 0

    for i, batch in enumerate(data_iter):        
        src = batch.src.as_in_context(ctx)
        trg = batch.trg.as_in_context(ctx)
        src_mask = batch.src_mask.as_in_context(ctx)
        trg_mask = batch.trg_mask.as_in_context(ctx)
        trg_y = batch.trg_y.as_in_context(ctx)
        ntokens = batch.ntokens
        with autograd.record():
            out = model(src, trg, src_mask, trg_mask)
            _out = out.reshape(-1, out.shape[-1])
            # Build a dense one-hot target of the same shape as _out by
            # scattering ones at the (row, target-token) positions.
            _cols = list(batch.trg_y.reshape(-1).asnumpy())
            _rows = list(range(_out.shape[0]))
            _idx = nd.array([_rows, _cols], ctx=ctx)
            _trg = nd.scatter_nd(nd.ones_like(trg_y.reshape(-1)), _idx, _out.shape)
            loss = nd.sum(loss_fn(_out, _trg))
        loss.backward()
        trainer.step(out.shape[0])
        total_loss += loss.asnumpy()[0]
        total_tokens += ntokens.asnumpy()[0]
        tokens += ntokens.asnumpy()[0]
        if i % 50 == 0:
            elapsed = time.time() - start
            logger.info("Epoch Step: %d Loss: %f Tokens per Sec: %f" % (epoch, loss.asnumpy()[0] / ntokens.asnumpy()[0], tokens / elapsed))
            start = time.time()
            tokens = 0
    return total_loss #/ total_tokens
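
The following is a minimal, hypothetical driver for run_epoch, shown only to illustrate the expected call signature. The ToyModel, ToyBatch, and random data below are assumptions; only the Batch attribute names (src, trg, src_mask, trg_mask, trg_y, ntokens) and the four-argument model call come from run_epoch itself. Because run_epoch builds a dense one-hot target, the loss is assumed to be configured with sparse_label=False.

# Hypothetical usage sketch for run_epoch (not part of the original project).
import mxnet as mx
from mxnet import gluon, nd

VOCAB = 16  # assumed toy vocabulary size

class ToyModel(gluon.nn.HybridBlock):
    """Stand-in for the transformer: ignores the masks, embeds trg, projects to the vocabulary."""
    def __init__(self, vocab, **kwargs):
        super().__init__(**kwargs)
        with self.name_scope():
            self.emb = gluon.nn.Embedding(vocab, 32)
            self.proj = gluon.nn.Dense(vocab, flatten=False)

    def hybrid_forward(self, F, src, trg, src_mask, trg_mask):
        return self.proj(self.emb(trg))

class ToyBatch:
    """Mimics the Batch object consumed by run_epoch."""
    def __init__(self, src, trg):
        self.src, self.trg, self.trg_y = src, trg, trg
        self.src_mask = nd.ones_like(src)
        self.trg_mask = nd.ones_like(trg)
        self.ntokens = nd.array([trg.size])

ctx = mx.cpu()
net = ToyModel(VOCAB)
net.initialize(ctx=ctx)
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)  # dense one-hot targets
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 1e-3})

data_iter = [ToyBatch(nd.random.randint(0, VOCAB, shape=(4, 10)).astype('float32'),
                      nd.random.randint(0, VOCAB, shape=(4, 10)).astype('float32'))
             for _ in range(8)]
run_epoch(0, data_iter, net, trainer, loss_fn, ctx=ctx)
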
Code Example #2
def generate_backgrad_data(net, data, label, ctx=mx.gpu(0), bn_control=None, 
                           max_iters=60, sgd=SGD(lr=0.1),  # optimizer args
                           iter_log_period=None, show_clip=False, record_detail=False, logger=None,  # log args
                           labelmap=None, labeloutputmap=None, loss_f=SCELoss(),  # loss args
                           threshold=None):  # constraint args
    """
        param:
            net: base net model
            data: data will be changed.recomand data context in cpu.
            label: data's label, recomand label context in cpu.
            ctx: backgrad context, if ctx is gpu and data/label in gpu(or said they in same), then backgradwill change the data iteself, 
            max_iters: max_iters for backgrad
            lr: lr for backgrad
            iter_log_period: output log period, None means never output.
            show_clip: log show backgrad image is clip?
            loss_f: loss function for backgrad
            bn_control: bn_control be fore backgrad, None means never use BN Control.
            sgd: backgrad optimizer method.
            trheshold: returned data's color clip trheshold.
            labelmap, labeloutputmap: decide use what label to backgrad generate adversal smaple. use -loss when all None. only one canbe specified.

        data is better in cpu, if data in ctx(global var), the returned backgrad_data is shallow copy of data.
    """        
    if bn_control is not None:
        bn_control.store()

    sparse_label = label.copy()
    if labelmap is not None: label = labelmap(sparse_label)
    data, label, sparse_label_ctx = data.as_in_context(ctx), label.as_in_context(ctx), sparse_label.as_in_context(ctx)
    if logger is None: logger = LogRecorder()
    if record_detail:
        origin_data = data.copy()
    constraint = Constraint(threshold)
        
    for iters in range(1, max_iters+1):
        data.attach_grad()
        with autograd.record():
            output = net(data)
            if labeloutputmap is not None: label = labeloutputmap(output, sparse_label).as_in_context(ctx)
            loss = loss_f(output, label)
            if labeloutputmap is None and labelmap is None:
                loss = -loss
        loss.backward()
        mean_loss = nd.mean(loss).asscalar()     # reducing to a scalar lets intermediate memory be released
        
        if record_detail:
            logger(output, sparse_label_ctx, origin_data, data, loss)
        logger.print_log(iter_log_period, iters, mean_loss, record_detail, data, show_clip)
        
        sgd(data)
        
        constraint(data, iters, max_iters)
    if bn_control is not None:
        bn_control.load()

    logger.asnumpy()
    sgd.clear()
    return data, (logger, )
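
A hedged sketch of how generate_backgrad_data might be invoked. Here net is assumed to be any initialized Gluon classifier and (data, label) a CPU-resident batch; only keyword names taken from the signature above are used, and the project's own SGD and SCELoss defaults are left untouched.

# Hypothetical call (placeholders: net, data, label).
backgrad_data, (log,) = generate_backgrad_data(
    net, data, label,
    ctx=mx.gpu(0),
    max_iters=30,
    iter_log_period=10,   # log every 10 iterations
    threshold=0.03,       # color clipping constraint on the returned data
)
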
Code Example #3
    def batch_estimate_and_update(self, data: np.ndarray,
                                  targets: np.ndarray) -> np.ndarray:
        """Run one forward/backward pass and optimizer step on a batch; return the predictions."""
        input_tensor = mx.nd.array(data)
        target_tensor = mx.nd.array(targets)
        with mx.autograd.record():
            result = self.model(input_tensor)
            loss = self.loss_fn(result, target_tensor)
        loss.backward()
        self.trainer.step(input_tensor.shape[0])
        return result.asnumpy()
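
batch_estimate_and_update assumes the enclosing object provides model, loss_fn, and trainer. A minimal, hypothetical wrapper that satisfies those assumptions is sketched below; the Dense regressor, L2 loss, and learning rate are illustrative only.

import numpy as np
import mxnet as mx
from mxnet import gluon

class BatchLearner:
    """Hypothetical owner of batch_estimate_and_update (Code Example #3)."""
    def __init__(self, n_inputs, n_outputs):
        self.model = gluon.nn.Dense(n_outputs, in_units=n_inputs)
        self.model.initialize()
        self.loss_fn = gluon.loss.L2Loss()
        self.trainer = gluon.Trainer(self.model.collect_params(),
                                     'sgd', {'learning_rate': 0.01})

    # batch_estimate_and_update from Code Example #3 would live here.

learner = BatchLearner(n_inputs=8, n_outputs=1)
# predictions = learner.batch_estimate_and_update(np.random.rand(32, 8), np.random.rand(32, 1))
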
Code Example #4
    def train(self):
        model = self.getModel()
        if not os.path.exists(model):
            os.mkdir(model)

        ctx = mx.gpu(self.gpu)
        net, myLoss = self.getNet(ctx)
        trainI, testI = self.getData(ctx)
        metric, monitor = self.getMetric()
        trainer = self.getTrainer(net.collect_params(), trainI.iters)

        logging.info('')
        result, epochs = 0, self.getEpoch()
        for epoch in range(1, epochs + 1):
            logging.info('Epoch[%04d] start ...' % epoch)

            list(map(lambda x: x.reset(), [trainI, metric]))
            for data, label in trainI:
                with autograd.record():
                    out = net.forward(data)
                    self.forDebug(out)
                    loss = myLoss(out, label)
                loss.backward()
                trainer.step(data.shape[0])

                metric.update(label, out)
            for name, value in metric.get():
                logging.info('Epoch[%04d] Train-%s=%f ...', epoch, name, value)

            _result = None
            list(map(lambda x: x.reset(), [testI, metric]))
            for data, label in testI:
                out = net.forward(data)
                self.forDebug(out)
                metric.update(label, out)
            for name, value in metric.get():
                if name == monitor: _result = value
                logging.info('Epoch[%04d] Validation-%s=%f', epoch, name,
                             value)

            if _result > result:
                result = _result
                name = '%s/%04d-%3.3f%%.params' % (model, epoch, result * 100)
                net.save_params(name)
                logging.info('Save params to %s ...', name)

            logging.info('Epoch[%04d] done ...\n' % epoch)
Code Example #5
File: vgg16_mxnet.py  Project: vino5211/mycodes
import os
import time

import mxnet as mx
from mxnet import autograd
from mxnet.gluon import model_zoo, trainer
from mxnet.gluon import loss as gloss

os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'

vgg16 = model_zoo.vision.vgg16(ctx=mx.gpu())
vgg16.initialize(ctx=mx.gpu())
criterion = gloss.SoftmaxCrossEntropyLoss()

update = trainer.Trainer(vgg16.collect_params(), optimizer='sgd')
begin = time.time()
bs = 60

for i in range(1000):
    print(i)
    inputs = mx.nd.normal(shape=(bs, 3, 224, 224), ctx=mx.gpu())
    labels = mx.nd.array([0] * bs, ctx=mx.gpu())
    with autograd.record():
        logits = vgg16(inputs)
        loss = criterion(logits, labels)

    loss.backward()
    update.step(batch_size=bs)

print("time ", time.time() - begin)

# bs=10, iter=1000, time 10.77, Memory=3441M
# upper bound=60
Code Example #6
File: nnHelper.py  Project: lemon234071/2018-
    def train(self):
        model = self.getModel()
        if not os.path.exists(model):
            os.mkdir(model)

        ctx = mx.cpu(self.gpu)
        net, myLoss = self.getNet(ctx)
        trainI, testI = self.getData(ctx)
        metric, monitor = self.getMetric()
        trainer = self.getTrainer(
            net.collect_params(), trainI.iters)

        logging.info('')
        result, epochs = 0, self.getEpoch()

        for epoch in range(1, epochs+1):
            train_l_sum = mx.nd.array([0], ctx=ctx)
            logging.info('Epoch[%04d] start ...' % epoch)

            list(map(lambda x: x.reset(), [trainI, metric]))
            for batch_i, (data, label) in enumerate(trainI):
                with autograd.record():

                    out = net.forward(data)  # (2048, 590)
                    self.forDebug(out)
                    loss = myLoss(out, label)
                loss.backward()
                # grads = [p.grad(ctx) for p in net.collect_params().values()]
                # mx.gluon.utils.clip_global_norm(
                #     grads, .2 * 5 * data.shape[0])
                trainer.step(data.shape[0])  # trainer.step(batch_size)
                print('train loss:', loss.mean().asnumpy())
                ###############################################
                # train_l_sum += loss.sum() / data.shape[0]
                # eval_period = 1
                # if batch_i % eval_period == 0 and batch_i > 0:
                #     cur_l = train_l_sum / eval_period
                #     print('epoch %d, batch %d, train loss %.6f, perplexity %.2f'
                #           % (epoch, batch_i, cur_l.asscalar(),
                #              cur_l.exp().asscalar()))
                #     train_l_sum = mx.nd.array([0], ctx=ctx)
                #############################################
                metric.update(label, out)
            for name, value in metric.get():
                logging.info('Epoch[%04d] Train-%s=%f ...', epoch, name, value)

            _result = None
            list(map(lambda x: x.reset(), [testI, metric]))
            l_sum = mx.nd.array([0], ctx=ctx)
            n = 0
            for data, label in testI:
                out = net.forward(data)
                l = myLoss(out, label)
                l_sum += l.sum()
                n += l.size
                self.forDebug(out)
                metric.update(label, out)
            print('valid loss:', (l_sum / n).mean().asnumpy())
            for name, value in metric.get():
                if name == monitor: _result = value
                logging.info('Epoch[%04d] Validation-%s=%f', epoch, name, value)

            if _result > result:
                result = _result
                name = '%s/%04d-%3.3f%%.params' % (model, epoch, result*100)
                net.save_params(name)
                logging.info('Save params to %s ...', name)

            logging.info('Epoch[%04d] done ...\n' % epoch)
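
Both train() methods above (Code Examples #4 and #6) depend on a set of getters supplied by the concrete class. The skeleton below is a hypothetical reconstruction of that interface, inferred only from how train() uses it; the method names come from the code, everything else is an assumption.

class TrainTaskSkeleton:
    """Hypothetical base interface implied by the train() methods above."""

    def getModel(self):
        """Return the directory where .params checkpoints are written."""
        raise NotImplementedError

    def getNet(self, ctx):
        """Return (net, loss_fn): an initialized Gluon block and its loss function."""
        raise NotImplementedError

    def getData(self, ctx):
        """Return (train_iter, test_iter): resettable iterables over (data, label)
        batches, each exposing an `iters` count used to build the trainer."""
        raise NotImplementedError

    def getMetric(self):
        """Return (metric, monitor_name): an mx.metric object and the name of the
        validation metric that decides when to save a checkpoint."""
        raise NotImplementedError

    def getTrainer(self, params, iters_per_epoch):
        """Return a gluon.Trainer for the given parameters."""
        raise NotImplementedError

    def getEpoch(self):
        """Return the number of training epochs."""
        raise NotImplementedError

    def forDebug(self, out):
        """Optional hook for inspecting network outputs; no-op by default."""
        pass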