Example no. 1
def run_epoch(e, network, dataloader, trainer, log_dir, print_name, is_train):
    total_loss = nd.zeros(1, ctx)
    for i, (x, y) in enumerate(dataloader):
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)

        with autograd.record(train_mode=is_train):
            output = network(x)
            loss_ctc = ctc_loss(output, y)

        if is_train:
            loss_ctc.backward()
            trainer.step(x.shape[0])

        if i == 0 and e % SEND_IMAGE_EVERY_N == 0 and e > 0:
            predictions = output.softmax().topk(axis=2).asnumpy()
            decoded_text = decode(predictions)
            # print(decoded_text)
            output_image = draw_text_on_image(x.asnumpy(), decoded_text)
            output_image[output_image < 0] = 0
            output_image[output_image > 1] = 1
            print("{} first decoded text = {}".format(print_name, decoded_text[0]))
            with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
                sw.add_image('bb_{}_image'.format(print_name), output_image, global_step=e)

        total_loss += loss_ctc.mean()

    epoch_loss = float(total_loss.asscalar())/len(dataloader)
    with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)

    return epoch_loss
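The loop in Example no. 1 depends on names defined at module level in its source project (ctx, ctc_loss, SEND_IMAGE_EVERY_N, decode, draw_text_on_image). A minimal sketch of the assumed setup, with illustrative values only; decode and draw_text_on_image are project-specific helpers and are not reproduced here:

import mxnet as mx
from mxnet import nd, autograd, gluon
from mxboard import SummaryWriter

# Hypothetical module-level globals assumed by run_epoch above.
ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
ctc_loss = gluon.loss.CTCLoss()   # any loss taking (prediction, label) works here
SEND_IMAGE_EVERY_N = 20           # log a decoded image every 20 epochs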
Example no. 2
    def train(train_data, val_data, epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        # with SummaryWriter(logdir=log_dir, verbose=False) as sw:
        #     sw.add_graph(tripletnet)

        trainer = gluon.Trainer(net.collect_params(), 'adam',
                                {'learning_rate': 0.001})
        # Initialize the triplet loss
        loss_fn = TripletSemiHardLoss()

        global_step = 0

        for epoch in range(epochs):
            train_loss = 0
            num_batch = len(train_data)

            tbar = tqdm(train_data)

            for i, batch in enumerate(tbar):
                batch_loss = 0
                data = mx.gluon.utils.split_and_load(batch[0],
                                                     ctx_list=ctx,
                                                     batch_axis=0,
                                                     even_split=False)
                label = mx.gluon.utils.split_and_load(batch[1],
                                                      ctx_list=ctx,
                                                      batch_axis=0,
                                                      even_split=False)
                with ag.record():
                    losses = []
                    for x, y in zip(data, label):
                        embs = net(x)
                        losses.append(loss_fn(embs, y))
                for l in losses:
                    l.backward()
                    batch_loss += l.mean().asscalar()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in losses])
                global_step += batch_size

                with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                    sw.add_scalar(tag="BatchLoss",
                                  value=batch_loss,
                                  global_step=global_step)

            train_loss /= batch_size * num_batch
            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLoss",
                              value=train_loss,
                              global_step=global_step)

            if epoch % save_period == 0:
                # Test on first device
                print("Test and visualize")
                test(val_data, ctx, epoch)
                net.export("{}/{}".format(save_dir, model_name), epoch=epoch)
Example no. 3
 def __init__(self, config):
     self.config = config
     self.train_summary_dir = os.path.join(
         os.path.dirname(os.path.dirname(__file__)), "logs", "train")
     self.validate_summary_dir = os.path.join(
         os.path.dirname(os.path.dirname(__file__)), "logs", "val")
     if not os.path.exists(self.train_summary_dir):
         os.makedirs(self.train_summary_dir)
     if not os.path.exists(self.validate_summary_dir):
         os.makedirs(self.validate_summary_dir)
     self.train_summary_writer = SummaryWriter(self.train_summary_dir)
     self.validate_summary_writer = SummaryWriter(self.validate_summary_dir)
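Example no. 3 only constructs the paired train/validation writers. A hedged sketch of how such a pair is typically used later in the same class (the method name log_losses is an assumption, not part of the original code):

 def log_losses(self, train_loss, val_loss, step):
     # Writing the same tag to both writers lets TensorBoard overlay the two curves.
     self.train_summary_writer.add_scalar(tag='loss', value=train_loss, global_step=step)
     self.validate_summary_writer.add_scalar(tag='loss', value=val_loss, global_step=step)
     self.train_summary_writer.flush()
     self.validate_summary_writer.flush()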
Example no. 4
def run_epoch(e, network, dataloader, trainer, print_name, update_network,
              save_network, print_output):
    total_loss = nd.zeros(1, ctx)
    batch_loss = nd.zeros(1, ctx)  # per-batch running loss used by the commented-out logging below
    for i, (x, y) in enumerate(dataloader):
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)

        with autograd.record():
            output = network(x, y)
            loss = loss_func(output, y)

        if update_network:
            loss.backward()
            trainer[0].step(y.shape[0])
            trainer[1].step(y.shape[0])

        total_loss += loss.mean()
        batch_loss += loss.mean()

        # if i % print_n == 0 and i > 0:
        #     mean_batch_loss = float(batch_loss.asscalar()/print_n)
        #     print('{} Batches {}: {:.6f}'.format(print_name, i, mean_batch_loss))
        #     batch_loss = nd.zeros(1, ctx)
        #     nd.waitall()

    epoch_loss = float(total_loss.asscalar()) / len(dataloader)

    if print_output and e % print_text_every_n == 0 and e > 0:
        text = "predicted\t| actual\t| noisy \n ---- | ---- | ---- \n"
        for n in range(y.shape[0]):
            out_np = output.asnumpy()[n, :]
            y_np = y.asnumpy()[n, :]
            x_np = x.asnumpy()[n, :]
            out_np_max = np.argmax(out_np, axis=1)

            out_decoded = decode(out_np_max)
            y_decoded = decode(y_np)
            x_decoded = decode(x_np)

            output_text = out_decoded + "\t| " + y_decoded + "\t| " + x_decoded
            text += output_text + "\n"
        with SummaryWriter(logdir="./logs", verbose=False, flush_secs=5) as sw:
            sw.add_text(tag='{}_text'.format(print_name),
                        text=text,
                        global_step=e)
            print("output {}".format(text))

    # if save_network and e % save_every_n == 0 and e > 0:
    #     network.save_params("{}/{}".format(checkpoint_dir, checkpoint_name))
    with SummaryWriter(logdir="./logs", verbose=False, flush_secs=5) as sw:
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)

    return epoch_loss
Example no. 5
 def check_add_pr_curve(labels, predictions, num_thresholds):
     with SummaryWriter(_LOGDIR) as sw:
         sw.add_pr_curve(tag='test_add_pr_curve',
                         labels=labels,
                         predictions=predictions,
                         num_thresholds=num_thresholds)
     check_event_file_and_remove_logdir()
Example no. 6
def test_add_multiple_scalars():
    sw = SummaryWriter(logdir=_LOGDIR)
    sw.add_scalar(tag='test_multiple_scalars',
                  value=np.random.uniform(),
                  global_step=0)
    sw.add_scalar(tag='test_multiple_scalars',
                  value=('scalar1', np.random.uniform()),
                  global_step=0)
    sw.add_scalar(tag='test_multiple_scalars',
                  value=['scalar2', np.random.uniform()],
                  global_step=0)
    sw.add_scalar(tag='test_multiple_scalars',
                  value={
                      'scalar3': np.random.uniform(),
                      'scalar4': np.random.uniform()
                  },
                  global_step=0)
    items = os.listdir(_LOGDIR)
    assert len(items) == 2
    assert 'test_multiple_scalars' in items
    items.remove('test_multiple_scalars')
    assert items[0].startswith(_EVENT_FILE_PREFIX)
    print(items[0])
    assert file_exists(os.path.join(_LOGDIR, items[0]))

    named_scalar_dir = os.path.join(_LOGDIR, 'test_multiple_scalars')
    assert dir_exists(named_scalar_dir)
    for i in range(1, 5):
        sub_dir = os.path.join(named_scalar_dir, 'scalar%d' % i)
        assert dir_exists(sub_dir)
        sub_items = os.listdir(sub_dir)
        assert len(sub_items) == 1
        assert sub_items[0].startswith(_EVENT_FILE_PREFIX)
Example no. 7
def test_add_scalar():
    sw = SummaryWriter(logdir=_LOGDIR)
    sw.add_scalar(tag='test_add_scalar',
                  value=np.random.uniform(),
                  global_step=0)
    sw.close()
    check_event_file_and_remove_logdir()
Example no. 8
    def __init__(self,
                 model_param,
                 vocab_path,
                 mode='train',
                 vocab_tag_path=None,
                 encoder_type='rnn',
                 head_attention=False,
                 decoder_cell='lstm',
                 ctx=cpu()):
        """

        # TODO Choose the encoder and decoder parts of the model
        # TODO Encoder: Parsed | RNN
        # TODO Decoder: Headline | RNN
        # TODO Decoder_RNN_TYPE: DLSTM | LSTM | GRU

        Build the model according to the parameters and the mode.

        :param mode: train|decode|test, controls what the current model is used for
        :param vocab_path: path to the vocabulary
        :param vocab_tag_path: path to the syntactic parse tag vocabulary
        :param model_param: hyperparameters of the model
        """
        self.vocab_path = vocab_path
        self.vocab = Vocab(vocab_path)

        if vocab_tag_path is not None:
            self.vocab_tag_path = vocab_tag_path
            self.vocab_tag = Vocab(vocab_tag_path)

        self.mode = mode
        self.loss = SoftmaxCrossEntropyLoss()
        self.model_param = model_param
        self.encoder_type = encoder_type
        if encoder_type == 'rnn':
            # self.model = Seq2SeqRNN(self.vocab, self.model_param, ctx)
            self.model = Seq2SeqRNN(self.vocab,
                                    'LSTM',
                                    model_param['emb_size'],
                                    model_param['hidden_size'],
                                    self.vocab.size,
                                    60,
                                    'Bahdanau',
                                    'two_way',
                                    None,
                                    None,
                                    0,
                                    1,
                                    ctx=ctx)
        elif encoder_type == 'parse':
            self.model = ParseModel(self.vocab, self.vocab_tag,
                                    self.model_param, ctx)

        self.model.initialize(ctx=ctx)
        self.ctx = ctx
        self.trainer = Trainer(self.model.collect_params(), 'adam',
                               {'learning_rate': 0.01})
        self.global_step = 0
        self.sw = SummaryWriter('./logs', flush_secs=2)
Example no. 9
    def __init__(self,
                 model,
                 run_id,
                 gpu_idxs=None,
                 tensorboard_logging=False):
        """

        Parameters
        ----------
        model: HybridBlock
        gpu_idxs: None or list of ints
            If None will set context to CPU.
            If list of ints, will set context to given GPUs.
        """
        logging.info("Using Module Learner.")
        model.hybridize()
        logging.info("Hybridized model.")
        input = mx.sym.var('data')
        pre_output = model(input)
        output = mx.sym.SoftmaxOutput(pre_output, name='softmax')
        context = get_context(gpu_idxs)
        self.module = mx.mod.Module(symbol=output,
                                    context=context,
                                    data_names=['data'],
                                    label_names=['softmax_label'])
        self.tensorboard_logging = tensorboard_logging
        if self.tensorboard_logging:
            from mxboard import SummaryWriter
            current_folder = os.path.dirname(os.path.realpath(__file__))
            tensorboard_folder = os.path.join(current_folder, "..", "logs",
                                              "tensorboard")
            summary_filepath = os.path.join(tensorboard_folder, run_id)
            self.writer = SummaryWriter(logdir=summary_filepath)
Example no. 10
 def log_mxboard(self, step_idx):
     with SummaryWriter(logdir="../logs/" + self._experiment_id) as sw:
         ### scalars
         sw.add_scalar(tag=self._prefix + '_loss',
                       value=self.loss(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_accuracy',
                       value=self.accuracy(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_roc_auc',
                       value=self.roc_auc(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_precision',
                       value=self.precision(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_recall',
                       value=self.recall(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_f1',
                       value=self.f1(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_average_precision',
                       value=self.average_precision(), global_step=step_idx)
         sw.add_scalar(tag=self._prefix + '_window_diff',
                       value=self.window_diff(), global_step=step_idx)
         ### histograms
         sw.add_histogram(tag=self._prefix + '_loss_histogram',
                          values=self._losses.get(), bins=100, global_step=step_idx)
         ### other
         sw.add_pr_curve(tag=self._prefix + '_precision_recall',
                         labels=self._labels.get(), predictions=self._pred_probs.get(),
                         num_thresholds=100, global_step=step_idx)
Example no. 11
def test_add_text():
    # this will generate an event file under _LOGDIR and
    # a json file called tensors.json under _LOGDIR/plugins/tensorboard_text/tensors.json
    sw = SummaryWriter(logdir=_LOGDIR)
    sw.add_text(tag='test_add_text', text='Hello MXNet!')
    sw.close()
    check_and_remove_logdir_for_text()
Example no. 12
def run_epoch(e, network, dataloader, loss_function, trainer, log_dir, print_name, update_cnn, save_cnn, ctx=mx.gpu()):
    total_loss = nd.zeros(1, ctx)
    for i, (data, label) in enumerate(dataloader):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        
        with autograd.record():
            output = network(data)
            loss_i = loss_function(output, label)
        if update_cnn:
            loss_i.backward()
            trainer.step(data.shape[0])

        total_loss += loss_i.mean()
        
        if e % send_image_every_n == 0 and e > 0 and i == 0:
            output_image = draw_box_on_image(output.asnumpy(), label.asnumpy(), data.asnumpy())
            
    epoch_loss = float(total_loss.asscalar())/len(dataloader)
    
    with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)
        if e % send_image_every_n == 0 and e > 0:
            output_image[output_image<0] = 0
            output_image[output_image>1] = 1
            sw.add_image('bb_{}_image'.format(print_name), output_image, global_step=e)
            
    if save_cnn and e % save_every_n == 0 and e > 0:
        network.save_parameters("{}/{}".format(checkpoint_dir, checkpoint_name))
    return epoch_loss
Example no. 13
def test_add_graph_symbol():
    data = mx.sym.Variable('data')
    conv = mx.sym.Convolution(data, kernel=(2, 2), num_filter=2)
    nodes = _get_nodes_from_symbol(conv)
    expected_nodes = [NodeDef(name='data', op='null'),
                      NodeDef(name='convolution0/convolution0_weight', op='null',
                              attr={'param': AttrValue(
                                  s='{"kernel": "(2, 2)", "num_filter": "2"}'.encode(encoding='utf-8'))}),
                      NodeDef(name='convolution0/convolution0_bias', op='null',
                              attr={'param': AttrValue(
                                  s='{"kernel": "(2, 2)", "num_filter": "2"}'.encode(encoding='utf-8'))}),
                      NodeDef(name='convolution0/convolution0', op='Convolution',
                              input=['data', 'convolution0/convolution0_weight', 'convolution0/convolution0_bias'],
                              attr={'param': AttrValue(
                                  s='{"kernel": "(2, 2)", "num_filter": "2"}'.encode(encoding='utf-8'))})]
    # check _get_nodes_from_symbol
    for expected_node, node in zip(expected_nodes, nodes):
        assert expected_node == node

    # check _net2pb
    expected_graph = GraphDef(node=expected_nodes, versions=VersionDef(producer=100))
    graph = _net2pb(conv)
    assert expected_graph == graph

    # check add_graph
    with SummaryWriter(logdir=_LOGDIR) as sw:
        sw.add_graph(conv)
    check_event_file_and_remove_logdir()
Example no. 14
def test(ctx=mx.cpu()):
    from mxboard import SummaryWriter
    sw = SummaryWriter(logdir='sphere_dynamic', flush_secs=5)

    net = nn.Sequential()
    b1 = base_net(48,
                  3,
                  fun=special_conv,
                  kernel_size=(3, 3),
                  same_shape=False)
    b2 = base_net(1,
                  48,
                  fun=special_conv,
                  kernel_size=(3, 3),
                  same_shape=False)
    fc = nn.Dense(3, in_units=9)
    net.add(b1, b2, fc)
    init_s(net, ctx)

    from mxnet import gluon, autograd
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})
    for i in range(10000):
        with autograd.record():
            out = net(img)
            loss = nd.sum(nd.abs(out - target))
        loss.backward()
        trainer.step(2)
        sw.add_scalar(tag='loss', value=loss.asscalar(), global_step=i)
        if i % 100 == 0:
            print(i, loss.asscalar())
    sw.close()
Example no. 15
def plot_mxboard(block, logdir='./logs'):
    """Plot network to visualize internal structures.

    Parameters
    ----------
    block : mxnet.gluon.HybridBlock
        A hybridizable network to be visualized.
    logdir : str
        The directory to save the event files to.

    """
    try:
        from mxboard import SummaryWriter
    except ImportError:
        print('mxboard is required. Please install via `pip install mxboard` ' +
              'or refer to https://github.com/awslabs/mxboard.')
        raise
    data = mx.sym.var('data')
    sym = block(data)
    if isinstance(sym, tuple):
        sym = mx.sym.Group(sym)
    with SummaryWriter(logdir=logdir) as sw:
        sw.add_graph(sym)
    usage = '`tensorboard --logdir={} --host=127.0.0.1 --port=8888`'.format(logdir)
    print('Log saved. Use: {} to visualize it'.format(usage))
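A possible way to call Example no. 15's plot_mxboard on a small Gluon network; a sketch only, assuming the block is hybridizable and takes a single 'data' input as the function requires:

import mxnet as mx
from mxnet.gluon import nn

net = nn.HybridSequential()
net.add(nn.Dense(64, activation='relu'), nn.Dense(10))
plot_mxboard(net, logdir='./logs')
# then: tensorboard --logdir=./logs --host=127.0.0.1 --port=8888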
Example no. 16
    def _init_train(self):
        from mxboard import SummaryWriter
        self.record_step = record_step
        self.loss_name = ['score', 'box', 'class']

        self.nd_all_anchors = [
            self.all_anchors.copyto(device) for device in ctx
        ]
        self.get_default_ltrb()

        self.L1_loss = gluon.loss.L1Loss()
        self.L2_loss = gluon.loss.L2Loss()
        self.LG_loss = gluon.loss.LogisticLoss(label_format='binary')
        self.CE_loss = gluon.loss.SoftmaxCrossEntropyLoss(from_logits=False,
                                                          sparse_label=False)

        self.trainer = gluon.Trainer(self.net.collect_params(), 'adam',
                                     {'learning_rate': 0.0001})

        self.sw = SummaryWriter(logdir=version + '/logs')  #, flush_secs=30)
        #a = self.net(nd.zeros((1,3,self.size[0],self.size[1]), ctx=ctx[0]))
        #self.sw.add_graph(self.net)

        if not os.path.exists(self.backup_dir):
            os.makedirs(self.backup_dir)
Example no. 17
 def __init__(self, logging_dir, prefix=None):
     self.prefix = prefix
     try:
         from mxboard import SummaryWriter
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         logging.error('You can install mxboard via `pip install mxboard`.')
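Example no. 17 only stores the writer; a hypothetical companion method (its name and the metrics format are assumptions) showing how the stored prefix would typically be applied when logging:

 def log_metrics(self, metrics, step):
     # metrics: dict mapping a metric name to a float, e.g. {'accuracy': 0.91}
     for name, value in metrics.items():
         tag = '{}_{}'.format(self.prefix, name) if self.prefix else name
         self.summary_writer.add_scalar(tag=tag, value=value, global_step=step)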
Example no. 18
def test_add_audio():
    shape = (100,)
    data = mx.nd.random.uniform(-1, 1, shape=shape)
    sw = SummaryWriter(logdir=_LOGDIR)
    sw.add_audio(tag='test_add_audio', audio=data)
    sw.close()
    check_event_file_and_remove_logdir()
Example no. 19
    def test(val_data, ctx, epoch):
        embedding = None
        labels = None
        images = None
        initialized = False

        for i, (data, label) in enumerate(val_data):
            data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(label,
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = [net(X) for X in data]
            outputs = mx.nd.concat(*outputs, dim=0)
            label = mx.nd.concat(*label, dim=0)
            if initialized:
                embedding = mx.nd.concat(*(embedding, outputs), dim=0)
                labels = mx.nd.concat(*(labels, label), dim=0)
            else:
                embedding = outputs
                labels = label
                initialized = True

        with SummaryWriter(logdir=log_dir) as sw:
            sw.add_embedding(tag='{}_tripletnet_semihard_{}'.format(
                opt.dataset, epoch),
                             embedding=embedding,
                             labels=labels,
                             images=images)
Example no. 20
def test_add_image():
    shape = list(rand_shape_nd(4))
    shape[1] = 3
    shape = tuple(shape)
    sw = SummaryWriter(logdir=_LOGDIR)
    sw.add_image(tag='test_add_image', image=mx.nd.random.normal(shape=shape), global_step=0)
    sw.close()
    check_event_file_and_remove_logdir()
Example no. 21
def test_add_pr_curve():
    shape = (100,)
    predictions = mx.nd.uniform(low=0.0, high=1.0, shape=shape)
    labels = mx.nd.uniform(low=0, high=2, shape=shape).astype('int32')
    num_thresholds = 100
    with SummaryWriter(_LOGDIR) as sw:
        sw.add_pr_curve(tag='test_add_pr_curve', labels=labels, predictions=predictions, num_thresholds=num_thresholds)
    check_event_file_and_remove_logdir()
Example no. 22
 def check_add_histogram(data):
     sw = SummaryWriter(logdir=_LOGDIR)
     sw.add_histogram(tag='test_add_histogram',
                      values=data,
                      global_step=0,
                      bins=100)
     sw.close()
     check_event_file_and_remove_logdir()
Example no. 23
 def plot_mxboard(self, sample_id, step_idx, pred_proba, pred, label):
     pred_proba = pred_proba[0].asnumpy()
     pred = pred[0].asnumpy()
     label = label.asnumpy()
     img_array = plot_segment_breaks(pred_proba, pred, label)
     img_array = img_array.transpose((2,0,1))[:3].astype(np.float32)/255
     with SummaryWriter(logdir="../logs/" + self._experiment_id) as sw:
         sw.add_image(tag=sample_id, image=img_array, global_step=step_idx)
Example no. 24
    def __init__(self, train_fn, args=None, resource=None,
                 searcher=None, search_options=None,
                 checkpoint='./exp/checkpoint.ag',
                 resume=False, num_trials=None,
                 time_out=None, max_reward=1.0, time_attr='epoch',
                 reward_attr='accuracy',
                 visualizer='none', dist_ip_addrs=None):
        super(FIFOScheduler,self).__init__(dist_ip_addrs)
        if resource is None:
            resource = {'num_cpus': 1, 'num_gpus': 0}
        if searcher is None:
            searcher = 'random'  # Default: Random searcher
        if search_options is None:
            search_options = dict()
        assert isinstance(train_fn, _autogluon_method)
        self.train_fn = train_fn
        self.args = args if args else train_fn.args
        self.resource = resource
        if isinstance(searcher, str):
            kwargs = search_options.copy()
            kwargs['configspace'] = train_fn.cs
            self.searcher = searcher_factory(searcher, **kwargs)
        else:
            assert isinstance(searcher, BaseSearcher)
            self.searcher = searcher
        # meta data
        self.metadata = {}
        self.metadata['search_space'] = train_fn.kwspaces
        keys = copy.deepcopy(list(self.metadata['search_space'].keys()))
        self.metadata['search_strategy'] = searcher
        self.metadata['stop_criterion'] = {'time_limits': time_out, 'max_reward': max_reward}
        self.metadata['resources_per_trial'] = resource

        self.num_trials = num_trials
        self.time_out = time_out
        self.max_reward = max_reward
        self._checkpoint = checkpoint
        self._time_attr = time_attr
        self._reward_attr = reward_attr
        self.visualizer = visualizer.lower()
        if self.visualizer == 'tensorboard' or self.visualizer == 'mxboard':
            try_import_mxboard()
            from mxboard import SummaryWriter
            self.mxboard = SummaryWriter(
                logdir=os.path.join(os.path.splitext(checkpoint)[0], 'logs'),
                flush_secs=3,
                verbose=False)
        self.log_lock = mp.Lock()
        self.training_history = OrderedDict()
        self.config_history = OrderedDict()

        if resume:
            if os.path.isfile(checkpoint):
                self.load_state_dict(load(checkpoint))
            else:
                msg = 'checkpoint path {} is not available for resume.'.format(checkpoint)
                logger.exception(msg)
                raise FileExistsError(msg)
Example no. 25
    def __init__(
        self,
        net,
        val_data,
        train_config: TrainConfig,
        train_objects: TrainObjects,
        use_rtpt: bool,
    ):
        """
        Class for training the neural network.
        :param net: The NN with loaded parameters that shall be trained.
        :param val_data: The validation data loaded with gluon DataLoader.
        :param train_config: An instance of the TrainConfig data class.
        :param train_objects: An instance of the TrainObjects data class.
        :param use_rtpt: If True, an RTPT object will be created and modified within this class.
        """
        # Too many instance attributes (29/7) - Too many arguments (24/5) - Too many local variables (25/15)
        # Too few public methods (1/2)
        self.tc = train_config
        self.to = train_objects
        if self.to.metrics is None:
            self.to.metrics = {}
        self._ctx = get_context(train_config.context, train_config.device_id)
        self._net = net
        self._graph_exported = False
        self._val_data = val_data
        # define a summary writer that logs data and flushes to the file every 5 seconds
        if self.tc.log_metrics_to_tensorboard:
            self.sum_writer = SummaryWriter(logdir=self.tc.export_dir + "logs",
                                            flush_secs=5,
                                            verbose=False)
        # Define the two loss functions
        self._softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(
            sparse_label=self.tc.sparse_policy_label)
        self._l2_loss = gluon.loss.L2Loss()
        if self.tc.optimizer_name != "nag":
            raise NotImplementedError(
                "The requested optimizer %s Isn't supported yet." %
                self.tc.optimizer_name)
        self._trainer = gluon.Trainer(
            self._net.collect_params(),
            "nag",
            {
                "learning_rate": self.to.lr_schedule(0),
                "momentum": self.to.momentum_schedule(0),
                "wd": self.tc.wd,
            },
        )

        # collect parameter names for logging the gradients of parameters in each epoch
        self._params = self._net.collect_params()
        self._param_names = self._params.keys()
        self.ordering = list(
            range(self.tc.nb_parts)
        )  # define a list which describes the order of the processed batches

        self.use_rtpt = use_rtpt
        self.rtpt = None  # Set this later in training function
Example no. 26
File: rl.py Project: tsuberim/RL
 def __init__(self, save_path=None, **kwargs):
     if not save_path:
         raise ValueError('save_path not specified')
     from mxboard import SummaryWriter
     logdir = save_path
     save_path = os.path.join(logdir, 'save')
     os.makedirs(logdir, exist_ok=True)
     self.sw = SummaryWriter(logdir=logdir)
     StatsWriter.__init__(self, save_path=save_path, **kwargs)
Example no. 27
def run_epoch(e, network, dataloader, trainer, log_dir, print_name, is_train):
    print_n = 250
    total_loss = nd.zeros(1, ctx)
    tick = time.time()
    for i, (x, y) in enumerate(dataloader):
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)
        with autograd.record(train_mode=is_train):
            output = network(x)
            loss_ctc = ctc_loss(output, y)

        if is_train:
            loss_ctc.backward()
            trainer.step(x.shape[0])

        if i == 0 and e % send_image_every_n == 0 and e > 0:
            predictions = output.softmax().topk(axis=2).asnumpy()
            decoded_text = decode(predictions)
            output_image = draw_text_on_image(x.asnumpy(), decoded_text)
            output_image[output_image < 0] = 0
            output_image[output_image > 1] = 1
            print("{} first decoded text = {}".format(print_name,
                                                      decoded_text[0]))
            with SummaryWriter(logdir=log_dir, verbose=False,
                               flush_secs=5) as sw:
                sw.add_image('bb_{}_image'.format(print_name),
                             output_image,
                             global_step=e)

        total_loss += loss_ctc.mean()

        if i % print_n == 0 and i > 0:
            print('Batches {0}: CTC Loss: {1:.2f}, time:{2:.2f} s'.format(
                i, float(total_loss.asscalar() / print_n),
                time.time() - tick))
            tick = time.time()
            nd.waitall()

    epoch_loss = float(total_loss.asscalar()) / len(dataloader)

    with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)

    return epoch_loss
Example no. 28
    def __init__(self, config, model, criterion, ctx, sample_input):
        config['trainer']['output_dir'] = os.path.join(str(pathlib.Path(os.path.abspath(__name__)).parent),
                                                       config['trainer']['output_dir'])
        config['name'] = config['name'] + '_' + model.model_name
        self.save_dir = os.path.join(config['trainer']['output_dir'], config['name'])
        self.checkpoint_dir = os.path.join(self.save_dir, 'checkpoint')
        self.alphabet = config['dataset']['alphabet']

        if config['trainer']['resume_checkpoint'] == '' and config['trainer']['finetune_checkpoint'] == '':
            shutil.rmtree(self.save_dir, ignore_errors=True)
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
        # save the alphabet used in this run to the directory where the model is saved
        save(list(self.alphabet), os.path.join(self.save_dir, 'dict.txt'))
        self.global_step = 0
        self.start_epoch = 0
        self.config = config

        self.model = model
        self.criterion = criterion
        # logger and tensorboard
        self.tensorboard_enable = self.config['trainer']['tensorboard']
        self.epochs = self.config['trainer']['epochs']
        self.display_interval = self.config['trainer']['display_interval']
        if self.tensorboard_enable:
            from mxboard import SummaryWriter
            self.writer = SummaryWriter(self.save_dir, verbose=False)

        self.logger = setup_logger(os.path.join(self.save_dir, 'train.log'))
        self.logger.info(pformat(self.config))
        self.logger.info(self.model)
        # device set
        self.ctx = ctx
        mx.random.seed(2)  # set the random seed

        self.logger.info('train with mxnet: {} and device: {}'.format(mx.__version__, self.ctx))
        self.metrics = {'val_acc': 0, 'train_loss': float('inf'), 'best_model': ''}

        schedule = self._initialize('lr_scheduler', mx.lr_scheduler)
        optimizer = self._initialize('optimizer', mx.optimizer, lr_scheduler=schedule)
        self.trainer = gluon.Trainer(self.model.collect_params(), optimizer=optimizer)

        if self.config['trainer']['resume_checkpoint'] != '':
            self._laod_checkpoint(self.config['trainer']['resume_checkpoint'], resume=True)
        elif self.config['trainer']['finetune_checkpoint'] != '':
            self._laod_checkpoint(self.config['trainer']['finetune_checkpoint'], resume=False)

        if self.tensorboard_enable:
            try:
                # add graph
                from mxnet.gluon import utils as gutils
                self.model(sample_input)
                self.writer.add_graph(model)
            except Exception:
                self.logger.error(traceback.format_exc())
                self.logger.warn('add graph to tensorboard failed')
Example no. 29
    def __init__(self, logdir, keys=['val_acc', 'val_loss']):
        if not isinstance(keys, (list, tuple)):
            raise ValueError("Keys should be a list or a tuple.")
        self.keys = keys

        self.sw = SummaryWriter(logdir=os.path.join(logdir, 'tb'))
        self.csv_path = os.path.join(logdir, 'history.csv')

        with open(self.csv_path, 'w') as f:
            f.write(";".join(keys) + "\n")
Example no. 30
def test_add_embedding():
    batch_size = 10
    embedding = mx.nd.uniform(shape=(batch_size, 20))
    labels = mx.nd.uniform(low=1, high=2, shape=(batch_size,)).astype('int32')
    images = mx.nd.uniform(shape=(batch_size, 3, 10, 10))
    global_step = np.random.randint(low=0, high=999999)
    with SummaryWriter(logdir=_LOGDIR) as sw:
        sw.add_embedding(tag='test_add_embedding', embedding=embedding, labels=labels,
                         images=images, global_step=global_step)
    check_and_remove_for_embedding(global_step)