Example #1
    def _gradient_calculation(self, true_batchs, examples, total_stats,
                              report_stats, step):
        self.model.zero_grad()

        for batch in true_batchs:
            loss = self.model(batch)

            # Topic model loss, normalized by the total number of examples
            # accumulated in true_batchs.
            topic_stats = Statistics(topic_loss=loss.item() /
                                     float(examples))
            loss.div(float(examples)).backward(retain_graph=False)
            total_stats.update(topic_stats)
            report_stats.update(topic_stats)

        # Every 1000 steps, log the top-20 words for each topic.
        if step % 1000 == 0:
            for k in range(self.args.topic_num):
                logger.info(','.join([
                    self.model.voc_id_wrapper.i2w(i)
                    for i in self.model.topic_model.tm1.beta.topk(20, dim=-1)
                    [1][k].tolist()
                ]))
        # In case of multi-step gradient accumulation,
        # update only after `accum` batches.
        if self.n_gpu > 1:
            grads = [
                p.grad.data for p in self.model.parameters()
                if p.requires_grad and p.grad is not None
            ]
            distributed.all_reduce_and_rescale_tensors(grads, float(1))
        for o in self.optims:
            o.step()
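
The division by `examples` before `backward()` is what makes the accumulation above equivalent to one large batch: each micro-batch contributes loss_i / N, so the summed gradients equal the gradient of the mean loss. A minimal, self-contained sketch of that pattern (the toy model and data are illustrative stand-ins, not taken from the example):

import torch

# Toy stand-ins; only the normalize-then-backward pattern matters here.
model = torch.nn.Linear(4, 1)
micro_batches = [torch.randn(8, 4) for _ in range(4)]
total_examples = sum(b.size(0) for b in micro_batches)

model.zero_grad()
for x in micro_batches:
    loss = model(x).pow(2).sum()  # per-micro-batch loss
    # Normalize before backward so the accumulated gradient equals
    # the gradient of the mean loss over all examples.
    loss.div(float(total_examples)).backward()

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer.step()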
Example #2
    def poll(self):
        try:
            value = self.touchpin.read()
        except ValueError:
            logger.error('Failed reading touchpin')
            return
        weighted_value = sum(self.readings[-2:] + [value]) / 3
        mean = self.get_current_mean()
        thresh = mean * self.threshold
        ratio = weighted_value / mean
        #logger.debug(
        #    '[{}] Mean: {:04.0f}, Threshold: {:04.0f}, This: {:04.0f}, This weighted: {:04.0f} / {:.0%}'
        #    .format(utime.ticks_ms(), mean, thresh, value, weighted_value, ratio)
        #)
        # logger.debug('{} {} {}'.format(mean, weighted_value, int(ratio*100)))

        if weighted_value < thresh:
            now = utime.ticks_ms()
            if (utime.ticks_diff(now, self.callback_triggered_last) <
                    self.debounce_ms):
                logger.info('Debounced')
                # Make reading affect mean less - this allows for slow recalibration
                #value += (thresh - value)*0.9
            else:
                self.callback()
                self.callback_triggered_last = now
        self.readings.pop(0)
        self.readings.append(weighted_value)
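
`poll` relies on state the snippet does not show: a fixed-length `readings` window, a running mean over it, and the debounce bookkeeping. A minimal constructor sketch under those assumptions (ESP32 MicroPython; the class name, parameter names, and defaults are guesses, not taken from the original):

import utime
from machine import Pin, TouchPad

class TouchButton:
    def __init__(self, pin, callback, threshold=0.9, debounce_ms=400,
                 window=100):
        self.touchpin = TouchPad(Pin(pin))
        self.callback = callback
        self.threshold = threshold    # fraction of the mean that counts as a touch
        self.debounce_ms = debounce_ms
        # Seed the window so the first mean is meaningful.
        self.readings = [self.touchpin.read() for _ in range(window)]
        self.callback_triggered_last = utime.ticks_ms()

    def get_current_mean(self):
        return sum(self.readings) / len(self.readings)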
Example #3
    def record(self, entry):
        """Records an entry in the history."""
        self.entries.append(entry)
        message = '[{}] {} {} ({})'.format(
            entry.process, entry.action, entry.operation,
            ', '.join([str(value) for value in entry.values]))
        if entry.action == 'invoke':
            logger.warning(message)
        elif entry.action == 'ok':
            logger.debug(message)
        elif entry.action == 'fail':
            logger.error(message)
        elif entry.action == 'function':
            logger.info(message)
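
`record` only requires that an entry exposes `process`, `action`, `operation`, and `values`, and that the owning object has an `entries` list. A small runnable sketch with a hypothetical namedtuple standing in for the real entry type, showing the message the method would log:

from collections import namedtuple

# Hypothetical entry type; the real one only needs these four attributes.
Entry = namedtuple('Entry', ['process', 'action', 'operation', 'values'])

entry = Entry(process=0, action='invoke', operation='append', values=[5])
message = '[{}] {} {} ({})'.format(
    entry.process, entry.action, entry.operation,
    ', '.join(str(v) for v in entry.values))
print(message)  # -> [0] invoke append (5)
# With action == 'invoke', record() would emit this via logger.warning.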
Example #4
def build_trainer(args, device_id, model, optims, tokenizer):
    """
    Simplify `Trainer` creation based on user `opt`s*
    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        data_type (str): string describing the type of data
            e.g. "text", "img", "audio"
        model_saver(:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model
    """
    device = "cpu" if args.visible_gpus == '-1' else "cuda"

    grad_accum_count = args.accum_count
    n_gpu = args.world_size

    if device_id >= 0:
        gpu_rank = int(args.gpu_ranks[device_id])
    else:
        gpu_rank = 0
        n_gpu = 0

    print('gpu_rank %d' % gpu_rank)

    tensorboard_log_dir = args.model_path

    writer = SummaryWriter(tensorboard_log_dir, comment="Unmt")

    report_manager = ReportMgr(args.report_every,
                               start_time=-1,
                               tensorboard_writer=writer)

    trainer = Trainer(args, model, optims, tokenizer, grad_accum_count, n_gpu,
                      gpu_rank, report_manager)

    if model:
        n_params = _tally_parameters(model)
        logger.info('* number of parameters: %d' % n_params)

    return trainer
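
A hedged sketch of a minimal invocation: the `Namespace` fields below are exactly the attributes `build_trainer` dereferences, while `model`, `optims`, and `tokenizer` are assumed to come from the surrounding training script.

from argparse import Namespace

args = Namespace(
    visible_gpus='-1',    # '-1' selects CPU
    accum_count=2,        # gradient accumulation count
    world_size=1,         # number of GPUs
    gpu_ranks=[0],
    model_path='./logs',  # also used as the TensorBoard log dir
    report_every=50,
)

trainer = build_trainer(args, device_id=-1, model=model,
                        optims=optims, tokenizer=tokenizer)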
Example #5
    def poll(self):
        value = self.touchpin.read()
        weighted_value = sum(self.readings[-2:] + [value]) / 3
        mean = self.get_current_mean()
        thresh = mean * self.threshold
        ratio = weighted_value / mean
        #logger.debug(
        #    '[{}] Mean: {:04.0f}, Threshold: {:04.0f}, This: {:04.0f}, This weighted: {:04.0f} / {:.0%}'
        #    .format(utime.ticks_ms(), mean, thresh, value, weighted_value, ratio)
        #)
        logger.debug('{} {} {}'.format(mean, weighted_value, int(ratio*100)))

        if weighted_value < thresh:
            now = utime.ticks_ms()
            if (utime.ticks_diff(now, self.callback_triggered_last)
                    < self.debounce_ms):
                logger.info('Debounced')
                # Make reading affect mean less - this allows for slow recalibration
                #value += (thresh - value)*0.9
            else:
                self.callback()
                self.callback_triggered_last = now
        self.readings.pop(0)
        self.readings.append(weighted_value)
Example #6
    def train(self,
              train_iter_fct,
              train_steps,
              valid_iter_fct=None,
              valid_steps=-1):
        """
        The main training loops.
        by iterating over training data (i.e. `train_iter_fct`)
        and running validation (i.e. iterating over `valid_iter_fct`

        Args:
            train_iter_fct(function): a function that returns the train
                iterator. e.g. something like
                train_iter_fct = lambda: generator(*args, **kwargs)
            valid_iter_fct(function): same as train_iter_fct, for valid data
            train_steps(int):
            valid_steps(int):
            save_checkpoint_steps(int):

        Return:
            None
        """
        logger.info('Start training...')

        step = self.optims[0]._step + 1
        true_batchs = []
        accum = 0
        examples = 0

        train_iter = train_iter_fct()
        total_stats = Statistics()
        report_stats = Statistics()
        self._start_report_manager(start_time=total_stats.start_time)

        while step <= train_steps:

            for i, batch in enumerate(train_iter):
                if self.n_gpu == 0 or (i % self.n_gpu == self.gpu_rank):

                    true_batchs.append(batch)
                    examples += batch.tgt.size(0)
                    accum += 1
                    if accum == self.grad_accum_count:
                        if self.n_gpu > 1:
                            examples = sum(
                                distributed.all_gather_list(examples))
                        self._gradient_calculation(true_batchs, examples,
                                                   total_stats, report_stats,
                                                   step)
                        report_stats = self._maybe_report_training(
                            step, train_steps, self.optims[0].learning_rate,
                            report_stats)
                        true_batchs = []
                        accum = 0
                        examples = 0
                        if (step % self.save_checkpoint_steps == 0
                                and self.gpu_rank == 0):
                            self._save(step)
                        step += 1
                        if step > train_steps:
                            break
            train_iter = train_iter_fct()

        return total_stats
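
`train` takes a factory rather than an iterator because the loop exhausts `train_iter` and rebuilds it with `train_iter_fct()` at the end of each pass. A sketch of the calling pattern; `my_batches` and `trainer` are assumed to exist in the surrounding script:

def train_iter_fct():
    # Return a fresh iterator on every call; train() re-creates it
    # whenever the previous pass over the data is exhausted.
    return iter(my_batches)

stats = trainer.train(train_iter_fct, train_steps=10000)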