def exec(self):
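        # Pick the decoding strategy (greedy vs. beam search) and run it over the
        # evaluation set, either via joblib parallel jobs or in a sequential loop.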
        if self.decode_mode != 'greedy':
            logger.notice(f"Start decoding with beam search (beam size: {self.config['solver']['beam_decode']['beam_size']})")
            # Beam search is not implemented yet; the assignment below is unreachable until this raise is removed.
            raise NotImplementedError(f"{self.decode_mode} decoding is not supported yet")
            self._decode = self.beam_decode
        else:
            logger.notice("Start greedy decoding")
            if self.batch_size > 1:
                dev = 'gpu' if self.use_gpu else 'cpu'
                logger.log(f"Number of utterance batches to decode: {len(self.eval_set)}, decoding with batch size {self.batch_size} on {dev}")
                self._decode = self.batch_greedy_decode
                self.njobs = 1
            else:
                logger.log(f"Number of utterances to decode: {len(self.eval_set)}, decoding with {self.njobs} threads on cpu")
                self._decode = self.greedy_decode

        if self.njobs > 1:
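            # Decode in parallel with joblib, one evaluation example per delayed call.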
            try:
                _ = Parallel(n_jobs=self.njobs)(delayed(self._decode)(i, x, ilen, y, olen) for i, (x, ilen, y, olen) in enumerate(self.eval_set))

            #NOTE: cannot log to comet here, since the experiment object cannot be serialized for the parallel workers
            except KeyboardInterrupt:
                logger.warning("Decoding stopped")
            else:
                logger.notice("Decoding done")
                # self.comet_exp.log_other('status','decoded')
        else:
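            # Sequential decoding: iterate over the evaluation set with a progress bar.
            # Note: this branch always calls batch_greedy_decode, regardless of the
            # decoding function selected above.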

            tbar = get_bar(total=len(self.eval_set), leave=True)

            for cur_b, (xs, ilens, ys, olens) in enumerate(self.eval_set):
                self.batch_greedy_decode(xs, ilens, ys, olens)
                tbar.update(1)

    def train(self):
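        # Step-based pretraining loop: run until max_step, drawing one batch per step
        # from the data container and checkpointing every save_ival steps.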
        try:
            while self.global_step < self.max_step:
                tbar = get_bar(total=self.eval_ival, \
                               desc=f"Step {self.global_step}", leave=True)

                for _ in range(self.eval_ival):
                    #TODO: add a sampling method so that Meta and Multi training can be compared fairly
                    idx, (x, ilens, ys,
                          olens) = self.data_container.get_item()[0]

                    batch_size = len(ys)
                    info = self._train(idx,
                                       x,
                                       ilens,
                                       ys,
                                       olens,
                                       accent_idx=idx)
                    self.train_info.add(info, batch_size)

                    grad_norm = nn.utils.clip_grad_norm_(
                        self.asr_model.parameters(), GRAD_CLIP)

                    if math.isnan(grad_norm):
                        logger.warning(
                            f"grad norm NaN @ step {self.global_step}")
                    else:
                        self.asr_opt.step()

                    if isinstance(self.asr_opt, TransformerOptimizer):
                        self.log_msg(self.asr_opt.lr)
                    else:
                        self.log_msg()
                    self.check_evaluate()

                    self.global_step += 1
                    self.dashboard.step()

                    del x, ilens, ys, olens
                    tbar.update(1)

                    if self.global_step % self.save_ival == 0:
                        self.save_per_steps()
                    self.dashboard.check()
                tbar.close()

        except KeyboardInterrupt:
            logger.warning("Pretraining stopped")
            self.save_per_steps()
            self.dashboard.set_status('pretrained(SIGINT)')
        else:
            logger.notice("Pretraining completed")
            self.dashboard.set_status('pretrained')

    def evaluate(self):
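        # Evaluate on each accent's dev loader separately, then aggregate the per-accent
        # statistics into an overall dev average used for best-model selection.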
        self.asr_model.eval()
        self.write_tr_logs()

        dev_info_ls = [
            RunningAvgDict(decay_rate=1.) for _ in range(self.num_pretrain)
        ]
        for idx, dev_loader in enumerate(self.data_container.dev_loaders):
            tbar = get_bar(
                total=len(dev_loader),
                desc=f"Eval on {self.accents[idx]} @ step {self.global_step}")
            with torch.no_grad():
                for cur_b, (x, ilens, ys, olens) in enumerate(dev_loader):

                    if ilens.max() > self.dev_max_ilen:
                        tbar.update(1)
                        continue

                    batch_size = len(ys)
                    info = self._eval(idx, x, ilens, ys, olens)
                    dev_info_ls[idx].add(info, batch_size)

                    if cur_b % self.log_ival == 0:
                        logger.log_info(dev_info_ls[idx], prefix='test')

                    del x, ilens, ys, olens
                    tbar.update(1)

                logger.flush()
                tbar.close()

                self.dashboard.log_info(f"dev_{self.accents[idx]}",
                                        dev_info_ls[idx])
                self.write_dev_logs(f"dev_{self.accents[idx]}",
                                    dev_info_ls[idx])

        dev_avg_info = RunningAvgDict(decay_rate=1.0)
        for dev_info in dev_info_ls:
            dev_avg_info.add({k: float(v) for k, v in dev_info.items()})

        self.dashboard.log_info("dev", dev_avg_info)
        self.write_dev_logs("dev_avg", dev_avg_info)
        cur_cer = float(dev_avg_info['cer'])
        cur_wer = float(dev_avg_info['wer'])
        if cur_wer < self.best_wer:
            self.best_wer = cur_wer
            self.save_best_model()
        if cur_cer < self.best_cer:
            self.best_cer = cur_cer
            self.save_best_model('cer', only_stat=True)

        self.asr_model.train()

    def train(self):
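        # Epoch-based training loop: evaluate once up front, then iterate over the
        # training set for max_epoch epochs, saving a checkpoint after every epoch.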
        self.evaluate()
        try:
            if self.save_verbose:
                self.save_init()
            while self.ep < self.max_epoch:
                tbar = get_bar(total=len(self.train_set), \
                               desc=f"Epoch {self.ep}", leave=True)

                for cur_b, (x, ilens, ys, olens) in enumerate(self.train_set):

                    batch_size = len(ys)
                    info = self._train(cur_b, x, ilens, ys, olens)
                    self.train_info.add(info, batch_size)

                    grad_norm = nn.utils.clip_grad_norm_(
                        self.asr_model.parameters(), GRAD_CLIP)

                    if math.isnan(grad_norm):
                        logger.warning(
                            f"grad norm NaN @ step {self.global_step}")
                    else:
                        self.asr_opt.step()

                    if isinstance(self.asr_opt, TransformerOptimizer):
                        self.log_msg(self.asr_opt.lr)
                    else:
                        self.log_msg()
                    self.check_evaluate()

                    self.global_step += 1
                    self.dashboard.step()

                    del x, ilens, ys, olens
                    tbar.update(1)

                self.ep += 1
                self.save_per_epoch()
                self.dashboard.check()
                tbar.close()

                if self.eval_every_epoch:
                    self.evaluate()

        except KeyboardInterrupt:
            logger.warning("Training stopped")
            self.evaluate()
            self.dashboard.set_status('trained(SIGINT)')
        else:
            logger.notice("Training completed")
            self.dashboard.set_status('trained')

    def train(self):
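        # Meta-learning pretraining: each step samples meta_batch_size accent tasks,
        # adapts on meta_k inner-train batches per task (run_task), computes the
        # inner-test loss on a held-out batch, and accumulates it into the meta update.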

        try:
            task_ids = list(range(self.num_pretrain))
            while self.global_step < self.max_step:
                tbar = get_bar(total=self.eval_ival, \
                               desc=f"Step {self.global_step}", leave=True)
                for _ in range(self.eval_ival):
                    shuffle(task_ids)

                    #FIXME: split into inner-train and inner-test sets here (and check whether performance drops)
                    for accent_id in task_ids[:self.meta_batch_size]:
                        # inner-loop learn
                        tr_batches = self.data_container.get_item(accent_id, self.meta_k)
                        self.run_task(tr_batches)

                        # inner-loop test
                        val_batch = self.data_container.get_item(accent_id)[0]
                        batch_size = len(val_batch[1][2])
                        info = self._train(val_batch[0], *val_batch[1], accent_idx=val_batch[0])
                        grad_norm = nn.utils.clip_grad_norm_(
                            self.asr_model.parameters(), GRAD_CLIP)

                        if math.isnan(grad_norm):
                            logger.warning(f"grad norm NaN @ step {self.global_step} on {self.accents[accent_id]}, ignoring...")

                        self._partial_meta_update()
                        del val_batch
                        self.train_info.add(info, batch_size)

                    self._final_meta_update()

                    self.log_msg(self.meta_opt.lr)
                    self.check_evaluate()
                    self.global_step += 1
                    self.dashboard.step()
                    tbar.update(1)

                    if self.global_step % self.save_ival == 0:
                        self.save_per_steps()
                    self.dashboard.check()
                tbar.close()

        except KeyboardInterrupt:
            logger.warning("Pretraining stopped")
            self.save_per_steps()
            self.dashboard.set_status('pretrained(SIGINT)')
        else:
            logger.notice("Pretraining completed")
            self.dashboard.set_status('pretrained')

    def evaluate(self):
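        # Single dev-set evaluation: accumulate running CER/WER/loss, save the best model
        # on improvement, and step the (optional) LR scheduler on the dev loss.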
        self.asr_model.eval()

        dev_info = RunningAvgDict(decay_rate=1.)
        tbar = get_bar(total=len(self.dev_set),
                       desc=f"Eval @ step {self.global_step}",
                       leave=True)

        with torch.no_grad():
            for cur_b, (x, ilens, ys, olens) in enumerate(self.dev_set):

                if ilens.max() > self.dev_max_ilen:
                    tbar.update(1)
                    continue

                batch_size = len(ys)
                info = self._eval(cur_b, x, ilens, ys, olens)
                dev_info.add(info, batch_size)

                if cur_b % self.log_ival == 0:
                    logger.log_info(dev_info, prefix='test')

                del x, ilens, ys, olens
                tbar.update(1)

            logger.flush()
            tbar.close()

            self.dashboard.log_info('dev', dev_info)
            self.write_logs(dev_info)

            cur_cer = float(dev_info['cer'])
            cur_wer = float(dev_info['wer'])
            if cur_wer < self.best_wer:
                self.best_wer = cur_wer
                self.save_best_model()

            if cur_cer < self.best_cer:
                self.best_cer = cur_cer
                self.save_best_model('cer', only_stat=True)

            if self.lr_scheduler is not None:
                self.lr_scheduler.step(float(dev_info['loss']))

        self.asr_model.train()