def __init__(self, config, paras, id2accent):
        """Set up an evaluation/decode run.

        Resolves all filesystem paths for the run, configures the decoder,
        handles the resume/overwrite policy for the decode directory, and
        re-attaches the Comet experiment created during training.

        Args:
            config: dict-like solver configuration; reads config['solver'].
            paras: parsed command-line argument namespace.
            id2accent: mapping from accent id to accent name (used in paths).
        """
        self.config = config
        self.paras = paras
        self.train_type = 'evaluation'
        self.is_memmap = paras.is_memmap
        self.model_name = paras.model_name

        self.njobs = paras.njobs

        # Without a meta-learning algorithm there is no separate pretrain run,
        # so reuse the eval suffix when building the log path below.
        if paras.algo == 'no' and paras.pretrain_suffix is None:
            paras.pretrain_suffix = paras.eval_suffix

        ### Set path
        cur_path = Path.cwd()
        self.data_dir = Path(config['solver']['data_root'], id2accent[paras.accent])
        self.log_dir = Path(cur_path, LOG_DIR, self.train_type,
                            config['solver']['setting'], paras.algo,
                            paras.pretrain_suffix, paras.eval_suffix,
                            id2accent[paras.accent], str(paras.runs))
        self.model_path = Path(self.log_dir, paras.test_model)
        assert self.model_path.exists(), f"{self.model_path.as_posix()} not exists..."
        self.decode_dir = Path(self.log_dir, paras.decode_suffix)

        ### Decode
        self.decode_mode = paras.decode_mode
        self.beam_decode_param = config['solver']['beam_decode']
        self.batch_size = paras.decode_batch_size
        self.use_gpu = paras.cuda

        # An external LM path is only needed (and then required) for lm_beam.
        if paras.decode_mode == 'lm_beam':
            assert paras.lm_model_path is not None, "In LM Beam decode mode, lm_model_path should be specified"
            self.lm_model_path = paras.lm_model_path
        else:
            self.lm_model_path = None

        ### Resume Mechanism
        if not paras.resume:
            if self.decode_dir.exists():
                assert paras.overwrite, \
                    f"Path exists ({self.decode_dir}). Use --overwrite or change decode suffix"
                logger.warning('Overwrite existing directory')
                rmtree(self.decode_dir)
            self.decode_dir.mkdir(parents=True)
            self.prev_decode_step = -1
        else:
            # Resume from the number of hypotheses already written to best-hyp.
            # BUGFIX: initialize i before the loop — on an empty file the loop
            # never binds i and the original raised NameError; now an empty
            # file resumes from step 0.
            i = -1
            with open(Path(self.decode_dir, 'best-hyp'), 'r') as f:
                for i, l in enumerate(f):
                    pass
            self.prev_decode_step = i + 1
            logger.notice(f"Decode resume from {self.prev_decode_step}")

        ### Comet
        # Re-attach to the training experiment whose key was stored on disk.
        # NOTE(review): comet_exp is a local and is never bound to self —
        # verify how later decode/scoring code obtains this handle.
        with open(Path(self.log_dir, 'exp_key'), 'r') as f:
            exp_key = f.read().strip()
            comet_exp = ExistingExperiment(previous_experiment=exp_key,
                                           project_name=COMET_PROJECT_NAME,
                                           workspace=COMET_WORKSPACE,
                                           auto_output_logging=None,
                                           auto_metric_logging=None,
                                           display_summary_level=0,
                                           )
        comet_exp.log_other('status', 'decode')
    # NOTE(review): this section is the interior of a function whose header is
    # not visible in this chunk; `project_dir`, `args`, `checkpoint`,
    # `experiment_name`, and `hyperparameters` come from that enclosing scope —
    # confirm against the full file.
    project_name = basename(project_dir)
    if args.resume:
        # Resuming: re-open the Comet experiment whose key was saved in the
        # training checkpoint, so metrics continue in the same run.
        experiment = ExistingExperiment(
                api_key=args.api_key, 
                previous_experiment=checkpoint['experiment_key'],
                auto_param_logging=False,
                auto_metric_logging=False,
                parse_args=False)
    else:
        # Fresh run: create a new experiment named after the project directory.
        experiment = Experiment(
            api_key=args.api_key,
            project_name=project_name,
            auto_param_logging=False,
            auto_metric_logging=False,
            parse_args=False)
    experiment.log_other('experiment_name', experiment_name)
    experiment.log_parameters(vars(args))
    # Log hyperparameters: one level of nested dicts is flattened with the
    # group name as a key prefix; scalar values are logged individually.
    for k in hyperparameters:
        if type(hyperparameters[k]) == dict:
            experiment.log_parameters(hyperparameters[k], prefix=k)
        else:
            experiment.log_parameter(k, hyperparameters[k])

# Mapping: {'Cat': 0, 'Dog': 1}
# Load the training images; if the folder is missing, extract the archive and
# retry. BUGFIX: the original used a bare `except:` (which swallows every
# error, including KeyboardInterrupt) and never assigned `dataset` on the
# exception path, leaving it undefined after extraction.
try:
    dataset = torchvision.datasets.ImageFolder(root='./trainset')
except FileNotFoundError:
    import zipfile
    with zipfile.ZipFile('trainset.zip', 'r') as zip_ref:
        zip_ref.extractall()
    dataset = torchvision.datasets.ImageFolder(root='./trainset')
# Example #3
# 0
### Cal CER ####################################################################
# Compute corpus-level character error rate over the decoded hypotheses.
# 'best-hyp' holds one "<ref>\t<hyp>" pair per line; CER is averaged per line.
logger.notice("CER calculating...")
cer = 0.0
with open(Path(decode_dir, 'best-hyp'), 'r') as hyp_ref_in:
    cnt = 0
    for line in hyp_ref_in:
        cnt += 1
        ref, hyp = line.rstrip().split('\t')
        # NOTE(review): `filter` is called with a single argument, so it must
        # be a project helper shadowing the builtin — verify its definition.
        ref = filter(remove_accent(spm.DecodePieces(to_list(ref))))
        # BUGFIX: str.upper() returns a new string; the original discarded the
        # result, so scoring was silently case-sensitive. Bind the result.
        ref = ref.upper()
        hyp = filter(remove_accent(spm.DecodePieces(to_list(hyp))))
        hyp = hyp.upper()
        cer += (editdistance.eval(ref, hyp) / len(ref) * 100)
    cer = cer / cnt
logger.log(f"CER: {cer}", prefix='test')
comet_exp.log_other(f"cer({paras.decode_mode})", round(cer, 2))
with open(Path(decode_dir, 'cer'), 'w') as fout:
    print(str(cer), file=fout)
################################################################################

### Cal SER ####################################################################
logger.notice("Symbol error rate calculating...")
with open(Path(decode_dir,'best-hyp'),'r') as hyp_ref_in, \
     open(Path(decode_dir,'hyp.trn'),'w') as hyp_out, \
     open(Path(decode_dir,'ref.trn'),'w') as ref_out:
    for i, line in enumerate(hyp_ref_in.readlines()):
        foo = line.rstrip().split('\t')
        if len(foo) == 1:
            print(f"{' '.join(to_list(foo[0]))} ({i//1000}k_{i})",
                  file=ref_out)
            print(f"({i//1000}k_{i})", file=hyp_out)