def __init__(self, config, paras, id2accent):
    """Set up paths, decoding options and Comet logging for an evaluation run.

    Args:
        config: Nested mapping; this method reads ``config['solver']`` keys
            ``data_root``, ``setting`` and ``beam_decode``.
        paras: Run options object. Attributes read here: ``is_memmap``,
            ``model_name``, ``njobs``, ``algo``, ``pretrain_suffix``,
            ``eval_suffix``, ``accent``, ``runs``, ``test_model``,
            ``decode_suffix``, ``decode_mode``, ``decode_batch_size``,
            ``cuda``, ``lm_model_path``, ``resume`` and ``overwrite``.
            ``paras.pretrain_suffix`` is overwritten with
            ``paras.eval_suffix`` when ``algo == 'no'`` and it is ``None``.
        id2accent: Lookup indexed by ``paras.accent`` that yields the accent
            name used as a directory component.

    Raises:
        AssertionError: If the model file does not exist, if ``lm_beam``
            decoding is requested without ``lm_model_path``, or if the
            decode directory already exists and ``overwrite`` is not set.
    """
    self.config = config
    self.paras = paras
    self.train_type = 'evaluation'
    self.is_memmap = paras.is_memmap
    self.model_name = paras.model_name
    self.njobs = paras.njobs

    # Without a pretraining algorithm the pretrain suffix defaults to the
    # evaluation suffix so the log path below is still well-formed.
    if paras.algo == 'no' and paras.pretrain_suffix is None:
        paras.pretrain_suffix = paras.eval_suffix

    ### Set path
    cur_path = Path.cwd()
    accent_name = id2accent[paras.accent]
    self.data_dir = Path(config['solver']['data_root'], accent_name)
    self.log_dir = Path(cur_path, LOG_DIR, self.train_type,
                        config['solver']['setting'], paras.algo,
                        paras.pretrain_suffix, paras.eval_suffix,
                        accent_name, str(paras.runs))

    self.model_path = Path(self.log_dir, paras.test_model)
    assert self.model_path.exists(), f"{self.model_path.as_posix()} not exists..."
    self.decode_dir = Path(self.log_dir, paras.decode_suffix)

    ### Decode
    self.decode_mode = paras.decode_mode
    self.beam_decode_param = config['solver']['beam_decode']
    self.batch_size = paras.decode_batch_size
    self.use_gpu = paras.cuda

    if paras.decode_mode == 'lm_beam':
        assert paras.lm_model_path is not None, "In LM Beam decode mode, lm_model_path should be specified"
        self.lm_model_path = paras.lm_model_path
    else:
        self.lm_model_path = None

    #####################################################################

    ### Resume Mechanism
    if not paras.resume:
        if self.decode_dir.exists():
            assert paras.overwrite, \
                f"Path exists ({self.decode_dir}). " \
                "Use --overwrite or change decode suffix"
            logger.warning('Overwrite existing directory')
            rmtree(self.decode_dir)
        self.decode_dir.mkdir(parents=True)
        self.prev_decode_step = -1
    else:
        # The number of lines already written to 'best-hyp' is the resume
        # point. Counting directly (rather than reusing a leaked loop index)
        # yields 0 for an empty file instead of raising UnboundLocalError.
        with open(Path(self.decode_dir, 'best-hyp'), 'r') as f:
            self.prev_decode_step = sum(1 for _ in f)
        logger.notice(f"Decode resume from {self.prev_decode_step}")

    ### Comet
    # Re-attach to the experiment whose key was stored in the log directory.
    with open(Path(self.log_dir, 'exp_key'), 'r') as f:
        exp_key = f.read().strip()
    # NOTE(review): `comet_exp` is bound as a local name here (no `global`
    # statement is visible in this method) - confirm that is intended.
    comet_exp = ExistingExperiment(previous_experiment=exp_key,
                                   project_name=COMET_PROJECT_NAME,
                                   workspace=COMET_WORKSPACE,
                                   auto_output_logging=None,
                                   auto_metric_logging=None,
                                   display_summary_level=0,
                                   )
    comet_exp.log_other('status', 'decode')
# The Comet project is named after the project directory.
project_name = basename(project_dir)

# When resuming, re-attach to the experiment recorded in the checkpoint;
# otherwise start a fresh experiment under `project_name`.
if args.resume:
    experiment = ExistingExperiment(
        api_key=args.api_key,
        previous_experiment=checkpoint['experiment_key'],
        auto_param_logging=False,
        auto_metric_logging=False,
        parse_args=False)
else:
    experiment = Experiment(
        api_key=args.api_key,
        project_name=project_name,
        auto_param_logging=False,
        auto_metric_logging=False,
        parse_args=False)

experiment.log_other('experiment_name', experiment_name)
experiment.log_parameters(vars(args))
for k in hyperparameters:
    value = hyperparameters[k]
    # Nested dicts are logged as a group, prefixed with their own key;
    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(value, dict):
        experiment.log_parameters(value, prefix=k)
    else:
        experiment.log_parameter(k, value)

# Mapping: {'Cat': 0, 'Dog': 1}
try:
    dataset = torchvision.datasets.ImageFolder(root='./trainset')
except Exception:
    # Loading failed (presumably ./trainset is not extracted yet - confirm),
    # so unpack the bundled archive. `except Exception` keeps the original
    # best-effort fallback without swallowing KeyboardInterrupt/SystemExit.
    import zipfile
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile('trainset.zip', 'r') as zip_ref:
        zip_ref.extractall()
### Cal CER ####################################################################
# Each line of 'best-hyp' is "<ref>\t<hyp>". The per-utterance character
# error rate (edit distance / reference length, in percent) is averaged
# over all lines.
logger.notice("CER calculating...")
cer = 0.0
with open(Path(decode_dir, 'best-hyp'), 'r') as hyp_ref_in:
    cnt = 0
    for line in hyp_ref_in:
        cnt += 1
        fields = line.rstrip().split('\t')
        if len(fields) == 1:
            # rstrip() removes the trailing tab of an empty hypothesis, so
            # only the reference is left. Score it against an empty
            # hypothesis instead of failing to unpack (the .trn export
            # below treats the same case the same way).
            ref_pieces, hyp_pieces = fields[0], None
        else:
            ref_pieces, hyp_pieces = fields
        # str.upper() returns a new string; keep the result so the
        # comparison really is case-insensitive.
        ref = filter(remove_accent(spm.DecodePieces(to_list(ref_pieces)))).upper()
        if hyp_pieces is None:
            hyp = ''
        else:
            hyp = filter(remove_accent(spm.DecodePieces(to_list(hyp_pieces)))).upper()
        cer += (editdistance.eval(ref, hyp) / len(ref) * 100)
cer = cer / cnt
logger.log(f"CER: {cer}", prefix='test')
comet_exp.log_other(f"cer({paras.decode_mode})", round(cer, 2))
with open(Path(decode_dir, 'cer'), 'w') as fout:
    print(str(cer), file=fout)
################################################################################

### Cal SER ####################################################################
# Split 'best-hyp' into 'ref.trn' / 'hyp.trn', tagging every utterance with
# an id of the form "(<i//1000>k_<i>)".
logger.notice("Symbol error rate calculating...")
with open(Path(decode_dir,'best-hyp'),'r') as hyp_ref_in, \
     open(Path(decode_dir,'hyp.trn'),'w') as hyp_out, \
     open(Path(decode_dir,'ref.trn'),'w') as ref_out:
    for i, line in enumerate(hyp_ref_in):
        foo = line.rstrip().split('\t')
        if len(foo) == 1:
            # Only the reference is present: emit an empty hypothesis.
            print(f"{' '.join(to_list(foo[0]))} ({i//1000}k_{i})", file=ref_out)
            print(f"({i//1000}k_{i})", file=hyp_out)