def score(ds_iter, model, tgt_tokenizer, SRC, TGT):
    """Average sentence-level BLEU and chrF over a dataset iterator."""
    bleu_tot = 0.0
    chrf_tot = 0.0
    count = 0
    model.eval()
    for i, batch in enumerate(ds_iter):
        src = batch.src.transpose(0, 1)[0].numpy()
        tgt = batch.tgt.view(-1).numpy()
        tgt_tokens = [TGT.vocab.itos[index] for index in tgt]
        pred_sentence = greedy_decode_ids(model, src, SRC, TGT, tgt_tokenizer).strip().split(' ')
        # Drop the BOS/EOS tokens before detokenizing the reference.
        tgt_sentence = tgt_tokenizer.decode(tgt_tokens[1:-1], BLANK_WORD).strip().split(' ')
        bleu_tot += sentence_bleu([tgt_sentence], pred_sentence)
        try:
            chrf_tot += sentence_chrf(tgt_sentence, pred_sentence)
        except Exception:
            # chrF can fail on empty or degenerate inputs; count the pair as 0.
            chrf_tot += 0.0
        count += 1
    return bleu_tot / count, chrf_tot / count
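# Illustrative check of the per-pair metrics used in score() above, on toy
# token lists (hypothetical data; sentence_bleu and sentence_chrf are the
# real NLTK functions).
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.chrf_score import sentence_chrf

ref = 'the cat sat on the mat'.split(' ')
hyp = 'the cat sat on a mat'.split(' ')
print(sentence_bleu([ref], hyp))   # BLEU takes a list of references
print(sentence_chrf(ref, hyp))     # chrF takes a single reference, first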
def char_rank_scorer(sent, input_tuple):
    # Escape non-ASCII characters so both strings are compared on the same
    # footing (decode back to str so sentence_chrf receives text, not bytes).
    sent = sent.encode("unicode-escape").decode("ascii")
    input_tuple = input_tuple.encode("unicode-escape").decode("ascii")
    try:
        # Reference first, hypothesis second.
        return chrf_score.sentence_chrf(input_tuple.split(), sent.split())
    except Exception as error:
        print(str(error))
        return 0
def evaluate(self, results: list, targets: list):
    logging.info('Chrf evaluator: Start evaluating')
    self._check_params(results, targets)
    # sentence_chrf expects the reference (target) first and the hypothesis
    # (result) second; the positional 1, 1 set min_len and max_len.
    scoring = list(map(lambda r, t: chrf_score.sentence_chrf(t.rstrip(), r.rstrip(), 1, 1),
                       results, targets))
    logging.info('Chrf evaluator: Finished evaluating')
    return scoring, sum(scoring) / len(scoring)
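# Minimal usage sketch of the call evaluate() makes per pair, on hypothetical
# strings. With min_len=1 and max_len=1, only character unigrams are compared.
from nltk.translate import chrf_score

target = 'ein kleines haus'
result = 'ein kleines haus am see'
print(chrf_score.sentence_chrf(target.rstrip(), result.rstrip(), 1, 1))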
def sent_chrF_plus(reference, hypothesis):
    '''
    reference: a list of words of the reference.
    hypothesis: a list of words of the hypothesis.
    '''
    # Note: NLTK's sentence_chrf is character-level chrF; despite the
    # function name, this is not the full chrF++ (word + char n-gram) variant.
    return chrf_score.sentence_chrf(reference, hypothesis)
def get_chrf_score(sentence_chrf, hyp, ref):
    """
    Return the sentence-level chrF score.

    :param sentence_chrf: nltk.translate.chrf_score.sentence_chrf
    :param hyp: hypothesis sentence, str or list(str) of tokens
    :param ref: reference sentence, str or list(str) of tokens
    :return: chrF score
    """
    # Reference comes first, hypothesis second.
    return sentence_chrf(ref, hyp)
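# Sanity check of the argument order on toy strings: sentence_chrf(reference,
# hypothesis). chrF weights recall over precision (beta=3.0 by default), so
# swapping the arguments usually changes the score.
from nltk.translate.chrf_score import sentence_chrf

ref = 'the cat sat on the mat'
hyp = 'the cat sat'
print(sentence_chrf(ref, hyp))  # recall measured against the full reference
print(sentence_chrf(hyp, ref))  # swapped: generally a different value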
def chrf(ref_lines, hyp_lines):
    """Sentence-level chrF for each line pair, for comparison with SES."""
    chrfs = np.zeros(len(ref_lines))
    for i in range(len(ref_lines)):
        refs = ref_lines[i].strip().lower().split(' ')
        hyp = hyp_lines[i].strip().lower().split(' ')
        chrfs[i] = sentence_chrf(refs, hyp)
    return chrfs
def calculate_reward(generated: str, train_set: List[str], num_lines=100) -> float:
    # Score the generated line against a random sample of training lines and
    # return the mean chrF as the reward.
    line_ids = np.random.choice(len(train_set), size=num_lines)
    chrf_total = 0.
    for line_id in line_ids:
        line = train_set[line_id]
        chrf = sentence_chrf(line, generated, min_len=2, max_len=6,
                             beta=1., ignore_whitespace=False)
        chrf_total += chrf
    return chrf_total / num_lines
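# Illustrative call of calculate_reward() with a tiny hypothetical corpus;
# assumes numpy (np) and nltk's sentence_chrf are imported as in the snippet
# above. num_lines is reduced so the sample matches the corpus size.
train = ['print("hello world")', 'total = total + 1', 'return total / count']
print(calculate_reward('print("hello")', train, num_lines=3))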
def chrf(reference, predict):
    """Compute sentence-level chrf score.

    Args:
        reference (list[str])
        predict (list[str])
    """
    from nltk.translate import chrf_score
    if len(predict) == 0:
        # chrF is undefined for an empty hypothesis: treat two empty
        # sentences as a perfect match and an empty prediction as 0.
        if len(reference) == 0:
            return 1.0
        else:
            return 0.0
    return chrf_score.sentence_chrf(reference, predict)
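# Quick usage sketch for the chrf() defined directly above, on toy token
# lists, including the empty-hypothesis edge cases it guards against.
print(chrf('the cat sat'.split(), 'the cat sat'.split()))  # identical: 1.0
print(chrf('the cat sat'.split(), []))                     # empty hypothesis: 0.0
print(chrf([], []))                                        # both empty: 1.0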
def get_reward_chrf(instance: Instance, train_sentences: List[str], num_lines=100):
    generated = ''.join(token.text for token in instance.fields['tokens'])
    # Sample training lines and average chrF against each, as in calculate_reward.
    line_ids = np.random.choice(len(train_sentences), size=num_lines)
    chrf_total = 0.
    for line_id in line_ids:
        line = train_sentences[line_id]
        chrf = sentence_chrf(line, generated, min_len=2, max_len=6,
                             beta=1., ignore_whitespace=False)
        chrf_total += chrf
    return chrf_total / num_lines
def compute_features(data):
    # Initialize all feature placeholders
    data[ratio_num_char_source_candidate] = []
    data[ratio_num_tokens_source_candidate] = []
    data[ratio_mean_token_length_source_candidate] = []
    data[ratio_common_bigrams_candidate_reference] = []
    data[ratio_num_token_candidate_reference] = []
    data[gleu_scores] = []
    data[bleu_scores] = []
    data[chrf_scores] = []
    data[labels] = []
    data[ratio_tree_height_candidate_reference] = []

    token_re = re.compile(r'\S+')
    for line_idx in range(len(data[source_lines])):
        source = data[source_lines][line_idx]
        reference = data[reference_lines][line_idx]
        candidate = data[candidate_lines][line_idx]

        # Feature: gleu_scores (sentence_gleu expects a list of tokenized references)
        data[gleu_scores].append(sentence_gleu([reference.split()], candidate.split()))
        # Feature: chrf_scores (sentence_chrf accepts raw strings)
        data[chrf_scores].append(sentence_chrf(reference, candidate))
        # Feature: bleu_scores (precomputed, read from file)
        data[bleu_scores].append(float(data[bleu_scores_lines][line_idx]))
        # Feature: ratio_num_char_source_candidate (whitespace stripped)
        data[ratio_num_char_source_candidate].append(
            len(re.sub(r'\s+', '', source)) / len(re.sub(r'\s+', '', candidate)))
        # Feature: ratio_num_tokens_source_candidate
        data[ratio_num_tokens_source_candidate].append(
            len(token_re.findall(source)) / len(token_re.findall(candidate)))
        # Feature: ratio_num_token_candidate_reference
        data[ratio_num_token_candidate_reference].append(
            len(token_re.findall(candidate)) / len(token_re.findall(reference)))
        # Feature: ratio_mean_token_length_source_candidate
        data[ratio_mean_token_length_source_candidate].append(
            np.mean([len(t) for t in token_re.findall(source)]) /
            np.mean([len(t) for t in token_re.findall(candidate)]))
        # Feature: ratio_common_bigrams_candidate_reference
        # (shared token bigrams over the number of reference bigrams)
        ref_tokens = token_re.findall(reference)
        cand_tokens = token_re.findall(candidate)
        ref_bigrams = set(zip(ref_tokens[:-1], ref_tokens[1:]))
        cand_bigrams = set(zip(cand_tokens[:-1], cand_tokens[1:]))
        data[ratio_common_bigrams_candidate_reference].append(
            len(ref_bigrams & cand_bigrams) / (len(ref_tokens) - 1))
        # Feature: ratio_tree_height_candidate_reference
        data[ratio_tree_height_candidate_reference].append(
            data[candidate_tree_heights][line_idx] / data[reference_tree_heights][line_idx])
        # Feature: labels (1 = human, 0 = machine)
        data[labels].append(1 if data[provided_labels][line_idx] == "H" else 0)
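# Standalone sketch of the bigram-overlap feature from compute_features(),
# on hypothetical reference/candidate strings.
import re

token_re = re.compile(r'\S+')
ref_tokens = token_re.findall('the quick brown fox jumps')
cand_tokens = token_re.findall('the quick red fox jumps')
ref_bigrams = set(zip(ref_tokens[:-1], ref_tokens[1:]))
cand_bigrams = set(zip(cand_tokens[:-1], cand_tokens[1:]))
print(len(ref_bigrams & cand_bigrams) / (len(ref_tokens) - 1))  # 2 of 4 -> 0.5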
        findMaxBLEU.append((item, 1))
        findMaxCHRF.append((item, 1))
        findMaxROUGE.append((item, 1))
        maxEditDistanceSimilarity = 1
    else:
        if len(item.split()) == 0 or len(truth[0].split()) == 0:
            print('Zero length!')
            continue
        findMaxBLEU.append(
            (item,
             sentence_bleu([truth[0].split()], item.split(),
                           weights=get_bleu_weights(truth[0], item),
                           smoothing_function=smoothFunction.method3)))
        findMaxCHRF.append(
            (item, sentence_chrf(truth[0].split(), item.split())))
        try:
            findMaxROUGE.append(
                (item, rouge.get_scores([item], [truth[0]])[0]['rouge-l']['f']))
        except Exception:
            # ROUGE can fail on degenerate strings; skip this candidate.
            pass
        maxEditDistanceSimilarity = max(
            maxEditDistanceSimilarity,
            get_edit_distance_similarity(item, truth[0]))
if len(findMaxBLEU) != 0 and len(findMaxCHRF) != 0 and len(findMaxROUGE) != 0:
    pendingCalcBLEUGroundTruth.append([truth[0].split()])
    pendingCalcBLEUCandidate.append(
        sorted(findMaxBLEU, key=lambda x: x[1], reverse=True)[0][0].split())
def charf(df):
    # sentence_chrf takes a single reference, so pass the string directly
    # rather than wrapping it in a list.
    df['charf'] = df.apply(
        lambda x: sentence_chrf(x['reference'], x['translation']), axis=1)
    return df
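# Usage sketch for charf() with a tiny hypothetical DataFrame; assumes
# pandas and nltk's sentence_chrf are imported as in the snippet above.
import pandas as pd

df = pd.DataFrame({'reference': ['the cat sat on the mat'],
                   'translation': ['the cat sat on a mat']})
print(charf(df)['charf'].iloc[0])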
            findMaxROUGE.append((choice, 1))
            maxEditDistanceSimilarity = 1
            break
        if len(choice.split()) == 0 or len(groundTruth.split()) == 0:
            print('Zero length!')
            continue
        findMaxBLEU.append(
            (choice,
             sentence_bleu([groundTruth.split()], choice.split(),
                           weights=get_bleu_weights(groundTruth, choice),
                           smoothing_function=smoothFunction.method3)))
        findMaxCHRF.append(
            (choice, sentence_chrf(groundTruth.split(), choice.split())))
        findMaxROUGE.append(
            (choice, rouge.get_scores([choice], [groundTruth])[0]['rouge-l']['f']))
        maxEditDistanceSimilarity = max(
            maxEditDistanceSimilarity,
            get_edit_distance_similarity(choice, groundTruth))
    if match is True or (len(findMaxBLEU) != 0 and len(findMaxCHRF) != 0
                         and len(findMaxROUGE) != 0):
        pendingCalcBLEUGroundTruth.append([groundTruth.split()])
        pendingCalcBLEUCandidate.append(
            sorted(findMaxBLEU, key=lambda x: x[1], reverse=True)[0][0].split())
        pendingCalcCHRFGroundTruth.append(groundTruth.split())