def reward_progressive_match_label(sample, batch, sample_score=None):
    """Per-step reward: +1 for each sampled token that matches the label.

    Tokens after the first mismatch are zeroed out, and the first mismatched
    step receives a small negative reward (-0.1), so credit is given only for
    the matching prefix ("progressive" match).

    Args:
        sample: int array, presumably shaped (time, batch) — zero marks
            end-of-sequence padding. TODO confirm shape against callers.
        batch: data object providing `batch.labels.label` with the same
            (time, batch) layout as `sample`.
        sample_score: unused; kept for a uniform reward-function signature.

    Returns:
        (match, avgmatch): per-step reward array of shape (time, batch) and a
        scalar average match rate over non-padding positions.
    """
    # seq_len[b] = index of the first zero token in column b (0 if the very
    # first token is zero, i.e. an empty sample).
    seq_len = util.find_first_min_zero(sample)
    # Binary mask covering the first seq_len+1 steps (sequence plus its EOS).
    mask, __ = util.masked_full_like(sample, 1, num_non_padding=seq_len + 1, dtype=np.int32)
    # Drop columns whose sample is empty (seq_len == 0).
    mask = mask * (seq_len > 0)
    # Keep the un-masked originals (_sample/_label) for length bookkeeping.
    sample, _sample = sample * mask, sample
    label = _label = batch.labels.label
    # Zero-pad the shorter of sample/label along time so they can be compared.
    pad_width = abs(len(sample) - len(label))
    pad_width = ((0, pad_width), (0, 0))
    if len(label) < len(sample):
        label = np.pad(label, pad_width, 'constant', constant_values=0)
    elif len(sample) < len(label):
        sample = np.pad(sample, pad_width, 'constant', constant_values=0)
    diff = np.abs(sample - label)
    match = (diff == 0).astype(np.float32)  # / batch.features.dec_seq_len
    # Positions beyond the true label length never count as matches; trim back
    # to the sample's original length when the label was the longer one.
    if len(_label) < len(_sample):
        match[len(_label) - 1:, :] = 0
    elif len(_sample) < len(_label):
        match = match[:len(_sample), :]
    # Average match rate is computed BEFORE the progressive truncation below.
    avgmatch = np.sum(match * mask) / np.sum(mask)
    summatch = np.sum(match, axis=0)
    # argmin over {0,1} returns the FIRST zero, i.e. the first mismatch step
    # (0 when the whole column matches, since argmin breaks ties at index 0).
    mismatch = np.argmin(match, axis=0)
    # Zero out every step from the first mismatch onward.
    mismatch_mask, __ = util.masked_full_like(match, 1, num_non_padding=mismatch)
    match = match * mismatch_mask
    for ib in range(sample.shape[1]):
        # Fully matching column (some matches and no zero found before index 0
        # being the argmin of an all-ones column): no penalty.
        if summatch[ib] > 0 and mismatch[ib] == 0:
            continue
        # Penalize the first mismatching step.
        match[mismatch[ib], ib] = -0.1
    return match, avgmatch
def reward_match_label(sample, batch, partial_match=True, sample_score=None):
    """Reward samples by agreement with the reference label.

    With `partial_match=True`, every matching position earns 1/(seq_len+1) so
    a fully matching sequence sums to 1. Otherwise, an all-or-nothing reward
    of 1 is placed at the EOS step of columns that match the label exactly.

    Args:
        sample: int array, presumably shaped (time, batch) — zero marks
            end-of-sequence padding. TODO confirm shape against callers.
        batch: data object providing `batch.labels.label` with the same
            (time, batch) layout as `sample`.
        partial_match: if True, give fractional per-step credit; if False,
            give a single terminal reward only for exact sequence matches.
        sample_score: unused; kept for a uniform reward-function signature.

    Returns:
        (match, avgmatch): per-step reward array and a scalar average reward
        per non-empty sample.
    """
    # seq_len[b] = index of the first zero token in column b.
    seq_len = util.find_first_min_zero(sample)
    # Binary mask covering the first seq_len+1 steps (sequence plus its EOS);
    # empty samples (seq_len == 0) are masked out entirely.
    mask, __ = util.masked_full_like(sample, 1, num_non_padding=seq_len + 1, dtype=np.int32)
    mask = mask * (seq_len > 0)
    # Keep the un-masked originals (_sample/_label) for length bookkeeping.
    sample, _sample = sample * mask, sample
    label = _label = batch.labels.label
    # Zero-pad the shorter of sample/label along time so they can be compared.
    pad_width = abs(len(sample) - len(label))
    pad_width = ((0, pad_width), (0, 0))
    if len(label) < len(sample):
        label = np.pad(label, pad_width, 'constant', constant_values=0)
    elif len(sample) < len(label):
        sample = np.pad(sample, pad_width, 'constant', constant_values=0)
    diff = np.abs(sample - label)
    if partial_match:
        # Fractional credit: each match is worth 1/(seq_len+1), broadcast over
        # the batch axis, so a perfect sequence totals 1.
        match = (diff == 0).astype(np.float32) / (seq_len + 1)
        # Positions beyond the true label length never count; trim back to the
        # sample's original length when the label was the longer one.
        if len(_label) < len(_sample):
            match[len(_label) - 1:, :] = 0
        elif len(_sample) < len(_label):
            match = match[:len(_sample), :]
    else:
        # Exact match only: reward 1 at the EOS step of perfect columns.
        sumdiff = np.sum(diff, axis=0)
        match = np.zeros_like(sample, dtype=np.float32)
        # NOTE(review): here `match` keeps the PADDED time length, but `mask`
        # below has the original sample length — when the label is longer than
        # the sample this multiply looks like a shape mismatch; the partial
        # branch trims, this one does not. Verify with a longer-label batch.
        for ib in range(seq_len.shape[0]):
            if sumdiff[ib] == 0:
                match[seq_len[ib], ib] = 1
    match = match * mask
    # Average reward per non-empty sample.
    avgmatch = np.sum(match) / np.sum(seq_len > 0)
    return match, avgmatch
def reward_constant(sample, batch, constant=-0.1, sample_score=None):
    """Assign a constant per-step reward to every non-padding position.

    Args:
        sample: int array, presumably shaped (time, batch) — zero marks
            end-of-sequence padding. TODO confirm shape against callers.
        batch: unused here; kept for a uniform reward-function signature.
        constant: the per-step reward value (default -0.1, a step penalty).
        sample_score: unused; kept for a uniform reward-function signature.

    Returns:
        (reward, avg): per-step reward array (``constant`` on the first
        seq_len+1 steps of non-empty columns, 0 elsewhere) and the mean of
        ``(seq_len + 1) * constant`` over the batch.
    """
    seq_len = util.find_first_min_zero(sample)
    # Binary mask over the first seq_len+1 steps (sequence plus its EOS).
    mask, __ = util.masked_full_like(sample, 1, num_non_padding=seq_len + 1, dtype=np.int32)
    # Drop columns whose sample is empty (seq_len == 0).
    # BUG FIX: the original also multiplied `constant` in here AND again in
    # the return, yielding constant**2 per step (e.g. +0.01 instead of -0.1,
    # with the sign flipped). Multiply by `constant` exactly once.
    mask = mask * (seq_len > 0)
    return mask * constant, np.mean(seq_len + 1) * constant
def reward_global_ngram_stat(sample, batch, global_count, current_count, update_fn, ngram_fn, sample_score=None):
    """Reward rare n-grams using global vs. current occurrence statistics.

    Each n-gram of a sampled sequence is scored with the negative log ratio
    ``-log((current_count + 1) / (global_count + 1))`` (add-one smoothed), so
    n-grams seen often in the current counts relative to the global counts are
    penalized. After scoring, `update_fn` is invoked with the batch and the
    per-column n-gram tuples so the caller can refresh its statistics.

    Args:
        sample: int array, presumably shaped (time, batch) — zero marks
            end-of-sequence padding. TODO confirm shape against callers.
        batch: data object forwarded verbatim to `update_fn`.
        global_count: mapping from n-gram to its global occurrence count.
        current_count: mapping from n-gram to its current occurrence count.
        update_fn: callable `(batch, list_of_ngram_tuples) -> None`.
        ngram_fn: callable producing the n-grams of a 1-D token sequence.
        sample_score: unused; kept for a uniform reward-function signature.

    Returns:
        (scores, avg): per-step score array of the same shape as `sample`,
        and the total score normalized by the total of (seq_len + 1).
    """
    lengths = util.find_first_min_zero(sample)
    scores = np.zeros_like(sample, dtype=np.float32)
    collected = []
    for col in range(sample.shape[1]):
        tokens = sample[:lengths[col], col]
        col_ngrams = tuple(ngram_fn(tokens))
        collected.append(col_ngrams)
        for row, gram in enumerate(col_ngrams):
            # Add-one smoothing on both counts keeps the ratio finite.
            ratio = (current_count[gram] + 1) / (global_count[gram] + 1)
            scores[row, col] = -np.log(ratio)
    # Let the caller fold this batch's n-grams into its running statistics.
    update_fn(batch, collected)
    avg = np.sum(scores) / np.sum(lengths + 1)
    return scores, avg
def reward_bleu(sample, batch, ref_fn, reward_incomplete=False, sample_score=None):
    """Terminal BLEU reward: score each completed sample against references.

    The sentence-level BLEU of each sample (including its EOS step) is placed
    at its final step; all other steps get 0. Columns without references are
    skipped. Incomplete samples (no EOS within the time dimension) are scored
    on their full length only when `reward_incomplete` is True.

    Args:
        sample: int array, presumably shaped (time, batch) — zero marks
            end-of-sequence padding. TODO confirm shape against callers.
        batch: data object passed to `ref_fn` to obtain per-column references.
        ref_fn: callable `batch -> sequence of references (or None)` indexed
            by batch column.
        reward_incomplete: if True, also score samples that never emitted EOS.
        sample_score: unused; kept for a uniform reward-function signature.

    Returns:
        (scores, avg): per-step reward array and the total BLEU divided by
        the number of columns that had references (0.0 when none do).
    """
    # Length including the EOS token itself.
    seq_len = util.find_first_min_zero(sample) + 1
    scores = np.zeros_like(sample, dtype=np.float32)
    b_refs = ref_fn(batch)
    c = 0  # number of columns with references (denominator of the average)
    for ib in range(len(seq_len)):
        step = seq_len[ib]
        refs = b_refs[ib]
        # A step beyond the time dimension means no EOS was found.
        completed = step <= sample.shape[0]
        if refs is not None:
            c += 1
        if refs is not None and (completed or reward_incomplete):
            if not completed:
                # Score the full (truncated) sequence for incomplete samples.
                step = sample.shape[0]
            scores[step - 1, ib] = util.sentence_bleu(refs, sample[:step, ib])
    # ROBUSTNESS FIX: the original divided by c unconditionally, producing a
    # division-by-zero (nan + runtime warning) when no column had references.
    if c == 0:
        return scores, 0.0
    return scores, np.sum(scores) / c