Beispiel #1
0
def pure_edit_distance(ref, hyp, ignore=None):
    '''
	Compute edit-distance score.

	Args:
		<ref>,<hyp>: iterable objects like list,tuple or NumPy array. It will be flattened before scoring.
		<ignore>: Ignoring specific symbols.	 
	Return:
		a namedtuple object including score information.	
	'''
    assert isinstance(ref, Iterable), "<ref> is not a iterable object."
    assert isinstance(hyp, Iterable), "<hyp> is not a iterable object."

    x = flatten(ref)
    x = list(filter(lambda i: i != ignore, x))
    y = flatten(hyp)
    y = list(filter(lambda i: i != ignore, y))

    lenX = len(x)
    lenY = len(y)

    mapping = np.zeros((lenX + 1, lenY + 1))

    for i in range(lenX + 1):
        mapping[i][0] = i
    for j in range(lenY + 1):
        mapping[0][j] = j
    for i in range(1, lenX + 1):
        for j in range(1, lenY + 1):
            if x[i - 1] == y[j - 1]:
                delta = 0
            else:
                delta = 1
            mapping[i][j] = min(
                mapping[i - 1][j - 1] + delta,
                min(mapping[i - 1][j] + 1, mapping[i][j - 1] + 1))

    score = int(mapping[lenX][lenY])
    return namedtuple("Score", ["editDistance", "items"])(score, len(x))
Beispiel #2
0
def accuracy(ref, hyp, ignore=None, mode='all'):
    '''
	Score one-2-one matching score between two items.

	Args:
		<ref>,<hyp>: iterable objects like list,tuple or NumPy array. It will be flattened before scoring.
		<ignore>: Ignoring specific symbols.
		<model>: If <mode> is "all",compute one-one matching score. For example,<ref> is (1,2,3,4),and <hyp> is (1,2,2,4),the score will be 0.75.
				 If <mode> is "present",only the members of <hyp> which appeared in <ref> will be scored no matter which position it is. 
	Return:
		a namedtuple object of score information.
	'''
    assert type_name(ref) != "Transcription" and type_name(
        hyp
    ) != "Transcription", "Exkaldi Transcription objects are unsupported in this function."

    assert mode in ['all',
                    'present'], 'Expected <mode> to be "present" or "all".'

    x = flatten(ref)
    x = list(filter(lambda i: i != ignore, x))
    y = flatten(hyp)
    y = list(filter(lambda i: i != ignore, y))

    if mode == 'all':
        i = 0
        score = 0
        while True:
            if i >= len(x) or i >= len(y):
                break
            elif x[i] == y[i]:
                score += 1
            i += 1
        if i < len(x) or i < len(y):
            raise WrongOperation(
                '<ref> and <hyp> have different length to score.')
        else:
            if len(x) == 0:
                accuracy = 1.0
            else:
                accuracy = score / len(x)

            return namedtuple("Score",
                              ["accuracy", "items", "rightItems"])(accuracy,
                                                                   len(x),
                                                                   score)
    else:
        x = sorted(x)
        score = 0
        for i in y:
            if i in x:
                score += 1
        if len(y) == 0:
            if len(x) == 0:
                accuracy = 1.0
            else:
                accuracy = 0.0
        else:
            accuracy = score / len(y)

        return namedtuple("Score",
                          ["accuracy", "items", "rightItems"])(accuracy,
                                                               len(y), score)
Beispiel #3
0
	def __detect_special_char(self,item):
		item = flatten(item)
		assert "|" not in item, f"'|' is special char which is not be allowed to use in option definition."