Esempio n. 1
0
def tonal_space_alignment(sem1, sem2, distance=False):
    """
    Aligns the tonal space paths of two logical forms and reports the
    edit operations that make up the optimal alignment.

    The underlying computation is the one used by L{tonal_space_distance}
    and L{tonal_space_alignment_costs}; the difference is that the
    operations themselves are returned. Each operation is one of:
     - C{"I"}: insertion
     - C{"D"}: deletion
     - C{"A"}: alignment
     - C{"S"}: full substitution
     - C{"Sf"}: partial substitution (function)
     - C{"Sr"}: partial substitution (root)

    @param sem1: first logical form
    @param sem2: second logical form
    @type distance: bool
    @param distance: if True, the distance metric is added as a fourth
        element of the result. Off by default for backward compatibility.
    @return: C{(operations, steps1, steps2)}, or
        C{(operations, steps1, steps2, dist)} when C{distance} is True.
        C{steps1} and C{steps2} are the step sequences that were compared.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # Logical forms -> lists of (coord,fun) pairs
    coord_funs1 = _lf_to_coord_funs(sem1)
    coord_funs2 = _lf_to_coord_funs(sem2)
    # Points and functions -> steps and functions
    steps1 = _steps_list(coord_funs1)
    steps2 = _steps_list(coord_funs2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1, steps2, delins_cost=2, subst_cost_fun=_subst_cost)

    # Labels for the substitution kinds reported by _subst_type; anything
    #  else counts as a plain alignment
    subst_labels = {"both": "S", "fun": "Sf", "root": "Sr"}

    # Follow the pointers back from the final cell of the matrix (last
    #  row, last column) to cell (0,0). The operations come out in
    #  reverse order.
    row = len(dists) - 1
    col = len(dists[0]) - 1
    backwards_ops = []
    while row != 0 or col != 0:
        pointer = pointers[row][col]
        if pointer == "I":
            backwards_ops.append("I")
            col -= 1
            continue
        if pointer == "D":
            backwards_ops.append("D")
            row -= 1
            continue
        # Diagonal move: work out what kind of substitution it was
        kind = _subst_type(steps1[row - 1], steps2[col - 1])
        backwards_ops.append(subst_labels.get(kind, "A"))
        col -= 1
        row -= 1
    oplist = backwards_ops[::-1]

    if not distance:
        return oplist, steps1, steps2
    # Insertions/deletions were costed at 2, so halve the final cost
    dist = float(dists[-1][-1]) / 2.0
    return oplist, steps1, steps2, dist
Esempio n. 2
0
def tonal_space_alignment(sem1, sem2, distance=False):
    """
    Runs the alignment used by L{tonal_space_distance} and
    L{tonal_space_alignment_costs} on two logical forms and returns the
    list of operations of the optimal alignment, in order.

    Operation codes: C{"I"} (insertion), C{"D"} (deletion), C{"A"}
    (alignment), C{"S"} (full substitution). Any other code starting with
    C{"S"} is a partial substitution: this function produces C{"Sf"} and
    C{"Sr"}.

    @param sem1: first logical form
    @param sem2: second logical form
    @type distance: bool
    @param distance: also return the distance metric. Not returned by
        default, for backward compatibility.
    @return: the operation list and the two step sequences that were
        compared, followed by the distance if C{distance} is True.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # (coord,fun) pairs for each logical form
    pairs1 = _lf_to_coord_funs(sem1)
    pairs2 = _lf_to_coord_funs(sem2)
    # The same paths expressed as steps and functions instead of points
    #  and functions
    steps1 = _steps_list(pairs1)
    steps2 = _steps_list(pairs2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1,
        steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost,
    )

    # Walk the pointer matrix from its last cell back to the origin,
    #  recording one operation per move
    i = len(dists) - 1
    j = len(dists[0]) - 1
    oplist = []
    while (i, j) != (0, 0):
        move = pointers[i][j]
        if move == "I":
            op = "I"
            j -= 1
        elif move == "D":
            op = "D"
            i -= 1
        else:
            # Substitution or alignment, depending on how the steps differ
            subst_type = _subst_type(steps1[i - 1], steps2[j - 1])
            if subst_type == "both":
                op = "S"
            elif subst_type == "fun":
                op = "Sf"
            elif subst_type == "root":
                op = "Sr"
            else:
                op = "A"
            j -= 1
            i -= 1
        oplist.append(op)
    # Operations were collected end-first
    oplist.reverse()

    result = (oplist, steps1, steps2)
    if distance:
        # The matrix uses a cost of 2 per insertion/deletion: scale back
        result = result + (float(dists[-1][-1]) / 2.0,)
    return result
Esempio n. 3
0
def tonal_space_alignment_costs(sem1, sem2):
    """
    Counts the operations in the optimal alignment of two logical forms.

    Uses the same algorithm as tonal_space_distance, but returns a
    breakdown of the alignment rather than its score.

    @param sem1: first logical form
    @param sem2: second logical form
    @rtype: dict
    @return: counts under the keys C{'deletions'}, C{'insertions'},
        C{'root_subs'} (root only), C{'function_subs'} (function only),
        C{'full_subs'}, C{'substitutions'} (sum of the three kinds of
        substitution) and C{'alignments'}, plus the compared step
        sequences under C{'steps1'} and C{'steps2'}.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # Logical forms -> lists of (coord,fun) pairs
    coord_funs1 = _lf_to_coord_funs(sem1)
    coord_funs2 = _lf_to_coord_funs(sem2)
    # Points and functions -> steps and functions
    steps1 = _steps_list(coord_funs1)
    steps2 = _steps_list(coord_funs2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1, steps2, delins_cost=2, subst_cost_fun=_subst_cost)

    # One counter per kind of move in the traceback. "both", "fun" and
    #  "root" are the substitution kinds reported by _subst_type.
    tally = {"I": 0, "D": 0, "both": 0, "fun": 0, "root": 0, "aligned": 0}

    # Follow the pointers back from the final cell of the matrix (last
    #  row, last column) to cell (0,0)
    row = len(dists) - 1
    col = len(dists[0]) - 1
    while row != 0 or col != 0:
        pointer = pointers[row][col]
        if pointer == "I":
            tally["I"] += 1
            col -= 1
            continue
        if pointer == "D":
            tally["D"] += 1
            row -= 1
            continue
        # Diagonal move: work out what kind of substitution it was
        kind = _subst_type(steps1[row - 1], steps2[col - 1])
        if kind not in ("both", "fun", "root"):
            kind = "aligned"
        tally[kind] += 1
        col -= 1
        row -= 1

    return {
        'deletions': tally["D"],
        'insertions': tally["I"],
        'root_subs': tally["root"],
        'function_subs': tally["fun"],
        'full_subs': tally["both"],
        'substitutions': tally["root"] + tally["fun"] + tally["both"],
        'alignments': tally["aligned"],
        'steps1': steps1,
        'steps2': steps2,
    }
Esempio n. 4
0
def tonal_space_alignment_costs(sem1, sem2):
    """
    Same algorithm as tonal_space_distance, but reports how the optimal
    alignment is made up instead of its score.

    @param sem1: first logical form
    @param sem2: second logical form
    @rtype: dict
    @return: the number of deletions, insertions, root (only)
        substitutions, function (only) substitutions, full substitutions
        and alignments, the total number of substitutions, and the two
        step sequences that were aligned (keys C{'steps1'} and
        C{'steps2'}).

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # (coord,fun) pairs for each logical form
    pairs1 = _lf_to_coord_funs(sem1)
    pairs2 = _lf_to_coord_funs(sem2)
    # The same paths expressed as steps and functions instead of points
    #  and functions
    steps1 = _steps_list(pairs1)
    steps2 = _steps_list(pairs2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1,
        steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost,
    )

    num_ins = num_del = 0
    num_fun = num_root = num_full = 0
    num_aligned = 0

    # Walk the pointer matrix from its last cell back to the origin,
    #  counting each kind of move that the optimal alignment used
    i = len(dists) - 1
    j = len(dists[0]) - 1
    while (i, j) != (0, 0):
        move = pointers[i][j]
        if move == "I":
            num_ins += 1
            j -= 1
            continue
        if move == "D":
            num_del += 1
            i -= 1
            continue

        # Substitution or alignment, depending on how the steps differ
        subst_type = _subst_type(steps1[i - 1], steps2[j - 1])
        if subst_type == "both":
            num_full += 1
        elif subst_type == "fun":
            num_fun += 1
        elif subst_type == "root":
            num_root += 1
        else:
            num_aligned += 1
        j -= 1
        i -= 1

    return {
        'deletions': num_del,
        'insertions': num_ins,
        'root_subs': num_root,
        'function_subs': num_fun,
        'full_subs': num_full,
        'substitutions': num_root + num_fun + num_full,
        'alignments': num_aligned,
        'steps1': steps1,
        'steps2': steps2,
    }