def lev_sim(s1, s2):
    """
    This function computes the Levenshtein similarity between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure should
            be computed. A value that is neither text nor bytes is converted
            with ``str`` before it is compared.

    Returns:
        The Levenshtein similarity if both the strings are not missing (i.e
        None or NaN), else returns NaN.
    """

    # float('nan') is used instead of pd.np.NaN: the pandas.np alias no longer
    # exists in pandas 2.0+, and the value returned is the same float NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.Levenshtein()
    if not (isinstance(s1, six.string_types) or isinstance(s1, bytes)):
        s1 = str(s1)

    if not (isinstance(s2, six.string_types) or isinstance(s2, bytes)):
        s2 = str(s2)

    # Call the function to compute the similarity measure
    return measure.get_sim_score(s1, s2)
Exemple #2
0
def matchHeaders(headers):
    """
    Print the header pairs of neighbouring tables that look alike.

    Every header of table ``i`` is compared with every header of table
    ``i + 1`` (tables further apart are never compared). A pair is reported
    when the overlap coefficient of its '_'-separated tokens is 1, or when its
    Levenshtein similarity or token Jaccard similarity is at least 0.5.

    Args:
        headers: Sequence of tables, each one an iterable of header strings.

    Returns:
        None. Matches are written to standard output with ``print``.
    """
    jac = sm.Jaccard()
    lev = sm.Levenshtein()
    oc = sm.OverlapCoefficient()
    # One tokenizer is enough for every comparison; it splits headers on '_'.
    delim_tok = sm.DelimiterTokenizer(delim_set=['_'])

    # The last table has no successor, hence len(headers) - 1 iterations.
    for i in range(len(headers) - 1):
        j = i + 1
        for first in headers[i]:
            # Tokenize the left-hand header once instead of once per measure.
            first_tokens = delim_tok.tokenize(first)
            for second in headers[j]:
                second_tokens = delim_tok.tokenize(second)

                jacScore = jac.get_sim_score(first_tokens, second_tokens)
                levScore = lev.get_sim_score(first, second)
                ocScore = oc.get_sim_score(first_tokens, second_tokens)

                if ocScore == 1 or levScore >= 0.5 or jacScore >= 0.5:
                    print(first + ' of Table' + str(i + 1) + ' and ' + second +
                          ' of Table' + str(j + 1) + ' matched')
Exemple #3
0
def lev_sim(s1, s2):
    """
    This function computes the Levenshtein similarity between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the similarity measure should
            be computed.

    Returns:
        The Levenshtein similarity if both the strings are not missing (i.e
        None or NaN), else returns NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.lev_sim('alex', 'alxe')
        0.5
        >>> em.lev_sim(None, 'alex')
        nan

    """

    # float('nan') is used instead of pd.np.NaN: the pandas.np alias no longer
    # exists in pandas 2.0+, and the value returned is the same float NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.Levenshtein()

    # Normalise both inputs through the project helper before scoring.
    s1 = gh.convert_to_str_unicode(s1)
    s2 = gh.convert_to_str_unicode(s2)

    # Call the function to compute the similarity measure
    return measure.get_sim_score(s1, s2)
Exemple #4
0
 def lev_score(self, str_pair, sim_score=True):
     """
     Score a pair of strings with the Levenshtein measure.

     :param str_pair: the pair of strings; it is unpacked by
         ``self._check_input`` into the two values that are compared
     :param sim_score: when true return ``get_sim_score`` of the measure,
         otherwise return its ``get_raw_score``
     :return: the value produced by the selected Levenshtein method
     """
     first, second = self._check_input(str_pair)
     measure = sm.Levenshtein()
     if sim_score:
         return measure.get_sim_score(first, second)
     return measure.get_raw_score(first, second)
def lev_sim(s1, s2):
    """
    Compute the Levenshtein similarity between the two input values.

    Args:
        s1,s2: The values to compare. Both are passed through
            ``helper.convert_to_str_unicode`` before they are scored.

    Returns:
        The Levenshtein similarity if neither value is missing (None or NaN),
        else NaN.
    """
    # float('nan') is used instead of pd.np.NaN: the pandas.np alias no longer
    # exists in pandas 2.0+, and the value returned is the same float NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    s1 = helper.convert_to_str_unicode(s1)
    s2 = helper.convert_to_str_unicode(s2)

    measure = sm.Levenshtein()
    return measure.get_sim_score(s1, s2)
Exemple #6
0
    def __init__(self):
        """Create one instance of each similarity measure plus the tokenizer."""
        # Measure classes in the order their instances are stored.
        measure_types = (
            sm.BagDistance,
            sm.Cosine,
            sm.Dice,
            sm.Editex,
            sm.GeneralizedJaccard,
            sm.Jaccard,
            sm.Jaro,
            sm.JaroWinkler,
            sm.Levenshtein,
            sm.OverlapCoefficient,
            sm.TverskyIndex,
        )
        self.similarity_function = [
            measure_type() for measure_type in measure_types
        ]

        # Tokenizer configured to return a set of tokens.
        self.alphanumeric_tokenizer = sm.AlphanumericTokenizer(return_set=True)
Exemple #7
0
def lev_sim(s1, s2):
    """
    Compute the Levenshtein similarity between the two input values.

    Args:
        s1,s2: The values to compare. A value that is neither text nor bytes
            is converted with ``str`` before it is compared.

    Returns:
        The Levenshtein similarity if neither value is missing (None or NaN),
        else NaN.
    """
    # float('nan') is used instead of pd.np.NaN: the pandas.np alias no longer
    # exists in pandas 2.0+, and the value returned is the same float NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the similarity measure object
    measure = sm.Levenshtein()
    if not (isinstance(s1, six.string_types) or isinstance(s1, bytes)):
        s1 = str(s1)

    if not (isinstance(s2, six.string_types) or isinstance(s2, bytes)):
        s2 = str(s2)

    # Call the function to compute the similarity measure
    return measure.get_sim_score(s1, s2)
Exemple #8
0
def lev_dist(s1, s2):
    """
    This function computes the Levenshtein distance between the two input
    strings.

    Args:
        s1,s2 (string): The input strings for which the distance should be
            computed. A value that is neither text nor bytes is converted
            with ``str`` before it is compared.

    Returns:
        The Levenshtein distance if both the strings are not missing (i.e
        None or NaN), else returns NaN.

    Examples:
        >>> import py_entitymatching as em
        >>> em.lev_dist('alex', 'alxe')
        2
        >>> em.lev_dist(None, 'alex')
        nan

    """

    # float('nan') is used instead of pd.np.NaN: the pandas.np alias no longer
    # exists in pandas 2.0+, and the value returned is the same float NaN.
    if s1 is None or s2 is None:
        return float('nan')
    if pd.isnull(s1) or pd.isnull(s2):
        return float('nan')

    # Create the measure object
    measure = sm.Levenshtein()
    if not (isinstance(s1, six.string_types) or isinstance(s1, bytes)):
        s1 = str(s1)

    if not (isinstance(s2, six.string_types) or isinstance(s2, bytes)):
        s2 = str(s2)

    # Call the function to compute the distance measure.
    return measure.get_raw_score(s1, s2)
Exemple #9
0
nlmInsampleFile = 'NLMdata/dataCached/insample_abstracts_outfile'
nlmOutsampleFile = 'NLMdata/dataCached/outSample_abstracts_outfile'
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# these caches must only ever be files this project produced itself.
# The files are opened with `with` so the handles are closed after loading.
with open(nlmInsampleFile, 'rb') as _insampleHandle:
    nlmInsampleData = pickle.load(_insampleHandle)
with open(nlmOutsampleFile, 'rb') as _outsampleHandle:
    nlmOutsampleData = pickle.load(_outsampleHandle)

# Instantiate FVComponent instances
# NOTE(review): the trailing False/True passed to FVC.CosSim presumably
# selects abstract-level vs sentence-level comparison - confirm in FVC.
csAbstract = FVC.CosSim('CSAbs',
                        TfidfVectorizer(ngram_range=(1, 3), sublinear_tf=True),
                        False)
csSentence = FVC.CosSim('CSSent',
                        TfidfVectorizer(ngram_range=(1, 3), sublinear_tf=True),
                        True)
cosM = FVC.stringMatchExcerpts('CosMeasure', sm.Cosine(),
                               sm.WhitespaceTokenizer(return_set=True))
LVDist = FVC.stringMatchTitles('LVDist', sm.Levenshtein())

# Components handed to the join, in this order.
FVCList = [csAbstract, csSentence, cosM, LVDist]


def classifyAndPredict(insampleData, outsampleData, folderName, componentList):
    """
    Build feature vectors for both data sets and run the classifier.

    Args:
        insampleData: In-sample data, passed to ``myJoin.join``; must be
            indexable because the length of its element 0 is printed.
        outsampleData: Out-of-sample data, passed to ``myJoin.join``; must be
            indexable because the length of its element 1 is printed.
        folderName: Passed to ``myJoin.join`` unchanged.
        componentList: Feature-vector components registered on the join.

    Returns:
        None.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(len(insampleData[0]))
    print(len(outsampleData[1]))
    # Declare instance of a join object with input arguments
    easyJoin = myJoin.join(insampleData, outsampleData, folderName)
    easyJoin.setComponentList(componentList)
    # Build feature vector
    easyJoin.buildInsampleFV()
    easyJoin.buildOutsampleFVReduced(0.01)
    # Classify and predict with logistic regression
    easyJoin.classify()
Exemple #10
0
def _score_sequence_pairs(score_fn):
    """Return ``score_fn(Sequence1, Sequence2)`` evaluated for every row of ``df``."""
    return df.apply(
        lambda row: score_fn(row['Sequence1'], row['Sequence2']), axis=1)


# Editex similarity of the two sequence columns.
ed = sm.Editex()
df['Editex'] = _score_sequence_pairs(ed.get_sim_score)
df.head()  # value is discarded when this runs as a plain script

# In[41]:

# Jaro similarity of the two sequence columns.
jaro = sm.Jaro()
df['Jaro'] = _score_sequence_pairs(jaro.get_sim_score)
df.head()

# In[42]:

# Levenshtein similarity of the two sequence columns.
lev = sm.Levenshtein()
df['Levenshtein'] = _score_sequence_pairs(lev.get_sim_score)
df.head()

# In[43]:

# Needleman-Wunsch stores the raw score, not a similarity score.
nw = sm.NeedlemanWunsch()
df['NeedlemanWunsch'] = _score_sequence_pairs(nw.get_raw_score)
df.head()

# In[44]:

sw = sm.SmithWaterman()
df['SmithWaterman'] = df.apply(
 def __init__(self):
     """Create the Levenshtein measure object and keep it on ``self.lev``."""
     levenshtein_measure = py_stringmatching.Levenshtein()
     self.lev = levenshtein_measure
Exemple #12
0
def markStudDFSBlockAnswer(processQuestionId, studentAnswerId):
    """
    Compare the student's block graph with the teacher's and record the result.

    Both graphs are read from the graph returned by ``connectToGraph()``.
    Starting at the nodes whose text is 'start', teacher and student nodes are
    popped from two stacks in lock-step. For each pair the child nodes are
    compared: a student child matches a teacher child when the teacher text
    matches the beginning of the student text (case-insensitive) or when
    ``getPhraseSimilarity`` returns at least 0.55. Matched children are pushed
    on both stacks; unmatched ones are classified as additional, deleted or
    substituted, counted, and described in a feedback string.

    Args:
        processQuestionId: Forwarded unchanged to
            ``allocateMarksAndSaveToDatabase``.
        studentAnswerId: Forwarded unchanged to
            ``allocateMarksAndSaveToDatabase``.

    Returns:
        None. The counters and the feedback text are printed and handed to
        ``allocateMarksAndSaveToDatabase``.
    """
    # Connect to Graph
    graph = connectToGraph()

    teacherStartNodeKey = graph.data(
        "MATCH (node:Teacher) WHERE node.text='start' RETURN node.key")
    studentStartNodeKey = graph.data(
        "MATCH (node:Student) WHERE node.text='start' RETURN node.key")

    teachStack = [teacherStartNodeKey[0]['node.key']]
    studStack = [studentStartNodeKey[0]['node.key']]

    teachVisitedNodes = []
    studVisitedNodes = []

    # keeps track of the nodes matched in each level
    matchedTeacherNodes = []
    matchedStudentNodes = []

    notMatchedParentTeacherNodes = []

    # keeps track of all the nodes visited throughout graph traversal and a node is added to this each time it is visited
    allMatchedTeachNodes = []
    allMatchedStudNodes = []

    additionalNodes = []
    deletedNodes = []
    substitutedNodes = []
    addOrSubNodes = []
    delOrSubNodes = []

    totNoOfAdditionalNodes = 0
    totNoOfDeletedNodes = 0
    totNoOfSubstitutedNodes = 0
    totNoOfOtherIncorrectNodes = 0
    totNoOfOtherSubstitutedNodes = 0

    totNoOfMatchedNodes = 0

    feedback = ""

    while teachStack or studStack:

        if teachStack and studStack:

            teachCurrent = teachStack.pop()
            studCurrent = studStack.pop()

            teacherCurrentText = graph.data(
                "MATCH (node:Teacher) WHERE node.key= {key} RETURN node.text",
                parameters={"key": teachCurrent})

            studentCurrentText = graph.data(
                "MATCH (node:Student) WHERE node.key= {key} RETURN node.text",
                parameters={"key": studCurrent})

            teacherChildNodes = graph.data(
                "MATCH (parent:Teacher)-[:TO]->(child:Teacher) WHERE parent.key= {key} RETURN child",
                parameters={"key":
                            teachCurrent})  #teacherStartNodeKey[0]['node.key']

            studentChildNodes = graph.data(
                "MATCH (parent:Student)-[:TO]->(child:Student) WHERE parent.key= {key} RETURN child",
                parameters={"key":
                            studCurrent})  #studentStartNodeKey[0]['node.key']

            teachChildNodesList = list(teacherChildNodes)

            studChildNodesList = list(studentChildNodes)

            for teacherChild in teachChildNodesList:

                teachText = teacherChild['child']['text']

                print(teachText)

                matchFound = False

                for studentChild in studChildNodesList:
                    if not studentChild['child']['key'] in matchedStudentNodes:
                        print('current stud child')
                        print(studentChild['child']['text'])
                        childText = studentChild['child']['text']

                        synsetSim_score = getPhraseSimilarity(
                            teachText, childText)

                        # The teacher text is free text, not a pattern: escape
                        # it so characters such as '+', '(' or '?' are compared
                        # literally instead of being read as regex syntax.
                        if re.match(re.escape(teachText), childText,
                                    re.IGNORECASE) or synsetSim_score >= 0.55:
                            print(
                                'threshold similarity added to Student stack')

                            feedback = feedback + 'The block:' + studentChild['child']['text'] + \
                                       ' connected to block:' + studentCurrentText[0]['node.text'] + ' is correct. '

                            matchFound = True

                            if not teacherChild['child'][
                                    'key'] in teachVisitedNodes:
                                studStack.append(studentChild['child']['key'])

                                teachStack.append(teacherChild['child']['key'])

                                if not studentChild['child'][
                                        'key'] in allMatchedStudNodes and not studentChild[
                                            'child']['text'] == 'end':
                                    totNoOfMatchedNodes = totNoOfMatchedNodes + 1

                                allMatchedTeachNodes.append(
                                    teacherChild['child']['key'])
                                allMatchedStudNodes.append(
                                    studentChild['child']['key'])

                            # The teacher-side match is only recorded when the
                            # teacher has more children than the student.
                            if len(teachChildNodesList) > len(
                                    studChildNodesList):
                                matchedTeacherNodes.append(
                                    teacherChild['child']['key'])

                            # The student-side match is always recorded so the
                            # same student child is not matched twice on this level.
                            matchedStudentNodes.append(
                                studentChild['child']['key'])

                            break

                if not matchFound:
                    if teacherChild['child']['key'] not in teachVisitedNodes:
                        notMatchedParentTeacherNodes.append(
                            teacherChild['child']['key'])
                    else:
                        feedback = feedback + 'The block:' + teacherChild['child']['text'] + \
                                   ' should be connected to block:' + teacherCurrentText[0]['node.text'] + '. '
                        totNoOfOtherIncorrectNodes = totNoOfOtherIncorrectNodes + 1

            if len(teachChildNodesList) == len(studChildNodesList) and len(
                    notMatchedParentTeacherNodes) == 1:

                print('^^^ONE SUBSTITUTED NODE')

                totNoOfSubstitutedNodes, totNoOfOtherIncorrectNodes, feedback = \
                    addTheOnlyUnmatchedNode('NotMatchedNode', graph, notMatchedParentTeacherNodes,
                                        teachStack, studChildNodesList, matchedStudentNodes,
                                        studStack, totNoOfSubstitutedNodes, feedback, studVisitedNodes,
                                        teachCurrent, studentCurrentText[0]['node.text'], totNoOfOtherIncorrectNodes)

            elif len(teachChildNodesList) == len(studChildNodesList) and len(
                    notMatchedParentTeacherNodes) > 1:

                totNoOfSubstitutedNodes = totNoOfSubstitutedNodes + len(
                    notMatchedParentTeacherNodes)

                againNotMatchedTeacherNodes, handledStudentNodeList, feedback = checkForCurrentNodeChildMatch(
                    'substitutedCaller', graph, matchedStudentNodes,
                    notMatchedParentTeacherNodes, studChildNodesList,
                    studVisitedNodes, studStack, teachStack, feedback,
                    studentCurrentText[0]['node.text'])

                if len(againNotMatchedTeacherNodes) == 1:
                    # NOTE(review): this call unpacks two values while the
                    # 'NotMatchedNode' call above unpacks three from the same
                    # helper - confirm the helper's return shape for this caller.
                    totNoOfOtherIncorrectNodes, feedback = addTheOnlyUnmatchedNode(
                        'NotMatchedChildrenNode', graph,
                        againNotMatchedTeacherNodes, teachStack,
                        studChildNodesList, handledStudentNodeList, studStack,
                        totNoOfSubstitutedNodes, feedback, studVisitedNodes,
                        teachCurrent, studentCurrentText[0]['node.text'],
                        totNoOfOtherIncorrectNodes)

                elif len(againNotMatchedTeacherNodes) > 1:
                    for studentChild in studChildNodesList:
                        if not studentChild['child'][
                                'key'] in handledStudentNodeList and not studentChild[
                                    'child']['key'] in studVisitedNodes:
                            feedback = feedback + 'The block:' + studentChild['child']['text'] + \
                                               ' connected to block:' + studentCurrentText[0]['node.text'] + ' is substituted, and it '

                            for againNotTeacherNode in againNotMatchedTeacherNodes:
                                teacherNodeText = graph.data(
                                    "MATCH (node:Teacher) WHERE node.key= {key} RETURN node.text",
                                    parameters={"key": againNotTeacherNode})

                                feedback = feedback + ' should be:' + teacherNodeText[
                                    0]['node.text'] + ' or'

                            feedback = feedback + ' one of the mentioned blocks. The immediate blocks that follow ' +\
                                       'this block:' + studentChild['child']['text'] + ' are also wrong. Please check them. '

                            substitutedNodes.append(
                                studentChild['child']['key'])

            # handles scenario where student graph has deleted child nodes for the current node under consideration
            if len(teachChildNodesList) > len(studChildNodesList):
                totNoOfDeletedNodes = totNoOfDeletedNodes + (
                    len(teachChildNodesList) - len(studChildNodesList))

                if len(matchedStudentNodes) == len(studChildNodesList):
                    for child in teachChildNodesList:
                        if not child['child'][
                                'key'] in matchedTeacherNodes and not child[
                                    'child']['key'] in teachVisitedNodes:
                            feedback = feedback + 'Missing Block:' + child['child']['text'] + \
                                               ' should be connected to block:' + studentCurrentText[0]['node.text'] + '. '
                            deletedNodes.append(child['child']['key'])
                elif len(matchedStudentNodes) < len(studChildNodesList):
                    feedback = feedback + 'There is/are ' + str(len(teachChildNodesList) - len(studChildNodesList)) + \
                               ' missing block(s) that should be connected to block:' + studentCurrentText[0]['node.text'] + \
                               ' and ' + str(len(studChildNodesList) - len(matchedStudentNodes)) + \
                               ' block(s) connected to block:' + studentCurrentText[0]['node.text'] + \
                               ' is/are substituted - The incorrect blocks are '

                    againNotMatchedTeacherNodes, handledStudentNodeList, feedback = checkForCurrentNodeChildMatch(
                        'deletedSubstitutedCaller', graph, matchedStudentNodes,
                        notMatchedParentTeacherNodes, studChildNodesList,
                        studVisitedNodes, studStack, teachStack, feedback,
                        studentCurrentText[0]['node.text'])

                    if len(handledStudentNodeList) == len(studChildNodesList):
                        for child in teachChildNodesList:
                            if child['child'][
                                    'key'] in againNotMatchedTeacherNodes and not child[
                                        'child']['key'] in teachVisitedNodes:
                                feedback = feedback + 'block:' + child['child']['text'] + \
                                           ' that should be connected to block:' + studentCurrentText[0]['node.text'] +\
                                           ' is missing and '
                                deletedNodes.append(child['child']['key'])

                    elif len(handledStudentNodeList) < len(studChildNodesList):
                        for child in teachChildNodesList:
                            if child['child'][
                                    'key'] in againNotMatchedTeacherNodes and not child[
                                        'child']['key'] in teachVisitedNodes:
                                feedback = feedback + ' block:' + child['child']['text'] + \
                                           ' that should be/is connected to block:' + studentCurrentText[0]['node.text'] + \
                                           ' is deleted/substituted and the immediate child blocks of this block are also wrong, please check them, and '

                                delOrSubNodes.append(child['child']['key'])

                    feedback = feedback + 'please check all these incorrect blocks. '

            # handles scenario where student graph has additional child nodes for the current node under consideration
            elif len(teachChildNodesList) < len(studChildNodesList):
                totNoOfAdditionalNodes = totNoOfAdditionalNodes + (
                    len(studChildNodesList) - len(teachChildNodesList))

                # handles scenario where all teacher nodes are matched and there are additional nodes
                if len(matchedStudentNodes) == len(teachChildNodesList):
                    for child in studChildNodesList:
                        if not child['child'][
                                'key'] in matchedStudentNodes and not child[
                                    'child']['key'] in studVisitedNodes:
                            feedback = feedback + 'Additional Block:' + child['child']['text'] +\
                                       ' is connected to block:' + studentCurrentText[0]['node.text'] + '. '
                            additionalNodes.append(child['child']['key'])
                        elif not child['child'][
                                'key'] in matchedStudentNodes and child[
                                    'child']['key'] in studVisitedNodes:
                            feedback = feedback + 'Additional connection from block:' + studentCurrentText[0]['node.text'] +\
                                       ' to block:' + child['child']['text'] + '. '
                elif len(matchedStudentNodes) < len(teachChildNodesList):
                    feedback = feedback + 'There is/are ' + str(len(studChildNodesList) - len(teachChildNodesList)) + \
                               ' additional block(s) connected to block:' + studentCurrentText[0]['node.text'] + ' and ' +\
                               str(len(teachChildNodesList) - len(matchedStudentNodes)) +\
                               ' block(s) connected to block:' + studentCurrentText[0]['node.text'] + ' is/are substituted - The incorrect blocks are '

                    againNotMatchedTeacherNodes, handledStudentNodeList, feedback = checkForCurrentNodeChildMatch(
                        'additionalSubstitutedCaller', graph,
                        matchedStudentNodes, notMatchedParentTeacherNodes,
                        studChildNodesList, studVisitedNodes, studStack,
                        teachStack, feedback,
                        studentCurrentText[0]['node.text'])

                    if len(handledStudentNodeList) == len(
                            teachChildNodesList
                    ):  # len(againNotMatchedTeacherNodes) == (len(studChildNodesList)-len(teachChildNodesList))
                        for child in studChildNodesList:
                            if not child['child'][
                                    'key'] in handledStudentNodeList and not child[
                                        'child']['key'] in studVisitedNodes:
                                feedback = feedback + 'block:' + child['child']['text'] + ' connected to block:' +\
                                           studentCurrentText[0]['node.text'] + ' is additional and '
                                additionalNodes.append(child['child']['key'])

                    elif len(handledStudentNodeList) < len(
                            teachChildNodesList
                    ):  # len(againNotMatchedTeacherNodes) > (len(studChildNodesList)-len(teachChildNodesList))
                        for child in studChildNodesList:
                            if not child['child'][
                                    'key'] in handledStudentNodeList and not child[
                                        'child']['key'] in studVisitedNodes:
                                feedback = feedback + ' block: ' + child['child']['text'] + ' connected to block:' +\
                                           studentCurrentText[0]['node.text'] +\
                                ' is additional/substituted and the immediate child blocks of this block are also wrong, please check them, and '

                                addOrSubNodes.append(child['child']['key'])

                    feedback = feedback + 'please check all these incorrect blocks. '

            # Per-level bookkeeping is reset before the next pair of nodes.
            matchedTeacherNodes = []
            matchedStudentNodes = []

            notMatchedParentTeacherNodes = []

            teachVisitedNodes.append(teachCurrent)
            studVisitedNodes.append(studCurrent)

        elif studStack and not teachStack:
            print('^^^^^^^^^^^^^^^STUDENT stack has moreeee.....')
            break

        else:
            # Only teacher nodes are left. Nothing in the loop can consume
            # them without a student counterpart, so stop here; without this
            # branch the loop would spin forever.
            break

    # handles additional nodes down an additional node starting path
    if additionalNodes:
        feedback, totNoOfAdditionalNodes = detectUndetectedBlocks(
            "additionalNodes", graph, additionalNodes, studVisitedNodes,
            feedback, totNoOfAdditionalNodes)

    # handles deleted nodes down a deleted node starting path
    if deletedNodes:
        feedback, totNoOfDeletedNodes = detectUndetectedBlocks(
            "deletedNodes", graph, deletedNodes, teachVisitedNodes, feedback,
            totNoOfDeletedNodes)

    # handles substituted nodes down a substituted node starting path
    if substitutedNodes:
        feedback, totNoOfOtherSubstitutedNodes = detectUndetectedBlocks(
            "substitutedNodes", graph, substitutedNodes, studVisitedNodes,
            feedback, totNoOfOtherSubstitutedNodes)

    # handles additional/substituted nodes down a additional/substituted node starting path
    if addOrSubNodes:
        feedback, totNoOfOtherIncorrectNodes = detectUndetectedBlocks(
            "addOrSubNodes", graph, addOrSubNodes, studVisitedNodes, feedback,
            totNoOfOtherIncorrectNodes)

    # handles deleted/substituted nodes down a deleted/substituted node starting path
    if delOrSubNodes:
        feedback, totNoOfOtherIncorrectNodes = detectUndetectedBlocks(
            "delOrSubNodes", graph, delOrSubNodes, teachVisitedNodes, feedback,
            totNoOfOtherIncorrectNodes)

    # No error of any kind was counted: the answer is reported as fully correct.
    if totNoOfAdditionalNodes == 0 and totNoOfDeletedNodes == 0 and totNoOfSubstitutedNodes == 0 and \
            totNoOfOtherSubstitutedNodes == 0 and totNoOfOtherIncorrectNodes == 0:
        print(totNoOfMatchedNodes)
        feedback = feedback + "Excellent Job! All the blocks and the flow are correct!"  # Number of correct blocks: " + ". "
        print(feedback)
    else:
        feedback = feedback + "Number of correct blocks except start and end blocks: " + str(
            totNoOfMatchedNodes) + ". "
        print(feedback)

    allocateMarksAndSaveToDatabase(totNoOfMatchedNodes, totNoOfAdditionalNodes,
                                   totNoOfDeletedNodes,
                                   totNoOfSubstitutedNodes,
                                   totNoOfOtherSubstitutedNodes,
                                   totNoOfOtherIncorrectNodes, feedback,
                                   processQuestionId, studentAnswerId)