Example #1
def wn_similarity(synset_1, synset_2):
    # Resnik similarity is undefined for adjectives, satellites, and adverbs
    if synset_1.pos() not in ["a", "s", "r"] and synset_2.pos() not in ["a", "s", "r"]:
        return wn.res_similarity(synset_1, synset_2, brown_ic)
    else:
        return None
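This snippet assumes `wn` (`nltk.corpus.wordnet`) and a Brown-corpus information-content dictionary named `brown_ic` are already in scope; a minimal, hedged setup and call:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')  # load Brown-corpus IC counts

# roughly 7.91 with Brown IC; adjective/adverb pairs return None
wn_similarity(wn.synset('dog.n.01'), wn.synset('cat.n.01'))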
Example #2
def compare_allsynsets(method, word1, word2):
    ss1 = wordnet.synsets(word1)
    ss2 = wordnet.synsets(word2)
    simi, simi_value = 0.0, 0.0
    for (s1, s2) in product(ss1, ss2):
        # if SYNpos and s1.pos() != s2.pos():  # SYN-POS
        #     continue
        # if TWpos and s1.pos() != pos:  # Target word POS
        #     continue
        if method == "PATH":
            simi = s1.path_similarity(s2)
        elif method == "LCH":
            simi = wordnet.lch_similarity(s1, s2)
        elif method == "WUP":
            simi = wordnet.wup_similarity(s1, s2)
        elif method == "RES":
            simi = wordnet.res_similarity(s1, s2, brown_ic)
        elif method == "JCN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.jcn_similarity(s1, s2, brown_ic)
        elif method == "LIN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.lin_similarity(s1, s2, brown_ic)
        else:
            sys.exit("Error! Unknown similarity method!")

        if simi is not None and simi > simi_value:  # PATH can return None
            simi_value = simi
    return simi_value
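A hypothetical call, assuming `wordnet`, `brown_ic`, `sys`, and `product` (from `itertools`) are imported as in the other examples on this page:

from itertools import product

best = compare_allsynsets("RES", "car", "bus")  # highest Resnik score over all sense pairs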
Example #3
def similarity_by_infocontent(sense1, sense2, option):
    """ Returns similarity scores by information content. """
    if sense1.pos() != sense2.pos():  # infocontent sim can't do diff POS.
        return 0

    info_contents = [
        'ic-bnc-add1.dat', 'ic-bnc-resnik-add1.dat', 'ic-bnc-resnik.dat',
        'ic-bnc.dat', 'ic-brown-add1.dat', 'ic-brown-resnik-add1.dat',
        'ic-brown-resnik.dat', 'ic-brown.dat', 'ic-semcor-add1.dat',
        'ic-semcor.dat', 'ic-semcorraw-add1.dat',
        'ic-semcorraw-resnik-add1.dat', 'ic-semcorraw-resnik.dat',
        'ic-semcorraw.dat', 'ic-shaks-add1.dat', 'ic-shaks-resnik.dat',
        'ic-shaks-resnink-add1.dat', 'ic-shaks.dat', 'ic-treebank-add1.dat',
        'ic-treebank-resnik-add1.dat', 'ic-treebank-resnik.dat',
        'ic-treebank.dat'
    ]

    if option in ['res', 'resnik']:
        return wn.res_similarity(sense1, sense2,
                                 wnic.ic('ic-bnc-resnik-add1.dat'))
        # return min(wn.res_similarity(sense1, sense2, wnic.ic(ic))
        #            for ic in info_contents)

    elif option in ['jcn', "jiang-conrath"]:
        return wn.jcn_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))

    elif option in ['lin']:
        return wn.lin_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
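Here `wn` and `wnic` are assumed to be `nltk.corpus.wordnet` and `nltk.corpus.wordnet_ic`; each `.dat` name in `info_contents` corresponds to an IC file from NLTK's `wordnet_ic` corpus. A minimal usage sketch:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wnic

similarity_by_infocontent(wn.synset('dog.n.01'), wn.synset('cat.n.01'), 'res')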
Example #4
def create_graphs(doc_list):
    documents = doc_list
    if documents is None:
        documents = default_document_list()

    distance_functions = [
        (wn.lch_similarity(SYNSETS[0], SYNSETS[0]), 'lch',
         lambda sense_1, sense_2: wn.lch_similarity(sense_1, sense_2)),
        (1.0, 'lin',
         lambda sense_1, sense_2: wn.lin_similarity(sense_1, sense_2, CORPUS)),
        (10.636958516573292, 'res',
         lambda sense_1, sense_2: wn.res_similarity(sense_1, sense_2, CORPUS)),
        (wn.jcn_similarity(SYNSETS[0], SYNSETS[0], CORPUS), 'jcn',
         lambda sense_1, sense_2: wn.jcn_similarity(sense_1, sense_2, CORPUS)),
        (1.0, 'path',
         lambda sense_1, sense_2: wn.path_similarity(sense_1, sense_2)),
    ]
    all_senses = []
    for doc in documents:
        for sense in doc.top_senses():
            all_senses.append((sense, doc.name))
    against_colors = ['r', 'b', 'g']
    against_to = [
        wn.synset(word)
        for word in ["economy.n.01", "philosophy.n.02", "politics.n.01"]
    ]
    create_against_graph('phyl_eco_pol', documents, all_senses, against_to,
                         distance_functions, against_colors)

    against_to = SYNSETS

    against_colors = [(random(), random(), random())
                      for _i in range(0, len(SYNSETS))]
    create_against_graph('handpicked', documents, all_senses, against_to,
                         distance_functions, against_colors)

    create_graph_top_senses(documents, all_senses, distance_functions)
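The first element of each tuple in `distance_functions` appears to be the measure's maximum attainable score (a synset compared with itself for `lch` and `jcn`, 1.0 for `lin` and `path`), presumably so the graphing code can normalize raw scores. A sketch of that idea, with a hypothetical helper:

def normalize_score(max_value, similarity_fn, sense_1, sense_2):
    # hypothetical helper: scale a raw similarity into [0, 1]
    score = similarity_fn(sense_1, sense_2)
    return score / max_value if score is not None else 0.0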
Example #5
def semantic_similarity(word1, word2, speech, measure):
  """
  Finds the highest similarity score for the given pair of words. Goes through each combination of all senses.

  :param word1: First word in the pair of words
  :param word2: Second word in the pair of words
  :param speech: part of speech, e.g. wn.NOUN
  :param measure: String representing the type of similarity measure ("path" = path ; "res" = Resnik  ;  "lin" = Lin)
  :return: The highest similarity score across all senses and all parts of speech
  """
  #error handling if invalid measure input is given
  if measure not in ["path","res","lin"]:
    raise ValueError("Not a valid similarity type \n Must be 'path'(path), 'res'(Resnik) or 'lin'(Lin)")

  greatest = 0
  conceptsA = wn.synsets(word1,speech)
  conceptsB = wn.synsets(word2,speech)
  #finds similarity score for every combination of senses
  for conceptA in conceptsA:
    for conceptB in conceptsB:
      if measure == "path":
        similarity = wn.path_similarity(conceptA,conceptB)
      elif measure == "res":
        similarity = wn.res_similarity(conceptA,conceptB,brown_ic)
      elif measure == "lin":
        similarity = wn.lin_similarity(conceptA,conceptB,brown_ic)
      if similarity == None : continue #error checking if similarity scorce not possible
      if similarity>greatest:
          greatest = similarity #if new highest similairty is found, set it to the greatest
  return greatest
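A hedged example call, assuming `wn` is `nltk.corpus.wordnet` and `brown_ic` has been loaded as in the earlier examples:

semantic_similarity("journey", "voyage", wn.NOUN, "res")  # highest Resnik score across noun senses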
Example #6
    def get_res_min(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        min_similarity = float('inf')  # sentinel: no pair scored yet
        # Measure similarity for each unique word in A against each unique word in B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)

                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    # Try to find similarity from the corpus; res_similarity
                    # raises an exception for unsupported pairs
                    try:
                        similarity = wordnet.res_similarity(
                            synsets_word1[0], synsets_word2[0], self.brown_ic)
                    except:
                        continue
                    if similarity is not None:
                        min_similarity = min(similarity, min_similarity)
        if min_similarity == float('inf'):
            return 0
        return min_similarity
Example #7
    def res(self, synset_a, synset_b, ic):
        return (
            self.normalize(
                self.MAX_VALUE,
                wordnet.res_similarity(synset_a, synset_b, ic),
            )
            if synset_a.pos() == synset_b.pos()
            else 0
        )
Example #8
    def test_wordnet_ic(self):
        from nltk.corpus import wordnet as nltk_wn
        from nltk.corpus import wordnet_ic as nltk_wnic
        nltk_car = nltk_wn.synset('car.n.1')
        nltk_bus = nltk_wn.synset('bus.n.1')
        our_bnc_resnik_add1 = WordNetInformationContent('bnc', resnik=True, add1=True)

        our_car = our_wn.synset('car.n.1')
        our_bus = our_wn.synset('bus.n.1')
        nltk_bnc_resnik_add1 = nltk_wnic.ic('ic-bnc-resnik-add1.dat')
        assert our_wn.res_similarity(our_car, our_bus, our_bnc_resnik_add1) == nltk_wn.res_similarity(nltk_car, nltk_bus, nltk_bnc_resnik_add1)
        assert our_wn.jcn_similarity(our_car, our_bus, our_bnc_resnik_add1) == nltk_wn.jcn_similarity(nltk_car, nltk_bus, nltk_bnc_resnik_add1)
        assert our_wn.lin_similarity(our_car, our_bus, our_bnc_resnik_add1) == nltk_wn.lin_similarity(nltk_car, nltk_bus, nltk_bnc_resnik_add1)
Example #9
def compute_similarities(s1, s2, sim):
    if sim == "path":
        return wn.path_similarity(s1, s2)
    elif sim == "lch":
        return wn.lch_similarity(s1, s2)
    elif sim == "wup":
        return wn.wup_similarity(s1, s2)
    elif sim == "res":
        return wn.res_similarity(s1, s2, genesis_ic)
    elif sim == "jcn":
        return wn.jcn_similarity(s1, s2, genesis_ic)
    elif sim == "lin":
        return wn.lin_similarity(s1, s2, genesis_ic)
    else:
        raise ValueError("Unknown similarity measure: %s" % sim)
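This variant scores against `genesis_ic`, an information-content dictionary computed from the Genesis corpus rather than loaded from a `.dat` file; a standard way to build it (as in the NLTK WordNet howto):

from nltk.corpus import genesis
from nltk.corpus import wordnet as wn

genesis_ic = wn.ic(genesis, False, 0.0)  # compute IC counts from the Genesis corpus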
Example #10
    def get_res_average(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        avg_similarity = 0
        total_count = 0
        # Measure similarity for each unique word in A against each unique word in B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)

                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    # Try to find similarity from the corpus; skip pairs the
                    # measure rejects (e.g. different POS)
                    try:
                        similarity = wordnet.res_similarity(
                            synsets_word1[0], synsets_word2[0], self.brown_ic)
                    except:
                        continue
                    if similarity == 1e+300:  # cap the huge sentinel value NLTK can return
                        similarity = 12.0
                    if similarity is not None:
                        avg_similarity += similarity
                        total_count += 1
        if total_count == 0:
            return 0
        return float(avg_similarity) / float(total_count)
Example #11
def get_sim_score(word_1, word_2, info_content):
    """ 
    Calculate the highest path similarity among all pairs. 
    """

    if word_1 == word_2:
        return 1
    else:
        max_sim = -1.0
        synsets_1 = wn.synsets(word_1)
        synsets_2 = wn.synsets(word_2)
        if synsets_1 and synsets_2:
            for synset_1, synset_2 in product(synsets_1, synsets_2):
                try:
                    sim = wn.res_similarity(synset_1, synset_2, info_content)
                    #sim = wn.wup_similarity(synset_1, synset_2)
                    if sim > max_sim:
                        max_sim = sim
                except:
                    continue
        return max_sim
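A hedged usage sketch, assuming `wn` and `product` (from `itertools`) are imported as elsewhere on this page:

from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
get_sim_score('car', 'bus', brown_ic)  # highest Resnik score over all sense pairs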
Example #13
def res_sim_fun(vq_words=[]):
    l1 = knowledge = [
        'recite', 'review', 'point', 'recognize', 'describe', 'choose',
        'examine', 'identify', 'enumerate', 'find', 'select', 'what',
        'memorize', 'collect', 'sequence', 'when', 'duplicate', 'who', 'label',
        'write', 'indicate', 'state', 'tabulate', 'which', 'relate', 'show',
        'arrange', 'cite', 'match', 'define', 'locate', 'draw', 'repeat',
        'remember', 'trace', 'read', 'quote', 'spell', 'memorise', 'how',
        'observe', 'recognise', 'copy', 'why', 'outline', 'count', 'name',
        'recall', 'study', 'omit', 'list', 'tell', 'reproduce', 'record',
        'retell', 'meet', 'listen', 'where', 'order', 'view'
    ]

    l2 = comprehension = [
        'compare', 'cite', 'give', 'predict', 'recognize', 'describe',
        'articulate', 'detail', 'order', 'characterize', 'generalize',
        'factor', 'summarize', 'select', 'illustrate', 'visualize', 'group',
        'trace', 'purpose', 'defend', 'rewrite', 'relate', 'approximate',
        'demonstrate', 'indicate', 'add', 'interact', 'tell', 'extrapolate',
        'show', 'rephrase', 'paraphrase', 'infer', 'contrast', 'locate',
        'picture', 'extend', 'associate', 'conclude', 'express', 'interpolate',
        'generalise', 'clarify', 'observe', 'understand', 'differentiate',
        'review', 'distinguish', 'estimate', 'subtract', 'discuss',
        'interpret', 'summarise', 'convert', 'translate', 'compute', 'outline',
        'identify', 'elaborate', 'ask', 'example', 'classify', 'report',
        'restate', 'explain', 'match'
    ]

    l3 = application = [
        'represent', 'show', 'identify', 'participate', 'derive', 'group',
        'calculate', 'graph', 'dramatize', 'choose', 'factor', 'include',
        'allocate', 'handle', 'practice', 'relate',
        'schedule', 'report', 'assess', 'collect', 'investigate', 'categorise',
        'ascertain', 'round', 'sketch', 'transcribe', 'sequence', 'imitate',
        'discover', 'connect', 'tabulate', 'employ', 'avoid', 'experiment',
        'manipulate', 'exercise', 'extend', 'associate', 'modify',
        'personalize', 'dramatise', 'explore', 'teach', 'change', 'perform',
        'summarise', 'act', 'implement', 'assign', 'alphabetize', 'relate',
        'articulate', 'administer', 'subscribe', 'instruct', 'determine',
        'apply', 'establish', 'select', 'illustrate', 'plot', 'use', 'prepare',
        'paint', 'transfer', 'construct', 'process', 'interpret', 'translate',
        'depreciate', 'complete', 'expose', 'acquire', 'adapt', 'link',
        'simulate', 'diminish', 'compute', 'project', 'demonstrate', 'control',
        'predict', 'contribute', 'examine', 'attain', 'capture', 'develop',
        'provide', 'utilize', 'write', 'build', 'interview', 'organise',
        'classify', 'draw', 'express', 'customize', 'price', 'chart',
        'produce', 'plan', 'inform', 'solve', 'correlation', 'model',
        'operate', 'convert'
    ]

    l4 = analysis = [
        'find', 'focus', 'identify', 'query', 'debate', 'relationships',
        'derive', 'group', 'calculate', 'explain', 'theme', 'choose', 'reason',
        'proof', 'reorganise', 'point', 'interrupt', 'difference', 'arrange',
        'list', 'investigate', 'classify', 'discover', 'motive', 'deduce',
        'connect', 'advertise', 'detect', 'confirm', 'research', 'experiment',
        'size', 'cause', 'contrast', 'inspect', 'explore', 'distinguish',
        'layout', 'optimize', 'interpret', 'question', 'omit', 'depth',
        'ensure', 'distinction', 'inference', 'divide', 'relate', 'manage',
        'rank', 'maximize', 'categorize', 'establish', 'select', 'illustrate',
        'subdivide', 'transform', 'comparing', 'assumption', 'analyze',
        'function', 'analyse', 'train', 'differentiate', 'breadboard',
        'dissect', 'see', 'limit', 'highlight', 'appraise', 'diagnose',
        'blueprint', 'compare', 'recognize', 'characterize', 'examine', 'file',
        'discriminate', 'discussion', 'isolate', 'inventory', 'test', 'survey',
        'document', 'infer', 'categorise', 'breakdown', 'separate', 'effect',
        'diagram', 'simplify', 'point', 'audit', 'criticize', 'outline',
        'correlate', 'minimize', 'prioritize', 'organise', 'model', 'order',
        'test'
    ]

    l5 = synthesis = [
        'incorporate', 'code', 'reorganize', 'invent', 'generalize', 'compose',
        'overhaul', 'explain', 'hypothesize', 'program', 'combine', 'choose',
        'frame', 'integrate', 'collaborate', 'handle', 'format', 'propose',
        'express', 'progress', 'reconstruct', 'speculate', 'discuss', 'comply',
        'arrange', 'intervene', 'collect', 'hypothesise', 'debug', 'enhance',
        'anticipate', 'originate', 'formulate', 'discover', 'reinforce',
        'design', 'animate', 'substitute', 'network', 'join', 'experiment',
        'adapt', 'lecture', 'contrast', 'extend', 'visualise', 'modify',
        'makeup', 'prescribe', 'imagine', 'interface', 'estimate', 'generate',
        'change', 'improve', 'convert', 'elaborate', 'initiate',
        'individualize', 'think', 'revise', 'organize', 'relate', 'assemble',
        'synthesize', 'categorize', 'summarize', 'prepare', 'create',
        'transform', 'construct', 'predict', 'theorise', 'minimise', 'tell',
        'cope', 'maximise', 'innovate', 'specify', 'communicate', 'setup',
        'pretend', 'budget', 'compile', 'suppose', 'tabulate', 'delete',
        'compare', 'rewrite', 'devise', 'abstract', 'dictate', 'cultivate',
        'happen', 'portray', 'depict', 'develop', 'perform', 'make', 'write',
        'build', 'test', 'negotiate', 'rearrange', 'simplify', 'produce',
        'plan', 'validate', 'structure', 'add', 'outline', 'facilitate',
        'correspond', 'solve', 'model', 'original'
    ]

    l6 = evaluation = [
        'validate', 'compare', 'deduct', 'useful', 'consider', 'conclude',
        'predict', 'relate', 'describe', 'influence', 'rank', 'assess', 'rate',
        'persuade', 'determine', 'measure', 'critique', 'mark', 'summarize',
        'select', 'discuss', 'discriminate', 'prove', 'verify', 'defend',
        'support', 'debate', 'grade', 'argue', 'disprove', 'recommend', 'test',
        'infer', 'contrast', 'choose', 'attach', 'good', 'importance',
        'evaluate', 'criteria', 'prescribe', 'hire', 'award', 'perceive',
        'dispute', 'know', 'decide', 'opinion', 'judge', 'estimate', 'why',
        'interpret', 'counsel', 'criticize', 'effective', 'prioritize',
        'value', 'agree', 'bad', 'convince', 'prioritise', 'release', 'frame',
        'appraise', 'explain', 'criticise', 'justify'
    ]

    cl_listoflist = [l1, l2, l3, l4, l5, l6]

    cnt_log = 0

    final_level_of_ques = -1
    final_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    final_area_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    for vq_word in vq_words:
        # calculating sum and avg of sim of word with each list
        # print("\n\ndoing for word -----" , vq_word)
        sum_of_sim_all_levels = []
        avg_of_sim_all_levels = []
        for i, list_i in enumerate(cl_listoflist):
            # print("list number  : " , i)
            sum_of_sim = 0
            for l_word in list_i:
                # print("two words " , vq_word , l_word)
                if len(wordnet.synsets(vq_word)) == 0:
                    # print vq_word
                    break
                vq_word_syn = wordnet.synsets(vq_word)[0]
                # print("l_word => wordnet.synsets(l_word)",l_word, "=>" ,wordnet.synsets(l_word))
                if len(wordnet.synsets(l_word)) == 0:
                    # print l_word
                    continue
                l_word_syn = wordnet.synsets(l_word)[0]
                try:
                    res_sim = wordnet.res_similarity(vq_word_syn, l_word_syn,
                                                     brown_ic)
                except:
                    # print vq_word_syn,l_word_syn,"->exception"
                    continue
                # res_sim=(vq_word_syn).jcn_similarity(l_word_syn)
                if res_sim is not None:
                    sum_of_sim = sum_of_sim + res_sim
                    # sum_of_sim += 1
                    # print(" counted ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                else:
                    cnt_log = cnt_log + 1
                    # print("Not counted             ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                # input()
            sum_of_sim_all_levels.append(sum_of_sim)
            avg_of_sim_all_levels.append(sum_of_sim / len(list_i))

        # print("\n\n printing all lists")
        # for l in cl_listoflist:
        # 	print(l)

        # QUES WORK BEGIN
        # print ("Sim")
        for i in range(0, 6):
            final_sim_of_ques_with_all_levels[i] += avg_of_sim_all_levels[i]
        # 	print (final_sim_of_ques_with_all_levels[i],",")
        # print("\n")

        # print("area sim")
        for i in range(0, 6):
            final_area_sim_of_ques_with_all_levels[i] += sum_of_sim_all_levels[
                i]
        # 	print (final_area_sim_of_ques_with_all_levels[i],",")
        # print("\n")
        # print ("cnt_log",cnt_log)

    # print ("Final Sim")
    # for i in range(0,6):
    # 	print (final_sim_of_ques_with_all_levels[i],",")
    # print("\n")
    #
    # print ("Final Area Sim")
    # for i in range(0,6):
    # 	print (final_area_sim_of_ques_with_all_levels[i],",")
    # print("\n")

    #	maximum of all similarities values to find cl level
    final_level = 0
    max_sim = final_sim_of_ques_with_all_levels[0]
    for index, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim > max_sim:
            max_sim = sim
            final_level = index

    # print("\n")
    # print("avg wali list: " , avg_of_sim_all_levels)

    # print( "sum wali list: " , sum_of_sim_all_levels)

    # 	find whether the question ties across more than one level
    count = 0
    indices_of_same_sim = []
    for i, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim == max_sim:
            count += 1
            indices_of_same_sim.append(i)

    # 	if the question ties across more than one level
    if len(indices_of_same_sim) > 1:
        # print ("ques is in more than two levels")
        same_sim_list = []
        for index in indices_of_same_sim:
            same_sim_list.append(final_area_sim_of_ques_with_all_levels[index])

        max_sim_area = same_sim_list[0]
        for sim_area, index_of_max_sim in zip(same_sim_list,
                                              indices_of_same_sim):
            if sim_area > max_sim_area:
                max_sim_area = sim_area
                final_level = index_of_max_sim

    # print("final_level ",final_level)
    return final_level
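A hedged call, assuming `wordnet` and `brown_ic` are set up as in the earlier examples; the function returns the index (0-5) of the Bloom's-taxonomy level whose verb list is most similar to the question words:

level = res_sim_fun(["define", "list", "recall"])  # expected to favor level 0 (knowledge)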
Example #14
#simFunction = wn.path_similarity
# Leacock-Chodorow Similarity:  -log(p/2d) where p is the shortest path length and d the taxonomy depth.
# pairs with different parts of speech are simply scored as 0
#0.303463230095     0.33365080482       0.243127578189
#simFunction = lambda x,y : wn.lch_similarity(x, y) if x.pos()==y.pos() else 0
# based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer
#0.331197865969     0.35898821251       0.258938723468
#simFunction = wn.wup_similarity

# -log P(LCS(c1,c2))
#0.332605157422     0.378351460201      0.237095177379
#simFunction = lambda x,y : wn.res_similarity(x,y,brown_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
#0.333519003413     0.382270850924      0.237229430736
#simFunction = lambda x,y : wn.res_similarity(x,y,semcor_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
#0.327047195075     0.378426849209      0.226561164029
simFunction = lambda x, y: (wn.res_similarity(x, y, genesis_ic)
                            if x.pos() == y.pos() and x.pos() not in ['a', 's']
                            else 0)

# 1/(IC(s1) + IC(s2) - 2 * IC(lcs))
#0.283453351637     0.364845938375      0.158729843246
#simFunction = lambda x,y : wn.jcn_similarity(x,y,brown_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
#0.17733734237      0.272078927265      0.0570884272107
#simFunction = lambda x,y : wn.jcn_similarity(x,y,semcor_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
#0.131891595045     0.215981129294      0.0797506187655
#simFunction = lambda x,y : wn.jcn_similarity(x,y,genesis_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0

# 2 * IC(lcs) / (IC(s1) + IC(s2))
#0.299336498295     0.360000100519      0.197707567689
#simFunction = lambda x,y : wn.lin_similarity(x,y,brown_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
#0.216159121464     0.273284313725      0.12954361359
#simFunction = lambda x,y : wn.lin_similarity(x,y,semcor_ic)if x.pos()==y.pos() and not x.pos()in['a','s'] else 0
Example #15
def res_similarity(synsets1, synsets2):
    similarity_function = lambda ss1, ss2: wn.res_similarity(ss1, ss2, corpus)
    return __max_similarity(synsets1, synsets2, similarity_function)
Example #16
def resnik_similarity(a, b):
    # res_similarity raises an error for unsupported synset pairs instead of
    # returning None, so guard the call and compute the score only once
    try:
        sim = wn.res_similarity(wn.synsets(a)[0], wn.synsets(b)[0], genesis_ic)
    except Exception:
        return 0
    return sim if sim is not None else 0