예제 #1
0
def get_greatest_synsets_similarity_between(synsets_wn, nouns):
  """Return the synset in synsets_wn with the highest mean similarity to nouns.

  For every candidate synset, each noun contributes the average of
  WordnetHandler.cal_similarity over all senses returned by
  WordnetHandler.get_synsets_for_word(noun, 'n'); a noun with no senses
  contributes 0. The per-noun contributions are then averaged over
  len(nouns).

  Args:
    synsets_wn: iterable of candidate synsets.
    nouns: sequence of words the candidates are compared against.

  Returns:
    The candidate with the greatest score above 0 (the first one wins ties),
    or None when nouns is empty, synsets_wn is empty, or no candidate scores
    above 0.
  """
  if len(nouns) == 0:
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("no nouns")
    return None

  synset_wn_max = None
  p_max = 0
  for synset_wn in synsets_wn:
    # Float accumulators keep the averages below from truncating under
    # Python 2 integer division.
    p_noun = 0.0
    for noun in nouns:
      synsets_of_noun = WordnetHandler.get_synsets_for_word(noun, 'n')
      if len(synsets_of_noun) == 0:
        continue

      p_each_noun = 0.0
      for synset_of_noun in synsets_of_noun:
        p = WordnetHandler.cal_similarity(synset_wn, synset_of_noun)
        # Other callers in this file guard against a None similarity; count
        # it as 0 here instead of crashing on the addition.
        if p is not None:
          p_each_noun += p
      p_noun += p_each_noun / len(synsets_of_noun)

    p = p_noun / len(nouns)
    if p > p_max:
      # Remember the best score so far; without this update a later, weaker
      # candidate would overwrite the current winner.
      p_max = p
      synset_wn_max = synset_wn

  return synset_wn_max
예제 #2
0
def sim_2_word(word_1, word_2):
    """Return a similarity score for two words based on their synsets.

    The strategy is selected by the module-level __WSD_type__ flag:
      * 1 -- the best WordnetHandler.cal_similarity score over every pair of
        synsets of the two words (0 when no pair scores above 0);
      * 0 -- the score of the first synset of each word, or 0 when either
        word has no synsets.
    Any other flag value yields 0.
    """
    senses_a = synsets_for_word(word_1)
    senses_b = synsets_for_word(word_2)

    if __WSD_type__ == 1:
        best = 0
        for sense_a in senses_a:
            for sense_b in senses_b:
                score = WordnetHandler.cal_similarity(sense_a, sense_b)
                best = score if score > best else best
        return best

    if __WSD_type__ == 0:
        # Both words need at least one synset before the first senses can
        # be compared.
        if not (len(senses_a) and len(senses_b)):
            return 0
        return WordnetHandler.cal_similarity(senses_a[0], senses_b[0])

    return 0
예제 #3
0
def sim_for_synset_and_synsetvector(a_synset, vector):
  """Return the best similarity between a_synset and the synsets in vector.

  vector is an iterable of (synset, weight) pairs; the weight is unpacked
  but does not influence the result. Similarities reported as None are
  skipped. Returns 0 when vector is empty or nothing scores above 0.
  """
  best = 0
  for entry in vector:
    (candidate, _weight) = entry
    score = WordnetHandler.cal_similarity(a_synset, candidate)
    if score is None:
      continue
    if score > best:
      best = score

  return best
예제 #4
0
def wordnet_based_synset(syn_wn, sen_ox):
    """Return the average best similarity between syn_wn and words of sen_ox.

    sen_ox is tokenised with split_words. For each word, the best
    WordnetHandler.cal_similarity score between any of its synsets and
    syn_wn is taken; only words whose best score is non-zero are counted.
    The total starts at 0.0001 and the divisor is the number of counted
    words plus 0.001, so the division is defined even when no word matches.
    """
    total = 0.0001
    matched = 0
    for token in split_words(sen_ox):
        best = 0
        for candidate in synsets_for_word(token):
            score = WordnetHandler.cal_similarity(candidate, syn_wn)
            if score > best:
                best = score

        if best == 0:
            continue
        matched += 1
        total += best

    return total / (matched + 0.001)
def get_value_synset_for(cur_synset, synsets):
  """Score each synset in synsets against the definition of cur_synset.

  The definition text of cur_synset is run through
  PreprocessDefinition.preprocess_sentence. For every synset, each resulting
  word is mapped to a synset via get_greatest_synset_similarity_between;
  words for which that helper returns None are ignored. The value for a
  synset is the sum of WordnetHandler.cal_similarity(synset, match) divided
  by the number of words that produced a match (a None similarity adds
  nothing to the sum but the word still counts in the divisor).

  Args:
    cur_synset: object exposing a definition() method.
    synsets: iterable of synsets to score.

  Returns:
    A list with one value per entry of synsets, in the same order; the value
    is 0 when no word produced a match.
  """
  definition = cur_synset.definition()
  nouns = PreprocessDefinition.preprocess_sentence(definition)

  synsets_value = []
  for synset in synsets:
    count = 0
    p = 0
    for noun in nouns:
      # NOTE(review): the bare synset is passed here, while a similar caller
      # in this file passes a one-element list -- confirm which form the
      # helper expects.
      synset_max = get_greatest_synset_similarity_between(synset, noun)
      if synset_max is None:
        continue
      count += 1
      sim = WordnetHandler.cal_similarity(synset, synset_max)
      if sim is not None:
        p += sim

    if count != 0:
      p = p/count

    synsets_value.append(p)

  return synsets_value
예제 #6
0
def get_definition_value_with_synsetwn(definition, synsets_wn):
  """Score each synset in synsets_wn against a definition text.

  definition is run through PreprocessDefinition.preprocess_sentence. For
  every synset, each resulting word is mapped to a synset via
  get_greatest_synset_similarity_between([synset], word); words for which
  that helper returns None are ignored. The value for a synset is the sum of
  WordnetHandler.cal_similarity(synset, match) divided by the number of
  words that produced a match (a None similarity adds nothing to the sum but
  the word still counts in the divisor).

  Args:
    definition: text passed to PreprocessDefinition.preprocess_sentence.
    synsets_wn: iterable of synsets to score.

  Returns:
    A list with one value per entry of synsets_wn, in the same order; the
    value is 0 when no word produced a match.
  """
  nouns = PreprocessDefinition.preprocess_sentence(definition)

  synsets_value = []
  for synset in synsets_wn:
    count = 0
    p = 0
    for noun in nouns:
      synset_max = get_greatest_synset_similarity_between([synset], noun)
      if synset_max is None:
        continue
      count += 1
      sim = WordnetHandler.cal_similarity(synset, synset_max)
      if sim is not None:
        p += sim

    if count != 0:
      p = p/count

    synsets_value.append(p)

  return synsets_value