コード例 #1
0
ファイル: InputSVM.py プロジェクト: anhtukhtn/Similarity
def cal_feature_values_for(syn_wn, syn_ox):
  """Build the feature vector for one (WordNet sense, Oxford sense) pair.

  Args:
    syn_wn: WordNet-side sense; passed unchanged to the ``WordnetHandler``
      getters (presumably an NLTK ``Synset`` -- confirm against the caller).
    syn_ox: Oxford-side sense; passed unchanged to the ``OxfordParser``
      getters (exact type is not visible from this function).

  Returns:
    A list of six values, in this fixed order:
      0. ``1 - Literal.levenshtein`` of the two definitions
      1. ``1.00001 - Literal.jaccard`` of the two definitions
      2. ``1 - Literal.levenshtein`` of the two glosses
      3. ``1.00001 - Literal.jaccard`` of the two glosses
      4. value 3 plus ``Ngrams.ngrams_word_for`` of the glosses for
         n = 2, 3, 4 and 5
      5. ``WordnetBased.wordnet_based`` of the two definitions (third arg 0)
  """
  # NOTE: earlier experiments (example-sentence Jaccard, POS-based Jaccard,
  # hypernym/hyponym WordNet scores, LSA tf-idf, Levenshtein + n-grams) were
  # disabled in this function and have been dropped together with the lookups
  # only they used (lemmas, short definitions, examples, collocations,
  # hypernym/meronym definitions). Re-add the matching lookup when reviving one.
  defi_wn = WordnetHandler.get_defi_for_syn(syn_wn)
  defi_ox = OxfordParser.get_defi_for_syn(syn_ox)

  gloss_wn = WordnetHandler.get_gloss_for_syn(syn_wn)
  gloss_ox = OxfordParser.get_gloss_for_syn(syn_ox)

  feature_values = []

  # Literal features on the definitions.
  feature_values.append(1 - Literal.levenshtein(defi_wn, defi_ox))
  feature_values.append(1.00001 - Literal.jaccard(defi_wn, defi_ox))

  # Literal features on the glosses.
  feature_values.append(1 - Literal.levenshtein(gloss_wn, gloss_ox))
  gloss_jacc_value = 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)
  feature_values.append(gloss_jacc_value)

  # Gloss Jaccard feature boosted by the word n-gram scores. The additions are
  # kept strictly left to right so the floating-point result matches the
  # previous unrolled version bit for bit.
  literal_jacc_ngram = gloss_jacc_value
  for ngram_size in (2, 3, 4, 5):
    literal_jacc_ngram += Ngrams.ngrams_word_for(gloss_wn, gloss_ox, ngram_size)
  feature_values.append(literal_jacc_ngram)

  # WordNet-based feature on the definitions; the meaning of the trailing 0
  # is defined by WordnetBased.wordnet_based (presumably a mode flag -- verify).
  feature_values.append(WordnetBased.wordnet_based(defi_wn, defi_ox, 0))

  return feature_values