예제 #1
0
def cal_features_from_sens_write_to_file(filename_sens, filename_output):
  """Compute similarity features for each tab-separated sentence pair in
  *filename_sens* and append one tab-joined feature line per pair to
  *filename_output*.

  Each input line is expected to hold at least two tab-separated sentences;
  lines with fewer than two fields (e.g. blank lines) are skipped instead of
  raising IndexError as the previous manual-readline version did.
  """
  # `with` guarantees the input file is closed even if a feature call raises;
  # iterating the file object replaces the error-prone readline() loop whose
  # next readline() was nested inside the length check.
  with open(filename_sens, 'r') as f:
    for line in f:
      sens = line.split("\t")
      if len(sens) < 2:
        # Malformed/blank line: no sentence pair to compare.
        continue

      sen_1 = sens[0]
      sen_2 = sens[1]

      feature_values = ""
      feature_values += str(Literal.levenshtein_in_context(sen_1, sen_2, sens)) + "\t"
#      feature_values += str(ShallowSyntactic.jaccard_POS_in_context(sen_1, sen_2, sens)) + "\t"
      feature_values += str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0))
#      feature_values += str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 1))

      FileProcess.append_value_to_file(feature_values, filename_output)
예제 #2
0
def cal_feature_values_for(syn_wn, syn_ox):
  """Build the feature vector for one (WordNet synset, Oxford synset) pair.

  Returns a list of six similarity scores, in order:
    1. 1 - Levenshtein on definitions
    2. 1.00001 - Jaccard on definitions
    3. 1 - Levenshtein on glosses
    4. 1.00001 - Jaccard on glosses
    5. gloss Jaccard plus word n-gram overlap for n = 2..5
    6. WordNet-based (WSD) similarity on definitions
  """
  features = []

  # Textual material drawn from each resource.
  defi_wn = WordnetHandler.get_defi_for_syn(syn_wn)
  defi_ox = OxfordParser.get_defi_for_syn(syn_ox)

  gloss_wn = WordnetHandler.get_gloss_for_syn(syn_wn)
  gloss_ox = OxfordParser.get_gloss_for_syn(syn_ox)

  # NOTE(review): the values below are fetched but not used by any active
  # feature; they backed previously disabled experiments (examples, LSA,
  # hypernym/meronym variants). The calls are kept in case the project
  # modules cache or otherwise depend on them -- confirm before removing.
  lemma_wn = WordnetHandler.get_lemma_for_synset(syn_wn)
  sd_ox = OxfordParser.get_short_defi_for_syn(syn_ox)

  ex_wn = WordnetHandler.get_ex_for_syn(syn_wn)
  ex_ox = OxfordParser.get_ex_for_syn(syn_ox)

  cl_ox = OxfordParser.get_collocation_for_syn(syn_ox)
  hyper_wn = WordnetHandler.get_hyper_defi_for_synset(syn_wn)
  mero_wn = WordnetHandler.get_mero_defi_for_synset(syn_wn)

  # ---- Literal similarity on definitions ----
  features.append(1 - Literal.levenshtein(defi_wn, defi_ox))
  features.append(1.00001 - Literal.jaccard(defi_wn, defi_ox))

  # ---- Literal similarity on glosses ----
  features.append(1 - Literal.levenshtein(gloss_wn, gloss_ox))

  jacc_gloss = 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)
  features.append(jacc_gloss)

  # ---- Word n-gram overlap stacked onto the gloss Jaccard score ----
  jacc_ngram = jacc_gloss
  for n in (2, 3, 4, 5):
    jacc_ngram += Ngrams.ngrams_word_for(gloss_wn, gloss_ox, n)
  features.append(jacc_ngram)

  # ---- WordNet-based WSD similarity on definitions ----
  features.append(WordnetBased.wordnet_based(defi_wn, defi_ox, 0))

  return features