예제 #1
0
def create_input_for_train():
  """Create the training sentence input from the gold data.

  Reads the VietNet file through ``ReadVietNet.readVietNetFile``, the Oxford
  noun dictionary through ``OxfordParser.get_dict_nouns`` and the gold data
  exposed as ``CompareWithGold.goldData``, then passes all three to
  ``create_input_sen_via_gold_data``.
  """
  vietnet_entries = ReadVietNet.readVietNetFile()
  oxford_nouns = OxfordParser.get_dict_nouns()
  gold_entries = CompareWithGold.goldData

  # The gold-data variant is the active one; a variant that used only the two
  # dictionaries (create_input_sen_via_ox_vn) was disabled in the original.
  create_input_sen_via_gold_data(vietnet_entries, oxford_nouns, gold_entries)
예제 #2
0
def sim_ox_wn_via_definition_cal_word():
  """Score the definition-based similarity with per-word averaged metrics.

  For every word of the Oxford noun dictionary the similarity matrix is taken
  from the module-level ``__m2d_sim__`` cache (filled on demand through
  ``sim_ox_wn_definition``), passed through ``choose_pair_0_1`` and compared
  with the gold data by ``CompareWithGold.compareGoldWithResult``.

  Precision, recall and accuracy are averaged over the words for which the
  comparison did not return the ``-1`` sentinel.  The averages and the
  F-score derived from them are printed and appended to the results file via
  ``Parameters.append_result_to_file``.

  If no word could be scored, every metric is reported as 0 instead of
  raising ``ZeroDivisionError``.
  """
  # Float accumulators: the averages below must never be truncated by
  # Python 2 integer division, whatever numeric type the comparison returns.
  total_precision = 0.0
  total_recall = 0.0
  total_accuracy = 0.0
  total_word = 0

  dict_ox = OxfordParser.get_dict_nouns()
  for word in dict_ox:
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Work on a copy so the cached matrix is never modified.
    m2d_sim = copy.deepcopy(__m2d_sim__[word])
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    print(word)

    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    (precision, recall, accuracy) = CompareWithGold.compareGoldWithResult(m2d_sim, word)
    # A precision of -1 marks a word that must not be counted
    # (presumably no gold entry for it -- confirm in CompareWithGold).
    if precision != -1:
      total_precision += precision
      total_recall += recall
      total_accuracy += accuracy
      total_word += 1

  if total_word:
    precision = total_precision / total_word
    recall = total_recall / total_word
    accuracy = total_accuracy / total_word
  else:
    # Nothing was scored: report zeros rather than dividing by zero.
    precision = 0.0
    recall = 0.0
    accuracy = 0.0

  f_score = 0
  if precision + recall != 0:
    f_score = 2 * (precision * recall) / (precision + recall)

  print("total:")
  print(total_word)
  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
예제 #3
0
def create_input_for_test_svm():
  """Write the SVM test input for every eligible Oxford noun.

  Words with an empty Oxford entry are skipped, and so are words that have
  exactly one Oxford sense and exactly one WordNet noun synset.  For each
  remaining word the senses are written to ``__filename_input_sen_test__``
  and the feature values to ``__filename_input_test_feature_values__``.
  """
  oxford_nouns = OxfordParser.get_dict_nouns()

  for noun in oxford_nouns:
    ox_senses = oxford_nouns[noun]
    if len(ox_senses) == 0:
      continue

    wn_senses = WordnetHandler.get_synsets_for_word(noun, 'n')

    # Only words with more than one candidate pairing are written out.
    if len(ox_senses) != 1 or len(wn_senses) != 1:
      write_sens_for_reading(wn_senses, ox_senses, __filename_input_sen_test__)
      cal_features_and_write_to_file_for(wn_senses, ox_senses, __filename_input_test_feature_values__)
예제 #4
0
def create_input_for_train_svm():
  """Write the SVM training input for Oxford nouns that have gold data.

  Words with an empty Oxford entry or without an entry in
  ``CompareWithGold.goldData`` are skipped, as are words with exactly one
  Oxford sense and exactly one WordNet noun synset.  For every other word the
  labels, the readable senses (``__filename_input_sen_train__``) and the
  feature values (``__filename_input_train_feature_values__``) are written.

  Processing stops completely as soon as the eligible word "brook" is
  reached.
  """
  oxford_nouns = OxfordParser.get_dict_nouns()
  gold_entries = CompareWithGold.goldData

  for noun in oxford_nouns:
    ox_senses = oxford_nouns[noun]
    if len(ox_senses) == 0 or noun not in gold_entries:
      continue

    # NOTE(review): hard-coded cut-off -- everything from "brook" onwards in
    # iteration order is left out of the training data.  Looks like a manual
    # train/test split; confirm it is still intended.
    if noun == "brook":
      return

    wn_senses = WordnetHandler.get_synsets_for_word(noun, 'n')

    # Only words with more than one candidate pairing are written out.
    if len(ox_senses) != 1 or len(wn_senses) != 1:
      write_label_for_svm(wn_senses, ox_senses, gold_entries[noun])
      write_sens_for_reading(wn_senses, ox_senses, __filename_input_sen_train__)
      cal_features_and_write_to_file_for(wn_senses, ox_senses, __filename_input_train_feature_values__)
예제 #5
0
def create_input_sens_for_test_svm():
  """Run ``parse_ox_wn_defi_to_input`` on every word of the Oxford noun dictionary."""
  oxford_nouns = OxfordParser.get_dict_nouns()
  for noun in oxford_nouns:
    parse_ox_wn_defi_to_input(noun)
예제 #6
0
def sim_ox_wn_via_svm():
  """Evaluate the SVM-based Oxford/WordNet sense matching against gold data.

  For every word of the Oxford noun dictionary a similarity matrix is
  obtained: ``[[1]]`` when the word has exactly one Oxford sense and exactly
  one WordNet noun synset, otherwise whatever
  ``get_m2d_sim_for_word_from_svm_result`` returns.  Words without a usable
  matrix (``None`` or empty) are skipped, in line with the other
  ``sim_ox_wn_*`` evaluators.  The matrix is passed through
  ``choose_pair_0_1`` and compared with the gold data; the resulting
  tp/tn/fn/fp counts are accumulated over all words.

  The totals and the derived precision, recall, F-score and accuracy are
  printed and appended to the results file.

  Returns:
    A tuple ``(f_score, current_params)`` where ``current_params`` is a deep
    copy of ``Parameters.get_current_params()``.
  """
  # Tiny non-zero seeds keep the ratio denominators below away from zero.
  total_tp = 0.00001
  total_tn = 0.00001
  total_fn = 0.00001
  total_fp = 0.00001
  total_pair = 0

  dict_ox = OxfordParser.get_dict_nouns()
  for word in dict_ox:
    word_syns_ox = dict_ox[word]
    wn_synsets = WordnetHandler.get_synsets_for_word(word, "n")

    if len(word_syns_ox) == 1 and len(wn_synsets) == 1:
      # A single sense on each side can only match each other.
      m2d_sim = [[1]]
    else:
      m2d_sim = get_m2d_sim_for_word_from_svm_result(word)

    # Skip missing or empty matrices; indexing m2d_sim[0] below would
    # otherwise fail on an empty result.
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    total_pair += count_pair(m2d_sim)

    (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
    # tp == -1 marks a word that must not be counted
    # (presumably no gold entry for it -- confirm in CompareWithGold).
    if tp != -1:
      total_tp += tp
      total_tn += tn
      total_fn += fn
      total_fp += fp

  precision = total_tp / (total_tp + total_fp)
  recall = total_tp / (total_tp + total_fn)
  accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)

  f_score = 0
  if precision != 0 or recall != 0:
    f_score = 2 * (precision * recall) / (precision + recall)

  print("total:")
  print(total_pair)
  print(total_tp)
  print(total_tn)
  print(total_fn)
  print(total_fp)

  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
  # Hand back a snapshot so later parameter changes cannot alter it.
  current_params = copy.deepcopy(Parameters.get_current_params())
  return f_score, current_params
예제 #7
0
def create_input_for_test():
  """Pass the Oxford noun dictionary to ``create_input_sens_test``."""
  oxford_nouns = OxfordParser.get_dict_nouns()
  create_input_sens_test(oxford_nouns)
예제 #8
0
def sim_ox_wn_via_definition_morpho_cal_syns():
  """Evaluate definition similarity blended with n-gram (morphological) scores.

  For every word of the Oxford noun dictionary:

  * the definition similarity matrix is taken from the ``__m2d_sim__`` cache
    (filled on demand through ``sim_ox_wn_definition``);
  * an n-gram matrix is taken from the ``__dict_ngrams__`` cache, or built as
    the equally weighted (0.25 each) combination of the word's entries in
    ``__m2d_sim_jacc__``, ``__m2d_sim_2grams__``, ``__m2d_sim_3grams__`` and
    ``__m2d_sim_4grams__``;
  * both are mixed cell by cell using ``Parameters.MORPHO.JACCARD`` as the
    weight of the n-gram part;
  * the mixed matrix is passed through ``choose_pair_0_1`` and compared with
    the gold data, and the tp/tn/fn/fp counts are accumulated.

  The totals and the derived precision, recall, F-score and accuracy are
  printed and appended to the results file.

  Returns:
    A tuple ``(f_score, current_params)`` where ``current_params`` is a deep
    copy of ``Parameters.get_current_params()``.
  """
  # Tiny non-zero seeds keep the ratio denominators below away from zero.
  total_tp = 0.00001
  total_tn = 0.00001
  total_fn = 0.00001
  total_fp = 0.00001
  total_pair = 0

  # Weights of the four n-gram matrices; the 4-gram weight is the remainder.
  monogram_weight = 0.25
  bigram_weight = 0.25
  trigram_weight = 0.25
  fourgram_weight = 1 - monogram_weight - bigram_weight - trigram_weight

  dict_ox = OxfordParser.get_dict_nouns()
  for word in dict_ox:
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Work on a copy: the matrix is overwritten below, the cache must not be.
    m2d_sim = copy.deepcopy(__m2d_sim__[word])
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    n_rows = len(m2d_sim)
    n_cols = len(m2d_sim[0])

    if word not in __dict_ngrams__:
      # NOTE(review): assumes the four n-gram caches already hold an entry for
      # every word that has a non-empty similarity matrix (presumably filled
      # by sim_ox_wn_definition) -- confirm.
      m2d_jacc = __m2d_sim_jacc__[word]
      m2d_2grams = __m2d_sim_2grams__[word]
      m2d_3grams = __m2d_sim_3grams__[word]
      m2d_4grams = __m2d_sim_4grams__[word]

      m2d_ngrams = [[0 for _ in range(n_cols)] for _ in range(n_rows)]
      for i in range(n_rows):
        for j in range(n_cols):
          m2d_ngrams[i][j] = m2d_jacc[i][j]*monogram_weight \
                              + m2d_2grams[i][j]*bigram_weight \
                              + m2d_3grams[i][j]*trigram_weight \
                              + m2d_4grams[i][j]*fourgram_weight
      __dict_ngrams__[word] = m2d_ngrams

    m2d_ngrams = __dict_ngrams__[word]

    # Mix the definition score with the n-gram score, cell by cell.
    JACCARD_WEIGHT = Parameters.MORPHO.JACCARD
    for i in range(n_rows):
      for j in range(n_cols):
        m2d_sim[i][j] = m2d_sim[i][j]*(1-JACCARD_WEIGHT) + JACCARD_WEIGHT*(m2d_ngrams[i][j])

    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    total_pair += count_pair(m2d_sim)

    (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
    # tp == -1 marks a word that must not be counted
    # (presumably no gold entry for it -- confirm in CompareWithGold).
    if tp != -1:
      total_tp += tp
      total_tn += tn
      total_fn += fn
      total_fp += fp

  precision = total_tp / (total_tp + total_fp)
  recall = total_tp / (total_tp + total_fn)
  accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)

  f_score = 0
  if precision != 0 or recall != 0:
    f_score = 2 * (precision * recall) / (precision + recall)

  print("total:")
  print(total_pair)
  print(total_tp)
  print(total_tn)
  print(total_fn)
  print(total_fp)

  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
  # Hand back a snapshot so later parameter changes cannot alter it.
  current_params = copy.deepcopy(Parameters.get_current_params())
  return f_score, current_params
예제 #9
0
def sim_ox_wn_via_definition_cal_syns():
  """Evaluate the definition-based similarity with pooled sense-pair counts.

  For every word of the Oxford noun dictionary the similarity matrix is taken
  from the module-level ``__m2d_sim__`` cache (filled on demand through
  ``sim_ox_wn_definition``), passed through ``choose_pair_0_1`` and compared
  with the gold data.  The tp/tn/fn/fp counts are accumulated over all words
  and precision, recall, accuracy and F-score are computed from the totals.

  The totals and the metrics are printed and appended to the results file.
  A metric whose denominator is zero is reported as 0.0 instead of raising
  ``ZeroDivisionError``.

  Returns:
    A tuple ``(f_score, current_params)`` where ``current_params`` is a deep
    copy of ``Parameters.get_current_params()``.
  """
  def _ratio(numerator, denominator):
    # The totals start at exactly 0.0 here, so a denominator can be zero
    # (e.g. no gold data matched at all); report 0.0 in that case.
    if denominator == 0:
      return 0.0
    return numerator / denominator

  total_tp = 0.0
  total_tn = 0.0
  total_fn = 0.0
  total_fp = 0.0
  total_pair = 0

  dict_ox = OxfordParser.get_dict_nouns()
  for word in dict_ox:
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Work on a copy so the cached matrix is never modified.
    m2d_sim = copy.deepcopy(__m2d_sim__[word])
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    total_pair += count_pair(m2d_sim)

    (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
    # tp == -1 marks a word that must not be counted
    # (presumably no gold entry for it -- confirm in CompareWithGold).
    if tp != -1:
      total_tp += tp
      total_tn += tn
      total_fn += fn
      total_fp += fp

  precision = _ratio(total_tp, total_tp + total_fp)
  recall = _ratio(total_tp, total_tp + total_fn)
  accuracy = _ratio(total_tp + total_tn,
                    total_tp + total_tn + total_fp + total_fn)

  f_score = 0
  if precision != 0 or recall != 0:
    f_score = 2 * (precision * recall) / (precision + recall)

  print("total:")
  print(total_pair)
  print(total_tp)
  print(total_tn)
  print(total_fn)
  print(total_fp)

  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
  # Hand back a snapshot so later parameter changes cannot alter it.
  current_params = copy.deepcopy(Parameters.get_current_params())
  return f_score, current_params