Ejemplo n.º 1
0
def sim_ox_wn_via_definition_cal_word():
  """Evaluate the definition-based matching word by word and report averages.

  For every word yielded by ``OxfordParser.get_dict_nouns()`` the similarity
  matrix is fetched from the ``__m2d_sim__`` cache (computed on demand with
  ``sim_ox_wn_definition``), passed through ``choose_pair_0_1`` and compared
  with the gold data by ``CompareWithGold.compareGoldWithResult``.  The
  per-word precision, recall and accuracy are averaged over the scored words
  (a macro-average), printed, and appended to the results file through
  ``Parameters.append_result_to_file``.

  Returns:
    None.  Results are only printed and written to the results file.
  """
  # Float accumulators: under Python 2 an int/int division would truncate
  # if the per-word metrics ever came back as integers.
  total_precision = 0.0
  total_recall = 0.0
  total_accuracy = 0.0
  total_word = 0

  for word in OxfordParser.get_dict_nouns():
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Work on a deep copy so the cached entry itself is never handed on.
    m2d_sim = copy.deepcopy(__m2d_sim__[word])

    # Nothing to evaluate without at least one row and one column.
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    # Single-argument print(...) is valid under the Python 2 print statement
    # too and produces the same output.
    print(word)

    # NOTE(review): presumably turns the scores into 0/1 pair decisions --
    # confirm in choose_pair_0_1.
    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    (precision, recall, accuracy) = CompareWithGold.compareGoldWithResult(m2d_sim, word)
    # A precision of -1 is treated as "skip this word" (presumably no gold
    # data for it -- confirm in CompareWithGold).
    if precision != -1:
      total_precision += precision
      total_recall += recall
      total_accuracy += accuracy
      total_word += 1

  if total_word:
    precision = total_precision / total_word
    recall = total_recall / total_word
    accuracy = total_accuracy / total_word
  else:
    # No word could be scored: report zeros instead of dividing by zero.
    precision = recall = accuracy = 0.0

  f_score = 0
  if precision != 0 or recall != 0:
    f_score = 2*(precision*recall)/(precision + recall)

  print("total:")
  print(total_word)
  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
Ejemplo n.º 2
0
def train_sim_definition():
  """Train the parameters, install the result, and run the evaluation.

  Calls ``Parameters.reset_params_zero()``, runs ``jaccard_train()`` and
  copies the first six entries of the parameter sequence it returns into the
  ``Parameters`` settings, then calls ``sim_ox_wn_via_definition()``.
  The f-score returned by ``jaccard_train()`` is not used here.
  """
  Parameters.reset_params_zero()
  _, trained = jaccard_train()

  # Destination of each trained value, listed in the order the values appear
  # in the sequence returned by jaccard_train().
  # A previously hand-set alternative (now disabled) was, in the same order:
  # 0, 0.0, 1.0, 1, 1.25, 0.9.
  destinations = (
    (Parameters.PARAMETERS_CHOICE_0_1, "CHOICE_1_1_MIN"),
    (Parameters.PARAMETERS_CHOICE_0_1, "CHOICE_1_COL_MIN_FIRST"),
    (Parameters.PARAMETERS_CHOICE_0_1, "CHOICE_1_COL_RANGE_FIRST"),
    (Parameters.PARAMETERS_CHOICE_0_1, "CHOICE_N_N_MIN_FIRST"),
    (Parameters.PARAMETERS_CHOICE_0_1, "CHOICE_N_N_RANGE_FIRST"),
    (Parameters.MORPHO, "JACCARD"),
  )
  for position, (holder, attribute) in enumerate(destinations):
    setattr(holder, attribute, trained[position])

  sim_ox_wn_via_definition()
Ejemplo n.º 3
0
def sim_ox_wn_via_definition_morpho_cal_syns():
  """Evaluate the matching after blending n-gram similarity into the scores.

  For every word yielded by ``OxfordParser.get_dict_nouns()``:

  * the similarity matrix is taken from the ``__m2d_sim__`` cache (filled on
    demand by ``sim_ox_wn_definition``) and deep-copied;
  * an n-gram matrix is built once per word as the equal-weight (0.25 each)
    sum of the matrices stored in ``__m2d_sim_jacc__``,
    ``__m2d_sim_2grams__``, ``__m2d_sim_3grams__`` and ``__m2d_sim_4grams__``
    and cached in ``__dict_ngrams__``;
  * the copy is blended with it using ``Parameters.MORPHO.JACCARD`` as the
    n-gram share, passed through ``choose_pair_0_1`` and compared with the
    gold data.

  The tp/tn/fn/fp counts are pooled over all words before precision, recall,
  accuracy and f-score are computed.  The results are printed and appended to
  the results file.

  NOTE(review): rows presumably index WordNet senses and columns Oxford
  senses -- confirm against sim_ox_wn_definition.

  Returns:
    A tuple ``(f_score, current_params)`` where ``current_params`` is a deep
    copy of ``Parameters.get_current_params()``.
  """
  # The counters start at a tiny positive value rather than zero, which keeps
  # the denominators of the ratios below non-zero (given non-negative counts).
  total_tp = total_tn = total_fn = total_fp = 0.00001
  total_pair = 0

  for word in OxfordParser.get_dict_nouns():
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Deep copy: the matrix is modified in place below and the cached
    # original must stay intact.
    scores = copy.deepcopy(__m2d_sim__[word])
    if scores is None or len(scores) == 0 or len(scores[0]) == 0:
      continue

    n_rows = len(scores)
    n_cols = len(scores[0])

    if word not in __dict_ngrams__:
      jacc = __m2d_sim_jacc__[word]
      grams2 = __m2d_sim_2grams__[word]
      grams3 = __m2d_sim_3grams__[word]
      grams4 = __m2d_sim_4grams__[word]

      w1 = w2 = w3 = 0.25
      w4 = 1 - w1 - w2 - w3  # the 4-gram matrix receives the remaining weight
      __dict_ngrams__[word] = [
        [jacc[r][c]*w1 + grams2[r][c]*w2 + grams3[r][c]*w3 + grams4[r][c]*w4
         for c in range(n_cols)]
        for r in range(n_rows)
      ]

    ngrams = __dict_ngrams__[word]

    # Linear blend: (1 - weight) of the definition score plus weight of the
    # combined n-gram score.
    ngram_share = Parameters.MORPHO.JACCARD
    for r, row in enumerate(scores):
      for c in range(n_cols):
        row[c] = row[c]*(1 - ngram_share) + ngram_share*ngrams[r][c]

    # NOTE(review): presumably turns the scores into 0/1 pair decisions --
    # confirm in choose_pair_0_1.
    scores = choose_pair_0_1(scores, n_rows, n_cols)

    # Pairs are counted for every evaluated word, including those skipped by
    # the -1 check below.
    total_pair += count_pair(scores)

    tp, tn, fn, fp = CompareWithGold.compareGoldWithResult_without_cal_result(scores, word)
    # tp == -1 is treated as "skip this word" (presumably no gold data for
    # it -- confirm in CompareWithGold).
    if tp == -1:
      continue
    total_tp += tp
    total_tn += tn
    total_fn += fn
    total_fp += fp

  precision = total_tp / (total_tp + total_fp)
  recall = total_tp / (total_tp + total_fn)
  accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)

  f_score = 0
  if not (precision == 0 and recall == 0):
    f_score = 2*(precision*recall)/(precision + recall)

  # Single-argument print(...) is valid under the Python 2 print statement
  # too and produces the same output.
  print("total:")
  for value in (total_pair, total_tp, total_tn, total_fn, total_fp,
                precision, recall, f_score, accuracy):
    print(value)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
  return f_score, copy.deepcopy(Parameters.get_current_params())
Ejemplo n.º 4
0
def sim_ox_wn_via_definition_cal_syns():
  """Evaluate the definition-based matching with counts pooled over all words.

  For every word yielded by ``OxfordParser.get_dict_nouns()`` the similarity
  matrix is fetched from the ``__m2d_sim__`` cache (computed on demand with
  ``sim_ox_wn_definition``), passed through ``choose_pair_0_1`` and compared
  with the gold data by
  ``CompareWithGold.compareGoldWithResult_without_cal_result``.  The
  tp/tn/fn/fp counts are summed over all words and precision, recall,
  accuracy and f-score are computed once from the totals (a micro-average).
  The results are printed and appended to the results file through
  ``Parameters.append_result_to_file``.

  A ratio whose denominator is zero (for example when no word was scored) is
  reported as 0.0 instead of raising ``ZeroDivisionError``.

  Returns:
    A tuple ``(f_score, current_params)`` where ``current_params`` is a deep
    copy of ``Parameters.get_current_params()``.
  """
  total_tp = 0.0
  total_tn = 0.0
  total_fn = 0.0
  total_fp = 0.0
  total_pair = 0

  for word in OxfordParser.get_dict_nouns():
    if word not in __m2d_sim__:
      __m2d_sim__[word] = sim_ox_wn_definition(word)

    # Work on a deep copy so the cached entry itself is never handed on.
    m2d_sim = copy.deepcopy(__m2d_sim__[word])

    # Nothing to evaluate without at least one row and one column.
    if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
      continue

    # NOTE(review): presumably turns the scores into 0/1 pair decisions --
    # confirm in choose_pair_0_1.
    m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

    # Pairs are counted for every evaluated word, including those skipped by
    # the -1 check below.
    total_pair += count_pair(m2d_sim)

    (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
    # tp == -1 is treated as "skip this word" (presumably no gold data for
    # it -- confirm in CompareWithGold).
    if tp != -1:
      total_tp += tp
      total_tn += tn
      total_fn += fn
      total_fp += fp

  predicted_positive = total_tp + total_fp
  gold_positive = total_tp + total_fn
  total_count = total_tp + total_tn + total_fp + total_fn

  # Guard each ratio: the counters start at exactly zero, so any of these
  # denominators can legitimately be zero.
  precision = total_tp / predicted_positive if predicted_positive else 0.0
  recall = total_tp / gold_positive if gold_positive else 0.0
  accuracy = (total_tp + total_tn) / total_count if total_count else 0.0

  f_score = 0
  if precision != 0 or recall != 0:
    f_score = 2*(precision*recall)/(precision + recall)

  # Single-argument print(...) is valid under the Python 2 print statement
  # too and produces the same output.
  print("total:")
  print(total_pair)
  print(total_tp)
  print(total_tn)
  print(total_fn)
  print(total_fp)

  print(precision)
  print(recall)
  print(f_score)
  print(accuracy)

  Parameters.append_result_to_file(precision, recall, f_score, accuracy)
  current_params = copy.deepcopy(Parameters.get_current_params())
  return f_score, current_params