def eval_model_from_file():
    """Run ``word_prob_solver`` on every stored question and print accuracy.

    Questions are read from ``../data/questions_refined.txt`` and expected
    answers from ``../data/answers_refined.txt``, one per line with
    surrounding whitespace stripped.  For each question the solver's answer
    is compared, as a string, with the expected answer on the same line
    number.  The question and answer are appended to ``correct.txt`` on a
    match and to ``wrong.txt`` otherwise; when anything in the attempt
    raises, the question and the exception text go to ``wrong.txt``.

    Prints the running question index, then the number of correct answers,
    the number of questions attempted and the accuracy in percent.
    """
    with open("../data/questions_refined.txt") as f:
        qs = [line.strip() for line in f]
    with open("../data/answers_refined.txt") as f:
        anses = [line.strip() for line in f]

    count1 = 0  # questions answered correctly
    count2 = 0  # questions attempted

    # Context managers guarantee both report files are flushed and closed,
    # even if the loop is interrupted.
    with open("correct.txt", "w") as correct_fp, \
            open("wrong.txt", "w") as wrong_fp:
        for z, inp in enumerate(qs):
            print(z)
            # Count every question exactly once, whether or not it fails.
            count2 += 1
            try:
                with HiddenPrints():
                    ans = word_prob_solver(inp)
                if str(ans) == anses[z]:
                    count1 += 1
                    correct_fp.write(str(inp) + "\n" + str(ans) + "\n\n")
                else:
                    wrong_fp.write(str(inp) + "\n" + str(ans) + "\n\n")
            except Exception as e:
                # Best effort: a failing question is logged as wrong and the
                # evaluation continues with the next one.
                wrong_fp.write(str(inp) + "\n" + str(e) + "\n\n")

    print("correct: ", str(count1))
    print("total: ", str(count2))
    # Avoid ZeroDivisionError when the question file is empty.
    accuracy = (float(count1) * 100) / count2 if count2 else 0.0
    print("acc: ", str(accuracy))
def testCombinationPOSAll(epsilon, X, Y, tag2index, postag2index,
                          emission_weight, transition_weight,
                          emission_weight_pos, combination_weight,
                          combination_weight_pos, param):
    """Finite-difference check of ``GradientCombinationPOS``.

    The predicted change of the loss is ``gradient * epsilon``.  The actual
    change is measured by raising one entry of ``combination_weight_pos`` by
    ``epsilon`` at a time and re-evaluating ``Loss``.  The absolute
    differences are printed together with the flat indices and values of the
    (up to) ten largest entries.

    Args:
        epsilon: Size of the perturbation applied to each weight.
        X, Y: Data forwarded unchanged to ``Loss`` and the gradient function.
        tag2index, postag2index: Mappings whose lengths give the tag count
            ``T`` and POS-tag count ``POS``.
        emission_weight, transition_weight, emission_weight_pos,
        combination_weight: Weights forwarded unchanged.
        combination_weight_pos: Array indexed as ``[tag1, tag2, postag]``
            over ``(T + 1, T, POS)``.  Entries are perturbed in place and
            restored to their exact previous value afterwards.
        param: Forwarded unchanged to ``Loss`` and the gradient function.
    """
    T, POS = len(tag2index), len(postag2index)
    result_loss_dff_actual = np.zeros((T + 1, T, POS))

    # Model calls run with printing suppressed; the report below does not.
    with HiddenPrints():
        gradient = GradientCombinationPOS(
            X, Y, tag2index, postag2index, emission_weight,
            transition_weight, emission_weight_pos, combination_weight,
            combination_weight_pos, param)
        result_loss_dff_predicted = gradient * epsilon
        old_loss = Loss(X, Y, tag2index, emission_weight, transition_weight,
                        emission_weight_pos, combination_weight,
                        combination_weight_pos, param)
        for index in itertools.product(range(T + 1), range(T), range(POS)):
            # Save and restore the exact value: "+= eps; -= eps" does not
            # round-trip in floating point and would leave drift behind.
            saved = combination_weight_pos[index]
            combination_weight_pos[index] = saved + epsilon
            try:
                result_loss_dff_actual[index] = Loss(
                    X, Y, tag2index, emission_weight, transition_weight,
                    emission_weight_pos, combination_weight,
                    combination_weight_pos, param) - old_loss
            finally:
                combination_weight_pos[index] = saved

    difference = np.abs(result_loss_dff_actual - result_loss_dff_predicted)
    # argpartition/partition reject a kth beyond the array size, so clamp
    # the "top ten" to the number of entries that actually exist.
    top_k = min(10, difference.size)
    if top_k:
        worst_indices = np.argpartition(difference, -top_k, axis=None)[-top_k:]
        worst_values = np.partition(difference, -top_k, axis=None)[-top_k:]
    else:
        worst_indices = np.array([], dtype=int)
        worst_values = np.array([])
    print('********combination pos********')
    print('difference: {}, argmax: {}, max: {}'.format(
        difference, worst_indices, worst_values))
def get_loss_grad(w):
    """Return ``(loss, grads)`` for the packed parameter vector ``w``.

    ``w`` is split into a ``(T + 1, T + 1)`` transition block followed by a
    ``(T, V)`` emission block.  The loss and both gradients are evaluated on
    ``train_X`` / ``train_Y`` with ``param=Lambda``.  The gradients are
    flattened and concatenated in the same transition-then-emission order as
    ``w``.  ``T``, ``V``, ``train_X``, ``train_Y``, ``tag2index``,
    ``word2index`` and ``Lambda`` are read from the enclosing scope.
    """
    split_at = (T + 1) * (T + 1)  # number of transition parameters
    with HiddenPrints():
        transition_weight = w[:split_at].reshape((T + 1, T + 1))
        # Open-ended slice: everything after the transition block must fit
        # the emission shape exactly, otherwise reshape raises.
        emission_weight = w[split_at:].reshape((T, V))

        loss = Loss(train_X, train_Y, tag2index,
                    emission_weight, transition_weight, param=Lambda)
        grads_transition = GradientTransition(
            train_X, train_Y, tag2index,
            emission_weight, transition_weight, param=Lambda)
        grads_emission = GradientEmission(
            train_X, train_Y, tag2index, word2index,
            emission_weight, transition_weight, param=Lambda)

        flat_parts = [g.reshape(-1)
                      for g in (grads_transition, grads_emission)]
        grads = np.concatenate(flat_parts)
    return loss, grads
def testEmissionAll(epsilon, X, Y, tag2index, word2index, emission_weight,
                    transition_weight, emission_weight_pos,
                    combination_weight, combination_weight_pos, param):
    """Finite-difference check of ``GradientEmission``.

    The predicted change of the loss is ``gradient * epsilon``.  The actual
    change is measured by raising one entry of ``emission_weight`` by
    ``epsilon`` at a time and re-evaluating ``Loss``.  The absolute
    differences are printed together with the flat indices and values of the
    (up to) ten largest entries.

    Args:
        epsilon: Size of the perturbation applied to each weight.
        X, Y: Data forwarded unchanged to ``Loss`` and the gradient function.
        tag2index, word2index: Mappings whose lengths give the tag count
            ``T`` and vocabulary size ``V``.
        emission_weight: Array indexed as ``[tag, word]`` over ``(T, V)``.
            Entries are perturbed in place and restored to their exact
            previous value afterwards.
        transition_weight, emission_weight_pos, combination_weight,
        combination_weight_pos: Weights forwarded unchanged.
        param: Forwarded unchanged to ``Loss`` and the gradient function.
    """
    T, V = len(tag2index), len(word2index)
    result_loss_dff_actual = np.zeros((T, V))

    # Model calls run with printing suppressed; the report below does not.
    with HiddenPrints():
        gradient = GradientEmission(
            X, Y, tag2index, word2index, emission_weight, transition_weight,
            emission_weight_pos, combination_weight, combination_weight_pos,
            param)
        result_loss_dff_predicted = gradient * epsilon
        old_loss = Loss(X, Y, tag2index, emission_weight, transition_weight,
                        emission_weight_pos, combination_weight,
                        combination_weight_pos, param)
        for index in itertools.product(range(T), range(V)):
            # Save and restore the exact value: "+= eps; -= eps" does not
            # round-trip in floating point and would leave drift behind.
            saved = emission_weight[index]
            emission_weight[index] = saved + epsilon
            try:
                result_loss_dff_actual[index] = Loss(
                    X, Y, tag2index, emission_weight, transition_weight,
                    emission_weight_pos, combination_weight,
                    combination_weight_pos, param) - old_loss
            finally:
                emission_weight[index] = saved

    difference = np.abs(result_loss_dff_actual - result_loss_dff_predicted)
    # argpartition/partition reject a kth beyond the array size, so clamp
    # the "top ten" to the number of entries that actually exist.
    top_k = min(10, difference.size)
    if top_k:
        worst_indices = np.argpartition(difference, -top_k, axis=None)[-top_k:]
        worst_values = np.partition(difference, -top_k, axis=None)[-top_k:]
    else:
        worst_indices = np.array([], dtype=int)
        worst_values = np.array([])
    print('********emission********')
    print('difference: {}, argmax: {}, max: {}'.format(
        difference, worst_indices, worst_values))
def testTransitionAll(epsilon, X, Y, tag2index, emission_weight,
                      transition_weight, emission_weight_pos,
                      combination_weight, combination_weight_pos, param):
    """Finite-difference check of ``GradientTransition``.

    The predicted change of the loss is ``gradient * epsilon``.  The actual
    change is measured by raising one entry of ``transition_weight`` by
    ``epsilon`` at a time and re-evaluating ``Loss``.  The absolute
    differences are printed together with the flat index and value of the
    largest one.

    Args:
        epsilon: Size of the perturbation applied to each weight.
        X, Y: Data forwarded unchanged to ``Loss`` and the gradient function.
        tag2index: Mapping whose length gives the tag count ``T``.
        transition_weight: Array indexed as ``[tag1, tag2]``; presumably
            ``(T + 1, T + 1)`` like the result buffers (confirm against the
            caller).  Entries are perturbed in place and restored to their
            exact previous value afterwards.
        emission_weight, emission_weight_pos, combination_weight,
        combination_weight_pos: Weights forwarded unchanged.
        param: Forwarded unchanged to ``Loss`` and the gradient function.
    """
    T = len(tag2index)
    result_loss_dff_actual = np.zeros((T + 1, T + 1))

    # Model calls run with printing suppressed; the report below does not.
    with HiddenPrints():
        gradient = GradientTransition(
            X, Y, tag2index, emission_weight, transition_weight,
            emission_weight_pos, combination_weight, combination_weight_pos,
            param)
        result_loss_dff_predicted = gradient * epsilon
        old_loss = Loss(X, Y, tag2index, emission_weight, transition_weight,
                        emission_weight_pos, combination_weight,
                        combination_weight_pos, param)
        # Indices run over -1 .. T-1.  In the (T + 1, T + 1) result buffer,
        # -1 is the last row/column, so every cell is visited exactly once.
        for index in itertools.product(range(-1, T), range(-1, T)):
            # Save and restore the exact value: "+= eps; -= eps" does not
            # round-trip in floating point and would leave drift behind.
            saved = transition_weight[index]
            transition_weight[index] = saved + epsilon
            try:
                result_loss_dff_actual[index] = Loss(
                    X, Y, tag2index, emission_weight, transition_weight,
                    emission_weight_pos, combination_weight,
                    combination_weight_pos, param) - old_loss
            finally:
                transition_weight[index] = saved

    difference = np.abs(result_loss_dff_actual - result_loss_dff_predicted)
    print('********transition********')
    print('difference: {}, argmax: {}, max: {}'.format(
        difference, np.argmax(difference), np.max(difference)))
def trainDecay(X, Y, X_dev, Y_dev_raw, tag2index, word2index, postag2index,
               link_weight_sum, iteration=20, random_seed=1):
    """Train five weight blocks with a per-epoch decaying update step.

    Each epoch draws ``len(X)`` training examples at random.  For each one,
    the feature counts of the gold labels and of the ``viterbi`` prediction
    are computed, and their difference divided by the 1-based epoch number is
    added to the weights.  After each epoch the training loss and the dev F1
    are printed, and the weights are snapshotted whenever the dev F1 improves
    on the best seen so far.

    Args:
        X, Y: Training inputs and gold label sequences, indexed in parallel.
        X_dev, Y_dev_raw: Development data forwarded to ``eval``.
        tag2index, word2index, postag2index: Mappings whose lengths give the
            sizes ``T``, ``V`` and ``POS`` of the weight arrays.
        link_weight_sum: Forwarded unchanged to ``viterbi``, ``Loss`` and
            ``eval``.
        iteration: Number of epochs.
        random_seed: Seed passed to ``np.random.seed``.

    Returns:
        Tuple ``(transition, emission, emission_pos, combination,
        combination_pos)`` holding the snapshot with the best dev F1.  If no
        epoch reaches a dev F1 above 0 (including ``iteration == 0``), copies
        of the final weights are returned instead.
    """
    T, V, POS, D = (len(tag2index), len(word2index), len(postag2index),
                    len(X))
    f1_opti, counter = 0, 1
    transition_weight = np.zeros((T + 1, T + 1))
    emission_weight = np.zeros((T, V))
    emission_weight_pos = np.zeros((T, POS))
    combination_weight = np.zeros((T + 1, T, V))
    combination_weight_pos = np.zeros((T + 1, T, POS))
    # None until some epoch beats f1_opti; avoids returning unbound names.
    best_weights = None

    # NOTE(review): this seeds NumPy's generator, but sampling below goes
    # through `random.randint`.  If `random` is the standard-library module
    # the seed has no effect on the sampling order - confirm the import.
    np.random.seed(random_seed)
    for i in range(iteration):
        for _ in range(D):
            k = random.randint(0, D - 1)
            x, y = X[k], Y[k]
            (transition_count, emission_count, emission_count_pos,
             combination_count, combination_count_pos) = feature_count(
                 x, y, tag2index, word2index, postag2index)
            y_pred = viterbi(x, tag2index, emission_weight,
                             transition_weight, emission_weight_pos,
                             combination_weight, combination_weight_pos,
                             link_weight_sum)
            (transition_count_pred, emission_count_pred,
             emission_count_pos_pred, combination_count_pred,
             combination_count_pos_pred) = feature_count(
                 x, y_pred, tag2index, word2index, postag2index)
            # Gold-minus-predicted counts, scaled by 1 / epoch number.
            transition_weight += (
                transition_count - transition_count_pred) / counter
            emission_weight += (
                emission_count - emission_count_pred) / counter
            emission_weight_pos += (
                emission_count_pos - emission_count_pos_pred) / counter
            combination_weight += (
                combination_count - combination_count_pred) / counter
            combination_weight_pos += (
                combination_count_pos - combination_count_pos_pred) / counter

        loss = Loss(X, Y, tag2index, word2index, postag2index,
                    transition_weight, emission_weight, emission_weight_pos,
                    combination_weight, combination_weight_pos,
                    link_weight_sum)
        # `eval` is called with nine arguments, so it is presumably a project
        # helper shadowing the builtin (defined outside this block).
        with HiddenPrints():
            prec, rec, f1 = eval(X_dev, Y_dev_raw, tag2index,
                                 emission_weight, transition_weight,
                                 emission_weight_pos, combination_weight,
                                 combination_weight_pos, link_weight_sum)
        print(
            'training epoch: {} , training loss: {:.4f} dev F1: {:.4f}'.format(
                i + 1, loss, f1))
        counter += 1
        if f1 > f1_opti:
            # Copy: the live arrays keep being updated in place by `+=`.
            best_weights = (transition_weight.copy(),
                            emission_weight.copy(),
                            emission_weight_pos.copy(),
                            combination_weight.copy(),
                            combination_weight_pos.copy())
            f1_opti = f1
            print('better parameters found!')

    if best_weights is None:
        best_weights = (transition_weight.copy(),
                        emission_weight.copy(),
                        emission_weight_pos.copy(),
                        combination_weight.copy(),
                        combination_weight_pos.copy())
    return best_weights
def resolve_corefs(text):
    """Return ``text`` rebuilt piece by piece with co-references resolved.

    ``text`` is parsed with ``nlp`` and the result is iterated.  An item
    whose string form contains ``"they "`` is appended unchanged (stripped,
    preceded by one space).  Every other item is passed to
    ``Coref.one_shot_coref`` with the text accumulated so far as context, and
    the resolved utterances are appended, joined by spaces.

    NOTE(review): the resolved branch appends without a leading separator
    while the ``"they "`` branch adds one - confirm this is intended.
    """
    print("resolving pronoun co-refs...")
    document = nlp(text)
    # Only the construction of the resolver runs with printing suppressed.
    with HiddenPrints():
        coref = Coref()

    context = ""
    for sentence in document:
        utterance = str(sentence)
        if "they " in utterance:
            # Items mentioning "they" bypass the resolver.
            context = context + " " + utterance.strip()
        else:
            coref.one_shot_coref(utterances=utterance.strip(),
                                 context=context)
            resolved_utterance_text = coref.get_resolved_utterances()
            context = context + " ".join(resolved_utterance_text).strip()
    return context
def train(X, Y, X_test, Y_test_raw, tag2index, word2index, iteration=20,
          random_seed=1):
    """Train transition and emission weights, keeping the best by test F1.

    Each epoch draws ``len(X)`` training examples at random.  For each one,
    the difference between the feature counts of the gold labels and of the
    ``viterbi`` prediction is added to the weights.  After each epoch the
    training loss and the test F1 are printed, and the weights are
    snapshotted whenever the F1 improves on the best seen so far.

    ``link_weight_sum`` is not a parameter; it is read from the enclosing
    scope and forwarded to ``viterbi``, ``Loss`` and ``eval``.

    Args:
        X, Y: Training inputs and gold label sequences, indexed in parallel.
        X_test, Y_test_raw: Evaluation data forwarded to ``eval``.
        tag2index, word2index: Mappings whose lengths give the sizes ``T``
            and ``V`` of the weight arrays.
        iteration: Number of epochs.
        random_seed: Seed passed to ``np.random.seed``.

    Returns:
        Tuple ``(transition_weight, emission_weight)`` holding the snapshot
        with the best test F1.  If no epoch reaches an F1 above 0 (including
        ``iteration == 0``), copies of the final weights are returned.
    """
    T, V, D = len(tag2index), len(word2index), len(X)
    f1_opti = 0
    transition_weight = np.zeros((T + 1, T + 1))
    emission_weight = np.zeros((T, V))
    # None until some epoch beats f1_opti; avoids returning unbound names.
    best_weights = None

    # NOTE(review): this seeds NumPy's generator, but sampling below goes
    # through `random.randint`.  If `random` is the standard-library module
    # the seed has no effect on the sampling order - confirm the import.
    np.random.seed(random_seed)
    for i in range(iteration):
        for _ in range(D):
            k = random.randint(0, D - 1)
            x, y = X[k], Y[k]
            transition_count, emission_count = feature_count(
                x, y, tag2index, word2index)
            y_pred = viterbi(x, tag2index, emission_weight,
                             transition_weight, link_weight_sum)
            transition_count_pred, emission_count_pred = feature_count(
                x, y_pred, tag2index, word2index)
            transition_weight += (transition_count - transition_count_pred)
            emission_weight += (emission_count - emission_count_pred)

        loss = Loss(X, Y, tag2index, word2index, transition_weight,
                    emission_weight, link_weight_sum)
        # `eval` is called with six arguments, so it is presumably a project
        # helper shadowing the builtin (defined outside this block).
        with HiddenPrints():
            prec, rec, f1 = eval(X_test, Y_test_raw, tag2index,
                                 emission_weight, transition_weight,
                                 link_weight_sum)
        print('training epoch: {} , training loss: {:.4f} test F1: {:.4f}'.
              format(i + 1, loss, f1))
        if f1 > f1_opti:
            # Snapshot by copy.  The live arrays are updated in place by
            # `+=`, so storing references would make the "best" weights
            # silently follow every later update.
            best_weights = (transition_weight.copy(), emission_weight.copy())
            f1_opti = f1
            print('better parameters found!')

    if best_weights is None:
        best_weights = (transition_weight.copy(), emission_weight.copy())
    return best_weights
def get_loss_grad(w):
    """Return ``(loss, grads)`` for the packed parameter vector ``w``.

    ``w`` is split, in order, into blocks of shape ``(T + 1, T + 1)``
    (transition), ``(T, V)`` (emission), ``(T, POS)`` (POS emission),
    ``(T + 1, T, V)`` (combination) and ``(T + 1, T, POS)`` (POS
    combination).  The loss and one gradient per block are evaluated on
    ``train_X`` / ``train_Y`` with ``param=Lambda``.  The gradients are
    flattened and concatenated in the same block order as ``w``.  ``T``,
    ``V``, ``POS``, the training data, the index mappings and ``Lambda`` are
    read from the enclosing scope.
    """
    # Cumulative end offsets of the first four blocks inside `w`.
    end_transition = (T + 1) * (T + 1)
    end_emission = end_transition + T * V
    end_emission_pos = end_emission + T * POS
    end_combination = end_emission_pos + (T + 1) * T * V

    with HiddenPrints():
        transition_weight = w[:end_transition].reshape((T + 1, T + 1))
        emission_weight = w[end_transition:end_emission].reshape((T, V))
        emission_weight_pos = w[end_emission:end_emission_pos].reshape(
            (T, POS))
        combination_weight = w[end_emission_pos:end_combination].reshape(
            (T + 1, T, V))
        # Open-ended slice: whatever remains must fit the last shape
        # exactly, otherwise reshape raises.
        combination_weight_pos = w[end_combination:].reshape((T + 1, T, POS))

        # Every model function takes the weights in this positional order.
        weights = (emission_weight, transition_weight, emission_weight_pos,
                   combination_weight, combination_weight_pos)

        loss = Loss(train_X, train_Y, tag2index, *weights, param=Lambda)
        grads_transition = GradientTransition(
            train_X, train_Y, tag2index, *weights, param=Lambda)
        grads_emission = GradientEmission(
            train_X, train_Y, tag2index, word2index, *weights, param=Lambda)
        grads_emission_pos = GradientEmissionPOS(
            train_X, train_Y, tag2index, postag2index, *weights, param=Lambda)
        grads_combination = GradientCombination(
            train_X, train_Y, tag2index, word2index, *weights, param=Lambda)
        grads_combination_pos = GradientCombinationPOS(
            train_X, train_Y, tag2index, postag2index, *weights, param=Lambda)

        ordered_grads = (grads_transition, grads_emission,
                         grads_emission_pos, grads_combination,
                         grads_combination_pos)
        grads = np.concatenate([g.reshape(-1) for g in ordered_grads])
    return loss, grads