def streaming_training(next_character, array1, array2, lv1, lv2, trellis, n):
    """Reinforce the "no space" reading when the models wrongly prefer a space.

    The context is the last ``n - 1`` characters of the best path currently
    in ``trellis`` (``trellis[0][1]``).  The combined value returned by
    ``llv.recover_frequency`` for ``context + next_character`` is compared
    with the combined value for ``context + ' '``.  If the space variant
    scores strictly higher, the letter vector of the no-space string is added
    to the slot ``len(context + next_character) - 1`` of both arrays.

    Args:
        next_character: The character that actually follows the context.
        array1: Indexable, item-assignable container paired with ``lv1``;
            updated in place.
        array2: Indexable, item-assignable container paired with ``lv2``;
            updated in place.
        lv1: Passed through to ``llv.recover_frequency`` / ``llv.get_letter_vec``
            together with ``array1``.
        lv2: Same as ``lv1`` but paired with ``array2``.
        trellis: Sequence of paths; only ``trellis[0][1]`` (the text of the
            first path) is read.
        n: Context window size; ``n - 1`` trailing characters are used.
            NOTE(review): for ``n == 1`` the slice ``[-n + 1:]`` is ``[0:]``,
            i.e. the whole path -- presumably callers always use n >= 2.

    Returns:
        The tuple ``(array1, array2)`` on every path, so callers can always
        unpack the result.
    """
    if next_character == ' ':
        # No training because it'll correctly classify.  The arrays are
        # returned untouched (previously this path returned None, which broke
        # callers that unpack the result).
        return array1, array2

    context = trellis[0][1][-n + 1:]
    nospace = context + next_character
    space = context + ' '

    nospace_val = (llv.recover_frequency(lv1, nospace, array1)
                   + llv.recover_frequency(lv2, nospace, array2))
    space_val = (llv.recover_frequency(lv1, space, array1)
                 + llv.recover_frequency(lv2, space, array2))

    if space_val > nospace_val:
        slot = len(nospace) - 1
        # Deliberately `x = x + v` rather than `x += v`: the element type is
        # not visible here, and augmented assignment could mutate a shared
        # object in place.
        array1[slot] = array1[slot] + llv.get_letter_vec(nospace, lv1)
        array2[slot] = array2[slot] + llv.get_letter_vec(nospace, lv2)

    return array1, array2
def garden_path_accuracy(stripped_text, array, n, original):
    """Beam-search over space insertions while tallying matched spaces.

    Walks ``original`` character by character, starting after its first
    ``n - 1`` characters.  Space characters in ``original`` are not fed to
    the search; they only set a flag.  For every other character each path
    in the beam is extended twice: once with the character alone and once
    with ``' '`` + character.  After a flagged space, every surviving path
    whose second-to-last character is ``' '`` (i.e. it inserted a space
    before the current character) has its counter incremented.

    Relies on the module-level names ``llv`` and ``lv1``.

    Args:
        stripped_text: Unused by this function; kept for interface
            compatibility.
        array: Passed as the third argument to ``llv.recover_frequency``.
        n: Context window size; ``n - 1`` trailing characters of a path form
            the context.
        original: Text containing the reference spaces.

    Returns:
        The final beam: a list of at most 30 entries
        ``[score, text, matched_space_count]`` sorted by descending score.
        If no non-space character follows the initial prefix, the initial
        single-entry list ``[(0, original[:n - 1], 0)]`` is returned.
    """
    trellis = [(0, original[:n - 1], 0)]
    next_space = False

    for count, i in enumerate(original[n - 1:], 1):
        # Progress indicator every 100 characters.
        if count % 100 == 0:
            print(count)

        if i == ' ':
            # Remember that the reference text has a space before the next
            # real character; scored once that character has been processed.
            next_space = True
            continue

        k = []
        for j in trellis:
            context = j[1][-n + 1:]
            no_space = context + i
            space_first = context + ' '
            space_second = context[1:] + ' ' + i

            no_space_cost = llv.recover_frequency(lv1, no_space, array)
            space_first_freq = llv.recover_frequency(lv1, space_first, array)
            space_second_freq = llv.recover_frequency(lv1, space_second, array)
            # The space hypothesis is the mean of the two windows that
            # contain the inserted space.
            space_cost = .50 * space_first_freq + .50 * space_second_freq

            # Lists (not tuples) because the counter is mutated below.
            k.append([j[0] + no_space_cost, j[1] + i, j[2]])
            k.append([j[0] + space_cost, j[1] + ' ' + i, j[2]])

        # Highest score first; keep the 30 best.  Slicing a shorter list just
        # returns all of it, so no separate small-list branch is needed.
        k.sort(key=lambda t: t[0], reverse=True)
        trellis = k[:30]

        if next_space:
            for path in trellis:
                if path[1][-2] == ' ':
                    path[2] = path[2] + 1
            next_space = False

    return trellis
def get_predictions(string, array, lv):
    """Rank every symbol of ``alphabet`` as a continuation of ``string``.

    For each ``letter`` in the module-level ``alphabet`` the candidate
    ``string + letter`` is scored with
    ``llv.recover_frequency(lv, candidate, array[len(candidate) - 1])``.

    Args:
        string: The prefix to extend; must be shorter than ``array``.
        array: Indexable container; the entry at ``len(candidate) - 1`` is
            handed to ``llv.recover_frequency``.
        lv: Passed through to ``llv.recover_frequency``.

    Returns:
        A list of ``(score, letter)`` tuples sorted by descending score.
        Ties keep the order in which the letters appear in ``alphabet``.

    Raises:
        AssertionError: If ``len(string) >= len(array)``.
    """
    assert len(string) < len(array), \
        "string must be shorter than array (no slot for the extended string)"

    scored = []
    for letter in alphabet:
        candidate = string + letter
        score = llv.recover_frequency(lv, candidate, array[len(candidate) - 1])
        scored.append((score, letter))

    # Sort on the score only so that equal scores stay in alphabet order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return scored
def garden_path(stripped_text, array1, array2, n, original, training=0):
    """Beam-search for space insertions in ``stripped_text``.

    The first ``n - 1`` characters seed a single path with score 0.  For each
    following character every path in the beam is extended twice: with the
    character alone, and with ``' '`` + character.  Scores are the sums of
    ``llv.recover_frequency`` over the two (lv, array) pairs.  Paths whose
    trailing context has already been expanded in the current step are
    skipped, so only the best-ranked path per context survives.  After each
    step the 25 highest-scoring paths are kept.

    Relies on the module-level names ``llv``, ``lv1`` and ``lv2`` (and on
    ``streaming_training`` when ``training`` is truthy).

    Args:
        stripped_text: The text to segment.
        array1: Passed to ``llv.recover_frequency`` together with ``lv1``.
        array2: Passed to ``llv.recover_frequency`` together with ``lv2``.
        n: Context window size; ``n - 1`` trailing characters of a path form
            the context.
        original: Currently unused; kept for interface compatibility.
        training: If truthy, ``streaming_training`` is invoked once per
            character with the beam as it stood before that character.

    Returns:
        The final beam: a list of at most 25 ``(score, text)`` tuples sorted
        by descending score.  If ``stripped_text`` has fewer than ``n``
        characters this is the seed ``[(0, stripped_text[:n - 1])]``.
    """
    trellis = [(0, stripped_text[:n - 1])]

    for count, i in enumerate(stripped_text[n - 1:], 1):
        # Progress indicator every 100 characters.
        if count % 100 == 0:
            print(count)

        seen = set()
        k = []
        for j in trellis:
            context = j[1][-n + 1:]

            no_space = context + i
            if no_space not in seen:
                seen.add(no_space)
                no_space_cost = (llv.recover_frequency(lv1, no_space, array1)
                                 + llv.recover_frequency(lv2, no_space, array2))
                k.append((j[0] + no_space_cost, j[1] + i))

            space_first = context + ' '
            if space_first not in seen:
                seen.add(space_first)
                space_first_freq = (llv.recover_frequency(lv1, space_first, array1)
                                    + llv.recover_frequency(lv2, space_first, array2))
                # Inserting a space is scored at 0.9 of the raw value.
                space_cost = .9 * space_first_freq
                k.append((j[0] + space_cost, j[1] + ' ' + i))

        if training:
            # Uses the beam from before this character was added.
            # NOTE(review): the window size is hard-coded to 5 here rather
            # than using `n` -- confirm whether that is intentional.
            array1, array2 = streaming_training(i, array1, array2, lv1, lv2, trellis, 5)

        # Highest score first; keep the 25 best.
        k.sort(key=lambda t: t[0], reverse=True)
        trellis = k[:25]

    return trellis