Example #1
    # drop the leading id token, wrap every remaining token in angle brackets
    # and store the concatenation as the reference phrase for this id
    line = line[1:]
    line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
    correct_phrases[choice_id] = line


# build the cost measurer from the neural model's YAML experiment config
cm = CostMeasurer(yaml.load(open(config_neural, 'r')))
print cm.numbers_from_text

print len(phrases)
print len(correct_phrases)

# accumulated per-edit-type counts (i, r, s as returned by dist) for the oracle
# hypotheses, plus the total reference length used for normalisation
oracle_per = {'i': 0., 'r': 0., 's': 0.}
total_length = 0.

for phrase_id in phrases.keys():
    correct_tokenised = cm.tokenise(correct_phrases[phrase_id])

    # pair every hypothesis with its edit statistics against the reference and
    # keep the one with the smallest edit distance (the oracle hypothesis)
    rank_list = [(p[0], dist(cm.tokenise(p[0]), correct_tokenised)) for p in phrases[phrase_id]]

    best_oracle, editions = min(rank_list, key=lambda x: x[1][0])
    best_oracle = cm.tokenise(best_oracle)

    distance, i, r, s = editions
    print 'NEXT:'
    print distance, i, r, s
    oracle_per['i'] += i
    oracle_per['r'] += r
    oracle_per['s'] += s

    total_length += len(correct_tokenised)
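
# Not part of the original snippet: one possible summary of the loop above,
# assuming PER = (insertions + deletions + substitutions) / total reference length.
print 'oracle edit counts:', oracle_per
print 'oracle PER:', sum(oracle_per.values()) / total_length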
    
Example #2
            # split the id field into the phrase id and the hypothesis index,
            # drop the leading id and the trailing cost fields, and re-wrap the
            # remaining tokens in angle brackets
            choice_id = line[0].split('-')
            line = line[1:-3]
            if line != []:
                line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
            else:
                line = ''
            # group the hypotheses by phrase id (everything before the last '-')
            p_id = '-'.join(choice_id[:-1])
            if p_id in phrases:
                phrases[p_id].append( (line, ac_cost, trans_cost, lm_cost, choice_id[-1]) )
            else:
                phrases[p_id] = [(line, ac_cost, trans_cost, lm_cost, choice_id[-1])]

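        # phrases now maps each phrase id to its n-best hypotheses together
        # with the acoustic, translation and language-model costs parsed above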
        total_phrases = 0
        # per-edit-type counts for the hypotheses selected by the neural rescoring
        neural_per = {'i': 0, 'r': 0, 's': 0}
        for phrase_id in phrases.keys():
            correct_tokenised = cm.tokenise(correct_phrases[phrase_id])
            rank_list = []
            for p in phrases[phrase_id]:
                rank_list.append((p[0], p[1], p[2], p[3]))
                
            # rescore: pick the hypothesis that minimises
            # ac_cost + 4 * trans_cost + beta * lm_cost
            best_neural = cm.tokenise(min(rank_list, key=lambda x: x[1] + 4 * x[2] + beta * (x[3]))[0])
            neural_distance, i, r, s = dist(correct_tokenised, best_neural)
            # sanity check: dist() should agree with the plain edit distance
            if edit_distance(correct_tokenised, best_neural) != neural_distance:
                print "!!!"
                print correct_tokenised
                print best_neural
            neural_per['i'] += i
            neural_per['r'] += r
            neural_per['s'] += s
        print name, beta, sum(neural_per.values()), neural_per['i'], neural_per['r'], neural_per['s']
        beta += 0.1
Example #3
import sys

import numpy as np
import theano
from theano import tensor
from blocks.utils import dict_union

# cm is assumed to be the CostMeasurer built as in Example #1;
# the sequence generator is the top brick of its trained Blocks model
seq_gen = cm.main_loop.model.get_top_bricks()[0]


# symbolic input: a matrix of token ids (time x batch)
input_seq = tensor.lmatrix('x')

batch_size = 1
# embed the input through the generator's readout feedback and fork the result
# into the inputs expected by the recurrent transition
feedback = seq_gen.readout.feedback(input_seq)
inputs = seq_gen.fork.apply(feedback, as_dict=True)
results = seq_gen.transition.apply(
    mask=None, return_initial_states=False, as_dict=True,
    **dict_union(inputs, {}, {}))
    # **dict_union(inputs, seq_gen._state_names, seq_gen._context_names))

# collect the recurrent state trajectories and compile a function that
# evaluates them for a given input sequence
states = {name: results[name] for name in seq_gen._state_names}
get_states = theano.function([input_seq], states)
example_in = np.array(cm.tokenise('<d><u><p><a>>'))
new_states = get_states(example_in.reshape(example_in.shape[0], 1))
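# new_states maps each recurrent state name to its values at every step of the
# example sequence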


# read the bootup sequence, stripping the trailing newline from every line
bootup_seq = ''.join(map(lambda x: x[:-1], open(sys.argv[2]).readlines()))

print cm.cost(bootup_seq)

states_list = new_states.keys()

# for (i, key) in enumerate(par_list):
#     all_param_dict[key] = np.mean(new_states[states_list[i]], axis=0).reshape(all_param_dict[key].shape)

mod = cm.main_loop.model
# names of the parameters that hold the model's initial recurrent states
parameters = filter(lambda x: 'initial' in x, mod._parameter_dict.keys())
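
# A possible continuation (an assumption, not part of the original snippet),
# mirroring the commented-out block above: overwrite each 'initial_*' parameter
# with the time-averaged state obtained from new_states. The pairing of
# parameter names with state names by sorted order is a guess.
# for name, state_name in zip(sorted(parameters), sorted(states_list)):
#     param = mod._parameter_dict[name]
#     param.set_value(np.mean(new_states[state_name], axis=0)
#                     .reshape(param.get_value().shape))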