# NOTE(review): this chunk has lost its original line breaks and indentation —
# everything below sits on ONE physical line and is not valid Python as
# written. The leading `line = ...` statements look like the tail of a
# transcript-parsing loop whose `for` header is outside this view; restore the
# original layout before making code changes.
#
# Apparent intent (to be confirmed against the original file):
#   1. Strip the id token from `line`, wrap each remaining token in angle
#      brackets ('<x>'), and store the result as the reference transcript
#      `correct_phrases[choice_id]`.
#   2. Build a CostMeasurer from the YAML config `config_neural` and print a
#      few sanity values (vocabulary mapping, counts of hypothesis sets and
#      references).
#   3. Oracle pass: for every phrase id, tokenise each hypothesis in
#      `phrases[phrase_id]`, compute `dist(...)` against the tokenised
#      reference, and keep the hypothesis with the smallest distance
#      (key=lambda x: x[1][0] — `dist` presumably returns
#      (distance, i, r, s); TODO confirm). Accumulate the insertion /
#      replacement(?) / substitution counts into `oracle_per` and the
#      reference length into `total_length` (denominator for a PER figure,
#      presumably computed later).
#
# WARNING(security): `yaml.load(open(config_neural, 'r'))` — plain yaml.load
# on an external file allows arbitrary object construction; prefer
# yaml.safe_load. The file handle is also never closed. Flagged only, not
# changed here.
#
# STYLE(review): `reduce(lambda a, b: a + b, map(...))` is a quadratic string
# concatenation; ''.join(...) would be the idiomatic fix once the layout is
# restored.
line = line[1:] line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line)) correct_phrases[choice_id] = line cm = CostMeasurer(yaml.load(open(config_neural, 'r'))) print cm.numbers_from_text print len(phrases) print len(correct_phrases) oracle_per = {'i': 0., 'r': 0., 's': 0.} total_length = 0. for phrase_id in phrases.keys(): correct_tokenised = cm.tokenise(correct_phrases[phrase_id]) rank_list = [(p[0], dist(cm.tokenise(p[0]), correct_tokenised)) for p in phrases[phrase_id]] best_oracle, editions = min(rank_list, key=lambda x: x[1][0]) best_oracle = cm.tokenise(best_oracle) distance, i, r, s = editions print 'NEXT:' print distance, i, r, s oracle_per['i'] += i oracle_per['r'] += r oracle_per['s'] += s total_length += len(correct_tokenised)
# NOTE(review): same mangling as the previous chunk — one collapsed physical
# line, not valid Python as written. The opening `choice_id = ...` statements
# are the tail of an n-best-list parsing loop (its `for` header, and the
# definitions of `ac_cost`, `trans_cost`, `lm_cost`, `name`, `beta`, are not
# visible here), and the trailing `beta += 0.1` implies an enclosing
# beta-sweep loop that is also outside this view. Restore the original layout
# before editing.
#
# Apparent intent (to be confirmed against the original file):
#   1. Parse one n-best entry: `choice_id` is the hyphen-split utterance id,
#      the hypothesis tokens are `line[1:-3]` (empty hypotheses become ''),
#      each token wrapped in angle brackets as in the reference-building
#      chunk. Entries are grouped under the phrase id
#      '-'.join(choice_id[:-1]) together with their acoustic / translation /
#      LM costs and the n-best rank choice_id[-1].
#   2. Rescoring pass: for every phrase id, pick the hypothesis minimising
#      the combined score  ac_cost + 4 * trans_cost + beta * lm_cost
#      (the hard-coded 4 is an unexplained weight — TODO confirm its origin),
#      tokenise it, and compute `dist(...)` against the tokenised reference.
#   3. Sanity cross-check: `edit_distance(...)` is recomputed independently
#      and a "!!!" marker printed if it disagrees with the distance returned
#      by `dist` — apparently a debugging consistency check between two edit-
#      distance implementations.
#   4. Accumulate i/r/s counts into `neural_per`, print a per-beta summary
#      line, and step beta by 0.1 for the (out-of-view) sweep loop.
#
# NOTE(review): `total_phrases = 0` is assigned but never used in this view;
# it may be used by code outside this chunk — verify before removing.
choice_id = line[0].split('-') line = line[1:-3] if line != []: line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line)) else: line = '' p_id = '-'.join(choice_id[:-1]) if p_id in phrases: phrases[p_id].append( (line, ac_cost, trans_cost, lm_cost, choice_id[-1]) ) else: phrases[p_id] = [(line, ac_cost, trans_cost, lm_cost, choice_id[-1])] total_phrases = 0 neural_per = {'i': 0, 'r': 0, 's': 0} for phrase_id in phrases.keys(): correct_tokenised = cm.tokenise(correct_phrases[phrase_id]) rank_list = [] for p in phrases[phrase_id]: rank_list.append((p[0], p[1], p[2], p[3])) best_neural = cm.tokenise(min(rank_list, key=lambda x: x[1] + 4 * x[2] + beta * (x[3]))[0]) neural_distance, i, r, s = dist(correct_tokenised, best_neural) if edit_distance(correct_tokenised, best_neural) != neural_distance: print "!!!" print correct_tokenised print best_neural neural_per['i'] += i neural_per['r'] += r neural_per['s'] += s print name, beta, sum(neural_per.values()), neural_per['i'], neural_per['r'], neural_per['s'] beta += 0.1
# NOTE(review): again one collapsed physical line — not valid Python as
# written; restore line breaks before editing. This chunk appears to be
# mostly sequential top-level statements, so its reconstruction is less
# ambiguous than the two loops above, but it still depends on objects defined
# elsewhere (`cm`, `np`, `theano`, `tensor`, `dict_union`, `sys.argv`).
#
# Apparent intent (Blocks/Theano model introspection — to be confirmed):
#   1. Grab the SequenceGenerator brick from the trained model
#      (`cm.main_loop.model.get_top_bricks()[0]` — assumes it is the first
#      top brick; TODO confirm).
#   2. Build a symbolic graph that feeds an integer token matrix `x` through
#      readout feedback and the fork, then applies the recurrent transition
#      with return_initial_states=False, collecting the per-step hidden
#      states named in `seq_gen._state_names` (private attribute access —
#      fragile across Blocks versions). The commented-out dict_union call
#      suggests contexts/states were once passed in as well.
#   3. Compile `get_states` with theano.function and run it on the tokenised
#      probe string '<d><u><p><a>>' (note the unbalanced trailing '>' — is
#      that intentional? verify), reshaped to (time, batch=1).
#   4. Score a boot-up sequence read from the file named by sys.argv[2]
#      (newline-stripped via x[:-1] — drops the last char of every line,
#      which also eats a final character if the file lacks a trailing
#      newline; the file handle is never closed).
#   5. Collect parameters whose names contain 'initial' from the model's
#      parameter dict — presumably the initial-state parameters that the
#      (commented-out) code above would overwrite with the mean extracted
#      states. Left as dead code here; verify before deleting.
seq_gen = cm.main_loop.model.get_top_bricks()[0] input_seq = tensor.lmatrix('x') batch_size = 1 feedback = seq_gen.readout.feedback(input_seq) inputs = seq_gen.fork.apply(feedback, as_dict=True) results = seq_gen.transition.apply( mask=None, return_initial_states=False, as_dict=True, **dict_union(inputs, {}, {})) # **dict_union(inputs, seq_gen._state_names, seq_gen._context_names)) states = {name: results[name] for name in seq_gen._state_names} get_states = theano.function([input_seq], states) example_in = np.array(cm.tokenise('<d><u><p><a>>')) new_states = get_states(example_in.reshape(example_in.shape[0], 1)) bootup_seq = ''.join(map(lambda x: x[:-1], open(sys.argv[2]).readlines())) print cm.cost(bootup_seq) states_list = new_states.keys() # for (i, key) in enumerate(par_list): # all_param_dict[key] = np.mean(new_states[states_list[i]], axis=0).reshape(all_param_dict[key].shape) mod = cm.main_loop.model parameters = filter(lambda x: 'initial' in x, mod._parameter_dict.keys())