import sys import yaml from cost_measurer import CostMeasurer import numpy import pickle import random config_neural = 'configs/mgr/3x512.yaml' cm = CostMeasurer(yaml.load(open(config_neural, 'r'))) correct = sys.argv[1] with open(correct) as f: correct_lines = f.readlines() plots = [] correct_lines = filter(lambda y: len(y) == 57, map(lambda x: x.split()[1:], correct_lines)) print len(correct_lines) for line in correct_lines: print len(plots) one_plot = [] if len(line) == 57: for base in range(0, len(line)): xs = range(base, len(line)) line_versions = [line[base:j+1] for j in xs] line_versions = map(lambda x: ''.join(map(lambda y: '<' + y + '>', x)), line_versions) costs = [0.] + map(lambda x: cm.cost(x), line_versions) ys = [costs[i] - costs[i-1] for i in range(1, len(costs))] one_plot.append( (line, xs, ys) )
# -- Reconstructed from a whitespace-mangled paste (chunk of a larger
#    Python 2 script; `correct_lines`, `sys`, `yaml` and `CostMeasurer`
#    come from earlier in the original file).
# Builds an id -> '<t1><t2>...' map of the correct phrases, then sweeps the
# interpolation weight `beta` for each listed architecture.
correct_phrases = {}
for line in correct_lines:
    line = line.split()
    choice_id = line[0]  # first column is the utterance/choice id
    line = line[1:]
    # Concatenate tokens as '<tok>' markers, matching the model vocabulary.
    line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
    correct_phrases[choice_id] = line

# architectures = ['2x128', '2x256', '2x512', '3x128', '3x256', '3x512', '4x128', '4x256', '4x512']
architectures = ['3x512']
config_neural = 'configs/mgr/2x128.yaml'
cm = CostMeasurer(yaml.load(open(config_neural, 'r')))

beta = 4.
for name in architectures:
    beta = 4.  # reset the sweep for every architecture
    while beta < 7.:
        baseline = sys.argv[2]
        # Pre-computed neural costs for this architecture: one
        # "<id> <cost>" pair per line in the '<name>boot' file.
        neural_costs = {}
        for line in open(name + 'boot'):
            split_line = line.split()
            neural_costs[split_line[0]] = float(split_line[1])
        with open(baseline) as f:
            baseline_lines = f.readlines()
        # NOTE(review): the chunk is cut here; the rest of the while-body
        # (including the `beta` increment) lies outside this view.
# NOTE(review): fragment of a larger Python 2 script, collapsed onto one line
# by a whitespace-mangling paste. It begins with a dangling `else:` whose
# matching `if` (and the definitions of `phrases`, `ac_cost`, `lm_cost`,
# `dist`, `config_neural`) are outside this view, so the structure cannot be
# safely reconstructed here. What is visible: it maps phrase ids to their
# '<tok>'-joined correct transcription, then for each hypothesis list ranks
# candidates by edit distance to the correct tokenisation and picks the
# oracle-best one. Left byte-identical; reformat once the full file is known.
else: phrases[choice_id[0]] = [(line, ac_cost, lm_cost)] with open(correct) as f: correct_lines = f.readlines() correct_phrases = {} for line in correct_lines: line = line.split() choice_id = line[0] line = line[1:] line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line)) correct_phrases[choice_id] = line cm = CostMeasurer(yaml.load(open(config_neural, 'r'))) print cm.numbers_from_text print len(phrases) print len(correct_phrases) oracle_per = {'i': 0., 'r': 0., 's': 0.} total_length = 0. for phrase_id in phrases.keys(): correct_tokenised = cm.tokenise(correct_phrases[phrase_id]) rank_list = [(p[0], dist(cm.tokenise(p[0]), correct_tokenised)) for p in phrases[phrase_id]] best_oracle, editions = min(rank_list, key=lambda x: x[1][0]) best_oracle = cm.tokenise(best_oracle)
# NOTE(review): fragment of a larger Python 2 script, collapsed onto one line
# by a whitespace-mangling paste. It starts with a dangling `else:` (matching
# `if` outside this view) and ends with an opening `'''` whose closing quote
# is also outside the view, so it cannot be safely reformatted here. What is
# visible: it builds id -> correct-phrase map, then compares the baseline's
# top hypothesis against the correct tokenisation via `dist`, accumulating
# per-error-type counters and writing to sys.argv[4]. Left byte-identical.
else: phrases[choice_id[0]] = [(line, ac_cost, trans_cost, lm_cost, choice_id[1])] with open(correct) as f: correct_lines = f.readlines() correct_phrases = {} for line in correct_lines: line = line.split() choice_id = line[0] line = line[1:] line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line)) correct_phrases[choice_id] = line cm = CostMeasurer(yaml.load(open(config_neural, 'r'))) print cm.numbers_from_text better_neural = 0. better_baseline = 0. total_phrases = 0. neural_per = {'i': 0., 'r': 0., 's': 0.} baseline_per = {'i': 0., 'r': 0., 's': 0.} total_length = 0. out_neural = open(sys.argv[4], 'wb') for phrase_id in phrases.keys(): correct_tokenised = cm.tokenise(correct_phrases[phrase_id]) bb = phrases[phrase_id][0][0] best_baseline = cm.tokenise(bb) baseline_distance, i, r, s = dist(correct_tokenised, best_baseline) '''
# -- Reconstructed from a whitespace-mangled paste: the original newlines were
#    lost and the whole script was collapsed onto one line.
# Builds a Theano function that exposes the recurrent transition's hidden
# states for an input token sequence, then evaluates it on one example.
from cost_measurer import CostMeasurer
import numpy as np
import yaml
import sys
from blocks.utils import dict_union, dict_subset
import theano
from theano import tensor
import zipfile

dict_name = sys.argv[1]  # path to the model YAML config
# NOTE(review): yaml.load without an explicit Loader is unsafe on untrusted
# input; prefer yaml.safe_load if the config is not fully trusted.
cm = CostMeasurer(yaml.load(open(dict_name)))
seq_gen = cm.main_loop.model.get_top_bricks()[0]

input_seq = tensor.lmatrix('x')  # int64 token ids; fed as (length, 1) below
batch_size = 1
feedback = seq_gen.readout.feedback(input_seq)
inputs = seq_gen.fork.apply(feedback, as_dict=True)
results = seq_gen.transition.apply(
    mask=None,
    return_initial_states=False,
    as_dict=True,
    **dict_union(inputs, {}, {}))
# **dict_union(inputs, seq_gen._state_names, seq_gen._context_names))

# Keep only the named recurrent states out of everything the transition emits.
states = {name: results[name] for name in seq_gen._state_names}
get_states = theano.function([input_seq], states)

example_in = np.array(cm.tokenise('<d><u><p><a>>'))
new_states = get_states(example_in.reshape(example_in.shape[0], 1))