Example #1
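Loads a CostMeasurer from the 3x512 YAML config, reads the reference transcriptions given as the first argument, and, for every 57-token line, measures how the neural cost grows character by character from each starting position.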
import sys
import yaml
from cost_measurer import CostMeasurer
import numpy
import pickle
import random

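# build the neural cost measurer from the 3x512 config
# (note: PyYAML 5.1+ wants an explicit Loader, e.g. yaml.load(f, Loader=yaml.SafeLoader))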
config_neural = 'configs/mgr/3x512.yaml'
cm = CostMeasurer(yaml.load(open(config_neural, 'r')))

correct = sys.argv[1]
        
with open(correct) as f:
    correct_lines = f.readlines()

plots = []
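# drop the first field (the utterance id) from every line and keep only lines with exactly 57 tokens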
correct_lines = filter(lambda y: len(y) == 57, map(lambda x: x.split()[1:], correct_lines))

print len(correct_lines)

for line in correct_lines:
    print len(plots)
    one_plot = []
    if len(line) == 57:
        for base in range(0, len(line)):
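            # grow the slice line[base:j+1] one character at a time, wrap every character
            # as a '<c>' token, and record how much each added character raises the total cost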
            xs = range(base, len(line))
            line_versions = [line[base:j+1] for j in xs]
            line_versions = map(lambda x: ''.join(map(lambda y: '<' + y + '>', x)), line_versions)
            costs = [0.] + map(lambda x: cm.cost(x), line_versions)
            ys = [costs[i] - costs[i-1] for i in range(1, len(costs))]
            one_plot.append( (line, xs, ys) )
Example #2
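Builds a dictionary of reference transcriptions keyed by utterance id, then, for each architecture and a range of beta values, reads per-utterance neural costs from an '<arch>boot' file together with the baseline hypotheses given as the second argument.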
correct_phrases = {}
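# map each utterance id to its reference transcription, every token wrapped as '<token>'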
for line in correct_lines:
    line = line.split()
    choice_id = line[0]
    line = line[1:]
    line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
    correct_phrases[choice_id] = line



# architectures = ['2x128', '2x256', '2x512', '3x128', '3x256', '3x512', '4x128', '4x256', '4x512']
architectures = ['3x512']

config_neural = 'configs/mgr/2x128.yaml'
cm = CostMeasurer(yaml.load(open(config_neural, 'r')))

beta = 4.
for name in architectures:
    beta = 4.
    while beta < 7.:
        baseline = sys.argv[2]

        neural_costs = {}
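        # read this architecture's per-utterance neural costs from the '<arch>boot' file (id -> cost)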
        for line in open(name + 'boot'):
            split_line = line.split()
            neural_costs[split_line[0]] = float(split_line[1])

        with open(baseline) as f:
            baseline_lines = f.readlines()
    
Example #3
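Excerpt that collects candidate hypotheses per utterance, rebuilds the reference transcriptions, and ranks every utterance's hypotheses by edit distance to the reference so the oracle-best one can be scored.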
    else:
        phrases[choice_id[0]] = [(line, ac_cost, lm_cost)]
        
with open(correct) as f:
    correct_lines = f.readlines()

correct_phrases = {}
for line in correct_lines:
    line = line.split()
    choice_id = line[0]
    line = line[1:]
    line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
    correct_phrases[choice_id] = line


cm = CostMeasurer(yaml.load(open(config_neural, 'r')))
print cm.numbers_from_text

print len(phrases)
print len(correct_phrases)

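# oracle error counts, presumably insertions ('i'), removals ('r') and substitutions ('s')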
oracle_per = {'i': 0., 'r': 0., 's': 0.}
total_length = 0.

for phrase_id in phrases.keys():
    correct_tokenised = cm.tokenise(correct_phrases[phrase_id])

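    # rank this utterance's hypotheses by edit distance to the reference;
    # dist() returns a tuple whose first element is the distance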
    rank_list = [(p[0], dist(cm.tokenise(p[0]), correct_tokenised)) for p in phrases[phrase_id]]
    
    best_oracle, editions = min(rank_list, key=lambda x: x[1][0])
    best_oracle = cm.tokenise(best_oracle)
Example #4
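Excerpt that tokenises the reference for every utterance, takes the baseline's top hypothesis, and measures its edit distance to the reference; the counters set up before the loop track how often each system wins and what kinds of errors it makes.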
    else:
        phrases[choice_id[0]] = [(line, ac_cost, trans_cost, lm_cost, choice_id[1])]
        
with open(correct) as f:
    correct_lines = f.readlines()

correct_phrases = {}
for line in correct_lines:
    line = line.split()
    choice_id = line[0]
    line = line[1:]
    line = reduce(lambda a, b: a + b, map(lambda x: '<' + x + '>', line))
    correct_phrases[choice_id] = line


cm = CostMeasurer(yaml.load(open(config_neural, 'r')))
print cm.numbers_from_text

better_neural = 0.
better_baseline = 0.
total_phrases = 0.
neural_per = {'i': 0., 'r': 0., 's': 0.}
baseline_per = {'i': 0., 'r': 0., 's': 0.}
total_length = 0.
out_neural = open(sys.argv[4], 'wb')
for phrase_id in phrases.keys():
    correct_tokenised = cm.tokenise(correct_phrases[phrase_id])
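    # the first entry of this utterance's hypothesis list is taken as the baseline's best guess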
    bb = phrases[phrase_id][0][0]
    best_baseline = cm.tokenise(bb)
    baseline_distance, i, r, s = dist(correct_tokenised, best_baseline)
    '''
Example #5
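Loads a trained model through CostMeasurer, pulls out its sequence-generator brick, and compiles a Theano function that returns the recurrent hidden states produced while the generator consumes a tokenised input sequence.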
from cost_measurer import CostMeasurer
import numpy as np
import yaml
import sys
from blocks.utils import dict_union, dict_subset
import theano
from theano import tensor
import zipfile

dict_name = sys.argv[1]

cm = CostMeasurer(yaml.load(open(dict_name)))
seq_gen = cm.main_loop.model.get_top_bricks()[0]


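# symbolic input: a (time x batch) matrix of token ids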
input_seq = tensor.lmatrix('x')

batch_size = 1
feedback = seq_gen.readout.feedback(input_seq)
inputs = seq_gen.fork.apply(feedback, as_dict=True)
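# run the recurrent transition over the whole sequence and collect all of its outputs as a dict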
results = seq_gen.transition.apply(
    mask=None, return_initial_states=False, as_dict=True,
    **dict_union(inputs, {}, {}))
    # **dict_union(inputs, seq_gen._state_names, seq_gen._context_names))

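# keep only the recurrent state outputs and compile a function from token ids to those states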
states = {name: results[name] for name in seq_gen._state_names}
get_states = theano.function([input_seq], states)
example_in = np.array(cm.tokenise('<d><u><p><a>'))  # tokenise a short test sequence
new_states = get_states(example_in.reshape(example_in.shape[0], 1))