def test_strictness_2(self):
    stream = DataStream(IterableDataset([1, 2, 3, 4, 5, 6]))
    transformer = Batch(stream, ConstantScheme(2), strictness=2)
    assert_equal(
        list(transformer.get_epoch_iterator()),
        [(numpy.array([1, 2]),), (numpy.array([3, 4]),), (numpy.array([5, 6]),)],
    )
def test_batch():
    stream = DataStream(IterableDataset([1, 2, 3, 4, 5]))
    wrapper = Batch(stream, ConstantScheme(2))
    batches = list(wrapper.get_epoch_iterator())
    expected = [(numpy.array([1, 2]),),
                (numpy.array([3, 4]),),
                (numpy.array([5]),)]
    assert len(batches) == len(expected)
    for b, e in zip(batches, expected):
        assert (b[0] == e[0]).all()

    # Check the `strictness` flag
    def try_strict(strictness):
        return list(Batch(stream, ConstantScheme(2), strictness=strictness)
                    .get_epoch_iterator())
    assert_raises(ValueError, try_strict, 2)
    assert len(try_strict(1)) == 2
    stream2 = DataStream(IterableDataset([1, 2, 3, 4, 5, 6]))
    assert len(list(Batch(stream2, ConstantScheme(2), strictness=2)
                    .get_epoch_iterator())) == 3
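
The tests above pin down the strictness argument of Batch: the default (0) keeps the smaller final batch, 1 silently drops the leftover examples, and 2 raises ValueError unless the number of examples is an exact multiple of the batch size. A minimal stand-alone sketch of the same behaviour, assuming Fuel's standard module layout:

from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch

stream = DataStream(IterableDataset([1, 2, 3, 4, 5]))

# strictness=0 (default): the short final batch is kept -> 3 batches
print(len(list(Batch(stream, ConstantScheme(2)).get_epoch_iterator())))

# strictness=1: the leftover example is dropped -> 2 batches
print(len(list(Batch(stream, ConstantScheme(2), strictness=1).get_epoch_iterator())))

# strictness=2: 5 examples do not divide into batches of 2, so this raises
try:
    list(Batch(stream, ConstantScheme(2), strictness=2).get_epoch_iterator())
except ValueError:
    print('ValueError raised, as the tests above expect')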
Example #4
def test_strictness_2_error(self):
    stream = DataStream(IterableDataset([1, 2, 3, 4, 5]))
    transformer = Batch(stream, ConstantScheme(2), strictness=2)
    assert_raises(ValueError, list, transformer.get_epoch_iterator())
from addition import AdditionTask

from fuel.transformers import Mapping, Batch
from fuel.schemes import ConstantScheme

from numpy import swapaxes

def _transpose(data):
    return tuple(swapaxes(array, 0, 1) for array in data if len(array.shape) > 2)

dataset = AdditionTask(17)
data_stream = dataset.get_example_stream()
data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(14))
data_stream = Mapping(data_stream, _transpose)

print next(data_stream.get_epoch_iterator())[0].shape
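
AdditionTask is project-specific, but the Batch -> Mapping(_transpose) pattern above can be exercised with toy data; a minimal sketch (the toy arrays are an illustration, not part of the original task):

import numpy
from numpy import swapaxes
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Mapping

def _transpose(data):
    # swap the batch and time axes of every array with more than two dimensions
    return tuple(swapaxes(array, 0, 1) for array in data if len(array.shape) > 2)

# ten toy "sequences", each of shape (time=5, features=2)
toy = IterableDataset([numpy.zeros((5, 2), dtype='float32') for _ in range(10)])
stream = Mapping(Batch(DataStream(toy), ConstantScheme(4)), _transpose)
# batching yields (batch=4, time=5, features=2); the mapping makes it time-major
print(next(stream.get_epoch_iterator())[0].shape)  # (5, 4, 2)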

Example #8
def train_model(batch_size=100, n_h=50, n_epochs=40):

    # Load the datasets with Fuel
    dictionary = pkl.load(open(DICT_FILE, 'r'))
    dictionary['~'] = len(dictionary)
    reverse_mapping = dict((j, i) for i, j in dictionary.items())

    print("Loading the data")
    train = TextFile(files=[TRAIN_FILE],
                     dictionary=dictionary,
                     unk_token='~',
                     level='character',
                     preprocess=str.lower,
                     bos_token=None,
                     eos_token=None)

    train_stream = DataStream.default_stream(train)

    # organize data in batches and pad shorter sequences with zeros
    train_stream = Batch(train_stream,
                         iteration_scheme=ConstantScheme(batch_size))
    train_stream = Padding(train_stream)

    # ditto for the validation text
    val = TextFile(files=[VAL_FILE],
                   dictionary=dictionary,
                   unk_token='~',
                   level='character',
                   preprocess=str.lower,
                   bos_token=None,
                   eos_token=None)

    val_stream = DataStream.default_stream(val)

    # organize data in batches and pad shorter sequences with zeros
    val_stream = Batch(val_stream,
                       iteration_scheme=ConstantScheme(batch_size))
    val_stream = Padding(val_stream)

    print('Building model')

    # Set the random number generator's seed for consistency
    rng = numpy.random.RandomState(12345)

    x = T.lmatrix('x')
    mask = T.matrix('mask')

    # Construct the LSTM layer
    recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h)

    logreg_layer = LogisticRegression(input=recurrent_layer.output[:-1],
                                      n_in=n_h, n_out=111)

    cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                             x[1:],
                                             mask[1:]) / batch_size

    # create a list of all model parameters to be fit by gradient descent
    params = logreg_layer.params + recurrent_layer.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # update_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    learning_rate = 0.1
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    update_model = theano.function([x, mask], cost, updates=updates)

    evaluate_model = theano.function([x, mask], cost)

    # Define and compile a function for generating a sequence step by step.
    x_t = T.iscalar()
    h_p = T.vector()
    c_p = T.vector()
    h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
    energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b

    energy_exp = T.exp(energy - T.max(energy, 1)[:, None])

    output = energy_exp / energy_exp.sum(1)[:, None]
    single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])

    start_time = time.clock()

    iteration = 0

    for epoch in range(n_epochs):
        print 'epoch:', epoch

        for x_, mask_ in train_stream.get_epoch_iterator():
            iteration += 1

            cross_entropy = update_model(x_.T, mask_.T)

            # Generate some text every 40 minibatches
            if iteration % 40 == 0:
                try:
                    prediction = numpy.ones(111, dtype=config.floatX) / 111.0
                    h_p = numpy.zeros((n_h,), dtype=config.floatX)
                    c_p = numpy.zeros((n_h,), dtype=config.floatX)
                    initial = 'the meaning of life is '
                    sentence = initial
                    for char in initial:
                        x_t = dictionary[char]
                        prediction, h_p, c_p = single_step(x_t, h_p.flatten(),
                                                           c_p.flatten())
                    sample = numpy.random.multinomial(1, prediction.flatten())
                    for i in range(450):
                        x_t = numpy.argmax(sample)
                        prediction, h_p, c_p = single_step(x_t, h_p.flatten(),
                                                           c_p.flatten())
                        sentence += reverse_mapping[x_t]
                        sample = numpy.random.multinomial(1, prediction.flatten())
                    print 'LSTM: "' + sentence + '"'
                except ValueError:
                    print 'Something went wrong during sentence generation.'

            if iteration % 40 == 0:
                print 'epoch:', epoch, '  minibatch:', iteration
                val_scores = []
                for x_val, mask_val in val_stream.get_epoch_iterator():
                    val_scores.append(evaluate_model(x_val.T, mask_val.T))
                print 'Average validation CE per sentence:', numpy.mean(val_scores)

    end_time = time.clock()
    print('Optimization complete.')
    print('The code ran for %.2fm' % ((end_time - start_time) / 60.))
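
The Batch -> Padding combination used in train_model above is what produces the (x_, mask_) pairs consumed by the training loop: Padding pads every sequence in a batch with zeros up to the longest one and adds a mask source. A small self-contained sketch with made-up sequences:

from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Padding

# three toy "sentences" of different lengths
toy = IterableDataset([[1, 2, 3], [4, 5], [6]])
stream = Padding(Batch(DataStream(toy), ConstantScheme(3)))
batch, mask = next(stream.get_epoch_iterator())
print(batch)  # each row zero-padded to the longest sequence in the batch
print(mask)   # 1.0 where a real token is present, 0.0 over the padding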
def main(mode, save_path, steps, num_batches, load_params):
    chars = (list(string.ascii_uppercase) + list(range(10)) +
             [' ', '.', ',', '\'', '"', '!', '?', '<UNK>'])
    char_to_ind = {char: i for i, char in enumerate(chars)}
    ind_to_char = {v: k for k, v in char_to_ind.iteritems()}

    train_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_train'],
                             char_to_ind, bos_token=None, eos_token=None,
                             level='character')
    valid_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_valid'],
                             char_to_ind, bos_token=None, eos_token=None,
                             level='character')

    vocab_size = len(char_to_ind)
    logger.info('Dictionary size: {}'.format(vocab_size))
    if mode == 'continue':
        continue_training(save_path)
        return
    elif mode == "sample":
        main_loop = load(open(save_path, "rb"))
        generator = main_loop.model.get_top_bricks()[-1]

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1, iterate=True)).get_theano_function()

        states, outputs, costs = [data[:, 0] for data in sample()]
        print("".join([ind_to_char[s] for s in outputs]))

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()

        trans_freqs = numpy.zeros((vocab_size, vocab_size), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        return

    # Experiment configuration
    batch_size = 20
    dim = 650
    feedback_dim = 650

    valid_stream = valid_dataset.get_example_stream()
    valid_stream = Batch(valid_stream,
                         iteration_scheme=ConstantScheme(batch_size))
    valid_stream = Padding(valid_stream)
    valid_stream = Mapping(valid_stream, _transpose)

    # Build the bricks and initialize them

    transition = GatedRecurrent(name="transition", dim=dim,
                                activation=Tanh())
    generator = SequenceGenerator(
        Readout(readout_dim=vocab_size, source_names=transition.apply.states,
                emitter=SoftmaxEmitter(name="emitter"),
                feedback_brick=LookupFeedback(
                    vocab_size, feedback_dim, name='feedback'),
                name="readout"),
        transition,
        weights_init=Uniform(std=0.04), biases_init=Constant(0),
        name="generator")
    generator.push_initialization_config()
    transition.weights_init = Orthogonal()
    transition.push_initialization_config()
    generator.initialize()

    # Build the cost computation graph.
    features = tensor.lmatrix('features')
    features_mask = tensor.matrix('features_mask')
    cost_matrix = generator.cost_matrix(
        features, mask=features_mask)
    batch_cost = cost_matrix.sum()
    cost = aggregation.mean(
        batch_cost,
        features.shape[1])
    cost.name = "sequence_log_likelihood"
    char_cost = aggregation.mean(
        batch_cost, features_mask.sum())
    char_cost.name = 'character_log_likelihood'
    ppl = 2 ** (cost / numpy.log(2))
    ppl.name = 'ppl'
    bits_per_char = char_cost / tensor.log(2)
    bits_per_char.name = 'bits_per_char'
    length = features.shape[0]
    length.name = 'length'

    model = Model(batch_cost)
    if load_params:
        params = load_parameter_values(save_path)
        model.set_parameter_values(params)

    if mode == "train":
        # Give an idea of what's going on.
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_parameters().items()],
                        width=120))

        train_stream = train_dataset.get_example_stream()
        train_stream = Mapping(train_stream, _truncate)
        train_stream = Batch(train_stream,
                             iteration_scheme=ConstantScheme(batch_size))
        train_stream = Padding(train_stream)
        train_stream = Mapping(train_stream, _transpose)

        parameters = model.get_parameter_dict()
        maxnorm_subjects = VariableFilter(roles=[WEIGHT])(parameters.values())
        algorithm = GradientDescent(
            cost=batch_cost,
            parameters=parameters.values(),
            step_rule=CompositeRule([
                StepClipping(1000.),
                AdaDelta(epsilon=1e-8),
                # Restrict(VariableClipping(1.0, axis=0), maxnorm_subjects)
            ]))
        ft = features[:6, 0]
        ft.name = 'feature_example'

        observables = [cost, ppl, char_cost, length, bits_per_char]
        for name, param in parameters.items():
            num_elements = numpy.product(param.get_value().shape)
            norm = param.norm(2) / num_elements ** 0.5
            grad_norm = algorithm.gradients[param].norm(2) / num_elements ** 0.5
            step_norm = algorithm.steps[param].norm(2) / num_elements ** 0.5
            stats = tensor.stack(norm, grad_norm, step_norm, step_norm / grad_norm)
            stats.name = name + '_stats'
            observables.append(stats)
        track_the_best_bpc = TrackTheBest('valid_bits_per_char')
        root_path, extension = os.path.splitext(save_path)

        this_step_monitoring = TrainingDataMonitoring(
            observables + [ft], prefix="this_step", after_batch=True)
        average_monitoring = TrainingDataMonitoring(
            observables + [algorithm.total_step_norm,
                           algorithm.total_gradient_norm], 
            prefix="average",
            every_n_batches=10)
        valid_monitoring = DataStreamMonitoring(
            observables, prefix="valid",
            every_n_batches=1500, before_training=False,
            data_stream=valid_stream)
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            model=model,
            extensions=[
                this_step_monitoring,
                average_monitoring,
                valid_monitoring,
                track_the_best_bpc,
                Checkpoint(save_path, ),
                Checkpoint(save_path,
                           every_n_batches=500,
                           save_separately=["model", "log"],
                           use_cpickle=True)
                    .add_condition(
                    ['after_epoch'],
                    OnLogRecord(track_the_best_bpc.notification_name),
                    (root_path + "_best" + extension,)),
                Timing(after_batch=True),
                Printing(every_n_batches=10),
                Plot(root_path,
                     [[average_monitoring.record_name(cost),
                       valid_monitoring.record_name(cost)],
                      [average_monitoring.record_name(algorithm.total_step_norm)],
                      [average_monitoring.record_name(algorithm.total_gradient_norm)],
                      [average_monitoring.record_name(ppl),
                       valid_monitoring.record_name(ppl)],
                      [average_monitoring.record_name(char_cost),
                       valid_monitoring.record_name(char_cost)],
                      [average_monitoring.record_name(bits_per_char),
                       valid_monitoring.record_name(bits_per_char)]],
                     every_n_batches=10)
            ])
        main_loop.run()

    elif mode == 'evaluate':
        with open('/data/lisatmp3/serdyuk/wsj_lms/lms/wsj_trigram_with_initial_eos/lexicon.txt') as f:
            raw_words = [line.split()[1:-1] for line in f.readlines()]
            words = [[char_to_ind[c] if c in char_to_ind else char_to_ind['<UNK>'] for c in w] 
                     for w in raw_words]
        max_word_length = max([len(w) for w in words])
        
        initial_states = tensor.matrix('init_states')
        cost_matrix_step = generator.cost_matrix(features, mask=features_mask,
                                                 states=initial_states)
        cg = ComputationGraph(cost_matrix_step)
        states = cg.auxiliary_variables[-2]
        compute_cost = theano.function([features, features_mask, initial_states], 
                                       [cost_matrix_step.sum(axis=0), states])

        cost_matrix = generator.cost_matrix(features, mask=features_mask)
        initial_cg = ComputationGraph(cost_matrix)
        initial_states = initial_cg.auxiliary_variables[-2]

        total_word_cost = 0
        num_words = 0
        examples = numpy.zeros((max_word_length + 1, len(words)),
                               dtype='int64')
        all_masks = numpy.zeros((max_word_length + 1, len(words)),
                                dtype=floatX)

        for i, word in enumerate(words):
            examples[:len(word), i] = word
            all_masks[:len(word), i] = 1.

        single_space = numpy.array([char_to_ind[' ']])[:, None]

        for batch in valid_stream.get_epoch_iterator():
            for example, mask in equizip(batch[0].T, batch[1].T):
                example = example[:(mask.sum())]
                spc_inds = list(numpy.where(example == char_to_ind[" "])[0])
                state = generator.transition.transition.initial_states_.get_value()[None, :]
                for i, j in equizip([-1] + spc_inds, spc_inds + [-1]):
                    word = example[(i+1):j, None]
                    word_cost, states = compute_cost(
                        word, numpy.ones_like(word, dtype=floatX), state)
                    state = states[-1]

                    costs = numpy.exp(-compute_cost(
                        examples, all_masks, numpy.tile(state, [examples.shape[1], 1]))[0])

                    _, space_states = compute_cost(
                        single_space, numpy.ones_like(single_space, dtype=floatX), state)
                    state = space_states[-1]

                    word_prob = numpy.exp(-word_cost)
                    total_word_cost += word_cost + numpy.log(numpy.sum(costs))
                    num_words += 1
                    print(word_prob)
                    print(numpy.sum(costs))
                    print("Average cost", total_word_cost / num_words)
                    print("PPL", numpy.exp(total_word_cost / num_words))

        print("Word-level perplexity")
        print(total_word_cost / num_words)
    else:
        assert False
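
The ppl and bits_per_char expressions in the cost graph above are only unit conversions of the natural-log cost: dividing by log 2 turns nats into bits, and 2 ** (cost / log 2) equals exp(cost). A quick numeric check (the cost value is made up):

import numpy

nll_nats = 1.386  # an illustrative per-character cost in nats, roughly log(4)
bits_per_char = nll_nats / numpy.log(2)  # nats -> bits
ppl = 2 ** bits_per_char                 # identical to numpy.exp(nll_nats)
print(bits_per_char)                     # ~2.0
print(ppl, numpy.exp(nll_nats))          # both ~4.0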
from fuel.transformers import Flatten, Mapping, Batch
from fuel.schemes import ConstantScheme

from extensions.plot import Plot
from datasets.addition import AdditionTask

from blocks.bricks import Linear
from theano import tensor

from numpy import swapaxes

def _transpose(data):
    return tuple(
        swapaxes(array, 0, 1) if len(array.shape) > 2 else array
        for array in data)

dataset = AdditionTask(1000)
train_stream = dataset.get_example_stream()
train_stream = Batch(train_stream, iteration_scheme=ConstantScheme(10))
train_stream = Mapping(train_stream, _transpose)

features_test, targets_test = next(train_stream.get_epoch_iterator())

x = tensor.tensor3('features')
y = tensor.matrix('targets')

n_batchs = 1000
h_dim = 2
x_dim = 2

encode = Linear(name='encode',
                input_dim=x_dim,
                output_dim=h_dim)

gates = Linear(name='gates',
               input_dim=x_dim,
               output_dim=2 * h_dim)
Example #11
def DStream(datatype, config):
    if datatype=='train':
        filename = config['train_file']
        filename_morph = config['train_morph_file']
        filename_rel = config['train_rel_file']
    elif datatype == 'valid':
        filename = config['valid_file']
        filename_morph = config['valid_morph_file']
        filename_rel = config['valid_rel_file']
    elif datatype == 'test':
        filename = config['test_file']
        filename_morph = config['test_morph_file']
        filename_rel = config['test_rel_file']
    else:
        logger.error('wrong datatype, train, valid, or test')
    data = TextFile(files=[filename],
                    dictionary=pickle.load(open(config['train_dic'],'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])
    data_morph = TextFile(files=[filename_morph],
                    dictionary=pickle.load(open(config['train_morph_dic'],'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])
    data_stream = DataStream.default_stream(data)
    data_stream.sources = ('sentence',)
    data_morph_stream = DataStream.default_stream(data_morph)
    data_morph_stream.sources = ('sentence',)
    # organize data in batches and pad shorter sequences with zeros
    batch_size = config['batch_size']
    rels_stream = []
    with open(filename_rel , "r") as fin:
        lines = fin.readlines()
        i = 0
        while i < len(lines):
            if i + batch_size < len(lines):
                rels_stream.append(padding(lines[i : i + batch_size]))
                i = i + batch_size
            else:
                rels_stream.append(padding(lines[i : len(lines)]))
                i = i + batch_size
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(batch_size))
    data_stream = Padding(data_stream)

    data_morph_stream = Batch(data_morph_stream, iteration_scheme=ConstantScheme(batch_size))
    data_morph_stream = Padding(data_morph_stream)
    data_morph_tensor3 = []
    mask_morph_tensor3 = []
    #data_morph_stream : batch_num * batch * sentence
    #rels_stream : batch_num * batch * sentence
    #data_morph_tensor3 : batch_num * batch * sentence * morph
    for data_morph_tuple , rel in zip(data_morph_stream.get_epoch_iterator() , rels_stream):
        data_morph , mask_morph = data_morph_tuple
        #data_morph : batch * sentence
        #rel : batch * sentence
        tmp = []
        tmp_mask = []
        for m , mask , r in zip(data_morph , mask_morph , rel):
            #m : sentence
            #r : sentence
            start = 0
            tmp2 = []
            tmp_mask2 = []
            for idx in r:
                tmp2.append(m[start:start+idx].tolist())
                tmp_mask2.append(mask[start:start+idx].tolist())
                #print m[start:start+idx]
                start = start + idx
            #print len(tmp)
            #print padding2(tmp2)
            tmp.append(tmp2)
            tmp_mask.append(tmp_mask2)
            #print len(tmp) , tmp
            #print m , r
            #print m.shape , r.shape
        #print padding2(tmp)
        data_morph_tensor3.append(np.array(padding2(tmp)))
        mask_morph_tensor3.append(np.array(padding2(tmp_mask) , dtype='float32'))
    return data_stream , data_morph_tensor3 , mask_morph_tensor3
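
padding and padding2 are project-specific helpers, but the regrouping step that builds the morph tensor above is straightforward: the relation counts from the *_rel_file say how many morphs belong to each word, and the inner loop slices the flat morph sequence accordingly. A rough stand-alone illustration (the numbers are made up):

# morph ids for one sentence, and the per-word morph counts
morphs = [7, 8, 9, 10, 11]
counts = [2, 1, 2]

start, words = 0, []
for idx in counts:
    words.append(morphs[start:start + idx])
    start += idx
print(words)  # [[7, 8], [9], [10, 11]] -- one list of morphs per word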
Example #13
def DStream(datatype, config):
    if datatype in ['train','valid','test']:
        filename = config[datatype + '_file']
        filename_morph = config[datatype + '_morph_file']
        filename_rel = config[datatype + '_rel_file']
    else:
        logger.error('wrong datatype, train, valid, or test')
    data_stream = getTextFile(filename, config['train_dic'], config)
    data_morph_stream = getTextFile(filename_morph, config['train_morph_dic'], config)
    # organize data in batches and pad shorter sequences with zeros
    batch_size = config['batch_size']
    rels_stream = []
    cnt = 0
    with open(filename_rel , "r") as fin:
        lines = fin.readlines()
        i = 0
        while i < len(lines):
            if i + batch_size < len(lines):
                rels_stream.append(padding(lines[i : i + batch_size]))
                i = i + batch_size
            else:
                rels_stream.append(padding(lines[i : len(lines)]))
                i = i + batch_size
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(batch_size))
    data_stream = Padding(data_stream)
    data_morph_stream = Batch(data_morph_stream, iteration_scheme=ConstantScheme(batch_size))
    data_morph_stream = Padding(data_morph_stream)
    data_morph_tensor3 = []
    mask_morph_tensor3 = []
    #data_morph_stream : batch_num * batch * sentence
    #rels_stream : batch_num * batch * sentence
    #data_morph_tensor3 : batch_num * batch * sentence * morph
    cnt = 0
    for data_morph_tuple , rel in zip(data_morph_stream.get_epoch_iterator() , rels_stream):
        data_morph , mask_morph = data_morph_tuple
        #data_morph : batch * sentence
        #rel : batch * sentence
        tmp = []
        tmp_mask = []
        for m , mask , r in zip(data_morph , mask_morph , rel):
            start = 0
            tmp2 = []
            tmp_mask2 = []
            for idx in r:
                tmp2.append(m[start:start+idx].tolist())
                tmp_mask2.append(mask[start:start+idx].tolist())
                #print m[start:start+idx]
                start = start + idx
            #print len(tmp)
            #print padding2(tmp2)
            tmp.append(tmp2)
            tmp_mask.append(tmp_mask2)
            #print len(tmp) , tmp
            #print m.shape , r.shape
        #print padding2(tmp)
        data_morph_tensor3.append(np.array(padding2(tmp)))
        mask_morph_tensor3.append(np.array(padding2(tmp_mask) , dtype='float32'))
        cnt += 1
    '''
    cnt = 0
    for a, b, c in zip(data_stream.get_epoch_iterator() , mask_morph_tensor3, mask_morph_tensor3):
        data , mask = a
        if data.shape[1] != b.shape[1]:
            print data.shape , b.shape, c.shape
            cnt2 = 0
            for i , d in enumerate(data):
                if cnt2 == 42:
                    print i , len(d) , d
                    dic2 = load_dic()
                    for key in d:
                        if key in dic2 and key != 0:
                            print dic2[key],
                cnt2 += 1
            print cnt
            #print data.shape , b[99]
            exit(0)
            print "###"
        cnt += 1
    exit(0)
    '''
    return data_stream , data_morph_tensor3 , mask_morph_tensor3
Example #15
from numpy import swapaxes


def _transpose(data):
    return tuple(
        swapaxes(array, 0, 1) if len(array.shape) > 2 else array
        for array in data)


dataset = AdditionTask(1000)
train_stream = dataset.get_example_stream()
train_stream = Batch(train_stream, iteration_scheme=ConstantScheme(10))
train_stream = Mapping(train_stream, _transpose)

features_test, targets_test = next(train_stream.get_epoch_iterator())

x = tensor.tensor3('features')
y = tensor.matrix('targets')

n_batchs = 1000
h_dim = 2
x_dim = 2

encode = Linear(name='encode', input_dim=x_dim, output_dim=h_dim)

gates = Linear(name='gates', input_dim=x_dim, output_dim=2 * h_dim)

#lstm = LSTM(activation=Tanh(),
#            dim=h_dim, name="lstm")
Example #16
def train_model(seed=12345, model='rnn', batch_size=50, n_h=50, n_epochs=40,
                updater='sgd', lr=0.002, recThetaName='', outThetaName='',
                modelPath='', error_mark=40, fit_model=1, wght_sd=0.005,
                out_wght_sd=0.005, useLN=False, drop_p=0., grad_clip=0,
                patience=1, dyn_eval=0, funtype="identity", norm_max=-1.0):

    # load in feature dictionary
    dictionary = pkl.load(open(DICT_FILE, 'r'))
    #dictionary['~'] = len(dictionary)
    reverse_mapping = dict((j, i) for i, j in dictionary.items())

    n_in = len(dictionary) # number inputs determined by size of lexicon
    print(" > Input.dim = ",n_in)

    # Load the datasets with Fuel
    print(" > Loading train data: ",TRAIN_FILE)
    train = TextFile(files=[TRAIN_FILE],
                     dictionary=dictionary,
                     unk_token=None,
                     level='word',
                     preprocess=None,
                     bos_token=None,
                     eos_token=None)

    train_stream = DataStream.default_stream(train) # get text-stream

    # organize data in batches and pad shorter sequences with zeros
    train_stream = Batch(train_stream,
                         iteration_scheme=ConstantScheme(batch_size))
    train_stream = Padding(train_stream)

    # idem dito for the validation text
    print(" > Loading valid data: ",VAL_FILE)
    val = TextFile(files=[VAL_FILE],
                     dictionary=dictionary,
                     unk_token=None,
                     level='word',
                     preprocess=None,
                     bos_token=None,
                     eos_token=None)

    val_stream = DataStream.default_stream(val)

    # organize data in batches and pad shorter sequences with zeros
    val_stream = Batch(val_stream,
                         iteration_scheme=ConstantScheme(batch_size))

    # pad text-token sequences & user-id sequences
    val_stream = Padding(val_stream)

    print(' > Building model : ',model)

    # Set the random number generator's seed for consistency
    numpy.random.seed(seed)
    random.seed(seed)
    rng = numpy.random.RandomState(seed)

    x = T.lmatrix('x')
    mask = T.matrix('mask')
    dmask = T.tensor3('dmask') # drop-out mask

    # Construct the LSTM layer
    if model == 'drnn':
        recurrent_layer = DeltaRNN.DeltaRNNLayer(rng=rng, input=x, mask=mask, dmask=dmask, n_in=n_in, n_h=n_h,
                                                 sd=wght_sd,useLN=useLN,drop_p=drop_p,funtype=funtype,useFanInFanOut=useFanInFanOut,useOrtho=useOrtho,drop_inner=drop_inner)
    elif model == 'gru':
        recurrent_layer = GRU.GRULayer(rng=rng, input=x, mask=mask, dmask=dmask, n_in=n_in, n_h=n_h, sd=wght_sd,useLN=useLN,drop_p=drop_p,useFanInFanOut=useFanInFanOut,useOrtho=useOrtho)
    else:
        raise Exception(" Model not understood: ",model)
    if len(recThetaName) > 0: # Load in any pre-built parameters for recurrent layer
        print(" >> Loading old params for recurrent-layer: ",recThetaName)
        recurrent_layer.load(filename=recThetaName)

    print(" Using fan-in-fan-out for softmax weights? {0}".format(useFanInFanOut))
    logreg_layer = MaxEnt.LogisticRegression(input=recurrent_layer.output[:-1],
                                      n_in=n_h, n_out=n_in, sd=out_wght_sd, useFanInFanOut=useFanInFanOut, useOrtho=useOrtho)
    if len(outThetaName) > 0: # Load in any pre-built parameters for output layer
        print(" >> Loading old params for output-layer: ",outThetaName)
        logreg_layer.load(filename=outThetaName)

    cost = Util.sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                             x[1:],
                                             mask[1:]) / batch_size

    # create a list of all model parameters to be fit by gradient descent
    params = logreg_layer.params + recurrent_layer.params

    # create a list of gradients for all model parameters
    if grad_clip > 0.0:
        print(" >> Clipping grads of magnitude > ",grad_clip)
        #grads = T.grad(cost, params, disconnected_inputs='ignore')
        #grad_lst = [ T.sum( (  grad / float(batch_size) )**2  ) for grad in grads ]
        #grad_norm = T.sqrt( T.sum( grad_lst ))
        #all_grads = ifelse(T.gt(grad_norm, max_norm),
        #           [grads*(max_norm / grad_norm) for grads in all_grads],
        #           all_grads)
        grads = T.grad(theano.gradient.grad_clip(cost, -1 * grad_clip, grad_clip), params, disconnected_inputs='ignore')
    else:
        grads = T.grad(cost, params, disconnected_inputs='ignore')

    # set up update rules for model parameters
    print(" Clipping param norms to max of {0}".format(norm_max))
    #learning_rate = lr
    learning_rate = T.scalar('learning_rate', dtype=theano.config.floatX)
    if updater == 'sgd': # use classical SGD
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]
    elif updater == 'adam': # use Adam adaptive learning rate update rule
        grads = OrderedDict(zip(params, grads))
        updates = UpdateRule.Adam(grads, learning_rate, norm_max=norm_max)
    elif updater == 'rmsprop': # use RMSprop adaptive learning rate update rule
        grads = OrderedDict(zip(params, grads))
        updates = UpdateRule.RMSprop(grads, learning_rate, norm_max=norm_max)
    else:
        raise Exception("Updater not understood: ",updater)

    update_model = theano.function([x, mask, dmask, learning_rate], cost, updates=updates) #, allow_input_downcast=True)

    evaluate_model = theano.function([x, mask, dmask], cost) #, allow_input_downcast=True)

    numParams = recurrent_layer.getNumParams() + logreg_layer.getNumParams()
    print(" -> Number Parameters = ",numParams)

    start_time = time.clock()

    iteration = 0
    print(" Random.NLL = ",numpy.log(1. * n_in))
    best_nll = -1.0

    if fit_model == 1: # FIT MODEL TO THE DATA IF FLAG RAISED

        logfd = open(modelPath + "performance.csv", 'wb')
        writer = csv.writer(logfd)
        writer.writerow(["Epoch","AVG_NLL","BPC","AVG_PPL"])

        # Get initial scores before any training
        val_scores = []
        N = 0
        nll = 0.

        for x_val, x_mask in val_stream.get_epoch_iterator():
            # 3D tensor shape:  variable dim x batch-size dim x time-window dim
            #d_mask = Utils.create_ones(n_h,x_mask.shape[0],x_mask.shape[1])
            d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0],x_mask.shape[1], drop_p, sample=False)
            batch_score = evaluate_model(x_val.T, x_mask.T, d_mask.T)
            #batch_score = 0.0
            nll += batch_score * x_mask.shape[0]
            val_scores.append(batch_score)
            N += numpy.sum(x_mask)
        nll = nll / N
        ce = numpy.mean(val_scores)
        ppl = numpy.exp(nll)
        print(' >> Epoch = {0} NLL = {1} log2(NLL) = {2} exp(NLL) = {3} '.format(-1,nll,(nll/math.log(2)),ppl))
        writer.writerow([0,nll,(nll/math.log(2)),ppl])
        logfd.flush()
        best_nll = nll
        best_epoch = -1

        impatience = 0
        l_r = lr # set initial learning rate
        for epoch in range(n_epochs):
            print('Epoch:', epoch)

            improve_flag = False
            for x_, x_mask_ in train_stream.get_epoch_iterator():
                iteration += 1
                #print("\r  {0} mini-batches seen...".format(iteration),end='')
                #d_mask = Utils.create_ones(n_h,x_mask_.shape[0],x_mask_.shape[1])
                d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask_.shape[0],x_mask_.shape[1], drop_p)
                cross_entropy = update_model(x_.T, x_mask_.T, d_mask.T, l_r)
                #print("\r  {0} mini-batches seen CE = {1}".format(iteration,cross_entropy),end='')
                #print("  {0} --> {1} mini-batches seen CE = {2}".format(epoch,iteration,cross_entropy))

                if iteration % error_mark == 0:
                    #print("")
                    #print('epoch:', epoch, '  minibatch:', iteration)
                    val_scores = []
                    N = 0
                    nll = 0.
                    for x_val, x_mask in val_stream.get_epoch_iterator():
                        if x_val.size > 0:  # as long as the sample sequence is non-empty
                            #d_mask = Utils.create_ones(n_h,x_mask.shape[0],x_mask.shape[1])
                            d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p, sample=False)
                            batch_score = evaluate_model(x_val.T, x_mask.T, d_mask.T)
                            nll += batch_score * x_mask.shape[0]  # un-normalize mini-batch scores
                            val_scores.append(batch_score)
                            N += numpy.sum(x_mask)
                    nll = nll / N
                    ce = numpy.mean(val_scores)
                    ppl = numpy.exp(nll)
                    writer.writerow([(epoch),nll,(nll/math.log(2)),ppl])
                    logfd.flush()
                    if nll < best_nll:
                        best_nll = nll
                        best_epoch = epoch
                        print(" >> Saving best model at epoch {0} with NLL = {1}".format(best_epoch, best_nll))
                        # Check-point save at end of epoch
                        recSave = "{0}rec-params-best-{1}".format(modelPath,epoch)
                        recurrent_layer.save(recSave)
                        outSave = "{0}out-params-best-{1}".format(modelPath,epoch)
                        logreg_layer.save(outSave)
                        # Save best params so far
                        recSave = "{0}rec-params-best".format(modelPath,epoch)
                        recurrent_layer.save(recSave)
                        outSave = "{0}out-params-best".format(modelPath,epoch)
                        logreg_layer.save(outSave)
                        improve_flag = True # raise improvement flag since improvement was observed
                    print(' >> Epoch = {0} Avg.NLL = {1} (Best = {5}) Avg.BPC = {2} PPL = {3} Iter = {4}'.format(epoch,nll,(nll/math.log(2)),ppl,iteration,best_nll))

                    # adapt the learning rate based on patience schedule
                    if improve_flag is False:
                        if patience > 0: # we only consider positive/non-zero patience values (otherwise, turn this option off)
                            impatience += 1
                            if impatience >= patience:
                                l_r = (numpy.maximum(1e-4,l_r * l_r_decay)).astype(theano.config.floatX)
                                print(" __Decreasing learning rate to ",l_r)
                                impatience = 0
                    improve_flag = False


            # Evaluate generalization at end of epoch
            if iteration % error_mark != 0: # this if-stmt avoids redundant evaluation computation
                #print("")
                val_scores = []
                N = 0
                nll = 0.
                for x_val, x_mask in val_stream.get_epoch_iterator():
                    if x_val.size > 0:  # as long as the sample sequence is non-empty
                        #d_mask = Utils.create_ones(n_h,x_mask.shape[0],x_mask.shape[1])
                        d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p, sample=False)
                        batch_score = evaluate_model(x_val.T, x_mask.T, d_mask.T)
                        nll += batch_score * x_mask.shape[0]
                        val_scores.append(batch_score)
                        N += numpy.sum(x_mask)
                nll = nll / N
                ce = numpy.mean(val_scores)
                ppl = numpy.exp(nll)

                writer.writerow([(epoch+1),nll,(nll/math.log(2)),ppl])
                logfd.flush()
                # Check-point
                recSave = "{0}rec-params-end-{1}".format(modelPath,epoch)
                recurrent_layer.save(recSave)
                outSave = "{0}out-params-end-{1}".format(modelPath,epoch)
                logreg_layer.save(outSave)
                if nll < best_nll:
                    best_nll = nll
                    best_epoch = epoch
                    print(" >> Saving best model at epoch {0} with NLL = {1}".format(best_epoch, best_nll))
                    # Check-point save at end of epoch
                    recSave = "{0}rec-params-best-{1}".format(modelPath,epoch)
                    recurrent_layer.save(recSave)
                    outSave = "{0}out-params-best-{1}".format(modelPath,epoch)
                    logreg_layer.save(outSave)
                    # Save best params so far
                    recSave = "{0}rec-params-best".format(modelPath,epoch)
                    recurrent_layer.save(recSave)
                    outSave = "{0}out-params-best".format(modelPath,epoch)
                    logreg_layer.save(outSave)
                    improve_flag = True
                print(' >> Epoch = {0} Avg.NLL = {1} (Best = {4}) Avg.BPC = {2} PPL = {3} '.format(epoch,nll,(nll/math.log(2)),ppl,best_nll))

            # adapt the learning rate based on patience schedule
            '''
            if improve_flag is False:
                if patience > 0:
                    impatience += 1
                    if impatience >= patience:
                        l_r = (numpy.maximum(0.00001,l_r / 2.0)).astype(theano.config.floatX)
                        print(" __Decreasing learning rate to ",l_r)
                        impatience = 0
            '''

        print("")
        print(' > Optimization complete.')
        print(' >>>> Best NLL = {0} at Epoch {1}'.format(best_nll,best_epoch))
        end_time = time.clock()
        print(' > The code ran for %.2fm' % ((end_time - start_time) / 60.))
        print('---------------------------------------')
        logfd.close()
    else:
        print(' > Skipping model fit directly to evaluation...')
        print(' > dynamic eval code := ',dyn_eval)
        # EVALUATION-ONLY
        print(' > FINAL.VALID := ',VAL_FILE)
        val_scores = []
        N = 0
        nll = 0.
        l_r = lr # set initial learning rate
        for x_val, x_mask in val_stream.get_epoch_iterator():
            #print(' type = ',type(x_val))
            #x_val = x_val.astype(numpy.int64)
            #print(x_val)
            #print('  = ',(x_val).size)
            if x_val.size > 0:  # as long as the sample sequence is non-empty
                if dyn_eval > 0:
                    d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p)
                    batch_score = update_model(x_val.T, x_mask.T, d_mask.T, l_r)
                else:
                    #d_mask = Utils.create_ones(n_h,x_mask.shape[0],x_mask.shape[1])
                    d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p, sample=False)
                    batch_score = evaluate_model(x_val.T, x_mask.T, d_mask.T)
                #print(" B{0} vs X{1} N{2}".format(batch_size,x_mask.shape[0],numpy.sum(x_mask)))
                nll += batch_score * batch_size  #* x_mask.shape[0] # un-normalize mini-batch scores
                val_scores.append(batch_score)
                N += numpy.sum(x_mask)
                print('\r NLL.tmp = {0} over {1}'.format((nll / N), N), end='')
        print('')
        nll = nll / N
        ce = numpy.mean(val_scores)
        ppl = numpy.exp(nll)
        print(' > FINAL.VALID: Avg.NLL = {0} Avg.BPC = {1} PPL = {2} N.tokens = {3}'.format(nll,(nll/math.log(2)),ppl,N))

        if dyn_eval > 1:
            dyn_eval = 0

        # Evaluate model on training as well (as measure of overfitting)
        print(' > FINAL.TRAIN := ',TRAIN_FILE)
        val_scores = []
        N = 0
        nll = 0.
        for x_val, x_mask in train_stream.get_epoch_iterator():
            if x_val.size > 0:  # as long as the sample sequence is non-empty
                if dyn_eval > 0:
                    d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p)
                    batch_score = update_model(x_val.T, x_mask.T, d_mask.T, l_r)
                else:
                    #d_mask = Utils.create_ones(n_h,x_mask.shape[0],x_mask.shape[1])
                    d_mask = Utils.create_zone_out_mask(rng, n_h, x_mask.shape[0], x_mask.shape[1], drop_p, sample=False)
                    batch_score = evaluate_model(x_val.T, x_mask.T, d_mask.T)
                nll += batch_score * batch_size  # * x_mask.shape[0] # un-normalize mini-batch scores
                val_scores.append(batch_score)
                N += numpy.sum(x_mask)
                print('\r NLL.tmp = {0} over {1}'.format((nll / N), N), end='')
        print('')
        nll = nll / N
        ce = numpy.mean(val_scores)
        ppl = numpy.exp(nll)
        print(' > FINAL.TRAIN: Avg.NLL = {0} Avg.BPC = {1} PPL = {2} N.tokens = {3}'.format(nll,(nll/math.log(2)),ppl,N))

        end_time = time.clock()
        print(' > Final Evaluation complete.')
        print(' > The code ran for %.2fm' % ((end_time - start_time) / 60.))
        print('---------------------------------------')
        recSave = modelPath + "rec-params"
        print(' > Saving model.recurrent params to disk: ',recSave)
        recurrent_layer.save(recSave)
        outSave = modelPath + "out-params"
        print(' > Saving model.output params to disk: ',outSave)
        logreg_layer.save(outSave)
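
The learning-rate schedule in Example #16 only decays when validation NLL has not improved for patience consecutive checks; note that l_r_decay is referenced but never defined in the function as shown. A compact sketch of that schedule (the helper name and the 0.5 decay factor are assumptions for illustration):

def decay_on_plateau(l_r, improved, impatience, patience, l_r_decay=0.5, floor=1e-4):
    # Mirror of the patience logic above: when an evaluation shows no
    # improvement the impatience counter grows, and once it reaches
    # `patience` the learning rate is multiplied by `l_r_decay`,
    # never dropping below `floor`.
    if not improved and patience > 0:
        impatience += 1
        if impatience >= patience:
            return max(floor, l_r * l_r_decay), 0
    return l_r, impatience

# usage at each evaluation point:
# l_r, impatience = decay_on_plateau(l_r, nll < best_nll, impatience, patience)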