Exemple #1
0
def load_models(model_path=save_path,
                in_size=len(input_columns),
                out_size=len(output_columns) -
                1 if cost_mode == 'RL-MDN' else len(output_columns),
                hidden_size=hidden_size,
                num_recurrent_layers=num_recurrent_layers,
                model=layer_models[0]):
    """Load a trained recurrent model from disk and build a prediction function.

    Defaults are captured from module-level configuration at import time.

    Parameters:
        model_path: path to the pickled main-loop checkpoint.
        in_size: nominal input width; NOTE it is recomputed below from the
            encoder's latent size, so the argument value is effectively unused.
        out_size: output width (one fewer column in 'RL-MDN' cost mode).
        hidden_size, num_recurrent_layers, model: network hyper-parameters.

    Returns:
        (predict_func, initials, encoder, code_size): predict_func maps a
        feature tensor to the per-layer hidden states plus y_hat; initials
        holds the INITIAL_STATE parameters of every recurrent layer.
    """
    initials = []
    if not os.path.isfile(model_path):
        print('Could not find model file.')
        sys.exit(1)  # non-zero exit: this is an error path, not a clean exit
    print('Loading model from {0}...'.format(model_path))
    x = tensor.tensor3('features', dtype=theano.config.floatX)
    # use the same configured float dtype as 'features' ('floatX' resolves to
    # theano.config.floatX anyway; spell it out for consistency)
    y = tensor.tensor3('targets', dtype=theano.config.floatX)
    train_flag = [theano.shared(0)]
    # load the encoder once and reuse it both for the latent size and for the
    # returned (encoder, code_size) pair (the original called load_encoder()
    # twice for the same values)
    encoder, latent_size = load_encoder()
    # effective input width: encoder code concatenated with raw input columns;
    # this intentionally overrides the in_size argument
    in_size = latent_size + len(input_columns)
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)
    # dummy MainLoop (no algorithm/data) so blocks' saveload.Load can restore
    # the checkpoint parameters into the model
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print('Model loaded. Building prediction function...')
    hiddens = []
    for i in range(num_recurrent_layers):
        # recurrent bricks are named "<layer_model><index>"
        brick = [
            b for b in bin_model.get_top_bricks()
            if b.name == layer_models[i] + str(i)
        ][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
    predict_func = theano.function([x], hiddens + [y_hat])
    return predict_func, initials, encoder, latent_size
Exemple #2
0
# Training setup: RMSProp with gradient clipping, composed into a single
# step rule for blocks' GradientDescent.
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions
# Mean gradient/step norms are monitored to watch training stability.
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]

# Validation cost after every epoch; training stats after every batch.
dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True, data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                       before_first_epoch=True, prefix='tra')

# saveload.Load resumes from load_path if present; Checkpoint writes to
# last_path; track_best keeps the best-dev-cost model at save_path.
extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs),
              saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

# Multiplicative learning-rate decay per epoch; 0 and 1 both mean "disabled".
if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(step_rules[0].learning_rate,
                                             lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr), after_epoch=True, after_batch=False))

# print() call form for Python 3 compatibility (was a Python 2 print
# statement; the identical script later in this file already uses print()).
print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval()))
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
Exemple #3
0
# Training setup: RMSProp with gradient clipping, composed into a single
# step rule for blocks' GradientDescent.
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions
# Mean gradient/step norms are monitored to watch training stability.
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]

# Validation cost after every epoch; training stats after every batch.
dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True, data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                       before_first_epoch=True, prefix='tra')

# saveload.Load resumes from load_path if present; Checkpoint writes to
# last_path; track_best keeps the best-dev-cost model at save_path.
extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs),
              saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

# Multiplicative learning-rate decay per epoch; 0 and 1 both mean "disabled".
if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(step_rules[0].learning_rate,
                                             lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr), after_epoch=True, after_batch=False))

print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval()))
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
Exemple #4
0
        # Fall back to a random vocabulary character as the primetext seed.
        args.primetext = ix_to_char[numpy.random.randint(vocab_size)]
    # Keep only characters that exist in the vocabulary.
    primetext = ''.join(
        [ch for ch in args.primetext if ch in char_to_ix.keys()])
    if len(primetext) == 0:
        raise Exception('primetext characters are not in the vocabulary')
    # Encode primetext as a (len, 1) uint8 column — batch of size 1.
    x_curr = numpy.expand_dims(numpy.array(
        [char_to_ix[ch] for ch in primetext], dtype='uint8'),
                               axis=1)

    print 'Loading model from {0}...'.format(args.model)
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers,
                                  model)
    # Dummy MainLoop (no algorithm/data) so blocks' saveload.Load can restore
    # the saved parameters into the model.
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(args.model)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    activations = []
    initial_states = []
    # Collect per-layer hidden-state variables; bricks are named
    # "<model><index>".
    for i in range(num_layers):
        brick = [
            b for b in bin_model.get_top_bricks() if b.name == model + str(i)
        ][0]
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        activations.extend(
Exemple #5
0
    # Define primetext
    ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
    if not args.primetext or len(args.primetext) == 0:
        # No primetext supplied: seed with a random vocabulary character.
        args.primetext = ix_to_char[numpy.random.randint(vocab_size)]
    # Keep only characters that exist in the vocabulary.
    primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix.keys()])
    if len(primetext) == 0:
        raise Exception('primetext characters are not in the vocabulary')
    # Encode primetext as a (len, 1) uint8 column — batch of size 1.
    x_curr = numpy.expand_dims(
        numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)

    print('Loading model from {0}...'.format(args.model))
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost))
    # NOTE(review): the checkpoint is opened in text mode; if load_parameters
    # reads a binary pickle/npz this likely needs mode 'rb' — confirm.
    with open(args.model) as f:
        main_loop.model.set_parameter_values(load_parameters(f))
    bin_model = main_loop.model
    activations = []
    initial_states = []
    # Gather per-layer state/cell variables and their INITIAL_STATE params;
    # bricks are named "<model><index>".
    for i in range(num_layers):
        brick = [b for b in bin_model.get_top_bricks() if b.name==model+str(i)][0]
        activations.extend(VariableFilter(theano_name=brick.name+'_apply_states')(bin_model.variables))
        activations.extend(VariableFilter(theano_name=brick.name+'_apply_cells')(cells))
        initial_states.extend(VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    #take activations of last element
    activations = [act[-1].flatten() for act in activations]
    states_as_params = [tensor.vector(dtype=initial.dtype) for initial in initial_states]
    #Get prob. distribution of the last element in the last seq of the batch
Exemple #6
0
    y = tensor.tensor3('targets', dtype='floatX')
    # Swap batch and time axes — downstream expects time-major input
    # (presumably (seq_len, batch, features); confirm against nn_fprop).
    x = x.swapaxes(0, 1)
    y = y.swapaxes(0, 1)
    in_size = num_features
    out_size = num_features
    y_hat, cost, cells = nn_fprop(x,
                                  y,
                                  in_size,
                                  out_size,
                                  hidden_size[network_mode],
                                  num_layers,
                                  layer_models[network_mode][0],
                                  'MDN',
                                  training=False)
    # Dummy MainLoop (no algorithm/data) so blocks' saveload.Load can restore
    # the checkpoint for this network mode.
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(save_path[network_mode])])

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print 'Model loaded. Building prediction function...'
    hiddens = []
    initials = []
    # Layer bricks here are named "<layer_model><index>-" (trailing dash).
    for i in range(num_layers):
        brick = [
            b for b in bin_model.get_top_bricks()
            if b.name == layer_models[network_mode][i] + str(i) + '-'
        ][0]
        hiddens.extend(
Exemple #7
0
def load_models(
        models=hierarchy_models,
        in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
        out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
        hidden_size=hidden_size,
        num_layers=num_layers,
        model=layer_models[0]):
    """Load every saved model of the hierarchy and build prediction functions.

    For each checkpoint in ``models`` the network graph is rebuilt, the
    parameters are restored via blocks' saveload.Load, and one theano
    prediction function per output task is compiled.

    Defaults are captured from module-level configuration at import time.

    Returns:
        (predict_funcs, initials): predict_funcs[h][t] maps features to the
        collected hidden states plus y_hat[t] for hierarchy level h and task
        t; initials[h] holds the INITIAL_STATE parameters of level h.
    """
    predict_funcs = []
    initials = []
    # enumerate instead of range(len(models)) — saved_model was already
    # being bound to models[hierarchy_index] each iteration
    for hierarchy_index, saved_model in enumerate(models):
        print('Loading model from {0}...'.format(saved_model))
        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(x,
                                      y,
                                      in_size,
                                      out_size,
                                      hidden_size,
                                      num_layers,
                                      model,
                                      training=False)
        # dummy MainLoop (no algorithm/data) so saveload.Load can restore the
        # checkpoint parameters
        main_loop = MainLoop(algorithm=None,
                             data_stream=None,
                             model=Model(cost),
                             extensions=[saveload.Load(saved_model)])
        for extension in main_loop.extensions:
            extension.main_loop = main_loop
        main_loop._run_extensions('before_training')
        bin_model = main_loop.model
        print('Model loaded. Building prediction function...')
        hiddens = []
        initials.append([])
        # Shared (non task-specialized) lower layers.
        # NOTE(review): str(-1) yields '-1', so the matched brick name ends in
        # '--1' — confirm this matches the naming used at brick creation.
        for i in range(num_layers - specialized_layer_num):
            brick = [
                b for b in bin_model.get_top_bricks()
                if b.name == layer_models[i] + str(i) + '-' + str(-1)
            ][0]
            hiddens.extend(
                VariableFilter(theano_name=brick.name + '_apply_states')(
                    bin_model.variables))
            hiddens.extend(
                VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
            initials[hierarchy_index].extend(
                VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
        # Task-specialized top layers: one set of bricks per game task,
        # suffixed with the task index.
        specialized_count = len(game_tasks) if task_specialized else 0
        for task in range(specialized_count):
            for i in range(num_layers - specialized_layer_num, num_layers):
                brick = [
                    b for b in bin_model.get_top_bricks()
                    if b.name == layer_models[i] + str(i) + '-' + str(task)
                ][0]
                hiddens.extend(
                    VariableFilter(theano_name=brick.name + '_apply_states')(
                        bin_model.variables))
                hiddens.extend(
                    VariableFilter(theano_name=brick.name +
                                   '_apply_cells')(cells))
                initials[hierarchy_index].extend(
                    VariableFilter(roles=[roles.INITIAL_STATE])(
                        brick.parameters))
        # One prediction function per task (a single one when the network is
        # not task-specialized).
        output_count = len(game_tasks) if task_specialized else 1
        predict_funcs.append([])
        for task in range(output_count):
            predict_funcs[hierarchy_index].append(
                theano.function([x], hiddens + [y_hat[task]]))
    return predict_funcs, initials
Exemple #8
0
    # Define primetext
    ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
    if not args.primetext or len(args.primetext) == 0:
        # No primetext supplied: seed with a random vocabulary character.
        args.primetext = ix_to_char[numpy.random.randint(vocab_size)]
    # Keep only characters that exist in the vocabulary.
    primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix.keys()])
    if len(primetext) == 0:
        raise Exception('primetext characters are not in the vocabulary')
    # Encode primetext as a (len, 1) uint8 column — batch of size 1.
    x_curr = numpy.expand_dims(
        numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)

    print 'Loading model from {0}...'.format(args.model)
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)
    # Dummy MainLoop (no algorithm/data) so blocks' saveload.Load can restore
    # the checkpoint parameters into the model.
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                         extensions=[saveload.Load(args.model)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    activations = []
    initial_states = []
    # Gather per-layer state/cell variables and their INITIAL_STATE params;
    # bricks are named "<model><index>".
    for i in range(num_layers):
        brick = [b for b in bin_model.get_top_bricks() if b.name==model+str(i)][0]
        activations.extend(VariableFilter(theano_name=brick.name+'_apply_states')(bin_model.variables))
        activations.extend(VariableFilter(theano_name=brick.name+'_apply_cells')(cells))
        initial_states.extend(VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    #take activations of last element
    activations = [act[-1].flatten() for act in activations]
    states_as_params = [tensor.vector(dtype=initial.dtype) for initial in initial_states]