Ejemplo n.º 1
0
def load_models(model_path=save_path,
                in_size=len(input_columns),
                out_size=len(output_columns) -
                1 if cost_mode == 'RL-MDN' else len(output_columns),
                hidden_size=hidden_size,
                num_recurrent_layers=num_recurrent_layers,
                model=layer_models[0]):
    """Restore a saved model and compile a function that evaluates it.

    Args:
        model_path: Path of the saved model file handed to ``saveload.Load``.
        in_size: Ignored. The input width is recomputed in the body as the
            encoder's latent size plus ``len(input_columns)``; the parameter
            is kept so existing callers keep working.
        out_size: Output width passed to ``nn_fprop``. The default parses as
            ``(len(output_columns) - 1) if cost_mode == 'RL-MDN' else
            len(output_columns)``.
        hidden_size: Hidden width passed to ``nn_fprop``.
        num_recurrent_layers: Number of recurrent layers passed to
            ``nn_fprop``; also the number of bricks that are looked up.
        model: Not used by this function; kept for interface compatibility.

    Returns:
        A tuple ``(predict_func, initials, encoder, code_size)`` where
        ``predict_func`` is a Theano function of ``x`` returning the
        collected state/cell variables followed by ``y_hat``, ``initials``
        is the list of ``INITIAL_STATE`` parameters of the looked-up bricks,
        and ``encoder`` / ``code_size`` are the two values returned by
        ``load_encoder()``.

    Raises:
        SystemExit: With status 1 when ``model_path`` is not an existing file.
        IndexError: When no top brick is named ``layer_models[i] + str(i)``.
    """
    if not os.path.isfile(model_path):
        print('Could not find model file.')
        # A missing checkpoint is a failure, so report a non-zero status
        # (the previous exit status of 0 signalled success to the shell).
        sys.exit(1)
    print('Loading model from {0}...'.format(model_path))

    x = tensor.tensor3('features', dtype=theano.config.floatX)
    y = tensor.tensor3('targets', dtype=theano.config.floatX)
    train_flag = [theano.shared(0)]

    # Load the encoder once and reuse both results: the latent size widens
    # the network input here, and the same pair is returned to the caller.
    encoder, code_size = load_encoder()
    in_size = code_size + len(input_columns)

    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)

    # No training happens here: the main loop only hosts the Load extension,
    # whose 'before_training' callback is triggered by hand.
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print('Model loaded. Building prediction function...')

    hiddens = []
    initials = []
    for i in range(num_recurrent_layers):
        brick_name = layer_models[i] + str(i)
        # Indexing with [0] raises IndexError when the brick is missing.
        brick = [
            b for b in bin_model.get_top_bricks() if b.name == brick_name
        ][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    predict_func = theano.function([x], hiddens + [y_hat])
    return predict_func, initials, encoder, code_size
Ejemplo n.º 2
0
    # Fragment of a larger function (its header is outside this excerpt).
    # Turn the prime text into a column of vocabulary indices: one row per
    # character, with a trailing axis of length 1 added by expand_dims.
    # NOTE(review): uint8 only holds 0-255; confirm the vocabulary is small
    # enough for every index to fit.
    x_curr = numpy.expand_dims(numpy.array(
        [char_to_ix[ch] for ch in primetext], dtype='uint8'),
                               axis=1)

    print 'Loading model from {0}...'.format(args.model)
    # Symbolic uint8 matrices for the network input and target.
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    # nn_fprop is defined elsewhere; it yields the output, the cost and the
    # `cells` collection that is filtered below.
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers,
                                  model)
    # The main loop has no algorithm or data stream; it only hosts the Load
    # extension. Presumably running 'before_training' makes Load restore the
    # saved state from args.model -- confirm against the Blocks version used.
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(args.model)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    # Per layer, gather the variables named '<brick>_apply_states' and
    # '<brick>_apply_cells', plus the brick's INITIAL_STATE parameters.
    activations = []
    initial_states = []
    for i in range(num_layers):
        # Top brick named model + str(i); [0] raises IndexError if absent.
        brick = [
            b for b in bin_model.get_top_bricks() if b.name == model + str(i)
        ][0]
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initial_states.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
Ejemplo n.º 3
0
def _collect_brick_variables(bin_model, cells, brick_name):
    """Return (hidden variables, initial-state parameters) for one brick.

    The brick is the first top brick of ``bin_model`` named ``brick_name``;
    an IndexError is raised when there is none.
    """
    brick = [
        candidate for candidate in bin_model.get_top_bricks()
        if candidate.name == brick_name
    ][0]
    states_filter = VariableFilter(theano_name=brick.name + '_apply_states')
    hidden_vars = list(states_filter(bin_model.variables))
    cells_filter = VariableFilter(theano_name=brick.name + '_apply_cells')
    hidden_vars += cells_filter(cells)
    initial_filter = VariableFilter(roles=[roles.INITIAL_STATE])
    return hidden_vars, initial_filter(brick.parameters)


def load_models(
        models=hierarchy_models,
        in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
        out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
        hidden_size=hidden_size,
        num_layers=num_layers,
        model=layer_models[0]):
    """Load each saved hierarchy model and compile its prediction functions.

    For every entry of ``models`` the graph is rebuilt with ``nn_fprop``
    (``training=False``) and the saved file is loaded through a
    ``saveload.Load`` extension. State/cell variables and ``INITIAL_STATE``
    parameters are then gathered brick by brick: first the layers below
    ``num_layers - specialized_layer_num`` (brick suffix ``'-' + str(-1)``),
    then, when ``task_specialized`` is set, the remaining layers once per
    task in ``game_tasks``.

    Returns:
        ``(predict_funcs, initials)``. Both lists have one entry per model:
        a list of Theano functions (one per output, each returning the
        gathered variables followed by ``y_hat[task]``) and a list of
        initial-state parameters.
    """
    predict_funcs = []
    initials = []
    for saved_model in models:
        print('Loading model from {0}...'.format(saved_model))
        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(
            x, y, in_size, out_size, hidden_size, num_layers, model,
            training=False)

        # The loop is never run; it only hosts the Load extension, whose
        # 'before_training' callback is fired manually.
        loader = MainLoop(algorithm=None,
                          data_stream=None,
                          model=Model(cost),
                          extensions=[saveload.Load(saved_model)])
        for ext in loader.extensions:
            ext.main_loop = loader
        loader._run_extensions('before_training')
        bin_model = loader.model
        print('Model loaded. Building prediction function...')

        hiddens = []
        model_initials = []
        initials.append(model_initials)

        # Layers below the specialized ones carry the task suffix -1.
        for layer in range(num_layers - specialized_layer_num):
            brick_name = layer_models[layer] + str(layer) + '-' + str(-1)
            found_hiddens, found_initials = _collect_brick_variables(
                bin_model, cells, brick_name)
            hiddens.extend(found_hiddens)
            model_initials.extend(found_initials)

        # Specialized layers exist once per task.
        specialized_count = len(game_tasks) if task_specialized else 0
        for task in range(specialized_count):
            for layer in range(num_layers - specialized_layer_num,
                               num_layers):
                brick_name = (layer_models[layer] + str(layer) + '-' +
                              str(task))
                found_hiddens, found_initials = _collect_brick_variables(
                    bin_model, cells, brick_name)
                hiddens.extend(found_hiddens)
                model_initials.extend(found_initials)

        output_count = len(game_tasks) if task_specialized else 1
        predict_funcs.append([
            theano.function([x], hiddens + [y_hat[task]])
            for task in range(output_count)
        ])
    return predict_funcs, initials
Ejemplo n.º 4
0
        args.primetext = ix_to_char[numpy.random.randint(vocab_size)]
    # Drop prime-text characters that are not in the vocabulary. Membership
    # is tested on the mapping itself rather than on `.keys()`.
    primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix])
    if not primetext:
        raise Exception('primetext characters are not in the vocabulary')
    # Column of vocabulary indices: one row per prime character.
    # NOTE(review): uint8 only holds 0-255; confirm every index fits.
    x_curr = numpy.expand_dims(
        numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)

    print('Loading model from {0}...'.format(args.model))
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)
    # The main loop is never run; it only hosts the Load extension, whose
    # 'before_training' callback is fired manually.
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                         extensions=[saveload.Load(args.model)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model

    # Per layer, gather the '<brick>_apply_states' / '<brick>_apply_cells'
    # variables and the brick's INITIAL_STATE parameters.
    activations = []
    initial_states = []
    for i in range(num_layers):
        brick_name = model + str(i)
        # [0] raises IndexError when no top brick carries this name.
        brick = [b for b in bin_model.get_top_bricks() if b.name == brick_name][0]
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initial_states.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    # Take the activations of the last element.
    activations = [act[-1].flatten() for act in activations]
    # One symbolic vector per initial-state parameter, substituted for it via
    # `givens` so the states can be supplied as function inputs.
    states_as_params = [tensor.vector(dtype=initial.dtype) for initial in initial_states]
    # Get the probability distribution of the last element in the last
    # sequence of the batch. The pairs are materialised as a list so they
    # can be iterated more than once.
    fprop = theano.function(
        [x] + states_as_params,
        activations + [y_hat[-1, -1, :]],
        givens=list(zip(initial_states, states_as_params)))