Example #1
def load_models(net, model_path=save_path, in_size=len(input_columns),
                out_size=len(output_columns) - 1 if cost_mode == 'RL-MDN' else len(output_columns),
                hidden_size=hidden_size, num_recurrent_layers=num_recurrent_layers, model=layer_models[0]):
    
    initials = []
    if not os.path.isfile(model_path):
        print('Could not find model file.')
        sys.exit(0)
    print('Loading model from {0}...'.format(model_path))
    x = tensor.tensor3('features', dtype=theano.config.floatX)
    y = tensor.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]
    
    latent_size = net.get_size()  # size of the network's latent code

    in_size = latent_size + len(input_columns)  # latent code concatenated with the raw inputs
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size, num_recurrent_layers, train_flag)
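    # Build a throw-away MainLoop whose only extension is saveload.Load;
    # running its 'before_training' callbacks below copies the saved
    # parameters into the freshly built graph.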
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print('Model loaded. Building prediction function...')
    hiddens = []
    for i in range(num_recurrent_layers):
        brick = [b for b in bin_model.get_top_bricks() if b.name == layer_models[i] + str(i)][0]
        hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
        hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
    predict_func = theano.function([x], hiddens + [y_hat])
    encoder, code_size = load_encoder(net)
    return predict_func, initials, encoder, code_size
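The compiled function maps a (time, batch, feature) array to the hidden states and cells of every recurrent layer, followed by the network output. A hypothetical call (x_batch is an assumed input array, not part of the original code):

# x_batch: float array of shape (seq_len, batch_size, in_size)
outputs = predict_func(x_batch)
hidden_states, y_pred = outputs[:-1], outputs[-1]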
Example #2
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]

dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True, data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                       before_first_epoch=True, prefix='tra')

extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs),
              saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

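# Multiply the learning rate by learning_rate_decay after every epoch
# (values 0 and 1 disable the schedule).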
if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval()))
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
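The track_best helper used above is not shown on this page. A plausible sketch of it, assuming the common Blocks pattern of pairing TrackTheBest with a checkpoint that fires on a new best value (the exact options are assumptions):

from blocks.extensions import saveload
from blocks.extensions.training import TrackTheBest
from blocks.extensions.predicates import OnLogRecord

def track_best(channel, save_path):
    # Track the minimum of `channel`; TrackTheBest adds the log record
    # '<channel>_best_so_far' whenever a new best value appears.
    tracker = TrackTheBest(channel, choose_best=min)
    # Save a checkpoint only when that record is present in the log.
    checkpoint = saveload.Checkpoint(save_path, after_training=False,
                                     use_cpickle=True)
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord(channel + '_best_so_far'))
    return [tracker, checkpoint]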
Example #3
    primetext = ''.join(
        [ch for ch in args.primetext if ch in char_to_ix.keys()])
    if len(primetext) == 0:
        raise Exception('primetext characters are not in the vocabulary')
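    # expand_dims adds a batch axis of size one: x_curr has shape
    # (len(primetext), 1).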
    x_curr = numpy.expand_dims(numpy.array(
        [char_to_ix[ch] for ch in primetext], dtype='uint8'),
                               axis=1)

    print('Loading model from {0}...'.format(args.model))
    x = tensor.matrix('features', dtype='uint8')
    y = tensor.matrix('targets', dtype='uint8')
    y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers,
                                  model)
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(args.model)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    activations = []
    initial_states = []
    for i in range(num_layers):
        brick = [
            b for b in bin_model.get_top_bricks() if b.name == model + str(i)
        ][0]
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        activations.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
Example #4
    y = y.swapaxes(0, 1)
    in_size = num_features
    out_size = num_features
    y_hat, cost, cells = nn_fprop(x,
                                  y,
                                  in_size,
                                  out_size,
                                  hidden_size[network_mode],
                                  num_layers,
                                  layer_models[network_mode][0],
                                  'MDN',
                                  training=False)
    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(save_path[network_mode])])

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print('Model loaded. Building prediction function...')
    hiddens = []
    initials = []
    for i in range(num_layers):
        brick = [
            b for b in bin_model.get_top_bricks()
            if b.name == layer_models[network_mode][i] + str(i) + '-'
        ][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
Example #5
    def load_model(self, load_path):
        load_pre = saveload.Load(load_path)
        self.extensions.append(load_pre)
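A hypothetical usage sketch, assuming self.extensions is later handed to a MainLoop (trainer, stream, algorithm, and cost are stand-in names):

trainer.load_model('checkpoints/last.tar')  # hypothetical checkpoint path
main_loop = MainLoop(data_stream=stream, algorithm=algorithm,
                     model=Model(cost), extensions=trainer.extensions)
main_loop.run()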
Example #6
    def training(self,
                 fea2obj,
                 batch_size,
                 learning_rate=0.005,
                 steprule='adagrad',
                 wait_epochs=5,
                 kl_weight_init=None,
                 klw_ep=50,
                 klw_inc_rate=0,
                 num_epochs=None):
        networkfile = self._config['net']

        n_epochs = num_epochs or int(self._config['nepochs'])
        reg_weight = float(self._config['loss_weight'])
        reg_type = self._config['loss_reg']
        numtrain = int(
            self._config['num_train']) if 'num_train' in self._config else None
        train_stream, num_samples_train = get_comb_stream(
            fea2obj, 'train', batch_size, shuffle=True, num_examples=numtrain)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj,
                                                      'dev',
                                                      batch_size=None,
                                                      shuffle=False)
        logger.info('sources: %s -- number of train/dev samples: %d/%d',
                    train_stream.sources, num_samples_train, num_samples_dev)

        t2idx = fea2obj['targets'].t2idx
        klw_init = kl_weight_init or (
            float(self._config['kld_weight']) if 'kld_weight' in self._config else 1)
        logger.info('kl_weight_init: %s', klw_init)
        kl_weight = shared_floatx(klw_init, 'kl_weight')
        entropy_weight = shared_floatx(1., 'entropy_weight')

        cost, p_at_1, _, KLD, logpy_xz, pat1_recog, misclassify_rate = build_model_new(
            fea2obj, len(t2idx), self._config, kl_weight, entropy_weight)

        cg = ComputationGraph(cost)

        weights = VariableFilter(roles=[WEIGHT])(cg.parameters)
        logger.info('Model weights are: %s', weights)
        if 'L2' in reg_type:
            cost += reg_weight * l2_norm(weights)
            logger.info('applying %s with weight: %f ', reg_type, reg_weight)

        dropout = -0.1  # negative value keeps dropout disabled
        if dropout > 0:
            cg = apply_dropout(cg, weights, dropout)
            cost = cg.outputs[0]

        cost.name = 'cost'
        logger.info('Our Algorithm is : %s, and learning_rate: %f', steprule,
                    learning_rate)
        if 'adagrad' in steprule:
            cnf_step_rule = AdaGrad(learning_rate)
        elif 'adadelta' in steprule:
            cnf_step_rule = AdaDelta(decay_rate=0.95)
        elif 'decay' in steprule:
            cnf_step_rule = RMSProp(learning_rate=learning_rate,
                                    decay_rate=0.90)
            cnf_step_rule = CompositeRule([cnf_step_rule, StepClipping(1)])
        elif 'momentum' in steprule:
            cnf_step_rule = Momentum(learning_rate=learning_rate, momentum=0.9)
        elif 'adam' in steprule:
            cnf_step_rule = Adam(learning_rate=learning_rate)
        else:
            # Fail fast instead of hitting a NameError at GradientDescent below.
            raise ValueError('Unknown steprule: %s' % steprule)

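        # on_unused_sources='warn' tolerates stream sources that the cost
        # graph never consumes instead of raising an error.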
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=cnf_step_rule,
                                    on_unused_sources='warn')
        #algorithm.add_updates(updates)
        gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
        step_norm = aggregation.mean(algorithm.total_step_norm)
        monitored_vars = [
            cost, gradient_norm, step_norm, p_at_1, KLD, logpy_xz, kl_weight,
            pat1_recog
        ]
        train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                               after_batch=True,
                                               before_first_epoch=True,
                                               prefix='tra')

        dev_monitor = DataStreamMonitoring(variables=[
            cost, p_at_1, KLD, logpy_xz, pat1_recog, misclassify_rate
        ],
                                           after_epoch=True,
                                           before_first_epoch=True,
                                           data_stream=dev_stream,
                                           prefix="dev")

        extensions = [
            dev_monitor,
            train_monitor,
            Timing(),
            TrackTheBest('dev_cost'),
            FinishIfNoImprovementAfter('dev_cost_best_so_far',
                                       epochs=wait_epochs),
            Printing(after_batch=False),  #, ProgressBar()
            FinishAfter(after_n_epochs=n_epochs),
            saveload.Load(networkfile + '.toload.pkl'),
        ] + track_best('dev_cost', networkfile + '.best.pkl')

        #extensions.append(SharedVariableModifier(kl_weight,
        #                                          lambda n, klw: numpy.cast[theano.config.floatX] (klw_inc_rate + klw), after_epoch=False, every_n_epochs=klw_ep, after_batch=False))
        #         extensions.append(SharedVariableModifier(entropy_weight,
        #                                                   lambda n, crw: numpy.cast[theano.config.floatX](crw - klw_inc_rate), after_epoch=False, every_n_epochs=klw_ep, after_batch=False))

        logger.info('number of parameters in the model: %d',
                    tensor.sum([p.size for p in cg.parameters]).eval())
        logger.info('Lookup table sizes: %s',
                    [p.size.eval() for p in cg.parameters if 'lt' in p.name])

        main_loop = MainLoop(data_stream=train_stream,
                             algorithm=algorithm,
                             model=Model(cost),
                             extensions=extensions)
        main_loop.run()
Example #7
def run():

    # Load Model
    net_size = 256  # hard-coded instead of loading the model (setting up the network takes too long)
    #net = vaegan.VAEGAN()
    #network_saver = saver.NetworkSaver('vaegan/models/', net=net)
    #network_saver.load()

    # DATA
    train_stream = get_stream(hdf5_file, 'train', batch_size)  #TODO jonathan ?
    test_stream = get_stream(hdf5_file, 'test', batch_size)  #TODO jonathan ?

    # MODEL
    x = T.tensor3('features', dtype='floatX')
    y = T.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]
    x = x.swapaxes(0, 1)
    y = y.swapaxes(0, 1)

    # More Config
    out_size = len(output_columns) - 1  # code_mode=RL-MDN
    latent_size = net_size
    in_size = latent_size + len(input_columns)

    # NN fprop
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)

    # COST
    cg = ComputationGraph(cost)
    extra_updates = []

    # RMS Prop training optimizer
    step_rules = [
        RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
        StepClipping(step_clipping)
    ]

    parameters_to_update = cg.parameters
    algorithm = GradientDescent(cost=cg.outputs[0],
                                parameters=parameters_to_update,
                                step_rule=CompositeRule(step_rules))
    algorithm.add_updates(
        extra_updates)  # TODO jonathan what is this, is this needed?

    # Extensions
    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [
        cost, step_rules[0].learning_rate, gradient_norm, step_norm
    ]

    test_monitor = DataStreamMonitoring(variables=[cost],
                                        after_epoch=True,
                                        before_first_epoch=True,
                                        data_stream=test_stream,
                                        prefix="test")
    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_epoch=True,
                                           before_first_epoch=True,
                                           prefix='train')

    set_train_flag = SetTrainFlag(after_epoch=True,
                                  before_epoch=True,
                                  flag=train_flag)

    # plot = Plot('Plotting example', channels=[['cost']], after_batch=True, open_browser=True)
    extensions = [
        set_train_flag,
        test_monitor,
        train_monitor,
        Timing(),
        Printing(after_epoch=True),
        FinishAfter(after_n_epochs=nepochs),
        saveload.Load(load_path),
        saveload.Checkpoint(last_path, every_n_epochs=10000),
    ] + track_best('test_cost',
                   save_path)  #+ track_best('train_cost', last_path)

    if learning_rate_decay not in (0, 1):
        extensions.append(
            SharedVariableModifier(step_rules[0].learning_rate,
                                   lambda n, lr: np.cast[theano.config.floatX]
                                   (learning_rate_decay * lr),
                                   after_epoch=False,
                                   every_n_epochs=lr_decay_every_n_epochs,
                                   after_batch=False))

    print('number of parameters in the model: ' +
          str(T.sum([p.size for p in cg.parameters]).eval()))
    # Finally build the main loop and train the model
    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()
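The get_stream helper is project-specific and not shown here. A minimal sketch of what it plausibly does, assuming the data is stored as a Fuel H5PYDataset (the signature and options are assumptions):

from fuel.datasets import H5PYDataset
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream

def get_stream(hdf5_file, which_set, batch_size):
    # Open one split of the HDF5 file and iterate over shuffled minibatches.
    dataset = H5PYDataset(hdf5_file, which_sets=(which_set,),
                          load_in_memory=True)
    return DataStream.default_stream(
        dataset,
        iteration_scheme=ShuffledScheme(dataset.num_examples, batch_size))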
Example #8
def load_models(
        models=hierarchy_models,
        in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
        out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
        hidden_size=hidden_size,
        num_layers=num_layers,
        model=layer_models[0]):
    predict_funcs = []
    initials = []
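    # Load one saved model per hierarchy level and compile prediction
    # functions for each task-specific output head.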
    for hierarchy_index in range(len(models)):
        saved_model = models[hierarchy_index]
        print('Loading model from {0}...'.format(models[hierarchy_index]))
        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(x,
                                      y,
                                      in_size,
                                      out_size,
                                      hidden_size,
                                      num_layers,
                                      model,
                                      training=False)
        main_loop = MainLoop(algorithm=None,
                             data_stream=None,
                             model=Model(cost),
                             extensions=[saveload.Load(saved_model)])
        for extension in main_loop.extensions:
            extension.main_loop = main_loop
        main_loop._run_extensions('before_training')
        bin_model = main_loop.model
        print('Model loaded. Building prediction function...')
        hiddens = []
        initials.append([])
        for i in range(num_layers - specialized_layer_num):
            brick = [
                b for b in bin_model.get_top_bricks()
                if b.name == layer_models[i] + str(i) + '-' + str(-1)
            ][0]
            hiddens.extend(
                VariableFilter(theano_name=brick.name + '_apply_states')(
                    bin_model.variables))
            hiddens.extend(
                VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
            initials[hierarchy_index].extend(
                VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
        specialized_count = len(game_tasks) if task_specialized else 0
        for task in range(specialized_count):
            for i in range(num_layers - specialized_layer_num, num_layers):
                brick = [
                    b for b in bin_model.get_top_bricks()
                    if b.name == layer_models[i] + str(i) + '-' + str(task)
                ][0]
                hiddens.extend(
                    VariableFilter(theano_name=brick.name + '_apply_states')(
                        bin_model.variables))
                hiddens.extend(
                    VariableFilter(theano_name=brick.name +
                                   '_apply_cells')(cells))
                initials[hierarchy_index].extend(
                    VariableFilter(roles=[roles.INITIAL_STATE])(
                        brick.parameters))
        output_count = len(game_tasks) if task_specialized else 1
        predict_funcs.append([])
        for task in range(output_count):
            predict_funcs[hierarchy_index].append(
                theano.function([x], hiddens + [y_hat[task]]))
    return predict_funcs, initials