Esempio n. 1
0
def evaluate(model, load_path):
    """Restore the best saved parameters into `model` and visualize attention.

    Parameters are read from `<load_path>/trained_params_best.npz`, copied
    into the Blocks parameters of `model` (shapes are asserted to match),
    then the model's location/scale outputs are computed on one training
    batch and the first 10 samples are rendered via `visualize_attention`.

    :param model: object exposing `cost`, `location` and `scale` Theano
        variables (built by the training script).
    :param load_path: directory containing `trained_params_best.npz`.
    """
    # BUG FIX: .npz archives are binary; the original opened the file in
    # text mode, which corrupts the stream on Windows and fails under
    # Python 3. Open in 'rb' so np.load reads raw bytes.
    with open(load_path + '/trained_params_best.npz', 'rb') as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # Blocks prefixes parameter names with a path separator,
            # but the archive keys omit it: '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            # Guard against loading a checkpoint from a different model.
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    train_data_stream, valid_data_stream = get_cmv_v2_streams(100)
    # Batch layout: T x B x F (time, batch, features) — assumed from the
    # slicing below; confirm against get_cmv_v2_streams.
    data = train_data_stream.get_epoch_iterator().next()
    cg = ComputationGraph(model.cost)
    f = theano.function(cg.inputs, [model.location, model.scale],
                        on_unused_input='ignore',
                        allow_input_downcast=True)
    res = f(data[1], data[0])
    # Render attention for the first 10 samples of the batch.
    for i in range(10):
        visualize_attention(data[0][:, i, :],
                            res[0][:, i, :],
                            res[1][:, i, :],
                            prefix=str(i))
Esempio n. 2
0
def evaluate(model, load_path):
    """Restore the best saved parameters into `model` and visualize attention.

    Reads `<load_path>/trained_params_best.npz`, copies the stored arrays
    into the model's Blocks parameters (shape-checked), evaluates the
    location/scale outputs on one training batch, and visualizes the
    first 10 samples.

    :param model: object exposing `cost`, `location` and `scale` Theano
        variables.
    :param load_path: directory containing `trained_params_best.npz`.
    """
    # BUG FIX: open the .npz in binary mode — np.load needs raw bytes;
    # text mode corrupts the archive on Windows and fails on Python 3.
    with open(load_path + '/trained_params_best.npz', 'rb') as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        # Indentation normalized to 4 spaces (the original loop body was
        # over-indented by 12 extra columns).
        for param_name in params_names:
            param = params_dicts[param_name]
            # Strip the Blocks path prefix: '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            # Guard against loading a checkpoint from a different model.
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    train_data_stream, valid_data_stream = get_cmv_v2_streams(100)
    # Batch layout: T x B x F (time, batch, features) — assumed from the
    # slicing below; confirm against get_cmv_v2_streams.
    data = train_data_stream.get_epoch_iterator().next()
    cg = ComputationGraph(model.cost)
    f = theano.function(cg.inputs, [model.location, model.scale],
                        on_unused_input='ignore',
                        allow_input_downcast=True)
    res = f(data[1], data[0])
    # Render attention for the first 10 samples of the batch.
    for i in range(10):
        visualize_attention(data[0][:, i, :],
                            res[0][:, i, :], res[1][:, i, :], prefix=str(i))
Esempio n. 3
0
def train(model, batch_size=100, num_epochs=1000):
    cost = model.cost
    monitorings = model.monitorings
    # Setting Loggesetr
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/CMV_V2_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    clipping = StepClipping(threshold=np.cast[floatX](10))

    adam = Adam(learning_rate=model.lr_var)
    step_rule = CompositeRule([adam, clipping])
    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=step_rule)

    monitored_variables = [
        model.lr_var, cost,
        aggregation.mean(training_algorithm.total_gradient_norm)
    ] + monitorings

    blocks_model = Model(cost)
    params_dicts = blocks_model.get_parameter_dict()
    for name, param in params_dicts.iteritems():
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_cmv_v2_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            LRDecay(model.lr_var, [0.001, 0.0001, 0.00001, 0.000001],
                    [8, 15, 30, 1000],
                    after_epoch=True),
            Printing()
        ])
    main_loop.run()
Esempio n. 4
0
def train(model, batch_size=100, num_epochs=1000):
    cost = model.cost
    monitorings = model.monitorings
    # Setting Loggesetr
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/CMV_V2_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    clipping = StepClipping(threshold=np.cast[floatX](10))

    adam = Adam(learning_rate=model.lr_var)
    step_rule = CompositeRule([adam, clipping])
    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=step_rule)

    monitored_variables = [
        model.lr_var,
        cost,
        aggregation.mean(training_algorithm.total_gradient_norm)] + monitorings

    blocks_model = Model(cost)
    params_dicts = blocks_model.get_parameter_dict()
    for name, param in params_dicts.iteritems():
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_cmv_v2_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            LRDecay(model.lr_var,
                    [0.001, 0.0001, 0.00001, 0.000001],
                    [8, 15, 30, 1000],
                    after_epoch=True),
            Printing()])
    main_loop.run()