Example #1
def Adam(cost, params, lr=0.002, b1=0.2, b2=0.001, e=1e-8):
    decay_factor = 1 - e
    updates = []
    grads = T.grad(cost, params)
    i = shared_floatx(0., "adam_t")
    i_t = i + 1
    updates.append((i, i_t))
    lr = (lr * T.sqrt(1. - (1. - b2) ** i_t) /
          (1. - (1. - b1) ** i_t))
    b1_t = 1 - (1 - b1) * decay_factor ** (i_t - 1)

    updates_init = []
    for p, g in zip(params, grads):
        m = shared_floatx(p.get_value() * 0., "adam_m_" + p.name)
        v = shared_floatx(p.get_value() * 0., "adam_v_" + p.name)

        # The first moment averages the stored m with the gradient,
        # not the gradient with itself.
        m_t = b1_t * g + (1 - b1_t) * m
        v_t = b2 * T.sqr(g) + (1 - b2) * v
        g_t = m_t / (T.sqrt(v_t) + e)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p - lr * g_t))
        updates_init.append((m, 0 * m))
        updates_init.append((v, 0 * v))
    return updates, updates_init
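
A minimal usage sketch for this helper (assumptions: T is theano.tensor, shared_floatx comes from blocks.utils, and the tiny quadratic cost is purely illustrative):

import numpy
import theano
import theano.tensor as T
from blocks.utils import shared_floatx

x = T.vector('x')
W = shared_floatx(numpy.zeros(2), name='W')
cost = T.sum((x - W) ** 2)

updates, updates_init = Adam(cost, [W], lr=0.002)
train_step = theano.function([x], cost, updates=updates)   # one Adam step per call
reset_moments = theano.function([], updates=updates_init)  # zero the m/v buffers

train_step(numpy.ones(2, dtype=theano.config.floatX))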
Example #2
def test_perclass_accuracy_monitor():
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)
    label_i_to_c = {0:"a", 1:"b", 2:"c"}
    test_probs = shared_floatx(numpy.array([
        [0.0, 0.0, 1.0],
        [0.75, 0.25, 0.0],
        [0.0, 0.75, 0.25],
        [0.25, 0.75, 0.0],
    ], dtype=floatX))
    targets = shared_floatx(numpy.array([
        [2.0],
        [0.0],
        [1.0],
        [2.0]
    ], dtype=floatX))
    perclass_accuracy_monitor = PerClassAccuracyMonitor(datastream,
        prediction=numpy.argmax(test_probs, axis=1),
        targets=targets.ravel(),
        label_i_to_c=label_i_to_c)
    perclass_accuracy_monitor.main_loop = setup_mainloop([])
    perclass_accuracy_monitor.do('after_batch')

    assert perclass_accuracy_monitor.main_loop.log[0]['perclass accuracy_a']==1.0
    assert perclass_accuracy_monitor.main_loop.log[0]['perclass accuracy_b']==1.0
    assert perclass_accuracy_monitor.main_loop.log[0]['perclass accuracy_c']==0.5
Example #3
File: __init__.py  Project: andhus/blocks
    def compute_step(self, parameter, previous_step):
        mean_square_step_tm1 = shared_floatx(parameter.get_value() * 0.,
                                             "mean_square_step_tm1")
        add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
        mean_square_delta_x_tm1 = shared_floatx(parameter.get_value() * 0.,
                                                "mean_square_delta_x_tm1")
        add_role(mean_square_delta_x_tm1, ALGORITHM_BUFFER)

        mean_square_step_t = (
            self.decay_rate * mean_square_step_tm1 +
            (1 - self.decay_rate) * tensor.sqr(previous_step)
        )

        rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
        rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
        delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step

        mean_square_delta_x_t = (
            self.decay_rate * mean_square_delta_x_tm1 +
            (1 - self.decay_rate) * tensor.sqr(delta_x_t)
        )

        step = delta_x_t
        updates = [(mean_square_step_tm1, mean_square_step_t),
                   (mean_square_delta_x_tm1, mean_square_delta_x_t)]
        return step, updates
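
This compute_step matches the AdaDelta step rule shipped with Blocks; it is normally not called directly but handed to GradientDescent as a step_rule. A minimal sketch, assuming the standard Blocks/Theano imports:

import numpy
from theano import tensor
from blocks.algorithms import GradientDescent, AdaDelta
from blocks.utils import shared_floatx

x = tensor.vector('features')
W = shared_floatx(numpy.zeros(2), name='W')
cost = tensor.sum((x - W) ** 2)
cost.name = 'cost'

algorithm = GradientDescent(cost=cost, parameters=[W],
                            step_rule=AdaDelta(decay_rate=0.95, epsilon=1e-6))
algorithm.initialize()  # compiles the updates; compute_step runs once per parameter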
Example #4
def test_perclass_accuracy_monitor():
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)
    label_i_to_c = {0: "a", 1: "b", 2: "c"}
    test_probs = shared_floatx(
        numpy.array([
            [0.0, 0.0, 1.0],
            [0.75, 0.25, 0.0],
            [0.0, 0.75, 0.25],
            [0.25, 0.75, 0.0],
        ],
                    dtype=floatX))
    targets = shared_floatx(
        numpy.array([[2.0], [0.0], [1.0], [2.0]], dtype=floatX))
    perclass_accuracy_monitor = PerClassAccuracyMonitor(
        datastream,
        prediction=numpy.argmax(test_probs, axis=1),
        targets=targets.ravel(),
        label_i_to_c=label_i_to_c)
    perclass_accuracy_monitor.main_loop = setup_mainloop([])
    perclass_accuracy_monitor.do('after_batch')

    assert perclass_accuracy_monitor.main_loop.log[0][
        'perclass accuracy_a'] == 1.0
    assert perclass_accuracy_monitor.main_loop.log[0][
        'perclass accuracy_b'] == 1.0
    assert perclass_accuracy_monitor.main_loop.log[0][
        'perclass accuracy_c'] == 0.5
Example #5
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   parameters=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([
            main_loop.log[i]['train1_W_sum'] for i in range(1, n_batches + 1)
        ]) / n_batches)
Example #6
def test_gradient_descent_updates_keyword():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    z = shared_floatx(5)
    algorithm = GradientDescent(gradients=OrderedDict([(W, W/2)]),
                                updates=[(z, z + 1)])
    assert len(algorithm.updates) == 2
    assert z in dict(algorithm.updates)
Example #7
def plot_energy_surface(model):
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator, FormatStrFormatter
    import matplotlib.pyplot as plt

    (x1, x2) = numpy.meshgrid(numpy.arange(-0.5, 0.5, 0.05), numpy.arange(-0.5, 0.5, 0.05))
    x = shared_floatx(numpy.vstack((x1.flatten(), x2.flatten())).T)
    h = shared_floatx(numpy.zeros((x.get_value().shape[0], model.nhid)))
    map_f = theano.function([], updates=OrderedDict([(h, model.map_update(x, h))]))
    energy_f = theano.function([], [model.energy(x, h)])

    for i in range(100):
        map_f()
    (E_,) = energy_f()
    E_ = E_.reshape(x1.shape)
    fig = plt.figure()
    ax = fig.gca(projection="3d")
    surf = ax.plot_surface(x1, x2, E_, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax.set_zlim(numpy.min(E_), numpy.max(E_))
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.savefig("E.png")  # save before show() so the written figure is not blank
    plt.show()
Example #8
 def __init__(self, decay_rate=0.95, epsilon=1e-6):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     self.decay_rate = shared_floatx(decay_rate, "decay_rate")
     add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
     self.epsilon = shared_floatx(epsilon, "epsilon")
     add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)
Example #9
File: __init__.py  Project: vyraun/blocks
 def __init__(self, decay_rate=0.95, epsilon=1e-6):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     self.decay_rate = shared_floatx(decay_rate, "decay_rate")
     add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
     self.epsilon = shared_floatx(epsilon, "epsilon")
     add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)
Example #10
def test_gradient_descent_updates_keyword():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    z = shared_floatx(5)
    algorithm = GradientDescent(gradients=OrderedDict([(W, W / 2)]),
                                updates=[(z, z + 1)])
    assert len(algorithm.updates) == 2
    assert z in dict(algorithm.updates)
Example #11
def test_confusion_matrix():
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)
    label_i_to_c = {0: "a", 1: "b", 2: "c"}
    test_probs = shared_floatx(
        numpy.array([[0.75, 0.0, 0.0], [0.75, 0.0, 0.0], [0.0, 0.0, 0.75],
                     [0.0, 0.0, 0.75], [0.75, 0.0, 0.0], [0.0, 0.0, 0.75]],
                    dtype=floatX))
    targets = shared_floatx(
        numpy.array([[2.0], [0.0], [2.0], [2.0], [0.0], [1.0]], dtype=floatX))
    d = DirectoryCreator(directory="confusionMatrixTest")
    extension = ConfusionMatrixMonitor(datastream,
                                       prediction=numpy.argmax(test_probs,
                                                               axis=1),
                                       targets=targets.ravel(),
                                       dest_directory="confusionMatrixTest",
                                       every_n_batches=3)
    main_loop = setup_mainloop([d, extension])

    main_loop.run()
    path = 'confusionMatrixTest/confusion_iterations_3.npz'
    expected = numpy.array(
        [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [(1.0 / 3.0), 0.0, (2.0 / 3.0)]],
        dtype=floatX)
    assert_allclose(numpy.load(path), expected)
    shutil.rmtree('confusionMatrixTest')
Example #12
def test_step_clipping_no_threshold_regression():
    """Test regression for #1145, incorrect output when threshold=None."""
    rule1 = StepClipping()
    assert rule1.threshold is None
    gradients = {0: shared_floatx(3.0), 1: shared_floatx(4.0)}
    clipped1, updates = rule1.compute_steps(gradients)
    assert len(updates) == 0
    assert clipped1 == gradients
Example #13
def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Example #14
 def __init__(self, learning_rate=0.002, beta1=0.1, beta2=0.001, epsilon=1e-8, decay_factor=(1 - 1e-8)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.beta1 = shared_floatx(beta1, "beta1")
     self.beta2 = shared_floatx(beta2, "beta2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_factor = shared_floatx(decay_factor, "decay_factor")
     for param in [self.learning_rate, self.beta1, self.beta2, self.epsilon, self.decay_factor]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #15
def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Example #16
def test_gradient_descent_non_match_parameters_gradients_not_ordered():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    z = shared_floatx(5)
    assert_raises_regex(ValueError,
                        "fixed order",
                        GradientDescent,
                        parameters=[z],
                        gradients={W: 2 * W})
Example #17
def test_step_clipping_no_threshold_regression():
    """Test regression for #1145, incorrect output when threshold=None."""
    rule1 = StepClipping()
    assert rule1.threshold is None
    gradients = {0: shared_floatx(3.0), 1: shared_floatx(4.0)}
    clipped1, updates = rule1.compute_steps(gradients)
    assert len(updates) == 0
    assert clipped1 == gradients
Example #18
    def __init__(self, eta=0, gamma=0.55, seed=180891):

        self.eta_sqrt = shared_floatx(sqrt(eta), "eta")
        add_role(self.eta_sqrt, ALGORITHM_HYPERPARAMETER)

        self.gamma_half = shared_floatx(gamma/2, "gamma")
        add_role(self.gamma_half, ALGORITHM_HYPERPARAMETER)

        self.theano_random = rng_mrg.MRG_RandomStreams(seed=seed)
Example #19
def test_step_clipping():
    rule1 = StepClipping(4)
    rule2 = StepClipping(5)

    gradients = {0: shared_floatx(3.0), 1: shared_floatx(4.0)}
    clipped1, _ = rule1.compute_steps(gradients)
    assert_allclose(clipped1[0].eval(), 12 / 5.0)
    assert_allclose(clipped1[1].eval(), 16 / 5.0)
    clipped2, _ = rule2.compute_steps(gradients)
    assert_allclose(clipped2[0].eval(), 3.0)
    assert_allclose(clipped2[1].eval(), 4.0)
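
The expected values follow from global-norm clipping: the combined step norm here is sqrt(3**2 + 4**2) = 5, so a threshold of 4 rescales both steps by 4/5 (giving 12/5 and 16/5), while a threshold of 5 is not exceeded and leaves the steps unchanged.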
Example #20
def test_remove_not_finite():
    rule1 = RemoveNotFinite()
    rule2 = RemoveNotFinite(1.)

    gradients = {1: shared_floatx(numpy.nan), 2: shared_floatx(numpy.inf)}
    rval1, _ = rule1.compute_steps(gradients)
    assert_allclose(rval1[1].eval(), 0.1)
    assert_allclose(rval1[2].eval(), 0.2)
    rval2, _ = rule2.compute_steps(gradients)
    assert_allclose(rval2[1].eval(), 1.0)
    assert_allclose(rval2[2].eval(), 2.0)
Example #21
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, params=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)
Example #22
def test_remove_not_finite():
    rule1 = RemoveNotFinite()
    rule2 = RemoveNotFinite(1.)

    gradients = {1: shared_floatx(numpy.nan), 2: shared_floatx(numpy.inf)}
    rval1, _ = rule1.compute_steps(gradients)
    assert_allclose(rval1[1].eval(), 0.1)
    assert_allclose(rval1[2].eval(), 0.2)
    rval2, _ = rule2.compute_steps(gradients)
    assert_allclose(rval2[1].eval(), 1.0)
    assert_allclose(rval2[2].eval(), 2.0)
Example #23
def test_step_clipping():
    rule1 = StepClipping(4)
    rule2 = StepClipping(5)

    gradients = {0: shared_floatx(3.0), 1: shared_floatx(4.0)}
    clipped1, _ = rule1.compute_steps(gradients)
    assert_allclose(clipped1[0].eval(), 12 / 5.0)
    assert_allclose(clipped1[1].eval(), 16 / 5.0)
    clipped2, _ = rule2.compute_steps(gradients)
    assert_allclose(clipped2[0].eval(), 3.0)
    assert_allclose(clipped2[1].eval(), 4.0)
Example #24
 def __init__(self, D_params, D_kind, momentum=0.):
     self.momentum = shared_floatx(momentum)
     # dictionary of velocities
     self.velocities = OrderedDict()
     self.D_kind = {}
     for p_name in D_params:
         param_i = D_params[p_name]
         velocity = shared_floatx(param_i.get_value() * 0.)
         velocity.name = p_name + "_momentum"
         self.velocities[velocity.name] = velocity
         self.D_kind[velocity.name] = D_kind[p_name]
Example #25
 def __init__(self, learning_rate=0.002,
              beta1=0.1, beta2=0.001, epsilon=1e-8,
              decay_factor=(1 - 1e-8)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.beta1 = shared_floatx(beta1, "beta1")
     self.beta2 = shared_floatx(beta2, "beta2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_factor = shared_floatx(decay_factor, "decay_factor")
     for param in [self.learning_rate, self.beta1, self.beta2, self.epsilon,
                   self.decay_factor]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #26
 def __init__(self, learning_rate=0.002,
              mu1=0.99, nu2=0.999, epsilon=1e-8,
              decay_prod=(1.)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.mu1 = shared_floatx(mu1, "mu1")
     self.nu2 = shared_floatx(nu2, "nu2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_prod = shared_floatx(decay_prod, "decay_prod")
     for param in [self.learning_rate, self.mu1, self.nu2, self.epsilon,
                   self.decay_prod]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #27
def test_updates_algorithm_add_updates():
    n = shared_floatx(1)
    m = shared_floatx(0)
    algorithm = UpdatesAlgorithm(updates=[(n, n + 1)])
    algorithm.add_updates([(m, n % 2)])
    assert len(algorithm.updates) == 2
    algorithm.initialize()
    algorithm.process_batch({})
    assert_allclose(n.get_value(), 2)
    assert_allclose(m.get_value(), 1)
    algorithm.process_batch({})
    assert_allclose(n.get_value(), 3)
    assert_allclose(m.get_value(), 0)
Example #28
 def __init__(
     self, initial_threshold=1.0, stdevs=4, decay=0.96, clip_to_mean=True, quick_variance_convergence=True, **kwargs
 ):
     super(AdaptiveStepClipping, self).__init__(**kwargs)
     self.gnorm_log_ave = shared_floatx(numpy.log(initial_threshold), name="gnorm_log_ave")
     self.gnorm_log2_ave = shared_floatx(0, name="gnorm_log2_ave")
     self.adapt_steps = shared_floatx(0, name="adapt_steps")
     self.clip_threshold = shared_floatx(numpy.nan, name="clip_threshold")
     self.clip_level = shared_floatx(numpy.nan, name="clip_level")
     self.decay = decay
     self.stdevs = stdevs
     self.clip_to_mean = clip_to_mean
     self.quick_variance_convergence = quick_variance_convergence
Example #29
def test_updates_algorithm_add_updates():
    n = shared_floatx(1)
    m = shared_floatx(0)
    algorithm = UpdatesAlgorithm(updates=[(n, n + 1)])
    algorithm.add_updates([(m, n % 2)])
    assert len(algorithm.updates) == 2
    algorithm.initialize()
    algorithm.process_batch({})
    assert_allclose(n.get_value(), 2)
    assert_allclose(m.get_value(), 1)
    algorithm.process_batch({})
    assert_allclose(n.get_value(), 3)
    assert_allclose(m.get_value(), 0)
Example #30
    def testing(self, fea2obj):
        config = self._config
        dsdir = config['dsdir']
        devfile = dsdir + '/dev.txt'
        testfile = dsdir + '/test.txt'
        networkfile = config['net']
        batch_size = 10000  # int(config['batchsize'])
        devMentions = load_ent_ds(devfile)
        tstMentions = load_ent_ds(testfile)
        logger.info('#dev: %d #test: %d', len(devMentions), len(tstMentions))

        main_loop = load(networkfile + '.best.pkl')
        logger.info('Model loaded. Building prediction function...')
        old_model = main_loop.model
        logger.info(old_model.inputs)
        sources = [inp.name for inp in old_model.inputs]
#         fea2obj = build_input_objs(sources, config)
        t2idx = fea2obj['targets'].t2idx
        deterministic = str_to_bool(config['use_mean_pred']) if 'use_mean_pred' in config else True
        kl_weight = shared_floatx(0.001, 'kl_weight')
        entropy_weight = shared_floatx(0.001, 'entropy_weight')

        cost, _, y_hat, _, _, _, _ = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight, deterministic=deterministic, test=True)
        model = Model(cost)
        model.set_parameter_values(old_model.get_parameter_values())

        theinputs = []
        for fe in fea2obj.keys():
            if 'targets' in fe:
                continue
            for inp in model.inputs:
                if inp.name == fe:
                    theinputs.append(inp)

#         theinputs = [inp for inp in model.inputs if inp.name != 'targets']
        print "theinputs: ", theinputs
        predict = theano.function(theinputs, y_hat)

        test_stream, num_samples_test = get_comb_stream(fea2obj, 'test', batch_size, shuffle=False)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size, shuffle=False)
        logger.info('sources: %s -- number of test/dev samples: %d/%d', test_stream.sources, num_samples_test, num_samples_dev)
        idx2type = {idx:t for t,idx in t2idx.iteritems()}

        logger.info('Starting to apply on dev inputs...')
        self.applypredict(theinputs, predict, dev_stream, devMentions, num_samples_dev, batch_size, os.path.join(config['exp_dir'], config['matrixdev']), idx2type)
        logger.info('...apply on dev data finished')

        logger.info('Starting to apply on test inputs...')
        self.applypredict(theinputs, predict, test_stream, tstMentions, num_samples_test, batch_size, os.path.join(config['exp_dir'], config['matrixtest']), idx2type)
        logger.info('...apply on test data finished')
Example #31
    def testing(self, fea2obj):
        config = self._config
        dsdir = config['dsdir']
        devfile = dsdir + '/dev.txt'
        testfile = dsdir + '/test.txt'
        networkfile = config['net']
        batch_size = 10000  # int(config['batchsize'])
        devMentions = load_ent_ds(devfile)
        tstMentions = load_ent_ds(testfile)
        logger.info('#dev: %d #test: %d', len(devMentions), len(tstMentions))
        
        main_loop = load(networkfile + '.best.pkl')
        logger.info('Model loaded. Building prediction function...')
        old_model = main_loop.model
        logger.info(old_model.inputs)
        sources = [inp.name for inp in old_model.inputs]
#         fea2obj = build_input_objs(sources, config)
        t2idx = fea2obj['targets'].t2idx
        deterministic = str_to_bool(config['use_mean_pred']) if 'use_mean_pred' in config else True 
        kl_weight = shared_floatx(0.001, 'kl_weight')
        entropy_weight = shared_floatx(0.001, 'entropy_weight')

        cost, _, y_hat, _, _, _, _ = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight, deterministic=deterministic, test=True)
        model = Model(cost)
        model.set_parameter_values(old_model.get_parameter_values())
        
        theinputs = []
        for fe in fea2obj.keys():
            if 'targets' in fe:
                continue
            for inp in model.inputs:
                if inp.name == fe:
                    theinputs.append(inp)
                    
#         theinputs = [inp for inp in model.inputs if inp.name != 'targets']
        print "theinputs: ", theinputs
        predict = theano.function(theinputs, y_hat)
        
        test_stream, num_samples_test = get_comb_stream(fea2obj, 'test', batch_size, shuffle=False)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size, shuffle=False)
        logger.info('sources: %s -- number of test/dev samples: %d/%d', test_stream.sources, num_samples_test, num_samples_dev)
        idx2type = {idx:t for t,idx in t2idx.iteritems()}
        
        logger.info('Starting to apply on dev inputs...')
        self.applypredict(theinputs, predict, dev_stream, devMentions, num_samples_dev, batch_size, os.path.join(config['exp_dir'], config['matrixdev']), idx2type)
        logger.info('...apply on dev data finished')
        
        logger.info('Starting to apply on test inputs...')
        self.applypredict(theinputs, predict, test_stream, tstMentions, num_samples_test, batch_size, os.path.join(config['exp_dir'], config['matrixtest']), idx2type)
        logger.info('...apply on test data finished')
Example #32
 def __init__(self, initial_threshold=1.0, stdevs=4, decay=0.96,
              clip_to_mean=True, quick_variance_convergence=True,
              **kwargs):
     super(AdaptiveStepClipping, self).__init__(**kwargs)
     self.gnorm_log_ave = shared_floatx(numpy.log(initial_threshold),
                                        name='gnorm_log_ave')
     self.gnorm_log2_ave = shared_floatx(0, name='gnorm_log2_ave')
     self.adapt_steps = shared_floatx(0, name='adapt_steps')
     self.clip_threshold = shared_floatx(numpy.nan, name='clip_threshold')
     self.clip_level = shared_floatx(numpy.nan, name='clip_level')
     self.decay = decay
     self.stdevs = stdevs
     self.clip_to_mean = clip_to_mean
     self.quick_variance_convergence = quick_variance_convergence
Example #33
 def __init__(self, D_params, D_kind, decay_rate=0.9, max_scaling=1e5):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     if max_scaling <= 0:
         raise ValueError("max. scaling needs to be greater than 0")
     self.decay_rate = shared_floatx(decay_rate)
     self.epsilon = 1. / max_scaling
     self.velocities = OrderedDict()
     self.D_kind = {}
     for p_name in D_params:
         param_i = D_params[p_name]
         velocity = shared_floatx(param_i.get_value() * 0.)
         velocity.name = p_name + "_decay"
         self.velocities[velocity.name] = velocity
         self.D_kind[velocity.name] = D_kind[p_name]
Example #34
 def __init__(self, threshold, axis=None):
     axis = pack(axis) if axis is not None else ()
     self.axis = set(axis)
     self.threshold = shared_floatx(threshold, "threshold")
     add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
     if len(axis) != len(self.axis):
         raise ValueError("axis must be unique")
Example #35
File: __init__.py  Project: Fdenpc/blocks
 def __init__(self, decay_rate=0.9, max_scaling=1e5):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     if max_scaling <= 0:
         raise ValueError("max. scaling needs to be greater than 0")
     self.decay_rate = shared_floatx(decay_rate)
     self.epsilon = 1. / max_scaling
Example #36
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W],
                          step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value,
                    0.001 * 0.2 ** n_batches,
                    atol=1e-5)
Example #37
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
Example #38
 def compute_step(self, parameter, previous_step):
     velocity = shared_floatx(parameter.get_value() * 0.)
     velocity_update = self.momentum*velocity + previous_step
     step = (self.momentum**2 * velocity + previous_step *
             (1 + self.momentum))
     updates = [(velocity, velocity_update)]
     return step, updates
Example #39
File: __init__.py  Project: vyraun/blocks
 def __init__(self, threshold, axis=None):
     axis = pack(axis) if axis is not None else ()
     self.axis = set(axis)
     self.threshold = shared_floatx(threshold, "threshold")
     add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
     if len(axis) != len(self.axis):
         raise ValueError("axis must be unique")
Example #40
def test_linear_decay():
    lr = shared_floatx(100.0)
    decay = LinearDecay(lr, 1.0)
    assert_allclose(decay.compute_value(0.0), 100.0)
    assert_allclose(decay.compute_value(50), 50.0)
    assert_allclose(decay.compute_value(100), 0.0)
    assert_allclose(decay.compute_value(200), 0.0)
Example #41
def test_polynomial_decay():
    lr = shared_floatx(100.0)
    decay = PolynomialDecay(lr, 100.0, 1.0)
    assert_allclose(decay.compute_value(0), 100.0)
    assert_allclose(decay.compute_value(50), 50.0)
    assert_allclose(decay.compute_value(100), 0.0)
    assert_allclose(decay.compute_value(200), 0.0)
Example #42
def test_graph_inputs():
    a = tensor.matrix('a')
    b = shared_floatx(0, 'b')
    c = 3

    d = a + b + c
    assert graph_inputs([d]) == [a]
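
As the assertion shows, graph_inputs returns only the free inputs of the graph: the shared variable b and the constant 3 are baked into the computation, so only the tensor variable a is reported.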
Example #43
def setup_mainloop(extension, iteration_scheme=None):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by
    # `max_value`, which has a default value of 100. If we're still using
    # `maxval` by accident, this test should fail complaining that
    # the progress bar has received a value out of range.
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2]] * 101]
    dataset = IterableDataset(dict(features=features))
    data_stream = DataStream(dataset, iteration_scheme=iteration_scheme)

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None,
        data_stream=data_stream,
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
Example #44
def test_linear_decay():
    lr = shared_floatx(100.0)
    decay = LinearDecay(lr, 1.0)
    assert_allclose(decay.compute_value(0.0), 100.0)
    assert_allclose(decay.compute_value(50), 50.0)
    assert_allclose(decay.compute_value(100), 0.0)
    assert_allclose(decay.compute_value(200), 0.0)
Example #45
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by
    # `max_value`, which has a default value of 100. If we're still using
    # `maxval` by accident, this test should fail complaining that
    # the progress bar has received a value out of range.
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2]] * 101]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
Example #46
def test_polynomial_decay():
    lr = shared_floatx(100.0)
    decay = PolynomialDecay(lr, 100.0, 1.0)
    assert_allclose(decay.compute_value(0), 100.0)
    assert_allclose(decay.compute_value(50), 50.0)
    assert_allclose(decay.compute_value(100), 0.0)
    assert_allclose(decay.compute_value(200), 0.0)
Example #47
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W],
                          step_rule=step_rule)
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10. / n)
            )])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10. / n_batches))
Example #48
    def __init__(self, decay=0.95,
                 gamma_clip=0.0,
                 grad_clip=None,
                 start_var_reduction=0,
                 delta_clip=25,
                 gamma_reg=1e-6,
                 slow_decay=0.995,
                 use_adagrad=True,
                 perform_update=True,
                 skip_nan_inf=False,
                 use_corrected_grad=True):

        assert decay >= 0.
        assert decay < 1.

        self.start_var_reduction = start_var_reduction
        self.delta_clip = delta_clip
        self.gamma_clip = gamma_clip
        self.grad_clip = grad_clip
        self.slow_decay = slow_decay
        self.decay = shared_floatx(decay, "decay")
        self.use_corrected_grad = use_corrected_grad
        self.use_adagrad = use_adagrad
        self.gamma_reg = gamma_reg
        self.damping = 1e-7
        self.perform_update = perform_update

        # We have to bound tau to prevent it from growing to an
        # arbitrarily large number, otherwise that causes numerical
        # instabilities for very deep networks. Note that once tau
        # becomes very large, it will keep increasing indefinitely.
        self.skip_nan_inf = skip_nan_inf
        self.upper_bound_tau = 1e7
        self.lower_bound_tau = 1.5
Example #49
def test_graph_inputs():
    a = tensor.matrix('a')
    b = shared_floatx(0, 'b')
    c = 3

    d = a + b + c
    assert graph_inputs([d]) == [a]
Example #50
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector("features")
    y = tensor.scalar("targets")
    W = shared_floatx([0, 0], name="W")
    cost = ((x * W).sum() - y) ** 2
    cost.name = "cost"

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)
    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(step_rule.learning_rate, lambda n: numpy.cast[theano.config.floatX](10.0 / n)),
        ],
    )

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(), numpy.cast[theano.config.floatX](10.0 / n_batches))
Example #51
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate, lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2**n_batches, atol=1e-5)
Example #52
def test_model_handles_brickless_parameteres():
    x = tensor.matrix('x')
    v = shared_floatx(numpy.zeros((10, 10)), name='V')
    add_role(v, PARAMETER)
    y = x.dot(v)
    model = Model(y)
    assert list(model.get_parameter_dict().items()) == [('V', v)]
Example #53
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W], step_rule=Scale(1e-3))

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=algorithm,
                         extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop
Example #54
def setup_mainloop(extensions):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=Model(cost), data_stream=datastream,
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            ] + extensions)

    return main_loop
Example #55
def setup_mainloop(extensions):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost,
                                parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=datastream,
                         algorithm=algorithm,
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                         ] + extensions)

    return main_loop
Example #56
 def __init__(self, decay_rate=0.9, max_scaling=1e5):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     if max_scaling <= 0:
         raise ValueError("max. scaling needs to be greater than 0")
     self.decay_rate = shared_floatx(decay_rate)
     self.epsilon = 1. / max_scaling
Example #57
 def __init__(self,
              learning_rate=0.002,
              mu1=0.99,
              nu2=0.999,
              epsilon=1e-8,
              decay_prod=(1.)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.mu1 = shared_floatx(mu1, "mu1")
     self.nu2 = shared_floatx(nu2, "nu2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_prod = shared_floatx(decay_prod, "decay_prod")
     for param in [
             self.learning_rate, self.mu1, self.nu2, self.epsilon,
             self.decay_prod
     ]:
         add_role(param, ALGORITHM_HYPERPARAMETER)