Code example #1
File: accumulators.py Project: leotam/synkhronos
def make_accum_f(mode, var, op=None):
    import theano
    import theano.tensor as T
    dtype = var.dtype
    broadcastable = var.broadcastable
    bcast = broadcastable_string(broadcastable)
    ndim = var.ndim

    if mode == "avg_shared":
        import numpy as np
        arr = np.zeros([1] * ndim, dtype=dtype)
        s = theano.shared(arr, 's', broadcastable=broadcastable)
        y = T.scalar('avg_fac', dtype=dtype)
        name = make_name(mode, dtype, bcast, op)
        return theano.function([y], updates={s: s * y}, name=name)

    t_type = T.TensorType(dtype=dtype, broadcastable=broadcastable)
    x = t_type('accum').transfer(None)
    if mode == "reduce":
        y = t_type('slice').transfer(None)
        T_op = getattr(T, op)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    elif mode == "gather":
        y = t_type('slice').transfer(None)
        z = T.concatenate([x, y])
    elif mode == "avg_output":
        y = T.scalar('avg_fac', dtype=dtype)
        z = x * y
    else:
        raise ValueError("Unrecognized mode: ", mode)
    name = make_name(mode, dtype, bcast, op)
    return theano.function([x, y], z.transfer(None), name=name)
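For reference, T.shape_padaxis(x, axis=k) inserts a broadcastable length-1 dimension at position k; that is what makes the pad-concatenate-reduce pattern above compute an elementwise reduction of x and y. A minimal runnable sketch, assuming Theano and NumPy are installed (variable names are illustrative only):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.matrix('y')
x_pad = T.shape_padaxis(x, axis=0)                         # (1, n, d)
y_pad = T.shape_padaxis(y, axis=0)                         # (1, n, d)
z = T.sum(T.concatenate([x_pad, y_pad], axis=0), axis=0)   # elementwise x + y
f = theano.function([x, y], [x_pad.shape, z])
a = np.ones((2, 3), dtype=theano.config.floatX)
print(f(a, a))                                             # [1 2 3] and a 2x3 matrix of 2.0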
Code example #2
    def _step(self, st_s, t, onoise, inoise):

        on_t = onoise[:, :, t]
        in_t = inoise[:, :, t:t + 1]

        # get action
        at_s = self.predict(st_s)

        # obtain new steering variables
        A_t1 = self.aAction(st_s, at_s)

        # time-shift steerings 1 into the future
        # (A(t-15), ..., A(t))  ->  (A(t-14), ..., A(t+1))
        st_s3 = st_s[:, 1:].reshape(
            (st_s.shape[0], self.params_task['history'], 4))
        st1_s3 = T.set_subtensor(
            st_s3[:, :, :3],
            T.concatenate((st_s3[:, 1:, :3], T.shape_padaxis(A_t1, 1)),
                          axis=1))
        xt1_s = T.concatenate(
            (st_s[:, :1], st1_s3.reshape((st_s.shape[0], st_s.shape[1] - 1))),
            axis=1)

        # Obtain \delta R(t+1) by BNN
        xt1_s = xt1_s.reshape(
            (self.params['samples'], xt1_s.shape[0] / self.params['samples'],
             xt1_s.shape[1]))
        drt1_s, vdrt1_s = self.model.predict(xt1_s,
                                             mode='symbolic',
                                             provide_noise=True,
                                             noise=in_t)
        drt1_s = drt1_s.reshape(
            (drt1_s.shape[0] * drt1_s.shape[1], drt1_s.shape[2]))
        vdrt1_s = vdrt1_s.reshape(
            (vdrt1_s.shape[0] * vdrt1_s.shape[1], vdrt1_s.shape[2]))

        # sample from output noise
        drt1_s = on_t * T.sqrt(vdrt1_s) + drt1_s

        #obtain R(t+1) by adding \delta R(t+1)
        rt1_s = st_s[:, -1:] + drt1_s[:, 0:1]

        # undo log-logit transformation to obtain unnormalized reward
        rew1 = 1. / (1. + T.exp(-rt1_s))  # undo logit
        rew1 = rew1 * (self.model.params['bounds'][3] - self.model.
                       params['bounds'][1]) + self.model.params['bounds'][1]
        rew1 = T.exp(rew1) - 1

        # update time-embedding: R(t-15)..R(t) -> R(t-14) .. R(t+1)
        st1_s3 = T.set_subtensor(
            st1_s3[:, :, 3:],
            T.concatenate((st1_s3[:, 1:, 3:], T.shape_padaxis(rt1_s, 1)),
                          axis=1))
        st1_s = T.concatenate(
            (st_s[:, :1], st1_s3.reshape((st_s.shape[0], st_s.shape[1] - 1))),
            axis=1)

        return [st1_s, t + 1, rew1[:, 0]]
Code example #3
File: ablation.py Project: davidbau/net-intent
    def apply(self, y, y_hat, x):
        predicted = y_hat.argmax(axis=1)
        # both expanded_y and expanded_y_hat are shape (cases, labels)
        expanded_y = tensor.extra_ops.to_one_hot(y, y_hat.shape[1])
        expanded_y_hat = tensor.extra_ops.to_one_hot(predicted, y_hat.shape[1])
        # pad vectors and elementwise multiply for (cases, labels, labels)
        expanded_confusion = (tensor.shape_padaxis(expanded_y, 2) *
                              tensor.shape_padaxis(expanded_y_hat, 1))
        # now result is (labels, labels, y_dim, x_dim)
        result = tensor.tensordot(expanded_confusion, x, axes=([0], [0]))
        return result
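Code example #4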
    def log_likelihood_values(self, x, y, location=0.0, scale=1.0):

        o = self.output(x)
        noise_variance = T.tile(
            T.shape_padaxis(T.exp(self.log_v_noise[0, :]) * scale**2, 0),
            [o.shape[0], o.shape[1], 1])
        location = T.tile(T.shape_padaxis(location, 0),
                          [o.shape[0], o.shape[1], 1])
        scale = T.tile(T.shape_padaxis(scale, 0), [o.shape[0], o.shape[1], 1])
        return -0.5 * T.log(2 * math.pi * noise_variance) - \
            0.5 * (o * scale + location - T.tile(T.shape_padaxis(y, 0), [ o.shape[ 0 ], 1, 1 ]))**2 / noise_variance
Code example #5
File: ablation.py Project: chargen/net-intent
    def apply(self, y, y_hat, x):
        predicted = y_hat.argmax(axis=1)
        # both expanded_y and expanded_y_hat are shape (cases, labels)
        expanded_y = tensor.extra_ops.to_one_hot(y, y_hat.shape[1])
        expanded_y_hat = tensor.extra_ops.to_one_hot(predicted, y_hat.shape[1])
        # pad vectors and elementwise multiply for (cases, labels, labels)
        expanded_confusion = (tensor.shape_padaxis(expanded_y, 2) *
                              tensor.shape_padaxis(expanded_y_hat, 1))
        # now result is (labels, labels, y_dim, x_dim)
        result = tensor.tensordot(expanded_confusion, x, axes=([0], [0]))
        return result
Code example #6
    def __init__(self, rng, input, batch_size, latent_size, label_size, out_size, activation, W_z, W_y, b):
        
        # init parent class                          
        super(Marginalized_Decoder, self).__init__(rng=rng, input=input, latent_size=latent_size, out_size=out_size, activation=activation, W_z=W_z, b=b)

        # setup the params           
        self.W_y = W_y

        # compute marginalized outputs                                                                                                                 
        labels_tensor = T.extra_ops.repeat( T.shape_padaxis(T.eye(n=label_size, m=label_size), axis=0), repeats=batch_size, axis=0)
        self.output = self.activation(T.extra_ops.repeat(T.shape_padaxis(T.dot(self.input, self.W_z), axis=1), repeats=label_size, axis=1) + T.dot(labels_tensor, self.W_y) + self.b)
Code example #7
    def __init__(self, eta, cutpoints, *args, **kwargs):
        eta = tt.as_tensor_variable(floatX(eta))
        cutpoints = tt.concatenate(
            [tt.as_tensor_variable([0.0]),
             tt.as_tensor_variable(cutpoints)])
        cutpoints = tt.shape_padaxis(cutpoints, 0)
        eta = tt.shape_padaxis(eta, 1)

        p = softmax(cumsum(eta - cutpoints, axis=1))

        super().__init__(p=p, *args, **kwargs)
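The two pads above set up a pairwise difference: eta becomes a column of shape (N, 1) and the cutpoint vector a row of shape (1, K), so eta - cutpoints broadcasts to (N, K) before the cumsum and softmax. A shape-only sketch, assuming Theano is available (names and sizes are illustrative):

import numpy as np
import theano
import theano.tensor as tt

eta = tt.vector('eta')                      # (N,)
cutpoints = tt.vector('cutpoints')          # (K,)
diff = tt.shape_padaxis(eta, 1) - tt.shape_padaxis(cutpoints, 0)   # (N, 1) - (1, K) -> (N, K)
f = theano.function([eta, cutpoints], diff.shape)
print(f(np.zeros(5, dtype=theano.config.floatX),
        np.zeros(3, dtype=theano.config.floatX)))     # [5 3]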
Code example #8
def make_reduce_f(var, mode):
    dtype = var.dtype
    bcast = var.broadcastable
    t_type = T.TensorType(dtype=dtype, broadcastable=bcast)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype) + broadcastable_string(bcast)
    return theano.function([x, y], z.transfer(None), name=name)
Code example #9
def get_conv_xy(layer, deterministic=True):
    w_np = layer.W.get_value()
    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:])/2,
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    max_x = input_shape[2] - w_np.shape[2]
    max_y = input_shape[3] - w_np.shape[3]
    srng = RandomStreams()
    patch_x = srng.random_integers(low=0, high=max_x)
    patch_y = srng.random_integers(low=0, high=max_y)

    #print("input_shape shape: ", input_shape)
    #print("pad: \"%s\""% (layer.pad,))
    #print(" stride: " ,layer.stride)
    #print("max_x %d max_y %d"%(max_x,max_y))

    x = L.get_output(input_layer, deterministic=deterministic)
    x = x[:, :,
          patch_x:patch_x + w_np.shape[2], patch_y:patch_y + w_np.shape[3]]
    x = T.flatten(x, 2)  # N,D

    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]
    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w) # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
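The last shape_padaxis call pads the bias b from shape (O,) to (1, O) so it broadcasts over the N rows of y. An equivalent NumPy sketch with assumed shapes:

import numpy as np
y = np.zeros((4, 8))          # (N, O) activations
b = np.arange(8.0)            # (O,) bias
y = y + b[np.newaxis, :]      # b padded to (1, O), broadcast over the N rows
print(y.shape)                # (4, 8)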
Code example #10
def make_reduce_f(mode, dtype, ndim):
    t_type = T.TensorType(dtype=dtype, broadcastable=[False] * ndim)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype)
    return theano.function([x, y],
                           z.transfer(None),
                           name=name,
                           allow_input_downcast=True)
Code example #11
def calc_poissonVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=recon.shape[1],
                                  axis=1)
    return T.sum(T.exp(recon) - data * recon, axis=axis_to_sum)
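When axis_to_sum is not 1, the data tensor of shape (N, D) is padded to (N, 1, D) and repeated to (N, K, D), matching a recon tensor that carries K marginalization samples per case. A shape-only sketch of that branch, assuming Theano (K=3 is arbitrary):

import numpy as np
import theano
import theano.tensor as T

data = T.matrix('data')        # (N, D)
recon = T.tensor3('recon')     # (N, K, D)
data_rep = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                              repeats=recon.shape[1], axis=1)    # (N, K, D)
f = theano.function([data, recon], data_rep.shape)
print(f(np.zeros((2, 5), dtype=theano.config.floatX),
        np.zeros((2, 3, 5), dtype=theano.config.floatX)))        # [2 3 5]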
Code example #12
File: model.py Project: tarbaig/covid19_inference
def week_modulation(
        new_cases_inferred,
        week_modulation_type="abs_sine",
        pr_mean_weekend_factor=0.7,
        pr_sigma_weekend_factor=0.2,
        week_end_days=(6, 7),
        model=None,
        save_in_trace=True,
):
    """

    Parameters
    ----------
    new_cases_inferred
    week_modulation_type
    pr_mean_weekend_factor
    pr_sigma_weekend_factor
    week_end_days
    model

    Returns
    -------

    """
    model = modelcontext(model)
    shape_modulation = list(model.sim_shape)
    shape_modulation[0] -= model.sim_diff_data

    len_L2 = () if model.sim_ndim == 1 else model.sim_shape[1]

    week_end_factor, _ = hierarchical_normal(
        "weekend_factor",
        "sigma_weekend_factor",
        pr_mean=pr_mean_weekend_factor,
        pr_sigma=pr_sigma_weekend_factor,
        len_L2=len_L2,
    )
    if week_modulation_type == "step":
        modulation = np.zeros(shape_modulation[0])
        for i in range(shape_modulation[0]):
            date_curr = model.data_begin + datetime.timedelta(days=i)
            if date_curr.isoweekday() in week_end_days:
                modulation[i] = 1
    elif week_modulation_type == "abs_sine":
        offset_rad = pm.VonMises("offset_modulation_rad", mu=0, kappa=0.01)
        offset = pm.Deterministic("offset_modulation",
                                  offset_rad / (2 * np.pi) * 7)
        t = np.arange(
            shape_modulation[0]) - model.data_begin.weekday()  # Sunday @ zero
        modulation = 1 - tt.abs_(tt.sin(t / 7 * np.pi + offset_rad / 2))

    if model.sim_ndim == 2:
        modulation = tt.shape_padaxis(modulation, axis=-1)

    multiplication_vec = np.ones(
        shape_modulation) - (1 - week_end_factor) * modulation
    new_cases_inferred_eff = new_cases_inferred * multiplication_vec
    if save_in_trace:
        pm.Deterministic("new_cases", new_cases_inferred_eff)
    return new_cases_inferred_eff
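In the hierarchical (sim_ndim == 2) case the daily modulation of shape (n_days,) is padded with a trailing length-1 axis so it can scale a per-region weekend factor. A rough NumPy sketch of that broadcasting step (all numbers are made up for illustration):

import numpy as np
t = np.arange(7)
modulation = 1 - np.abs(np.sin(t / 7 * np.pi))            # (n_days,) daily modulation
week_end_factor = np.array([0.6, 0.7, 0.8])               # (n_regions,) weekend factors
mult = 1 - (1 - week_end_factor) * modulation[:, None]    # (n_days, 1) * (n_regions,) -> (n_days, n_regions)
print(mult.shape)                                         # (7, 3)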
Code example #13
        def step(l, x_prev_sampled, x_prev_argmax, z, all_embeddings):

            x_prev_sampled_embedded = self.embedder(
                x_prev_sampled, all_embeddings)  # N * max(L) * E

            probs_sampled = self.get_probs(x_prev_sampled_embedded,
                                           z,
                                           all_embeddings,
                                           mode='all')  # N * max(L) * D

            x_sampled_one_hot = self.output_dist.get_samples(
                [T.shape_padaxis(probs_sampled[:, l], 1)])  # N * 1 * D

            x_sampled_l = T.argmax(x_sampled_one_hot, axis=-1).flatten()  # N

            x_current_sampled = T.set_subtensor(x_prev_sampled[:, l],
                                                x_sampled_l)  # N * max(L)

            #

            x_prev_argmax_embedded = self.embedder(
                x_prev_argmax, all_embeddings)  # N * max(L) * E

            probs_argmax = self.get_probs(x_prev_argmax_embedded,
                                          z,
                                          all_embeddings,
                                          mode='all')  # N * max(L) * D

            x_argmax_l = T.argmax(probs_argmax[:, l], axis=-1)  # N

            x_current_argmax = T.set_subtensor(x_prev_argmax[:, l],
                                               x_argmax_l)  # N * max(L)

            return T.cast(x_current_sampled,
                          'int32'), T.cast(x_current_argmax, 'int32')
Code example #14
def calc_realVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=recon.shape[1],
                                  axis=1)
    return .5 * T.sum((data - recon)**2, axis=axis_to_sum)
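Code example #15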
    def output(self, x):

        x = T.tile(T.shape_padaxis(x, 0), [self.n_samples, 1, 1])

        for layer in self.layers:
            x = layer.output(x)

        return x
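Code example #16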
    def output(self, x):

        x = T.tile(T.shape_padaxis(x, 0), [ self.n_samples, 1, 1 ])

        for layer in self.layers:
            x = layer.output(x)

        return x
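Both output methods above use the same pad-then-tile idiom: an (N, D) batch is padded to (1, N, D) and tiled to (n_samples, N, D), one copy per weight sample. A minimal sketch, assuming Theano (n_samples=4 is arbitrary):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                                  # (N, D)
x_rep = T.tile(T.shape_padaxis(x, 0), [4, 1, 1])   # (n_samples, N, D)
f = theano.function([x], x_rep.shape)
print(f(np.zeros((5, 2), dtype=theano.config.floatX)))   # [4 5 2]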
Code example #17
def get_dense_xy(layer, deterministic=True):
    x = L.get_output(L.FlattenLayer(layer.input_layer),
                     deterministic=deterministic)  # N, D
    w = layer.W # D, O
    y = T.dot(x, w)  # (N,O)
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
Code example #18
File: functions.py Project: runngezhang/aed-by-cnn
def gated_mean(x, p=0.5, axis=2):
    import theano.tensor as T
    thres = T.shape_padaxis(
        (p * T.mean(x, axis=axis) + (1 - p) * T.max(x, axis=axis)), axis=-1)
    mask = T.ge(x, thres)
    g_values = mask * x
    g_means = T.sum(g_values, axis=-1) / T.sum(mask, axis=-1)
    return g_means
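gated_mean keeps only the entries that exceed a convex combination of the mean and the max along the given axis, then averages them; the trailing shape_padaxis puts the threshold back on a length-1 axis so it compares against x. A rough NumPy equivalent, assuming the reduced axis is the last axis of x (as in the axis=2 call on a 3-D input):

import numpy as np
x = np.array([[[1., 2., 3., 10.]]])                      # shape (1, 1, 4)
p = 0.5
thres = p * x.mean(axis=2) + (1 - p) * x.max(axis=2)     # (1, 1)
thres = thres[..., None]                                 # (1, 1, 1), like shape_padaxis(..., -1)
mask = (x >= thres)
g_mean = (mask * x).sum(axis=-1) / mask.sum(axis=-1)     # mean of entries above the threshold
print(g_mean)                                            # [[10.]]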
Code example #19
    def process(self, gstate, input_vector, dropout_masks=Ellipsis):
        """
        Process an input vector and update the state accordingly. Each node runs a GRU step
        with previous state from the node state and input from the vector.

        Params:
            gstate: A GraphState giving the current state
            input_vector: A tensor of the form (n_batch, input_width)
        """
        if dropout_masks is Ellipsis:
            dropout_masks = None
            append_masks = False
        else:
            append_masks = True

        # gstate.edge_states is of shape (n_batch, n_nodes, n_nodes, id+state)
        # combined input should be broadcasted to (n_batch, n_nodes, n_nodes, X)
        input_vector_part = T.shape_padaxis(T.shape_padaxis(input_vector, 1),
                                            2)
        source_state_part = T.shape_padaxis(
            T.concatenate([gstate.node_ids, gstate.node_states], 2), 2)
        dest_state_part = T.shape_padaxis(
            T.concatenate([gstate.node_ids, gstate.node_states], 2), 1)
        full_input = broadcast_concat(
            [input_vector_part, source_state_part, dest_state_part], 3)

        # we flatten to process updates
        flat_input = full_input.reshape([-1, self._process_input_size])
        flat_result, dropout_masks = self._update_stack.process(
            flat_input, dropout_masks)
        result = flat_result.reshape([
            gstate.n_batch, gstate.n_nodes, gstate.n_nodes,
            self._graph_spec.num_edge_types, 2
        ])
        should_set = result[:, :, :, :, 0]
        should_clear = result[:, :, :, :, 1]

        new_strengths = gstate.edge_strengths * (1 - should_clear) + (
            1 - gstate.edge_strengths) * should_set

        new_gstate = gstate.with_updates(edge_strengths=new_strengths)
        if append_masks:
            return new_gstate, dropout_masks
        else:
            return new_gstate
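Code example #20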
    def output(self, x):

        x = T.tile(T.shape_padaxis(x, 0), [self.n_samples, 1, 1])
        x = T.concatenate((x, 0 * self.randomness_z[:, 0:x.shape[1], :]), 2)

        for layer in self.layers:
            x = layer.output(x)

        return x
Code example #21
def calc_binaryVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=probabilities.shape[1],
                                  axis=1)
    return -T.sum(data * T.log(probabilities) +
                  (1 - data) * T.log(1 - probabilities),
                  axis=axis_to_sum)
Code example #22
    def pt_forward_all(self, x, posit_x, mask):
        h0 = T.zeros((x.shape[1], self.n_out * 2), dtype=theano.config.floatX)

        padded = T.shape_padaxis(T.zeros_like(x[0]), axis=1).dimshuffle(
            (1, 0, 2))
        x_shifted = T.concatenate([padded, x[:-1]], axis=0)

        padded_mask = T.shape_padaxis(T.zeros_like(mask[0]),
                                      axis=1).dimshuffle((1, 0))
        mask = T.concatenate([padded_mask, mask[:-1]], axis=0).dimshuffle(
            (0, 1, 'x'))

        o, _ = theano.scan(fn=self._forward,
                           sequences=[x, x_shifted, posit_x, mask],
                           outputs_info=[h0, None])

        new_probs = o[1].reshape((x.shape[0], x.shape[1]))
        return new_probs
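The padded/dimshuffle trick at the top of pt_forward_all builds a one-step time shift: x has shape (time, batch, dim), and concatenating a zero slice of shape (1, batch, dim) in front of x[:-1] delays the sequence by one step. A NumPy sketch of the same shift:

import numpy as np
x = np.arange(2 * 3 * 4, dtype='float32').reshape(2, 3, 4)   # (time, batch, dim)
padded = np.zeros_like(x[0])[:, None, :].transpose(1, 0, 2)  # (1, batch, dim)
x_shifted = np.concatenate([padded, x[:-1]], axis=0)         # x delayed one step, zeros at t=0
print(x_shifted.shape)                                       # (2, 3, 4)
print(x_shifted[0].sum(), np.allclose(x_shifted[1], x[0]))   # 0.0 True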
Code example #23
File: functions.py Project: tweihaha/aed-by-cnn
def gated_mean(x, p=0.5, axis=2):
    import theano.tensor as T
    thres = T.shape_padaxis((p * T.mean(x, axis=axis) + 
                            (1 - p) * T.max(x, axis=axis)), 
                            axis=-1)
    mask = T.ge(x, thres)
    g_values = mask*x
    g_means = T.sum(g_values, axis=-1) / T.sum(mask, axis=-1)
    return g_means
Code example #24
File: test.py Project: gumaojie/morphlm
def test14():
    x = T.iscalar('x')
    y = T.iscalar('y')
    z = T.arange(x)
    z = T.shape_padaxis(z, axis=1)
    z2 = T.zeros((x,y))
    z2 = z + z2
    fn = theano.function(inputs=[x,y],outputs=[z2],allow_input_downcast=True)
    res = fn(3,4)
    print(res, res[0].shape)
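For the call fn(3, 4) above, z is arange(3) padded to a (3, 1) column, and adding the (3, 4) zero matrix broadcasts it across columns. An equivalent NumPy check:

import numpy as np
z = np.arange(3)[:, None]          # shape_padaxis(arange(3), axis=1) -> (3, 1)
z2 = z + np.zeros((3, 4))          # broadcasts to (3, 4)
print(z2)                          # rows: [0 0 0 0], [1 1 1 1], [2 2 2 2]
print(z2.shape)                    # (3, 4)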
Code example #25
File: utils.py Project: jiri-hron/masterproject
def prepare_toy_data(n_train, n_valid, batch_size):
    n_train_batches = n_train // batch_size if batch_size < n_train else 1
    n_valid_batches = n_valid // batch_size if batch_size < n_valid else 1

    rng = np.random.RandomState(1234)  # always return the same

    n_train_per_int = n_train // 2

    # interpolation on [-0.5, 0.0], extrapolation on [0.5, 1.0]
    # X_train = np.concatenate((
    #     rng.uniform(low=-1.0, high=-0.5, size=n_train_per_int),
    #     rng.uniform(low=0.0, high=0.5, size=n_train - n_train_per_int)
    # )).astype(floatX)
    # X_valid = rng.uniform(low=-1.0, high=0.5, size=n_valid).astype(floatX)
    X_train = np.asarray(rng.uniform(low=-1.0, high=0.5, size=n_train),
                         dtype=floatX)
    X_valid = np.asarray(rng.uniform(low=-1.0, high=1.0, size=n_valid),
                         dtype=floatX)

    y_train = np.asarray(
        # 0.4*np.sin(3 * 2*np.pi*X_train) + 0.05*rng.normal(size=n_train),
        0.4 * np.cos(2 * np.pi * X_train) ** 2 *
        np.sin(2 * np.pi * X_train + 0.1) +
        0.01 * rng.normal(size=n_train),
        dtype=floatX
    )
    y_valid = np.asarray(
        # 0.4*np.sin(3 * 2*np.pi*X_valid) + 0.05*rng.normal(size=n_valid),
        0.4 * np.cos(2 * np.pi * X_valid) ** 2 *
        np.sin(2 * np.pi * X_valid + 0.1) +
        0.01 * rng.normal(size=n_valid),
        dtype=floatX
    )

    X_train = T.shape_padaxis(theano.shared(X_train, name='X_train'), axis=1)
    y_train = theano.shared(y_train, name='y_train')
    X_valid = T.shape_padaxis(theano.shared(X_valid, name='X_valid'), axis=1)
    y_valid = theano.shared(y_valid, name='y_valid')

    # used in evaluation with multiple samples
    y_valid = np.array(y_valid.eval())

    return X_train, y_train, X_valid, y_valid, n_train_batches, n_valid_batches
Code example #26
    def _get_split(self,
                   layer,
                   deterministic=True,
                   conv_all_patches=True,
                   **kwargs):

        # Get the patches and the outputs without the non-linearities.
        if type(layer) is L.DenseLayer:
            x, y = putils.get_dense_xy(layer, deterministic)
        elif type(layer) is L.Conv2DLayer:
            if conv_all_patches is True:
                x, y = putils.get_conv_xy_all(layer, deterministic)
            else:
                x, y = putils.get_conv_xy(layer, deterministic)
        else:
            raise ValueError("Unknown layer as input")

        # Create an output dictionary
        outputs = dict()

        for name, fun in subtypes:
            outputs[name] = dict()
            mrk_y = 1.0 * T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
            y_current = y * mrk_y  # This has a binary mask
            cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
            norm = T.maximum(cnt_y, 1.)

            # Count how many datapoints are considered
            outputs[name]['cnt'] = cnt_y

            # The mean of the current batch
            outputs[name]['m_y'] = T.shape_padaxis(
                y_current.sum(axis=0),
                axis=0) / norm  # (1,O) mean output for batch
            outputs[name]['m_x'] = T.dot(
                x.T, mrk_y) / norm  # (D,O) mean input for batch

            # The mean of the current batch
            outputs[name]['yty'] = T.shape_padaxis(
                T.sum(y_current**2., axis=0), axis=0) / norm  # (1,O)
            outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # D,O

        return dict_to_list(outputs)
Code example #27
    def process(self, input_vector):
        """
        Convert an input vector into a categorical distribution across num_categories categories

        Params:
            input_vector: Vector of shape (n_batch, input_width)

        Returns: Categorical distribution of shape (n_batch, 1, num_categories), such that it sums to 1 across
            all categories for each instance in the batch
        """
        transformed = self._transform_stack.process(input_vector)
        return T.shape_padaxis(transformed, 1)
Code example #28
File: AD.py Project: Wongcheukwai/sepsisrl
    def predict(self,
                X_test,
                mode='numerical',
                provide_noise=False,
                noise=None):
        """ Prediction wrapper-method 
        Requires X_test to be [n_samples,n,d], so use np.tile(X_test,[samples,1,1]) 
        before prediction.

        For policy search we use theano.scan. In that case we need to be able
        to feed in the input noise externally (provide_noise,noise)

        mode='symbolic' if we want to use this model as part of a larger graph (as in the policy search),
        mode='numerical' for standard predictions, using compiled functions
        """
        print("HERE")
        X_test_n = (X_test - self.mean_X) / self.std_X

        #X_test_n = X_test

        if mode == 'symbolic':

            if provide_noise == True:
                # X_test_n.shape[0] refers to the number of samples, ie draws from the weight distribution
                m = self.bb_alpha.network.output_gn(X_test_n, noise,
                                                    X_test_n.shape[0])
            else:
                m = self.bb_alpha.network.output(X_test_n,
                                                 False,
                                                 X_test_n.shape[0],
                                                 use_indices=False)

            log_v_noise = self.bb_alpha.network.log_v_noise
            noise_variance = T.tile(
                T.shape_padaxis(T.exp(log_v_noise[0, :]), 0),
                [m.shape[0], m.shape[1], 1])

        else:
            if X_test_n.ndim == 2:
                X_test_n = np.tile(X_test_n, [self.params['samples'], 1, 1])

            m = self.bb_alpha.fwpass(X_test_n, X_test_n.shape[0])
            log_v_noise = self.bb_alpha.network.log_v_noise.get_value()[0, :]
            noise_variance = np.tile(np.exp(log_v_noise),
                                     [m.shape[0], m.shape[1], 1])

        mt = m
        vt = noise_variance

        # TODO double check we don't need this?
        mt = mt * self.std_Y + self.mean_Y
        vt *= self.std_Y**2
        return mt, vt
Code example #29
File: main.py Project: abiraja2004/summarization-5
    def pretrain(self):
        bm = self.bm = T.imatrix('bm')

        padded = T.shape_padaxis(T.zeros_like(bm[0]), axis=1).dimshuffle(
            (1, 0))
        bm_shift = T.concatenate([padded, bm[:-1]], axis=0)

        new_bm = T.cast(T.or_(bm, bm_shift), theano.config.floatX)
        new_probs = self.output_layer.forward_all(self.h_final, new_bm)

        cross_ent = T.nnet.binary_crossentropy(new_probs, new_bm) * self.masks
        self.obj = obj = T.mean(T.sum(cross_ent, axis=0))
        self.cost_g = obj * args.coeff_cost_scale + self.l2_cost
Code example #30
    def process(self, input_vector):
        """
        Convert an input vector into a probabilistic set, i.e. a list of probabilities of item i being in
        the output set.

        Params:
            input_vector: Vector of shape (n_batch, input_width)

        Returns: Set distribution of shape (n_batch, 1, num_categories), where each value is independent from
            the others.
        """
        transformed = self._transform_stack.process(input_vector)
        return T.shape_padaxis(transformed,1)
Code example #31
    def _forward_all_sample(self, x, posit_x, h0):
        padded = T.shape_padaxis(T.zeros_like(x[0]), axis=1).dimshuffle(
            (1, 0, 2))
        x_shifted = T.concatenate([padded, x[:-1]], axis=0)
        mask = T.zeros(shape=(x.shape[1], )).dimshuffle((0, 'x'))

        [s, _], updates = theano.scan(fn=self._forward_sample,
                                      sequences=[x, x_shifted, posit_x],
                                      outputs_info=[mask, h0])
        samples = theano.gradient.disconnected_grad(s).reshape(
            (x.shape[0], x.shape[1]))
        padded_mask = T.shape_padaxis(T.zeros_like(samples[0]),
                                      axis=1).dimshuffle((1, 0))
        mask_from_samples = T.concatenate([padded_mask, samples[:-1]],
                                          axis=0).dimshuffle((0, 1, 'x'))

        [_, probs], _ = theano.scan(
            fn=self._forward,
            sequences=[x, x_shifted, posit_x, mask_from_samples],
            outputs_info=[h0, None])

        return probs.reshape((x.shape[0], x.shape[1])), updates, samples
Code example #32
    def _forward(self):
        if theano.config.device.startswith('gpu'):
            from theano.tensor.nnet.abstract_conv import bilinear_upsampling
        else:
            raise AssertionError('Bilinear interpolation requires GPU and cuDNN.')

        inpt = T.reshape(self.inpt, (self.inpt_depth, self.n_inpt, self.inpt_height, self.inpt_width))
        pre_res = bilinear_upsampling(input=inpt, ratio=self.up_factor)
        shuffle_res = pre_res.dimshuffle((2, 3, 0, 1))
        res = self._bilinear_upsampling_1D(inpt=shuffle_res, ratio=self.up_factor)
        self.output = res.dimshuffle((2, 3, 0, 1))
        self.output = T.shape_padaxis(self.output, axis=0)
        self.output = T.unbroadcast(self.output, 0)
Code example #33
File: rbm_quantum.py Project: sylvialee12/RBM
    def changing_weight2(self, v_sample):
        """
        Compute the transition probability for flipping spins of v_sample,
        which is Ts's = conj(psi(s',M)) / conj(psi(s,M)).
        For the transverse-field Ising model the flipping term in the Hamiltonian
        is hf = h/2 (sp_i + sm_i), so flipping each site contributes the same energy h/2,
        while Ts's differs per flip; one can therefore sum up Ts's over all s'.
         :param v_sample: one sample of the visible layer
         :param Hamiltonian: Hamiltonian of the physical system we consider;
         we mainly use Hamiltonian.h
         :pbc: periodic boundary condition, 1: periodic, 0: open

        """
        # self.W_real has size nvisible*nhidden while v_sample is a vector of
        # nvisible, so one needs to transpose self.W_real to make it broadcastable
        exponent=-2*v_sample*self.vbias+\
                 T.sum(
                     T.log(T.cosh(self.hbias-T.shape_padaxis(self.W_real,axis=0)*T.shape_padaxis(v_sample,axis=-1))),axis=2)-\
                 T.sum(
                     T.log(T.cosh(self.hbias+T.shape_padaxis(self.W_real,axis=0)*T.shape_padaxis(v_sample,axis=-1))),axis=2)

        return T.sum(T.exp(exponent), axis=1)
Code example #34
    def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"
        squashed = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))

        rsp = T.nnet.softmax(squashed[:,:3])
        c1 = T.nnet.softmax(squashed[:,3:7])
        c2 = T.nnet.softmax(squashed[:,7:10])
        octave_choice = T.nnet.softmax(squashed[:,10:])
        octave_notes = T.tile(c1,(1,3)) * T.tile(c2,(1,4))
        full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1), (-1,12*self.num_octaves))
        full_probs = T.concatenate([rsp[:,:2], T.shape_padright(rsp[:,2])*full_notes], 1)

        newshape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
        fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
        return fixed
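The line combining shape_padright and shape_padaxis is a per-case outer product: octave_choice of shape (N, num_octaves) becomes (N, num_octaves, 1), octave_notes of shape (N, 12) becomes (N, 1, 12), and their product is reshaped to (N, 12*num_octaves). A NumPy sketch of that step, assuming num_octaves=4:

import numpy as np
octave_choice = np.ones((2, 4)) / 4.0          # (N, num_octaves)
octave_notes = np.ones((2, 12)) / 12.0         # (N, 12) within-octave note probabilities
full = octave_choice[:, :, None] * octave_notes[:, None, :]   # (N, 4, 12)
full_notes = full.reshape(-1, 4 * 12)          # (N, 48)
print(full_notes.shape, np.allclose(full_notes.sum(axis=1), 1.0))   # (2, 48) True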
Code example #35
    def process(self, gstate, dropout_masks=Ellipsis):
        """
        Process a graph state.
          1. Data is transferred from each node to each other node along both forward and backward edges.
                This data is processed with a Wx+b style update, and an optional transformation is applied
          2. Nodes sum the transferred data, weighted by the existence of the other node and the edge.
          3. Nodes perform a GRU update with this input

        Params:
            gstate: A GraphState giving the current state
        """
        if dropout_masks is Ellipsis:
            dropout_masks = None
            append_masks = False
        else:
            append_masks = True

        node_obs = T.concatenate([gstate.node_ids, gstate.node_states],2)
        flat_node_obs = node_obs.reshape([-1, self._process_input_size])
        transformed, dropout_masks = self._transfer_stack.process(flat_node_obs,dropout_masks)
        transformed = transformed.reshape([gstate.n_batch, gstate.n_nodes, 2*self._graph_spec.num_edge_types, self._transfer_size])
        scaled_transformed = transformed * T.shape_padright(T.shape_padright(gstate.node_strengths))
        # scaled_transformed is of shape (n_batch, n_nodes, 2*num_edge_types, transfer_size)
        # We want to multiply  through by edge strengths, which are of shape
        # (n_batch, n_nodes, n_nodes, num_edge_types), both fwd and backward
        edge_strength_scale = T.concatenate([gstate.edge_strengths, gstate.edge_strengths.swapaxes(1,2)], 3)
        # edge_strength_scale is of (n_batch, n_nodes, n_nodes, 2*num_edge_types)
        intermed = T.shape_padaxis(scaled_transformed, 2) * T.shape_padright(edge_strength_scale)
        # intermed is of shape (n_batch, n_nodes "source", n_nodes "dest", 2*num_edge_types, transfer_size)
        # now reduce along the "source" and "edge_types" dimensions to get dest activations
        # of shape (n_batch, n_nodes, transfer_size)
        reduced_result = T.sum(T.sum(intermed, 3), 1)

        # now add information from the current node id
        full_input = T.concatenate([gstate.node_ids, reduced_result], 2)

        # we flatten to apply GRU
        flat_input = full_input.reshape([-1, self._graph_spec.num_node_ids + self._transfer_size])
        flat_state = gstate.node_states.reshape([-1, self._graph_spec.node_state_size])
        new_flat_state, dropout_masks = self._propagation_gru.step(flat_input, flat_state, dropout_masks)

        new_node_states = new_flat_state.reshape(gstate.node_states.shape)

        new_gstate = gstate.with_updates(node_states=new_node_states)
        if append_masks:
            return new_gstate, dropout_masks
        else:
            return new_gstate
Code example #36
File: test.py Project: gumaojie/morphlm
def test13():
    x = T.fmatrix('x')
    x2 = T.zeros((4,3,5))
    y = T.shape_padaxis(x, axis=1)
    z = y
    z2 =  y + x2
    fn = theano.function(inputs=[x],outputs=[z,z2],allow_input_downcast=True)
    a =[float(i) for i in range(20)]
    b = [1,2,3]
    a = np.array(a)
    a = a.reshape(4,5)
    print(a)
    res, res2 = fn(a)
    print(res, res.shape)
    print(res2, res2.shape)
    exit(0)
    a = a.reshape(5,3,4)
    print(a)
    b = [[1,1],[2,2]]
    print(a[[1,2],[[0,1],[0,1]],[1]])
    print(T.arange(10))
Code example #37
    def __init__(self, rng, input, batch_size, in_size, latent_size, W_a = None, W_b = None, epsilon = 0.01):
        self.srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
        self.input = input
        
        # setup variational params
        if W_a is None:
            W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
            W_a = theano.shared(value=W_values, name='W_a')
        if W_b is None:
            W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
            W_b = theano.shared(value=W_values, name='W_b')
        self.W_a = W_a
        self.W_b = W_b

        # compute Kumaraswamy samples                                                                                                                                                      
        uniform_samples = T.cast(self.srng.uniform(size=(batch_size, latent_size-1), low=0.01, high=0.99), theano.config.floatX)
        self.a = Softplus(T.dot(self.input, self.W_a))
        self.b = Softplus(T.dot(self.input, self.W_b))
        v_samples = (1-(uniform_samples**(1/self.b)))**(1/self.a)

        # setup variables for recursion                                                                                                                                   
        stick_segment = theano.shared(value=np.zeros((batch_size,), dtype=theano.config.floatX), name='stick_segment')
        remaining_stick = theano.shared(value=np.ones((batch_size,), dtype=theano.config.floatX), name='remaining_stick')

        def compute_latent_vars(i, stick_segment, remaining_stick, v_samples):
            # compute stick segment                                                                                                     
            stick_segment = v_samples[:,i] * remaining_stick
            remaining_stick *= (1-v_samples[:,i])
            return (stick_segment, remaining_stick)

        (stick_segments, remaining_sticks), updates = theano.scan(fn=compute_latent_vars,
                                                                  outputs_info=[stick_segment, remaining_stick],sequences=T.arange(latent_size-1),
                                                                  non_sequences=[v_samples], strict=True)

        self.avg_used_dims = T.mean(T.sum(remaining_sticks > epsilon, axis=0))
        self.latent_vars = T.transpose(T.concatenate([stick_segments, T.shape_padaxis(remaining_sticks[-1, :],axis=1).T], axis=0))
        
        self.params = [self.W_a, self.W_b]
Code example #38
    def compute_output(self):
        
        # We compute the output mean

        self.Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + T.eye(self.z.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        self.KzzInv = T.nlinalg.MatrixInversePSD()(self.Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        self.covCavityInv = self.KzzInv + LLt * casting(self.n_points - self.set_for_training) / casting(self.n_points)
        self.covCavity = T.nlinalg.MatrixInversePSD()(self.covCavityInv)
        self.meanCavity = T.dot(self.covCavity, casting(self.n_points - self.set_for_training) / casting(self.n_points) * self.mParamPost)
        self.KzzInvcovCavity = T.dot(self.KzzInv, self.covCavity)
        self.KzzInvmeanCavity = T.dot(self.KzzInv, self.meanCavity)
        self.covPosteriorInv = self.KzzInv + LLt
        self.covPosterior = T.nlinalg.MatrixInversePSD()(self.covPosteriorInv)
        self.meanPosterior = T.dot(self.covPosterior, self.mParamPost)
        self.Kxz = compute_kernel(self.lls, self.lsf, self.input_means, self.z)
        self.B = T.dot(self.KzzInvcovCavity, self.KzzInv) - self.KzzInv 
        v_out = T.exp(self.lsf) + T.dot(self.Kxz * T.dot(self.Kxz, self.B), T.ones_like(self.z[ : , 0 : 1 ]))

        if self.ignore_variances:

            self.output_means = T.dot(self.Kxz, self.KzzInvmeanCavity)
            self.output_vars = abs(v_out) + casting(0) * T.sum(self.input_vars)

        else:

            self.EKxz = compute_psi1(self.lls, self.lsf, self.input_means, self.input_vars, self.z)
            self.output_means = T.dot(self.EKxz, self.KzzInvmeanCavity)

            # In other layers we have to compute the expected variance

            self.B2 = T.outer(T.dot(self.KzzInv, self.meanCavity), T.dot(self.KzzInv, self.meanCavity))

            exact_output_vars = True

            if exact_output_vars:

                # We compute the exact output variance

                self.psi2 = compute_psi2(self.lls, self.lsf, self.z, self.input_means, self.input_vars)
                ll = T.transpose(self.EKxz[ :, None, : ] * self.EKxz[ : , : , None ], [ 1, 2, 0 ])
                kk = T.transpose(self.Kxz[ :, None, : ] * self.Kxz[ : , : , None ], [ 1, 2, 0 ])
                v1 = T.transpose(T.sum(T.sum(T.shape_padaxis(self.B2, 2) * (self.psi2 - ll), 0), 0, keepdims = True))
                v2 = T.transpose(T.sum(T.sum(T.shape_padaxis(self.B, 2) * (self.psi2 - kk), 0), 0, keepdims = True))

            else:

                # We compute the approximate output variance using the unscented Kalman filter

                v1 = 0
                v2 = 0

                n = self.input_d
                for j in range(1, n + 1):
                    mask = T.zeros_like(self.input_vars)
                    mask = T.set_subtensor(mask[ :, j - 1 ] , 1)
                    inc = mask * T.sqrt(casting(n) * self.input_vars)
                    self.kplus = T.sqrt(casting(1.0) / casting(2 * n)) * compute_kernel(self.lls, self.lsf, self.input_means + inc, self.z)
                    self.kminus = T.sqrt(casting(1.0) / casting(2 * n)) * compute_kernel(self.lls, self.lsf, self.input_means - inc, self.z)

                    v1 += T.dot(self.kplus * T.dot(self.kplus, self.B2), T.ones_like(self.z[ : , 0 : 1 ]))
                    v1 += T.dot(self.kminus * T.dot(self.kminus, self.B2), T.ones_like(self.z[ : , 0 : 1 ]))
                    v2 += T.dot(self.kplus * T.dot(self.kplus, self.B), T.ones_like(self.z[ : , 0 : 1 ]))
                    v2 += T.dot(self.kminus * T.dot(self.kminus, self.B), T.ones_like(self.z[ : , 0 : 1 ]))

                v1 -= T.dot(self.EKxz * T.dot(self.EKxz, self.B2), T.ones_like(self.z[ : , 0 : 1 ]))
                v2 -= T.dot(self.Kxz * T.dot(self.Kxz, self.B), T.ones_like(self.z[ : , 0 : 1 ]))

            self.output_vars = abs(v_out) + abs(v2) + abs(v1)

        self.output_vars = self.output_vars + T.exp(self.lvar_noise)

        return
Code example #39
File: utils.py Project: robertostling/bnas
def expand_to_batch(x, batch_size, dim=-2):
    """Expand one dimension of `x` to `batch_size`."""
    return T.shape_padaxis(x, dim).repeat(batch_size, axis=dim)
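A usage sketch for expand_to_batch, assuming Theano (the function is repeated here so the sketch is self-contained; batch size and shapes are illustrative): with the default dim=-2, a vector of shape (D,) becomes (batch_size, D).

import numpy as np
import theano
import theano.tensor as T

def expand_to_batch(x, batch_size, dim=-2):
    return T.shape_padaxis(x, dim).repeat(batch_size, axis=dim)

v = T.vector('v')                                  # (D,)
f = theano.function([v], expand_to_batch(v, 8).shape)
print(f(np.zeros(3, dtype=theano.config.floatX)))  # [8 3]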
Code example #40
def calc_realVal_negative_log_likelihood(data, recon, axis_to_sum=1):
	if axis_to_sum != 1:
		# addresses the case where we marginalize                 
		data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1), repeats = recon.shape[1], axis=1)
	return .5 * T.sum( (data - recon)**2, axis=axis_to_sum )
Code example #41
def calc_poissonVal_negative_log_likelihood(data, recon, axis_to_sum=1):
	if axis_to_sum != 1:
		# addresses the case where we marginalize                                              
		data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1), repeats = recon.shape[1], axis=1)
	return T.sum( T.exp(recon) - data * recon, axis=axis_to_sum )
Code example #42
def calc_categoricalVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1), repeats=probabilities.shape[1], axis=1)
    return -T.sum(data * T.log(probabilities), axis=axis_to_sum)