Example #1
def test_with_owner():
    a = L.Attend(Owner())
    a.build([(4, 10, 16), (), (4, 18, 16), ()])
    src = torch.constant([0.0] * (4 * 10 * 16), shape=(4, 10, 16))
    bias = torch.constant([0.0] * (4 * 10), shape=(4, 10))
    bias = torch.expand_dims(torch.expand_dims(bias, axis=1), axis=3)
    mem = torch.constant([0.0] * (4 * 15 * 16), shape=(4, 15, 16))
    ctx = torch.constant([0.0] * (4 * 15 * 16), shape=(4, 15, 16))
    a.call([src, bias, mem, ctx])
Example #2
def test_owner_none():
    a = L.Attend(Owner())
    a.build([(4, 10, 16)])
    src = torch.constant([0.0] * (4 * 10 * 16), shape=(4, 10, 16))
    a.call([src])
    bias = torch.constant([0.0] * (4 * 10), shape=(4, 10))
    bias = torch.expand_dims(torch.expand_dims(bias, axis=1), axis=3)
    a.call([src, bias])
    ctx = torch.constant([0.0] * (4 * 15 * 16), shape=(4, 15, 16))
    a.call([src, bias, None, ctx])
Example #3
def sensemap_model(x, sensemap, name="sensemap_model", do_transpose=False):
    """Apply sensitivity maps."""
    if do_transpose:
        x_shape = x.get_shape().as_list()
        x = torch.expand_dims(x, axis=-2)
        x = torch.multiply(torch.conj(sensemap), x)
        x = torch.sum(x, axis=-1)
    else:
        x = torch.expand_dims(x, axis=-1)
        x = torch.multiply(x, sensemap)
        x = torch.sum(x, axis=3)
    return x
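The expand/multiply/reduce pattern above maps directly onto stock PyTorch, where torch.unsqueeze plays the role of expand_dims. A minimal sketch of the broadcast with made-up shapes ([B, H, W] image, [B, H, W, C] per-coil maps), simplified relative to whatever map layout the original code assumes:

import torch

# Illustrative shapes only: image x is [B, H, W], sensemap is [B, H, W, C] (C coils).
x = torch.randn(2, 8, 8, dtype=torch.complex64)
sensemap = torch.randn(2, 8, 8, 4, dtype=torch.complex64)

# Forward direction: weight the image by each coil sensitivity.
coil_images = x.unsqueeze(-1) * sensemap                 # [B, H, W, C]

# Transposed direction: combine coil images back with the conjugate maps.
combined = (coil_images * sensemap.conj()).sum(dim=-1)   # [B, H, W]
print(coil_images.shape, combined.shape)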
Example #4
def min_distance_better_than_threshold(pred_control_points,
                                       gt_control_points,
                                       confidence,
                                       confidence_threshold,
                                       device="cpu"):
    error = torch.expand_dims(pred_control_points, 1) - torch.expand_dims(
        gt_control_points, 0)
    error = torch.sum(torch.abs(error),
                      -1)  # L1 distance of error (N_pred, N_gt, M)
    error = torch.mean(
        error, -1)  # average L1 for all the control points. (N_pred, N_gt)
    error = torch.min(error, -1)  # (B, N_pred)
    mask = torch.greater_equal(confidence, confidence_threshold)
    mask = torch.squeeze(mask, dim=-1)

    return torch.mean(error[mask]), torch.mean(mask)
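As a rough shape check, the pairwise-expansion pattern above can be written with stock PyTorch calls. Sizes below are made up, and note that torch.min(t, dim) in standard PyTorch returns a (values, indices) pair rather than a bare tensor:

import torch

pred = torch.randn(5, 6, 3)   # (N_pred, M, 3) predicted control points
gt = torch.randn(7, 6, 3)     # (N_gt, M, 3) ground-truth control points

error = pred.unsqueeze(1) - gt.unsqueeze(0)      # (N_pred, N_gt, M, 3)
error = error.abs().sum(-1)                      # L1 per point: (N_pred, N_gt, M)
error = error.mean(-1)                           # average over points: (N_pred, N_gt)
min_error = error.min(dim=-1).values             # best match per prediction: (N_pred,)
print(min_error.shape)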
Example #5
    def call(self, q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
             r_w_bias, r_r_bias, r_s_bias, attn_mask):
        # content based attention score
        ac = torch.einsum('ibnd,jbnd->ijbn', q_head + r_w_bias, k_head_h)

        # position based attention score
        bd = torch.einsum('ibnd,jbnd->ijbn', q_head + r_r_bias, k_head_r)
        bd = rel_shift(bd, klen=torch.shape(ac)[1])

        # segment-based attention score
        if seg_mat is None:
            ef = 0
        else:
            ef = torch.einsum('ibnd,snd->isbn', q_head + r_s_bias, seg_embed)
            tgt_shape = torch.shape(bd)
            ef = torch.where(
                torch.Tensor(
                    np.broadcast_to(torch.expand_dims(seg_mat, 3), tgt_shape)),
                torch.Tensor(np.broadcast_to(ef[:, 1:, :, :], tgt_shape)),
                torch.Tensor(np.broadcast_to(ef[:, :1, :, :], tgt_shape)))

        # merges attention scores and performs masking
        attn_score = (ac + bd + ef) * self.scale
        if attn_mask is not None:
            attn_score = attn_score - 1e30 * attn_mask

        # attention probability
        attn_prob = functional.softmax(attn_score, 1)
        attn_prob = self.attention_probs_dropout(attn_prob)

        # attention output
        attn_vec = torch.einsum('ijbn,jbnd->ibnd', attn_prob, v_head_h)
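The 'ibnd,jbnd->ijbn' contraction pairs every query position i with every key position j, per batch b and head n. A quick shape check with stock torch.einsum (sizes are illustrative only):

import torch

qlen, klen, bsz, n_head, d_head = 4, 6, 2, 3, 8
q_head = torch.randn(qlen, bsz, n_head, d_head)
k_head = torch.randn(klen, bsz, n_head, d_head)

# Content-based scores: one score per (query pos, key pos, batch, head).
ac = torch.einsum('ibnd,jbnd->ijbn', q_head, k_head)
print(ac.shape)  # torch.Size([4, 6, 2, 3])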
Example #6
def Fusion(closeness_output, period_output, trend_output, scope, shape):
    '''
    Combining the output from the module into one tec map
    '''
    closeness_output = torch.squeeze(closeness_output)
    period_output = torch.squeeze(period_output)
    trend_output = torch.squeeze(trend_output)
    # apply a linear transformation to each of the outputs: closeness, period, trend and then combine
    Wc = torch.get_variable("closeness_matrix", dtype=torch.float32, shape=shape, initializer=torch.contrib.layers.xavier_initializer(), trainable=True)
    Wp = torch.get_variable("period_matrix", dtype=torch.float32, shape=shape, initializer=torch.contrib.layers.xavier_initializer(), trainable=True)
    Wt = torch.get_variable("trend_matrix", dtype=torch.float32, shape=shape, initializer=torch.contrib.layers.xavier_initializer(), trainable=True)

    output = torch.reshape(closeness_output, [closeness_output.shape[0]*closeness_output.shape[1], closeness_output.shape[2]])
    output = torch.matmul(output, Wc)
    closeness_output = torch.reshape(output, [closeness_output.shape[0], closeness_output.shape[1], closeness_output.shape[2]])

    output = torch.reshape(period_output, [period_output.shape[0]*period_output.shape[1], period_output.shape[2]])
    output = torch.matmul(output, Wp)
    period_output = torch.reshape(output, [period_output.shape[0], period_output.shape[1], period_output.shape[2]])

    output = torch.reshape(trend_output, [trend_output.shape[0]*trend_output.shape[1], trend_output.shape[2]])
    output = torch.matmul(output, Wt)
    trend_output = torch.reshape(output, [trend_output.shape[0], trend_output.shape[1], trend_output.shape[2]])
    # fusion
    outputs = torch.add(torch.add(closeness_output, period_output), trend_output)
    # adding non-linearity
    outputs = torch.tanh(outputs)
    # converting the dimension from (B, H, W) -> (B, H, W, 1) to match ground truth labels
    outputs = torch.expand_dims(outputs, axis=3)
    return outputs
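The reshape/matmul/reshape sequence applied to each output is a per-pixel linear map along the last axis. Assuming shape is a square (W, W) weight matrix, it collapses to a single einsum in stock PyTorch (a sketch with our own names, not the original variables):

import torch

B, H, W = 2, 8, 8
closeness = torch.randn(B, H, W)
Wc = torch.randn(W, W)  # stands in for the "closeness_matrix" variable

# Equivalent to the reshape -> matmul -> reshape in the function above.
ref = torch.matmul(closeness.reshape(B * H, W), Wc).reshape(B, H, W)
via_einsum = torch.einsum('bhw,wv->bhv', closeness, Wc)
print(torch.allclose(ref, via_einsum))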
Example #7
def get_dists(X):
    """Keras code to compute the pairwise distance matrix for a set of
    vectors specified by the matrix X.
    """
    x2 = torch.expand_dims(torch.sum(torch.power(X, 2), axis=1), 1)
    dists = x2 + torch.transpose(x2) - 2 * torch.dot(X, torch.transpose(X))
    return dists
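The computation relies on the identity ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 x_i . x_j, so the returned matrix holds squared distances. A small stock-PyTorch sanity check against torch.cdist:

import torch

X = torch.randn(5, 3)
x2 = (X ** 2).sum(dim=1, keepdim=True)            # (5, 1) squared norms
sq_dists = x2 + x2.t() - 2.0 * X @ X.t()          # (5, 5) squared pairwise distances
print(torch.allclose(sq_dists.clamp(min=0).sqrt(), torch.cdist(X, X), atol=1e-4))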
Example #8
    def gpi(self, observation, cumulant_weights):
        q_values = self.__call__(th.expand_dims(observation, axis=0))[0]
        q_w = th.tensordot(q_values, cumulant_weights, axes=[1, 0])  # [P,a]
        q_w_actions = th.reduce_max(q_w, axis=0)

        action = th.cast(th.argmax(q_w_actions), th.int32)

        return action
Example #9
def convert_to_tensorboard_image_shape(image):
    """Ensures an image has shape [B, H, W, C], used for visualizing latents."""
    image_shape = image.get_shape().as_list()
    if image_shape[-1] not in [1, 3]:
        image = torch.expand_dims(image, -1)
        image_shape = image.get_shape().as_list()
    output_image_shape = (4 - len(image_shape)) * [1] + image_shape
    if output_image_shape[0] is None:
        output_image_shape[0] = -1

    image = torch.reshape(image, output_image_shape)
    return image
Example #10
def axis_angle_to_rotation_matrix(axis, angle):
    B = angle.size(0)
    z = angle[:, 2]
    zeros = z.detach() * 0
    ones = zeros.detach() + 1

    Mat1 = torch.cat([
        zeros, -torch.expand_dims(torch.expand_dims(axis[:, 2], -1), -1),
        torch.expand_dims(torch.expand_dims(axis[:, 1], -1), -1)
    ],
                     axis=2)
    Mat2 = torch.cat([
        zeros, zeros, -torch.expand_dims(torch.expand_dims(axis[:, 0], -1), -1)
    ],
                     axis=2)
    Mat3 = torch.cat([zeros, zeros, zeros], axis=2)

    Mat = torch.cat([Mat1, Mat2, Mat3], axis=1)

    cp_axis = Mat - torch.transpose(Mat, perm=[0, 2, 1])

    RotMat = torch.eye(3, batch_shape=[B]) + torch.sin(angle) * cp_axis + (
        ones - torch.cos(angle)) * torch.matmul(cp_axis, cp_axis)

    return RotMat
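The function above appears to implement Rodrigues' rotation formula, R = I + sin(a) K + (1 - cos(a)) K^2, with K the skew-symmetric cross-product matrix of the axis. A hedged standalone sketch in stock PyTorch (the rodrigues helper and its tensor layout are ours, not the original's):

import torch

def rodrigues(axis, angle):
    """Rodrigues' formula R = I + sin(a) * K + (1 - cos(a)) * K @ K,
    where K is the cross-product matrix of the (unit) axis."""
    B = axis.shape[0]
    x, y, z = axis[:, 0], axis[:, 1], axis[:, 2]
    zero = torch.zeros_like(x)
    K = torch.stack([
        torch.stack([zero, -z, y], dim=-1),
        torch.stack([z, zero, -x], dim=-1),
        torch.stack([-y, x, zero], dim=-1),
    ], dim=-2)                                   # (B, 3, 3)
    eye = torch.eye(3).expand(B, 3, 3)
    a = angle.view(B, 1, 1)
    return eye + torch.sin(a) * K + (1.0 - torch.cos(a)) * (K @ K)

R = rodrigues(torch.tensor([[0.0, 0.0, 1.0]]), torch.tensor([3.14159265 / 2]))
print(R.round())  # approximately a 90-degree rotation about the z-axis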
Example #11
 def append_tok(self, idx, i, **kw):
     cfg = self.cfg
     k = 2 * cfg.beam_size
     b = torch.range(cfg.batch_size * k) // k
     b = torch.reshape(b, (cfg.batch_size, k))
     beam = idx // cfg.num_toks
     sel = torch.stack([b, beam], axis=2)
     y = torch.gather_nd(self.tgt, sel)
     ii = torch.constant([i] * cfg.batch_size * k)
     ii = torch.reshape(ii, (cfg.batch_size, k))
     sel = torch.stack([b, beam, ii], axis=2)
     u = torch.expand_dims(idx % cfg.num_toks, axis=2)
     tgt = torch.tensor_scatter_nd_update(y, sel, u)
     return tgt
Example #12
 def top_logp(self, ctx, bias, i):
     cfg = self.cfg
     y = torch.zeros((
         cfg.batch_size,
         cfg.beam_size,
         cfg.num_toks,
     ))
     y += torch.expand_dims(self.logp, axis=2)
     b = torch.range(cfg.batch_size)
     ii = torch.constant([i] * cfg.batch_size)
     for j in range(cfg.beam_size):
         jj = torch.constant([j] * cfg.batch_size)
         sel = torch.stack([b, jj, ii])
         yj = self.to_logp(self.tgt[:, j, :], ctx, bias, i)[1]
         y = torch.tensor_scatter_nd_add(y, sel, yj)
     y = torch.reshape(y, (-1, cfg.beam_size * cfg.num_toks))
     logp, idx = torch.top_k(y, k=2 * cfg.beam_size)
     return logp, idx
Example #13
    def propagate(self, X, full_cov=False, S=1, zs=None):
        sX = bf.tile(th.expand_dims(X, 0), [S, 1, 1])

        Fs, Fmeans, Fvars = [], [], []

        F = sX
        zs = zs or [
            None,
        ] * len(self.layers)
        for layer, z in zip(self.layers, zs):
            F, Fmean, Fvar = layer.sample_from_conditional(F,
                                                           z=z,
                                                           full_cov=full_cov)

            Fs.append(F)
            Fmeans.append(Fmean)
            Fvars.append(Fvar)

        return Fs, Fmeans, Fvars
Example #14
def relative_logits_1d(q, rel_k, H, W, Nh, transpose_mask):
    """Compute relative logits along one dimension."""
    """Need to document inputs to make sure we test right"""
    """
    q is B * H * W * Nh
    rel_k is from tf.get_variable and is of shape 2*H-1, dk/Nh
    H / W / Nh are ints and we transpose positions
    """

    rel_logits = torch.einsum('bhxyd,md->bhxym', q, rel_k)
    # Collapse height and heads
    rel_logits = torch.reshape(rel_logits, [-1, Nh * H, W, 2 * W - 1])
    rel_logits = rel_to_abs(rel_logits)
    # Shape it and tile height times
    rel_logits = torch.reshape(rel_logits, [-1, Nh, H, W, W])
    rel_logits = torch.expand_dims(rel_logits, axis=3)
    rel_logits = torch.tile(rel_logits, [1, 1, 1, H, 1, 1])
    # Reshape for adding to the logits.
    rel_logits = torch.transpose(rel_logits, transpose_mask)
    rel_logits = torch.reshape(rel_logits, [-1, Nh, H * W, H * W])
    return rel_logits
Example #15
def merge_pc_and_gripper_pc(pc,
                            gripper_pc,
                            instance_mode=0,
                            pc_latent=None,
                            gripper_pc_latent=None):
    """
    Merges the object point cloud and gripper point cloud and
    adds a binary auxiliary feature that indicates whether each point
    belongs to the object or to the gripper.
    """

    pc_shape = pc.shape
    gripper_shape = gripper_pc.shape
    assert (len(pc_shape) == 3)
    assert (len(gripper_shape) == 3)
    assert (pc_shape[0] == gripper_shape[0])

    npoints = pc.shape[1]
    batch_size = pc.shape[0]

    if instance_mode == 1:
        assert pc_shape[-1] == 3
        latent_dist = [pc_latent, gripper_pc_latent]
        latent_dist = torch.cat(latent_dist, 1)

    l0_xyz = torch.cat((pc, gripper_pc), 1)
    labels = [
        torch.ones((pc.shape[1], 1), dtype=torch.float32),
        torch.zeros((gripper_pc.shape[1], 1), dtype=torch.float32)
    ]
    labels = torch.cat(labels, 0)
    labels = torch.expand_dims(labels, 0)
    labels = torch.tile(labels, [batch_size, 1, 1])

    if instance_mode == 1:
        l0_points = torch.cat([l0_xyz, latent_dist, labels], -1)
    else:
        l0_points = torch.cat([l0_xyz, labels], -1)

    return l0_xyz, l0_points
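The binary object-vs-gripper label trick can be reproduced in stock PyTorch with unsqueeze/expand in place of expand_dims/tile. A minimal sketch with illustrative sizes:

import torch

batch_size, n_obj, n_grip = 2, 100, 20
pc = torch.randn(batch_size, n_obj, 3)
gripper_pc = torch.randn(batch_size, n_grip, 3)

l0_xyz = torch.cat((pc, gripper_pc), dim=1)              # (B, n_obj + n_grip, 3)
labels = torch.cat([torch.ones(n_obj, 1), torch.zeros(n_grip, 1)], dim=0)
labels = labels.unsqueeze(0).expand(batch_size, -1, -1)  # (B, n_obj + n_grip, 1)
l0_points = torch.cat([l0_xyz, labels], dim=-1)          # (B, n_obj + n_grip, 4)
print(l0_points.shape)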
Example #16
 def search(self, tgt, ctx, i=None):
     cfg = self.cfg
     unk = torch.equal(tgt, cfg.UNK)
     prior = torch.one_hot(tgt, cfg.num_toks, 0.0, utils.big_neg)
     if i is not None:
         unk = unk[:, i]
         prior = prior[:, i, :]
     if torch.reduce_all(unk):
         logi = prior
     else:
         y = self.decode(tgt, ctx)
         if i is not None:
             y = y[:, i, :]
         sh = y.shape  # torch.int_shape(y)
         y = torch.reshape(y, (-1, sh[-1]))
         y = self.logits(y)
         y = torch.reshape(y, sh[:-1] + y.shape[-1:])
         u = torch.expand_dims(unk, axis=2)
         u = torch.broadcast_to(u, y.shape)
         logi = torch.where(u, y, prior)
     logp = y - torch.reduce_logsumexp(y, axis=-1, keepdims=True)
     return logp, logi, unk
Example #17
def quaternion_rotate(pc, q, inverse=False):
    """rotates a set of 3D points by a rotation,
    represented as a quaternion
    Args:
        pc: [B,N,3] point cloud
        q: [B,4] rotation quaternion
    Returns:
        q * pc * q'
    """
    q_norm = q.norm(p=2, dim=-1).reshape(q.shape[0], 1)
    # TODO: Detach or not, the denominator
    q = torch.div(q, q_norm)
    q = q.reshape(q.shape[0], 1, q.shape[1])  # [B,1,4]
    q_ = quaternion_conjugate(q)
    qmul = quaternion_multiply
    if not inverse:
        wxyz = qmul(qmul(q, pc), q_)  # [B,N,4]
    else:
        wxyz = qmul(qmul(q_, pc), q)  # [B,N,4]
    if len(wxyz.shape) == 2:  # bug with batch size of 1
        wxyz = torch.expand_dims(wxyz, axis=0)
    xyz = wxyz[:, :, 1:4]  # [B,N,3]
    return xyz
Example #18
def images_to_grid(images,
                   grid_height=4,
                   grid_width=4,
                   image_border_value=0.5):
    """Combine images and arrange them in a grid.

    Args:
        images: Tensor of shape [B, H], [B, H, W], or [B, H, W, C].
        grid_height: Height of the grid of images to output, or None. Either
            `grid_width` or `grid_height` must be set to an integer value. If None,
            `grid_height` is set to ceil(B/`grid_width`), and capped at
            `max_grid_height` when provided.
        grid_width: Width of the grid of images to output, or None. Either
            `grid_width` or `grid_height` must be set to an integer value. If None,
            `grid_width` is set to ceil(B/`grid_height`), and capped at
            `max_grid_width` when provided.
        max_grid_height: Maximum allowable height of the grid of images to output,
            or None. Only used when `grid_height` is None.
        max_grid_width: Maximum allowable width of the grid of images to output,
            or None. Only used when `grid_width` is None.
        image_border_value: None or scalar value of greyscale border for images. If
            None, then no border is rendered.

    Raises:
        ValueError: if neither of grid_width or grid_height are set to a positive
            integer.

    Returns:
        images: Tensor of shape [height*H, width*W, C]. C will be set to 1 if the
        input was provided with no channels. Contains all input images in a grid.
    """

    # If only one dimension is set, infer how big the other one should be.
    images = images[:grid_height * grid_width, ...]

    # Pad with extra blank frames if grid_height x grid_width is less than the
    # number of frames provided.
    pre_images_shape = images.get_shape().as_list()
    if pre_images_shape[0] < grid_height * grid_width:
        pre_images_shape[0] = grid_height * grid_width - pre_images_shape[0]
        if image_border_value is not None:
            dummy_frames = image_border_value * torch.ones(
                shape=pre_images_shape, dtype=images.dtype)
        else:
            dummy_frames = torch.zeros(shape=pre_images_shape,
                                       dtype=images.dtype)
        images = torch.concat([images, dummy_frames], axis=0)

    if image_border_value is not None:
        images = _pad_images(images, image_border_value=image_border_value)
    images_shape = images.get_shape().as_list()
    images = torch.reshape(images,
                           [grid_height, grid_width] + images_shape[1:])
    if len(images_shape) == 2:
        images = torch.expand_dims(images, -1)
    if len(images_shape) <= 3:
        images = torch.expand_dims(images, -1)
    image_height, image_width, channels = images.get_shape().as_list()[2:]
    images = torch.transpose(images, perm=[0, 2, 1, 3, 4])
    images = torch.reshape(
        images,
        [grid_height * image_height, grid_width * image_width, channels])
    return images
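The core gridding step is the reshape, permute, reshape at the end of the function. A minimal stock-PyTorch sketch of just that step, without the padding and dummy-frame handling:

import torch

B, H, W, C = 16, 8, 8, 3
grid_h, grid_w = 4, 4
images = torch.randn(B, H, W, C)

grid = images.reshape(grid_h, grid_w, H, W, C)
grid = grid.permute(0, 2, 1, 3, 4)               # interleave grid rows with image rows
grid = grid.reshape(grid_h * H, grid_w * W, C)
print(grid.shape)  # torch.Size([32, 32, 3])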
Example #19
    def forward(self, h, x, presence=None):

        batch_size, n_input_points = int(x.shape[0]), int(x.shape[1])
        self.vote_shape = [batch_size, self._n_caps, self._n_votes, 6]

        res = self.capsule(h)

        # The paper literally just grabs these coordinates from our OV * OP
        # composition. Constructing a two-dimensional vote.
        res.vote = res.vote[..., :-1, -1]

        # Reshape dimensions for constellation specific use case.
        def pool_dim(x, dim_begin, dim_end):
            combined_shape = list(x.shape[:dim_begin]) + [-1] + list(
                x.shape[dim_end:])
            return x.view(combined_shape)

        for k, v in res.items():
            print("raw out : ", v.shape)
            if k == "vote" or k == "scale":
                res[k] = pool_dim(v, 1, 3)
            if k == "vote_presence":
                print("ROSA PARKS: ", v.shape)
                res[k] = pool_dim(v, 1, 3)
                print("ROSA PARKS: ", res[k].shape)

        likelihood = _capsule.OrderInvariantCapsuleLikelihood(
            self._n_votes, res.vote, res.scale, res.vote_presence)
        ll_res = likelihood(x, presence)

        #
        # Mixing KL div.
        #

        soft_layer = torch.nn.Softmax(dim=1)
        mixing_probs = soft_layer(ll_res.mixing_logits)
        prior_mixing_log_prob = math.scalar_log(1. / n_input_points)
        mixing_kl = mixing_probs * \
            (ll_res.mixing_log_prob - prior_mixing_log_prob)
        mixing_kl = torch.mean(torch.sum(mixing_kl, -1))

        #
        # Sparsity loss.
        #

        from_capsule = ll_res.is_from_capsule

        # torch implementation of tf.one_hot
        idx = torch.eye(self._n_caps)
        wins_per_caps = torch.stack([
            idx[from_capsule[b].type(torch.LongTensor)]
            for b in range(from_capsule.shape[0])
        ])

        if presence is not None:
            wins_per_caps *= torch.expand_dims(presence, -1)

        wins_per_caps = torch.sum(wins_per_caps, 1)

        has_any_wins = torch.gt(wins_per_caps, 0).float()
        should_be_active = torch.gt(wins_per_caps, 1).float()

        # https://pytorch.org/docs/stable/generated/torch.nn.MultiLabelSoftMarginLoss.html
        # From math, looks to be same as `tf.nn.sigmoid_cross_entropy_with_logits`.

        # TODO: not rigorous cross-implementation
        softmargin_loss = torch.nn.MultiLabelSoftMarginLoss()
        # MultiLabelSoftMarginLoss takes (input_logits, target).
        sparsity_loss = softmargin_loss(res.pres_logit_per_caps,
                                        should_be_active)

        # sparsity_loss = tf.reduce_sum(sparsity_loss * has_any_wins, -1)
        # sparsity_loss = tf.reduce_mean(sparsity_loss)

        caps_presence_prob = torch.max(
            torch.reshape(res.vote_presence,
                          [batch_size, self._n_caps, self._n_votes]), 2)[0]

        #
        # Constructing loss ensemble.
        #
        print(torch.mean(res.scale))

        return EasyDict(mixing_kl=mixing_kl,
                        sparsity_loss=sparsity_loss,
                        caps_presence_prob=caps_presence_prob,
                        mean_scale=torch.mean(res.scale))
Example #20
    def forward(self, inputs):
        #if not (self.built):
        #   self.build(inputs.shape)
        # inputs = torch.ops.convert_to_tensor(inputs, dtype=self.dtype)
        print(inputs.dtype, self.kernel.dtype, self.dendriticW.dtype)
        # if not (inputs.dtype == self.dendriticW.dtype):
        #    print("casting")
        #    inputs = torch.cast(inputs, dtype=self.dendriticW.dtype)
        print('input shape', inputs.shape)

        if self.weight_twice:
            # each dendrite COULD have a unique weight for each input, meaning Wshape=[input,dendrite,units]
            output = inputs.unsqueeze(-1)
            print(output.dtype)
            print(output.shape, self.kernel.shape)
            if self.uniqueW:
                output = torch.multiply(output, self.kernel)
            else:
                output = torch.tensordot(output,
                                         self.kernel,
                                         dims=([
                                             -1,
                                         ], [
                                             0,
                                         ]))
            print(output.shape, 'first weighting')
            print(self.bias.shape)
            if self.use_bias:
                output += self.bias  # torch.transpose(output + self.bias)
            print(output.shape, 'bias1')

        else:
            output = torch.transpose(inputs)  # units,x,batch

            # loopv1
            # condition= lambda inn,hold: torch.less(inn,self.input_shapes[-1])
            # looper=lambda inn,hold : [torch.add(inn,incr), torch.unsorted_segment_sum(output[inn],self.dendrites[inn],self.seql)]
            # output=torch.while_loop(condition,looper,[ix,hold])
            # output=torch.stack(output[1])
        if self.version == 3:
            if self.weight_twice:
                output = torch.unstack(output)
                output = torch.stack([
                    torch.unsorted_segment_sum(data, self.dendrites[i],
                                               self.seql)
                    for i, data in enumerate(output)
                ])
            else:
                output = torch.stack([
                    torch.unsorted_segment_sum(output, seq, self.seql)
                    for seq in self.dendrites
                ])
        if self.version == 2:
            if self.weight_twice:
                print(self.debuildshape)
                output = torch.reshape(output, self.debuildshape)
                output = torch.unsorted_segment_sum(
                    output, torch.reshape(self.dendrites, self.deseqshape),
                    self.num_id)
                output = torch.reshape(output, self.rebuildshape)
                print(torch.transpose(output).shape, self.rebuildshape)
            else:
                print(self.debuildshape)
                output = torch.matmul(
                    torch.expand_dims(output, 0),
                    torch.ones(
                        (self.units, *[1 for _ in range(self.len_input)]),
                        dtype=output.dtype))
                output = torch.reshape(torch.transpose(output),
                                       self.debuildshape)
                output = torch.unsorted_segment_sum(
                    output, torch.reshape(self.dendrites, self.deseqshape),
                    self.num_id)
                output = torch.reshape(output, self.rebuildshape)
                # print(torch.transpose(output).shape, self.rebuildshape)
        if self.version == 1:
            output = self.dendritic_op(output, )
            # too much squashing
            print(output.shape, 'unsorted shape', self.dendriticW.shape,
                  output.dtype, self.dendriticW.dtype)
            output = torch.mul(output, self.dendriticW)  # matmul
            # output = torch.tensordot(output, self.dendriticW,)# dims=([[-1,],[0,]]))
        else:
            print(output.shape, self.dendriticW.shape)
            output = torch.mul(
                output, self.dendriticW
            )  # perfect since it's elementwise and not dot product
        print(output.shape, '2w shape', self.dendriticB.shape)
        if self.use_bias:
            output += self.dendriticB
        print(output.shape, '2b shape')
        output = torch.sum(output, -2)  # sum the dendrites
        if self.activation is not None:
            return self.activation(output)
        print('outshap is {}'.format(output.shape))
        # print('GOOD OUTPUT SHAPE') if output.shape==(*self.input_shapes[:-1],self.units) else print("BAD OUTPUT SHAPE")
        return (output)
Example #21
 def test_expand_dims_variable(self):
     var = Variable(torch.range(0, 9).view(-1, 2))
     ret = torch.expand_dims(var, 0)
     expected = var.view(1, -1, 2)
     assert ret.size() == expected.size()
Example #22
 def test_expand_dims_tensor(self):
     var = torch.range(0, 9).view(-1, 2)
     ret = torch.expand_dims(var, 0)
     expected = var.view(1, -1, 2)
     assert ret.size() == expected.size()
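For reference, stock PyTorch exposes this operation as torch.unsqueeze (also available as the Tensor.unsqueeze method), so the two tests above translate directly:

import torch

t = torch.arange(10, dtype=torch.float32).view(-1, 2)   # (5, 2)
assert torch.unsqueeze(t, 0).size() == t.view(1, -1, 2).size()
assert t.unsqueeze(0).shape == (1, 5, 2)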