Example 1
    def forward(self, inputs, aspects, lengths, aspects_lengths):

        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.seed)

        inputs = self.embedding(inputs)
        inputs = self.noise_emb(inputs)
        inputs = self.drop_emb(inputs)

        # Build the padding mask from the aspect token ids *before* embedding them;
        # masking the embedded values (as in `(aspects > 0)` after the lookup) would
        # zero out negative embedding components instead of the padded positions.
        mask = (aspects > 0).float().unsqueeze(-1)

        aspects = self.embedding(aspects)
        aspects = self.drop_emb(aspects)

        # average the aspect embeddings over the real (non-padded) tokens
        aspects = torch.sum(aspects * mask, dim=1)
        new_asp = aspects / aspects_lengths.unsqueeze(-1).float()
        new_asp = torch.unsqueeze(new_asp, 1)
        new_asp = new_asp.expand(inputs.size(0), inputs.size(1),
                                 inputs.size(2))

        concat = torch.cat((inputs, new_asp), 2)
        inputs = concat.unsqueeze(1)

        inputs = [F.relu(conv(inputs)).squeeze(3) for conv in self.convs]
        inputs = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in inputs]
        concatenated = torch.cat(inputs, 1)

        concatenated = self.dropout(concatenated)

        return self.fc(concatenated)
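
The torch.cat pattern here is concatenating a pooled per-sentence aspect vector onto every token embedding before the convolutions. A minimal standalone sketch of just that step, with made-up sizes (not code from the project above):

import torch

tokens = torch.randn(4, 10, 50)   # token embeddings, [batch, seq_len, emb_dim]
aspect = torch.randn(4, 50)       # one pooled aspect vector per sentence

# repeat the aspect vector across the sequence dimension and fuse it with the tokens
aspect = aspect.unsqueeze(1).expand(-1, tokens.size(1), -1)   # [4, 10, 50]
fused = torch.cat((tokens, aspect), dim=2)                    # [4, 10, 100]
print(fused.shape)
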
Example 2
    def forward(self, rel_embed, nodes_embed, h_t_pair_label, h_t_pair_path,
                h_t_pair_path_len, b_ind, h_ind, t_ind, global_step):

        max_path_num = h_t_pair_path.shape[-2]
        max_step_num = h_t_pair_path.shape[-1]
        N_bt = nodes_embed.shape[0]

        # no_rel_mask = (torch.sum(h_t_pair_label[b_ind,h_ind,t_ind,1:],dim=-1)==0)

        select_path_len = h_t_pair_path_len[b_ind, h_ind, t_ind]
        select = (torch.cumsum(
            (select_path_len > 0).long(), dim=-1) == 1) & (select_path_len > 0)
        path_select_id = torch.nonzero(select.long())[:, 1]

        select_path_id = h_t_pair_path[b_ind, h_ind, t_ind][
            torch.arange(path_select_id.shape[0]).cuda(), path_select_id]

        select_path_len = h_t_pair_path_len[b_ind, h_ind, t_ind][
            torch.arange(path_select_id.shape[0]).cuda(), path_select_id]

        path_bt_id = b_ind[..., None].repeat(1, max_step_num)
        path_embed = nodes_embed[path_bt_id, select_path_id]
        path_embed = torch.cat(
            (self.eos_embed.repeat(path_embed.shape[0], 1, 1), path_embed),
            dim=1)

        init_h = torch.relu(self.trans_hidden(rel_embed)).unsqueeze(dim=0)
        init_c = torch.relu(self.trans_ceil(rel_embed)).unsqueeze(dim=0)

        seq_hidden, _, _ = self.rnn(path_embed, select_path_len, init_h,
                                    init_c)

        nodes_ext = torch.cat((self.nop_embed.repeat(N_bt, 1, 1), nodes_embed),
                              dim=1)
        vocb = torch.relu(self.trans_pred(nodes_ext[b_ind]))

        seq_pred = torch.einsum("abd,acd->abc", seq_hidden, vocb)

        select_path_id = select_path_id + 1
        select_path_id = torch.cat(
            (select_path_id,
             torch.zeros(select_path_id.shape[0], 1, dtype=torch.long).cuda()),
            dim=-1)
        # select_path_len -= 1

        seqlen, w_ids = torch.broadcast_tensors(
            select_path_len.unsqueeze(-1),
            torch.arange(0, max_step_num + 1).cuda()[None, ...])
        seq_mask = w_ids < seqlen
        select_path_id[~seq_mask] = 0

        return seq_pred, select_path_id, seq_mask
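
The sequence mask built at the end (broadcasting a step index against the selected path lengths) can be reproduced in isolation. A small sketch with invented lengths, on CPU instead of .cuda():

import torch

select_path_len = torch.tensor([3, 5, 2])          # valid steps per path
max_step_num = 5

seqlen, w_ids = torch.broadcast_tensors(
    select_path_len.unsqueeze(-1),
    torch.arange(0, max_step_num + 1)[None, ...])
seq_mask = w_ids < seqlen                          # [3, 6] boolean mask
print(seq_mask.int())
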
Example 3
    def forward(self, h_text, w_text, h_audio, w_audio, lengths):
        '''
        INPUTS:
            h_text:  per-timestep hidden states of the text encoder
            w_text:  per-timestep attention weights over the text sequence
            h_audio: per-timestep hidden states of the audio encoder
            w_audio: per-timestep attention weights over the audio sequence
            lengths: true (unpadded) sequence lengths
        OUTPUTS:
            fused_timestep: attention-weighted text and audio features,
                            concatenated along the feature dimension
        '''

        # get weighted representations
        text_weighted = self.weighted_timestep(h_text, w_text)
        audio_weighted = self.weighted_timestep(h_audio, w_audio)

        # cat features
        fused_timestep = torch.cat((text_weighted, audio_weighted), 2)

        ################################################
        ## linear projection
        #h_fused = self.dense(fused_timestep)

        ## apply generalized attention
        #fusion_representation, w_fusion = self.attn(h_fused, lengths)

        return fused_timestep
Example 4
    def last_timestep(self, outputs, lengths, bi=False):
        if bi:
            forward, backward = self.split_directions(outputs)
            # last valid forward step per sequence; the backward direction's
            # "last" step is its first position in time
            last_forward = self.last_by_index(forward, lengths)
            if len(last_forward.size()) == 1:
                last_forward = last_forward.unsqueeze(0)
            last_backward = backward[:, 0, :]
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)
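
For reference, a rough standalone sketch of the same bidirectional last-timestep idea, with invented names and sizes. Note that with plain padded input the backward direction also reads the padding; the snippets above rely on packed sequences to avoid that:

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=8, hidden_size=16, batch_first=True, bidirectional=True)
x = torch.randn(2, 5, 8)                 # [batch, time, features]
lengths = torch.tensor([5, 3])

out, _ = rnn(x)                          # [2, 5, 32]
fwd, bwd = out[..., :16], out[..., 16:]  # split the two directions

last_fwd = fwd[torch.arange(x.size(0)), lengths - 1]   # last valid forward step
last_bwd = bwd[:, 0, :]                                # backward pass ends at t=0
summary = torch.cat((last_fwd, last_bwd), dim=-1)      # [2, 32]
print(summary.shape)
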
Example 5
    def last_timestep(self, outputs, lengths, bi=False):
        if bi:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)
Example 6
    def forward(self, data_in):
        # The single-vector path below is the snippet's earlier, dead code: its
        # results are overwritten before the return, so it is kept only as comments.
        # out = F.leaky_relu(self.linear1(data_in.view(1, -1)))
        # out, self.hidden = self.lstm1(out.view(1, 1, -1), self.hidden)
        # out = F.leaky_relu(out)
        # out = self.linear2(out.view(1, -1))
        # out_pos = torch.tanh(out[:, :2])
        # out_vel = out[:, 2:]

        # Assumes data_in is a (joint_data, img_features) pair; the original
        # snippet used these names without defining them.
        joint_data, img_features = data_in

        lstm_input = torch.cat((joint_data, img_features), dim=2)
        out, h = self.lstm1(lstm_input)
        out = self.linear(out)
        out_pos = torch.tanh(out[:, :, :2])   # positions squashed to [-1, 1]
        out_vel = out[:, :, 2:]               # velocities left unbounded

        return torch.cat((out_pos, out_vel), dim=2)
Example 7
    def last_timestep(self, outputs, lengths, bi=False):
        if bi:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)
Example 8
    def forward(self, data_in):
        # concatenate the three input streams along the feature dimension

        out = torch.cat((data_in[0], data_in[1], data_in[2]), dim=2)
        out = F.leaky_relu(self.linear1(out))
        out, self.hidden = self.lstm1(out.permute((1, 0, 2)), self.hidden)
        out = F.leaky_relu(out.permute((1, 0, 2)))
        out = self.linear2(out)
        return out
Example 9
    def zero_pad(tensor):
        batch_size = tensor.size(0)
        real_len = tensor.size(1)
        dim = tensor.size(2)

        if MAX_LEN > real_len:
            # pad the time dimension with zeros up to MAX_LEN
            zeros = torch.zeros(batch_size, MAX_LEN - real_len, dim,
                                device=DEVICE)
            tensor = torch.cat((tensor, zeros), 1)

        return tensor
Example 10
    def forward(self, message, topic, lengths, topic_lengths):

        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.seed)

        ###MESSAGE MODEL###

        embeds = self.embedding(message)
        embeds = self.noise_emb(embeds)
        embeds = self.dropout_embeds(embeds)

        # pack the batch
        embeds_pckd = pack_padded_sequence(embeds,
                                           list(lengths.data),
                                           batch_first=True)

        mout_pckd, (hx1, cx1) = self.shared_lstm(embeds_pckd)

        # unpack output - no need if we are going to use only the last outputs
        mout_unpckd, _ = pad_packed_sequence(
            mout_pckd, batch_first=True)  # [batch_size,seq_length,300]

        # Last timestep output is not used
        # message_output = self.last_timestep(self.shared_lstm, hx1)
        # message_output = self.dropout_rnn(message_output)

        ###TOPIC MODEL###

        topic_embeds = self.embedding(topic)
        topic_embeds = self.dropout_embeds(topic_embeds)

        tout, (hx2, cx2) = self.shared_lstm(topic_embeds)
        tout = self.dropout_rnn(tout)

        mask = (topic > 0).float().unsqueeze(-1)
        tout = torch.sum(tout * mask, dim=1)
        tout = tout / topic_lengths.unsqueeze(-1).float()
        tout = torch.unsqueeze(tout, 1)
        tout = tout.expand(mout_unpckd.size(0), mout_unpckd.size(1),
                           mout_unpckd.size(2))

        out = torch.cat((mout_unpckd, tout), 2)
        representations, attentions = self.attention(out, lengths)

        return self.linear(representations)
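
The pack/unpack round trip used for the message branch, shown on its own with toy sizes (lengths are assumed to be sorted in descending order, as pack_padded_sequence expects by default):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
x = torch.randn(3, 7, 10)                # padded batch, [batch, max_len, features]
lengths = torch.tensor([7, 5, 2])

packed = pack_padded_sequence(x, lengths, batch_first=True)
out_packed, (h, c) = lstm(packed)
out, out_lengths = pad_packed_sequence(out_packed, batch_first=True)
print(out.shape)                         # torch.Size([3, 7, 20])
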
Example 11
    def forward(self, x):

        # input x should be in size [B, T, F], where B = batch size,
        #                                            T = time samples,
        #                                            F = features

        h0, c0 = self.init_hidden()
        x1, (ht, ct) = self.lstm(x, (h0, c0))
        x1 = x1[:, -1, :]

        x2 = x.transpose(2, 1)
        x2 = self.ConvDrop(self.relu(self.BN1(self.C1(x2))))
        x2 = self.ConvDrop(self.relu(self.BN2(self.C2(x2))))
        x2 = self.ConvDrop(self.relu(self.BN3(self.C3(x2))))
        x2 = torch.mean(x2, 2)

        x_all = torch.cat((x1, x2), dim=1)
        x_out = self.FC(x_all)
        return x_out
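
A compact sketch of the fusion performed here: an LSTM summary and a pooled convolutional summary concatenated along the feature dimension. Layer sizes are invented:

import torch
import torch.nn as nn

x = torch.randn(4, 20, 12)                       # [batch, time, features]

lstm = nn.LSTM(input_size=12, hidden_size=32, batch_first=True)
conv = nn.Conv1d(in_channels=12, out_channels=24, kernel_size=3, padding=1)

x1, _ = lstm(x)
x1 = x1[:, -1, :]                                # last LSTM step, [4, 32]

x2 = torch.relu(conv(x.transpose(2, 1)))         # convolve over time, [4, 24, 20]
x2 = torch.mean(x2, dim=2)                       # global average pooling, [4, 24]

x_all = torch.cat((x1, x2), dim=1)               # [4, 56]
print(x_all.shape)
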
Example 12
    def forward(self, x, lengths):
        embeddings = self.embedding(x)

        attentions = None
        outputs, last_output = self.encoder(embeddings, lengths)

        if hasattr(self, 'attention'):
            if self.attention_context:
                context = self._mean_pooling(outputs, lengths)
                context = context.unsqueeze(1).expand(-1, outputs.size(1), -1)
                outputs = torch.cat([outputs, context], -1)

            representations, attentions = self.attention(outputs, lengths)

            if self.attention_context:
                representations = representations[:, :context.size(-1)]
        else:
            representations = last_output

        return representations, attentions
Example 13
    def forward(self, x, lengths):
        embeddings = self.embedding(x)

        attentions = None
        outputs, last_output = self.encoder(embeddings, lengths)

        if hasattr(self, 'attention'):
            if self.attention_context:
                context = self._mean_pooling(outputs, lengths)
                context = context.unsqueeze(1).expand(-1, outputs.size(1), -1)
                outputs = torch.cat([outputs, context], -1)

            representations, attentions = self.attention(outputs, lengths)

            if self.attention_context:
                representations = representations[:, :context.size(-1)]
        else:
            representations = last_output

        return representations, attentions
Example 14
    def forward(self, x):
        # If we are forwarding the flux to the fully connected layers, save it.
        if self.forward_flux:
            f = x

        # Pass batch of spectra through conv layers.
        for cl in self.conv_layers:
            x = cl(x)

        # Flatten flux filters.
        x = x.view(x.size(0), -1)

        # Concatenate the raw flux values before the fully connected layers.
        if self.forward_flux:
            # Remove the channel dim from f before concatenating.
            x = torch.cat([x, f.view(f.size(0), -1)], dim=1)

        # run the data through the fully connected layers.
        for fl in self.fc_layers:
            x = fl(x)
        x = self.final_act(x)
        return x
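
The forward_flux branch is a skip connection: the raw input is flattened and concatenated with the flattened convolutional features before the fully connected layers. A toy version of just that step, with made-up shapes:

import torch
import torch.nn as nn

x = torch.randn(8, 1, 100)                           # [batch, channels, flux bins]
conv = nn.Sequential(nn.Conv1d(1, 4, kernel_size=5, padding=2), nn.ReLU())

feat = conv(x).view(x.size(0), -1)                   # flattened conv features, [8, 400]
skip = x.view(x.size(0), -1)                         # raw flux, channel dim removed, [8, 100]
fc_in = torch.cat([feat, skip], dim=1)               # [8, 500]
print(fc_in.shape)
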
Example 15
    def forward(self, h_text, w_text, h_audio, w_audio, lengths):
        '''
        INPUTS:
            h_text:  per-timestep hidden states of the text encoder
            w_text:  per-timestep attention energies over the text sequence
            h_audio: per-timestep hidden states of the audio encoder
            w_audio: per-timestep attention energies over the audio sequence
            lengths: true (unpadded) sequence lengths (not used in this variant)
        OUTPUTS:
            fusion_representation: attended fused representation
            w_fusion:              fusion attention weights
        '''

        # cat features
        h_fused = torch.cat((h_text, h_audio), 2)

        # linear projection
        h_fused = self.dense(h_fused)

        # average attention energies
        w_averaged = torch.add(w_text, w_audio) / 2.0

        # apply generalized attention
        fusion_representation, w_fusion = self.attn(h_fused, w_averaged)

        return fusion_representation, w_fusion
Example 16
    loss_epoch = 0
    diff_epoch = 0

    epi_x_old = 0
    x_buf = []
    y_buf = []

    for epi, data in enumerate(dataloader_train):
        x, y, epi_x = extract(data)

        net.zero_grad()
        net.zero_hidden()
        optimizer.zero_grad()

        if epi_x != epi_x_old or epi == len(dataset_train) - 1:
            x_cat = torch.cat(x_buf, 0).unsqueeze(1)
            y_cat = torch.cat(y_buf, 0).unsqueeze(1)

            delta = net(x_cat)

            loss = loss_function(x_cat[:, :, :12] + delta, y_cat)
            loss.backward()
            optimizer.step()

            x_buf = []
            y_buf = []
            epi_x_old = epi_x

            loss_episode = loss.item()
            diff_episode = F.mse_loss(x_cat[:, :, :12], y_cat).item()
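
The loop above buffers per-step tensors and only concatenates them at an episode boundary. The buffering step in isolation, with invented shapes:

import torch

x_buf = [torch.randn(5, 12) for _ in range(3)]   # three chunks of 5 timesteps each
x_cat = torch.cat(x_buf, 0).unsqueeze(1)         # [15, 1, 12]: time, batch of 1, features
print(x_cat.shape)
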
Example 17
def non_max_suppression(prediction,
                        conf_thres=0.1,
                        iou_thres=0.6,
                        fast=False,
                        classes=None,
                        agnostic=False):
    """
    Performs Non-Maximum Suppression on inference results
    Returns detections with shape:
        nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    fast |= conf_thres > 0.001  # fast mode
    if fast:
        merge = False
        multi_label = False
    else:
        merge = True  # merge for best mAP (adds 0.5ms/img)
        multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero().t()
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
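
The c = x[:, 5:6] * max_wh offset runs class-aware NMS in a single call by shifting each class's boxes into a disjoint coordinate range (the same idea torchvision.ops.batched_nms uses internally). A small standalone check with made-up boxes:

import torch
import torchvision

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])     # xyxy
scores = torch.tensor([0.9, 0.8, 0.7])
cls = torch.tensor([0, 0, 1])

max_wh = 4096                                  # larger than any box coordinate
offset = cls[:, None].float() * max_wh         # push each class into its own region
keep = torchvision.ops.nms(boxes + offset, scores, iou_threshold=0.5)
print(keep)   # tensor([0, 2]): box 1 is suppressed by box 0; box 2 survives (other class)
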
Example 18
    def last_timestep(self, rnn, h):
        if rnn.bidirectional:
            return torch.cat((h[-2], h[-1]), 1)
        else:
            return h[-1]
Example 19
    def forward(self, covarep, glove, lengths):
        # sorted features accepted as input
        # text rnn
        self.text_rnn.eval()
        logits_text, deep_T, hidden_t, weighted_t, T = self.text_rnn(
            glove, lengths)

        # audio rnn
        self.audio_rnn.eval()
        logits_audio, deep_A, hidden_a, weighted_a, A = self.audio_rnn(
            covarep, lengths)

        # cat-fusion attention subnetwork
        f_i = self.fusion_net(hidden_t, weighted_t, hidden_a, weighted_a,
                              lengths)

        # mul-fusion attention network
        m_i = self.mul_fusion(hidden_t, weighted_t, hidden_a, weighted_a,
                              lengths)

        fused_i = torch.cat((f_i, m_i), 2)
        fused_i = self.zero_pad(fused_i)

        ##fused_i = self.fusion_transform(fused_i)
        _, F, _, _ = self.fusion_rnn(fused_i, lengths)

        # dense representations
        deep_A_fusion = self.deep_audio(A)

        deep_T_fusion = self.deep_text(T)

        mid_F = torch.cat((deep_A_fusion, deep_T_fusion, F), 1)

        deep_F = self.deep_fused(mid_F)

        # concatenate features

        # deep_A = torch.cat((A, deep_A_), 1)
        # deep_T = torch.cat((T, deep_T_), 1)
        # deep_F = torch.cat((F, deep_F_), 1)

        # extract generalized features
        #deep_A = self.deep_audio_2(deep_A)
        #deep_T = self.deep_text_2(deep_T)
        #deep_F = self.deep_fusion_2(deep_F)

        # final feature list
        representations_list = [deep_A, deep_T, deep_F]

        # concatenate all existing representations
        deep_representations = torch.cat(representations_list, 1)

        # dense layers
        representations = self.dense(deep_representations)

        # project to task space
        logits_fusion = self.fusion_mapping(representations)
        #logits_audio = self.audio_mapping(deep_A)
        #logits_text = self.text_mapping(deep_T)

        return logits_fusion, logits_audio, logits_text