def forward(self, inputs, aspects, lengths, aspects_lengths):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(self.seed)

    # embed the sentence tokens and regularize the embeddings
    inputs = self.embedding(inputs)
    inputs = self.noise_emb(inputs)
    inputs = self.drop_emb(inputs)

    # build the padding mask from the aspect token ids (pad id == 0)
    # before embedding, so padded positions are excluded from the pooling
    mask = (aspects > 0).float().unsqueeze(-1)
    aspects = self.embedding(aspects)
    aspects = self.drop_emb(aspects)

    # masked mean pooling over the aspect tokens
    aspects = torch.sum(aspects * mask, dim=1)
    new_asp = aspects / aspects_lengths.unsqueeze(-1).float()

    # broadcast the aspect vector over every timestep and concatenate
    new_asp = torch.unsqueeze(new_asp, 1)
    new_asp = new_asp.expand(inputs.size(0), inputs.size(1), inputs.size(2))
    concat = torch.cat((inputs, new_asp), 2)

    # add a channel dimension and apply the parallel convolutions
    inputs = concat.unsqueeze(1)
    inputs = [F.relu(conv(inputs)).squeeze(3) for conv in self.convs]
    inputs = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in inputs]

    concatenated = torch.cat(inputs, 1)
    concatenated = self.dropout(concatenated)
    return self.fc(concatenated)
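# A minimal standalone sketch of the masked mean pooling applied to the aspect
# embeddings above (sum the embeddings of non-padding tokens, divide by the
# true lengths); the tensor sizes and the pad id of 0 are assumptions.
import torch

aspect_ids = torch.tensor([[4, 9, 0, 0],        # two padded aspect phrases
                           [7, 2, 5, 0]])
lengths = torch.tensor([2, 3])
embedding = torch.nn.Embedding(10, 6, padding_idx=0)

emb = embedding(aspect_ids)                     # [2, 4, 6]
mask = (aspect_ids > 0).float().unsqueeze(-1)   # [2, 4, 1]
pooled = (emb * mask).sum(dim=1) / lengths.unsqueeze(-1).float()  # [2, 6]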
def forward(self, rel_embed, nodes_embed, h_t_pair_label, h_t_pair_path,
            h_t_pair_path_len, b_ind, h_ind, t_ind, global_step):
    max_path_num = h_t_pair_path.shape[-2]
    max_step_num = h_t_pair_path.shape[-1]
    N_bt = nodes_embed.shape[0]
    # no_rel_mask = (torch.sum(h_t_pair_label[b_ind, h_ind, t_ind, 1:], dim=-1) == 0)

    # pick the first non-empty path for every selected (head, tail) pair
    select_path_len = h_t_pair_path_len[b_ind, h_ind, t_ind]
    select = (torch.cumsum((select_path_len > 0).long(), dim=-1) == 1) & (select_path_len > 0)
    path_select_id = torch.nonzero(select.long())[:, 1]
    select_path_id = h_t_pair_path[b_ind, h_ind, t_ind][
        torch.arange(path_select_id.shape[0]).cuda(), path_select_id]
    select_path_len = h_t_pair_path_len[b_ind, h_ind, t_ind][
        torch.arange(path_select_id.shape[0]).cuda(), path_select_id]

    # gather the node embeddings along the selected path and prepend the EOS embedding
    path_bt_id = b_ind[..., None].repeat(1, max_step_num)
    path_embed = nodes_embed[path_bt_id, select_path_id]
    path_embed = torch.cat(
        (self.eos_embed.repeat(path_embed.shape[0], 1, 1), path_embed), dim=1)

    # initialise the decoder state from the relation embedding
    init_h = torch.relu(self.trans_hidden(rel_embed)).unsqueeze(dim=0)
    init_c = torch.relu(self.trans_ceil(rel_embed)).unsqueeze(dim=0)
    seq_hidden, _, _ = self.rnn(path_embed, select_path_len, init_h, init_c)

    # score every node (plus the no-op token) as the prediction vocabulary
    nodes_ext = torch.cat((self.nop_embed.repeat(N_bt, 1, 1), nodes_embed), dim=1)
    vocb = torch.relu(self.trans_pred(nodes_ext[b_ind]))
    seq_pred = torch.einsum("abd,acd->abc", seq_hidden, vocb)

    # shift the target ids by one to account for the prepended no-op token
    select_path_id = select_path_id + 1
    select_path_id = torch.cat(
        (select_path_id,
         torch.zeros(select_path_id.shape[0], 1, dtype=torch.long).cuda()), dim=-1)
    # select_path_len -= 1

    # build the sequence mask from the path lengths
    seqlen, w_ids = torch.broadcast_tensors(
        select_path_len.unsqueeze(-1),
        torch.arange(0, max_step_num + 1).cuda()[None, ...])
    seq_mask = w_ids < seqlen
    select_path_id[~seq_mask] = 0

    return seq_pred, select_path_id, seq_mask
def forward(self, h_text, w_text, h_audio, w_audio, lengths):
    '''
    INPUTS:
        h_text: hidden states of the text RNN [B, T, D_text]
        w_text: attention weights over the text timesteps
        h_audio: hidden states of the audio RNN [B, T, D_audio]
        w_audio: attention weights over the audio timesteps
        lengths: true sequence lengths of each sample in the batch
    OUTPUTS:
        fused_timestep: timestep-level concatenation of the weighted
            text and audio representations
    '''
    # get weighted representations
    text_weighted = self.weighted_timestep(h_text, w_text)
    audio_weighted = self.weighted_timestep(h_audio, w_audio)

    # cat features
    fused_timestep = torch.cat((text_weighted, audio_weighted), 2)

    # linear projection
    # h_fused = self.dense(fused_timestep)
    # apply generalized attention
    # fusion_representation, w_fusion = self.attn(h_fused, lengths)

    return fused_timestep
def last_timestep(self, outputs, lengths, bi=False):
    if bi:
        forward, backward = self.split_directions(outputs)
        last_forward = self.last_by_index(forward, lengths)
        # keep the batch dimension when the batch size is 1
        if len(last_forward.size()) == 1:
            last_forward = last_forward.unsqueeze(0)
        # the last step of the backward direction is the first timestep
        last_backward = backward[:, 0, :]
        return torch.cat((last_forward, last_backward), dim=-1)
    else:
        return self.last_by_index(outputs, lengths)
def last_timestep(self, outputs, lengths, bi=False):
    if bi:
        forward, backward = self.split_directions(outputs)
        last_forward = self.last_by_index(forward, lengths)
        last_backward = backward[:, 0, :]
        return torch.cat((last_forward, last_backward), dim=-1)
    else:
        return self.last_by_index(outputs, lengths)
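# A minimal standalone sketch of the gather that `last_by_index` presumably
# performs: picking the output at position `lengths - 1` for every sequence
# in a padded batch. Shapes and the function name are assumptions, not taken
# from the original module.
import torch

def last_by_index_sketch(outputs, lengths):
    # outputs: [B, T, D], lengths: [B] (long tensor of true lengths)
    idx = (lengths - 1).view(-1, 1).expand(-1, outputs.size(2)).unsqueeze(1)
    return outputs.gather(1, idx).squeeze(1)   # -> [B, D]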
def forward(self, data_in):
    # the view is to add the minibatch dimension (which is 1)
    out = F.leaky_relu(self.linear1(data_in.view(1, -1)))
    out, self.hidden = self.lstm1(out.view(1, 1, -1), self.hidden)
    out = F.leaky_relu(out)
    out = self.linear2(out.view(1, -1))
    # first two outputs are a bounded position, the rest a velocity
    out_pos = torch.tanh(out[:, :2])
    out_vel = out[:, 2:]
    return torch.cat((out_pos, out_vel), dim=1)

    # Unreachable batched variant left over in the original; `joint_data`,
    # `img_features` and `self.linear` are not defined in this scope.
    # lstm_input = torch.cat((joint_data, img_features), dim=2)
    # out, h = self.lstm1(lstm_input)
    # out = self.linear(out)
    # out_pos = torch.tanh(out[:, :, :2])
    # out_vel = out[:, :, 2:]
    # return torch.cat((out_pos, out_vel), dim=2)
def forward(self, data_in):
    # concatenate the three input streams along the feature dimension
    out = torch.cat((data_in[0], data_in[1], data_in[2]), dim=2)
    out = F.leaky_relu(self.linear1(out))
    # the LSTM expects [seq_len, batch, features], so permute in and out
    out, self.hidden = self.lstm1(out.permute((1, 0, 2)), self.hidden)
    out = F.leaky_relu(out.permute((1, 0, 2)))
    out = self.linear2(out)
    return out
def zero_pad(tensor):
    batch_size = tensor.size(0)
    real_len = tensor.size(1)
    dim = tensor.size(2)
    if MAX_LEN > real_len:
        # pad the time dimension with zeros up to MAX_LEN
        zeros = torch.zeros(batch_size, MAX_LEN - real_len, dim,
                            device=DEVICE)
        tensor = torch.cat((tensor, zeros), 1)
    return tensor
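# A hedged usage sketch for zero_pad: the MAX_LEN and DEVICE globals it relies
# on are assumed to be defined elsewhere, so illustrative values are set here.
import torch

MAX_LEN = 20
DEVICE = torch.device("cpu")

features = torch.randn(4, 13, 300)   # [batch, real_len, dim]
padded = zero_pad(features)          # -> [4, 20, 300], zeros appended in time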
def forward(self, message, topic, lengths, topic_lengths):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(self.seed)

    ### MESSAGE MODEL ###
    embeds = self.embedding(message)
    embeds = self.noise_emb(embeds)
    embeds = self.dropout_embeds(embeds)
    # pack the batch
    embeds_pckd = pack_padded_sequence(embeds, list(lengths.data),
                                       batch_first=True)
    mout_pckd, (hx1, cx1) = self.shared_lstm(embeds_pckd)
    # unpack output - no need if we are going to use only the last outputs
    mout_unpckd, _ = pad_packed_sequence(mout_pckd, batch_first=True)  # [batch_size, seq_length, 300]

    # Last timestep output is not used
    # message_output = self.last_timestep(self.shared_lstm, hx1)
    # message_output = self.dropout_rnn(message_output)

    ### TOPIC MODEL ###
    topic_embeds = self.embedding(topic)
    topic_embeds = self.dropout_embeds(topic_embeds)
    tout, (hx2, cx2) = self.shared_lstm(topic_embeds)
    tout = self.dropout_rnn(tout)

    # masked mean pooling over the topic timesteps
    mask = (topic > 0).float().unsqueeze(-1)
    tout = torch.sum(tout * mask, dim=1)
    tout = tout / topic_lengths.unsqueeze(-1).float()

    # broadcast the topic vector over every message timestep and concatenate
    tout = torch.unsqueeze(tout, 1)
    tout = tout.expand(mout_unpckd.size(0), mout_unpckd.size(1),
                       mout_unpckd.size(2))
    out = torch.cat((mout_unpckd, tout), 2)

    representations, attentions = self.attention(out, lengths)
    return self.linear(representations)
def forward(self, x):
    # input x should be of size [B, T, F], where B = batch size,
    # T = time samples and F = features

    # recurrent branch: keep only the last timestep
    h0, c0 = self.init_hidden()
    x1, (ht, ct) = self.lstm(x, (h0, c0))
    x1 = x1[:, -1, :]

    # convolutional branch: Conv1d expects [B, F, T]
    x2 = x.transpose(2, 1)
    x2 = self.ConvDrop(self.relu(self.BN1(self.C1(x2))))
    x2 = self.ConvDrop(self.relu(self.BN2(self.C2(x2))))
    x2 = self.ConvDrop(self.relu(self.BN3(self.C3(x2))))
    x2 = torch.mean(x2, 2)

    # fuse both branches and classify
    x_all = torch.cat((x1, x2), dim=1)
    x_out = self.FC(x_all)
    return x_out
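# A minimal self-contained sketch of the same LSTM/Conv1d branch fusion with
# made-up layer sizes; it only illustrates the shape handling (transpose for
# Conv1d, last timestep vs. mean pooling, concat before the head).
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=40, hidden_size=64, batch_first=True)
conv = nn.Conv1d(in_channels=40, out_channels=32, kernel_size=3, padding=1)
head = nn.Linear(64 + 32, 5)

x = torch.randn(8, 100, 40)                  # [B, T, F]
x1, _ = lstm(x)
x1 = x1[:, -1, :]                            # last timestep -> [8, 64]
x2 = torch.relu(conv(x.transpose(2, 1)))     # Conv1d wants [B, F, T]
x2 = x2.mean(dim=2)                          # global average pool -> [8, 32]
out = head(torch.cat((x1, x2), dim=1))       # -> [8, 5]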
def forward(self, x, lengths):
    embeddings = self.embedding(x)

    attentions = None
    outputs, last_output = self.encoder(embeddings, lengths)

    if hasattr(self, 'attention'):
        if self.attention_context:
            # append a mean-pooled context vector to every timestep
            context = self._mean_pooling(outputs, lengths)
            context = context.unsqueeze(1).expand(-1, outputs.size(1), -1)
            outputs = torch.cat([outputs, context], -1)

        representations, attentions = self.attention(outputs, lengths)

        if self.attention_context:
            # keep only the original (non-context) part of the representation
            representations = representations[:, :context.size(-1)]
    else:
        representations = last_output

    return representations, attentions
def forward(self, x):
    # If we are forwarding the flux to the fully connected layers, save it.
    if self.forward_flux:
        f = x

    # Pass the batch of spectra through the conv layers.
    for cl in self.conv_layers:
        x = cl(x)

    # Flatten the flux filters.
    x = x.view(x.size(0), -1)

    # Concatenate the full flux values before the fully connected layers.
    if self.forward_flux:
        # Need to remove the channel dim from f.
        x = torch.cat([x, f.view(f.size(0), -1)], dim=1)

    # Run the data through the fully connected layers.
    for fl in self.fc_layers:
        x = fl(x)

    x = self.final_act(x)
    return x
def forward(self, h_text, w_text, h_audio, w_audio, lengths):
    '''
    INPUTS:
        h_text: hidden states of the text RNN [B, T, D_text]
        w_text: attention energies over the text timesteps
        h_audio: hidden states of the audio RNN [B, T, D_audio]
        w_audio: attention energies over the audio timesteps
        lengths: true sequence lengths of each sample in the batch
    OUTPUTS:
        fusion_representation: attended fused representation
        w_fusion: fusion attention weights
    '''
    # cat features
    h_fused = torch.cat((h_text, h_audio), 2)

    # linear projection
    h_fused = self.dense(h_fused)

    # average attention energies
    w_averaged = torch.add(w_text, w_audio) / 2.0

    # apply generalized attention
    fusion_representation, w_fusion = self.attn(h_fused, w_averaged)

    return fusion_representation, w_fusion
loss_epoch = 0
diff_epoch = 0
epi_x_old = 0
x_buf = []
y_buf = []
for epi, data in enumerate(dataloader_train):
    x, y, epi_x = extract(data)

    net.zero_grad()
    net.zero_hidden()
    optimizer.zero_grad()

    # when a new episode starts (or the dataset ends), train on the buffered episode
    if (epi_x != epi_x_old or epi == len(dataset_train) - 1) and x_buf:
        x_cat = torch.cat(x_buf, 0).unsqueeze(1)
        y_cat = torch.cat(y_buf, 0).unsqueeze(1)

        delta = net.forward(x_cat)
        loss = loss_function(x_cat[:, :, :12] + delta, y_cat)
        loss.backward()
        optimizer.step()

        x_buf = []
        y_buf = []
        epi_x_old = epi_x

        loss_episode = loss.item()
        diff_episode = F.mse_loss(x_cat[:, :, :12], y_cat).item()

    # buffer the current sample for its episode
    x_buf.append(x)
    y_buf.append(y)
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, fast=False, classes=None, agnostic=False):
    """
    Performs Non-Maximum Suppression on inference results
    Returns detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    fast |= conf_thres > 0.001  # fast mode
    if fast:
        merge = False
        multi_label = False
    else:
        merge = True  # merge for best mAP (adds 0.5ms/img)
        multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero().t()
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
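# A small standalone sketch of the class-offset trick used in the "Batched NMS"
# step above: boxes are shifted by class_id * max_wh so a single call to
# torchvision.ops.nms never suppresses boxes of different classes. The tensors
# below are made-up examples.
import torch
import torchvision

boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 52., 52.],
                      [11., 11., 51., 51.]])
scores = torch.tensor([0.9, 0.8, 0.7])
class_ids = torch.tensor([0., 0., 1.])
max_wh = 4096  # larger than any image side, so offset boxes never overlap

offsets = class_ids[:, None] * max_wh
keep = torchvision.ops.nms(boxes + offsets, scores, iou_threshold=0.5)
# keep -> [0, 2]: box 1 is suppressed by box 0 (same class),
# box 2 survives because it belongs to a different class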
def last_timestep(self, rnn, h):
    if rnn.bidirectional:
        # concatenate the last hidden state of each direction
        return torch.cat((h[-2], h[-1]), 1)
    else:
        return h[-1]
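# A hedged example of how this helper would be used with torch.nn.LSTM: `h`
# has shape [num_layers * num_directions, batch, hidden], so h[-2] and h[-1]
# are the forward and backward states of the top layer. Sizes are illustrative.
import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=10, hidden_size=8, bidirectional=True, batch_first=True)
x = torch.randn(4, 7, 10)              # [batch, time, features]
_, (h, _) = rnn(x)                     # h: [2, 4, 8]
last = torch.cat((h[-2], h[-1]), 1)    # [4, 16], same as last_timestep(rnn, h)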
def forward(self, covarep, glove, lengths):
    # sorted features accepted as input

    # text rnn (run in eval mode)
    self.text_rnn.eval()
    logits_text, deep_T, hidden_t, weighted_t, T = self.text_rnn(glove, lengths)

    # audio rnn (run in eval mode)
    self.audio_rnn.eval()
    logits_audio, deep_A, hidden_a, weighted_a, A = self.audio_rnn(covarep, lengths)

    # cat-fusion attention subnetwork
    f_i = self.fusion_net(hidden_t, weighted_t, hidden_a, weighted_a, lengths)

    # mul-fusion attention network
    m_i = self.mul_fusion(hidden_t, weighted_t, hidden_a, weighted_a, lengths)

    fused_i = torch.cat((f_i, m_i), 2)
    fused_i = self.zero_pad(fused_i)
    # fused_i = self.fusion_transform(fused_i)
    _, F, _, _ = self.fusion_rnn(fused_i, lengths)

    # dense representations
    deep_A_fusion = self.deep_audio(A)
    deep_T_fusion = self.deep_text(T)
    mid_F = torch.cat((deep_A_fusion, deep_T_fusion, F), 1)
    deep_F = self.deep_fused(mid_F)

    # concatenate features
    # deep_A = torch.cat((A, deep_A_), 1)
    # deep_T = torch.cat((T, deep_T_), 1)
    # deep_F = torch.cat((F, deep_F_), 1)

    # extract generalized features
    # deep_A = self.deep_audio_2(deep_A)
    # deep_T = self.deep_text_2(deep_T)
    # deep_F = self.deep_fusion_2(deep_F)

    # final feature list
    representations_list = [deep_A, deep_T, deep_F]

    # concatenate all existing representations
    deep_representations = torch.cat(representations_list, 1)

    # dense layers
    representations = self.dense(deep_representations)

    # project to task space
    logits_fusion = self.fusion_mapping(representations)
    # logits_audio = self.audio_mapping(deep_A)
    # logits_text = self.text_mapping(deep_T)

    return logits_fusion, logits_audio, logits_text