def predict(model, batch, flipped_batch, use_gpu):
    image_ids, inputs = batch['image_id'], batch['input']
    if use_gpu:
        inputs = inputs.cuda()
    outputs, _, _ = model(inputs)
    probs = torch.sigmoid(outputs)

    if flipped_batch is not None:
        flipped_image_ids, flipped_inputs = flipped_batch['image_id'], flipped_batch['input']
        # assert image_ids == flipped_image_ids
        if use_gpu:
            flipped_inputs = flipped_inputs.cuda()
        flipped_outputs, _, _ = model(flipped_inputs)
        flipped_probs = torch.sigmoid(flipped_outputs)
        probs += torch.flip(flipped_probs, (3,))  # flip back and add
        probs *= 0.5

    probs = probs.squeeze(1).cpu().numpy()
    if args.resize:
        probs = np.swapaxes(probs, 0, 2)
        probs = cv2.resize(probs, (orig_img_size, orig_img_size),
                           interpolation=cv2.INTER_LINEAR)
        probs = np.swapaxes(probs, 0, 2)
    else:
        probs = probs[:, y0:y1, x0:x1]
    return probs
def flip_tensor(x):
    return torch.flip(x, [3])
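# --- Usage sketch (added): flip-based test-time augmentation built on
# flip_tensor above. `model` is a placeholder for any dense-prediction
# network with NCHW inputs; that assumption is illustrative, not from the
# original code.
import torch

def tta_flip_average(model, x):
    # Predict on the image and its horizontal mirror, un-flip the second
    # prediction, and average the two probability maps.
    with torch.no_grad():
        p = torch.sigmoid(model(x))
        p_flip = torch.sigmoid(model(flip_tensor(x)))
    return 0.5 * (p + flip_tensor(p_flip))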
def forward(self, input, var_rnn_hidden, visit_rnn_hidden):
    """
    :param input: LongTensor of code indices, shape (seq_len, batch)
    :param var_rnn_hidden: initial hidden state of the variable-level GRU
    :param visit_rnn_hidden: initial hidden state of the visit-level GRU
    :return: (output, var_rnn_hidden, visit_rnn_hidden)
    """
    # emb_layer: input (*): LongTensor of arbitrary shape containing the
    # indices to extract
    # emb_layer: output (*, H): where * is the input shape and H = embedding_dim
    v = self.emb_layer(input)
    v = self.dropout(v)

    # GRU input of shape (seq_len, batch, input_size):
    #   seq_len: visit sequence length; batch: batch size;
    #   input_size: embedding dimension
    # h_0 of shape (num_layers * num_directions, batch, hidden_size):
    #   num_layers(1) * num_directions(1); batch: batch size
    if self.reverse_rnn_feeding:
        visit_rnn_output, visit_rnn_hidden = self.visit_level_rnn(
            torch.flip(v, [0]), visit_rnn_hidden)
        alpha = self.visit_level_attention(torch.flip(visit_rnn_output, [0]))
    else:
        visit_rnn_output, visit_rnn_hidden = self.visit_level_rnn(
            v, visit_rnn_hidden)
        alpha = self.visit_level_attention(visit_rnn_output)
    visit_attn_w = F.softmax(alpha, dim=0)

    if self.reverse_rnn_feeding:
        var_rnn_output, var_rnn_hidden = self.variable_level_rnn(
            torch.flip(v, [0]), var_rnn_hidden)
        beta = self.variable_level_attention(torch.flip(var_rnn_output, [0]))
    else:
        var_rnn_output, var_rnn_hidden = self.variable_level_rnn(
            v, var_rnn_hidden)
        beta = self.variable_level_attention(var_rnn_output)
    var_attn_w = torch.tanh(beta)

    # '*' is the Hadamard (element-wise) product
    attn_w = visit_attn_w * var_attn_w
    c = torch.sum(attn_w * v, dim=0)
    c = self.output_dropout(c)
    output = self.output_layer(c)
    output = F.softmax(output, dim=1)
    return output, var_rnn_hidden, visit_rnn_hidden
z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean
orig_ts = torch.from_numpy(orig_ts).float().to(device)

# take first trajectory for visualization
z0 = z0[0]

ts_pos = np.linspace(0., 2. * np.pi, num=2000)
ts_neg = np.linspace(-np.pi, 0., num=2000)[::-1].copy()
ts_pos = torch.from_numpy(ts_pos).float().to(device)
ts_neg = torch.from_numpy(ts_neg).float().to(device)

zs_pos = odeint(func, z0, ts_pos)
zs_neg = odeint(func, z0, ts_neg)

xs_pos = dec(zs_pos)
xs_neg = torch.flip(dec(zs_neg), dims=[0])

xs_pos = xs_pos.cpu().numpy()
xs_neg = xs_neg.cpu().numpy()
orig_traj = orig_trajs[0].cpu().numpy()
samp_traj = samp_trajs[0].cpu().numpy()

plt.figure()
plt.plot(orig_traj[:, 0], orig_traj[:, 1], 'g', label='true trajectory')
plt.plot(xs_pos[:, 0], xs_pos[:, 1], 'r', label='learned trajectory (t>0)')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='batch size')
    parser.add_argument('--device', type=str, default='gpu',
                        choices=['gpu', 'cpu'], help='device')
    args = parser.parse_args()
    args.resolution = 1024
    return args


# conversion functions
ops_dict = {
    # modulated transposed-convolution weights (iC, oC, kH, kW)
    'mTc': lambda weight: torch.flip(
        torch.from_numpy(weight.transpose((2, 3, 0, 1))), [2, 3]),
    # transposed-convolution weights (iC, oC, kH, kW)
    'Tco': lambda weight: torch.from_numpy(weight.transpose((2, 3, 0, 1))),
    # convolution weights (oC, iC, kH, kW)
    'con': lambda weight: torch.from_numpy(weight.transpose((3, 2, 0, 1))),
    # fully connected layer weights (oD, iD)
    'fc_': lambda weight: torch.from_numpy(weight.transpose((1, 0))),
    # fully connected bias, constant input, constant noise,
    # v1 noise weights (no transform)
    'any': lambda weight: torch.from_numpy(weight),
    # style-mixing value, v2 noise weights (scalar)
    'uns': lambda weight: torch.from_numpy(np.array(weight).reshape(1)),
}
def constrained_max_pooling_binary_OHEM_focal_ratio(all_output, num_samples,
                                                    all_target, gamma_n=0,
                                                    gamma_p=0, OHEM_Thr=10000,
                                                    max_ratio=1, random_n=False,
                                                    constraints=None, clamp=0):
    num_hit = 0
    # Clamp the sigmoid output to prevent NaNs when computing the loss.
    all_output = torch.clamp(torch.sigmoid(all_output), clamp, 1.0 - clamp)
    num_utts = all_output.shape[0]
    num_sigmoid = all_output.shape[2]
    new_outputs = []
    new_targets = []
    for j in range(num_sigmoid):
        new_outputs.append([])
        new_targets.append([])
    for i in range(num_utts):
        end_idx = num_samples[i]
        sorted_output, sorted_index = torch.sort(all_output[i, :end_idx], dim=0)
        reversed_index = torch.flip(sorted_index, dims=[0])
        if all_target[i][0] == 0:
            # Target is 0, so no constraint is needed.
            for j in range(num_sigmoid):
                selected_indexes = OHEM(reversed_index[:, j], OHEM_Thr)
                new_outputs[j].append(all_output[i, selected_indexes, j])
                new_targets[j].append([0] * len(selected_indexes))
            if torch.sum(sorted_output[-1, :] >= 0.5) <= 0:
                # All the binary probabilities are smaller than 0.5.
                num_hit += 1
        else:
            # Target is not 0, so we compute the constraint; multiple
            # constraints per utterance are supported.
            index_constraint = set()
            if constraints is not None:
                for x in constraints[i]:
                    index_constraint = set.union(index_constraint,
                                                 set(range(x[0], x[1])))
            # Compute the negative loss for all non-target sigmoids.
            target_sigmoid = all_target[i][0] - 1
            non_target_sigmoids = list(range(num_sigmoid))
            non_target_sigmoids.remove(target_sigmoid)
            for j in non_target_sigmoids:
                selected_indexes = OHEM(reversed_index[:, j], OHEM_Thr)
                new_outputs[j].append(all_output[i, selected_indexes, j])
                new_targets[j].append([0] * len(selected_indexes))
            # Compute the positive loss for the target sigmoid.
            if len(index_constraint) == 0:
                # No constraint: either constraints is None or this utterance
                # has no constraint (which is possible).
                new_outputs[target_sigmoid].append(
                    sorted_output[-1, [target_sigmoid]])
                new_targets[target_sigmoid].append([1])
                if sorted_output[-1, target_sigmoid] >= 0.5:
                    num_hit += 1
            else:
                # With constraint: constraints is not None and this utterance
                # does have a constraint.
                index_constraint = torch.tensor(list(index_constraint))
                sorted_output_short, _ = torch.sort(
                    all_output[i, index_constraint, target_sigmoid])
                new_outputs[target_sigmoid].append(
                    sorted_output_short[-1].view(1, ))
                new_targets[target_sigmoid].append([1])
                if sorted_output_short[-1] >= 0.5:
                    num_hit += 1
        if torch.sum(torch.isnan(sorted_output)) > 0:
            print("Error: output NaNs\n")
            exit(1)
    # Select training samples according to max_ratio.
    loss, num_training = select_training_samples(new_outputs, new_targets,
                                                 ratio=max_ratio,
                                                 gamma_n=gamma_n,
                                                 gamma_p=gamma_p,
                                                 random_n=random_n)
    if torch.isnan(loss):
        print("Error: Loss NaNs\n")
        exit(1)
    return loss, float(num_hit) * 100 / len(num_samples), num_training
def transpose(self, t, trans_idx):
    # print('transpose jt .. ', t.size())
    if trans_idx >= 4:
        t = torch.flip(t, [3])
    return torch.rot90(t, trans_idx % 4, [2, 3])
def _augment_channelswap(audio):
    """Swap channels of stereo signals with a probability of p=0.5"""
    if audio.shape[0] == 2 and torch.FloatTensor(1).uniform_() < 0.5:
        return torch.flip(audio, [0])
    else:
        return audio
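# --- Usage sketch (added, assumed shapes): stereo audio is (channels, samples),
# so flipping dim 0 swaps the left and right channels; mono passes through.
import torch

stereo = torch.randn(2, 44100)                 # one second of stereo audio
maybe_swapped = _augment_channelswap(stereo)   # swapped with probability 0.5
mono = torch.randn(1, 44100)
assert _augment_channelswap(mono) is mono      # mono is returned unchanged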
def forward(
    self,
    tokens: torch.Tensor,
    seq_lens: torch.Tensor,
    dict_feat: Optional[Tuple[torch.Tensor, ...]] = None,
    actions: Optional[List[List[int]]] = None,
    contextual_token_embeddings: Optional[torch.Tensor] = None,
) -> List[Tuple[torch.Tensor, torch.Tensor]]:
    """RNNG forward function.

    Args:
        tokens (torch.Tensor): list of tokens
        seq_lens (torch.Tensor): list of sequence lengths
        dict_feat (Optional[Tuple[torch.Tensor, ...]]): dictionary or gazetteer
            features for each token
        actions (Optional[List[List[int]]]): Used only during training.
            Oracle actions for the instances.

    Returns:
        list of top k tuples of predicted actions tensor and corresponding
        scores tensor. Tensor shapes:
        (batch_size, action_length)
        (batch_size, action_length, number_of_actions)
    """
    beam_size = self.beam_size
    top_k = self.top_k

    if self.stage != Stage.TEST:
        beam_size = 1
        top_k = 1

    if self.training:
        assert actions is not None, "actions must be provided for training"
        actions_idx_rev = list(reversed(actions[0]))
    else:
        torch.manual_seed(0)

    beam_size = max(beam_size, 1)

    # Reverse the order of input tokens.
    tokens_list_rev = torch.flip(tokens, [len(tokens.size()) - 1])

    # Aggregate inputs for the embedding module.
    embedding_input = [tokens]
    if dict_feat is not None:
        embedding_input.append(dict_feat)
    if contextual_token_embeddings is not None:
        embedding_input.append(contextual_token_embeddings)

    # Embed and reverse the order of tokens.
    token_embeddings = self.embedding(*embedding_input)
    token_embeddings = torch.flip(token_embeddings, [len(tokens.size()) - 1])

    # Batch size is always = 1, so we squeeze the batch_size dimension.
    token_embeddings = token_embeddings.squeeze(0)
    tokens_list_rev = tokens_list_rev.squeeze(0)

    initial_state = ParserState(self)
    for i in range(token_embeddings.size()[0]):
        token_embedding = token_embeddings[i].unsqueeze(0)
        tok = tokens_list_rev[i]
        initial_state.buffer_stackrnn.push(token_embedding, Element(tok))

    beam = [initial_state]
    while beam and any(not state.finished() for state in beam):
        # Stores plans for expansion as (score, state, action).
        plans: List[Tuple[float, ParserState, int]] = []
        # Expand current beam states.
        for state in beam:
            # Keep terminal states.
            if state.finished():
                plans.append((state.neg_prob, state, -1))
                continue

            # Translating Expression p_t = affine_transform({pbias, S,
            # stack_summary, B, buffer_summary, A, action_summary});
            stack = state.stack_stackrnn
            stack_summary = stack.embedding()
            action_summary = state.action_stackrnn.embedding()
            buffer_summary = state.buffer_stackrnn.embedding()
            if self.dropout_layer.p > 0:
                stack_summary = self.dropout_layer(stack_summary)
                action_summary = self.dropout_layer(action_summary)
                buffer_summary = self.dropout_layer(buffer_summary)

            # Feature for the index of the last open non-terminal.
            last_open_NT_feature = torch.zeros(len(self.actions_vocab))
            open_NT_exists = state.num_open_NT > 0
            if (
                len(stack) > 0
                and open_NT_exists
                and self.ablation_use_last_open_NT_feature
            ):
                last_open_NT = None
                try:
                    open_NT = state.is_open_NT[::-1].index(True)
                    last_open_NT = stack.element_from_top(open_NT)
                except ValueError:
                    pass
                if last_open_NT:
                    last_open_NT_feature[last_open_NT.node] = 1.0
            last_open_NT_feature = last_open_NT_feature.unsqueeze(0)

            summaries = []
            if self.ablation_use_buffer:
                summaries.append(buffer_summary)
            if self.ablation_use_stack:
                summaries.append(stack_summary)
            if self.ablation_use_action:
                summaries.append(action_summary)
            if self.ablation_use_last_open_NT_feature:
                summaries.append(last_open_NT_feature)

            state.action_p = self.action_linear(torch.cat(summaries, dim=1))

            log_probs = F.log_softmax(state.action_p, dim=1)[0]
            for action in self.valid_actions(state):
                plans.append((state.neg_prob - log_probs[action].item(),
                              state, action))

        beam = []
        # Take actions to regenerate the beam.
        for neg_prob, state, predicted_action_idx in sorted(plans)[:beam_size]:
            # Skip terminal states.
            if state.finished():
                beam.append(state)
                continue

            # Only branch out states when needed.
            if beam_size > 1:
                state = state.copy()

            state.predicted_actions_idx.append(predicted_action_idx)

            target_action_idx = predicted_action_idx
            if self.training:
                assert len(actions_idx_rev) > 0, \
                    "Actions and tokens may not be in sync."
                target_action_idx = actions_idx_rev[-1]
                actions_idx_rev = actions_idx_rev[:-1]

            if (self.constraints_ignore_loss_for_unsupported
                    and state.found_unsupported):
                pass
            else:
                state.action_scores.append(state.action_p)

            self.push_action(state, target_action_idx)
            state.neg_prob = neg_prob
            beam.append(state)
        # End for
    # End while

    assert len(beam) > 0, "How come beam is empty?"
    assert len(state.stack_stackrnn) == 1, \
        "How come stack len is " + str(len(state.stack_stackrnn))
    assert len(state.buffer_stackrnn) == 0, \
        "How come buffer len is " + str(len(state.buffer_stackrnn))

    # Unsqueeze to add a batch dimension before returning.
    return [
        (
            cuda_utils.LongTensor(state.predicted_actions_idx).unsqueeze(0),
            torch.cat(state.action_scores).unsqueeze(0),
        )
        for state in sorted(beam)[:top_k]
    ]
def softmax_rgb_blend(
    colors, fragments, blend_params, znear: float = 1.0, zfar: float = 100
) -> torch.Tensor:
    """
    RGB and alpha channel blending to return an RGBA image based on the method
    proposed in [0]
      - **RGB** - blend the colors based on the 2D distance based probability map
        and relative z distances.
      - **A** - blend based on the 2D distance based probability map.

    Args:
        colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.
        fragments: namedtuple with outputs of rasterization. We use properties
            - pix_to_face: LongTensor of shape (N, H, W, K) specifying the
              indices of the faces (in the packed representation) which overlap
              each pixel in the image.
            - dists: FloatTensor of shape (N, H, W, K) specifying the 2D
              euclidean distance from the center of each pixel to each of the
              top K overlapping faces.
            - zbuf: FloatTensor of shape (N, H, W, K) specifying the
              interpolated depth from each pixel to each of the top K
              overlapping faces.
        blend_params: instance of BlendParams dataclass containing properties
            - sigma: float, parameter which controls the width of the sigmoid
              function used to calculate the 2D distance based probability.
              Sigma controls the sharpness of the edges of the shape.
            - gamma: float, parameter which controls the scaling of the
              exponential function used to control the opacity of the color.
            - background_color: (3) element list/tuple/torch.Tensor specifying
              the RGB values for the background color.
        znear: float, near clipping plane in the z direction
        zfar: float, far clipping plane in the z direction

    Returns:
        RGBA pixel_colors: (N, H, W, 4)

    [0] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
    Image-based 3D Reasoning'
    """
    N, H, W, K = fragments.pix_to_face.shape
    device = fragments.pix_to_face.device
    pix_colors = torch.ones(
        (N, H, W, 4), dtype=colors.dtype, device=colors.device
    )
    background = blend_params.background_color
    if not torch.is_tensor(background):
        background = torch.tensor(
            background, dtype=torch.float32, device=device)

    # Background color
    delta = np.exp(1e-10 / blend_params.gamma) * 1e-10
    delta = torch.tensor(delta, device=device)

    # Mask for padded pixels.
    mask = fragments.pix_to_face >= 0

    # Sigmoid probability map based on the distance of the pixel to the face.
    prob_map = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask

    # The cumulative product ensures that alpha will be 1 if at least 1 face
    # fully covers the pixel, as for that face prob will be 1.0.
    # TODO: investigate why torch.cumprod backwards is very slow for large
    # values of K. Temporarily replace it with exp(sum(log())) using the fact
    # that a*b = exp(log(a*b)) = exp(log(a) + log(b)).
    # alpha = 1.0 - torch.cumprod((1.0 - prob), dim=-1)[..., -1]
    alpha = 1.0 - torch.exp(torch.log((1.0 - prob_map)).sum(dim=-1))

    # Weights for each face. Adjust the exponential by the max z to prevent
    # overflow. zbuf shape (N, H, W, K); find the max over K.
    # TODO: there may still be some instability in the exponent calculation.
    z_inv = (zfar - fragments.zbuf) / (zfar - znear) * mask
    z_inv_max = torch.max(z_inv, dim=-1).values[..., None]
    weights_num = prob_map * torch.exp(
        (z_inv - z_inv_max) / blend_params.gamma)

    # Normalize weights. weights_num shape: (N, H, W, K).
    # Sum over K and divide through by the sum.
    denom = weights_num.sum(dim=-1)[..., None] + delta
    weights = weights_num / denom

    # Sum: weights * textures + background color
    weighted_colors = (weights[..., None] * colors).sum(dim=-2)
    weighted_background = (delta / denom) * background
    pix_colors[..., :3] = weighted_colors + weighted_background
    pix_colors[..., 3] = alpha

    # Clamp colors to the range 0-1 and flip the y axis.
    pix_colors = torch.clamp(pix_colors, min=0, max=1.0)
    return torch.flip(pix_colors, [1])
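# --- Added sanity check (not from the original source): the exp(sum(log(.)))
# rewrite used above is numerically equivalent to the cumulative-product alpha.
import torch

prob = torch.rand(2, 4, 4, 5) * 0.9   # toy (N, H, W, K) probabilities < 1
alpha_cumprod = 1.0 - torch.cumprod(1.0 - prob, dim=-1)[..., -1]
alpha_logsum = 1.0 - torch.exp(torch.log(1.0 - prob).sum(dim=-1))
assert torch.allclose(alpha_cumprod, alpha_logsum, atol=1e-6)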
def mf_ensemble_test(self):
    img_list = fnmatch.filter(os.listdir(self.video_i), '*.png')
    img_list.sort(key=lambda x: int(x[:-4]))
    with torch.no_grad():
        for index in tqdm(range(len(img_list))):
            if (index + 1) % 10:
                continue
            # Gather a 9-frame window around `index`, clamped to the clip bounds.
            list_mf = []
            for d in range(-4, 5):
                i = min(max(index + d, 0), len(img_list) - 1)
                img_path = os.path.join(self.video_i, img_list[i])
                img = Image.open(img_path).convert('RGB')
                img = np.asarray(img)
                list_mf.append(img)

            info_num = int(img_list[index].split('.')[0]) - 1
            mod = info_num % 4
            if mod == 0:
                pqf = np.array([1, 0.9, 0.9, 0.9, 1.1, 0.9, 0.9, 0.9, 1])
            elif mod == 1:
                pqf = np.array([0.9, 0.9, 0.9, 1, 1, 0.9, 0.9, 1, 0.9])
            elif mod == 2:
                pqf = np.array([0.9, 0.9, 1, 0.9, 1, 0.9, 1, 0.9, 0.9])
            elif mod == 3:
                pqf = np.array([0.9, 1, 0.9, 0.9, 1, 1, 0.9, 0.9, 0.9])
            pqf = torch.from_numpy(pqf).float()
            pqf = torch.unsqueeze(pqf, 0)
            pqf = torch.cat([pqf, pqf, pqf, pqf], 0).cuda()

            info_filename = str(info_num) + '.tuLayer.png'
            info_path = os.path.join(self.info, info_filename)
            info = Image.open(info_path).convert('RGB')
            info = np.asarray(info)[..., 0:1]

            input_root = np.stack(list_mf, axis=0)
            info_root = info

            # rot?
            input_1F = np.ascontiguousarray(input_root)
            input_1T = np.ascontiguousarray(input_root.transpose(0, 2, 1, 3))
            info_1F = np.ascontiguousarray(info_root)
            info_1T = np.ascontiguousarray(info_root.transpose(1, 0, 2))

            # rot_F, hflip?, vflip?
            input_1F_2F = input_1F
            input_1F_2F_3F = input_1F_2F
            input_1F_2F_3T = np.ascontiguousarray(input_1F_2F[:, ::-1, :, :])
            input_1F_2T = np.ascontiguousarray(input_1F[:, :, ::-1, :])
            input_1F_2T_3F = input_1F_2T
            input_1F_2T_3T = np.ascontiguousarray(input_1F_2T[:, ::-1, :, :])
            info_1F_2F = info_1F
            info_1F_2F_3F = info_1F_2F
            info_1F_2F_3T = np.ascontiguousarray(info_1F_2F[::-1, :, :])
            info_1F_2T = np.ascontiguousarray(info_1F[:, ::-1, :])
            info_1F_2T_3F = info_1F_2T
            info_1F_2T_3T = np.ascontiguousarray(info_1F_2T[::-1, :, :])

            # rot_T, hflip?, vflip?
            input_1T_2F = input_1T
            input_1T_2F_3F = input_1T_2F
            input_1T_2F_3T = np.ascontiguousarray(input_1T_2F[:, ::-1, :, :])
            input_1T_2T = np.ascontiguousarray(input_1T[:, :, ::-1, :])
            input_1T_2T_3F = input_1T_2T
            input_1T_2T_3T = np.ascontiguousarray(input_1T_2T[:, ::-1, :, :])
            info_1T_2F = info_1T
            info_1T_2F_3F = info_1T_2F
            info_1T_2F_3T = np.ascontiguousarray(info_1T_2F[::-1, :, :])
            info_1T_2T = np.ascontiguousarray(info_1T[:, ::-1, :])
            info_1T_2T_3F = info_1T_2T
            info_1T_2T_3T = np.ascontiguousarray(info_1T_2T[::-1, :, :])

            # N, H, W, C -> N, C, H, W, scaled to [0, 1]
            input_1F_2F_3F = torch.from_numpy(input_1F_2F_3F).permute(0, 3, 1, 2).float() / 255
            input_1F_2F_3T = torch.from_numpy(input_1F_2F_3T).permute(0, 3, 1, 2).float() / 255
            input_1F_2T_3F = torch.from_numpy(input_1F_2T_3F).permute(0, 3, 1, 2).float() / 255
            input_1F_2T_3T = torch.from_numpy(input_1F_2T_3T).permute(0, 3, 1, 2).float() / 255
            input_1T_2F_3F = torch.from_numpy(input_1T_2F_3F).permute(0, 3, 1, 2).float() / 255
            input_1T_2F_3T = torch.from_numpy(input_1T_2F_3T).permute(0, 3, 1, 2).float() / 255
            input_1T_2T_3F = torch.from_numpy(input_1T_2T_3F).permute(0, 3, 1, 2).float() / 255
            input_1T_2T_3T = torch.from_numpy(input_1T_2T_3T).permute(0, 3, 1, 2).float() / 255
            info_1F_2F_3F = torch.from_numpy(info_1F_2F_3F).permute(2, 0, 1).float() / 255
            info_1F_2F_3T = torch.from_numpy(info_1F_2F_3T).permute(2, 0, 1).float() / 255
            info_1F_2T_3F = torch.from_numpy(info_1F_2T_3F).permute(2, 0, 1).float() / 255
            info_1F_2T_3T = torch.from_numpy(info_1F_2T_3T).permute(2, 0, 1).float() / 255
            info_1T_2F_3F = torch.from_numpy(info_1T_2F_3F).permute(2, 0, 1).float() / 255
            info_1T_2F_3T = torch.from_numpy(info_1T_2F_3T).permute(2, 0, 1).float() / 255
            info_1T_2T_3F = torch.from_numpy(info_1T_2T_3F).permute(2, 0, 1).float() / 255
            info_1T_2T_3T = torch.from_numpy(info_1T_2T_3T).permute(2, 0, 1).float() / 255

            # B, N, C, H, W
            input_norot = torch.cat([
                torch.unsqueeze(input_1F_2F_3F, 0),
                torch.unsqueeze(input_1F_2F_3T, 0),
                torch.unsqueeze(input_1F_2T_3F, 0),
                torch.unsqueeze(input_1F_2T_3T, 0)
            ], 0).cuda()
            input_rot = torch.cat([
                torch.unsqueeze(input_1T_2F_3F, 0),
                torch.unsqueeze(input_1T_2F_3T, 0),
                torch.unsqueeze(input_1T_2T_3F, 0),
                torch.unsqueeze(input_1T_2T_3T, 0)
            ], 0).cuda()
            info_norot = torch.cat([
                torch.unsqueeze(info_1F_2F_3F, 0),
                torch.unsqueeze(info_1F_2F_3T, 0),
                torch.unsqueeze(info_1F_2T_3F, 0),
                torch.unsqueeze(info_1F_2T_3T, 0)
            ], 0).cuda()
            info_rot = torch.cat([
                torch.unsqueeze(info_1T_2F_3F, 0),
                torch.unsqueeze(info_1T_2F_3T, 0),
                torch.unsqueeze(info_1T_2T_3F, 0),
                torch.unsqueeze(info_1T_2T_3T, 0)
            ], 0).cuda()

            model = self.model
            with timer('EMGA_ensemble'):
                # 4, C, H, W
                out = model(input_norot, info_norot, pqf)
                out = out[4]
                out_rot = model(input_rot, info_rot, pqf)  # multiscale outputs
                out_rot = out_rot[4]
                out_0, out_1, out_2, out_3 = out[0], out[1], out[2], out[3]
                out_4, out_5, out_6, out_7 = (out_rot[0], out_rot[1],
                                              out_rot[2], out_rot[3])
                out_x4 = (out_0 + torch.flip(out_1, [1]) +
                          torch.flip(out_2, [2]) + torch.flip(out_3, [1, 2]))
                out_rot_x4 = (out_4 + torch.flip(out_5, [1]) +
                              torch.flip(out_6, [2]) + torch.flip(out_7, [1, 2]))
                # Mind the order: the input was rotated first, so the output
                # must be rotated back last.
                out_rot_x4 = out_rot_x4.permute(0, 2, 1)

            out = (out_x4 + out_rot_x4) / 8.0
            out = out.cpu()
            out = out.detach().numpy() * 255.0
            out = out.clip(0, 255).transpose(1, 2, 0)
            out_img = Image.fromarray(out.astype(np.uint8), mode='RGB')
            output_path = os.path.join(self.video_o, img_list[index])
            out_img.save(output_path)
def flip_3d(input_, list_axes):
    input_ = torch.flip(input_, list_axes)
    return input_
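# --- Usage sketch (added): flipping a 5-D volume (N, C, D, H, W) along the
# depth and width axes; the axis choice here is an illustrative assumption.
import torch

vol = torch.randn(1, 1, 8, 16, 16)
flipped = flip_3d(vol, [2, 4])
assert torch.equal(flip_3d(flipped, [2, 4]), vol)   # flipping twice is a no-op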
def train(train_loader, model, optimizer, epoch, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        target = target.long()
        input, target = input.cuda(), target.cuda()
        data_time.update(time.time() - end)

        if args.train == 'mixup':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var, target_var,
                                              mixup=True,
                                              mixup_alpha=args.mixup_alpha)
            loss = bce_loss(softmax(output), reweighted_target)  # mixup_criterion(target_a, target_b, lam)
        elif args.train == 'mixup_hidden':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var, target_var,
                                              mixup_hidden=True,
                                              mixup_alpha=args.mixup_alpha)
            loss = bce_loss(softmax(output), reweighted_target)  # mixup_criterion(target_a, target_b, lam)
        elif args.train == 'vanilla':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
        elif args.train == 'cutout':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(cut_input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
        elif args.train == 'vanilla_cutout':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, reweighted_target_aug = model(cut_input_var, target_var)
            loss_aug = bce_loss(softmax(output_aug), reweighted_target_aug)
            loss = loss + loss_aug
        elif args.train == 'vanilla_cutout_consistency_reg':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, reweighted_target_aug = model(cut_input_var, target_var)
            output_anchor = Variable(output.detach().data, requires_grad=False)
            loss_aug = mse_loss(output_anchor, output_aug)
            loss = loss + loss_aug
        elif args.train == 'vanilla_cutout_consistency_proposed':
            # cutout = Cutout(1, args.cutout)
            # cut_input = cutout.apply(input)
            cut_input = input + torch.randn(input.size()).cuda() * 0.01
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            target_layer_num = torch.randperm(4)[0] + 1
            output, reweighted_target, output_anchor = model(
                input_var, target_var, layer_num_out=target_layer_num)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, reweighted_target_aug = model.forward_n_layers(
                cut_input_var, target_var, layer_num=target_layer_num)
            output_anchor = Variable(output_anchor.detach().data,
                                     requires_grad=False)
            # consistency loss on softmax outputs
            loss_aug = mse_loss(softmax(output_anchor), softmax(output_aug))
            # loss_aug = mse_loss(output_anchor, output_aug)
            alpha = (epoch / 400)
            loss = loss + alpha * loss_aug
        elif args.train == 'horizontal_flip':
            flip_input = torch.flip(input, (3,))
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            flip_input_var = torch.autograd.Variable(flip_input)  # note: unused in this branch
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
        elif args.train == 'vanilla_horizontal_flip':
            flip_input = torch.flip(input, (3,))
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            flip_input_var = torch.autograd.Variable(flip_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, _ = model(flip_input_var, target_var)
            loss_aug = bce_loss(softmax(output_aug), reweighted_target)
            loss = (loss + loss_aug) / 2
        elif args.train == 'vanilla_horizontal_flip_consistency_reg':
            # add the vanilla loss and a consistency loss between the flipped
            # output and the vanilla output
            flip_input = torch.flip(input, (3,))
            input_var, target_var = (torch.autograd.Variable(input),
                                     torch.autograd.Variable(target))
            flip_input_var = torch.autograd.Variable(flip_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)  # vanilla loss first
            output_aug, reweighted_target_aug = model(flip_input_var, target_var)
            output_anchor = Variable(output.detach().data, requires_grad=False)
            # loss between the flipped output and the plain vanilla output
            loss_aug = mse_loss(output_anchor, output_aug)
            loss = (loss + loss_aug) / 2

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        max_grad_norm = 5.
        torch_utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                      'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)

    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
              'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                            error1=100 - top1.avg), log)
    return top1.avg, top5.avg, losses.avg
def get_multi_stage_outputs(outputs, outputs_flip, num_joints, with_heatmaps,
                            with_ae, tag_per_joint=True, flip_index=None,
                            project2image=True, size_projected=None):
    """Run inference with the model to get multi-stage outputs (heatmaps &
    tags), and resize them to the base size.

    Args:
        outputs (list(torch.Tensor)): Outputs of network
        outputs_flip (list(torch.Tensor)): Flip outputs of network
        num_joints (int): Number of joints
        with_heatmaps (list[bool]): Option to output heatmaps for different
            stages.
        with_ae (list[bool]): Option to output ae tags for different stages.
        tag_per_joint (bool): Option to use one tag map per joint.
        flip_index (list[int]): Keypoint flip index.
        project2image (bool): Option to resize to base scale.
        size_projected ([w, h]): Base size of heatmaps.

    Returns:
        tuple: A tuple containing multi-stage outputs.

        - outputs (list(torch.Tensor)): List of simple outputs and flip outputs.
        - heatmaps (torch.Tensor): Multi-stage heatmaps that are resized to
          the base size.
        - tags (torch.Tensor): Multi-stage tags that are resized to the
          base size.
    """
    heatmaps_avg = 0
    num_heatmaps = 0
    heatmaps = []
    tags = []

    flip_test = outputs_flip is not None

    # aggregate heatmaps from different stages
    for i, output in enumerate(outputs):
        if i != len(outputs) - 1:
            output = torch.nn.functional.interpolate(
                output,
                size=(outputs[-1].size(2), outputs[-1].size(3)),
                mode='bilinear',
                align_corners=False)

        # starting index of the associative embeddings
        offset_feat = num_joints if with_heatmaps[i] else 0

        if with_heatmaps[i]:
            heatmaps_avg += output[:, :num_joints]
            num_heatmaps += 1

        if with_ae[i]:
            tags.append(output[:, offset_feat:])

    if num_heatmaps > 0:
        heatmaps.append(heatmaps_avg / num_heatmaps)

    if flip_test and flip_index:
        # perform flip testing
        heatmaps_avg = 0
        num_heatmaps = 0

        for i, output in enumerate(outputs_flip):
            if i != len(outputs_flip) - 1:
                output = torch.nn.functional.interpolate(
                    output,
                    size=(outputs_flip[-1].size(2), outputs_flip[-1].size(3)),
                    mode='bilinear',
                    align_corners=False)
            output = torch.flip(output, [3])
            outputs.append(output)

            offset_feat = num_joints if with_heatmaps[i] else 0

            if with_heatmaps[i]:
                heatmaps_avg += output[:, :num_joints][:, flip_index, :, :]
                num_heatmaps += 1

            if with_ae[i]:
                tags.append(output[:, offset_feat:])
                if tag_per_joint:
                    tags[-1] = tags[-1][:, flip_index, :, :]

        heatmaps.append(heatmaps_avg / num_heatmaps)

    if project2image and size_projected:
        heatmaps = [
            torch.nn.functional.interpolate(
                hms,
                size=(size_projected[1], size_projected[0]),
                mode='bilinear',
                align_corners=False) for hms in heatmaps
        ]
        tags = [
            torch.nn.functional.interpolate(
                tms,
                size=(size_projected[1], size_projected[0]),
                mode='bilinear',
                align_corners=False) for tms in tags
        ]

    return outputs, heatmaps, tags
def horisontal_flip(self, images, targets):
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]  # horizontal flip
    targets[:, 6] = -targets[:, 6]     # yaw angle flip
    return images, targets
def flip(image, label=None):
    if np.random.rand() < 0.5:
        image = torch.flip(image, [3])
        if label is not None:
            label = torch.flip(label, [3])
    return image, label
def matrix_to_vector(self, matrix):
    return torch.flip(matrix, dims=[0]).flatten()
def _(x):
    return torch.flip(x, [-1])
def vector_to_matrix(self, vector):
    output_h, output_w = [
        self.im_H + self.ker_H - 1, self.im_W + self.ker_W - 1
    ]
    return torch.flip(vector.reshape(output_h, output_w), dims=[0])
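# --- Added round-trip check (standalone; the self.* sizes are replaced by a
# toy 3x4 matrix): matrix_to_vector followed by vector_to_matrix is the identity.
import torch

m = torch.arange(12.).reshape(3, 4)
v = torch.flip(m, dims=[0]).flatten()        # matrix_to_vector
m2 = torch.flip(v.reshape(3, 4), dims=[0])   # vector_to_matrix
assert torch.equal(m, m2)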
def transpose_inverse(self, t, trans_idx):
    # print('inverse transpose .. t', t.size())
    t = torch.rot90(t, 4 - trans_idx % 4, [2, 3])
    if trans_idx >= 4:
        t = torch.flip(t, [3])
    return t
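# --- Added sanity sketch (standalone copies with the self argument dropped):
# transpose_inverse undoes transpose for every trans_idx in 0..7.
import torch

def _transpose(t, k):
    if k >= 4:
        t = torch.flip(t, [3])
    return torch.rot90(t, k % 4, [2, 3])

def _transpose_inverse(t, k):
    t = torch.rot90(t, 4 - k % 4, [2, 3])
    if k >= 4:
        t = torch.flip(t, [3])
    return t

x = torch.randn(1, 3, 5, 5)
for k in range(8):
    assert torch.equal(_transpose_inverse(_transpose(x, k), k), x)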
def main():
    # logging.basicConfig(level=logging.DEBUG)

    # Initial parsing looking for `RunPath` ...
    opts = AppSettings()
    opts = update_settings(opts)

    if not opts.path.key:
        raise ValueError('opts.path.key required for evaluation (For now)')
    path = RunPath(opts.path)

    # Re-parse full args with `base_opts` as default instead
    # TODO(ycho): Verify if this works.
    base_opts = update_settings(
        opts, argv=['--config_file', str(path.dir / 'opts.yaml')])
    opts = update_settings(base_opts)

    # Instantiation ...
    device = resolve_device(opts.device)
    model = KeypointNetwork2D(opts.model).to(device)

    # Load checkpoint.
    ckpt_file = get_latest_file(path.ckpt)
    print('ckpt = {}'.format(ckpt_file))
    Saver(model, None).load(ckpt_file)

    # NOTE(ycho): Forcing data loading on the CPU.
    # TODO(ycho): Consider scripted compositions?
    transform = Compose([
        DenseMapsMobilePose(opts.maps, th.device('cpu:0')),
        Normalize(Normalize.Settings()),
        InstancePadding(opts.padding)
    ])
    _, test_loader = get_loaders(opts.dataset,
                                 device=th.device('cpu:0'),
                                 batch_size=opts.batch_size,
                                 transform=transform)

    model.eval()
    for data in test_loader:
        # Now that we're here, convert all inputs to the device.
        data = {
            k: (v.to(device) if isinstance(v, th.Tensor) else v)
            for (k, v) in data.items()
        }
        image = data[Schema.IMAGE]
        image_scale = th.as_tensor(image.shape[-2:])  # (h, w) order
        print('# instances = {}'.format(data[Schema.INSTANCE_NUM]))

        with th.no_grad():
            outputs = model(image)
            heatmap = outputs[Schema.HEATMAP]
            kpt_heatmap = outputs[Schema.KEYPOINT_HEATMAP]

            # FIXME(ycho): hardcoded obj==1 assumption
            scores, indices = decode_kpt_heatmap(kpt_heatmap,
                                                 max_num_instance=4)

            # hmm...
            upsample_ratio = th.as_tensor(
                image_scale / th.as_tensor(heatmap.shape[-2:]),
                device=indices.device)
            upsample_ratio = upsample_ratio[None, None, None, :]
            scaled_indices = indices * upsample_ratio

            # Visualize inferred keypoints ...
            if False:
                # FIXME(ycho): Pedantically incorrect!!
                heatmap_vis = DrawKeypointMap(
                    DrawKeypointMap.Settings(as_displacement=False))(heatmap)
                kpt_heatmap_vis = DrawKeypointMap(
                    DrawKeypointMap.Settings(as_displacement=False))(kpt_heatmap)

                fig, ax = plt.subplots(3, 1)
                hv_cpu = heatmap_vis[0].detach().cpu().numpy().transpose(1, 2, 0)
                khv_cpu = kpt_heatmap_vis[0].detach().cpu().numpy().transpose(
                    1, 2, 0)
                img_cpu = th.clip(0.5 + (image[0] * 0.25), 0.0,
                                  1.0).detach().cpu().numpy().transpose(1, 2, 0)
                ax[0].imshow(hv_cpu)
                ax[1].imshow(khv_cpu / khv_cpu.max())
                ax[2].imshow(img_cpu)
                plt.show()

            # scores = (32, 9, 4)
            # (i, j) = (32, 2, 9, 4)
            for i_batch in range(scores.shape[0]):
                # GROUND TRUTH
                kpt_in = data[Schema.KEYPOINT_2D][i_batch, ..., :2]
                kpt_in = kpt_in * image_scale.to(kpt_in.device)
                # print(kpt_in)                    # X-Y order (J-I order)
                # print(scaled_indices[i_batch])   # Y-X order (I-J order)

                print('scale.shape')  # 32, 4, 3
                print(data[Schema.SCALE].shape)
                sol = compute_pose_epnp(
                    data[Schema.PROJECTION][i_batch],
                    # not estimating scale info for now ...,
                    data[Schema.SCALE][i_batch],
                    th.flip(scaled_indices[i_batch], dims=(-1, )) /
                    image_scale.to(scaled_indices.device))
                if sol is None:
                    continue
                R, T = sol
                print(R, data[Schema.ORIENTATION][i_batch])
                print(T, data[Schema.TRANSLATION][i_batch])
                break

            np.save(F'/tmp/heatmap.npy', heatmap.cpu().numpy())
            np.save(F'/tmp/kpt_heatmap.npy', kpt_heatmap.cpu().numpy())
        break
def reverse_input(self, input):
    reverse_input = torch.flip(input, [1])
    return reverse_input
image = np.zeros((TEST_WINDOW, TEST_WINDOW, 3), dtype=np.uint8)
for fl in range(3):
    image[:, :, fl] = layers[fl].read(
        window=Window.from_slices((x1, x2), (y1, y2)))
# print("Test {}-{}:Shape is:{}".format(filename, index, image.shape))
image = cv2.resize(image, (TEST_NEW_SIZE, TEST_NEW_SIZE))
image = trfm(image)
with torch.no_grad():
    if Open_Classifer:
        # add the batch dimension; inference here runs one image at a time
        image = image.to(DEVICE)[None]
        score = model(image)[0][0][0]
        score2 = model(torch.flip(image, [0, 3]))[0]
        score2 = torch.flip(score2, [3, 0])[0][0]
        score3 = model(torch.flip(image, [1, 2]))[0]
        score3 = torch.flip(score3, [2, 1])[0][0]
    else:
        # add the batch dimension; inference here runs one image at a time
        image = image.to(DEVICE)[None]
        score = model(image)[0][0]
        score2 = model(torch.flip(image, [0, 3]))
        score2 = torch.flip(score2, [3, 0])[0][0]
        score3 = model(torch.flip(image, [1, 2]))
        score3 = torch.flip(score3, [2, 1])[0][0]
def horizontal_flip(images, targets):
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
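# --- Usage sketch (added), assuming YOLO-style targets whose column 2 holds
# the normalized x-center of each box; the toy values below are illustrative.
import torch

imgs = torch.rand(2, 3, 16, 16)
tgts = torch.tensor([[0., 0., 0.25, 0.5, 0.1, 0.1],
                     [1., 1., 0.80, 0.5, 0.2, 0.2]])
imgs_f, tgts_f = horizontal_flip(imgs, tgts.clone())
assert torch.allclose(tgts_f[:, 2], 1 - tgts[:, 2])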
def random_hflip(tensor, prob):
    # `random` here is assumed to be random.random from the standard library.
    if prob > random():
        return tensor
    return torch.flip(tensor, dims=(3, ))
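# --- Usage note (added): as written, random_hflip returns the tensor
# unchanged when prob > random(), so the flip actually fires with
# probability 1 - prob.
import torch
from random import random

batch = torch.randn(4, 3, 32, 32)
out = random_hflip(batch, prob=0.5)   # width-flipped about half the time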
def flip_tensor(x):
    return torch.flip(x, [x.dim() - 1])
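# --- Added check: this flip_tensor variant mirrors the last dimension of a
# tensor of any rank.
import torch

assert torch.equal(flip_tensor(torch.tensor([1, 2, 3])),
                   torch.tensor([3, 2, 1]))
x = torch.randn(2, 3, 4, 5)
assert torch.equal(flip_tensor(x), torch.flip(x, [3]))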
def forward(self, postnet_output, decoder_output, mel_input, linear_input,
            stopnet_output, stopnet_target, output_lens, decoder_b_output,
            alignments, alignment_lens, alignments_backwards, input_lens):
    return_dict = {}
    # decoder and postnet losses
    if self.config.loss_masking:
        decoder_loss = self.criterion(decoder_output, mel_input, output_lens)
        if self.config.model in ["Tacotron", "TacotronGST"]:
            postnet_loss = self.criterion(postnet_output, linear_input,
                                          output_lens)
        else:
            postnet_loss = self.criterion(postnet_output, mel_input,
                                          output_lens)
    else:
        decoder_loss = self.criterion(decoder_output, mel_input)
        if self.config.model in ["Tacotron", "TacotronGST"]:
            postnet_loss = self.criterion(postnet_output, linear_input)
        else:
            postnet_loss = self.criterion(postnet_output, mel_input)
    loss = decoder_loss + postnet_loss
    return_dict['decoder_loss'] = decoder_loss
    return_dict['postnet_loss'] = postnet_loss

    # stopnet loss
    stop_loss = self.criterion_st(
        stopnet_output, stopnet_target,
        output_lens) if self.config.stopnet else torch.zeros(1)
    if not self.config.separate_stopnet and self.config.stopnet:
        loss += stop_loss
    return_dict['stopnet_loss'] = stop_loss

    # backward decoder loss (if enabled)
    if self.config.bidirectional_decoder:
        if self.config.loss_masking:
            decoder_b_loss = self.criterion(
                torch.flip(decoder_b_output, dims=(1, )), mel_input,
                output_lens)
        else:
            decoder_b_loss = self.criterion(
                torch.flip(decoder_b_output, dims=(1, )), mel_input)
        decoder_c_loss = torch.nn.functional.l1_loss(
            torch.flip(decoder_b_output, dims=(1, )), decoder_output)
        loss += decoder_b_loss + decoder_c_loss
        return_dict['decoder_b_loss'] = decoder_b_loss
        return_dict['decoder_c_loss'] = decoder_c_loss

    # double decoder consistency loss (if enabled)
    if self.config.double_decoder_consistency:
        decoder_b_loss = self.criterion(decoder_b_output, mel_input,
                                        output_lens)
        # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output)
        attention_c_loss = torch.nn.functional.l1_loss(alignments,
                                                       alignments_backwards)
        loss += decoder_b_loss + attention_c_loss
        return_dict['decoder_coarse_loss'] = decoder_b_loss
        return_dict['decoder_ddc_loss'] = attention_c_loss

    # guided attention loss (if enabled)
    if self.config.ga_alpha > 0:
        ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens)
        loss += ga_loss * self.ga_alpha
        return_dict['ga_loss'] = ga_loss * self.ga_alpha

    return_dict['loss'] = loss
    return return_dict
def __call__(self, sample):
    new_sample = torch.stack((sample, torch.flip(sample, [2])))
    return new_sample
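# --- Standalone equivalent (added, assuming a CHW image tensor): the call
# returns a (2, C, H, W) stack of the sample and its width-flipped mirror.
import torch

sample = torch.rand(3, 32, 32)
pair = torch.stack((sample, torch.flip(sample, [2])))
assert pair.shape == (2, 3, 32, 32)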
def inference(self, input, target):
    ########
    # TODO #
    ########
    # Implement beam search here.
    # This function runs with batch size = 1.
    # input  = [batch size, input len, vocab size]
    # target = [batch size, target len, vocab size]
    batch_size = input.shape[0]
    input_len = input.shape[1]  # maximum output length
    vocab_size = self.decoder.cn_vocab_size

    # Allocate storage for the outputs.
    outputs = torch.zeros(batch_size, input_len, vocab_size).to(self.device)
    # Feed the input through the encoder.
    encoder_outputs, hidden = self.encoder(input)
    # The encoder's final hidden state initializes the decoder;
    # encoder_outputs is mainly used for attention.
    # Because the encoder is a bidirectional RNN, the hidden states of both
    # directions in each layer must be concatenated.
    # hidden = [num_layers * directions, batch size, hid dim]
    #        --> [num_layers, directions, batch size, hid dim]
    hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
    hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
    # Take the <BOS> token.
    input = target[:, 0]
    preds = []
    output, hidden = self.decoder(input, hidden, encoder_outputs)

    # Store the prediction. Keep the top-k candidates at every step so we can
    # backtrack later; `index` holds k pairs of (own index, parent index).
    outputs[:, 0] = output
    index = []
    probabilities = []
    # Take the two most probable words; top1 and top2 are indices.
    prob = F.softmax(output, dim=1)
    top1, top2 = torch.topk(prob, 2, dim=1)[1][0]
    index.append([(top1, top1), (top2, top2)])
    probabilities.append([prob[0][top1], prob[0][top2]])
    input = top1.view(-1)
    hidden1 = hidden
    input2 = top2.view(-1)

    for t in range(1, input_len - 1):
        # The input is treated as a length-1 sentence, so each RNN call covers
        # a single time step; that step's output is then fed manually as the
        # input of the next decoder step.
        # output: (batch size, vocab size) -> which vocab entry scores highest;
        # hidden is the final-time-step hidden state of every decoder layer.
        output, hidden = self.decoder(input, hidden, encoder_outputs)
        output1, hidden1 = self.decoder(input2, hidden1, encoder_outputs)
        outputs[:, t] = output

        # Expand each of the two surviving hypotheses by its top two
        # predictions (four candidates in total), then keep the best two.
        prob = F.softmax(output, dim=1)
        top1, top2 = torch.topk(prob, 2, dim=1)[1][0]
        prob2 = F.softmax(output1, dim=1)
        top21, top22 = torch.topk(prob2, 2, dim=1)[1][0]
        compare = [probabilities[t - 1][0] * prob[0][top1],
                   probabilities[t - 1][0] * prob[0][top2],
                   probabilities[t - 1][1] * prob2[0][top21],
                   probabilities[t - 1][1] * prob2[0][top22]]
        compare = torch.tensor(compare).to(self.device)
        compare2 = torch.tensor([top1, top2, top21, top22]).to(self.device)
        # Indices of the two largest among the four candidates.
        top_index, top_index2 = torch.topk(compare, 2, dim=0)[1]
        if top_index >= 2:
            if top_index2 >= 2:
                index.append([(compare2[top_index], input2),
                              (compare2[top_index2], input2)])
            else:
                index.append([(compare2[top_index], input2),
                              (compare2[top_index2], input)])
        else:
            if top_index2 >= 2:
                index.append([(compare2[top_index], input),
                              (compare2[top_index2], input2)])
            else:
                index.append([(compare2[top_index], input),
                              (compare2[top_index2], input)])
        probabilities.append([compare[top_index], compare[top_index2]])
        input = compare2[top_index].view(-1)
        input2 = compare2[top_index2].view(-1)

    # Backtrack through the stored (self, parent) pairs to recover the best path.
    if probabilities[input_len - 2][0] > probabilities[input_len - 2][1]:
        preds.append(index[input_len - 2][0][0])
        parent = index[input_len - 2][0][1]
    else:
        # Symmetric branch (reconstructed): start from the second hypothesis.
        preds.append(index[input_len - 2][1][0])
        parent = index[input_len - 2][1][1]
    for i in range(input_len - 3, -1, -1):
        # index[i]: [(self, parent), (self, parent)]
        if index[i][0][0] == parent:
            preds.append(parent)
            parent = index[i][0][1]
        else:
            preds.append(parent)
            parent = index[i][1][1]
    preds = torch.tensor(preds)
    preds = torch.flip(preds, dims=[0])
    preds = preds.view(1, -1)
    # outputs holds the vocab-size score vector the decoder predicts at every
    # time step; the argmax at each step becomes the next step's input.
    return outputs, preds
def train(model, dataset, val_dataset=None, n_epoch=1, lr=0.1, print_every=100,
          log_every=100, val_every=2000, focalloss=None, device=None,
          verbose=False):
    print("====================== train ======================")
    t0 = time.time()
    log = dict(train=list(), val=list(), val_seen=list())
    if focalloss is not None:
        loss_fn = FocalLoss(weight=None, gamma=focalloss)
    else:
        loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                            num_workers=8, drop_last=False)
    for i in range(n_epoch):
        t1 = time.time()
        for j, data in enumerate(dataloader):
            model.train()
            model.zero_grad()
            x, f, a_mat, d_mat, gt_idxs = data
            if verbose:
                print("x", x.size(), "f", f.size(), "a_mat", a_mat.size(),
                      "d_mat", d_mat.size(), "gt_idxs", gt_idxs.size())
            n = len(gt_idxs[0, :])
            if verbose:
                print(j, list(gt_idxs[0, :]), n)
            x_tensor = x[0, :].float()
            f_tensor = f[0, :].float()
            a_tensor = a_mat[0, :].float()
            d_tensor = d_mat[0, :].float()
            g_tensor = gt_idxs[0, :].long()
            if device is not None:
                x_tensor = x_tensor.to(device)
                f_tensor = f_tensor.to(device)
                a_tensor = a_tensor.to(device)
                d_tensor = d_tensor.to(device)
                g_tensor = g_tensor.to(device)
            scores, _ = model(x_tensor, f_tensor, a_tensor, d_tensor)
            # The ground-truth sequence is direction-agnostic: score it both
            # forwards and reversed, and train on the smaller loss.
            loss_f = loss_fn(scores, g_tensor.long())
            loss_r = loss_fn(scores, torch.flip(g_tensor, (0, )).long())
            loss = torch.min(loss_f, loss_r)
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                model.seen += 1
                if (j + 1) % log_every == 0:
                    log["train"].append(dict(epoch=i + 1, iter=j + 1,
                                             seen=model.seen,
                                             loss=loss.item()))
                if (j + 1) % print_every == 0:
                    print("epoch {} loss {:.4f}".format(i, loss.data))
                if (j + 1) % val_every == 0:
                    if val_dataset is not None:
                        with torch.no_grad():
                            val_acc = val(model, val_dataset, device=device,
                                          verbose=False)
                        log["val_seen"].append(dict(seen=model.seen,
                                                    acc=val_acc))
                        t2 = time.time()
                        eta = (t2 - t1) / float(j + 1) * float(len(dataset) - j - 1)
                        print("seen {}, acc {:.4f}, {:.1f}s to go for this epoch"
                              .format(model.seen, val_acc, eta))
        if val_dataset is not None:
            with torch.no_grad():
                val_acc = val(model, val_dataset, device=device, verbose=False)
            log["val"].append(dict(epoch=i, acc=val_acc))
            print("seen {}, acc {:.4f}".format(model.seen, val_acc))
        t2 = time.time()
        eta = (t2 - t0) / float(i + 1) * float(n_epoch - i - 1)
        print("time elapsed {:.1f}s, {:.1f}s for this epoch, {:.1f}s to go"
              .format(t2 - t0, t2 - t1, eta))
    print("===================================================")
    return model, log
def test_one_epoch(dataset, DATAloader, net, epoch):
    #### start testing now
    Acc_array = 0.
    Prec_array = 0.
    Spe_array = 0.
    Rec_array = 0.
    IoU_array = 0.
    Dice_array = 0.
    HD_array = 0.
    sample_num = 0.
    result_list = []
    CEloss_list = []
    JAloss_list = []
    Label_list = []
    net.eval()
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(DATAloader):
            name_batched = sample_batched['name']
            row_batched = sample_batched['row']
            col_batched = sample_batched['col']

            [batch, channel, height, width] = sample_batched['image'].size()
            multi_avg = torch.zeros(
                (batch, cfg.MODEL_NUM_CLASSES, height, width),
                dtype=torch.float32).to(1)
            labels_batched = sample_batched['segmentation'].cpu().numpy()
            for rate in cfg.TEST_MULTISCALE:
                inputs_batched = sample_batched['image_%f' % rate]
                _, predicts, threshold = net(inputs_batched)
                predicts = predicts.to(1)
                threshold = threshold.to(1)
                predicts_batched = predicts.clone()
                threshold_batched = threshold.clone()
                del predicts
                del threshold
                if cfg.TEST_FLIP:
                    inputs_batched_flip = torch.flip(inputs_batched, [3])
                    _, predicts_flip, threshold_flip = net(inputs_batched_flip)
                    predicts_flip = torch.flip(predicts_flip, [3]).to(1)
                    threshold_flip = torch.flip(threshold_flip, [3]).to(1)
                    predicts_batched_flip = predicts_flip.clone()
                    threshold_batched_flip = threshold_flip.clone()
                    del predicts_flip
                    del threshold_flip
                    predicts_batched = (predicts_batched +
                                        predicts_batched_flip) / 2.0
                    threshold_batched = (threshold_batched +
                                         threshold_batched_flip) / 2.0
                predicts_batched = F.interpolate(predicts_batched, size=None,
                                                 scale_factor=1 / rate,
                                                 mode='bilinear',
                                                 align_corners=True)
                threshold_batched = F.interpolate(threshold_batched, size=None,
                                                  scale_factor=1 / rate,
                                                  mode='bilinear',
                                                  align_corners=True)
                multi_avg = multi_avg + predicts_batched
                del predicts_batched
            multi_avg = multi_avg / len(cfg.TEST_MULTISCALE)
            multi_avg = nn.Softmax(dim=1)(multi_avg)
            multi_avg = multi_avg - threshold_batched
            result = torch.argmax(multi_avg,
                                  dim=1).cpu().numpy().astype(np.uint8)
            threshold = threshold_batched.cpu().numpy()
            del threshold_batched

            for i in range(batch):
                row = row_batched[i]
                col = col_batched[i]
                p = result[i, :, :]
                l = labels_batched[i, :, :]
                thres = threshold[i, 1, :, :]
                # p = cv2.resize(p, dsize=(col, row), interpolation=cv2.INTER_NEAREST)
                # l = cv2.resize(l, dsize=(col, row), interpolation=cv2.INTER_NEAREST)
                predict = np.int32(p)
                gt = np.int32(l)
                cal = gt < 255
                mask = (predict == gt) * cal

                P = np.sum((predict == 1)).astype(np.float64)
                T = np.sum((gt == 1)).astype(np.float64)
                TP = np.sum((gt == 1) * (predict == 1)).astype(np.float64)
                TN = np.sum((gt == 0) * (predict == 0)).astype(np.float64)

                Acc = (TP + TN) / (T + P - TP + TN)
                Prec = TP / (P + 1e-10)
                Spe = TN / (P - TP + TN)
                Rec = TP / T
                DICE = 2 * TP / (T + P)
                IoU = TP / (T + P - TP)
                # HD = max(directed_hausdorff(predict, gt)[0], directed_hausdorff(predict, gt)[0])
                # HD = 2 * Prec * Rec / (Rec + Prec + 1e-10)
                beta = 2
                HD = Rec * Prec * (1 + beta**2) / (Rec + beta**2 * Prec + 1e-10)
                Acc_array += Acc
                Prec_array += Prec
                Spe_array += Spe
                Rec_array += Rec
                Dice_array += DICE
                IoU_array += IoU
                HD_array += HD
                sample_num += 1
                # p = cv2.resize(p, dsize=(col, row), interpolation=cv2.INTER_NEAREST)
                result_list.append({
                    'predict': np.uint8(p * 255),
                    'threshold': np.uint8(thres * 255),
                    'label': np.uint8(l * 255),
                    'IoU': IoU,
                    'name': name_batched[i]
                })

    Acc_score = Acc_array * 100 / sample_num
    Prec_score = Prec_array * 100 / sample_num
    Spe_score = Spe_array * 100 / sample_num
    Rec_score = Rec_array * 100 / sample_num
    Dice_score = Dice_array * 100 / sample_num
    IoUP = IoU_array * 100 / sample_num
    HD_score = HD_array * 100 / sample_num
    print('%10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   '
          '%10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%\n' %
          ('Acc', Acc_score, 'Sen', Rec_score, 'Spe', Spe_score,
           'Prec', Prec_score, 'Dice', Dice_score, 'Jac', IoUP,
           'F2', HD_score))
    if epoch % 50 == 0:
        dataset.save_result_train_thres(result_list, cfg.MODEL_NAME)

    return Dice_score, IoUP