Code example #1
File: test.py Project: gtesei/fast-furious
def predict(model, batch, flipped_batch, use_gpu):
    image_ids, inputs = batch['image_id'], batch['input']
    if use_gpu:
        inputs = inputs.cuda()
    outputs, _, _ = model(inputs)
    probs = torch.sigmoid(outputs)

    if flipped_batch is not None:
        flipped_image_ids, flipped_inputs = flipped_batch['image_id'], flipped_batch['input']
        # assert image_ids == flipped_image_ids
        if use_gpu:
            flipped_inputs = flipped_inputs.cuda()
        flipped_outputs, _, _ = model(flipped_inputs)
        flipped_probs = torch.sigmoid(flipped_outputs)

        probs += torch.flip(flipped_probs, (3,))  # flip back and add
        probs *= 0.5

    probs = probs.squeeze(1).cpu().numpy()
    if args.resize:
        probs = np.swapaxes(probs, 0, 2)
        probs = cv2.resize(probs, (orig_img_size, orig_img_size), interpolation=cv2.INTER_LINEAR)
        probs = np.swapaxes(probs, 0, 2)
    else:
        probs = probs[:, y0:y1, x0:x1]
    return probs
Code example #2
def flip_tensor(x):
    return torch.flip(x, [3])
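A minimal check of the helper above (my own sketch, assuming a 4-D NCHW input where dim 3 is the width axis):

import torch

x = torch.arange(24, dtype=torch.float32).reshape(1, 2, 3, 4)   # N, C, H, W
y = flip_tensor(x)                          # width axis (dim 3) reversed
assert torch.equal(torch.flip(y, [3]), x)   # flipping twice restores the original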
Code example #3
    def forward(self, input, var_rnn_hidden, visit_rnn_hidden):
        """

        :param input:
        :param var_rnn_hidden:
        :param visit_rnn_hidden:
        :return:
        """
        # emb_layer: input(*): LongTensor of arbitrary shape containing the indices to extract
        # emb_layer: output(*,H): where * is the input shape and H = embedding_dim
        # print("size of input:")
        # print(input.shape)
        v = self.emb_layer(input)
        # print("size of v:")
        # print(v.shape)
        v = self.dropout(v)

        # GRU:
        # input of shape (seq_len, batch, input_size)
        # seq_len: visit_seq_len
        # batch: batch_size
        # input_size: embedding dimension
        #
        # h_0 of shape (num_layers*num_directions, batch, hidden_size)
        # num_layers(1)*num_directions(1)
        # batch: batch_size
        # hidden_size:
        if self.reverse_rnn_feeding:
            visit_rnn_output, visit_rnn_hidden = self.visit_level_rnn(
                torch.flip(v, [0]), visit_rnn_hidden)
            alpha = self.visit_level_attention(
                torch.flip(visit_rnn_output, [0]))
        else:
            visit_rnn_output, visit_rnn_hidden = self.visit_level_rnn(
                v, visit_rnn_hidden)
            alpha = self.visit_level_attention(visit_rnn_output)
        visit_attn_w = F.softmax(alpha, dim=0)

        if self.reverse_rnn_feeding:
            var_rnn_output, var_rnn_hidden = self.variable_level_rnn(
                torch.flip(v, [0]), var_rnn_hidden)
            beta = self.variable_level_attention(
                torch.flip(var_rnn_output, [0]))
        else:
            var_rnn_output, var_rnn_hidden = self.variable_level_rnn(
                v, var_rnn_hidden)
            beta = self.variable_level_attention(var_rnn_output)
        var_attn_w = torch.tanh(beta)

        # print("beta attn:")
        # print(var_attn_w.shape)
        # '*' = hadamard product (element-wise product)
        attn_w = visit_attn_w * var_attn_w
        c = torch.sum(attn_w * v, dim=0)
        # print("context:")
        # print(c.shape)

        c = self.output_dropout(c)
        #print("context:")
        #print(c.shape)
        output = self.output_layer(c)
        #print("output:")
        #print(output.shape)
        output = F.softmax(output, dim=1)
        # print("output:")
        # print(output.shape)

        return output, var_rnn_hidden, visit_rnn_hidden
Code example #4
            z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean
            orig_ts = torch.from_numpy(orig_ts).float().to(device)

            # take first trajectory for visualization
            z0 = z0[0]

            ts_pos = np.linspace(0., 2. * np.pi, num=2000)
            ts_neg = np.linspace(-np.pi, 0., num=2000)[::-1].copy()
            ts_pos = torch.from_numpy(ts_pos).float().to(device)
            ts_neg = torch.from_numpy(ts_neg).float().to(device)

            zs_pos = odeint(func, z0, ts_pos)
            zs_neg = odeint(func, z0, ts_neg)

            xs_pos = dec(zs_pos)
            xs_neg = torch.flip(dec(zs_neg), dims=[0])

        xs_pos = xs_pos.cpu().numpy()
        xs_neg = xs_neg.cpu().numpy()
        orig_traj = orig_trajs[0].cpu().numpy()
        samp_traj = samp_trajs[0].cpu().numpy()

        plt.figure()
        plt.plot(orig_traj[:, 0],
                 orig_traj[:, 1],
                 'g',
                 label='true trajectory')
        plt.plot(xs_pos[:, 0],
                 xs_pos[:, 1],
                 'r',
                 label='learned trajectory (t>0)')
Code example #5
    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
    parser.add_argument('--device',
                        type=str,
                        default='gpu',
                        choices=['gpu', 'cpu'],
                        help='device')
    args = parser.parse_args()
    args.resolution = 1024
    return args


# Conversion functions
ops_dict = {
    # modulated transposed-convolution weights (iC,oC,kH,kW)
    'mTc':
    lambda weight: torch.flip(torch.from_numpy(weight.transpose(
        (2, 3, 0, 1))), [2, 3]),
    # transposed-convolution weights (iC,oC,kH,kW)
    'Tco':
    lambda weight: torch.from_numpy(weight.transpose((2, 3, 0, 1))),
    # convolution weights (oC,iC,kH,kW)
    'con':
    lambda weight: torch.from_numpy(weight.transpose((3, 2, 0, 1))),
    # fully-connected layer weights (oD, iD)
    'fc_':
    lambda weight: torch.from_numpy(weight.transpose((1, 0))),
    # fully-connected biases, constant input, constant noise, v1 noise weights (no transform)
    'any':
    lambda weight: torch.from_numpy(weight),
    # style-mixing value, v2 noise weights (scalar)
    'uns':
    lambda weight: torch.from_numpy(np.array(weight).reshape(1)),
Code example #6
def constrained_max_pooling_binary_OHEM_focal_ratio(all_output,
                                                    num_samples,
                                                    all_target,
                                                    gamma_n=0,
                                                    gamma_p=0,
                                                    OHEM_Thr=10000,
                                                    max_ratio=1,
                                                    random_n=False,
                                                    constraints=None,
                                                    clamp=0):
    num_hit = 0
    # Here we clamp the sigmoid output to prevent NaN problem
    # When we calculate loss
    all_output = torch.clamp(torch.sigmoid(all_output), clamp, 1.0 - clamp)
    num_utts = all_output.shape[0]
    num_sigmoid = all_output.shape[2]

    new_outputs = []
    new_targets = []
    for j in range(num_sigmoid):
        new_outputs.append([])
        new_targets.append([])

    for i in range(num_utts):
        end_idx = num_samples[i]

        sorted_output, sorted_index = torch.sort(all_output[i, :end_idx],
                                                 dim=0)
        reversed_index = torch.flip(sorted_index, dims=[0])
        if all_target[i][0] == 0:
            # target is 0, so we don't need constraint
            for j in range(num_sigmoid):
                selected_indexes = OHEM(reversed_index[:, j], OHEM_Thr)
                new_outputs[j].append(all_output[i, selected_indexes, j])
                new_targets[j].append([0] * len(selected_indexes))

            if torch.sum(sorted_output[-1, :] >= 0.5) <= 0:
                # all the binary probabilities are smaller than 0.5
                num_hit += 1
        else:
            # target is not 0, so we calculate the constraint; multiple constraints are supported
            index_constraint = set()
            if constraints is not None:
                for x in constraints[i]:
                    index_constraint = set.union(index_constraint,
                                                 set(range(x[0], x[1])))
            # we calculate negative loss for all non-target sigmoid
            target_sigmoid = all_target[i][0] - 1
            non_target_sigmoids = list(range(num_sigmoid))
            non_target_sigmoids.remove(target_sigmoid)
            for j in non_target_sigmoids:
                selected_indexes = OHEM(reversed_index[:, j], OHEM_Thr)
                new_outputs[j].append(all_output[i, selected_indexes, j])
                new_targets[j].append([0] * len(selected_indexes))
            # calculate positive loss for target sigmoid
            if len(index_constraint) == 0:
                # no constraint: constraints is None or this utterance has no
                # constraint (which is possible)
                new_outputs[target_sigmoid].append(
                    sorted_output[-1, [target_sigmoid]])
                new_targets[target_sigmoid].append([1])
                if sorted_output[-1, target_sigmoid] >= 0.5:
                    num_hit += 1
            else:
                # with constraint: constraints is not None and this utterance does
                # have a constraint
                index_constraint = torch.tensor(list(index_constraint))
                sorted_output_short, _ = torch.sort(
                    all_output[i, index_constraint, target_sigmoid])
                new_outputs[target_sigmoid].append(
                    sorted_output_short[-1].view(1, ))
                new_targets[target_sigmoid].append([1])
                if sorted_output_short[-1] >= 0.5:
                    num_hit += 1
        if torch.sum(torch.isnan(sorted_output)) > 0:
            print("Error: output NaNs\n")
            exit(1)

    # Here we select training samples according to max_ratio
    loss, num_training = select_training_samples(new_outputs,
                                                 new_targets,
                                                 ratio=max_ratio,
                                                 gamma_n=gamma_n,
                                                 gamma_p=gamma_p,
                                                 random_n=random_n)
    if torch.isnan(loss) > 0:
        print("Error: Loss NaNs\n")
        exit(1)
    return loss, float(num_hit) * 100 / len(num_samples), num_training
Code example #7
 def transpose(self, t, trans_idx):
     # print('transpose jt .. ', t.size())
     if trans_idx >= 4:
         t = torch.flip(t, [3])
     return torch.rot90(t, trans_idx % 4, [2, 3])
Code example #8
def _augment_channelswap(audio):
    """Swap channels of stereo signals with a probability of p=0.5"""
    if audio.shape[0] == 2 and torch.FloatTensor(1).uniform_() < 0.5:
        return torch.flip(audio, [0])
    else:
        return audio
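A hypothetical usage sketch for the augmentation above, assuming a (channels, samples) stereo waveform: the two channels are swapped about half of the time, and mono signals pass through unchanged.

import torch

stereo = torch.randn(2, 16000)                        # (channels, samples)
mono = torch.randn(1, 16000)

augmented = _augment_channelswap(stereo)              # either [L, R] or [R, L]
assert torch.equal(_augment_channelswap(mono), mono)  # mono passes through unchanged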
Code example #9
    def forward(
        self,
        tokens: torch.Tensor,
        seq_lens: torch.Tensor,
        dict_feat: Optional[Tuple[torch.Tensor, ...]] = None,
        actions: Optional[List[List[int]]] = None,
        contextual_token_embeddings: Optional[torch.Tensor] = None,
    ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
        """RNNG forward function.

        Args:
            tokens (torch.Tensor): list of tokens
            seq_lens (torch.Tensor): list of sequence lengths
            dict_feat (Optional[Tuple[torch.Tensor, ...]]): dictionary or gazetteer
                features for each token
            actions (Optional[List[List[int]]]): Used only during training.
                Oracle actions for the instances.

        Returns:
            list of top k tuple of predicted actions tensor and corresponding scores tensor.
            Tensor shape:
            (batch_size, action_length)
            (batch_size, action_length, number_of_actions)
        """
        beam_size = self.beam_size
        top_k = self.top_k

        if self.stage != Stage.TEST:
            beam_size = 1
            top_k = 1

        if self.training:
            assert actions is not None, "actions must be provided for training"
            actions_idx_rev = list(reversed(actions[0]))
        else:
            torch.manual_seed(0)

        beam_size = max(beam_size, 1)

        # Reverse the order of input tokens.
        tokens_list_rev = torch.flip(tokens, [len(tokens.size()) - 1])

        # Aggregate inputs for embedding module.
        embedding_input = [tokens]
        if dict_feat is not None:
            embedding_input.append(dict_feat)
        if contextual_token_embeddings is not None:
            embedding_input.append(contextual_token_embeddings)

        # Embed and reverse the order of tokens.
        token_embeddings = self.embedding(*embedding_input)
        token_embeddings = torch.flip(token_embeddings,
                                      [len(tokens.size()) - 1])

        # Batch size is always = 1. So we squeeze the batch_size dimension.
        token_embeddings = token_embeddings.squeeze(0)
        tokens_list_rev = tokens_list_rev.squeeze(0)

        initial_state = ParserState(self)
        for i in range(token_embeddings.size()[0]):
            token_embedding = token_embeddings[i].unsqueeze(0)
            tok = tokens_list_rev[i]
            initial_state.buffer_stackrnn.push(token_embedding, Element(tok))

        beam = [initial_state]
        while beam and any(not state.finished() for state in beam):
            # Stores plans for expansion as (score, state, action)
            plans: List[Tuple[float, ParserState, int]] = []
            # Expand current beam states
            for state in beam:
                # Keep terminal states
                if state.finished():
                    plans.append((state.neg_prob, state, -1))
                    continue

                #  translating Expression p_t = affine_transform({pbias, S,
                #  stack_summary, B, buffer_summary, A, action_summary});
                stack = state.stack_stackrnn
                stack_summary = stack.embedding()
                action_summary = state.action_stackrnn.embedding()
                buffer_summary = state.buffer_stackrnn.embedding()
                if self.dropout_layer.p > 0:
                    stack_summary = self.dropout_layer(stack_summary)
                    action_summary = self.dropout_layer(action_summary)
                    buffer_summary = self.dropout_layer(buffer_summary)

                # feature for index of last open non-terminal
                last_open_NT_feature = torch.zeros(len(self.actions_vocab))
                open_NT_exists = state.num_open_NT > 0

                if (len(stack) > 0 and open_NT_exists
                        and self.ablation_use_last_open_NT_feature):
                    last_open_NT = None
                    try:
                        open_NT = state.is_open_NT[::-1].index(True)
                        last_open_NT = stack.element_from_top(open_NT)
                    except ValueError:
                        pass
                    if last_open_NT:
                        last_open_NT_feature[last_open_NT.node] = 1.0
                last_open_NT_feature = last_open_NT_feature.unsqueeze(0)

                summaries = []
                if self.ablation_use_buffer:
                    summaries.append(buffer_summary)
                if self.ablation_use_stack:
                    summaries.append(stack_summary)
                if self.ablation_use_action:
                    summaries.append(action_summary)
                if self.ablation_use_last_open_NT_feature:
                    summaries.append(last_open_NT_feature)

                state.action_p = self.action_linear(torch.cat(summaries,
                                                              dim=1))

                log_probs = F.log_softmax(state.action_p, dim=1)[0]

                for action in self.valid_actions(state):
                    plans.append((state.neg_prob - log_probs[action].item(),
                                  state, action))

            beam = []
            # Take actions to regenerate the beam
            for neg_prob, state, predicted_action_idx in sorted(
                    plans)[:beam_size]:
                # Skip terminal states
                if state.finished():
                    beam.append(state)
                    continue

                # Only branch out states when needed
                if beam_size > 1:
                    state = state.copy()

                state.predicted_actions_idx.append(predicted_action_idx)

                target_action_idx = predicted_action_idx
                if self.training:
                    assert (len(actions_idx_rev) >
                            0), "Actions and tokens may not be in sync."
                    target_action_idx = actions_idx_rev[-1]
                    actions_idx_rev = actions_idx_rev[:-1]

                if (self.constraints_ignore_loss_for_unsupported
                        and state.found_unsupported):
                    pass
                else:
                    state.action_scores.append(state.action_p)

                self.push_action(state, target_action_idx)

                state.neg_prob = neg_prob
                beam.append(state)
            # End for
        # End while
        assert len(beam) > 0, "How come beam is empty?"
        assert len(state.stack_stackrnn) == 1, "How come stack len is " + str(
            len(state.stack_stackrnn))
        assert len(
            state.buffer_stackrnn) == 0, "How come buffer len is " + str(
                len(state.buffer_stackrnn))

        # Unsqueeze to add batch dimension before returning.
        return [(
            cuda_utils.LongTensor(state.predicted_actions_idx).unsqueeze(0),
            torch.cat(state.action_scores).unsqueeze(0),
        ) for state in sorted(beam)[:top_k]]
Code example #10
def softmax_rgb_blend(colors,
                      fragments,
                      blend_params,
                      znear: float = 1.0,
                      zfar: float = 100) -> torch.Tensor:
    """
    RGB and alpha channel blending to return an RGBA image based on the method
    proposed in [0]
      - **RGB** - blend the colors based on the 2D distance based probability map and
        relative z distances.
      - **A** - blend based on the 2D distance based probability map.

    Args:
        colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.
        fragments: namedtuple with outputs of rasterization. We use properties
            - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
              of the faces (in the packed representation) which
              overlap each pixel in the image.
            - dists: FloatTensor of shape (N, H, W, K) specifying
              the 2D euclidean distance from the center of each pixel
              to each of the top K overlapping faces.
            - zbuf: FloatTensor of shape (N, H, W, K) specifying
              the interpolated depth from each pixel to each of the
              top K overlapping faces.
        blend_params: instance of BlendParams dataclass containing properties
            - sigma: float, parameter which controls the width of the sigmoid
              function used to calculate the 2D distance based probability.
              Sigma controls the sharpness of the edges of the shape.
            - gamma: float, parameter which controls the scaling of the
              exponential function used to control the opacity of the color.
            - background_color: (3) element list/tuple/torch.Tensor specifying
              the RGB values for the background color.
        znear: float, near clipping plane in the z direction
        zfar: float, far clipping plane in the z direction

    Returns:
        RGBA pixel_colors: (N, H, W, 4)

    [0] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
    Image-based 3D Reasoning'
    """

    N, H, W, K = fragments.pix_to_face.shape
    device = fragments.pix_to_face.device
    pix_colors = torch.ones((N, H, W, 4),
                            dtype=colors.dtype,
                            device=colors.device)
    background = blend_params.background_color
    if not torch.is_tensor(background):
        background = torch.tensor(background,
                                  dtype=torch.float32,
                                  device=device)

    # Background color
    delta = np.exp(1e-10 / blend_params.gamma) * 1e-10
    delta = torch.tensor(delta, device=device)

    # Mask for padded pixels.
    mask = fragments.pix_to_face >= 0

    # Sigmoid probability map based on the distance of the pixel to the face.
    prob_map = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask

    # The cumulative product ensures that alpha will be 1 if at least 1 face
    # fully covers the pixel as for that face prob will be 1.0
    # TODO: investigate why torch.cumprod backwards is very slow for large
    # values of K.
    # Temporarily replace this with exp(sum(log)) using the fact that
    # a*b = exp(log(a*b)) = exp(log(a) + log(b))
    # alpha = 1.0 - torch.cumprod((1.0 - prob), dim=-1)[..., -1]

    alpha = 1.0 - torch.exp(torch.log((1.0 - prob_map)).sum(dim=-1))

    # Weights for each face. Adjust the exponential by the max z to prevent
    # overflow. zbuf shape (N, H, W, K), find max over K.
    # TODO: there may still be some instability in the exponent calculation.

    z_inv = (zfar - fragments.zbuf) / (zfar - znear) * mask
    z_inv_max = torch.max(z_inv, dim=-1).values[..., None]
    weights_num = prob_map * torch.exp(
        (z_inv - z_inv_max) / blend_params.gamma)

    # Normalize weights.
    # weights_num shape: (N, H, W, K). Sum over K and divide through by the sum.
    denom = weights_num.sum(dim=-1)[..., None] + delta
    weights = weights_num / denom

    # Sum: weights * textures + background color
    weighted_colors = (weights[..., None] * colors).sum(dim=-2)
    weighted_background = (delta / denom) * background
    pix_colors[..., :3] = weighted_colors + weighted_background
    pix_colors[..., 3] = alpha

    # Clamp colors to the range 0-1 and flip y axis.
    pix_colors = torch.clamp(pix_colors, min=0, max=1.0)
    return torch.flip(pix_colors, [1])
Code example #11
    def mf_ensemble_test(self):
        img_list = fnmatch.filter(os.listdir(self.video_i), '*.png')
        img_list.sort(key=lambda x: int(x[:-4]))
        with torch.no_grad():
            for index in tqdm(range(len(img_list))):
                if ((index + 1) % 10):
                    continue

                list_mf = []
                for i in [
                        max(index - 4, 0),
                        max(index - 3, 0),
                        max(index - 2, 0),
                        max(index - 1, 0),
                        index,
                        min(index + 1, len(img_list) - 1),
                        min(index + 2, len(img_list) - 1),
                        min(index + 3, len(img_list) - 1),
                        min(index + 4, len(img_list) - 1)
                ]:

                    img_path = os.path.join(self.video_i, img_list[i])
                    img = Image.open(img_path).convert('RGB')
                    img = np.asarray(img)
                    list_mf.append(img)

                info_num = int(img_list[index].split('.')[0]) - 1
                mod = info_num % 4
                if mod == 0:
                    pqf = np.array([1, 0.9, 0.9, 0.9, 1.1, 0.9, 0.9, 0.9, 1])

                elif mod == 1:
                    pqf = np.array([0.9, 0.9, 0.9, 1, 1, 0.9, 0.9, 1, 0.9])

                elif mod == 2:
                    pqf = np.array([0.9, 0.9, 1, 0.9, 1, 0.9, 1, 0.9, 0.9])

                elif mod == 3:
                    pqf = np.array([0.9, 1, 0.9, 0.9, 1, 1, 0.9, 0.9, 0.9])

                else:
                    pass
                pqf = torch.from_numpy(pqf).float()
                pqf = torch.unsqueeze(pqf, 0)
                pqf_lst = [pqf, pqf, pqf, pqf]
                pqf = torch.cat(pqf_lst, 0).cuda()

                info_filename = str(info_num) + '.tuLayer.png'
                info_path = os.path.join(self.info, info_filename)
                info = Image.open(info_path).convert('RGB')
                info = np.asarray(info)[..., 0:1]

                input_root = np.stack(list_mf, axis=0)
                info_root = info
                # rot ?
                input_1F = np.ascontiguousarray(input_root)
                input_1T = np.ascontiguousarray(
                    input_root.transpose(0, 2, 1, 3))

                info_1F = np.ascontiguousarray(info_root)
                info_1T = np.ascontiguousarray(info_root.transpose(1, 0, 2))

                # rot_F hflip ? vflip ?
                input_1F_2F = input_1F
                input_1F_2F_3F = input_1F_2F
                input_1F_2F_3T = np.ascontiguousarray(
                    input_1F_2F[:, ::-1, :, :])

                input_1F_2T = np.ascontiguousarray(input_1F[:, :, ::-1, :])
                input_1F_2T_3F = input_1F_2T
                input_1F_2T_3T = np.ascontiguousarray(
                    input_1F_2T[:, ::-1, :, :])

                info_1F_2F = info_1F
                info_1F_2F_3F = info_1F_2F
                info_1F_2F_3T = np.ascontiguousarray(info_1F_2F[::-1, :, :])

                info_1F_2T = np.ascontiguousarray(info_1F[:, ::-1, :])
                info_1F_2T_3F = info_1F_2T
                info_1F_2T_3T = np.ascontiguousarray(info_1F_2T[::-1, :, :])

                # rot_T hflip ? vflip ?
                input_1T_2F = input_1T
                input_1T_2F_3F = input_1T_2F
                input_1T_2F_3T = np.ascontiguousarray(
                    input_1T_2F[:, ::-1, :, :])

                input_1T_2T = np.ascontiguousarray(input_1T[:, :, ::-1, :])
                input_1T_2T_3F = input_1T_2T
                input_1T_2T_3T = np.ascontiguousarray(
                    input_1T_2T[:, ::-1, :, :])

                info_1T_2F = info_1T
                info_1T_2F_3F = info_1T_2F
                info_1T_2F_3T = np.ascontiguousarray(info_1T_2F[::-1, :, :])

                info_1T_2T = np.ascontiguousarray(info_1T[:, ::-1, :])
                info_1T_2T_3F = info_1T_2T
                info_1T_2T_3T = np.ascontiguousarray(info_1T_2T[::-1, :, :])

                # print(input.shape)
                # N H W C
                # print(torch.from_numpy(img).shape)

                input_1F_2F_3F = torch.from_numpy(input_1F_2F_3F).permute(
                    0, 3, 1, 2).float() / 255
                input_1F_2F_3T = torch.from_numpy(input_1F_2F_3T).permute(
                    0, 3, 1, 2).float() / 255
                input_1F_2T_3F = torch.from_numpy(input_1F_2T_3F).permute(
                    0, 3, 1, 2).float() / 255
                input_1F_2T_3T = torch.from_numpy(input_1F_2T_3T).permute(
                    0, 3, 1, 2).float() / 255

                input_1T_2F_3F = torch.from_numpy(input_1T_2F_3F).permute(
                    0, 3, 1, 2).float() / 255
                input_1T_2F_3T = torch.from_numpy(input_1T_2F_3T).permute(
                    0, 3, 1, 2).float() / 255
                input_1T_2T_3F = torch.from_numpy(input_1T_2T_3F).permute(
                    0, 3, 1, 2).float() / 255
                input_1T_2T_3T = torch.from_numpy(input_1T_2T_3T).permute(
                    0, 3, 1, 2).float() / 255

                info_1F_2F_3F = torch.from_numpy(info_1F_2F_3F).permute(
                    2, 0, 1).float() / 255
                info_1F_2F_3T = torch.from_numpy(info_1F_2F_3T).permute(
                    2, 0, 1).float() / 255
                info_1F_2T_3F = torch.from_numpy(info_1F_2T_3F).permute(
                    2, 0, 1).float() / 255
                info_1F_2T_3T = torch.from_numpy(info_1F_2T_3T).permute(
                    2, 0, 1).float() / 255

                info_1T_2F_3F = torch.from_numpy(info_1T_2F_3F).permute(
                    2, 0, 1).float() / 255
                info_1T_2F_3T = torch.from_numpy(info_1T_2F_3T).permute(
                    2, 0, 1).float() / 255
                info_1T_2T_3F = torch.from_numpy(info_1T_2T_3F).permute(
                    2, 0, 1).float() / 255
                info_1T_2T_3T = torch.from_numpy(info_1T_2T_3T).permute(
                    2, 0, 1).float() / 255

                # B N C H W
                input_norot = [
                    torch.unsqueeze(input_1F_2F_3F, 0),
                    torch.unsqueeze(input_1F_2F_3T, 0),
                    torch.unsqueeze(input_1F_2T_3F, 0),
                    torch.unsqueeze(input_1F_2T_3T, 0)
                ]
                input_rot = [
                    torch.unsqueeze(input_1T_2F_3F, 0),
                    torch.unsqueeze(input_1T_2F_3T, 0),
                    torch.unsqueeze(input_1T_2T_3F, 0),
                    torch.unsqueeze(input_1T_2T_3T, 0)
                ]

                input_norot = torch.cat(input_norot, 0).cuda()

                input_rot = torch.cat(input_rot, 0).cuda()

                info_norot = [
                    torch.unsqueeze(info_1F_2F_3F, 0),
                    torch.unsqueeze(info_1F_2F_3T, 0),
                    torch.unsqueeze(info_1F_2T_3F, 0),
                    torch.unsqueeze(info_1F_2T_3T, 0)
                ]
                info_rot = [
                    torch.unsqueeze(info_1T_2F_3F, 0),
                    torch.unsqueeze(info_1T_2F_3T, 0),
                    torch.unsqueeze(info_1T_2T_3F, 0),
                    torch.unsqueeze(info_1T_2T_3T, 0)
                ]

                info_norot = torch.cat(info_norot, 0).cuda()

                info_rot = torch.cat(info_rot, 0).cuda()

                model = self.model

                with timer('EMGA_ensemble'):
                    # 4, C, H, W
                    out = model(input_norot, info_norot, pqf)
                    out = out[4]
                    out_rot = model(input_rot, info_rot, pqf)
                    # multiscale outputs
                    out_rot = out_rot[4]

                out_0, out_1, out_2, out_3 = out[0], out[1], out[2], out[3]
                out_4, out_5, out_6, out_7 = out_rot[0], out_rot[1], out_rot[
                    2], out_rot[3]

                out_x4 = out_0 + torch.flip(out_1, [
                    1,
                ]) + torch.flip(out_2, [
                    2,
                ]) + torch.flip(out_3, [1, 2])

                out_rot_x4 = out_4 + torch.flip(out_5, [
                    1,
                ]) + torch.flip(out_6, [
                    2,
                ]) + torch.flip(out_7, [1, 2])
                out_rot_x4 = out_rot_x4.permute(
                    0, 2, 1)  # note the order: the input is rotated first, so the output is rotated back last

                out = (out_x4 + out_rot_x4) / 8.0

                out = out.cpu()
                out = out.detach().numpy() * 255.0
                out = out.clip(0, 255).transpose(1, 2, 0)
                out_img = Image.fromarray(out.astype(np.uint8), mode='RGB')

                output_path = os.path.join(self.video_o, img_list[index])
                out_img.save(output_path)
Code example #12
def flip_3d(input_, list_axes):
    input_ = torch.flip(input_, list_axes)

    return input_
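A minimal sketch of calling the helper above on an assumed 5-D NCDHW volume: passing several axes flips depth, height and width in a single call, and flipping twice restores the input.

import torch

volume = torch.randn(1, 1, 8, 16, 16)                 # N, C, D, H, W
mirrored = flip_3d(volume, [2, 3, 4])                 # flip depth, height and width at once
assert torch.equal(flip_3d(mirrored, [2, 3, 4]), volume)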
Code example #13
def train(train_loader, model, optimizer, epoch, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        target = target.long()
        input, target = input.cuda(), target.cuda()
        data_time.update(time.time() - end)

        if args.train == 'mixup':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var,target_var, mixup= True, mixup_alpha = args.mixup_alpha)
            loss = bce_loss(softmax(output), reweighted_target)#mixup_criterion(target_a, target_b, lam)

        elif args.train== 'mixup_hidden':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var, target_var, mixup_hidden= True, mixup_alpha = args.mixup_alpha)
            loss = bce_loss(softmax(output), reweighted_target)#mixup_criterion(target_a, target_b, lam)

        elif args.train == 'vanilla':
            input_var, target_var = Variable(input), Variable(target)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)

        elif args.train == 'cutout':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(cut_input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)

        elif args.train == 'vanilla_cutout':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, reweighted_target_aug = model(cut_input_var, target_var)
            loss_aug = bce_loss(softmax(output_aug), reweighted_target_aug)
            loss = loss + loss_aug

        elif args.train == 'vanilla_cutout_consistency_reg':
            cutout = Cutout(1, args.cutout)
            cut_input = cutout.apply(input)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)
            output_aug, reweighted_target_aug = model(cut_input_var, target_var)
            output_anchor = Variable(output.detach().data, requires_grad=False)
            loss_aug = mse_loss(output_anchor, output_aug)
            loss = loss + loss_aug
#             print("=" * 100)
#             print("Cutout function : ", Cutout)
#             print("Cutout : ", cutout)
#             print("cut_input : ", cut_input)
#             exit(1)
            
        elif args.train == 'vanilla_cutout_consistency_proposed':
            # cutout = Cutout(1, args.cutout)
            # cut_input = cutout.apply(input)
            cut_input = input + torch.randn(input.size()).cuda() * 0.01
            
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            cut_input_var = torch.autograd.Variable(cut_input)

            target_layer_num = torch.randperm(4)[0]+1
            output, reweighted_target, output_anchor = model(input_var, target_var, layer_num_out=target_layer_num)
            loss = bce_loss(softmax(output), reweighted_target)

            output_aug, reweighted_target_aug = model.forward_n_layers(cut_input_var, target_var, layer_num=target_layer_num)
            output_anchor = Variable(output_anchor.detach().data, requires_grad=False)

            # loss_aug softmax
            loss_aug = mse_loss(softmax(output_anchor), softmax(output_aug))
            # loss_aug = mse_loss(output_anchor, output_aug)

            alpha = (epoch / 400)

            loss = loss + alpha * loss_aug
        elif args.train == 'horizontal_flip':
#             cutout = Cutout(1, args.cutout)
            flip_input = torch.flip(input, (3,))
    
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            flip_input_var = torch.autograd.Variable(flip_input)
            
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)        
            

        
        elif args.train == 'vanilla_horizontal_flip':
            flip_input = torch.flip(input, (3,))
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)
            flip_input_var = torch.autograd.Variable(flip_input)
            
            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)        
            
            output_aug, _ = model(flip_input_var, target_var)
            loss_aug = bce_loss(softmax(output_aug), reweighted_target)
            
            loss = (loss + loss_aug)/2
            
        elif args.train == 'vanilla_horizontal_flip_consistency_reg':  # add the vanilla loss and the loss between the flipped output and the vanilla output
            flip_input = torch.flip(input, (3,))
            input_var, target_var = torch.autograd.Variable(input), torch.autograd.Variable(target)
            flip_input_var = torch.autograd.Variable(flip_input)

            output, reweighted_target = model(input_var, target_var)
            loss = bce_loss(softmax(output), reweighted_target)  # compute the vanilla loss first
            
            output_aug, reweighted_target_aug = model(flip_input_var, target_var)
            output_anchor = Variable(output.detach().data, requires_grad=False)

            loss_aug = mse_loss(output_anchor, output_aug)  # loss between the flipped output and the plain vanilla output
            
            loss = (loss + loss_aug)/2
        
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        max_grad_norm = 5.
        torch_utils.clip_grad_norm_(model.parameters(),max_grad_norm)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        
        
        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)

    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
    return top1.avg, top5.avg, losses.avg
Code example #14
def get_multi_stage_outputs(outputs,
                            outputs_flip,
                            num_joints,
                            with_heatmaps,
                            with_ae,
                            tag_per_joint=True,
                            flip_index=None,
                            project2image=True,
                            size_projected=None):
    """Inference the model to get multi-stage outputs (heatmaps & tags), and
    resize them to base sizes.

    Args:
        outputs (list(torch.Tensor)): Outputs of network
        outputs_flip (list(torch.Tensor)): Flip outputs of network
        num_joints (int): Number of joints
        with_heatmaps (list[bool]): Option to output
            heatmaps for different stages.
        with_ae (list[bool]): Option to output
            ae tags for different stages.
        tag_per_joint (bool): Option to use one tag map per joint.
        flip_index (list[int]): Keypoint flip index.
        project2image (bool): Option to resize to base scale.
        size_projected ([w, h]): Base size of heatmaps.

    Returns:
        tuple: A tuple containing multi-stage outputs.

        - outputs (list(torch.Tensor)): List of simple outputs and
          flip outputs.
        - heatmaps (torch.Tensor): Multi-stage heatmaps that are resized to
          the base size.
        - tags (torch.Tensor): Multi-stage tags that are resized to
          the base size.
    """

    heatmaps_avg = 0
    num_heatmaps = 0
    heatmaps = []
    tags = []

    flip_test = outputs_flip is not None

    # aggregate heatmaps from different stages
    for i, output in enumerate(outputs):
        if i != len(outputs) - 1:
            output = torch.nn.functional.interpolate(
                output,
                size=(outputs[-1].size(2), outputs[-1].size(3)),
                mode='bilinear',
                align_corners=False)

        # starting index of the associative embeddings
        offset_feat = num_joints if with_heatmaps[i] else 0

        if with_heatmaps[i]:
            heatmaps_avg += output[:, :num_joints]
            num_heatmaps += 1

        if with_ae[i]:
            tags.append(output[:, offset_feat:])

    if num_heatmaps > 0:
        heatmaps.append(heatmaps_avg / num_heatmaps)

    if flip_test and flip_index:
        # perform flip testing
        heatmaps_avg = 0
        num_heatmaps = 0

        for i, output in enumerate(outputs_flip):
            if i != len(outputs_flip) - 1:
                output = torch.nn.functional.interpolate(
                    output,
                    size=(outputs_flip[-1].size(2), outputs_flip[-1].size(3)),
                    mode='bilinear',
                    align_corners=False)
            output = torch.flip(output, [3])
            outputs.append(output)

            offset_feat = num_joints if with_heatmaps[i] else 0

            if with_heatmaps[i]:
                heatmaps_avg += output[:, :num_joints][:, flip_index, :, :]
                num_heatmaps += 1

            if with_ae[i]:
                tags.append(output[:, offset_feat:])
                if tag_per_joint:
                    tags[-1] = tags[-1][:, flip_index, :, :]

        heatmaps.append(heatmaps_avg / num_heatmaps)

    if project2image and size_projected:
        heatmaps = [
            torch.nn.functional.interpolate(hms,
                                            size=(size_projected[1],
                                                  size_projected[0]),
                                            mode='bilinear',
                                            align_corners=False)
            for hms in heatmaps
        ]

        tags = [
            torch.nn.functional.interpolate(tms,
                                            size=(size_projected[1],
                                                  size_projected[0]),
                                            mode='bilinear',
                                            align_corners=False)
            for tms in tags
        ]

    return outputs, heatmaps, tags
Code example #15
    def horisontal_flip(self, images, targets):
        images = torch.flip(images, [-1])
        targets[:, 2] = 1 - targets[:, 2]  # horizontal flip
        targets[:, 6] = -targets[:, 6]  # yaw angle flip

        return images, targets
Code example #16
File: dataset.py Project: Vowowow/resnet101_dcn
def flip(image, label=None):
    if np.random.rand() < 0.5:
        image = torch.flip(image, [3])
        if label is not None:
            label = torch.flip(label, [3])
    return image, label
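A hedged usage sketch: the helper above flips dim 3, so image (and label, when given) are assumed to be batched 4-D NCHW tensors; one random draw decides for both.

import torch
import numpy as np

image = torch.randn(1, 3, 128, 128)                   # N, C, H, W
label = torch.randint(0, 21, (1, 1, 128, 128))        # e.g. a segmentation mask
image_aug, label_aug = flip(image, label)             # flipped together or left untouched
assert image_aug.shape == image.shape and label_aug.shape == label.shape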
Code example #17
 def matrix_to_vector(self, matrix):
     return torch.flip(matrix, dims=[0]).flatten()
Code example #18
def _(x):
    return torch.flip(x, [-1])
Code example #19
 def vector_to_matrix(self, vector):
     output_h, output_w = [
         self.im_H + self.ker_H - 1, self.im_W + self.ker_W - 1
     ]
     return torch.flip(vector.reshape(output_h, output_w), dims=[0])
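Code examples #17 and #19 are inverses of each other when the matrix already has shape (output_h, output_w); a standalone sketch of the round trip, with the sizes as my own assumptions:

import torch

im_H, im_W, ker_H, ker_W = 4, 4, 3, 3                           # assumed sizes
output_h, output_w = im_H + ker_H - 1, im_W + ker_W - 1

m = torch.randn(output_h, output_w)
v = torch.flip(m, dims=[0]).flatten()                            # matrix_to_vector
m_back = torch.flip(v.reshape(output_h, output_w), dims=[0])     # vector_to_matrix
assert torch.equal(m_back, m)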
Code example #20
 def transpose_inverse(self, t, trans_idx):
     # print( 'inverse transpose .. t', t.size())
     t = torch.rot90(t, 4 - trans_idx % 4, [2, 3])
     if trans_idx >= 4:
         t = torch.flip(t, [3])
     return t
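A self-contained round-trip check of the pair in code examples #7 and #20 (rewritten here as free functions, since the originals are methods): for every trans_idx from 0 to 7 the inverse transform undoes the forward one.

import torch

def fwd(t, trans_idx):
    if trans_idx >= 4:
        t = torch.flip(t, [3])
    return torch.rot90(t, trans_idx % 4, [2, 3])

def inv(t, trans_idx):
    t = torch.rot90(t, 4 - trans_idx % 4, [2, 3])
    if trans_idx >= 4:
        t = torch.flip(t, [3])
    return t

x = torch.randn(1, 3, 8, 8)                  # N, C, H, W
for trans_idx in range(8):
    assert torch.equal(inv(fwd(x, trans_idx), trans_idx), x)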
Code example #21
def main():
    # logging.basicConfig(level=logging.DEBUG)

    # Initial parsing looking for `RunPath` ...
    opts = AppSettings()
    opts = update_settings(opts)
    if not opts.path.key:
        raise ValueError('opts.path.key required for evaluation (For now)')
    path = RunPath(opts.path)

    # Re-parse full args with `base_opts` as default instead
    # TODO(ycho): Verify if this works.
    base_opts = update_settings(
        opts, argv=['--config_file',
                    str(path.dir / 'opts.yaml')])
    opts = update_settings(base_opts)

    # Instantiation ...
    device = resolve_device(opts.device)
    model = KeypointNetwork2D(opts.model).to(device)

    # Load checkpoint.
    ckpt_file = get_latest_file(path.ckpt)
    print('ckpt = {}'.format(ckpt_file))
    Saver(model, None).load(ckpt_file)

    # NOTE(ycho): Forcing data loading on the CPU.
    # TODO(ycho): Consider scripted compositions?
    transform = Compose([
        DenseMapsMobilePose(opts.maps, th.device('cpu:0')),
        Normalize(Normalize.Settings()),
        InstancePadding(opts.padding)
    ])
    _, test_loader = get_loaders(opts.dataset,
                                 device=th.device('cpu:0'),
                                 batch_size=opts.batch_size,
                                 transform=transform)

    model.eval()
    for data in test_loader:
        # Now that we're here, convert all inputs to the device.
        data = {
            k: (v.to(device) if isinstance(v, th.Tensor) else v)
            for (k, v) in data.items()
        }
        image = data[Schema.IMAGE]
        image_scale = th.as_tensor(image.shape[-2:])  # (h,w) order
        print('# instances = {}'.format(data[Schema.INSTANCE_NUM]))
        with th.no_grad():
            outputs = model(image)

            heatmap = outputs[Schema.HEATMAP]
            kpt_heatmap = outputs[Schema.KEYPOINT_HEATMAP]

            # FIXME(ycho): hardcoded obj==1 assumption
            scores, indices = decode_kpt_heatmap(kpt_heatmap,
                                                 max_num_instance=4)

            # hmm...
            upsample_ratio = th.as_tensor(image_scale /
                                          th.as_tensor(heatmap.shape[-2:]),
                                          device=indices.device)
            upsample_ratio = upsample_ratio[None, None, None, :]

        scaled_indices = indices * upsample_ratio

        # Visualize inferred keypoints ...
        if False:
            # FIXME(ycho): Pedantically incorrect!!
            heatmap_vis = DrawKeypointMap(
                DrawKeypointMap.Settings(as_displacement=False))(heatmap)
            kpt_heatmap_vis = DrawKeypointMap(
                DrawKeypointMap.Settings(as_displacement=False))(kpt_heatmap)

            fig, ax = plt.subplots(3, 1)
            hv_cpu = heatmap_vis[0].detach().cpu().numpy().transpose(1, 2, 0)
            khv_cpu = kpt_heatmap_vis[0].detach().cpu().numpy().transpose(
                1, 2, 0)
            img_cpu = th.clip(0.5 + (image[0] * 0.25), 0.0,
                              1.0).detach().cpu().numpy().transpose(1, 2, 0)
            ax[0].imshow(hv_cpu)
            ax[1].imshow(khv_cpu / khv_cpu.max())
            ax[2].imshow(img_cpu)
            plt.show()

        # scores = (32,9,4)
        # (i,j)  = (32,2,9,4)
        for i_batch in range(scores.shape[0]):
            # GROUND_TRUTH
            kpt_in = data[Schema.KEYPOINT_2D][i_batch, ..., :2]
            kpt_in = kpt_in * image_scale.to(kpt_in.device)
            # X-Y order (J-I order)
            # print(kpt_in)

            # print(scaled_indices[i_batch])  # Y-X order (I-J order)
            print('scale.shape')  # 32,4,3
            print(data[Schema.SCALE].shape)
            sol = compute_pose_epnp(
                data[Schema.PROJECTION][i_batch],
                # not estimating scale info for now ...,
                data[Schema.SCALE][i_batch],
                th.flip(scaled_indices[i_batch], dims=(-1, )) /
                image_scale.to(scaled_indices.device))
            if sol is None:
                continue
            R, T = sol
            print(R, data[Schema.ORIENTATION][i_batch])
            print(T, data[Schema.TRANSLATION][i_batch])
            break

        np.save(F'/tmp/heatmap.npy', heatmap.cpu().numpy())
        np.save(F'/tmp/kpt_heatmap.npy', kpt_heatmap.cpu().numpy())
        break
Code example #22
File: encoder.py Project: naoymd/IMDb_classification
 def reverse_input(self, input):
     reverse_input = torch.flip(input, [1])
     return reverse_input
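A tiny standalone sketch of what this flip along dim 1 does for a batch-first (batch, seq_len) input: it reverses the token order of each sequence.

import torch

tokens = torch.arange(6).reshape(1, 6)       # one sequence of 6 token ids, batch first
assert torch.equal(torch.flip(tokens, [1]),
                   torch.tensor([[5, 4, 3, 2, 1, 0]]))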
Code example #23
                    image = np.zeros((TEST_WINDOW, TEST_WINDOW, 3),
                                     dtype=np.uint8)
                    for fl in range(3):
                        image[:, :, fl] = layers[fl].read(
                            window=Window.from_slices((x1, x2), (y1, y2)))

                #           print("Test {}-{}:Shape is:{}".format(filename,index,image.shape))
                image = cv2.resize(image, (TEST_NEW_SIZE, TEST_NEW_SIZE))
                image = trfm(image)
                with torch.no_grad():
                    if Open_Classifer:
                        image = image.to(DEVICE)[
                            None]  # add the batch dimension; testing here is done per image
                        score = model(image)[0][0][0]

                        score2 = model(torch.flip(image, [0, 3]))[0]
                        score2 = torch.flip(score2, [3, 0])[0][0]

                        score3 = model(torch.flip(image, [1, 2]))[0]
                        score3 = torch.flip(score3, [2, 1])[0][0]
                    else:
                        image = image.to(DEVICE)[
                            None]  # add the batch dimension; testing here is done per image
                        score = model(image)[0][0]

                        score2 = model(torch.flip(image, [0, 3]))
                        score2 = torch.flip(score2, [3, 0])[0][0]

                        score3 = model(torch.flip(image, [1, 2]))
                        score3 = torch.flip(score3, [2, 1])[0][0]
Code example #24
def horizontal_flip(images, targets):
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
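A minimal, assumed usage of the function above: images is an NCHW batch, and the targets layout (image index, class, x-center, y-center, w, h) is my assumption; 1 - x mirrors the normalized box centers to match the flipped images.

import torch

images = torch.randn(4, 3, 416, 416)                          # N, C, H, W
targets = torch.tensor([[0., 1., 0.25, 0.5, 0.1, 0.2]])       # assumed layout
images_f, targets_f = horizontal_flip(images.clone(), targets.clone())
assert targets_f[0, 2] == 0.75                                # x-center mirrored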
Code example #25
def random_hflip(tensor, prob):
    if prob > random():
        return tensor
    return torch.flip(tensor, dims=(3, ))
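A hedged usage note (assuming random() here is random.random()): the guard `if prob > random()` keeps the tensor unchanged with probability prob and flips dim 3 otherwise, so the input is expected to be 4-D (NCHW).

import torch

x = torch.randn(2, 3, 32, 32)                # N, C, H, W
y = random_hflip(x, 0.5)                     # unchanged with p=0.5, width-flipped otherwise
assert y.shape == x.shape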
Code example #26
File: utils.py Project: whjzsy/SiamRCNN
def flip_tensor(x):
    return torch.flip(x, [x.dim() - 1])
Code example #27
File: losses.py Project: Jacob-Bishop/TTS
    def forward(self, postnet_output, decoder_output, mel_input, linear_input,
                stopnet_output, stopnet_target, output_lens, decoder_b_output,
                alignments, alignment_lens, alignments_backwards, input_lens):

        return_dict = {}
        # decoder and postnet losses
        if self.config.loss_masking:
            decoder_loss = self.criterion(decoder_output, mel_input,
                                          output_lens)
            if self.config.model in ["Tacotron", "TacotronGST"]:
                postnet_loss = self.criterion(postnet_output, linear_input,
                                              output_lens)
            else:
                postnet_loss = self.criterion(postnet_output, mel_input,
                                              output_lens)
        else:
            decoder_loss = self.criterion(decoder_output, mel_input)
            if self.config.model in ["Tacotron", "TacotronGST"]:
                postnet_loss = self.criterion(postnet_output, linear_input)
            else:
                postnet_loss = self.criterion(postnet_output, mel_input)
        loss = decoder_loss + postnet_loss
        return_dict['decoder_loss'] = decoder_loss
        return_dict['postnet_loss'] = postnet_loss

        # stopnet loss
        stop_loss = self.criterion_st(
            stopnet_output, stopnet_target,
            output_lens) if self.config.stopnet else torch.zeros(1)
        if not self.config.separate_stopnet and self.config.stopnet:
            loss += stop_loss
        return_dict['stopnet_loss'] = stop_loss

        # backward decoder loss (if enabled)
        if self.config.bidirectional_decoder:
            if self.config.loss_masking:
                decoder_b_loss = self.criterion(
                    torch.flip(decoder_b_output, dims=(1, )), mel_input,
                    output_lens)
            else:
                decoder_b_loss = self.criterion(
                    torch.flip(decoder_b_output, dims=(1, )), mel_input)
            decoder_c_loss = torch.nn.functional.l1_loss(
                torch.flip(decoder_b_output, dims=(1, )), decoder_output)
            loss += decoder_b_loss + decoder_c_loss
            return_dict['decoder_b_loss'] = decoder_b_loss
            return_dict['decoder_c_loss'] = decoder_c_loss

        # double decoder consistency loss (if enabled)
        if self.config.double_decoder_consistency:
            decoder_b_loss = self.criterion(decoder_b_output, mel_input,
                                            output_lens)
            # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output)
            attention_c_loss = torch.nn.functional.l1_loss(
                alignments, alignments_backwards)
            loss += decoder_b_loss + attention_c_loss
            return_dict['decoder_coarse_loss'] = decoder_b_loss
            return_dict['decoder_ddc_loss'] = attention_c_loss

        # guided attention loss (if enabled)
        if self.config.ga_alpha > 0:
            ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens)
            loss += ga_loss * self.ga_alpha
            return_dict['ga_loss'] = ga_loss * self.ga_alpha

        return_dict['loss'] = loss
        return return_dict
Code example #28
 def __call__(self, sample):
     new_sample = torch.stack((sample, torch.flip(sample, [2])))
     return new_sample
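A standalone sketch of the same test-time-augmentation idea (the function name is mine, not from the source): stack the original CHW sample with its width-flipped copy so a caller can average the two predictions.

import torch

def hflip_pair(sample):                      # standalone equivalent of the __call__ above
    return torch.stack((sample, torch.flip(sample, [2])))

sample = torch.randn(3, 64, 64)              # C, H, W
pair = hflip_pair(sample)
assert pair.shape == (2, 3, 64, 64)
assert torch.equal(torch.flip(pair[1], [2]), pair[0])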
Code example #29
    def inference(self, input, target):
        ########
        # TODO #
        ########
        # Implement Beam Search here
        # This function is called with batch size = 1
        # input  = [batch size, input len, vocab size]
        # target = [batch size, target len, vocab size]
        batch_size = input.shape[0]
        input_len = input.shape[1]        # get the maximum number of tokens
        vocab_size = self.decoder.cn_vocab_size

        # prepare a buffer to store the outputs
        outputs = torch.zeros(batch_size, input_len,
                              vocab_size).to(self.device)

        # feed the input into the Encoder
        encoder_outputs, hidden = self.encoder(input)
        # the Encoder's final hidden state is used to initialize the Decoder
        # encoder_outputs is mainly used for Attention
        # the Encoder is a bidirectional RNN, so the hidden states of both directions of each layer are concatenated
        # hidden = [num_layers * directions, batch size, hid dim] --> [num_layers, directions, batch size, hid dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        # take the <BOS> token
        input = target[:, 0]
        preds = []
        # print(input.shape)
        # print(input.shape)
        output, hidden = self.decoder(input, hidden, encoder_outputs)
        # store the prediction
        # keep the top-k candidates at each step so we can backtrack later
        # each entry of index holds k pairs of (own index, parent index)
        outputs[:, 0] = output
        index = []
        probabilities = []
        # take the most probable words
        # top1 and top2 are indices
        prob = F.softmax(output, dim=1)
        # print(torch.topk(prob, 2, dim=1)[1][0])
        top1, top2 = torch.topk(prob, 2, dim=1)[1][0]
        index.append([(top1, top1), (top2, top2)])

        probabilities.append([prob[0][top1], prob[0][top2]])
        input = top1.view(-1)
        # print(input)
        hidden1 = hidden
        input2 = top2.view(-1)
        # preds.append(top1.unsqueeze(1))
        for t in range(1, input_len-1):
            # the input is cleverly treated as a sentence of length 1, so feeding it to the RNN runs a single time step
            # the output of that time step is then manually fed as the input to the decoder at the next time step
            # output: (batch size, vocab size) -> which vocab is most likely; hidden is the hidden state of every decoder RNN layer at the last time step
            # print(input.shape)

            output, hidden = self.decoder(input, hidden, encoder_outputs)
            # print(t)
            output1, hidden1 = self.decoder(input2, hidden1, encoder_outputs)
            # store the prediction
            outputs[:, t] = output
            # take the top two predictions from each of the two previous candidates, giving 4 possibilities
            # then keep the two largest of those 4
            prob = F.softmax(output, dim=1)
            top1, top2 = torch.topk(prob, 2, dim=1)[1][0]
            prob2 = F.softmax(output1, dim=1)
            top21, top22 = torch.topk(prob2, 2, dim=1)[1][0]
            compare = [probabilities[t-1][0]*prob[0][top1],
                       probabilities[t-1][0]*prob[0][top2], probabilities[t-1][1]*prob2[0][top21], probabilities[t-1][1]*prob2[0][top22]]
            compare = torch.tensor(compare).to(self.device)
            compare2 = [top1, top2, top21, top22]
            compare2 = torch.tensor(compare2).to(self.device)
            # indices of the two largest of the four candidates
            top_index, top_index2 = torch.topk(compare, 2, dim=0)[1]
            if top_index >= 2:
                if top_index2 >= 2:
                    index.append([(compare2[top_index], input2),
                                  (compare2[top_index2], input2)])
                    probabilities.append(
                        [compare[top_index], compare[top_index2]])
                    input = compare2[top_index].view(-1)
                    input2 = compare2[top_index2].view(-1)
                else:
                    index.append([(compare2[top_index], input2),
                                  (compare2[top_index2], input)])
                    probabilities.append(
                        [compare[top_index], compare[top_index2]])
                    input = compare2[top_index].view(-1)
                    input2 = compare2[top_index2].view(-1)
            else:
                if top_index2 >= 2:
                    index.append([(compare2[top_index], input),
                                  (compare2[top_index2], input2)])
                    probabilities.append(
                        [compare[top_index], compare[top_index2]])
                    input = compare2[top_index].view(-1)
                    input2 = compare2[top_index2].view(-1)
                else:
                    index.append([(compare2[top_index], input),
                                  (compare2[top_index2], input)])
                    probabilities.append(
                        [compare[top_index], compare[top_index2]])
                    input = compare2[top_index].view(-1)
                    input2 = compare2[top_index2].view(-1)
                # print(input)
        # Trace the best final hypothesis back through the parent pointers.
        if probabilities[input_len-2][0] > probabilities[input_len-2][1]:
            preds.append(index[input_len-2][0][0])
            parent = index[input_len-2][0][1]
            for i in range(input_len-3, -1, -1):
                # index[i]: [(self, parent), (self, parent)]
                if index[i][0][0] == parent:
                    preds.append(parent)
                    parent = index[i][0][1]
                else:
                    preds.append(parent)
                    parent = index[i][1][1]
        else:
            # Symmetric backtrace when the second hypothesis ends with the higher probability.
            preds.append(index[input_len-2][1][0])
            parent = index[input_len-2][1][1]
            for i in range(input_len-3, -1, -1):
                if index[i][0][0] == parent:
                    preds.append(parent)
                    parent = index[i][0][1]
                else:
                    preds.append(parent)
                    parent = index[i][1][1]

        preds = torch.tensor(preds)
        preds = torch.flip(preds, dims=[0])
        preds = preds.view(1, -1)
        # print(preds)
        # preds = torch.cat(preds, 1)
        # print(preds.shape)
        # outputs holds the decoder's prediction (a vocab-size vector) for every time step;
        # the argmax at each step is what was fed in as the next step's input.
        return outputs, preds
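The width-2 search above tracks parent indices explicitly and backtracks at the end. As a point of comparison, here is a minimal generic beam-search sketch in which each hypothesis carries its full token list, so no backtracking is needed; `step_fn`, its signature, and the use of log-probabilities are assumptions, not part of the code above.

import torch
import torch.nn.functional as F

def beam_search(step_fn, bos_token, max_len, beam_width=2):
    # step_fn(prev_token, state) -> (logits over the vocabulary, new state)  [assumed interface]
    beams = [([bos_token], 0.0, None)]          # (tokens, cumulative log-prob, state)
    for _ in range(max_len):
        candidates = []
        for tokens, score, state in beams:
            logits, new_state = step_fn(tokens[-1], state)
            log_probs = F.log_softmax(logits, dim=-1)
            top_lp, top_idx = torch.topk(log_probs, beam_width)
            for lp, idx in zip(top_lp.tolist(), top_idx.tolist()):
                candidates.append((tokens + [idx], score + lp, new_state))
        # keep only the `beam_width` best hypotheses
        beams = sorted(candidates, key=lambda b: b[1], reverse=True)[:beam_width]
    return beams[0][0]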
コード例 #30
0
def train(model, dataset, val_dataset=None, n_epoch=1, lr=0.1, print_every=100, log_every=100, val_every=2000,
          focalloss=None, device=None, verbose=False):
    print("====================== train ======================")
    t0 = time.time()
    log = dict(train=list(), val=list(), val_seen=list())
    if focalloss is not None:
        loss_fn = FocalLoss(weight=None, gamma=focalloss)
    else:
        loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8, drop_last=False)
    for i in range(n_epoch):
        t1 = time.time()
        for j, data in enumerate(dataloader):
            model.train()
            model.zero_grad()
            x, f, a_mat, d_mat, gt_idxs = data
            if verbose:
                print("x", x.size(), "f", f.size(), "a_mat", a_mat.size(), "d_mat", d_mat.size(),
                      "gt_idxs", gt_idxs.size())
            n = len(gt_idxs[0, :])
            if verbose:
                print(j, list(gt_idxs[0, :]), n)
            x_tensor = x[0, :].float()
            f_tensor = f[0, :].float()
            a_tensor = a_mat[0, :].float()
            d_tensor = d_mat[0, :].float()
            g_tensor = gt_idxs[0, :].long()
            if device is not None:
                x_tensor = x_tensor.to(device)
                f_tensor = f_tensor.to(device)
                a_tensor = a_tensor.to(device)
                d_tensor = d_tensor.to(device)
                g_tensor = g_tensor.to(device)
            scores, _ = model(x_tensor, f_tensor, a_tensor, d_tensor)
            loss_f = loss_fn(scores, g_tensor.long())
            loss_r = loss_fn(scores, torch.flip(g_tensor, (0, )).long())
            loss = torch.min(loss_f, loss_r)
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                model.seen += 1
                if (j+1) % log_every == 0:
                    log["train"].append(dict(epoch=i+1, iter=j+1, seen=model.seen, loss=loss.item()))
                if (j+1) % print_every == 0:
                    print("epoch {} loss {:.4f}".format(i, loss.data))
                if (j+1) % val_every == 0:
                    if val_dataset is not None:
                        with torch.no_grad():
                            val_acc = val(model, val_dataset, device=device, verbose=False)
                            log["val_seen"].append(dict(seen=model.seen, acc=val_acc))
                            t2 = time.time()
                            eta = (t2 - t1) / float(j + 1) * float(len(dataset) - j - 1)
                            print("seen {}, acc {:.4f}, {:.1f}s to go for this epoch".format(model.seen, val_acc, eta))
        if val_dataset is not None:
            with torch.no_grad():
                val_acc = val(model, val_dataset, device=device, verbose=False)
                log["val"].append(dict(epoch=i, acc=val_acc))
                print("seen {}, acc {:.4f}".format(model.seen, val_acc))
        t2 = time.time()
        eta = (t2-t0) / float(i+1) * float(n_epoch-i-1)
        print("time elapsed {:.1f}s, {:.1f}s for this epoch, {:.1f}s to go".format(t2-t0, t2-t1, eta))
        print("=======================================================================================================")
    return model, log
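The training loop above scores the prediction against both the ground-truth ordering and its reverse, then keeps the smaller loss, so the model is not penalised when it recovers the sequence backwards. A minimal sketch of that pattern with dummy shapes:

import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()
scores = torch.randn(5, 10)              # 5 positions, 10 classes (dummy data)
target = torch.randint(0, 10, (5,))      # ground-truth class index per position
loss_fwd = loss_fn(scores, target)
loss_rev = loss_fn(scores, torch.flip(target, (0,)))
loss = torch.min(loss_fwd, loss_rev)     # order-invariant loss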
コード例 #31
0
ファイル: train.py プロジェクト: Guo-Xiaoqing/ThresholdNet
def test_one_epoch(dataset, DATAloader, net, epoch):
    #### start testing now
    Acc_array = 0.
    Prec_array = 0.
    Spe_array = 0.
    Rec_array = 0.
    IoU_array = 0.
    Dice_array = 0.
    HD_array = 0.
    sample_num = 0.
    result_list = []
    CEloss_list = []
    JAloss_list = []
    Label_list = []
    net.eval()
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(DATAloader):
            name_batched = sample_batched['name']
            row_batched = sample_batched['row']
            col_batched = sample_batched['col']

            [batch, channel, height, width] = sample_batched['image'].size()
            multi_avg = torch.zeros(
                (batch, cfg.MODEL_NUM_CLASSES, height, width),
                dtype=torch.float32).to(1)
            labels_batched = sample_batched['segmentation'].cpu().numpy()
            for rate in cfg.TEST_MULTISCALE:
                inputs_batched = sample_batched['image_%f' % rate]
                _, predicts, threshold = net(inputs_batched)
                predicts = predicts.to(1)
                threshold = threshold.to(1)
                predicts_batched = predicts.clone()
                threshold_batched = threshold.clone()
                del predicts
                del threshold
                if cfg.TEST_FLIP:
                    inputs_batched_flip = torch.flip(inputs_batched, [3])
                    _, predicts_flip, threshold_flip = net(inputs_batched_flip)
                    predicts_flip = torch.flip(predicts_flip, [3]).to(1)
                    threshold_flip = torch.flip(threshold_flip, [3]).to(1)
                    predicts_batched_flip = predicts_flip.clone()
                    threshold_batched_flip = threshold_flip.clone()
                    del predicts_flip
                    del threshold_flip
                    predicts_batched = (predicts_batched +
                                        predicts_batched_flip) / 2.0
                    threshold_batched = (threshold_batched +
                                         threshold_batched_flip) / 2.0

                predicts_batched = F.interpolate(predicts_batched,
                                                 size=None,
                                                 scale_factor=1 / rate,
                                                 mode='bilinear',
                                                 align_corners=True)
                threshold_batched = F.interpolate(threshold_batched,
                                                  size=None,
                                                  scale_factor=1 / rate,
                                                  mode='bilinear',
                                                  align_corners=True)
                multi_avg = multi_avg + predicts_batched
                del predicts_batched

            multi_avg = multi_avg / len(cfg.TEST_MULTISCALE)
            multi_avg = nn.Softmax(dim=1)(multi_avg)
            multi_avg = multi_avg - threshold_batched
            result = torch.argmax(multi_avg,
                                  dim=1).cpu().numpy().astype(np.uint8)
            threshold = threshold_batched.cpu().numpy()
            del threshold_batched

            for i in range(batch):
                row = row_batched[i]
                col = col_batched[i]
                p = result[i, :, :]
                l = labels_batched[i, :, :]
                thres = threshold[i, 1, :, :]
                #p = cv2.resize(p, dsize=(col,row), interpolation=cv2.INTER_NEAREST)
                #l = cv2.resize(l, dsize=(col,row), interpolation=cv2.INTER_NEAREST)
                predict = np.int32(p)
                gt = np.int32(l)
                cal = gt < 255
                mask = (predict == gt) * cal
                TP = np.zeros((cfg.MODEL_NUM_CLASSES), np.uint64)
                TN = np.zeros((cfg.MODEL_NUM_CLASSES), np.uint64)
                P = np.zeros((cfg.MODEL_NUM_CLASSES), np.uint64)
                T = np.zeros((cfg.MODEL_NUM_CLASSES), np.uint64)

                P = np.sum((predict == 1)).astype(np.float64)
                T = np.sum((gt == 1)).astype(np.float64)
                TP = np.sum((gt == 1) * (predict == 1)).astype(np.float64)
                TN = np.sum((gt == 0) * (predict == 0)).astype(np.float64)

                Acc = (TP + TN) / (T + P - TP + TN)
                Prec = TP / (P + 1e-10)
                Spe = TN / (P - TP + TN)
                Rec = TP / T
                DICE = 2 * TP / (T + P)
                IoU = TP / (T + P - TP)
                #	HD = max(directed_hausdorff(predict, gt)[0], directed_hausdorff(predict, gt)[0])
                #	HD = 2*Prec*Rec/(Rec+Prec+1e-10)
                beta = 2
                HD = Rec * Prec * (1 + beta**2) / (Rec + beta**2 * Prec +
                                                   1e-10)
                Acc_array += Acc
                Prec_array += Prec
                Spe_array += Spe
                Rec_array += Rec
                Dice_array += DICE
                IoU_array += IoU
                HD_array += HD
                sample_num += 1
                #p = cv2.resize(p, dsize=(col,row), interpolation=cv2.INTER_NEAREST)
                result_list.append({
                    'predict': np.uint8(p * 255),
                    'threshold': np.uint8(thres * 255),
                    'label': np.uint8(l * 255),
                    'IoU': IoU,
                    'name': name_batched[i]
                })

        Acc_score = Acc_array * 100 / sample_num
        Prec_score = Prec_array * 100 / sample_num
        Spe_score = Spe_array * 100 / sample_num
        Rec_score = Rec_array * 100 / sample_num
        Dice_score = Dice_array * 100 / sample_num
        IoUP = IoU_array * 100 / sample_num
        HD_score = HD_array * 100 / sample_num
        print(
            '%10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%   %10s:%7.3f%%\n'
            % ('Acc', Acc_score, 'Sen', Rec_score, 'Spe', Spe_score, 'Prec',
               Prec_score, 'Dice', Dice_score, 'Jac', IoUP, 'F2', HD_score))
        if epoch % 50 == 0:
            dataset.save_result_train_thres(result_list, cfg.MODEL_NAME)

        return Dice_score, IoUP
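The TEST_FLIP branch above averages predictions over the original and horizontally flipped inputs. A self-contained sketch of that flip test-time augmentation; `net` here is a stand-in for any (N, C, H, W) -> (N, K, H, W) segmentation model.

import torch

def flip_tta(net, images):
    # images: (N, C, H, W); flip along the width axis, predict, flip back, average
    pred = net(images)
    pred_flip = net(torch.flip(images, [3]))
    return 0.5 * (pred + torch.flip(pred_flip, [3]))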