예제 #1
0
    def __init__(self, K, m, T, n_timesteps, n_classes, batch_size, n_channels, n_lstm_out=128,
                 n_lstm_layers=1, fc_out=100, Conv1_NF=128, Conv2_NF=256, Conv3_NF=128,
                 lstmDropP=0.8, FC_DropP=0.3, SEB=True, is_attention=False,
                 is_tpa=False, device='cuda'):
        """
        Build two ``Fcn`` encoders, a linear classification head and one key queue per class.

        K : queue size; number of keys stored for each class
        m : moco momentum of updating key encoder
        T : softmax temperature
        n_classes : number of classes (one queue, one pointer and one attention row each)
        fc_out : width of the queued feature vectors and input width of ``self.fc``

        Every other argument is forwarded unchanged to both ``Fcn`` constructors.
        """
        super(MoCoFcn, self).__init__()

        self.K = K
        self.m = m
        self.T = T
        self.n_classes = n_classes

        # Query and key encoders are built from the exact same arguments.
        # The trailing positional ``True`` is presumably Fcn's MoCo flag -- TODO confirm.
        encoder_args = (n_timesteps, n_classes, batch_size, n_channels, n_lstm_out,
                        n_lstm_layers, fc_out, Conv1_NF, Conv2_NF, Conv3_NF,
                        lstmDropP, FC_DropP, SEB, is_attention, is_tpa, True)
        self.encoder_q = Fcn(*encoder_args, device=device)
        self.encoder_k = Fcn(*encoder_args, device=device)

        # One learnable attention row of length K per class.
        self.input_attention_negatives = nn.Parameter(torch.randn(self.n_classes, K))

        self.fc = nn.Linear(fc_out, n_classes)
        self.softmax = nn.Softmax(dim=-1)

        # we make queue for each classes and get negative value through input attention
        # Each stored key is a column along the feature axis (dim=1), so that is
        # the axis that must have unit norm; dim=0 would normalise across classes.
        self.register_buffer("queue",
                             nn.functional.normalize(torch.randn(self.n_classes, fc_out, K), dim=1))  # (n_classes, dim, K)
        # One write pointer per class.
        self.register_buffer("queue_ptr", torch.zeros((n_classes, 1), dtype=torch.long))  # (n_classes, 1)
예제 #2
0
    def __init__(self,
                 input_dim,
                 dim,
                 n_timesteps,
                 n_classes,
                 batch_size,
                 n_channels,
                 K=65536,
                 T=0.7,
                 m=0.999,
                 n_lstm_out=128,
                 n_lstm_layers=1,
                 Conv1_NF=128,
                 Conv2_NF=256,
                 Conv3_NF=128,
                 lstmDropP=0.8,
                 FC_DropP=0.3,
                 SEB=True,
                 is_attention=False,
                 is_tpa=False,
                 device='cuda'):
        """
        Build an ``Fcn`` backbone, two projection MLPs and one key queue per class.

        input_dim : input width of both projection MLPs; also handed to ``Fcn``
            as its 7th positional argument (presumably the backbone output
            width -- TODO confirm against ``Fcn``)
        dim : output width of the projection MLPs and feature width of the queue
        n_classes : number of classes (one queue, one pointer and one attention row each)
        batch_size : must divide ``K`` evenly
        K : queue size; number of keys stored for each class
        T : softmax temperature
        m : momentum coefficient, stored as ``self.m``

        Every other argument is forwarded unchanged to ``Fcn``.
        """
        super(FcnMoCo, self).__init__()
        self.fcn = Fcn(n_timesteps,
                       n_classes,
                       batch_size,
                       n_channels,
                       n_lstm_out,
                       n_lstm_layers,
                       input_dim,
                       Conv1_NF,
                       Conv2_NF,
                       Conv3_NF,
                       lstmDropP,
                       FC_DropP,
                       SEB,
                       is_attention,
                       is_tpa,
                       is_moco=True,
                       device=device)

        self.K = K
        assert K % batch_size == 0, "K must be a multiple of batch_size"

        self.m = m
        self.T = T
        self.n_classes = n_classes

        self.softmax = nn.LogSoftmax(dim=1)
        # Query / key projection heads with identical architecture.
        self.encoder_q = nn.Sequential(nn.Linear(input_dim, dim), nn.ReLU(),
                                       nn.Linear(dim, dim))
        self.encoder_k = nn.Sequential(nn.Linear(input_dim, dim), nn.ReLU(),
                                       nn.Linear(dim, dim))

        # One learnable attention row of length K per class.
        self.input_attention_negatives = nn.Parameter(
            torch.randn(self.n_classes, K))

        # we make queue for each classes and get negative value through input attention
        # Each stored key is a column along the feature axis (dim=1), so that is
        # the axis that must have unit norm; dim=0 would normalise across classes.
        self.register_buffer(
            "queue",
            nn.functional.normalize(torch.randn(self.n_classes, dim, K),
                                    dim=1))  # (n_classes, dim, K)
        # One write pointer per class.
        self.register_buffer("queue_ptr",
                             torch.zeros((n_classes, 1),
                                         dtype=torch.long))  # (n_classes, 1)
예제 #3
0
                                'upstream_models')
                            args.downstream_model_path = os.path.join(
                                'output', PROJECT_NAME, args.exp_name,
                                'downstream_models')
                            args.project_name = PROJECT_NAME

                            if not os.path.exists(args.upstream_model_path):
                                os.makedirs(args.upstream_model_path)

                            if not os.path.exists(args.downstream_model_path):
                                os.makedirs(args.downstream_model_path)

                            if args.model_type == 'lstm_fcn':
                                model = Fcn(n_timesteps=adjusted_window_length,
                                            n_channels=nfeature,
                                            n_classes=nclass,
                                            is_attention=False,
                                            device=device)

                            elif args.model_type == 'alstm_fcn':
                                model = Fcn(n_timesteps=adjusted_window_length,
                                            n_channels=nfeature,
                                            n_classes=nclass,
                                            is_attention=True,
                                            device=device)

                            elif args.model_type == 'tpa_fcn':
                                model = Fcn(n_timesteps=adjusted_window_length,
                                            n_channels=nfeature,
                                            n_classes=nclass,
                                            is_tpa=True,
예제 #4
0
        self.ys = ys
        xs = np.transpose(xs, axes=(0, 2, 1))
        assert len(self.xs) == len(self.ys)

    def __len__(self):
        return len(self.xs)

    def __getitem__(self, idx):
        return {'x': self.xs[idx], 'y': self.ys[idx]}


if __name__ == '__main__':
    # Train one plain (non-attention) Fcn per candidate dataset and report the
    # wall-clock time of each run. Loop and unpacking names are module globals
    # here, so they are kept exactly as they were.
    for i, dn in enumerate(candidate_datasets):
        (nfeature, ntimestep, nclass,
         x_train, y_train,
         x_test, y_test) = load_info_raw_ts('data/', dataset=dn)

        model = Fcn(n_timesteps=ntimestep, n_channels=nfeature,
                    n_classes=nclass, is_attention=False, device=device)

        # Alternative that was tried: optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

        model.to(device)
        t_total = time.time()
        train(model, optimizer, x_train, y_train, x_test, y_test)
        print("Optimization Finished!")
        print(f"Total time elapsed: {time.time() - t_total:.4f}s")

        # Release cached GPU memory before the next dataset's model is built.
        torch.cuda.empty_cache()
예제 #5
0
class MoCoFcn(nn.Module):
    """
    MoCo-style contrastive model built from two ``Fcn`` encoders and one key queue per class.

    ``forward`` expects a batch whose samples all carry the same label. Keys of
    every *other* class are read from the queues and used as negatives, pooled
    across classes with learned attention weights. A linear head on top of the
    query features produces class probabilities.
    """

    def __init__(self, K, m, T, n_timesteps, n_classes, batch_size, n_channels, n_lstm_out=128,
                 n_lstm_layers=1, fc_out=100, Conv1_NF=128, Conv2_NF=256, Conv3_NF=128,
                 lstmDropP=0.8, FC_DropP=0.3, SEB=True, is_attention=False,
                 is_tpa=False, device='cuda'):
        """
        K : queue size; number of keys stored for each class
        m : moco momentum of updating key encoder
        T : softmax temperature
        n_classes : number of classes (one queue, one pointer and one attention row each)
        fc_out : width of the queued feature vectors and input width of ``self.fc``

        Every other argument is forwarded unchanged to both ``Fcn`` constructors.
        """
        super(MoCoFcn, self).__init__()

        self.K = K
        self.m = m
        self.T = T
        self.n_classes = n_classes

        # Query and key encoders are built from the exact same arguments.
        # The trailing positional ``True`` is presumably Fcn's MoCo flag -- TODO confirm.
        encoder_args = (n_timesteps, n_classes, batch_size, n_channels, n_lstm_out,
                        n_lstm_layers, fc_out, Conv1_NF, Conv2_NF, Conv3_NF,
                        lstmDropP, FC_DropP, SEB, is_attention, is_tpa, True)
        self.encoder_q = Fcn(*encoder_args, device=device)
        self.encoder_k = Fcn(*encoder_args, device=device)
        # NOTE(review): the key encoder is not initialised from the query
        # encoder here; confirm whether the training script copies the weights.

        # One learnable attention row of length K per class.
        self.input_attention_negatives = nn.Parameter(torch.randn(self.n_classes, K))

        self.fc = nn.Linear(fc_out, n_classes)
        self.softmax = nn.Softmax(dim=-1)

        # we make queue for each classes and get negative value through input attention
        # Each stored key is a column along the feature axis (dim=1), so that is
        # the axis that must have unit norm; dim=0 would normalise across classes.
        self.register_buffer("queue",
                             nn.functional.normalize(torch.randn(self.n_classes, fc_out, K), dim=1))  # (n_classes, dim, K)
        # One write pointer per class.
        self.register_buffer("queue_ptr", torch.zeros((n_classes, 1), dtype=torch.long))  # (n_classes, 1)

    @torch.no_grad()
    def _momentum_update_key_enocder(self):
        """
        Momentum update of the key encoder: ``k <- m * k + (1 - m) * q``.
        """
        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            # Exponential moving average: the two terms are added. Multiplying
            # them, as the previous code did, is not a moving average.
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    # Correctly spelled alias; the misspelled name above stays for existing callers.
    _momentum_update_key_encoder = _momentum_update_key_enocder

    @torch.no_grad()
    def _dequeue_and_enqueue(self, c, keys):
        """
        Overwrite the oldest slots of class ``c``'s queue with ``keys``.

        c : integer class index selecting the queue and its pointer
        keys : (batch, dim) tensor; its transpose is written at the pointer

        Raises AssertionError if ``K`` is not a multiple of the batch size.
        """
        batch_size = keys.shape[0]
        ptr = int(self.queue_ptr[c])
        assert self.K % batch_size == 0  # for simplicity
        self.queue[c, :, ptr:ptr + batch_size] = keys.T  # replace the key at ptr (dequeue and enqueue)
        ptr = (ptr + batch_size) % self.K  # wrap around at the end of the queue
        self.queue_ptr[c, 0] = ptr

    def forward(self, tq, tk, ys):
        """
        Compute contrastive logits and class probabilities for a single-class batch.

        tq, tk : inputs for the query and key encoder (same batch size)
        ys : labels of the batch; every entry must be equal

        Returns ``(logits, labels, pred)``:
        logits : (N, 1 + K), positive logit in column 0, divided by ``T``
        labels : (N,) zeros (index of the positive column), on ``logits.device``
        pred : (N, n_classes) softmax output of the linear head

        As a side effect the key features are enqueued into the queue of the
        batch's class.
        """
        # for this model, we don't use augment data to make negative samples
        assert tq.shape[0] == tk.shape[0] == ys.shape[0]
        assert bool((ys == ys[0]).all())
        c = int(ys[0])  # the single class present in this batch

        # instead we bring each positive and negative samples
        zq_c, _ = self.encoder_q(tq)  # (N_c, dim)
        zq_c = nn.functional.normalize(zq_c, dim=1)  # (N_c, dim)

        zk_c, _ = self.encoder_k(tk)  # (N_c, dim)
        zk_c = nn.functional.normalize(zk_c, dim=1)  # (N_c, dim)
        zk_c = zk_c.detach()  # no gradient flows into the key encoder

        # positive logits : (N_c, 1)
        l_pos = torch.einsum('nc,nc->n', zq_c, zk_c).unsqueeze(-1)

        # contribution : we consider n-1 classes data which is negative class from queue
        # queue : (n_classes, dim, K). Work on a snapshot: the queue is written
        # in place further down, and autograd must not see a tensor it saved
        # for backward change underneath it.
        queue = self.queue.clone().detach()
        negative_classes = [ct for ct in range(self.n_classes) if ct != c]
        l_neg = torch.einsum('nd,cdk->cnk', zq_c, queue)
        l_neg = l_neg[negative_classes]  # (n_classes-1, N_c, K)
        l_neg = l_neg.permute(1, 0, 2)  # (N_c, n_classes-1, K)

        # Attention over the negative classes.
        matrix = self.input_attention_negatives[negative_classes]  # (n_classes-1, K)
        alpha = torch.einsum('nck,ck->nc', l_neg, matrix)  # (N_c, n_classes-1)
        alpha = F.softmax(alpha, dim=-1).unsqueeze(-1)  # (N_c, n_classes-1, 1)
        l_neg = (l_neg * alpha).sum(1)  # (N_c, K)

        logits = torch.cat([l_pos, l_neg], dim=1)
        logits /= self.T  # apply temperature
        # labels: positive key indicators; created on the logits' device so the
        # model also runs without CUDA.
        labels = torch.zeros(logits.shape[0], dtype=torch.long, device=logits.device)
        self._dequeue_and_enqueue(c, zk_c)  # dequeue and enqueue

        # fine tuning part
        pred = self.fc(zq_c)
        pred = self.softmax(pred)

        return logits, labels, pred
예제 #6
0
class MoCoFcnMixed(nn.Module):
    """
    MoCo-style contrastive model with one key queue per class, for mixed-class batches.

    For every query the negatives are the queued keys of all classes other than
    the query's own; their logits are summed over those classes. A linear head
    on the query features produces class probabilities.
    """

    def __init__(self,
                 K,
                 m,
                 T,
                 n_timesteps,
                 n_classes,
                 batch_size,
                 n_channels,
                 n_lstm_out=128,
                 n_lstm_layers=1,
                 fc_out=100,
                 Conv1_NF=128,
                 Conv2_NF=256,
                 Conv3_NF=128,
                 lstmDropP=0.8,
                 FC_DropP=0.3,
                 SEB=True,
                 is_attention=False,
                 is_tpa=False,
                 device='cuda'):
        """
            K : queue size; number of keys stored for each class
            m : moco momentum of updating key encoder
            T : softmax temperature
            n_classes : number of classes (one queue each, a single shared pointer)
            batch_size : accepted for interface compatibility; not used in this constructor
            fc_out : width of the queued feature vectors and input width of ``self.fc``
            device : stored as ``self.device`` and passed on to ``Fcn``

            Every other argument is forwarded unchanged to both ``Fcn`` constructors.
            """
        super(MoCoFcnMixed, self).__init__()

        self.K = K
        self.m = m
        self.T = T
        self.n_classes = n_classes
        self.device = device

        # Query and key encoders are built from the exact same arguments.
        # NOTE(review): unlike other callers in this code base, ``batch_size``
        # is not passed to Fcn here -- confirm this matches Fcn's signature.
        encoder_args = (n_timesteps, n_classes, n_channels, n_lstm_out,
                        n_lstm_layers, fc_out, Conv1_NF, Conv2_NF,
                        Conv3_NF, lstmDropP, FC_DropP, SEB, is_attention,
                        is_tpa, True, device)
        self.encoder_q = Fcn(*encoder_args)
        self.encoder_k = Fcn(*encoder_args)

        self.fc = nn.Linear(fc_out, n_classes)
        self.softmax = nn.Softmax(dim=-1)

        # we make queue for each classes and get negative value through input attention
        # Each stored key is a column along the feature axis (dim=1), so that is
        # the axis that must have unit norm; dim=0 would normalise across classes.
        self.register_buffer(
            "queue",
            nn.functional.normalize(torch.randn(self.n_classes, fc_out, K),
                                    dim=1))  # (n_classes, dim, K)
        # A single write pointer shared by all class queues.
        self.register_buffer("queue_ptr",
                             torch.zeros(1, dtype=torch.long))  # (1,)

    @torch.no_grad()
    def _momentum_update_key_enocder(self):
        """
        Momentum update of the key encoder: ``k <- m * k + (1 - m) * q``.
        """
        for param_q, param_k in zip(self.encoder_q.parameters(),
                                    self.encoder_k.parameters()):
            # Exponential moving average: the two terms are added. Multiplying
            # them, as the previous code did, is not a moving average.
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    # Correctly spelled alias; the misspelled name above stays for existing callers.
    _momentum_update_key_encoder = _momentum_update_key_enocder

    @torch.no_grad()
    def _dequeue_and_enqueue(self, labels, keys):
        """
        Write each class's keys of the current batch into that class's queue.

        labels : per-sample class indices (array-like, one entry per row of ``keys``)
        keys : (batch, dim) tensor of key features

        Every class gets at most ``batch // n_classes`` slots per call; surplus
        keys of a class are dropped. The shared pointer then advances by that
        slot width and is reset to 0 when a full slot no longer fits.
        """
        batch_size = keys.shape[0]
        # class distribution is assumed to be uniform in the minibatch
        adjusted_batch_size = int(batch_size / self.n_classes)
        ptr = int(self.queue_ptr)
        labels = np.asarray(labels).reshape(-1)

        # NOTE(review): a traceback was recorded at this point --
        #   RuntimeError: The expanded size of the tensor (11) must match the
        #   existing size (25) at non-singleton dimension 1.
        #   Target sizes: [100, 11].  Tensor sizes: [100, 25]
        # presumably a write that ran past the end of the queue; the truncation
        # and the pointer reset below look like the guards against it.
        for c in range(self.n_classes):
            mask = torch.as_tensor(labels == c, device=keys.device)
            update = keys[mask][:adjusted_batch_size, :]  # (<= adjusted_batch_size, dim)
            self.queue[c, :, ptr:ptr + update.shape[0]] = update.T

        ptr = (ptr + adjusted_batch_size) % self.K  # wrap around at the end of the queue

        # drop remaining last part: restart when a full slot would not fit
        if ptr + adjusted_batch_size > self.K:
            ptr = 0

        self.queue_ptr[0] = ptr

    def forward(self, tq, tk, ys):
        """
        Compute contrastive logits and class probabilities for a mixed-class batch.

        tq, tk : inputs for the query and key encoder (same batch size)
        ys : integer class label per sample

        Returns ``(logits, labels, pred, zq_c)``:
        logits : (N, 1 + K), positive logit in column 0, divided by ``T``
        labels : (N,) zeros (index of the positive column), on ``logits.device``
        pred : (N, n_classes) softmax output of the linear head
        zq_c : (N, dim) L2-normalised query features

        As a side effect the key features are enqueued into the per-class queues.
        """
        # for this model, we don't use augment data to make negative samples
        assert tq.shape[0] == tk.shape[0] == ys.shape[0]
        batch_size = tq.shape[0]

        # instead we bring each positive and negative samples
        zq_c, _ = self.encoder_q(tq)  # (N, dim)
        zq_c = nn.functional.normalize(zq_c, dim=1)  # (N, dim)

        zk_c, _ = self.encoder_k(tk)  # (N, dim)
        zk_c = nn.functional.normalize(zk_c, dim=1)  # (N, dim)
        zk_c = zk_c.detach()  # no gradient flows into the key encoder

        # positive logits : (N, 1)
        l_pos = torch.einsum('nc,nc->n', zq_c, zk_c).unsqueeze(-1)

        # contribution : we consider n-1 classes data which is negative class from queue
        # queue : (n_classes, dim, K). Work on a snapshot: the queue is written
        # in place further down, and autograd must not see a tensor it saved
        # for backward change underneath it.
        queue = self.queue.clone().detach()
        l_neg = torch.einsum('nd,cdk->cnk', zq_c, queue)  # (C, N, K)

        # Flatten instead of squeeze so a batch of one stays 1-D.
        class_idx = ys.detach().reshape(-1).long().to(l_neg.device)  # (N,)

        # nc[n, c] is 1 for every class except sample n's own class.
        nc = torch.ones(batch_size, self.n_classes,
                        dtype=l_neg.dtype, device=l_neg.device)  # (N, C)
        nc[torch.arange(batch_size, device=l_neg.device), class_idx] = 0

        # For each query n, sum its own logits over the negative classes.
        # The batch index must be shared between both operands; a separate
        # index there would add up the logits of every query in the batch.
        l_neg = torch.einsum('nc,cnk->nk', nc, l_neg)  # (N, K)

        logits = torch.cat([l_pos, l_neg], dim=1)
        logits /= self.T  # apply temperature
        # labels: positive key indicators
        labels = torch.zeros(logits.shape[0], dtype=torch.long,
                             device=logits.device)
        self._dequeue_and_enqueue(class_idx.cpu().numpy(), zk_c)  # dequeue and enqueue

        # fine tuning part
        pred = self.fc(zq_c)
        pred = self.softmax(pred)

        return logits, labels, pred, zq_c