class FcnMoCo(nn.Module):
    def __init__(self, input_dim, dim, n_timesteps, n_classes, batch_size, n_channels,
                 K=65536, T=0.7, m=0.999, n_lstm_out=128, n_lstm_layers=1,
                 Conv1_NF=128, Conv2_NF=256, Conv3_NF=128, lstmDropP=0.8, FC_DropP=0.3,
                 SEB=True, is_attention=False, is_tpa=False, device='cuda'):
        super(FcnMoCo, self).__init__()
        self.fcn = Fcn(n_timesteps, n_classes, batch_size, n_channels, n_lstm_out,
                       n_lstm_layers, input_dim, Conv1_NF, Conv2_NF, Conv3_NF,
                       lstmDropP, FC_DropP, SEB, is_attention, is_tpa,
                       is_moco=True, device=device)
        self.K = K
        assert K % batch_size == 0
        self.m = m
        self.T = T
        self.n_classes = n_classes
        self.softmax = nn.LogSoftmax(dim=1)
        self.encoder_q = nn.Sequential(nn.Linear(input_dim, dim), nn.ReLU(), nn.Linear(dim, dim))
        self.encoder_k = nn.Sequential(nn.Linear(input_dim, dim), nn.ReLU(), nn.Linear(dim, dim))
        self.input_attention_negatives = nn.Parameter(torch.randn(self.n_classes, K))

        # One queue per class; negative values are aggregated through the input attention above.
        # Keys are normalised along the feature dimension of the (n_classes, dim, K) buffer.
        self.register_buffer(
            "queue",
            nn.functional.normalize(torch.randn(self.n_classes, dim, K), dim=1))  # (n_classes, dim, K)
        self.register_buffer("queue_ptr",
                             torch.zeros((n_classes, 1), dtype=torch.long))  # (n_classes, 1)
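# Illustrative only: a minimal, self-contained sketch of the per-class queue layout
# registered above. The sizes (3 classes, dim=8, K=12, a batch of 4 keys) are made-up
# toy values, and the ring-buffer update below only mirrors the dequeue-and-enqueue
# pattern used by the MoCo-style classes in this codebase; it is not their code.
def _example_per_class_queue():
    import torch
    import torch.nn.functional as F

    n_classes, dim, K, batch = 3, 8, 12, 4
    queue = F.normalize(torch.randn(n_classes, dim, K), dim=1)   # unit-norm keys per class
    queue_ptr = torch.zeros((n_classes, 1), dtype=torch.long)

    # Enqueue a batch of fresh keys into the ring buffer of class c.
    c = 1
    new_keys = F.normalize(torch.randn(batch, dim), dim=1)       # (batch, dim)
    ptr = int(queue_ptr[c])
    queue[c, :, ptr:ptr + batch] = new_keys.T                    # columns are keys
    queue_ptr[c, 0] = (ptr + batch) % K                          # wrap around

    # Similarity of a single query against every queued key of every class.
    zq = F.normalize(torch.randn(dim), dim=0)
    sims = torch.einsum('d,cdk->ck', zq, queue)                  # (n_classes, K)
    return sims.shape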
args.upstream_model_path = os.path.join(
    'output', PROJECT_NAME, args.exp_name, 'upstream_models')
args.downstream_model_path = os.path.join(
    'output', PROJECT_NAME, args.exp_name, 'downstream_models')
args.project_name = PROJECT_NAME

if not os.path.exists(args.upstream_model_path):
    os.makedirs(args.upstream_model_path)
if not os.path.exists(args.downstream_model_path):
    os.makedirs(args.downstream_model_path)

if args.model_type == 'lstm_fcn':
    model = Fcn(n_timesteps=adjusted_window_length, n_channels=nfeature,
                n_classes=nclass, is_attention=False, device=device)
elif args.model_type == 'alstm_fcn':
    model = Fcn(n_timesteps=adjusted_window_length, n_channels=nfeature,
                n_classes=nclass, is_attention=True, device=device)
elif args.model_type == 'tpa_fcn':
    model = Fcn(n_timesteps=adjusted_window_length, n_channels=nfeature,
                n_classes=nclass, is_tpa=True, device=device)
        self.ys = ys
        self.xs = np.transpose(xs, axes=(0, 2, 1))
        assert len(self.xs) == len(self.ys)

    def __len__(self):
        return len(self.xs)

    def __getitem__(self, idx):
        return {'x': self.xs[idx], 'y': self.ys[idx]}


if __name__ == '__main__':
    for i, dn in enumerate(candidate_datasets):
        nfeature, ntimestep, nclass, x_train, y_train, x_test, y_test = load_info_raw_ts(
            'data/', dataset=dn)
        model = Fcn(n_timesteps=ntimestep, n_channels=nfeature,
                    n_classes=nclass, is_attention=False, device=device)
        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
        # optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
        model.to(device)
        t_total = time.time()
        train(model, optimizer, x_train, y_train, x_test, y_test)
        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
        torch.cuda.empty_cache()
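# Illustrative only: a minimal sketch of how the dataset fragment above could be fed
# through a torch DataLoader. `SomeTimeSeriesDataset` is a hypothetical placeholder for
# the (unnamed) dataset class whose __getitem__ returns {'x': ..., 'y': ...}; the batch
# size and dtype choices are assumptions, not the repository's training setup.
def _example_dataloader_usage(xs, ys, batch_size=32):
    from torch.utils.data import DataLoader

    dataset = SomeTimeSeriesDataset(xs, ys)   # hypothetical class name
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for batch in loader:
        x = batch['x'].float()                # collated by DataLoader; axes follow the
        y = batch['y']                        # np.transpose(..., (0, 2, 1)) applied above
        # ... feed (x, y) to the model / a training step here ...
        return x.shape, y.shape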
class MoCoFcn(nn.Module):
    def __init__(self, K, m, T, n_timesteps, n_classes, batch_size, n_channels,
                 n_lstm_out=128, n_lstm_layers=1, fc_out=100,
                 Conv1_NF=128, Conv2_NF=256, Conv3_NF=128,
                 lstmDropP=0.8, FC_DropP=0.3, SEB=True,
                 is_attention=False, is_tpa=False, device='cuda'):
        """
        fc_out : feature dimension of the encoder output (default: 100)
        K : queue size; number of negative keys per class (default: 65536)
        m : MoCo momentum for updating the key encoder (default: 0.999)
        T : softmax temperature (default: 0.07)
        """
        super(MoCoFcn, self).__init__()
        self.K = K
        self.m = m
        self.T = T
        self.n_classes = n_classes

        self.encoder_q = Fcn(n_timesteps, n_classes, batch_size, n_channels, n_lstm_out,
                             n_lstm_layers, fc_out, Conv1_NF, Conv2_NF, Conv3_NF,
                             lstmDropP, FC_DropP, SEB, is_attention, is_tpa, True,
                             device=device)
        self.encoder_k = Fcn(n_timesteps, n_classes, batch_size, n_channels, n_lstm_out,
                             n_lstm_layers, fc_out, Conv1_NF, Conv2_NF, Conv3_NF,
                             lstmDropP, FC_DropP, SEB, is_attention, is_tpa, True,
                             device=device)
        self.input_attention_negatives = nn.Parameter(torch.randn(self.n_classes, K))
        self.fc = nn.Linear(fc_out, n_classes)
        self.softmax = nn.Softmax(dim=-1)

        # One queue per class; negatives come from the other classes' queues and are
        # weighted through the input attention parameters above. Keys are normalised
        # along the feature dimension of the (n_classes, fc_out, K) buffer.
        self.register_buffer(
            "queue",
            nn.functional.normalize(torch.randn(self.n_classes, fc_out, K), dim=1))  # (n_classes, dim, K)
        self.register_buffer("queue_ptr",
                             torch.zeros((n_classes, 1), dtype=torch.long))  # (n_classes, 1)

    @torch.no_grad()
    def _momentum_update_key_enocder(self):
        # Momentum update of the key encoder: k <- m * k + (1 - m) * q
        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    @torch.no_grad()
    def _dequeue_and_enqueue(self, c, keys):
        batch_size = keys.shape[0]
        ptr = int(self.queue_ptr[c])
        assert self.K % batch_size == 0  # for simplicity

        # replace the keys at ptr (dequeue and enqueue)
        self.queue[c, :, ptr:ptr + batch_size] = keys.T
        ptr = (ptr + batch_size) % self.K  # move pointer (wraps around)
        self.queue_ptr[c, 0] = ptr

    def forward(self, tq, tk, ys):
        # For this model we do not use augmented data to build negative samples;
        # the whole mini-batch belongs to a single class, and the negatives are
        # drawn from the queues of the other classes.
        assert tq.shape[0] == tk.shape[0] == ys.shape[0]
        batch_size = tq.shape[0]
        c = ys[0]
        assert all([r == c for r in ys])
        tq_c = tq
        tk_c = tk

        zq_c, _ = self.encoder_q(tq_c)                  # (N_c, dim)
        zq_c = nn.functional.normalize(zq_c, dim=1)     # (N_c, dim)
        zk_c, _ = self.encoder_k(tk_c)                  # (N_c, dim)
        zk_c = nn.functional.normalize(zk_c, dim=1)     # (N_c, dim)
        zk_c = zk_c.detach()                            # (N_c, dim)

        # positive logits: (N_c, 1)
        l_pos = torch.einsum('nc,nc->n', [zq_c, zk_c]).unsqueeze(-1)

        # Contribution: the negatives are the queued keys of the other n_classes - 1 classes.
        # queue: (n_classes, dim, K)
        l_neg = torch.einsum('nd,cdk->cnk', [zq_c, self.queue.detach()])
        l_neg = l_neg[[ct for ct in range(self.n_classes) if ct != c]]  # (n_classes-1, N_c, K)
        l_neg = l_neg.permute(1, 0, 2)                                  # (N_c, n_classes-1, K), e.g. (32, 4, 100)

        matrix = self.input_attention_negatives[
            [i for i in range(self.n_classes) if i != c]]               # (n_classes-1, K)
        alpha = torch.einsum('nck,ck->nc', [l_neg, matrix])             # (N_c, n_classes-1)
        alpha = F.softmax(alpha, dim=-1).unsqueeze(-1)                  # (N_c, n_classes-1, 1)
        l_neg = l_neg * alpha                                           # (N_c, n_classes-1, K)
        l_neg = l_neg.sum(1)                                            # (N_c, K)

        logits = torch.cat([l_pos, l_neg], dim=1)
        logits /= self.T  # apply temperature

        # labels: positive key indicators (the positive key sits at index 0)
        labels = torch.zeros(logits.shape[0], dtype=torch.long, device=logits.device)
        self._dequeue_and_enqueue(c.long().item(), zk_c)  # dequeue and enqueue

        # fine-tuning part: class prediction from the query embedding
        pred = self.fc(zq_c)
        pred = self.softmax(pred)

        return logits, labels, pred
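# Illustrative only: a minimal sketch of how the (logits, labels, pred) outputs of
# MoCoFcn.forward might be combined into a training objective. The real training loop
# is not shown in this file; the loss weighting, the NLL term over log-probabilities,
# and the point at which the key encoder is momentum-updated are all assumptions.
def _example_mocofcn_step(model, optimizer, tq, tk, ys, lambda_cls=1.0):
    import torch
    import torch.nn.functional as F

    # MoCoFcn.forward asserts that the whole mini-batch belongs to a single class.
    logits, labels, pred = model(tq, tk, ys)

    # InfoNCE: the positive key sits at index 0 of every row of `logits`.
    contrastive_loss = F.cross_entropy(logits, labels)

    # `pred` already went through Softmax, so take the log before NLLLoss.
    cls_loss = F.nll_loss(torch.log(pred + 1e-8), ys.long().view(-1))

    loss = contrastive_loss + lambda_cls * cls_loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Assumed placement: keep the key encoder trailing the query encoder (MoCo-style).
    model._momentum_update_key_enocder()
    return loss.item()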
class MoCoFcnMixed(nn.Module):
    def __init__(self, K, m, T, n_timesteps, n_classes, batch_size, n_channels,
                 n_lstm_out=128, n_lstm_layers=1, fc_out=100,
                 Conv1_NF=128, Conv2_NF=256, Conv3_NF=128,
                 lstmDropP=0.8, FC_DropP=0.3, SEB=True,
                 is_attention=False, is_tpa=False, device='cuda'):
        """
        fc_out : feature dimension of the encoder output (default: 100)
        K : queue size; number of negative keys per class (default: 65536)
        m : MoCo momentum for updating the key encoder (default: 0.999)
        T : softmax temperature (default: 0.07)
        """
        super(MoCoFcnMixed, self).__init__()
        self.K = K
        self.m = m
        self.T = T
        self.n_classes = n_classes
        self.device = device

        self.encoder_q = Fcn(n_timesteps, n_classes, n_channels, n_lstm_out, n_lstm_layers,
                             fc_out, Conv1_NF, Conv2_NF, Conv3_NF, lstmDropP, FC_DropP,
                             SEB, is_attention, is_tpa, True, device)
        self.encoder_k = Fcn(n_timesteps, n_classes, n_channels, n_lstm_out, n_lstm_layers,
                             fc_out, Conv1_NF, Conv2_NF, Conv3_NF, lstmDropP, FC_DropP,
                             SEB, is_attention, is_tpa, True, device)
        self.fc = nn.Linear(fc_out, n_classes)
        self.softmax = nn.Softmax(dim=-1)

        # One queue per class; negatives are taken from the queues of the other classes.
        # Keys are normalised along the feature dimension of the (n_classes, fc_out, K) buffer.
        self.register_buffer(
            "queue",
            nn.functional.normalize(torch.randn(self.n_classes, fc_out, K), dim=1))  # (n_classes, dim, K)
        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))  # shared pointer

    @torch.no_grad()
    def _momentum_update_key_enocder(self):
        # Momentum update of the key encoder: k <- m * k + (1 - m) * q
        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    @torch.no_grad()
    def _dequeue_and_enqueue(self, labels, keys):
        batch_size = keys.shape[0]
        # class distribution is assumed to be uniform within the mini-batch
        adjusted_batch_size = int(batch_size / self.n_classes)
        ptr = int(self.queue_ptr)
        labels = pd.Series(labels)
        """
        RuntimeError: The expanded size of the tensor (11) must match the existing size (25)
        at non-singleton dimension 1. Target sizes: [100, 11].
        Tensor sizes: [100, 25]
        """
        for c in range(self.n_classes):
            update = keys[labels[labels == c].index.tolist()]
            update = update[:adjusted_batch_size, :]
            self.queue[c, :, ptr:ptr + update.shape[0]] = update.T  # (adjusted_batch_size, fc_out)

        ptr = (ptr + adjusted_batch_size) % self.K  # move pointer (wraps around)
        # drop the remaining last part if the next batch would overflow the queue
        if ptr + adjusted_batch_size > self.K:
            ptr = 0
        self.queue_ptr[0] = ptr

    def forward(self, tq, tk, ys):
        # For this model we do not use augmented data to build negative samples;
        # the mini-batch mixes classes, and each sample's negatives come from the
        # queues of the other classes.
        assert tq.shape[0] == tk.shape[0] == ys.shape[0]
        batch_size = tq.shape[0]
        tq_c = tq
        tk_c = tk

        zq_c, _ = self.encoder_q(tq_c)                  # (N, dim)
        zq_c = nn.functional.normalize(zq_c, dim=1)     # (N, dim)
        zk_c, _ = self.encoder_k(tk_c)                  # (N, dim)
        zk_c = nn.functional.normalize(zk_c, dim=1)     # (N, dim)
        zk_c = zk_c.detach()                            # (N, dim)

        # positive logits: (N, 1)
        l_pos = torch.einsum('nc,nc->n', [zq_c, zk_c]).unsqueeze(-1)

        # Contribution: for each sample, the negatives are the queued keys of the
        # other n_classes - 1 classes. queue: (n_classes, dim, K)
        l_neg = torch.einsum('nd,cdk->cnk', [zq_c, self.queue.detach()])  # (C, N, K)

        nc = torch.ones(size=(batch_size, self.n_classes)).to(self.device)  # (N, C)
        ysn = np.squeeze(ys.cpu().numpy())
        nc[range(batch_size), ysn] = 0  # zero out each sample's own class, (N, C)

        l_neg = torch.einsum('nc,cnk->nck', [nc, l_neg])  # (N, C, K)
        l_neg = l_neg.sum(1)                              # (N, K)

        logits = torch.cat([l_pos, l_neg], dim=1)
        logits /= self.T  # apply temperature

        # labels: positive key indicators (the positive key sits at index 0)
        labels = torch.zeros(logits.shape[0], dtype=torch.long).to(self.device)

        self._dequeue_and_enqueue(ysn, zk_c)  # dequeue and enqueue

        # fine-tuning part: class prediction from the query embedding
        pred = self.fc(zq_c)
        pred = self.softmax(pred)

        return logits, labels, pred, zq_c
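# Illustrative only: a toy-shape walkthrough of the class-masked negative aggregation
# used in MoCoFcnMixed.forward above. All sizes and labels below are made-up toy
# values; only the einsum pattern and the masking of each sample's own class mirror
# the code.
def _example_masked_negative_aggregation():
    import torch

    n, n_classes, dim, K = 4, 3, 8, 16
    zq = torch.nn.functional.normalize(torch.randn(n, dim), dim=1)            # queries
    queue = torch.nn.functional.normalize(torch.randn(n_classes, dim, K), dim=1)
    ys = torch.tensor([0, 2, 1, 0])                                           # toy labels

    l_neg = torch.einsum('nd,cdk->cnk', [zq, queue])                          # (C, N, K)
    mask = torch.ones(n, n_classes)
    mask[torch.arange(n), ys] = 0                 # zero out each sample's own class
    l_neg = torch.einsum('nc,cnk->nck', [mask, l_neg]).sum(1)                 # (N, K)
    return l_neg.shape                            # torch.Size([4, 16])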