    def __init__(self):
        super(RelPosNet, self).__init__()
        # Shared feature extractor; StagerNet emits a 100-dim embedding
        # per input window.
        self.stagenet = StagerNet()
        # Linear head mapping the combined pair embedding to a single logit.
        self.linear = nn.Linear(100, 1)
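    # Hypothetical forward pass (the original is not shown in this excerpt).
    # Assumes the standard relative-positioning head: embed both windows with
    # the shared StagerNet, then score the pair from the elementwise absolute
    # difference of the two 100-dim embeddings.
    def forward(self, x1, x2):
        h1 = self.stagenet(x1)                   # (batch, 100)
        h2 = self.stagenet(x2)                   # (batch, 100)
        return self.linear(torch.abs(h1 - h2))   # (batch, 1) logit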
    def __init__(self):
        super(TemporalShufflingNet, self).__init__()
        self.stagenet = StagerNet()
        # Two pairwise comparisons (x1 vs x2 and x2 vs x3) are concatenated,
        # so the head takes a 200-dim vector and emits a single logit.
        self.linear = nn.Linear(200, 1)
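    # Hypothetical forward pass (not shown in this excerpt). Assumes the
    # temporal-shuffling formulation: embed all three windows, compare the
    # adjacent pairs via absolute differences, and classify whether the
    # middle window is in temporal order from the concatenated 200-dim
    # feature, matching the nn.Linear(200, 1) head above.
    def forward(self, x1, x2, x3):
        h1 = self.stagenet(x1)
        h2 = self.stagenet(x2)
        h3 = self.stagenet(x3)
        d12 = torch.abs(h1 - h2)                          # (batch, 100)
        d23 = torch.abs(h2 - h3)                          # (batch, 100)
        return self.linear(torch.cat([d12, d23], dim=1))  # (batch, 1)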
    def __init__(self):
        super(CPC_Net, self).__init__()
        self.stagenet = StagerNet()
        self.linear = nn.Linear(100, 1)
        Np = 10       # number of future windows to predict
        h_dim = 100   # context dimension
        ct_dim = 100  # StagerNet embedding dimension
        # One bilinear scoring head per prediction step. These must live in
        # an nn.ModuleList (not a plain Python list), otherwise their weights
        # are never registered and model.parameters() silently skips them.
        self.NpList = nn.ModuleList()
        for i in range(Np):
            self.NpList.append(
                nn.Bilinear(in1_features=h_dim, in2_features=ct_dim,
                            out_features=1, bias=False))
        # Normalize scores over the candidate-sample dimension.
        self.logsoftmax = nn.LogSoftmax(dim=-1)
    def __init__(self, Np):
        super(CPC_Net, self).__init__()
        self.stagenet = StagerNet()
        h_dim = 100   # GRU context dimension
        ct_dim = 100  # StagerNet embedding dimension
        # Autoregressive aggregator: summarizes the sequence of window
        # embeddings into a context vector.
        self.gru = nn.GRU(ct_dim, h_dim, 1, batch_first=True)
        # One bilinear scoring head per future step to predict.
        self.BilinearList = nn.ModuleList()
        for i in range(Np):
            self.BilinearList.append(
                nn.Bilinear(in1_features=h_dim, in2_features=ct_dim,
                            out_features=1, bias=False))
        self.sample_bilin = nn.Bilinear(in1_features=h_dim, in2_features=ct_dim,
                                        out_features=1, bias=False)
        # Normalize scores over the candidate-sample dimension.
        self.logsoftmax = nn.LogSoftmax(dim=-1)
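    # Design note (illustration, not part of the original model): the switch
    # from a plain list to nn.ModuleList is load-bearing. For example:
    #
    #     heads = [nn.Bilinear(100, 100, 1, bias=False) for _ in range(10)]
    #     # stored on self as a plain list -> len(list(model.parameters()))
    #     # excludes all 10 heads, so Adam never updates them.
    #
    #     heads = nn.ModuleList(
    #         nn.Bilinear(100, 100, 1, bias=False) for _ in range(10))
    #     # registered as submodules -> their weights appear in
    #     # model.parameters(), model.to(device), and state_dict().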
            # Score each (prediction step, candidate sample) pair with the
            # step-specific bilinear head against the context vector hn.
            for predicted in range(list(Xp_new.shape)[1]):
                for sample in range(list(Xp_new.shape)[2]):
                    output_cat[batch, predicted, sample] = self.BilinearList[predicted](
                        hn[batch, :], Xp_new[batch, predicted, sample, :])
        return output_cat


if __name__ == "__main__":
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")  # PyTorch v0.4.0

    model_stager = StagerNet().to(device)
    model = TemporalShufflingNet().to(device)

    # Dummy smoke-test batch: two windows of 3000 samples x 2 channels each.
    x1 = torch.randn(2, 3000, 2)
    x2 = torch.randn(2, 3000, 2)
    x3 = torch.randn(2, 3000, 2)
    # SoftMarginLoss expects +/-1 targets, so draw random signs.
    y = torch.sign(torch.randn(2, 1))
    x1, x2, x3, y = x1.to(device), x2.to(device), x3.to(device), y.to(device)

    print("Start Training")
    loss_fn = torch.nn.SoftMarginLoss(reduction='sum')
    learning_rate = 5e-4
    beta_vals = (0.9, 0.999)
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=beta_vals, lr=learning_rate)
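    # A hedged sketch of the rest of the smoke test (the original excerpt
    # ends at the optimizer construction). Assumes the TemporalShufflingNet
    # forward takes the three windows directly; a few gradient steps on the
    # fixed dummy batch verify that the graph builds and the loss moves.
    for step in range(10):
        y_pred = model(x1, x2, x3)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("step %d, loss %.4f" % (step, loss.item()))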
def train_end_to_end(stagernet_path, train_set, val_set, pos_labels_per_class,
                     max_epochs, verbose=False):
    train_set_reduced = restrict_training_size_per_class(
        train_set, pos_labels_per_class)
    params = {'batch_size': 256, 'shuffle': True, 'num_workers': 6}
    training_generator = torch.utils.data.DataLoader(train_set_reduced, **params)
    validation_generator = torch.utils.data.DataLoader(val_set, **params)
    print("len of the dataloader is:", len(training_generator))

    if stagernet_path == "full_supervision":
        # Train the feature extractor from scratch, end to end.
        trained_stage = StagerNet()
    else:
        trained_stage = StagerNet()
        if stagernet_path:
            # Load pretrained StagerNet weights from ../models/.
            trained_stage.load_state_dict(
                torch.load(".." + os.sep + "models" + os.sep + stagernet_path))
        # Freeze the feature extractor so only the downstream head trains
        # (an empty path gives the frozen random-weights baseline).
        for p in trained_stage.parameters():
            p.requires_grad = False

    # CUDA setup if available
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")  # PyTorch v0.4.0
    model = DownstreamNet(trained_stage).to(device)

    # Training parameters
    loss_fn = nn.CrossEntropyLoss()
    learning_rate = 5e-4
    beta_vals = (0.9, 0.999)
    optimizer = torch.optim.Adam(model.parameters(), betas=beta_vals,
                                 lr=learning_rate, weight_decay=0.001)

    print("Start Training")
    for epoch in range(max_epochs):
        running_loss = 0
        correct = 0
        total = 0
        for x, y in training_generator:
            # Transfer to GPU
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            loss = loss_fn(y_pred, y)

            # Accuracy bookkeeping
            correct += num_correct(y_pred, y)
            total += len(y)
            print_class_counts(y_pred)

            # Zero gradients, backpropagate, and update parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation. Note: model.train is a method, so the original
        # `model.train = False` only shadowed it with a boolean; switch
        # modes with eval()/train() and skip gradient tracking instead.
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for x, y in validation_generator:
                x, y = x.to(device), y.to(device)
                y_pred = model(x)
                val_correct += num_correct(y_pred, y)
                val_total += len(y)
        model.train()

        if verbose:
            print('[Epoch %d] Training loss: %.3f' %
                  (epoch + 1, running_loss / len(training_generator)))
            print('Training accuracy: %.3f' % (correct / total))
            print('Validation accuracy: %.3f' % (val_correct / val_total))
    return val_correct / val_total
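# Example usage (illustrative; the dataset objects and checkpoint filename
# below are hypothetical stand-ins, not from the original source):
#
#     val_acc = train_end_to_end("stagernet_rp.pt", train_set, val_set,
#                                pos_labels_per_class=100, max_epochs=40,
#                                verbose=True)
#     print("final validation accuracy:", val_acc)
#
# Per the branches above: passing "full_supervision" trains StagerNet end to
# end, an empty string evaluates a frozen randomly initialized StagerNet, and
# any other value is loaded as a checkpoint from ../models/ and frozen.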