def getLoss(ltype, device):
    if ltype == "BinaryCrossentropy":
        return BinaryCrossentropy().to(device)
    if ltype == "BCEWithLogitsLoss":
        return BCEWithLogitsLoss().to(device)
    if ltype == "FocalBCEWithLogitsLoss":
        return FocalBCEWithLogitsLoss(device).to(device)
    # Fail loudly instead of silently returning None for unknown loss names.
    raise ValueError(f"Unknown loss type: {ltype}")
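
# Usage sketch: pick the criterion from config at setup time. CONF['GPU'] is
# used elsewhere in this file; the 'LOSS_TYPE' key is illustrative only.
# device = 'cuda:0' if CONF['GPU'] else 'cpu'
# loss_func = getLoss(CONF.get('LOSS_TYPE', 'BCEWithLogitsLoss'), device)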
def train():
    tb_writer = SummaryWriter('tb_output')
    device = 'cuda:0' if CONF['GPU'] else 'cpu'
    model: nn.Module = CaseModel()
    # tb_writer.add_graph(model)
    model.train()
    train_dataset = CasRelDataset(path_or_json=CONF['TRAIN_DATA_PATH'])
    eval_dataset = CasRelDataset(path_or_json=CONF['EVAL_DATA_PATH'])
    dataloader = DataLoader(train_dataset, batch_size=CONF['batch_size'],
                            shuffle=True, collate_fn=collate_casrel)
    loss_func = BCEWithLogitsLoss()
    best_loss = 1e3
    optim = Adam(model.parameters(), lr=1e-5)
    global_steps = 0
    for epoch_num in range(Epochs):
        epoch_loss = 0.0
        # metric() below moves the model to CPU, so move it back each epoch.
        model = model.to(device=device)
        for (batch_tokens, batch_mask, batch_sub_head, batch_sub_tail,
             batch_sub_head_arr, batch_sub_tail_arr,
             batch_obj_head_arr, batch_obj_tail_arr) in tqdm(
                 dataloader, f'Epoch {epoch_num:3.0f}/{Epochs}', len(dataloader)):
            (batch_tokens, batch_mask, batch_sub_head, batch_sub_tail,
             batch_sub_head_arr, batch_sub_tail_arr,
             batch_obj_head_arr, batch_obj_tail_arr) = [
                 x.to(device) for x in (batch_tokens, batch_mask, batch_sub_head,
                                        batch_sub_tail, batch_sub_head_arr,
                                        batch_sub_tail_arr, batch_obj_head_arr,
                                        batch_obj_tail_arr)]
            sub_head_pred, sub_tail_pred, obj_head_pred, obj_tail_pred = model(
                batch_tokens, batch_mask, batch_sub_head, batch_sub_tail)
            sub_head_loss = loss_func(sub_head_pred.squeeze(), batch_sub_head_arr)
            sub_tail_loss = loss_func(sub_tail_pred.squeeze(), batch_sub_tail_arr)
            obj_head_loss = loss_func(obj_head_pred, batch_obj_head_arr)
            obj_tail_loss = loss_func(obj_tail_pred, batch_obj_tail_arr)
            loss = sub_head_loss + sub_tail_loss + obj_head_loss + obj_tail_loss
            # Accumulate a Python float, not the tensor, so each batch's
            # autograd graph is not kept alive for the whole epoch.
            epoch_loss += loss.item()
            logger.info(f'per-batch loss: {loss.item()}')
            global_steps += 1
            tb_writer.add_scalar('train_loss', loss.item(), global_steps)
            optim.zero_grad()
            loss.backward()
            optim.step()
        # end of one epoch
        p, r, f = metric(model.to('cpu'), eval_dataset)
        logger.info(f'epoch:{epoch_num + 1:3.0f}, precision: {p:5.4f}, '
                    f'recall: {r:5.4f}, f1-score: {f:5.4f}')
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            save_model = CONF['SAVE_MODEL']
            if not os.path.exists(os.path.dirname(save_model)):
                os.makedirs(os.path.dirname(save_model))
            torch.save(model.state_dict(), save_model)
def __init__(self, args, num_features, time_length):
    """
    Args:
        args: Parsed command-line arguments holding the model configuration.
        num_features (int): Number of input node features.
        time_length (int): Total timesteps in dataset.
    """
    super(DySAT, self).__init__()
    self.args = args
    if args.window < 0:
        self.num_time_steps = time_length
    else:
        # window = 0 => only self.
        self.num_time_steps = min(time_length, args.window + 1)
    self.num_features = num_features
    self.structural_head_config = list(map(int, args.structural_head_config.split(",")))
    self.structural_layer_config = list(map(int, args.structural_layer_config.split(",")))
    self.temporal_head_config = list(map(int, args.temporal_head_config.split(",")))
    self.temporal_layer_config = list(map(int, args.temporal_layer_config.split(",")))
    self.spatial_drop = args.spatial_drop
    self.temporal_drop = args.temporal_drop
    self.structural_attn, self.temporal_attn = self.build_model()
    self.bceloss = BCEWithLogitsLoss()
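
# Construction sketch: the Namespace fields mirror exactly what __init__ reads,
# but the concrete values are illustrative assumptions, not from the original repo.
# from argparse import Namespace
# args = Namespace(window=2, structural_head_config="16,8,8",
#                  structural_layer_config="64,32,32",
#                  temporal_head_config="16", temporal_layer_config="128",
#                  spatial_drop=0.1, temporal_drop=0.5)
# model = DySAT(args, num_features=143, time_length=10)
# # window=2 => num_time_steps == min(10, 2 + 1) == 3; window=-1 would use all 10.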
def __init__(self, weights, per_image=False, skip_empty=True,
             channel_weights=[1, 0.1, 0.1], channel_losses=None):
    super().__init__()
    self.weights = weights
    self.bce = BCEWithLogitsLoss()
    self.dice = DiceLoss(per_image=per_image)
    self.gdl = GeneralizedDice()
    self.jaccard = JaccardLoss(per_image=per_image)
    self.focal = FocalLoss2d()
    self.mapping = {
        'bce': self.bce,
        'dice': self.dice,
        'gdl': self.gdl,
        'focal': self.focal,
        'jaccard': self.jaccard
    }
    self.expect_sigmoid = {'dice', 'focal', 'jaccard', 'gdl'}
    self.per_channel = {'dice', 'jaccard', 'gdl'}
    self.values = {}
    self.channel_weights = channel_weights
    self.channel_losses = channel_losses
    self.skip_empty = skip_empty
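
# A hedged forward() sketch for this combo loss; the original forward is not
# shown above. It only illustrates how the fields plausibly interact: `weights`
# selects and scales sub-losses, `expect_sigmoid` marks losses fed
# sigmoid(outputs) instead of raw logits, and `per_channel` losses are applied
# channel-by-channel with `channel_weights` (restricted by `channel_losses`
# when set). Treat it as an assumption, not the author's implementation.
def forward(self, outputs, targets):
    loss = 0.0
    sigmoid_input = torch.sigmoid(outputs)
    for name, weight in self.weights.items():
        if not weight:
            continue
        loss_fn = self.mapping[name]
        inp = sigmoid_input if name in self.expect_sigmoid else outputs
        if name in self.per_channel:
            val = 0.0
            for c, cw in enumerate(self.channel_weights):
                # channel_losses is assumed to list allowed loss names per channel
                if self.channel_losses is not None and name not in self.channel_losses[c]:
                    continue
                if self.skip_empty and targets[:, c].sum() == 0:
                    continue
                val = val + cw * loss_fn(inp[:, c, ...], targets[:, c, ...])
        else:
            val = loss_fn(inp, targets)
        self.values[name] = val
        loss = loss + weight * val
    return loss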
def forward(self, input_ids, attention_mask=None, token_type_ids=None,
            position_ids=None, head_mask=None, labels=None):
    outputs = self.bert(input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        position_ids=position_ids,
                        head_mask=head_mask)
    pooled_output = outputs[1]
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    # add hidden states and attentions if they are here
    outputs = (logits,) + outputs[2:]
    if labels is not None:
        if self.num_labels == 1:
            # Single logit: binary (or soft-label) targets, scored with BCE-with-logits.
            loss_fct = BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1), labels.view(-1))
        else:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        outputs = (loss,) + outputs
    return outputs  # (loss), logits, (hidden_states), (attentions)
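
# Usage sketch (tensor shapes and vocab size are illustrative; model
# construction is assumed to happen elsewhere in this file):
# input_ids = torch.randint(0, 30522, (2, 128))  # (batch, seq_len)
# labels = torch.tensor([0., 1.])                # binary targets for num_labels == 1
# loss, logits = model(input_ids, labels=labels)[:2]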
def __init__(self, weights, per_image=False, skip_empty=False,
             channel_weights=[1, 0.2, 0.1], channel_losses=None):
    super().__init__()
    self.weights = weights
    self.bce = BCEWithLogitsLoss()
    self.dice = DiceLoss()
    self.lcdice = LogCoshDiceLoss()
    self.jaccard = JaccardLoss(per_image=per_image)
    self.focal = BinaryFocalLoss()
    self.mapping = {
        'bce': self.bce,
        'dice': self.dice,
        'lcdice': self.lcdice,
        'focal': self.focal,
        'jaccard': self.jaccard
    }
    self.expect_sigmoid = {'dice', 'jaccard', 'lcdice'}
    self.per_channel = {'dice', 'jaccard', 'lcdice', 'focal', 'bce'}
    self.values = {}
    self.channel_weights = channel_weights
    self.channel_losses = channel_losses
    self.skip_empty = skip_empty
def __init__(self, loss_names, loss_weights, device, num_classes):
    """
    :param loss_names: list of loss names; possible losses are
        ['jaccard', 'nlll', 'crossentropy', 'smooth_jaccard', 'focal', 'dice'].
    :param loss_weights: list of weight coefficients, one per loss in loss_names.
    :param device: execution device.
    :param num_classes: number of classes in the training data.
    """
    super(MultiLoss, self).__init__()
    assert len(loss_names) == len(loss_weights)
    self.device = device
    self.losses = dict()
    self.loss_weights = dict()
    self.num_classes = num_classes
    for loss, weight in zip(loss_names, loss_weights):
        loss = loss.lower()
        if loss == 'jaccard':
            if self.num_classes > 1:
                self.losses[loss] = JaccardLossMulti()
            else:
                self.losses[loss] = JaccardLoss()
        elif loss == 'nlll':
            # Use NLLLoss if the network ends in a (log-)softmax layer;
            # otherwise use CrossEntropyLoss.
            self.losses[loss] = NLLLoss()
        elif loss == 'crossentropy':
            # CrossEntropyLoss for the multiclass case, BCEWithLogitsLoss for
            # the binary case; both expect raw logits.
            if self.num_classes > 1:
                self.losses[loss] = CrossEntropyLoss(reduction='mean')
            else:
                self.losses[loss] = BCEWithLogitsLoss(reduction='mean')
        elif loss == 'smooth_jaccard':
            if self.num_classes > 1:
                raise ValueError("smooth_jaccard is not implemented for the multiclass case.")
            else:
                self.losses[loss] = SmoothJaccardLoss()
        elif loss == 'focal':
            if self.num_classes > 1:
                self.losses[loss] = FocalLossMulti(size_average=False)
            else:
                self.losses[loss] = FocalLossBinary(size_average=False)
        elif loss == 'dice' and self.num_classes < 2:
            self.losses[loss] = DiceLoss()
        else:
            raise ValueError(loss)
        self.loss_weights[loss] = weight
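
# A hedged forward() sketch; the real forward is not shown above. It assumes
# the weighted sub-losses are simply summed, which matches how self.losses and
# self.loss_weights are keyed. Note that in practice NLLLoss/CrossEntropyLoss
# expect integer class targets while the binary losses expect floats; the
# sketch glosses over that and is an illustration, not the author's code.
def forward(self, output, target):
    total = 0.0
    for name, loss_fn in self.losses.items():
        total = total + self.loss_weights[name] * loss_fn(output, target)
    return total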
def __init__(self, adapterClassifier, id2label, lr) -> None:
    super().__init__()
    self.classifier = adapterClassifier
    self.id2label = id2label
    self.lr = lr
    # pos_weight of 1.0 per label keeps the BCE unweighted; raising individual
    # entries would up-weight rare positive labels.
    self.criterion = BCEWithLogitsLoss(pos_weight=torch.full((len(id2label),), 1.))
    self.sig = Sigmoid()
    self.declare_metrics(self.id2label)
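
# A hedged multi-label step sketch. The class looks like a LightningModule
# (it stores lr and declares metrics), but this training_step, the batch keys,
# and the `.logits` attribute are assumptions for illustration, not the
# author's code.
def training_step(self, batch, batch_idx):
    logits = self.classifier(batch["input_ids"],
                             attention_mask=batch["attention_mask"]).logits
    loss = self.criterion(logits, batch["labels"].float())
    probs = self.sig(logits)  # per-label probabilities, e.g. for the metrics
    self.log("train_loss", loss)
    return loss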
def __init__(self, device, alpha=1, gamma=2, reduce=True):
    super(FocalBCEWithLogitsLoss, self).__init__()
    self.alpha = alpha
    self.gamma = gamma
    self.reduce = reduce
    self.bce = BCEWithLogitsLoss(reduction='none').to(device)
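
# A hedged forward() sketch using the standard focal-loss recipe on top of the
# unreduced BCE above: pt = exp(-bce) recovers the model's probability of the
# true class, and (1 - pt)**gamma down-weights easy examples. The original
# forward is not shown, so treat this as an assumption.
def forward(self, logits, targets):
    bce = self.bce(logits, targets)   # element-wise BCE-with-logits
    pt = torch.exp(-bce)              # p for positives, 1 - p for negatives
    focal = self.alpha * (1 - pt) ** self.gamma * bce
    return focal.mean() if self.reduce else focal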
def train_binary_classifier(true_inputs, false_inputs, encoder, params, num_val_samples=1000):
    outputmodelname = params.outputmodelname + "_binary_clf"
    if params.load_binary_clf:
        binary_classifier = BinaryClassifier(params.embedding_dim, 512, 0., 0.).to(encoder.device)
        checkpoint = torch.load(os.path.join(params.outputdir, outputmodelname),
                                map_location=params.device)
        binary_classifier.load_state_dict(checkpoint["model_state_dict"])
        return binary_classifier

    inputs = true_inputs + false_inputs
    t = ([1] * len(true_inputs)) + ([0] * len(false_inputs))

    # Hold out a validation set.
    indices = list(range(len(inputs)))
    inputs, t = np.array(inputs), np.array(t)
    shuffle(indices)
    val_inputs = inputs[indices[-num_val_samples:]]
    val_targets = t[indices[-num_val_samples:]]
    inputs = inputs[indices[:-num_val_samples]]
    t = t[indices[:-num_val_samples]]
    indices = list(range(len(inputs)))

    binary_classifier = BinaryClassifier(
        params.embedding_dim, 512, params.dropout_binary,
        params.gaussian_noise_binary).to(encoder.device)
    opt = torch.optim.Adam(binary_classifier.parameters(), lr=params.lr_bclf)
    freeze(encoder)
    encoder.eval()
    loss_f = BCEWithLogitsLoss()

    def save_clf():
        checkpoint = {"model_state_dict": binary_classifier.state_dict()}
        torch.save(checkpoint, os.path.join(params.outputdir, outputmodelname))

    best_acc = evaluate(val_inputs, val_targets, encoder, binary_classifier, params)
    bsize = params.batch_size
    correct = 0.
    for e in range(params.n_epochs_binary):
        # Reshuffle the data each epoch.
        shuffle(indices)
        inputs = inputs[indices]
        t = t[indices]
        binary_classifier.train()
        losses = []
        for idx in range(0, len(inputs), bsize):
            ib = inputs[idx:idx + bsize]
            tb = t[idx:idx + bsize]
            tb = torch.tensor(tb, device=encoder.device).view(-1, 1).float()
            with torch.no_grad():
                embeddings = encoder(ib)
            preds = binary_classifier(embeddings)
            # preds are logits (loss_f is BCEWithLogitsLoss), so threshold
            # at 0, which corresponds to a probability of 0.5.
            acc = ((preds > 0) == tb).sum()
            loss = loss_f(preds, tb)
            correct += acc
            opt.zero_grad()
            loss.backward()
            opt.step()
            losses.append(loss.item())
            if (idx / bsize) % params.log_freq == 0:
                avg_loss = np.array(losses[-params.log_freq:]).mean()
                print("Binary classification step {}<->{}: loss {} ; t-acc: {}, v-acc: {}"
                      .format(e, idx, avg_loss,
                              correct / float(params.log_freq * bsize), best_acc))
                correct = 0.
        val_acc = evaluate(val_inputs, val_targets, encoder, binary_classifier, params)
        if val_acc > best_acc:
            best_acc = val_acc
            save_clf()
        print("Loss in epoch {}: {}".format(e, np.array(losses).mean()))
    return binary_classifier
def main():
    gpu_num = int(sys.argv[1])
    random_seed = (int(time.time()) * (gpu_num + 1)) % (2 ** 31 - 1)
    np.random.seed(random_seed)
    random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)

    HYPERPARAMETERS = {
        'batch_size': choice([4096, 8192, 16384]),
        'nn_encoder_out': choice(list(range(10, 100))),
        # Divisor for 'nn_encoder_out': e.g. nn_encoder_out = 30 with k = 2
        # gives a hidden layer size of 15 in every feature encoder.
        'enc_hidden_layer_k': choice(np.linspace(0.5, 4.0, 8)),
        'n_splits': 10,  # number of CV folds
        'optimizer': 'adam',  # ['RMSprop', 'adam']
        'lr': choice(np.linspace(0.001, 0.01, 10)),
        'use_dropout': choice([True, False]),
        'use_bn': choice([True, False]),
        'lr_sheduler_factor': choice(np.linspace(0.1, 0.9, 9)),
        'lr_sheduler_patience': choice(list(range(3, 15))),
        'lr_sheduler_min_lr': 0.0001,  # not critical, but shouldn't be too small
        'max_epoch': 9999,  # early stopping ends training, so this just needs to be large
        'early_stop_wait': 20,  # bigger is better but slower; 20 is a reasonable trade-off
        'upsampling_times': choice(list(range(3, 20))),  # more = slower
        'upsampling_class_balancer': choice(list(range(2, 10)))  # capped below at 'upsampling_times'
    }

    # Pick the gpu_num-th best configuration from the HPO logs.
    ans = {}
    for i in range(6):
        with open(f"../output/hpo_logs_{i}.json", "r") as f:
            for item in f.readlines():
                d = eval(item)  # each line is a dict literal written by the HPO run; assumes a trusted file
                ans[d["target"]] = d["params"]
    score = sorted(ans)[-gpu_num - 1]
    params = ans[score]
    params['batch_size'] = int(params['batch_size'])
    params['nn_encoder_out'] = int(params['nn_encoder_out'])
    params['lr_sheduler_patience'] = int(params['lr_sheduler_patience'])
    params['upsampling_times'] = int(params['upsampling_times'])
    params['upsampling_class_balancer'] = int(params['upsampling_class_balancer'])
    params['upsampling_class_balancer'] = min(params['upsampling_class_balancer'],
                                              params['upsampling_times'])
    params['use_bn'] = params['use_bn'] > 0.5
    params['use_dropout'] = params['use_dropout'] > 0.5
    for key in params:
        HYPERPARAMETERS[key] = params[key]

    with open(f"log_{gpu_num}.txt", "a") as f:
        for key in HYPERPARAMETERS:
            f.write(key + " " + str(HYPERPARAMETERS[key]) + "\n")
            print(key, HYPERPARAMETERS[key])
    print(score)
    print("\nSEED:", random_seed)
    print("GPU:", gpu_num, "\n")

    input_path = "../input/"
    output_path = "../output/"
    print("torch:", torch.__version__)
    print("loading data...")
    train_df = pd.read_csv(input_path + 'train.csv.zip')
    label = train_df.target
    train = train_df.drop(['ID_code', 'target'], axis=1)
    cols = train.columns
    test = pd.read_csv(input_path + 'test.csv.zip')
    test = test.drop(['ID_code'], axis=1)
    test_filtered = pd.read_pickle(input_path + 'test_filtered.pkl')
    test_filtered = test_filtered.loc[:, train.columns]
    train_test = pd.concat([train, test_filtered]).reset_index(drop=True)
    vcs_train_test = {}
    for col in tqdm(train.columns):
        vcs_train_test[col] = train_test.loc[:, col].value_counts()
    generate_features(test, vcs_train_test, cols)

    ups = UpsamplingPreprocessor(HYPERPARAMETERS['upsampling_times'],
                                 HYPERPARAMETERS['upsampling_class_balancer'])
    loss_f = BCEWithLogitsLoss()
    batch_size = HYPERPARAMETERS['batch_size']
    N_IN = 2
    gpu = torch.device(f'cuda:{gpu_num % 4}')
    cpu = torch.device('cpu')
    folds = StratifiedKFold(n_splits=HYPERPARAMETERS['n_splits'], shuffle=True, random_state=42)
    oof = np.zeros(len(train))
    predictions = np.zeros(len(test))

    for fold_, (trn_idx, val_idx) in enumerate(folds.split(train.values, label)):
        print("Fold {}".format(fold_))
        X_train, Train_label = ups.fit_transform(train.loc[trn_idx], label.loc[trn_idx])
        X_val, Val_label = train.loc[val_idx], label.loc[val_idx]
        generate_features(X_train, vcs_train_test, cols)
        generate_features(X_val, vcs_train_test, cols)
        cols_new = X_train.columns
        scaler = StandardScaler()
        X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=cols_new)
        X_val = pd.DataFrame(scaler.transform(X_val), columns=cols_new)
        test_new = pd.DataFrame(scaler.transform(test), columns=cols_new)

        train_tensors = []
        val_tensors = []
        test_tensors = []
        for fff in range(200):
            cols_to_use = [f'var_{fff}', f'var_{fff}_1_flag']
            train_t = X_train.loc[:, cols_to_use].values
            val_t = X_val.loc[:, cols_to_use].values
            test_t = test_new.loc[:, cols_to_use].values
            train_tensors.append(torch.tensor(train_t, requires_grad=False, device=cpu, dtype=torch.float32))
            val_tensors.append(torch.tensor(val_t, requires_grad=False, device=cpu, dtype=torch.float32))
            test_tensors.append(torch.tensor(test_t, requires_grad=False, device=gpu, dtype=torch.float32))
        train_tensors = torch.cat(train_tensors, 1).view((-1, 200, N_IN))
        val_tensors = torch.cat(val_tensors, 1).view((-1, 200, N_IN))
        test_tensors = torch.cat(test_tensors, 1).view((-1, 200, N_IN))

        # np.asarray handles both plain arrays and pandas Series labels,
        # replacing the original bare try/except around torch.tensor().
        y_train_t = torch.tensor(np.asarray(Train_label), requires_grad=False, device=cpu, dtype=torch.float32)
        y_val_t = torch.tensor(np.asarray(Val_label), requires_grad=False, device=cpu, dtype=torch.float32)

        # Named `model` rather than `nn` to avoid shadowing torch.nn.
        model = NN(D_in=N_IN,
                   enc_out=HYPERPARAMETERS['nn_encoder_out'],
                   enc_hidden_layer_k=HYPERPARAMETERS['enc_hidden_layer_k'],
                   use_dropout=HYPERPARAMETERS['use_dropout'],
                   use_BN=HYPERPARAMETERS['use_bn']).to(gpu)
        if HYPERPARAMETERS['optimizer'] == 'adam':
            optimizer = Adam(params=model.parameters(), lr=HYPERPARAMETERS['lr'])
        elif HYPERPARAMETERS['optimizer'] == 'RMSprop':
            optimizer = RMSprop(params=model.parameters(), lr=HYPERPARAMETERS['lr'])
        scheduler = ReduceLROnPlateau(optimizer, 'max',
                                      factor=HYPERPARAMETERS['lr_sheduler_factor'],
                                      patience=HYPERPARAMETERS['lr_sheduler_patience'],
                                      min_lr=HYPERPARAMETERS['lr_sheduler_min_lr'],
                                      verbose=True)
        best_AUC = 0
        early_stop = 0
        for epoch in tqdm(range(HYPERPARAMETERS['max_epoch'])):
            model.train()
            dl = batch_iter(train_tensors, y_train_t, batch_size=batch_size)
            for data, label_t in dl:
                pred = model(data.to(gpu))
                loss = loss_f(pred, torch.unsqueeze(label_t.to(gpu), -1))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            with torch.no_grad():
                model.eval()
                blobs = []
                for batch in torch.split(val_tensors, batch_size):
                    blobs.append(model(batch.to(gpu)).data.cpu().numpy().flatten())
                val_pred = np.concatenate(blobs)
            AUC = roc_auc_score(label[val_idx], val_pred)
            print('EPOCH {}'.format(epoch))
            print('LOSS: ', loss_f(torch.tensor(val_pred), y_val_t))
            print('AUC: ', AUC)
            scheduler.step(AUC)
            if AUC > best_AUC:
                early_stop = 0
                best_AUC = AUC
                torch.save(model, output_path + f'best_auc_nn_{gpu_num}.pkl')
            else:
                early_stop += 1
                print('SCORE IS NOT THE BEST. Early stop counter: {}'.format(early_stop))
                if early_stop == HYPERPARAMETERS['early_stop_wait']:
                    print(f'EARLY STOPPING NOW, BEST AUC = {best_AUC}')
                    break
            print('=' * 50)

        best_model = torch.load(output_path + f'best_auc_nn_{gpu_num}.pkl')
        with torch.no_grad():
            best_model.eval()
            blobs = []
            for batch in torch.split(val_tensors, batch_size):
                blobs.append(best_model(batch.to(gpu)).data.cpu().numpy().flatten())
            oof[val_idx] = np.concatenate(blobs)
        auc = round(roc_auc_score(Val_label, oof[val_idx]), 5)
        with open(f"log_{gpu_num}.txt", "a") as f:
            f.write(str(fold_) + " " + str(auc) + "\n")
        blobs = []
        for batch in torch.split(test_tensors, batch_size):
            blobs.append(best_model(batch).data.cpu().numpy().flatten())
        predictions_test = np.concatenate(blobs)
        predictions += predictions_test / folds.n_splits

    auc = round(roc_auc_score(label, oof), 5)
    print("CV score: {:<8.5f}".format(auc))
    with open(f"log_{gpu_num}.txt", "a") as f:
        f.write("OOF " + str(auc) + "\n")
    np.save(output_path + f"nn_{gpu_num}_{auc}_oof.npy", oof)
    np.save(output_path + f"nn_{gpu_num}_{auc}_test.npy", predictions)
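
# batch_iter is referenced above but not defined in this snippet; a minimal
# sketch of what it plausibly does (shuffled mini-batches over paired tensors).
# This is an assumption, not the author's implementation.
def batch_iter(x, y, batch_size):
    perm = torch.randperm(len(x))
    for i in range(0, len(x), batch_size):
        idx = perm[i:i + batch_size]
        yield x[idx], y[idx]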
    loss += loss_class_laso_out
    loss += loss_recon
    # don't include subtraction for now (a little weird)
    # loss += loss_class_S
    # loss += loss_recon_S
    loss += loss_class_U
    loss += loss_recon_U
    loss += loss_class_I
    loss += loss_recon_I
    return loss


if __name__ == "__main__":
    # RUN FOR SANITY CHECK
    from torch.nn.modules.loss import BCEWithLogitsLoss, MSELoss

    # `device` is assumed to be defined earlier in this module.
    laso = LaSO()
    laso_loss = LaSOLoss(BCEWithLogitsLoss(), MSELoss())
    laso.zero_grad()
    # this should be None
    print("Classifier fully connected layer gradient", laso.classifier_model.fc.weight.grad)
    x = laso(torch.randn((8, 3, 244, 244)).to(device))
    # BCE-with-logits needs float targets, so cast the random 0/1 labels.
    loss = laso_loss(x, torch.randint(0, 2, (8, 20)).float().to(device))
    loss.backward()
    # this should be nonzero
    print("Classifier fully connected layer gradient", laso.classifier_model.fc.weight.grad)
def __init__(self, weight=None, size_average=True):
    # weight and size_average are accepted for interface compatibility
    # but are not forwarded to the underlying loss.
    super().__init__()
    self.bce = BCEWithLogitsLoss()
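
# A hedged forward() sketch for the wrapper above; since only self.bce is
# stored, the forward presumably just delegates to it. An assumption, not
# the author's code.
def forward(self, logits, targets):
    return self.bce(logits, targets)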