def _fit(self, X_train, y_train, n_epochs=50, eval_set=()):
    """Train ``self.model`` on (X_train, y_train) and return validation predictions.

    The best checkpoint (by validation MCC) is saved to
    ``self.path / f"best{self.fold}.pt"`` and reloaded at the end.

    Parameters
    ----------
    X_train : np.ndarray
        Training features; converted to a float32 tensor on ``self.device``.
    y_train : np.ndarray
        1-D training targets; a trailing axis is added for the loss.
    n_epochs : int
        Number of training epochs (also the cosine-annealing period).
    eval_set : tuple
        Required ``(X_val, y_val)`` pair used for validation loss, the MCC
        threshold search, and checkpoint selection.

    Returns
    -------
    np.ndarray
        Predictions of the best checkpoint on the validation set.

    Raises
    ------
    ValueError
        If ``eval_set`` is not a 2-tuple.  (The previous code only built
        ``valid_loader`` inside ``if len(eval_set) == 2`` but used it
        unconditionally, so the default ``eval_set=()`` crashed with a
        confusing ``NameError`` mid-epoch; fail fast instead.)
    """
    if len(eval_set) != 2:
        raise ValueError("eval_set must be an (X_val, y_val) pair")

    seed_torch()  # fix all RNG seeds for reproducibility

    x_train = torch.tensor(X_train, dtype=torch.float32).to(self.device)
    y = torch.tensor(y_train[:, np.newaxis], dtype=torch.float32).to(self.device)
    train = torch.utils.data.TensorDataset(x_train, y)
    train_loader = torch.utils.data.DataLoader(
        train, batch_size=self.train_batch, shuffle=True)

    x_val = torch.tensor(eval_set[0], dtype=torch.float32).to(self.device)
    y_val = torch.tensor(
        eval_set[1][:, np.newaxis], dtype=torch.float32).to(self.device)
    valid = torch.utils.data.TensorDataset(x_val, y_val)
    valid_loader = torch.utils.data.DataLoader(
        valid, batch_size=self.val_batch, shuffle=False)

    model = self.model(**self.kwargs)
    model.to(self.device)
    optimizer = optim.Adam(model.parameters())
    if self.anneal:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

    best_score = -np.inf
    for epoch in range(n_epochs):
        with timer(f"Epoch {epoch+1}/{n_epochs}", self.logger):
            model.train()
            avg_loss = 0.
            for (x_batch, y_batch) in train_loader:
                y_pred = model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Running mean of the epoch loss, accumulated per batch.
                avg_loss += loss.item() / len(train_loader)
            valid_preds, avg_val_loss = self._val(valid_loader, model)
            # Checkpoint selection metric: Matthews correlation with the
            # decision threshold optimized on the validation predictions.
            search_result = threshold_search(eval_set[1], valid_preds)
            val_mcc, val_threshold = search_result["mcc"], search_result[
                "threshold"]
            self.logger.info(
                f"loss: {avg_loss:.4f} val_loss: {avg_val_loss:.4f}")
            self.logger.info(f"val_mcc: {val_mcc} best_t: {val_threshold}")
            if self.anneal:
                scheduler.step()
            if val_mcc > best_score:
                torch.save(model.state_dict(),
                           self.path / f"best{self.fold}.pt")
                self.logger.info(f"Save model on epoch {epoch+1}")
                best_score = val_mcc

    # Restore the best checkpoint and return its validation predictions.
    model.load_state_dict(torch.load(self.path / f"best{self.fold}.pt"))
    valid_preds, avg_val_loss = self._val(valid_loader, model)
    self.logger.info(f"Validation loss: {avg_val_loss}")
    return valid_preds
# Load each pickled feature array and stack them along the feature axis.
for path in args.features:
    path = Path(path)
    assert path.exists()
    with open(path, "rb") as f:
        # NOTE(review): pickle.load on local artifact files — only safe for
        # trusted inputs; never point args.features at untrusted data.
        feats = pickle.load(f)
    # Some feature files store a list of chunks; concatenate them back
    # into a single array.
    if isinstance(feats, list):
        feats = np.concatenate(feats)
    features.append(feats)
# assumes each feature array is (n_samples, n_channels, n_feats), so axis=2
# stacks feature sets side by side — TODO confirm against the writer.
test = np.concatenate(features, axis=2)

# Re-apply the per-channel scalers that were fitted at training time.
with open(f"trainer/{args.tag}/scaler.pkl", "rb") as f:
    scaler = pickle.load(f)
logger.info(f"scaler size: {len(scaler)}")
if len(scaler) > 0:
    for i in range(test.shape[1]):
        with timer(f"scaling dim{i+1}", logger):
            test[:, i, :] = scaler[i].transform(test[:, i, :])

# Wrap the test matrix in a DataLoader for batched inference
# (shuffle=False keeps predictions aligned with row order).
test_tensor = torch.tensor(test, dtype=torch.float32).to(args.device)
dataset = torch.utils.data.TensorDataset(test_tensor)
loader = torch.utils.data.DataLoader(dataset,
                                     batch_size=batch_size,
                                     shuffle=False)

with open(f"trainer/{args.tag}/trainer.pkl", "rb") as f:
    trainer = pickle.load(f)
bin_path = Path(f"bin/{args.tag}")
test_preds = np.zeros(test.shape[0])
# Average predictions over every saved fold checkpoint in bin/<tag>/.
for path in bin_path.iterdir():
    with timer(f"use binary {path}", logger):
        # (loop body truncated in this chunk — continues past this view)
# Tail of a setup call that begins before this chunk (presumably logger /
# experiment-directory initialization — confirm upstream).
name="robust-denoising", tag=f"robust-denoising/{args.n_dims}")
meta_train = pd.read_csv("../input/metadata_train.csv")
meta_test = pd.read_csv("../input/metadata_test.csv")
train_path = Path("../input/train.parquet")
test_path = Path("../input/test.parquet")
# Signals come in triples (one per phase), so all chunk arithmetic works in
# multiples of 3 signal ids.
n_line = int(meta_train.shape[0] // 3)
nchunk_train = 2
step = (n_line // nchunk_train) * 3
current_head = meta_train.signal_id[0]
logger.info(f"step: {step}")
logger.info(f"initial head: {current_head}")
X = []
# Denoise the training parquet in chunks to bound memory usage.
for i in range(nchunk_train):
    with timer(f"chunk{i+1}", logger):
        X_temp = robust_denoised_data(train_path, current_head, step,
                                      args.n_dims)
        X.append(X_temp)
        current_head += step
        logger.info(f"current head: {current_head}")
X = np.concatenate(X)
logger.info(f"X_shape: {X.shape}")
with open(outdir / "train.pkl", "wb") as f:
    pickle.dump(X, f)
# Same procedure for the (larger) test set.
n_line = int(meta_test.shape[0] // 3)
nchunk_test = 7
# NOTE(review): divisor is hard-coded to 6 while nchunk_test is 7 — the last
# iteration presumably sweeps up the remainder; confirm this is intentional
# and that the final chunk does not read past the end.
step = (n_line // 6) * 3
current_head = meta_test.signal_id[0]
logger.info(f"step: {step}")
# Scale each channel with the per-channel scaler fitted at training time.
for i in range(test.shape[1]):
    test[:, i, :] = scaler[i].transform(test[:, i, :])
# Batched, order-preserving DataLoader over the test matrix.
test_tensor = torch.tensor(test, dtype=torch.float32).to(args.device)
dataset = torch.utils.data.TensorDataset(test_tensor)
loader = torch.utils.data.DataLoader(dataset,
                                     batch_size=batch_size,
                                     shuffle=False)
# NOTE(review): unpickling a trainer object — only safe for trusted,
# locally produced artifacts.
with open(f"trainer/{args.tag}/trainer.pkl", "rb") as f:
    trainer = pickle.load(f)
bin_path = Path(f"bin/{args.tag}")
test_preds = np.zeros(test.shape[0])
# Average sigmoid probabilities over every fold checkpoint in bin/<tag>/;
# dividing each fold by trainer.n_splits assumes exactly n_splits files
# exist in the directory — TODO confirm.
for path in bin_path.iterdir():
    with timer(f"use binary {path}", logger):
        model = LSTMAttentionNet(**trainer.kwargs)
        model.to(args.device)
        model.load_state_dict(torch.load(path))
        model.eval()
        temp = np.zeros(test.shape[0])
        for i, (x_batch, ) in enumerate(loader):
            with torch.no_grad():
                y_pred = model(x_batch).detach()
                # Logits -> probabilities; column 0 is the positive class
                # score of this single-output model.
                temp[i * batch_size:(i + 1) * batch_size] = sigmoid(
                    y_pred.cpu().numpy())[:, 0]
        test_preds += temp / trainer.n_splits
prob_path = Path(f"probability/{args.tag}")
prob_path.mkdir(exist_ok=True, parents=True)
with open(prob_path / "probability.pkl", "wb") as f:
    # (body truncated in this chunk — presumably pickle.dump(test_preds, f))