Esempio n. 1
0
    def _fit(self, X_train, y_train, n_epochs=50, eval_set=()):
        """Train ``self.model`` on ``(X_train, y_train)`` with per-epoch
        validation, and return the validation predictions of the best
        checkpoint (highest validation MCC).

        Parameters
        ----------
        X_train : array-like
            Training inputs; converted to a float32 tensor on ``self.device``.
        y_train : array-like
            1-D training targets; a trailing axis is added to match the
            model output shape expected by ``self.loss_fn``.
        n_epochs : int
            Number of training epochs (also the cosine-annealing period
            when ``self.anneal`` is set).
        eval_set : tuple
            Required ``(X_val, y_val)`` pair. Validation drives both the
            threshold search and model checkpointing below.

        Returns
        -------
        Validation predictions (as produced by ``self._val``) of the
        best saved checkpoint.

        Raises
        ------
        ValueError
            If ``eval_set`` is not a 2-element ``(X_val, y_val)`` pair.
        """
        # Fail fast with a clear message: the original code only built
        # `valid_loader` inside this guard but used it unconditionally,
        # so calling with the default eval_set=() died with a NameError.
        if len(eval_set) != 2:
            raise ValueError("eval_set must be a (X_val, y_val) pair")

        seed_torch()
        x_train = torch.tensor(X_train, dtype=torch.float32).to(self.device)
        y = torch.tensor(y_train[:, np.newaxis],
                         dtype=torch.float32).to(self.device)

        train = torch.utils.data.TensorDataset(x_train, y)
        train_loader = torch.utils.data.DataLoader(train,
                                                   batch_size=self.train_batch,
                                                   shuffle=True)

        x_val = torch.tensor(eval_set[0],
                             dtype=torch.float32).to(self.device)
        y_val = torch.tensor(eval_set[1][:, np.newaxis],
                             dtype=torch.float32).to(self.device)
        valid = torch.utils.data.TensorDataset(x_val, y_val)
        valid_loader = torch.utils.data.DataLoader(
            valid, batch_size=self.val_batch, shuffle=False)

        model = self.model(**self.kwargs)
        model.to(self.device)
        optimizer = optim.Adam(model.parameters())
        if self.anneal:
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                             T_max=n_epochs)
        best_score = -np.inf

        for epoch in range(n_epochs):
            with timer(f"Epoch {epoch+1}/{n_epochs}", self.logger):
                model.train()
                avg_loss = 0.
                for (x_batch, y_batch) in train_loader:
                    y_pred = model(x_batch)
                    loss = self.loss_fn(y_pred, y_batch)
                    optimizer.zero_grad()

                    loss.backward()
                    optimizer.step()
                    # Accumulates the mean of per-batch losses over the epoch.
                    avg_loss += loss.item() / len(train_loader)

                valid_preds, avg_val_loss = self._val(valid_loader, model)
                search_result = threshold_search(eval_set[1], valid_preds)
                val_mcc, val_threshold = search_result["mcc"], search_result[
                    "threshold"]
            self.logger.info(
                f"loss: {avg_loss:.4f} val_loss: {avg_val_loss:.4f}")
            self.logger.info(f"val_mcc: {val_mcc} best_t: {val_threshold}")
            if self.anneal:
                scheduler.step()
            if val_mcc > best_score:
                # Checkpoint whenever validation MCC improves.
                torch.save(model.state_dict(),
                           self.path / f"best{self.fold}.pt")
                self.logger.info(f"Save model on epoch {epoch+1}")
                best_score = val_mcc

        # Restore the best checkpoint; map_location guards against a
        # device mismatch between save time and load time.
        model.load_state_dict(
            torch.load(self.path / f"best{self.fold}.pt",
                       map_location=self.device))
        valid_preds, avg_val_loss = self._val(valid_loader, model)
        self.logger.info(f"Validation loss: {avg_val_loss}")
        return valid_preds
Esempio n. 2
0
    for path in args.features:
        path = Path(path)
        assert path.exists()
        with open(path, "rb") as f:
            feats = pickle.load(f)
        if isinstance(feats, list):
            feats = np.concatenate(feats)
        features.append(feats)
    test = np.concatenate(features, axis=2)
    with open(f"trainer/{args.tag}/scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
    logger.info(f"scaler size: {len(scaler)}")
    if len(scaler) > 0:
        for i in range(test.shape[1]):
            with timer(f"scaling dim{i+1}", logger):
                test[:, i, :] = scaler[i].transform(test[:, i, :])

    test_tensor = torch.tensor(test, dtype=torch.float32).to(args.device)
    dataset = torch.utils.data.TensorDataset(test_tensor)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

    with open(f"trainer/{args.tag}/trainer.pkl", "rb") as f:
        trainer = pickle.load(f)

    bin_path = Path(f"bin/{args.tag}")
    test_preds = np.zeros(test.shape[0])
    for path in bin_path.iterdir():
        with timer(f"use binary {path}", logger):
Esempio n. 3
0
        name="robust-denoising", tag=f"robust-denoising/{args.n_dims}")
    meta_train = pd.read_csv("../input/metadata_train.csv")
    meta_test = pd.read_csv("../input/metadata_test.csv")

    train_path = Path("../input/train.parquet")
    test_path = Path("../input/test.parquet")

    n_line = int(meta_train.shape[0] // 3)
    nchunk_train = 2
    step = (n_line // nchunk_train) * 3
    current_head = meta_train.signal_id[0]
    logger.info(f"step: {step}")
    logger.info(f"initial head: {current_head}")
    X = []
    for i in range(nchunk_train):
        with timer(f"chunk{i+1}", logger):
            X_temp = robust_denoised_data(train_path, current_head, step,
                                          args.n_dims)
            X.append(X_temp)
            current_head += step
            logger.info(f"current head: {current_head}")
    X = np.concatenate(X)
    logger.info(f"X_shape: {X.shape}")
    with open(outdir / "train.pkl", "wb") as f:
        pickle.dump(X, f)

    n_line = int(meta_test.shape[0] // 3)
    nchunk_test = 7
    step = (n_line // 6) * 3
    current_head = meta_test.signal_id[0]
    logger.info(f"step: {step}")
Esempio n. 4
0
        for i in range(test.shape[1]):
            test[:, i, :] = scaler[i].transform(test[:, i, :])

    test_tensor = torch.tensor(test, dtype=torch.float32).to(args.device)
    dataset = torch.utils.data.TensorDataset(test_tensor)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

    with open(f"trainer/{args.tag}/trainer.pkl", "rb") as f:
        trainer = pickle.load(f)

    bin_path = Path(f"bin/{args.tag}")
    test_preds = np.zeros(test.shape[0])
    for path in bin_path.iterdir():
        with timer(f"use binary {path}", logger):
            model = LSTMAttentionNet(**trainer.kwargs)
            model.to(args.device)
            model.load_state_dict(torch.load(path))

            model.eval()
            temp = np.zeros(test.shape[0])
            for i, (x_batch, ) in enumerate(loader):
                with torch.no_grad():
                    y_pred = model(x_batch).detach()
                    temp[i * batch_size:(i + 1) * batch_size] = sigmoid(
                        y_pred.cpu().numpy())[:, 0]
            test_preds += temp / trainer.n_splits
    prob_path = Path(f"probability/{args.tag}")
    prob_path.mkdir(exist_ok=True, parents=True)
    with open(prob_path / "probability.pkl", "wb") as f: