def predict_with_models(
    predictions_path: pathlib.Path,
    y: pd.DataFrame,
    X_test: pd.DataFrame,
    models_configs: typing.Sequence[typing.Dict],
    datasets: typing.Sequence,
    output_dim: int,
    seed: int,
):
    _set_seed(seed)
    for model_config, dataset_generator in zip(models_configs, datasets):
        for i, (X_train, X_test) in enumerate(dataset_generator):
            name, model = _create_model(len(list(X_train)), model_config,
                                        output_dim)
            name = f"{i}_{name}"
            net = skorch.NeuralNet(
                model,
                criterion=torch.nn.CrossEntropyLoss,
                max_epochs=40,
                optimizer=torch.optim.Adam,
                device="cuda",
                batch_size=64,
                train_split=skorch.dataset.CVSplit(0.2, stratified=True),
                iterator_train__shuffle=True,
                callbacks=[
                    skorch.callbacks.EpochScoring(
                        make_scorer(lambda y_true, y_pred: np.mean(
                            y_true == np.argmax(y_pred, axis=-1))),
                        name="validation_accuracy",
                        lower_is_better=False,
                    ),
                    skorch.callbacks.EarlyStopping(
                        monitor="validation_accuracy",
                        lower_is_better=False,
                        patience=8),
                    ("PredictTest",
                     PredictTest(X_test, monitor="validation_accuracy")),
                    skorch.callbacks.LRScheduler(
                        policy=torch.optim.lr_scheduler.ReduceLROnPlateau,
                        monitor="validation_accuracy",
                        mode="max",
                        factor=0.6,
                        patience=2,
                        verbose=True,
                    ),
                ],
            )
            net.fit(X_train.values.astype(np.float32), y.values)
            _save_predictions(predictions_path, name, net.history)
Example #2
def _make_classifier(self, model):
    # Note: use_cuda and gradient_clip_value are legacy skorch keyword arguments;
    # recent releases use device= and the GradientNormClipping callback instead.
    return skorch.NeuralNet(module=CnnLSTMAttention,
                            module__embeddings_size=model.vector_size(),
                            module__out_classes=self.out_classes * 2,
                            criterion=self.criterion,
                            optimizer=torch.optim.Adam,
                            lr=self.lr,
                            max_epochs=self.num_epochs,
                            use_cuda=self.use_cuda,
                            gradient_clip_value=self.clip_grad_norm,
                            verbose=self.verbose,
                            batch_size=self.batch_size,
                            dataset=EmbeddingsSeqDataset,
                            dataset__model=model,
                            dataset__max_len=self.max_seq_len)
Example #3
def train_fuzzy(model, X, y, show_plots=True):
    X = torch.tensor(X, dtype=torch.float)
    y = torch.tensor(y, dtype=torch.float)
    net = skorch.NeuralNet(
        model,
        max_epochs=50,
        criterion=torch.nn.MSELoss,
        optimizer=torch.optim.SGD,
        optimizer__lr=1e-6,
        optimizer__momentum=0.99,
        callbacks=[FittingCallback()],
    )
    if show_plots:
        experimental.plot_all_mfs(model, X)
    net.fit(X, y)
    if show_plots:
        experimental.plot_all_mfs(model, X)
Example #4
def build_model():
    classifier = skorch.NeuralNet(
        module=MLPModule,
        optimizer=torch.optim.Adam,
        criterion=torch.nn.BCEWithLogitsLoss,
        max_epochs=5,
        batch_size=128,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        train_split=None,
        callbacks=[
            DynamicVariablesSetter(),
        ],
    )

    model = make_pipeline(
        build_preprocessor(),
        classifier,
    )

    return model
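# A hypothetical usage sketch for the pipeline above (the demo names below are
# placeholders, not part of the original snippet). BCEWithLogitsLoss expects
# float targets, and skorch's NeuralNet.predict returns the raw module output,
# so the returned logits still need a sigmoid to become probabilities:
#
#     model = build_model()
#     model.fit(X_train, y_train.astype("float32").reshape(-1, 1))
#     logits = model.predict(X_test)
#     probabilities = 1.0 / (1.0 + np.exp(-logits))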
Example #5
def test_vignette(show_plots=True):
    model = vignette_examples.vignette_ex3()
    X, y = jang_examples.make_sinc_xy_large().dataset.tensors
    net = skorch.NeuralNet(
        model,
        max_epochs=50,
        # train_split=None,
        # window_size=1024,
        criterion=torch.nn.MSELoss,
        # criterion__reduction='sum',
        optimizer=torch.optim.SGD,
        optimizer__lr=1e-4,
        optimizer__momentum=0.99,
        callbacks=[FittingCallback()],
    )
    net.fit(X, y)
    if show_plots:
        experimental.plot_all_mfs(model, X)
        y_actual = y
        y_pred = model(X)
        experimental.plot_results(y_actual, y_pred)
Example #6
def test_jang(show_plots=True):
    model = jang_examples.ex1_model()
    train_data = jang_examples.make_sinc_xy()
    X, y = train_data.dataset.tensors
    net = skorch.NeuralNet(
        model,
        max_epochs=100,
        train_split=None,
        criterion=torch.nn.MSELoss,
        #criterion__reduction='sum',
        optimizer=torch.optim.SGD,
        optimizer__lr=1e-4,
        optimizer__momentum=0.99,
        callbacks=[FittingCallback()],
    )
    net.fit(X, y)
    if show_plots:
        experimental.plot_all_mfs(model, X)
        y_actual = y
        y_pred = model(X)
        experimental.plot_results(y_actual, y_pred)
Example #7
def load_protein_accessory_model(dirname: str) -> skorch.NeuralNet:
    """Loads the protein accessory model"""
    predicted_proteins = utils.read_delimited_file(
        os.path.join(dirname, "protein_proteins.txt"))
    with open(os.path.join(dirname, "params.json")) as source:
        model_params = json.load(source)

    encoded_to_protein_skorch = skorch.NeuralNet(
        module=autoencoders.Decoder,
        module__num_units=16,
        module__intermediate_dim=model_params["interdim"],
        module__num_outputs=len(predicted_proteins),
        module__final_activation=nn.Identity(),
        module__activation=ACT_DICT[model_params["act"]],
        # module__final_activation=nn.Linear(
        #     len(predicted_proteins), len(predicted_proteins), bias=True
        # ),  # Paper uses identity activation instead
        lr=model_params["lr"],
        criterion=LOSS_DICT[model_params["loss"]],  # Other works use L1 loss
        optimizer=OPTIM_DICT[model_params["optim"]],
        batch_size=model_params["bs"],
        max_epochs=500,
        callbacks=[
            skorch.callbacks.EarlyStopping(patience=25),
            skorch.callbacks.LRScheduler(
                policy=torch.optim.lr_scheduler.ReduceLROnPlateau,
                **model_utils.REDUCE_LR_ON_PLATEAU_PARAMS,
            ),
            skorch.callbacks.GradientNormClipping(gradient_clip_value=5),
        ],
        iterator_train__num_workers=8,
        iterator_valid__num_workers=8,
        device="cpu",
    )
    encoded_to_protein_skorch_cp = skorch.callbacks.Checkpoint(
        dirname=dirname, fn_prefix="net_")
    encoded_to_protein_skorch.load_params(
        checkpoint=encoded_to_protein_skorch_cp)
    return encoded_to_protein_skorch
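# Hypothetical inference with the restored model above: the decoder takes the
# 16-dimensional encoded representation (module__num_units=16) and predict
# returns its raw output as a NumPy array. The directory name and input array
# below are placeholders, not part of the original snippet:
#
#     net = load_protein_accessory_model("some_model_dir")
#     latent = np.random.rand(100, 16).astype(np.float32)
#     protein_predictions = net.predict(latent)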
Example #8
def build_model(device=torch.device("cpu")):
    model = skorch.NeuralNet(
        module=VAE,
        module__image_shape=(2, 10, 10),
        module__hid_size=512,
        module__latent_size=2,
        optimizer=torch.optim.Adam,
        optimizer__lr=0.0001,
        criterion=ELBO,
        max_epochs=10,
        batch_size=128,
        iterator_train=DataIterator,
        iterator_train__shuffle=True,
        # iterator_train__num_workers=2,
        iterator_valid=DataIterator,
        iterator_valid__shuffle=False,
        # iterator_valid__num_workers=2,
        device=device,
        callbacks=[
            ShapeSetter(),
            skorch.callbacks.EpochScoring(epoch_vis, on_train=True),
        ])
    return model
Example #9
def build_model(device=torch.device("cpu"), regularized=False):
    model = skorch.NeuralNet(
        module=AutoEncoder,
        module__image_shape=(2, 10, 10),
        module__hid_size=512,
        module__latent_size=2,
        optimizer=torch.optim.Adam,
        optimizer__lr=0.0001,
        criterion=MSE,
        criterion__regularized=regularized,
        max_epochs=20,
        batch_size=512,
        iterator_train=DataIterator,
        iterator_train__shuffle=True,
        # iterator_train__num_workers=2,
        iterator_valid=DataIterator,
        iterator_valid__shuffle=False,
        # iterator_valid__num_workers=2,
        device=device,
        callbacks=[
            ShapeSetter(),
        ],
    )
    return model
Example #10
def main(
    params_path: Path = "training/params.yml",
    viz: bool = False,
    toy: bool = False,
    model: str = "torch",
) -> None:

    torch.autograd.set_detect_anomaly(True)

    params = dicto.load(params_path)

    df_train, df_test = dataget.toy.spirals().get()

    X_train = df_train[["x0", "x1"]].to_numpy()
    y_train = df_train["y"].to_numpy()
    X_test = df_test[["x0", "x1"]].to_numpy()
    y_test = df_test["y"].to_numpy()

    transform = MinMaxScaler()
    X_train = transform.fit_transform(X_train)
    X_test = transform.transform(X_test)

    ds_train = ContrastiveDataset(
        X_train,
        y_train,
        batch_size=params.batch_size,
        steps_per_epoch=params.steps_per_epoch,
        noise_std=params.noise_std,
        n_neighbors=params.n_neighbors,
        n_hops=params.n_hops,
        transform=torch.tensor,
        viz=viz,
    )

    ds_test = ContrastiveDataset(
        X_test,
        y_test,
        batch_size=32,
        steps_per_epoch=1,
        noise_std=params.noise_std,
        n_neighbors=params.n_neighbors,
        n_hops=params.n_hops,
        transform=torch.tensor,
        viz=False,
    )

    if viz:
        visualize(ds_train)

    # pytorch
    model = ContrastiveNet(
        batch_size=params.batch_size * 2,
        n_layers=params.n_layers,
        n_units=params.n_units,
        embedding_size=params.embedding_size,
    )

    net = skorch.NeuralNet(
        model,
        criterion=criterion,
        batch_size=None,
        max_epochs=params.epochs,
        lr=params.lr,
        optimizer=torch.optim.Adam,
        # train_split=lambda X, y: (X, ds_test),
        train_split=None,
        device="cuda",
    )

    net.fit(ds_train, y=None)

    net.module.eval()
    h = (net.module(
        torch.tensor(X_train, dtype=torch.float32, device="cuda"),
        return_embeddings=True,
    ).cpu().detach().numpy())

    h = PCA(1).fit_transform(h)

    px.scatter(x=X_train[:, 0], y=X_train[:, 1], color=h[:, 0]).show()
Example #11
def main():
    """Train a protein predictor"""
    parser = build_parser()
    args = parser.parse_args()

    # Create output directory
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    # Specify output log file
    logger = logging.getLogger()
    fh = logging.FileHandler(os.path.join(args.outdir, "training.log"))
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

    # Log parameters
    for arg in vars(args):
        logging.info(f"Parameter {arg}: {getattr(args, arg)}")
    with open(os.path.join(args.outdir, "params.json"), "w") as sink:
        json.dump(vars(args), sink, indent=4)

    # Load the model
    pretrained_net = model_utils.load_model(args.encoder, device=args.device)

    # Load in some files
    rna_genes = utils.read_delimited_file(
        os.path.join(args.encoder, "rna_genes.txt"))
    atac_bins = utils.read_delimited_file(
        os.path.join(args.encoder, "atac_bins.txt"))

    # Read in the RNA
    rna_data_kwargs = copy.copy(sc_data_loaders.TENX_PBMC_RNA_DATA_KWARGS)
    rna_data_kwargs["cluster_res"] = args.clusterres
    rna_data_kwargs["fname"] = args.rnaCounts
    rna_data_kwargs["reader"] = lambda x: load_rna_files(
        x, args.encoder, transpose=not args.notrans)

    # Construct data folds
    full_sc_rna_dataset = sc_data_loaders.SingleCellDataset(
        valid_cluster_id=args.validcluster,
        test_cluster_id=args.testcluster,
        **rna_data_kwargs,
    )
    full_sc_rna_dataset.data_raw.write_h5ad(
        os.path.join(args.outdir, "full_rna.h5ad"))

    train_valid_test_dsets = []
    for mode in ["all", "train", "valid", "test"]:
        logging.info(f"Constructing {mode} dataset")
        sc_rna_dataset = sc_data_loaders.SingleCellDatasetSplit(
            full_sc_rna_dataset, split=mode)
        sc_rna_dataset.data_raw.write_h5ad(
            os.path.join(args.outdir, f"{mode}_rna.h5ad"))  # Write RNA input
        sc_atac_dummy_dataset = sc_data_loaders.DummyDataset(
            shape=len(atac_bins), length=len(sc_rna_dataset))
        # RNA and fake ATAC
        sc_dual_dataset = sc_data_loaders.PairedDataset(
            sc_rna_dataset,
            sc_atac_dummy_dataset,
            flat_mode=True,
        )
        # encoded(RNA) as "x" and RNA + fake ATAC as "y"
        sc_rna_encoded_dataset = sc_data_loaders.EncodedDataset(
            sc_dual_dataset, model=pretrained_net, input_mode="RNA")
        sc_rna_encoded_dataset.encoded.write_h5ad(
            os.path.join(args.outdir, f"{mode}_encoded.h5ad"))
        sc_protein_dataset = sc_data_loaders.SingleCellProteinDataset(
            args.proteinCounts,
            obs_names=sc_rna_dataset.obs_names,
            transpose=not args.notrans,
        )
        sc_protein_dataset.data_raw.write_h5ad(
            os.path.join(args.outdir, f"{mode}_protein.h5ad"))  # Write protein
        # x = 16 dimensional encoded layer, y = 25 dimensional protein array
        sc_rna_protein_dataset = sc_data_loaders.SplicedDataset(
            sc_rna_encoded_dataset, sc_protein_dataset)
        _temp = sc_rna_protein_dataset[0]  # ensure calling works
        train_valid_test_dsets.append(sc_rna_protein_dataset)

    # Unpack and do sanity checks
    _, sc_rna_prot_train, sc_rna_prot_valid, sc_rna_prot_test = train_valid_test_dsets
    x, y, z = sc_rna_prot_train[0], sc_rna_prot_valid[0], sc_rna_prot_test[0]
    assert (x[0].shape == y[0].shape == z[0].shape
            ), f"Got mismatched shapes: {x[0].shape} {y[0].shape} {z[0].shape}"
    assert (x[1].shape == y[1].shape == z[1].shape
            ), f"Got mismatched shapes: {x[1].shape} {y[1].shape} {z[1].shape}"

    protein_markers = list(sc_protein_dataset.data_raw.var_names)
    with open(os.path.join(args.outdir, "protein_proteins.txt"), "w") as sink:
        sink.write("\n".join(protein_markers) + "\n")
    assert len(
        utils.read_delimited_file(
            os.path.join(args.outdir,
                         "protein_proteins.txt"))) == len(protein_markers)
    logging.info(f"Predicting on {len(protein_markers)} proteins")

    if args.preprocessonly:
        return

    protein_decoder_skorch = skorch.NeuralNet(
        module=autoencoders.Decoder,
        module__num_units=16,
        module__intermediate_dim=args.interdim,
        module__num_outputs=len(protein_markers),
        module__activation=ACT_DICT[args.act],
        module__final_activation=nn.Identity(),
        # module__final_activation=nn.Linear(
        #     len(protein_markers), len(protein_markers), bias=True
        # ),  # Paper uses identity activation instead
        lr=args.lr,
        criterion=LOSS_DICT[args.loss],  # Other works use L1 loss
        optimizer=OPTIM_DICT[args.optim],
        batch_size=args.bs,
        max_epochs=args.epochs,
        callbacks=[
            skorch.callbacks.EarlyStopping(patience=15),
            skorch.callbacks.LRScheduler(
                policy=torch.optim.lr_scheduler.ReduceLROnPlateau,
                patience=5,
                factor=0.1,
                min_lr=1e-6,
                # **model_utils.REDUCE_LR_ON_PLATEAU_PARAMS,
            ),
            skorch.callbacks.GradientNormClipping(gradient_clip_value=5),
            skorch.callbacks.Checkpoint(
                dirname=args.outdir,
                fn_prefix="net_",
                monitor="valid_loss_best",
            ),
        ],
        train_split=skorch.helper.predefined_split(sc_rna_prot_valid),
        iterator_train__num_workers=8,
        iterator_valid__num_workers=8,
        device=utils.get_device(args.device),
    )
    protein_decoder_skorch.fit(sc_rna_prot_train, y=None)

    # Plot the loss history
    fig = plot_loss_history(protein_decoder_skorch.history,
                            os.path.join(args.outdir, "loss.pdf"))
Example #12
# #     y = torch.cat((y, Y[i]), 0)
# # xfin = x.unsqueeze(0)
# # yfin = y.unsqueeze(0)
# net.fit(X=X, y=Y)

n_classes = 12

network = SegNet(in_channels=3, n_classes=n_classes)
network.init_encoder()

network.cuda()

net = skorch.NeuralNet(
    module=network,
    criterion=torch.nn.CrossEntropyLoss,
    train_split=None,
    use_cuda=True,
    batch_size=10,
)

params = {'lr': [0.01, 0.02], 'max_epochs': [5, 10]}

# if only training
# net.fit(X=X, y=y)

image_indicators = np.hstack([np.repeat(i, len(x)) for i, x in enumerate(X)])
labels = image_indicators % n_classes
#
X, y = np.vstack(X), np.hstack(Y)

cv = LeavePLabelOut(labels=labels, p=1)
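# The fragment above builds the net, a parameter grid, and a splitter, but stops
# before running the search. LeavePLabelOut is the legacy scikit-learn splitter
# later replaced by LeavePGroupsOut (which takes a `groups` argument). A minimal
# sketch of how the pieces would typically be combined with GridSearchCV follows;
# the pixel-accuracy scorer and refit setting are assumptions, needed because
# skorch.NeuralNet defines no default score method.
from sklearn.model_selection import GridSearchCV


def pixel_accuracy(estimator, X_val, y_val):
    # NeuralNet.predict returns the raw (N, n_classes, H, W) logits; take the
    # argmax over the class axis and compare predictions per pixel.
    predicted = estimator.predict(X_val).argmax(axis=1)
    return float((predicted == y_val).mean())


search = GridSearchCV(net, params, scoring=pixel_accuracy, cv=cv, refit=False)
search.fit(X, y)
print(search.best_params_)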
Example #13
def main(
    params_path: Path = Path("training/params.yml"),
    cache: bool = False,
    viz: bool = False,
    debug: bool = False,
    toy: bool = False,
):
    if debug:
        import debugpy

        print("Waiting debuger....")
        debugpy.listen(("localhost", 5678))
        debugpy.wait_for_client()

    params = dicto.load(params_path)

    train_cache = Path("cache/train.csv")
    test_cache = Path("cache/test.csv")
    transformer_cache = Path("cache/transformer.pkl")

    if cache and train_cache.exists():
        df_train = pd.read_csv(train_cache)
        df_test = pd.read_csv(test_cache)
        transformer = pickle.load(transformer_cache.open("rb"))
    else:
        df, df_real = dataget.kaggle(competition="cat-in-the-dat-ii").get(
            files=["train.csv", "test.csv"])

        df.drop(columns=["id"], inplace=True)

        df_train, df_test = estimator.split(df, params)

        if toy:
            df_train = df_train.sample(n=1000)
            df_test = df_test.sample(n=1000)

        transformer = GenericTransformer(
            categorical=params.categorical,
            numerical=params.numerical,
        )

        df_train = transformer.fit_transform(df_train)
        df_test = transformer.transform(df_test)

        df_train.to_csv(train_cache, index=False)
        df_test.to_csv(test_cache, index=False)
        pickle.dump(transformer, transformer_cache.open("wb"))

    print(df_train)
    print(df_test)

    ds_train = estimator.get_dataset(df_train, params, "train")
    ds_test = estimator.get_dataset(df_test, params, "test")

    print(ds_train[:10])
    print(ds_test[:10])

    model = estimator.get_model(params,
                                n_categories=transformer.n_categories,
                                numerical=[])
    print(model)
    exit()  # NOTE: everything below this early exit never runs

    net = skorch.NeuralNet(model)

    model.summary()

    print(ds_train)

    model.fit(
        ds_train,
        epochs=params.epochs,
        steps_per_epoch=params.steps_per_epoch,
        validation_data=ds_test,
        callbacks=[
            tf.keras.callbacks.TensorBoard(log_dir=str(
                Path("summaries") / Path(model.name)),
                                           profile_batch=0)
        ],
    )

    # Export to saved model
    save_path = f"models/{model.name}"
    model.save(save_path)

    print(f"{save_path=}")

    vizualize(df_train, df_test, model)