Example #1
def test_simple_vc_infer():
    """Run inference with a checkpointed model and check prediction counts.

    Loads the pre-trained model from the test data folder, builds an
    inference DAG over the bundled BAM/VCF pair, decodes the network
    outputs to zygosity labels, and verifies that exactly one prediction
    was produced per candidate variant in the VCF.
    """
    test_data_dir = get_data_folder()
    model_dir = os.path.join(test_data_dir, ".test_model")

    # Create neural factory pointed at the checkpoint directory so NeMo
    # can restore the trained weights.
    nf = nemo.core.NeuralModuleFactory(
        placement=nemo.core.neural_factory.DeviceType.GPU,
        checkpoint_dir=model_dir)

    # Generate dataset from the test BAM and candidate-variant VCF.
    bam = os.path.join(test_data_dir, "small_bam.bam")
    labels = os.path.join(test_data_dir, "candidates.vcf.gz")
    vcf_bam_tuple = VCFReader.VcfBamPath(vcf=labels, bam=bam, is_fp=False)
    vcf_loader = VCFReader([vcf_bam_tuple])
    test_dataset = ReadPileupDataLoader(ReadPileupDataLoader.Type.TEST,
                                        vcf_loader,
                                        batch_size=32,
                                        shuffle=False)

    # Neural network: 1 input channel, 3 zygosity classes.
    alexnet = AlexNet(num_input_channels=1, num_output_logits=3)

    # Create inference DAG.
    encoding = test_dataset()
    vz = alexnet(encoding=encoding)

    # Invoke the "infer" action.
    results = nf.infer([vz], checkpoint_dir=model_dir, verbose=True)

    # Decode inference results to labels. Accumulate predictions across
    # ALL batches — previously the list was rebound per batch, so the
    # final assertion only saw the last batch (and would raise NameError
    # on empty results).
    zyg_decoder = ZygosityLabelDecoder()
    inferred_zygosity = []
    for tensor_batches in results:
        for batch in tensor_batches:
            # argmax over the logits dimension gives the predicted class id.
            predicted_classes = torch.argmax(batch, dim=1)
            inferred_zygosity.extend(
                zyg_decoder(pred) for pred in predicted_classes
            )

    # One decoded prediction per candidate variant in the loader.
    assert (len(inferred_zygosity) == len(vcf_loader))

    # Clean up the checkpoint directory created by the training test.
    shutil.rmtree(model_dir)
Example #2
def test_simple_vc_trainer():
    """Train a sample variant-calling model on the bundled test data.

    Builds a NeMo training DAG and a parallel evaluation DAG over the same
    BAM/VCF pair, trains AlexNet for one epoch with Adam, and asserts that
    a checkpoint file was written to the model directory.
    """
    # Create neural factory; checkpoints will be written under model_dir.
    model_dir = os.path.join(get_data_folder(), ".test_model")
    nf = nemo.core.NeuralModuleFactory(
        placement=nemo.core.neural_factory.DeviceType.GPU,
        checkpoint_dir=model_dir)

    # Generate dataset. is_fp=False marks these as true-positive examples.
    # NOTE(review): this VCFReader(vcf=..., bams=..., is_fp=...) signature
    # differs from Example #1's VCFReader([VcfBamPath(...)]) — the two
    # snippets appear to target different library versions; confirm which
    # API the installed VariantWorks exposes.
    bam = os.path.join(get_data_folder(), "small_bam.bam")
    labels = os.path.join(get_data_folder(), "candidates.vcf.gz")
    vcf_loader = VCFReader(vcf=labels, bams=[bam], is_fp=False)

    # Neural network: 1 input channel, 3 output logits (zygosity classes).
    alexnet = AlexNet(num_input_channels=1, num_output_logits=3)

    # Create train DAG: loader -> encoder -> network -> cross-entropy loss.
    dataset_train = ReadPileupDataLoader(ReadPileupDataLoader.Type.TRAIN,
                                         [vcf_loader],
                                         batch_size=32,
                                         shuffle=True)
    vz_ce_loss = CrossEntropyLossNM(logits_ndim=2)
    vz_labels, encoding = dataset_train()
    vz = alexnet(encoding=encoding)
    vz_loss = vz_ce_loss(logits=vz, labels=vz_labels)

    # Create evaluation DAG using the same dataset as training (shuffle off
    # so evaluation order is deterministic). Reuses the same alexnet module,
    # so train and eval share weights.
    dataset_eval = ReadPileupDataLoader(ReadPileupDataLoader.Type.EVAL,
                                        [vcf_loader],
                                        batch_size=32,
                                        shuffle=False)
    vz_ce_loss_eval = CrossEntropyLossNM(logits_ndim=2)
    vz_labels_eval, encoding_eval = dataset_eval()
    vz_eval = alexnet(encoding=encoding_eval)
    vz_loss_eval = vz_ce_loss_eval(logits=vz_eval, labels=vz_labels_eval)

    # Logger callback: print loss tensors every training step.
    logger_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[vz_loss, vz, vz_labels],
        step_freq=1,
    )

    # Evaluator callback: run the eval DAG every step and aggregate via the
    # user-provided iteration/epoch callbacks.
    evaluator_callback = nemo.core.EvaluatorCallback(
        eval_tensors=[vz_loss_eval, vz_eval, vz_labels_eval],
        user_iter_callback=eval_iter_callback,
        user_epochs_done_callback=eval_epochs_done_callback,
        eval_step=1,
    )

    # Checkpointing models through NeMo callback
    checkpoint_callback = nemo.core.CheckpointCallback(
        folder=nf.checkpoint_dir,
        load_from_folder=None,
        # Checkpointing frequency in steps (-1 disables step-based saving)
        step_freq=-1,
        # Checkpointing frequency in epochs
        epoch_freq=1,
        # Number of checkpoints to keep
        checkpoints_to_keep=1,
        # If True, CheckpointCallback will raise an Error if restoring fails
        force_load=False)

    # Invoke the "train" action: one epoch of Adam at lr=0.001.
    nf.train(
        [vz_loss],
        callbacks=[logger_callback, checkpoint_callback, evaluator_callback],
        optimization_params={
            "num_epochs": 1,
            "lr": 0.001
        },
        optimizer="adam")

    # The epoch-frequency checkpoint callback should have written this file;
    # the name follows NeMo's "<ModuleName>-EPOCH-<n>.pt" convention.
    assert (os.path.exists(os.path.join(model_dir, "AlexNet-EPOCH-1.pt")))
Example #3
# Resolve the repository root (four levels up from this file) so test data
# can be located with an absolute path regardless of the working directory.
repo_root_dir = pathlib.Path(__file__).parent.parent.parent.parent.absolute()

# Create neural factory. In this case, the checkpoint_dir has to be set for NeMo to pick
# up a pre-trained model.
nf = nemo.core.NeuralModuleFactory(
    placement=nemo.core.neural_factory.DeviceType.GPU, checkpoint_dir="./")

# Dataset generation is done in a similar manner. It's important to note that the encoder used
# for inference must match that used during training (same window size, read
# depth, and layer set), otherwise the input tensors won't line up with the
# trained weights.
encoding_layers = [PileupEncoder.Layer.READ, PileupEncoder.Layer.BASE_QUALITY]
pileup_encoder = PileupEncoder(window_size=100,
                               max_reads=100,
                               layers=encoding_layers)

# Neural Network: one input channel per encoder layer, 3 output logits.
model = AlexNet(num_input_channels=len(encoding_layers), num_output_logits=3)

# Similar to training, a dataloader needs to be setup for the relevant datasets. In the case of
# inference, it doesn't matter if the files are tagged as false positive or not. Each example will be
# evaluated by the network. For simplicity the example is using the same dataset from training.
# Note: No label encoder is required in inference.
data_folder = os.path.join(repo_root_dir, "tests", "data")
bam = os.path.join(data_folder, "small_bam.bam")
labels = os.path.join(data_folder, "candidates.vcf.gz")
vcf_loader = VCFReader(vcf=labels, bams=[bam], is_fp=False)
test_dataset = ReadPileupDataLoader(ReadPileupDataLoader.Type.TEST,
                                    [vcf_loader],
                                    batch_size=32,
                                    shuffle=False,
                                    sample_encoder=pileup_encoder)
Example #4
def create_model():
    """Build and return the AlexNet classifier used for training.

    The network takes 2-channel pileup encodings and emits 3 logits,
    one per zygosity class.
    """
    return AlexNet(num_input_channels=2, num_output_logits=3)