def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
    # The Isovar version of this test is odd: its VCF lists only the G>T
    # variant and leaves out the nucleotide change T>G which follows it.
    # Rather than reason about variant phasing, the vaxrank copy of the VCF
    # was changed to contain a single GT>TG variant.
    cli_args = make_variant_sequences_arg_parser().parse_args([
        "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"),
        "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
    ])
    ranking_options = dict(
        reads_generator=allele_reads_generator_from_args(cli_args),
        mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True,
        max_vaccine_peptides_per_variant=1,
    )
    # only the ranked (variant, vaccine peptides) pairs are needed here;
    # the second returned value is ignored
    ranked_list, _ = ranked_vaccine_peptides(**ranking_options)

    for variant, peptides in ranked_list:
        # check the protein fragment of the first vaccine peptide
        check_mutant_amino_acids(variant, peptides[0].mutant_protein_fragment)
def test_keep_top_k_epitopes():
    cli_args = make_variant_sequences_arg_parser().parse_args([
        "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"),
        "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
    ])
    allele_reads = allele_reads_generator_from_args(cli_args)

    # number of mutant epitope predictions each vaccine peptide should retain
    keep_k_epitopes = 3
    ranked_list, _ = ranked_vaccine_peptides(
        reads_generator=allele_reads,
        mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
        num_mutant_epitopes_to_keep=keep_k_epitopes,
        max_vaccine_peptides_per_variant=1,
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True)

    for _, vaccine_peptides in ranked_list:
        first_peptide = vaccine_peptides[0]
        kept_predictions = first_peptide.mutant_epitope_predictions
        eq_(keep_k_epitopes, len(kept_predictions))
        # Recompute the score from the kept predictions to confirm that the
        # top-k argument given to ranked_vaccine_peptides() propagated as
        # expected.
        recomputed_score = 0
        for prediction in kept_predictions:
            recomputed_score += prediction.logistic_epitope_score()
        assert_almost_equal(recomputed_score, first_peptide.mutant_epitope_score)
def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
    # The RNAseq data contains two co-occurring variants, but they fall in
    # different codons, so the Varcode annotation is treated as correct.
    # TODO: deal with phasing of variants explicitly so that both
    # variant positions are considered mutated
    cli_args = make_variant_sequences_arg_parser().parse_args([
        "--vcf", data_path("b16.f10/b16.f10.Wdr13.vcf"),
        "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
    ])
    allele_reads = allele_reads_generator_from_args(cli_args)
    binding_predictor = RandomBindingPredictor(["H-2-Kb", "H-2-Db"])
    ranked_list, _ = ranked_vaccine_peptides(
        reads_generator=allele_reads,
        mhc_predictor=binding_predictor,
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        max_vaccine_peptides_per_variant=1,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True)

    for variant, vaccine_peptides in ranked_list:
        # max_vaccine_peptides_per_variant=1 was requested above, so exactly
        # one peptide is expected for every variant
        num_peptides = len(vaccine_peptides)
        failure_message = (
            "Expected 1 vaccine peptide for variant '%s' but got %d" % (
                variant, num_peptides))
        eq_(1, num_peptides, failure_message)
        check_mutant_amino_acids(
            variant,
            vaccine_peptides[0].mutant_protein_fragment)
Example #4
0
def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
    # The Isovar version of this test is odd: its VCF lists only the G>T
    # variant and leaves out the nucleotide change T>G which follows it.
    # Rather than reason about variant phasing, the vaxrank copy of the VCF
    # was changed to contain a single GT>TG variant.
    vcf_path = data_path("b16.f10/b16.f10.Phip.vcf")
    bam_path = data_path("b16.f10/b16.combined.sorted.bam")
    cli_args = make_variant_sequences_arg_parser().parse_args(
        ["--vcf", vcf_path, "--bam", bam_path])
    allele_reads = allele_reads_generator_from_args(cli_args)
    binding_predictor = RandomBindingPredictor(["H-2-Kb", "H-2-Db"])
    # the result is iterated directly as (variant, vaccine peptides) pairs
    ranked_list = ranked_vaccine_peptides(
        reads_generator=allele_reads,
        mhc_predictor=binding_predictor,
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True,
        max_vaccine_peptides_per_variant=1)

    for variant, peptides in ranked_list:
        # check the protein fragment of the first vaccine peptide
        first_fragment = peptides[0].mutant_protein_fragment
        check_mutant_amino_acids(variant, first_fragment)
Example #5
0
def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
    # The RNAseq data contains two co-occurring variants, but they fall in
    # different codons, so the Varcode annotation is treated as correct.
    # TODO: deal with phasing of variants explicitly so that both
    # variant positions are considered mutated
    cli_args = make_variant_sequences_arg_parser().parse_args([
        "--vcf", data_path("b16.f10/b16.f10.Wdr13.vcf"),
        "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
    ])
    ranking_options = dict(
        reads_generator=allele_reads_generator_from_args(cli_args),
        mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        max_vaccine_peptides_per_variant=1,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True,
    )
    # the result is iterated directly as (variant, vaccine peptides) pairs
    ranked_list = ranked_vaccine_peptides(**ranking_options)

    for variant, vaccine_peptides in ranked_list:
        # max_vaccine_peptides_per_variant=1 was requested above, so exactly
        # one peptide is expected for every variant
        peptide_count = len(vaccine_peptides)
        eq_(
            1,
            peptide_count,
            "Expected 1 vaccine peptide for variant '%s' but got %d" % (
                variant, peptide_count))
        only_peptide = vaccine_peptides[0]
        check_mutant_amino_acids(variant, only_peptide.mutant_protein_fragment)
Example #6
0
def test_keep_top_k_epitopes():
    vcf_path = data_path("b16.f10/b16.f10.Phip.vcf")
    bam_path = data_path("b16.f10/b16.combined.sorted.bam")
    cli_args = make_variant_sequences_arg_parser().parse_args(
        ["--vcf", vcf_path, "--bam", bam_path])
    allele_reads = allele_reads_generator_from_args(cli_args)

    # number of mutant epitope predictions each vaccine peptide should retain
    keep_k_epitopes = 3
    ranking_options = dict(
        reads_generator=allele_reads,
        mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
        vaccine_peptide_length=15,
        padding_around_mutation=5,
        min_alt_rna_reads=1,
        min_variant_sequence_coverage=1,
        variant_sequence_assembly=True,
        max_vaccine_peptides_per_variant=1,
        num_mutant_epitopes_to_keep=keep_k_epitopes,
    )
    ranked_list, _ = ranked_vaccine_peptides(**ranking_options)

    for _, vaccine_peptides in ranked_list:
        first_peptide = vaccine_peptides[0]
        kept_predictions = first_peptide.mutant_epitope_predictions
        eq_(keep_k_epitopes, len(kept_predictions))
        # Recompute the score from the kept predictions to confirm that the
        # top-k argument given to ranked_vaccine_peptides() propagated as
        # expected.
        recomputed_score = sum(
            prediction.logistic_epitope_score()
            for prediction in kept_predictions)
        assert_almost_equal(
            recomputed_score,
            first_peptide.mutant_epitope_score)
Example #7
0
def new_run_arg_parser():
    # Start from Isovar's variant-sequences parser so that its commandline
    # options are inherited, then layer the vaxrank argument groups on top.
    vaxrank_description = (
        "Select personalized vaccine peptides from cancer variants, "
        "expression data, and patient HLA type.")
    arg_parser = make_variant_sequences_arg_parser(
        prog="vaxrank",
        description=vaxrank_description)
    # each helper registers its arguments on the parser; order is preserved
    argument_group_adders = (
        add_translation_args,
        add_mhc_args,
        add_vaccine_peptide_args,
        add_output_args,
        add_optional_output_args,
        add_supplemental_report_args,
    )
    for add_arguments in argument_group_adders:
        add_arguments(arg_parser)
    return arg_parser
Example #8
0
def new_run_arg_parser():
    # Inherit commandline options from Isovar by building on its
    # variant-sequences parser, then register the additional argument groups.
    parser_options = dict(
        prog="vaxrank",
        description=(
            "Select personalized vaccine peptides from cancer variants, "
            "expression data, and patient HLA type."),
    )
    arg_parser = make_variant_sequences_arg_parser(**parser_options)
    # apply each argument-adding helper to the parser, in the listed order
    for register_args in [
            add_translation_args,
            add_mhc_args,
            add_vaccine_peptide_args,
            add_output_args,
            add_optional_output_args,
            add_supplemental_report_args]:
        register_args(arg_parser)
    return arg_parser
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function, division, absolute_import
import logging
import logging.config
import pkg_resources

from isovar.cli.variant_sequences import make_variant_sequences_arg_parser, variant_sequences_dataframe_from_args

# Configure logging from the "logging.conf" resource of the isovar.cli
# package, then create this module's logger.
logging.config.fileConfig(pkg_resources.resource_filename("isovar.cli", "logging.conf"))
logger = logging.getLogger(__name__)

# Build the variant-sequences argument parser with add_sequence_length_arg
# enabled, and add an --output option naming the CSV file to write.
parser = make_variant_sequences_arg_parser(add_sequence_length_arg=True)
parser.add_argument(
    "--output",
    default="isovar-variant-sequences-results.csv",
    help="Name of CSV file which contains predicted sequences",
)

if __name__ == "__main__":
    # Script entry point: parse the commandline, compute the variant
    # sequences table from the parsed arguments, log both, and write the
    # table to the path given by --output.
    args = parser.parse_args()
    logger.info(args)
    # presumably a pandas DataFrame (it is written with .to_csv below)
    df = variant_sequences_dataframe_from_args(args)
    logger.info(df)
    df.to_csv(args.output)
Example #10
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function, division, absolute_import
import logging
import logging.config
import pkg_resources

from isovar.cli.variant_sequences import (
    make_variant_sequences_arg_parser,
    variant_sequences_dataframe_from_args
)

# Configure logging from the 'logging.conf' resource of the isovar.cli
# package, then create this module's logger.
logging.config.fileConfig(
    pkg_resources.resource_filename('isovar.cli', 'logging.conf'))
logger = logging.getLogger(__name__)

# Build the variant-sequences argument parser with add_sequence_length_arg
# enabled, and add an --output option naming the CSV file to write.
parser = make_variant_sequences_arg_parser(add_sequence_length_arg=True)
parser.add_argument(
    "--output",
    default="isovar-variant-sequences-results.csv",
    help="Name of CSV file which contains predicted sequences")

if __name__ == "__main__":
    # Script entry point: parse the commandline, compute the variant
    # sequences table from the parsed arguments, log both, and write the
    # table to the path given by --output.
    args = parser.parse_args()
    logger.info(args)
    # presumably a pandas DataFrame (it is written with .to_csv below)
    df = variant_sequences_dataframe_from_args(args)
    logger.info(df)
    df.to_csv(args.output)