Example 1
    def __init__(
            self,
            hla_alleles,
            netmhc_command="netMHCpan"):
        self.netmhc_command = netmhc_command

        # Check that the netMHCpan binary can actually be executed
        try:
            run_command([self.netmhc_command])
        except Exception:
            assert False, "Failed to run %s" % self.netmhc_command

        try:
            valid_alleles_str = check_output([self.netmhc_command, "-listMHC"])
            assert len(valid_alleles_str) > 0, \
                "%s returned empty allele list" % self.netmhc_command
            valid_alleles = set([])
            for line in valid_alleles_str.split("\n"):
                # skip comment lines and the trailing empty string from split
                if line and not line.startswith("#"):
                    valid_alleles.add(line)
        except Exception:
            logging.warning("Failed to run %s -listMHC", self.netmhc_command)
            valid_alleles = None

        self.alleles = []
        for allele in hla_alleles:
            allele = normalize_hla_allele_name(allele.strip().upper())
            # netMHCpan drops the "*" from allele names,
            # e.g. "HLA-A*03:01" becomes "HLA-A03:01"
            if valid_alleles and allele.replace("*", "") not in valid_alleles:
                print("Skipping %s (not available in NetMHCpan)" % allele)
            else:
                self.alleles.append(allele)
        # don't run the MHC predictor twice for homozygous alleles,
        # only run it for unique alleles
        self.alleles = set(self.alleles)
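
For reference, a minimal usage sketch of the constructor above. The class name NetMHCpanPredictor and the allele values are assumptions for illustration (the excerpt shows only __init__), and the netMHCpan binary plus the helpers run_command, check_output and normalize_hla_allele_name must be available.

# Hypothetical usage of the constructor above; NetMHCpanPredictor is an
# assumed name for the enclosing class, and netMHCpan must be on the PATH.
predictor = NetMHCpanPredictor(
    hla_alleles=["HLA-A*02:01", "HLA-A*02:01", "HLA-B*07:02"],
    netmhc_command="netMHCpan")

# Homozygous duplicates are collapsed and names are normalized, so
# predictor.alleles holds only the unique, validated alleles.
print(predictor.alleles)
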
Example 2
    def __init__(self, hla_alleles, netmhc_command="netMHCpan"):
        self.netmhc_command = netmhc_command

        # Check that the netMHCpan binary can actually be executed
        try:
            run_command([self.netmhc_command])
        except Exception:
            assert False, "Failed to run %s" % self.netmhc_command

        try:
            valid_alleles_str = check_output([self.netmhc_command, "-listMHC"])
            assert len(valid_alleles_str) > 0, \
                "%s returned empty allele list" % self.netmhc_command
            valid_alleles = set([])
            for line in valid_alleles_str.split("\n"):
                # skip comment lines and the trailing empty string from split
                if line and not line.startswith("#"):
                    valid_alleles.add(line)
        except Exception:
            logging.warning("Failed to run %s -listMHC", self.netmhc_command)
            valid_alleles = None

        self.alleles = []
        for allele in hla_alleles:
            allele = normalize_hla_allele_name(allele.strip().upper())
            # netMHCpan drops the "*" from allele names,
            # e.g. "HLA-A*03:01" becomes "HLA-A03:01"
            if valid_alleles and allele.replace("*", "") not in valid_alleles:
                print("Skipping %s (not available in NetMHCpan)" % allele)
            else:
                self.alleles.append(allele)
        # don't run the MHC predictor twice for homozygous alleles,
        # only run it for unique alleles
        self.alleles = set(self.alleles)
Example 3
    def predict(self, df, mutation_window_size=None):
        """
        Given a dataframe of mutated amino acid sequences, run each sequence
        through NetMHCpan.
        If mutation_window_size is not None then only make predictions for
        residues within that number of positions of a mutation.

        Expects the input DataFrame to have the following fields:
            - SourceSequence
            - MutationStart
            - MutationEnd
            - GeneInfo
            - Gene
            - GeneMutationInfo
            - PeptideMutationInfo
            - TranscriptId
        """

        input_filename, peptide_entries = create_input_fasta_file(
            df,
            mutation_window_size=mutation_window_size
        )

        alleles_str = \
            ",".join(allele.replace("*", "") for allele in self.alleles)
        output_file = tempfile.NamedTemporaryFile(
            "r+",
            prefix="netMHCpan_output",
            delete=False)
        command = [
            self.netmhc_command,
            "-xls",
            "-xlsfile", output_file.name,
            "-l", "9",
            "-f", input_filename,
            "-a", alleles_str]
        print " ".join(command)

        with CleanupFiles(
                filenames=[input_filename],
                files=[output_file]):
            run_command(command)
            results = parse_xls_file(
                output_file.read(),
                peptide_entries,
                mutation_window_size=mutation_window_size)

        assert len(results) > 0, "No epitopes from netMHCpan"
        return pd.DataFrame.from_records(results)
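
A sketch of how predict might be called, assuming the same hypothetical NetMHCpanPredictor class as above; the DataFrame values are made-up placeholders that only illustrate the columns listed in the docstring.

import pandas as pd

# Illustrative input row with the columns the docstring requires; the
# values are invented and only show the expected shape of the DataFrame.
df = pd.DataFrame([{
    "SourceSequence": "SIINLEKLQRSTVML",
    "MutationStart": 4,
    "MutationEnd": 5,
    "GeneInfo": "ENSG00000000000",
    "Gene": "TP53",
    "GeneMutationInfo": "chr17 g.7578406C>T",
    "PeptideMutationInfo": "F5L",
    "TranscriptId": "ENST00000000000",
}])

predictor = NetMHCpanPredictor(hla_alleles=["HLA-A*02:01"])
# Only score peptides overlapping the window around the mutated residues
epitopes = predictor.predict(df, mutation_window_size=9)
print(epitopes.head())
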
Example 4
    def predict(self, df, mutation_window_size=None):
        """
        Given a dataframe of mutated amino acid sequences, run each sequence
        through NetMHCpan.
        If mutation_window_size is not None then only make predictions for
        residues within that number of positions of a mutation.

        Expects the input DataFrame to have the following fields:
            - SourceSequence
            - MutationStart
            - MutationEnd
            - GeneInfo
            - Gene
            - GeneMutationInfo
            - PeptideMutationInfo
            - TranscriptId
        """

        input_filename, peptide_entries = create_input_fasta_file(
            df, mutation_window_size=mutation_window_size)

        alleles_str = \
            ",".join(allele.replace("*", "") for allele in self.alleles)
        output_file = tempfile.NamedTemporaryFile("r+",
                                                  prefix="netMHCpan_output",
                                                  delete=False)
        command = [
            self.netmhc_command, "-xls", "-xlsfile", output_file.name, "-l",
            "9", "-f", input_filename, "-a", alleles_str
        ]
        print " ".join(command)

        with CleanupFiles(filenames=[input_filename], files=[output_file]):
            run_command(command)
            results = parse_xls_file(output_file.read(),
                                     peptide_entries,
                                     mutation_window_size=mutation_window_size)

        assert len(results) > 0, "No epitopes from netMHCpan"
        return pd.DataFrame.from_records(results)