def __init__(self, reference_folder: ReferenceFolder,
                 configuration: DependenciesConfiguration):
        """
        Sets up the Faker-backed patient and neoantigen providers.

        The MHC I alleles and MHC II isoforms given to the patient provider are the union of the
        alleles reported by the MixMHCpred/MixMhc2Pred predictors and those listed as available
        by the reference folder.

        :param reference_folder: source of the MHC database, the available alleles and the proteome folder
        :param configuration: dependencies configuration passed on to the predictors
        """
        self.hla_database = reference_folder.get_mhc_database()
        faker = Faker()

        # MHC class I: the predictor is only instantiated to read its list of available alleles,
        # hence no runner and no MHC parser
        mhc1_predictor = MixMHCpred(None, configuration=configuration, mhc_parser=None)
        mhc1_from_predictor = set(
            self.load_mhc1_alleles(mhc1_predictor.available_alleles))
        mhc1_from_references = set(
            self.load_mhc1_alleles(
                reference_folder.get_available_alleles().get_available_mhc_i()))
        mhc1_alleles = mhc1_from_predictor | mhc1_from_references

        # MHC class II: same approach with the class II predictor
        mhc2_predictor = MixMhc2Pred(
            runner=None, configuration=configuration, mhc_parser=None)
        mhc2_from_predictor = set(
            self.load_mhc2_alleles(mhc2_predictor.available_alleles))
        mhc2_from_references = set(
            self.load_mhc2_alleles(
                reference_folder.get_available_alleles().get_available_mhc_ii()))
        mhc2_isoforms = mhc2_from_predictor | mhc2_from_references

        self.patient_provider = PatientProvider(
            faker, mhc1_alleles, mhc2_isoforms, self.hla_database)

        proteome_fasta = os.path.join(reference_folder.proteome_db,
                                      HOMO_SAPIENS_FASTA)
        self.neoantigen_provider = NeoantigenProvider(
            faker, proteome_fasta=proteome_fasta)
Example #2
0
 def test_non_supported_organism(self):
     """
     Building a ReferenceFolder for the organism "rat" must raise a NeofoxConfigurationException,
     even though every resource of the fake reference folder is mocked as existing.
     """
     fake = FakeReferenceFolder()
     test_tools.mock_file_existence(existing_files=fake.resources)
     self.assertRaises(
         NeofoxConfigurationException, ReferenceFolder, organism="rat")
Example #3
0
 def __init__(self, references: ReferenceFolder, runner, configuration):
     """
     Keeps the runner and the configuration and stores the value returned by
     references.get_iedb_fasta().

     :param references: reference folder queried for the IEDB fasta
     :type runner: neofox.helpers.runner.Runner
     :type configuration: neofox.references.DependenciesConfiguration
     """
     self.iedb_fasta = references.get_iedb_fasta()
     self.configuration = configuration
     self.runner = runner
Example #4
0
    def __init__(
        self,
        references: ReferenceFolder,
        configuration: DependenciesConfiguration,
        tcell_predictor: TcellPrediction,
        self_similarity: SelfSimilarityCalculator,
        affinity_threshold=neofox.AFFINITY_THRESHOLD_DEFAULT
    ):
        """
        Class to annotate neoantigens.

        :param references: reference folder providing the proteome, the available alleles, the organism,
            the Uniprot pickle, the BLAST databases, the MHC database and the resources versions
        :param configuration: dependencies configuration handed to the tools that need it
        :param tcell_predictor: T cell predictor, stored as is
        :param self_similarity: self similarity calculator, stored as is
        :param affinity_threshold: threshold forwarded to every calculator that accepts one
        """
        runner = Runner()
        threshold = affinity_threshold

        # plain state taken from the parameters and the reference folder
        self.runner = runner
        self.configuration = configuration
        self.proteome_db = references.proteome_db
        self.available_alleles = references.get_available_alleles()
        self.tcell_predictor = tcell_predictor
        self.self_similarity = self_similarity
        self.organism = references.organism

        # NOTE: this one loads a big file, but loading it multiple times is faster than passing it around
        self.uniprot = Uniprot(references.uniprot_pickle)

        # BLASTP runners, one against the proteome database and one against the IEDB database
        proteome_blastp = BlastpRunner(
            runner=runner,
            configuration=configuration,
            database=references.get_proteome_database(),
        )
        self.proteome_blastp_runner = proteome_blastp
        iedb_blastp = BlastpRunner(
            runner=runner,
            configuration=configuration,
            database=references.get_iedb_database(),
        )
        self.iedb_blastp_runner = iedb_blastp

        # NOTE: these resources do not read any file, thus they can be initialised fast
        self.dissimilarity_calculator = DissimilarityCalculator(
            proteome_blastp_runner=proteome_blastp,
            affinity_threshold=threshold,
        )
        self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(
            iedb_blastp_runner=iedb_blastp)
        self.neoag_calculator = NeoagCalculator(
            runner=runner,
            configuration=configuration,
            affinity_threshold=threshold,
        )
        self.differential_binding = DifferentialBinding(
            affinity_threshold=threshold)
        self.priority_score_calculator = PriorityScore()
        self.iedb_immunogenicity = IEDBimmunogenicity(
            affinity_threshold=threshold)
        self.amplitude = Amplitude()
        self.hex = Hex(
            runner=runner, configuration=configuration, references=references)

        # the MHC parser is derived from the MHC database of the reference folder
        mhc_database = references.get_mhc_database()
        self.mhc_database = mhc_database
        self.mhc_parser = MhcParser.get_mhc_parser(mhc_database)

        self.resources_versions = references.get_resources_versions()
Example #5
0
 def test_one_resource_do_not_exist(self):
     """
     When the first resource of the fake reference folder is mocked as missing and all the others
     as existing, building a ReferenceFolder must raise a NeofoxConfigurationException.
     """
     fake = FakeReferenceFolder()
     test_tools.mock_file_existence(
         existing_files=fake.resources[1:],
         non_existing_files=[fake.resources[0]],
     )
     self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
Example #6
0
 def test_organism_human(self):
     """
     With every resource of the default fake reference folder mocked as existing, a ReferenceFolder
     for ORGANISM_HOMO_SAPIENS must be created without raising.
     """
     fake = FakeReferenceFolder()
     test_tools.mock_file_existence(existing_files=fake.resources)
     ReferenceFolder(organism=ORGANISM_HOMO_SAPIENS)
Example #7
0
 def test_organism_mouse(self):
     """
     With every resource of the ORGANISM_MUS_MUSCULUS fake reference folder mocked as existing,
     a ReferenceFolder for the same organism must be created without raising.
     """
     organism = ORGANISM_MUS_MUSCULUS
     fake = FakeReferenceFolder(organism=organism)
     test_tools.mock_file_existence(existing_files=fake.resources)
     ReferenceFolder(organism=organism)
Example #8
0
 def test_all_resources_exist(self):
     """
     With every resource of the fake reference folder mocked as existing, a ReferenceFolder
     with default arguments must be created without raising.
     """
     fake = FakeReferenceFolder()
     test_tools.mock_file_existence(existing_files=fake.resources)
     ReferenceFolder()
Example #9
0
 def test_non_existing_reference(self):
     """
     Pointing the reference folder environment variable to "/non_existing_folder" must make
     ReferenceFolder raise a NeofoxConfigurationException.
     """
     env_variable = neofox.REFERENCE_FOLDER_ENV
     os.environ[env_variable] = "/non_existing_folder"
     self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
Example #10
0
 def test_empty_string_reference(self):
     """
     Setting the reference folder environment variable to an empty string must make
     ReferenceFolder raise a NeofoxConfigurationException.
     """
     env_variable = neofox.REFERENCE_FOLDER_ENV
     os.environ[env_variable] = ""
     self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
Example #11
0
 def test_not_provided_reference(self):
     """
     Without the reference folder environment variable, ReferenceFolder must raise a
     NeofoxConfigurationException.
     """
     # pop() with a default instead of `del`: the variable may already be absent (e.g. when this
     # test runs in isolation), in which case `del` would abort the test with a KeyError before
     # ReferenceFolder is exercised at all
     os.environ.pop(neofox.REFERENCE_FOLDER_ENV, None)
     with self.assertRaises(NeofoxConfigurationException):
         ReferenceFolder()
Example #12
0
def load_references(organism=ORGANISM_HOMO_SAPIENS):
    """
    Loads the dotenv environment, overriding variables that are already set, and builds
    the reference folder and the dependencies configuration.

    :param organism: organism passed on to ReferenceFolder
    :return: a tuple (ReferenceFolder, DependenciesConfiguration)
    """
    dotenv.load_dotenv(override=True)
    reference_folder = ReferenceFolder(organism=organism)
    configuration = DependenciesConfiguration()
    return reference_folder, configuration
Example #13
0
def _build_cli_parser():
    """Builds the argument parser of the NeoFox command line interface."""
    parser = ArgumentParser(
        description=
        "NeoFox {} annotates a given set of neoantigen candidate sequences "
        "derived from point mutation with relevant neoantigen features".format(
            neofox.VERSION),
        epilog=epilog)
    parser.add_argument(
        "--candidate-file",
        dest="candidate_file",
        help=
        "input file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--json-file",
        dest="json_file",
        help=
        "input JSON file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--patient-data",
        dest="patients_data",
        help=
        "file with data for patients with columns: identifier, estimated_tumor_content, "
        "mhc_i_alleles, mhc_ii_alleles, tissue",
        required=True,
    )
    parser.add_argument(
        "--output-folder",
        dest="output_folder",
        help="output folder",
        required=True,
    )
    parser.add_argument(
        "--output-prefix",
        dest="output_prefix",
        help="prefix to name output files in the output folder",
        default="neofox",
    )
    parser.add_argument(
        "--with-table",
        dest="with_table",
        action="store_true",
        help="output results in a short wide tab-separated table "
        "(if no format is specified this is the default)",
    )
    parser.add_argument(
        "--with-json",
        dest="with_json",
        action="store_true",
        help="output results in JSON format",
    )
    parser.add_argument(
        "--patient-id",
        dest="patient_id",
        help=
        "the patient id for the input file. This parameter is only required, "
        'if the column "patient" has not been added to the candidate file',
    )
    # type=int makes argparse reject non-integer values with a usage error
    parser.add_argument(
        "--affinity-threshold",
        dest="affinity_threshold",
        type=int,
        help=
        "neoantigen candidates with a best predicted affinity greater than or equal than this threshold will be "
        "not annotated with features that specifically model neoepitope recognition. A threshold that is commonly "
        "used is 500 nM",
        default=AFFINITY_THRESHOLD_DEFAULT)
    parser.add_argument("--num-cpus",
                        dest="num_cpus",
                        type=int,
                        default=1,
                        help="number of CPUs for computation")
    parser.add_argument(
        "--config",
        dest="config",
        help=
        "an optional configuration file with all the environment variables",
    )
    # the default uses the same constant as the choices so that both cannot drift apart
    parser.add_argument("--organism",
                        dest="organism",
                        choices=[ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS],
                        help="the organism to which the data corresponds",
                        default=ORGANISM_HOMO_SAPIENS)
    return parser


def neofox_cli():
    """
    Entry point of the NeoFox command line interface.

    Parses the command line, checks that exactly one input file (candidate file or JSON file) was
    given, creates the output folder, optionally loads the environment from the configuration file,
    reads the input data, runs the annotation and writes the results.

    :raises NeofoxInputParametersException: if both or none of the input files are provided
    :raises Exception: any exception raised while processing is logged and re-raised
    """
    args = _build_cli_parser().parse_args()

    candidate_file = args.candidate_file
    json_file = args.json_file
    patient_id = args.patient_id
    patients_data = args.patients_data
    output_folder = args.output_folder
    output_prefix = args.output_prefix
    with_table = args.with_table
    with_json = args.with_json
    # argparse only applies type=int to values coming from the command line or to string defaults,
    # int() keeps the result an integer whatever the type of the default constant is
    affinity_threshold = int(args.affinity_threshold)
    num_cpus = args.num_cpus
    config = args.config
    organism = args.organism

    logger.info("NeoFox v{}".format(neofox.VERSION))

    try:
        # check parameters
        if candidate_file and json_file:
            raise NeofoxInputParametersException(
                "Please, define either a candidate file, a standard input file or a JSON file as input. Not many of them"
            )
        if not candidate_file and not json_file:
            raise NeofoxInputParametersException(
                "Please, define one input file, either a candidate file, a standard input file or a JSON file"
            )
        if not with_table and not with_json:
            with_table = True  # if none specified short wide is the default

        # makes sure that the output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # loads configuration
        if config:
            dotenv.load_dotenv(config, override=True)
        reference_folder = ReferenceFolder(organism=organism)

        # reads the input data
        neoantigens, patients = _read_data(candidate_file, json_file,
                                           patients_data, patient_id,
                                           reference_folder.get_mhc_database())

        # run annotations
        annotated_neoantigens = NeoFox(
            neoantigens=neoantigens,
            patients=patients,
            patient_id=patient_id,
            work_folder=output_folder,
            output_prefix=output_prefix,
            num_cpus=num_cpus,
            reference_folder=reference_folder,
            affinity_threshold=affinity_threshold).get_annotations()

        _write_results(
            annotated_neoantigens,
            output_folder,
            output_prefix,
            with_json,
            with_table,
        )
    except Exception as e:
        logger.exception(e)  # logs every exception in the file
        raise  # bare raise keeps the original traceback untouched

    logger.info("Finished NeoFox")
Example #14
0
    def __init__(self,
                 neoantigens: List[Neoantigen],
                 patients: List[Patient],
                 num_cpus: int = 1,
                 patient_id: str = None,
                 work_folder=None,
                 output_prefix=None,
                 reference_folder: ReferenceFolder = None,
                 configuration: DependenciesConfiguration = None,
                 verbose=True,
                 configuration_file=None,
                 affinity_threshold=AFFINITY_THRESHOLD_DEFAULT):
        """
        Loads the configuration and the references, then validates and prepares the input data.

        :param neoantigens: neoantigen candidates; their mutation position is always recomputed
        :param patients: patients, stored in a dictionary keyed by their identifier
        :param num_cpus: stored in self.num_cpus
        :param patient_id: identifier assigned to the neoantigens without a patient identifier
        :param work_folder: used together with output_prefix to derive the log file name
        :param output_prefix: used together with work_folder to derive the log file name
        :param reference_folder: optional reference folder, a new one is created if not provided
        :param configuration: optional dependencies configuration, a new one is created if not provided
        :param verbose: passed to the logs initialisation and to a newly created reference folder
        :param configuration_file: optional dotenv file loaded overriding the existing environment
        :param affinity_threshold: stored and passed to the T cell predictor
        :raises NeofoxConfigurationException: if the neoantigens or the patients are missing or empty
        """
        self.affinity_threshold = affinity_threshold

        if configuration_file:
            dotenv.load_dotenv(configuration_file, override=True)

        # initialise logs
        self.log_file_name = self._get_log_file_name(output_prefix, work_folder)
        self._initialise_logs(self.log_file_name, verbose)

        # initialise the reference folder and the configuration
        # NOTE: the objects passed as parameters take precedence, which makes this testable with fake objects
        if reference_folder:
            self.reference_folder = reference_folder
        else:
            self.reference_folder = ReferenceFolder(verbose=verbose)
        # NOTE: this call forces the loading of the available alleles here
        self.reference_folder.get_available_alleles()
        self.configuration = configuration or DependenciesConfiguration()
        self.tcell_predictor = TcellPrediction(
            affinity_threshold=self.affinity_threshold)
        self.self_similarity = SelfSimilarityCalculator()
        self.num_cpus = num_cpus

        # both inputs are mandatory and must not be empty
        if any(data is None or len(data) == 0
               for data in (neoantigens, patients)):
            raise NeofoxConfigurationException(
                "Missing input data to run Neofox")

        # validates neoantigens
        self.neoantigens = neoantigens
        for candidate in self.neoantigens:
            if candidate.patient_identifier is None:
                candidate.patient_identifier = patient_id
            # NOTE: the position of the mutation is not expected from the user, any provided value is ignored
            candidate.mutation.position = EpitopeHelper.mut_position_xmer_seq(
                mutation=candidate.mutation)
            ModelValidator.validate_neoantigen(candidate)

        # validates patients
        self.patients = {}
        for current_patient in patients:
            ModelValidator.validate_patient(
                current_patient, organism=self.reference_folder.organism)
            self.patients[current_patient.identifier] = current_patient

        self._validate_input_data()

        # collects the RNA expression values of the neoantigens of each patient
        expression_per_patient = {
            current_patient.identifier: []
            for current_patient in self.patients.values()
        }
        for candidate in self.neoantigens:
            patient_expression = expression_per_patient[
                candidate.patient_identifier]
            patient_expression.append(candidate.rna_expression)

        # RNA is flagged as available for a patient when none of its expression values is missing
        for current_patient in self.patients.values():
            observed = expression_per_patient[current_patient.identifier]
            current_patient.is_rna_available = all(
                value is not None for value in observed)

        # only performs the expression imputation for humans
        if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS:
            # impute expression from TCGA, ONLY if is_rna_available is False for a given patient,
            # otherwise the original value is reported
            # NOTE: this must happen after validation to avoid uncaptured errors due to missing patients
            # NOTE: adds the gene expression to the neoantigen candidate model
            self.neoantigens = self._conditional_expression_imputation()

        logger.info("Data loaded")