def test_neoantigens_referring_to_non_existing_patients(self):
    """Invalid patient identifiers on a neoantigen are rejected by NeoFox.

    The same test neoantigen is given, in turn, a free-text identifier,
    ``None`` and the empty string; building ``NeoFox`` must raise
    ``NeofoxDataValidationException`` each time.
    """
    neoantigen = self._get_test_neoantigen()
    invalid_identifiers = (
        "I am not patient",  # presumably not the identifier of the test patient
        None,
        "",
    )
    for identifier in invalid_identifiers:
        neoantigen.patient_identifier = identifier
        with self.assertRaises(NeofoxDataValidationException):
            NeoFox(
                neoantigens=[neoantigen],
                patient_id=None,
                patients=[self._get_test_patient()],
                num_cpus=1,
                reference_folder=FakeReferenceFolder(),
                configuration=FakeDependenciesConfiguration(),
            )
def test_patient_with_non_existing_allele_does_not_crash(self):
    """Annotation completes when every patient's first MHC I allele is HLA-A*99:99."""
    neoantigens, patients, patient_id = self._get_test_data()
    for patient in patients:
        # build the replacement allele first, then overwrite the patient's
        # first MHC I allele with it (HLA-A*99:99 is meant not to exist)
        unknown_mhc1 = MhcFactory.build_mhc1_alleles(
            ["HLA-A*99:99"], mhc_database=self.hla_database)
        patient.mhc1[0].alleles[0] = unknown_mhc1[0].alleles[0]
    runner = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    )
    # no assertion: the test passes as long as nothing is raised
    runner.get_annotations()
def test_neoantigen_without_9mer_netmhcpan_results(self):
    """Annotating this neoantigen/allele combination returns a non-None result."""
    patient_identifier = "12345"
    mutation = Mutation(
        wild_type_xmer="HLAQHQRVHTGEKPYKCNECGKTFRQT",
        mutated_xmer="HLAQHQRVHTGEKAYKCNECGKTFRQT")
    neoantigen = Neoantigen(
        mutation=mutation, patient_identifier=patient_identifier)
    patient = PatientFactory.build_patient(
        identifier=patient_identifier,
        mhc_alleles=[
            "HLA-A*24:106", "HLA-A*02:200",
            "HLA-B*08:33", "HLA-B*40:94",
            "HLA-C*02:20", "HLA-C*07:86",
        ],
        mhc2_alleles=[
            "HLA-DRB1*07:14", "HLA-DRB1*04:18",
            "HLA-DPA1*01:05", "HLA-DPA1*03:01",
            "HLA-DPB1*17:01", "HLA-DPB1*112:01",
            "HLA-DQA1*01:06", "HLA-DQA1*01:09",
            "HLA-DQB1*03:08", "HLA-DQB1*06:01",
        ],
        mhc_database=self.references.get_mhc_database())
    runner = NeoFox(
        neoantigens=[neoantigen],
        patients=[patient],
        num_cpus=1,
    )
    annotations = runner.get_annotations()
    # it does not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_neoantigen_in_proteome(self):
    """Annotating a neoantigen that only has a mutated sequence returns a result.

    NOTE(review): going by the test name, the sequence is presumably part of
    the reference proteome -- confirm.
    """
    patient_identifier = "12345"
    mutation = Mutation(mutated_xmer="PKLLENLLSKGETISFLECF")
    neoantigen = Neoantigen(
        mutation=mutation, patient_identifier=patient_identifier)
    patient = PatientFactory.build_patient(
        identifier=patient_identifier,
        mhc_alleles=[
            "HLA-A*24:106", "HLA-A*02:200",
            "HLA-B*08:33", "HLA-B*40:94",
            "HLA-C*02:20", "HLA-C*07:86",
        ],
        mhc2_alleles=[
            "HLA-DRB1*07:14", "HLA-DRB1*04:18",
            "HLA-DPA1*01:05", "HLA-DPA1*03:01",
            "HLA-DPB1*17:01", "HLA-DPB1*112:01",
            "HLA-DQA1*01:06", "HLA-DQA1*01:09",
            "HLA-DQB1*03:08", "HLA-DQB1*06:01",
        ],
        mhc_database=self.references.get_mhc_database())
    runner = NeoFox(
        neoantigens=[neoantigen],
        patients=[patient],
        num_cpus=1,
    )
    annotations = runner.get_annotations()
    # the only check is that the run finished and returned something
    self.assertIsNotNone(annotations)
def test_neofox_without_mixmhcpreds(self):
    """
    This test aims at testing neofox when MixMHCpred and MixMHC2pred are not configured.
    As these are optional it should just run, but without these annotations in the output.

    The two environment variables are removed only for the duration of the
    NeoFox run and restored afterwards, so that the change does not leak into
    other tests.
    """
    removed_variables = {}
    for name in (NEOFOX_MIXMHCPRED_ENV, NEOFOX_MIXMHC2PRED_ENV):
        # tolerate a variable that was never set: that is already the
        # "not configured" state this test is about
        if name in os.environ:
            removed_variables[name] = os.environ.pop(name)
    try:
        annotations = NeoFox(
            neoantigens=self.neoantigens,
            patient_id=self.patient_id,
            patients=self.patients,
            num_cpus=1,
        ).get_annotations()
    finally:
        # put back whatever was removed, even when the run above failed
        os.environ.update(removed_variables)

    annotation_names = [
        a.name for n in annotations
        for a in n.neofox_annotations.annotations
    ]

    # check it does not contain any of the MixMHCpred annotations
    self.assertNotIn("MixMHC2pred_best_peptide", annotation_names)
    self.assertNotIn("MixMHC2pred_best_rank", annotation_names)
    self.assertNotIn("MixMHC2pred_best_allele", annotation_names)
    self.assertNotIn("MixMHCpred_best_peptide", annotation_names)
    self.assertNotIn("MixMHCpred_best_score", annotation_names)
    self.assertNotIn("MixMHCpred_best_rank", annotation_names)
    self.assertNotIn("MixMHCpred_best_allele", annotation_names)
    # checks it does have some of the NetMHCpan annotations
    self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names)
    self.assertIn("Best_rank_MHCII_score", annotation_names)
def compute_annotations():
    """Run NeoFox with 4 CPUs and return its annotations.

    ``neoantigens`` and ``self`` are free variables taken from the enclosing
    scope (not visible here).
    """
    runner = NeoFox(
        neoantigens=neoantigens,
        patient_id=self.patient_id,
        patients=self.patients,
        num_cpus=4,
    )
    return runner.get_annotations()
def test_missing_input_raises_exception(self):
    """NeoFox refuses to start when neoantigens and patients are ``None`` or empty."""
    missing_inputs = (
        (None, None),
        ([], []),
    )
    for neoantigens, patients in missing_inputs:
        with self.assertRaises(NeofoxConfigurationException):
            NeoFox(
                neoantigens=neoantigens,
                patient_id=None,
                patients=patients,
                num_cpus=1,
                reference_folder=FakeReferenceFolder(),
            )
def test_not_set_reference_folder_fails(self):
    """Building and running NeoFox here must raise ``NeofoxConfigurationException``.

    No ``configuration`` argument is passed; only a ``FakeReferenceFolder``.
    """
    with self.assertRaises(NeofoxConfigurationException):
        runner = NeoFox(
            neoantigens=[self._get_test_neoantigen()],
            patient_id=None,
            patients=[self._get_test_patient()],
            num_cpus=1,
            reference_folder=FakeReferenceFolder(),
        )
        runner.get_annotations()
def test_neofox(self):
    """
    This test is equivalent to the command line call:
    neofox --candidate-file /projects/SUMMIT/WP1.2/neofox/development/Pt29.sequences4testing.txt
    --patient-id Pt29 --patients-data ../resources/patient.pt29.csv

    NOTE: we will need to check the output when the calculation of results and printing to stdout have been
    decoupled
    """
    # one timestamp for the whole run, so the three output files always share
    # the same prefix even when the clock ticks over between them
    timestamp = "{:%Y%m%d%H%M%S}".format(datetime.now())
    output_file = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_{}_neoantigen_candidates_annotated.tsv".format(timestamp),
    )
    output_file_neoantigens = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_{}.neoantigens.tsv".format(timestamp),
    )
    output_json_neoantigens = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_{}.neoantigen_candidates.json".format(timestamp),
    )
    annotations = NeoFox(
        neoantigens=self.neoantigens,
        patient_id=self.patient_id,
        patients=self.patients,
        num_cpus=4,
    ).get_annotations()
    annotation_names = [
        a.name for n in annotations
        for a in n.neofox_annotations.annotations
    ]

    # check it does contain the MixMHCpred annotations
    self.assertIn("MixMHC2pred_best_peptide", annotation_names)
    self.assertIn("MixMHC2pred_best_rank", annotation_names)
    self.assertIn("MixMHC2pred_best_allele", annotation_names)
    self.assertIn("MixMHCpred_best_peptide", annotation_names)
    self.assertIn("MixMHCpred_best_score", annotation_names)
    self.assertIn("MixMHCpred_best_rank", annotation_names)
    self.assertIn("MixMHCpred_best_allele", annotation_names)
    # checks it does have some of the NetMHCpan annotations
    self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names)
    self.assertIn("Best_rank_MHCII_score", annotation_names)

    # writes output
    ModelConverter.annotations2table(neoantigens=annotations).to_csv(
        output_file, sep="\t", index=False)
    ModelConverter._objects2dataframe(annotations).to_csv(
        output_file_neoantigens, sep="\t", index=False)
    # NOTE(review): binary mode only works if `json.dumps` returns bytes here
    # (e.g. `json` bound to orjson) -- confirm against the file's imports
    with open(output_json_neoantigens, "wb") as f:
        f.write(json.dumps(ModelConverter.objects2json(annotations)))

    # regression test
    self._regression_test_on_output_file(new_file=output_file)
def test_valid_data_does_not_raise_exceptions(self):
    """Building NeoFox from the test neoantigen and test patient raises nothing."""
    neoantigens = [self._get_test_neoantigen()]
    patients = [self._get_test_patient()]
    # no assertion: any exception from the constructor fails the test
    NeoFox(
        neoantigens=neoantigens,
        patient_id=None,
        patients=patients,
        num_cpus=1,
        reference_folder=FakeReferenceFolder(),
        configuration=FakeDependenciesConfiguration(),
    )
def test_empty_reference_folder_fails(self):
    """With the reference folder variable set to "dummy", NeoFox must fail.

    The environment variable is overwritten only for the duration of the
    check and then restored (or removed again if it was not set), so that the
    bogus value does not leak into other tests.
    """
    env_name = neofox.REFERENCE_FOLDER_ENV
    previous_value = os.environ.get(env_name)
    os.environ[env_name] = "dummy"
    try:
        with self.assertRaises(NeofoxConfigurationException):
            NeoFox(
                neoantigens=[self._get_test_neoantigen()],
                patient_id=None,
                patients=[self._get_test_patient()],
                num_cpus=1,
                reference_folder=FakeReferenceFolder(),
            ).get_annotations()
    finally:
        if previous_value is None:
            os.environ.pop(env_name, None)
        else:
            os.environ[env_name] = previous_value
def test_gene_expression_imputation(self):
    """After construction every neoantigen has a positive imputed gene expression."""
    neoantigens, patients, patient_id = self._get_test_data()
    runner = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    )
    # get_annotations() is not called: the value is checked right after construction
    for neoantigen in runner.neoantigens:
        imputed = neoantigen.imputed_gene_expression
        self.assertIsNotNone(imputed)
        self.assertGreater(imputed, 0)
def test_validation_captures_bad_mutated_xmer(self):
    """A mutated sequence made of digits is rejected with a validation exception."""
    neoantigen = self._get_test_neoantigen()
    # digits are not valid aminoacids
    neoantigen.mutation.mutated_xmer = "123"
    with self.assertRaises(NeofoxDataValidationException):
        NeoFox(
            neoantigens=[neoantigen],
            patient_id=None,
            patients=[self._get_test_patient()],
            num_cpus=1,
            reference_folder=FakeReferenceFolder(),
            configuration=FakeDependenciesConfiguration(),
        )
def test_validation_captures_bad_patient(self):
    """A patient whose identifier is an integer is rejected with a validation exception."""
    bad_patient = self._get_test_patient()
    # the identifier is expected to be a string, not an int
    bad_patient.identifier = 12345
    with self.assertRaises(NeofoxDataValidationException):
        NeoFox(
            neoantigens=[self._get_test_neoantigen()],
            patient_id=None,
            patients=[bad_patient],
            num_cpus=1,
            reference_folder=FakeReferenceFolder(),
            configuration=FakeDependenciesConfiguration(),
        )
def test_neoantigens_with_empty_gene(self):
    """Neoantigens with an empty gene name get no imputed gene expression."""
    neoantigens, patients, patient_id = self._get_test_data()
    for neoantigen in neoantigens:
        neoantigen.gene = ""
    runner = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    )
    for neoantigen in runner.neoantigens:
        self.assertIsNone(neoantigen.imputed_gene_expression)
def test_neofox_model_input(self):
    """Annotating the test data yields 5 neoantigens, the first with 86 annotations."""
    neoantigens, patients, patient_id = self._get_test_data()
    annotations = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=2,
    ).get_annotations()
    self.assertEqual(5, len(annotations))
    self.assertIsInstance(annotations[0], Neoantigen)
    # assertEqual reports the actual count when the expectation is not met
    self.assertEqual(86, len(annotations[0].neofox_annotations.annotations))
def test_neofox_without_mhc1(self):
    """With no MHC I alleles on any patient, the first neoantigen gets 39 annotations."""
    neoantigens, patients, patient_id = self._get_test_data()
    for patient in patients:
        patient.mhc1 = []
    annotations = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    ).get_annotations()
    self.assertEqual(5, len(annotations))
    self.assertIsInstance(annotations[0], Neoantigen)
    # assertEqual reports the actual count when the expectation is not met
    self.assertEqual(39, len(annotations[0].neofox_annotations.annotations))
def test_neoantigens_with_rna_expression(self):
    """When every neoantigen has an RNA expression, the patient is flagged as having RNA."""
    neoantigens, patients, patient_id = self._get_test_data()
    for neoantigen in neoantigens:
        neoantigen.rna_expression = 1.2
    runner = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    )
    # only patients matching the test patient id are checked; if none matches,
    # no assertion runs
    matching_patients = [
        patient for patient in runner.patients.values()
        if patient.identifier == patient_id
    ]
    for patient in matching_patients:
        self.assertTrue(patient.is_rna_available)
def test_neoantigens_without_gene(self):
    """Neoantigens with an empty gene name are still annotated (more than 10 annotations)."""
    neoantigens, patients, patient_id = self._get_test_data()
    for neoantigen in neoantigens:
        neoantigen.gene = ""
    annotations = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    ).get_annotations()
    self.assertEqual(5, len(annotations))
    self.assertIsInstance(annotations[0], Neoantigen)
    # assertGreater reports the actual count when the expectation is not met
    self.assertGreater(len(annotations[0].neofox_annotations.annotations), 10)
def test_neofox_only_one_neoantigen(self):
    """A candidate file with a single neoantigen yields exactly one annotated neoantigen."""
    input_file = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/test_data_only_one.txt")
    neoantigens = ModelConverter.parse_candidate_file(input_file)
    annotations = NeoFox(
        neoantigens=neoantigens,
        patient_id=self.patient_id,
        patients=self.patients,
        num_cpus=4,
    ).get_annotations()
    self.assertEqual(1, len(annotations))
    self.assertIsInstance(annotations[0], Neoantigen)
    # assertGreater reports the actual count when the expectation is not met
    self.assertGreater(len(annotations[0].neofox_annotations.annotations), 10)
def test_neofox_with_config(self):
    """A configuration file is honoured when building NeoFox.

    Construction is expected to fail with ``NeofoxConfigurationException``
    and the error message must mention "/neofox/testing/reference_data",
    which shows that the value came from the configuration file.
    """
    neoantigens, patients, patient_id = self._get_test_data()
    config_file = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/neofox_config.txt")
    # unittest assertions instead of bare `assert`, which is stripped under -O
    with self.assertRaises(NeofoxConfigurationException) as raised:
        NeoFox(
            neoantigens=neoantigens,
            patient_id=patient_id,
            patients=patients,
            num_cpus=1,
            configuration_file=config_file,
        )
    self.assertIn("/neofox/testing/reference_data", str(raised.exception))
def test_neomouse(self):
    """Annotate the mouse test data, write the results and run the regression check.

    The results go to three timestamped files in the test resources folder;
    the annotated table is compared against
    "resources/output_previous_mouse.txt".
    """
    # one timestamp for the whole run, so the three output files always share
    # the same prefix even when the clock ticks over between them
    timestamp = "{:%Y%m%d%H%M%S}".format(datetime.now())
    output_file = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_mouse_{}_neoantigen_candidates_annotated.tsv".format(timestamp),
    )
    output_file_neoantigens = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_mouse_{}.neoantigens.tsv".format(timestamp),
    )
    output_json_neoantigens = pkg_resources.resource_filename(
        neofox.tests.__name__,
        "resources/output_mouse_{}.neoantigen_candidates.json".format(timestamp),
    )
    annotations = NeoFox(
        neoantigens=self.neoantigens_mouse,
        patient_id=self.patient_id,
        patients=self.patients_mouse,
        num_cpus=4,
        reference_folder=self.references_mouse).get_annotations()
    annotation_names = [
        a.name for n in annotations
        for a in n.neofox_annotations.annotations
    ]

    # checks it does have some of the NetMHCpan annotations
    self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names)
    self.assertIn("Best_rank_MHCII_score", annotation_names)

    # writes output
    ModelConverter.annotations2table(neoantigens=annotations).to_csv(
        output_file, sep="\t", index=False)
    ModelConverter._objects2dataframe(annotations).to_csv(
        output_file_neoantigens, sep="\t", index=False)
    # NOTE(review): binary mode only works if `json.dumps` returns bytes here
    # (e.g. `json` bound to orjson) -- confirm against the file's imports
    with open(output_json_neoantigens, "wb") as f:
        f.write(json.dumps(ModelConverter.objects2json(annotations)))

    # regression test
    self._regression_test_on_output_file(
        new_file=output_file,
        previous_filename="resources/output_previous_mouse.txt")
def test_neofox_synthetic_data(self):
    """
    this test just ensures that NeoFox does not crash with the synthetic data
    """
    # (neoantigens file, patients file) pairs inside the neofox.tests package;
    # the commented-out pairs are disabled datasets
    datasets = [
        ("resources/synthetic_data/neoantigens_1patients_10neoantigens.2.txt",
         "resources/synthetic_data/patients_1patients_10neoantigens.2.txt"),
        ("resources/synthetic_data/neoantigens_10patients_10neoantigens.0.txt",
         "resources/synthetic_data/patients_10patients_10neoantigens.0.txt"),
        # ("resources/synthetic_data/neoantigens_100patients_10neoantigens.2.txt",
        #  "resources/synthetic_data/patients_100patients_10neoantigens.2.txt"),
        # ("resources/synthetic_data/neoantigens_no_wt_1patients_10neoantigens.3.txt",
        #  "resources/synthetic_data/patients_no_wt_1patients_10neoantigens.3.txt"),
        # ("resources/synthetic_data/poltergeist_neoantigens.txt",
        #  "resources/synthetic_data/poltergeist_patients.txt"),
        ("resources/synthetic_data/neoantigens_no_wt_10patients_10neoantigens.4.txt",
         "resources/synthetic_data/patients_no_wt_10patients_10neoantigens.4.txt"),
        # ("resources/synthetic_data/neoantigens_100patients_10neoantigens.4.txt",
        #  "resources/synthetic_data/patients_100patients_10neoantigens.4.txt"),
    ]

    for neoantigens_resource, patients_resource in datasets:
        input_file = pkg_resources.resource_filename(
            neofox.tests.__name__, neoantigens_resource)
        # named `candidates` so the list being iterated is never rebound
        candidates = pd.read_csv(input_file, sep="\t")
        # missing values become None before conversion into model objects
        candidates = candidates.replace({np.nan: None})
        neoantigens = ModelConverter._neoantigens_csv2objects(candidates)
        patients_file = pkg_resources.resource_filename(
            neofox.tests.__name__, patients_resource)
        patients = ModelConverter.parse_patients_file(
            patients_file, self.hla_database)
        annotations = NeoFox(
            neoantigens=neoantigens,
            patients=patients,
            num_cpus=4,
        ).get_annotations()
        self.assertIsNotNone(annotations)
def test_no_expression_imputation(self):
    """RNA expression of the parsed candidates is unchanged after building NeoFox.

    Input neoantigens are paired with the runner's neoantigens by identical
    mutated sequence; each pair must have the same ``rna_expression``.
    """
    candidates_path = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/test_candidate_file.txt"
    )
    patients_path = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/test_patient_file.txt"
    )
    patients = ModelConverter.parse_patients_file(patients_path, self.hla_database)
    neoantigens = ModelConverter.parse_candidate_file(candidates_path)
    neofox_runner = NeoFox(
        neoantigens=neoantigens,
        patients=patients,
        reference_folder=FakeReferenceFolder(),
        configuration=FakeDependenciesConfiguration(),
    )
    for original in neoantigens:
        for processed in neofox_runner.neoantigens:
            same_sequence = (
                original.mutation.mutated_xmer == processed.mutation.mutated_xmer
            )
            if same_sequence:
                self.assertEqual(
                    original.rna_expression, processed.rna_expression
                )
def test_neoantigen_no_wt_failing(self):
    """A neoantigen without wild type, for a patient with only MHC I alleles, is annotated."""
    patient_identifier = "12345"
    mutation = Mutation(mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS")
    neoantigen = Neoantigen(
        mutation=mutation, patient_identifier=patient_identifier)
    mhc1 = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*02:24", "HLA-A*36:04",
            "HLA-B*58:25", "HLA-B*35:102",
            "HLA-C*02:30", "HLA-C*07:139",
        ],
        mhc_database=self.references.get_mhc_database())
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)
    runner = NeoFox(
        neoantigens=[neoantigen],
        patients=[patient],
        num_cpus=1,
    )
    annotations = runner.get_annotations()
    # it does not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_neoantigen_failing(self):
    """This neoantigen, for a patient with only MHC I alleles, is annotated without error."""
    patient_identifier = "12345"
    mutation = Mutation(
        wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ",
        mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ")
    neoantigen = Neoantigen(
        mutation=mutation, patient_identifier=patient_identifier)
    mhc1 = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*03:01", "HLA-A*29:02",
            "HLA-B*07:02", "HLA-B*44:03",
            "HLA-C*07:02", "HLA-C*16:01",
        ],
        mhc_database=self.references.get_mhc_database())
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)
    runner = NeoFox(
        neoantigens=[neoantigen],
        patients=[patient],
        num_cpus=1,
    )
    annotations = runner.get_annotations()
    # it does not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_with_expression_imputation(self):
    """RNA expression of these candidates differs after building NeoFox.

    A deep copy of the parsed candidates is kept as the reference. Every
    reference neoantigen is compared with every neoantigen of the runner
    (all pairs, not matched by sequence) and no pair may have equal
    ``rna_expression``.
    """
    import copy

    candidates_path = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/test_candidate_file_Pty.txt"
    )
    neoantigens = ModelConverter.parse_candidate_file(candidates_path)
    # snapshot taken before NeoFox sees the objects
    original_neoantigens = copy.deepcopy(neoantigens)
    patients_path = pkg_resources.resource_filename(
        neofox.tests.__name__, "resources/test_patient_file.txt"
    )
    patients = ModelConverter.parse_patients_file(patients_path, self.hla_database)
    neofox_runner = NeoFox(
        neoantigens=neoantigens,
        patients=patients,
        reference_folder=FakeReferenceFolder(),
        configuration=FakeDependenciesConfiguration(),
    )
    for original in original_neoantigens:
        for processed in neofox_runner.neoantigens:
            unchanged = original.rna_expression == processed.rna_expression
            self.assertFalse(unchanged)
def test_neoantigens_with_rare_aminoacids(self):
    """Neoantigens with a "U" in the middle of the mutated sequence are still annotated.

    The annotations "Selfsimilarity_MHCI_conserved_binder" and
    "Tcell_predictor_score_cutoff", when present, must have the value
    ``NOT_AVAILABLE_VALUE``.
    """
    neoantigens, patients, patient_id = self._get_test_data()
    for neoantigen in neoantigens:
        sequence = neoantigen.mutation.mutated_xmer
        middle = len(sequence) // 2
        # replace the middle residue with the rare aminoacid "U"
        neoantigen.mutation.mutated_xmer = (
            sequence[:middle] + "U" + sequence[middle + 1:])
    annotations = NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    ).get_annotations()
    self.assertEqual(5, len(annotations))
    self.assertIsInstance(annotations[0], Neoantigen)
    # assertGreater reports the actual count when the expectation is not met
    self.assertGreater(len(annotations[0].neofox_annotations.annotations), 10)
    unavailable_annotations = (
        "Selfsimilarity_MHCI_conserved_binder",
        "Tcell_predictor_score_cutoff",
    )
    for annotated in annotations:
        for annotation in annotated.neofox_annotations.annotations:
            if annotation.name in unavailable_annotations:
                self.assertEqual(annotation.value, NOT_AVAILABLE_VALUE)
def neofox_cli():
    """Command line entry point of NeoFox.

    Parses the command line, checks that exactly one of ``--candidate-file``
    and ``--json-file`` was given, creates the output folder, optionally
    loads a dotenv configuration file, reads the input data, runs the
    annotation and writes the results.

    Raises:
        NeofoxInputParametersException: when both input files or none of
            them are provided.
        Exception: anything raised while processing is logged and re-raised.
    """
    parser = ArgumentParser(
        description="NeoFox {} annotates a given set of neoantigen candidate sequences "
        "derived from point mutation with relevant neoantigen features".format(
            neofox.VERSION),
        epilog=epilog)
    parser.add_argument(
        "--candidate-file",
        dest="candidate_file",
        help="input file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--json-file",
        dest="json_file",
        help="input JSON file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--patient-data",
        dest="patients_data",
        help="file with data for patients with columns: identifier, estimated_tumor_content, "
        "mhc_i_alleles, mhc_ii_alleles, tissue",
        required=True,
    )
    parser.add_argument(
        "--output-folder",
        dest="output_folder",
        help="output folder",
        required=True,
    )
    parser.add_argument(
        "--output-prefix",
        dest="output_prefix",
        help="prefix to name output files in the output folder",
        default="neofox",
    )
    parser.add_argument(
        "--with-table",
        dest="with_table",
        action="store_true",
        help="output results in a short wide tab-separated table "
        "(if no format is specified this is the default)",
    )
    parser.add_argument(
        "--with-json",
        dest="with_json",
        action="store_true",
        help="output results in JSON format",
    )
    parser.add_argument(
        "--patient-id",
        dest="patient_id",
        help="the patient id for the input file. This parameter is only required, "
        'if the column "patient" has not been added to the candidate file',
    )
    # type=int makes argparse reject non-integer values with a usage error
    # instead of an uncaught ValueError further down
    parser.add_argument(
        "--affinity-threshold",
        dest="affinity_threshold",
        type=int,
        help="neoantigen candidates with a best predicted affinity greater than or equal than this threshold will be "
        "not annotated with features that specifically model neoepitope recognition. A threshold that is commonly "
        "used is 500 nM",
        default=AFFINITY_THRESHOLD_DEFAULT)
    parser.add_argument(
        "--num-cpus",
        dest="num_cpus",
        type=int,
        default=1,
        help="number of CPUs for computation")
    parser.add_argument(
        "--config",
        dest="config",
        help="an optional configuration file with all the environment variables",
    )
    # NOTE(review): the literal default "human" is presumably one of the two
    # choices -- confirm it equals ORGANISM_HOMO_SAPIENS
    parser.add_argument(
        "--organism",
        dest="organism",
        choices=[ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS],
        help="the organism to which the data corresponds",
        default="human")

    args = parser.parse_args()

    candidate_file = args.candidate_file
    json_file = args.json_file
    patient_id = args.patient_id
    patients_data = args.patients_data
    output_folder = args.output_folder
    output_prefix = args.output_prefix
    with_table = args.with_table
    with_json = args.with_json
    # int() is kept because argparse does not convert a non-string default
    affinity_threshold = int(args.affinity_threshold)
    num_cpus = int(args.num_cpus)
    config = args.config
    organism = args.organism

    logger.info("NeoFox v{}".format(neofox.VERSION))

    try:
        # check parameters
        if candidate_file and json_file:
            raise NeofoxInputParametersException(
                "Please, define either a candidate file, a standard input file or a JSON file as input. Not many of them"
            )
        if not candidate_file and not json_file:
            raise NeofoxInputParametersException(
                "Please, define one input file, either a candidate file, a standard input file or a JSON file"
            )
        if not with_table and not with_json:
            with_table = True  # if none specified short wide is the default

        # makes sure that the output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # loads configuration; values in the file override the environment
        if config:
            dotenv.load_dotenv(config, override=True)
        reference_folder = ReferenceFolder(organism=organism)

        # reads the input data
        neoantigens, patients = _read_data(
            candidate_file, json_file, patients_data, patient_id,
            reference_folder.get_mhc_database())

        # run annotations
        annotated_neoantigens = NeoFox(
            neoantigens=neoantigens,
            patients=patients,
            patient_id=patient_id,
            work_folder=output_folder,
            output_prefix=output_prefix,
            num_cpus=num_cpus,
            reference_folder=reference_folder,
            affinity_threshold=affinity_threshold).get_annotations()

        _write_results(
            annotated_neoantigens,
            output_folder,
            output_prefix,
            with_json,
            with_table,
        )
    except Exception as e:
        logger.exception(e)  # logs every exception in the file
        raise  # bare raise re-raises the same exception with its traceback

    logger.info("Finished NeoFox")