def __init__(self, reference_folder: ReferenceFolder, configuration: DependenciesConfiguration):
    """
    Prepares the random patient and neoantigen providers.

    The MHC-I alleles and MHC-II isoforms handed to the patient provider are the union of what
    MixMHCpred / MixMHC2pred report as available and what the reference folder lists as available.

    :param reference_folder: gives access to the MHC database, the available alleles and the proteome folder
    :param configuration: dependencies configuration passed on to MixMHCpred and MixMHC2pred
    """
    self.hla_database = reference_folder.get_mhc_database()
    faker = Faker()

    # MHC class I: MixMHCpred alleles first, then the ones listed in the reference folder
    mixmhcpred = MixMHCpred(None, configuration=configuration, mhc_parser=None)
    mhc1_from_mixmhcpred = set(self.load_mhc1_alleles(mixmhcpred.available_alleles))
    reference_mhc1 = reference_folder.get_available_alleles().get_available_mhc_i()
    mhc1_from_references = set(self.load_mhc1_alleles(reference_mhc1))
    mhc1_alleles = mhc1_from_mixmhcpred | mhc1_from_references

    # MHC class II: same procedure with MixMHC2pred and the class II reference list
    mixmhc2pred = MixMhc2Pred(runner=None, configuration=configuration, mhc_parser=None)
    mhc2_from_mixmhc2pred = set(self.load_mhc2_alleles(mixmhc2pred.available_alleles))
    reference_mhc2 = reference_folder.get_available_alleles().get_available_mhc_ii()
    mhc2_from_references = set(self.load_mhc2_alleles(reference_mhc2))
    mhc2_isoforms = mhc2_from_mixmhc2pred | mhc2_from_references

    self.patient_provider = PatientProvider(faker, mhc1_alleles, mhc2_isoforms, self.hla_database)

    proteome_fasta = os.path.join(reference_folder.proteome_db, HOMO_SAPIENS_FASTA)
    self.neoantigen_provider = NeoantigenProvider(faker, proteome_fasta=proteome_fasta)
def test_non_supported_organism(self):
    """ReferenceFolder(organism="rat") must raise a NeofoxConfigurationException."""
    resources = FakeReferenceFolder().resources
    test_tools.mock_file_existence(existing_files=resources)
    self.assertRaises(NeofoxConfigurationException, ReferenceFolder, organism="rat")
def __init__(self, references: ReferenceFolder, runner, configuration):
    """
    Keeps the runner and the configuration and fetches the IEDB FASTA from the references.

    :param references: reference folder queried through get_iedb_fasta()
    :type runner: neofox.helpers.runner.Runner
    :type configuration: neofox.references.DependenciesConfiguration
    """
    self.runner, self.configuration = runner, configuration
    iedb_fasta = references.get_iedb_fasta()
    self.iedb_fasta = iedb_fasta
def __init__(
        self,
        references: ReferenceFolder,
        configuration: DependenciesConfiguration,
        tcell_predictor: TcellPrediction,
        self_similarity: SelfSimilarityCalculator,
        affinity_threshold=neofox.AFFINITY_THRESHOLD_DEFAULT
):
    """
    Wires together every calculator used to annotate neoantigens.

    :param references: reference folder providing databases, available alleles, organism and versions
    :param configuration: dependencies configuration shared with the tools that run external commands
    :param tcell_predictor: predictor instance stored as is
    :param self_similarity: self-similarity calculator stored as is
    :param affinity_threshold: threshold forwarded to every calculator that accepts one
    """
    runner = Runner()
    self.runner = runner
    self.configuration = configuration
    self.proteome_db = references.proteome_db
    self.available_alleles = references.get_available_alleles()
    self.tcell_predictor = tcell_predictor
    self.self_similarity = self_similarity
    self.organism = references.organism

    # NOTE: the pickle behind this object is big; loading it again here is still faster than passing it around
    self.uniprot = Uniprot(references.uniprot_pickle)

    # BLASTP runners, one against the proteome and one against IEDB
    proteome_database = references.get_proteome_database()
    self.proteome_blastp_runner = BlastpRunner(
        runner=runner, configuration=configuration, database=proteome_database)
    iedb_database = references.get_iedb_database()
    self.iedb_blastp_runner = BlastpRunner(
        runner=runner, configuration=configuration, database=iedb_database)

    # NOTE: none of the following resources reads a file, so they are cheap to create
    self.dissimilarity_calculator = DissimilarityCalculator(
        proteome_blastp_runner=self.proteome_blastp_runner,
        affinity_threshold=affinity_threshold,
    )
    self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(
        iedb_blastp_runner=self.iedb_blastp_runner)
    self.neoag_calculator = NeoagCalculator(
        runner=runner,
        configuration=configuration,
        affinity_threshold=affinity_threshold,
    )
    self.differential_binding = DifferentialBinding(affinity_threshold=affinity_threshold)
    self.priority_score_calculator = PriorityScore()
    self.iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=affinity_threshold)
    self.amplitude = Amplitude()
    self.hex = Hex(runner=runner, configuration=configuration, references=references)

    # MHC parsing and resource bookkeeping
    mhc_database = references.get_mhc_database()
    self.mhc_database = mhc_database
    self.mhc_parser = MhcParser.get_mhc_parser(mhc_database)
    self.resources_versions = references.get_resources_versions()
def test_one_resource_do_not_exist(self):
    """Mocking the first resource as non-existing must make ReferenceFolder() raise."""
    resources = FakeReferenceFolder().resources
    test_tools.mock_file_existence(
        existing_files=resources[1:],
        non_existing_files=[resources[0]],
    )
    self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
def test_organism_human(self):
    """With every fake resource mocked as existing, the human reference folder is created without error."""
    resources = FakeReferenceFolder().resources
    test_tools.mock_file_existence(existing_files=resources)
    ReferenceFolder(organism=ORGANISM_HOMO_SAPIENS)
def test_organism_mouse(self):
    """With every fake mouse resource mocked as existing, the mouse reference folder is created without error."""
    resources = FakeReferenceFolder(organism=ORGANISM_MUS_MUSCULUS).resources
    test_tools.mock_file_existence(existing_files=resources)
    ReferenceFolder(organism=ORGANISM_MUS_MUSCULUS)
def test_all_resources_exist(self):
    """With every fake resource mocked as existing, the default reference folder is created without error."""
    resources = FakeReferenceFolder().resources
    test_tools.mock_file_existence(existing_files=resources)
    ReferenceFolder()
def test_non_existing_reference(self):
    """Pointing the reference folder variable to "/non_existing_folder" must make ReferenceFolder() raise."""
    variable = neofox.REFERENCE_FOLDER_ENV
    os.environ[variable] = "/non_existing_folder"
    self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
def test_empty_string_reference(self):
    """Setting the reference folder variable to an empty string must make ReferenceFolder() raise."""
    variable = neofox.REFERENCE_FOLDER_ENV
    os.environ[variable] = ""
    self.assertRaises(NeofoxConfigurationException, ReferenceFolder)
def test_not_provided_reference(self):
    """Without the reference folder variable in the environment, ReferenceFolder() must raise."""
    # pop() with a default instead of `del`: when the variable is already absent `del` raises a
    # KeyError and the test would error out before reaching the assertion it is meant to make
    os.environ.pop(neofox.REFERENCE_FOLDER_ENV, None)
    with self.assertRaises(NeofoxConfigurationException):
        ReferenceFolder()
def load_references(organism=ORGANISM_HOMO_SAPIENS):
    """
    Loads the dotenv file (overriding variables already set) and builds the reference objects.

    :param organism: organism passed to the reference folder
    :return: a tuple with the reference folder and the dependencies configuration
    """
    dotenv.load_dotenv(override=True)
    reference_folder = ReferenceFolder(organism=organism)
    configuration = DependenciesConfiguration()
    return reference_folder, configuration
def neofox_cli():
    """
    Command line entry point of NeoFox.

    Parses the command line arguments, checks that exactly one input file was given, loads the
    optional configuration file, reads the input data, runs the annotations and writes the results
    in the requested formats (the table format when none is requested).

    :raises NeofoxInputParametersException: if both or none of the input files are provided
    :raises Exception: any exception raised while processing is logged and re-raised
    """
    parser = ArgumentParser(
        description="NeoFox {} annotates a given set of neoantigen candidate sequences "
                    "derived from point mutation with relevant neoantigen features".format(neofox.VERSION),
        epilog=epilog)
    parser.add_argument(
        "--candidate-file",
        dest="candidate_file",
        help="input file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--json-file",
        dest="json_file",
        help="input JSON file with neoantigens candidates represented by long mutated peptide sequences",
    )
    parser.add_argument(
        "--patient-data",
        dest="patients_data",
        help="file with data for patients with columns: identifier, estimated_tumor_content, "
             "mhc_i_alleles, mhc_ii_alleles, tissue",
        required=True,
    )
    parser.add_argument(
        "--output-folder",
        dest="output_folder",
        help="output folder",
        required=True,
    )
    parser.add_argument(
        "--output-prefix",
        dest="output_prefix",
        help="prefix to name output files in the output folder",
        default="neofox",
    )
    parser.add_argument(
        "--with-table",
        dest="with_table",
        action="store_true",
        help="output results in a short wide tab-separated table "
             "(if no format is specified this is the default)",
    )
    parser.add_argument(
        "--with-json",
        dest="with_json",
        action="store_true",
        help="output results in JSON format",
    )
    parser.add_argument(
        "--patient-id",
        dest="patient_id",
        help="the patient id for the input file. This parameter is only required, "
             'if the column "patient" has not been added to the candidate file',
    )
    # type=int lets argparse reject non-integer values with a usage error instead of a raw traceback
    parser.add_argument(
        "--affinity-threshold",
        dest="affinity_threshold",
        type=int,
        help="neoantigen candidates with a best predicted affinity greater than or equal than this threshold will be "
             "not annotated with features that specifically model neoepitope recognition. A threshold that is commonly "
             "used is 500 nM",
        default=AFFINITY_THRESHOLD_DEFAULT)
    parser.add_argument(
        "--num-cpus",
        dest="num_cpus",
        type=int,
        default=1,
        help="number of CPUs for computation")
    parser.add_argument(
        "--config",
        dest="config",
        help="an optional configuration file with all the environment variables",
    )
    # the default uses the same constant as the choices so that both cannot drift apart
    parser.add_argument(
        "--organism",
        dest="organism",
        choices=[ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS],
        help="the organism to which the data corresponds",
        default=ORGANISM_HOMO_SAPIENS)
    args = parser.parse_args()

    candidate_file = args.candidate_file
    json_file = args.json_file
    patient_id = args.patient_id
    patients_data = args.patients_data
    output_folder = args.output_folder
    output_prefix = args.output_prefix
    with_table = args.with_table
    with_json = args.with_json
    # argparse only applies `type` to values given on the command line and to string defaults,
    # hence the explicit conversion is kept to normalise non-string defaults
    affinity_threshold = int(args.affinity_threshold)
    num_cpus = int(args.num_cpus)
    config = args.config
    organism = args.organism

    logger.info("NeoFox v{}".format(neofox.VERSION))

    try:
        # check parameters
        if candidate_file and json_file:
            raise NeofoxInputParametersException(
                "Please, define either a candidate file, a standard input file or a JSON file as input. Not many of them"
            )
        if not candidate_file and not json_file:
            raise NeofoxInputParametersException(
                "Please, define one input file, either a candidate file, a standard input file or a JSON file"
            )
        if not with_table and not with_json:
            with_table = True  # if none specified short wide is the default

        # makes sure that the output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # loads configuration
        if config:
            dotenv.load_dotenv(config, override=True)
        reference_folder = ReferenceFolder(organism=organism)

        # reads the input data
        neoantigens, patients = _read_data(
            candidate_file, json_file, patients_data, patient_id, reference_folder.get_mhc_database())

        # run annotations
        annotated_neoantigens = NeoFox(
            neoantigens=neoantigens,
            patients=patients,
            patient_id=patient_id,
            work_folder=output_folder,
            output_prefix=output_prefix,
            num_cpus=num_cpus,
            reference_folder=reference_folder,
            affinity_threshold=affinity_threshold).get_annotations()

        _write_results(
            annotated_neoantigens,
            output_folder,
            output_prefix,
            with_json,
            with_table,
        )
    except Exception as e:
        logger.exception(e)  # logs every exception in the file
        # bare raise re-raises the active exception with its original traceback
        raise

    logger.info("Finished NeoFox")
def __init__(
        self,
        neoantigens: List[Neoantigen],
        patients: List[Patient],
        num_cpus: int = 1,
        patient_id: str = None,
        work_folder=None,
        output_prefix=None,
        reference_folder: ReferenceFolder = None,
        configuration: DependenciesConfiguration = None,
        verbose=True,
        configuration_file=None,
        affinity_threshold=AFFINITY_THRESHOLD_DEFAULT):
    """
    Sets up logging, references and predictors, then validates and prepares the input data.

    :param neoantigens: neoantigen candidates; missing patient identifiers are filled with patient_id
    :param patients: patients, stored internally by identifier after validation
    :param num_cpus: stored as is
    :param patient_id: identifier assigned to neoantigens without a patient identifier
    :param work_folder: used together with output_prefix to derive the log file name
    :param output_prefix: used together with work_folder to derive the log file name
    :param reference_folder: reference folder to use; a new one is created when not provided
    :param configuration: dependencies configuration to use; a new one is created when not provided
    :param verbose: passed to the log initialisation and to the reference folder created here
    :param configuration_file: optional dotenv file loaded (with override) before anything else
    :param affinity_threshold: stored and passed to the T-cell predictor
    :raises NeofoxConfigurationException: if neoantigens or patients are missing or empty
    """
    self.affinity_threshold = affinity_threshold

    if configuration_file:
        dotenv.load_dotenv(configuration_file, override=True)

    # logs come first
    self.log_file_name = self._get_log_file_name(output_prefix, work_folder)
    self._initialise_logs(self.log_file_name, verbose)

    # NOTE: a reference folder or configuration received as a parameter takes precedence over creating
    # a new one, which allows testing with fake objects
    self.reference_folder = reference_folder or ReferenceFolder(verbose=verbose)
    # NOTE: this call is only made to force loading the available alleles at this point
    self.reference_folder.get_available_alleles()
    self.configuration = configuration or DependenciesConfiguration()

    self.tcell_predictor = TcellPrediction(affinity_threshold=self.affinity_threshold)
    self.self_similarity = SelfSimilarityCalculator()
    self.num_cpus = num_cpus

    # both inputs are mandatory and must not be empty
    if any(data is None or len(data) == 0 for data in (neoantigens, patients)):
        raise NeofoxConfigurationException("Missing input data to run Neofox")

    # neoantigens validation
    self.neoantigens = neoantigens
    for candidate in self.neoantigens:
        if candidate.patient_identifier is None:
            candidate.patient_identifier = patient_id
        # NOTE: users are not expected to provide the mutation position, any provided value is overwritten
        candidate.mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation=candidate.mutation)
        ModelValidator.validate_neoantigen(candidate)

    # patients validation
    self.patients = {}
    for patient in patients:
        ModelValidator.validate_patient(patient, organism=self.reference_folder.organism)
        self.patients[patient.identifier] = patient

    self._validate_input_data()

    # RNA-seq is flagged as available for a patient only when none of the expression values
    # of its neoantigens is None; the flag is stored in the patient model
    expression_per_patient = {p.identifier: [] for p in self.patients.values()}
    for candidate in self.neoantigens:
        expression_per_patient[candidate.patient_identifier].append(candidate.rna_expression)
    for p in self.patients.values():
        observed = expression_per_patient[p.identifier]
        p.is_rna_available = all(value is not None for value in observed)

    # the expression imputation is only performed for humans
    if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS:
        # NOTE: this has to run after the validation, otherwise missing patients cause uncaptured errors
        # NOTE: the returned neoantigens replace the ones stored above
        self.neoantigens = self._conditional_expression_imputation()

    logger.info("Data loaded")