def test_failed_mapping(self):
    """Mapping the placeholder names "Gene1"/"Gene2" must return no entries."""
    # first a request with two genes, then the same check with a single gene
    for placeholder_genes in ({"Gene1", "Gene2"}, {"Gene1"}):
        mapping_result = util.map_identifiers(placeholder_genes, return_all=True)
        self.assertEqual(0, len(mapping_result))
def test_ssgsea(self):
    """Run ``ReactomeGSVARAnalyser`` on ``self.test_json`` and verify the pathway table."""
    request = create_analysis_input_object(json.loads(self.test_json))
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    # get the mappings
    identifier_mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=identifier_mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeGSVARAnalyser()
    result = analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=identifier_mappings,
        gene_set=gene_set,
    )

    # exactly one result carrying both a pathway table and fold changes
    self.assertEqual(1, len(result))
    self.assertIsNotNone(result[0].pathways)
    self.assertIsNotNone(result[0].fold_changes)

    # the pathway table is tab-delimited with one column per sample
    reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
    self.assertEqual(5, len(reader.fieldnames))

    for column in ["Pathway", "Name", "Sample.1", "Sample.2", "Sample.3"]:
        self.assertTrue(column in reader.fieldnames)

    # walk over all pathways, spot-checking two of them
    seen_pathways = set()
    n_rows = 0

    for n_rows, row in enumerate(reader, start=1):
        pathway_id = row["Pathway"]
        seen_pathways.add(pathway_id)

        if pathway_id == "R-HSA-1280218":
            self.assertEqual("0.0", row["Sample.1"].strip())
            self.assertEqual("0.02880908", row["Sample.2"].strip())
            self.assertEqual("0.02880908", row["Sample.3"].strip())

        if pathway_id == "R-HSA-392499":
            self.assertEqual(-0.5, float(row["Sample.1"]))
            self.assertEqual(-0.5, float(row["Sample.2"]))
            self.assertEqual(-0.5, float(row["Sample.3"]))

    self.assertEqual(143, n_rows)
    self.assertTrue("R-HSA-1280218" in seen_pathways)
    self.assertTrue("R-HSA-392499" in seen_pathways)
def test_no_design_filtering(self):
    """Filter a three-gene dataset without a design and expect two genes to remain."""
    # NOTE: the backslash ending the first "data" line is a Python line
    # continuation, so the MITF row directly follows the MS4A1 row.
    # NOTE(review): the data header says "Sample2" while the design lists
    # "Sample 2" - confirm this mismatch is intentional.
    test_json = """
    {
        "analysisId": "test_01",
        "datasets": [
            {
                "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
MITF\\t10\\t0\\t0\\n",
                "design": {
                    "analysisGroup": [
                        "Treatment",
                        "Control",
                        "Treatment"
                    ],
                    "comparison": {
                        "group1": "Control",
                        "group2": "Treatment"
                    },
                    "samples": [
                        "Sample 1",
                        "Sample 2",
                        "Sample 3"
                    ],
                    "patient": [
                        "Patient 1",
                        "Patient 2",
                        "Patient 3"
                    ]
                },
                "name": "First experiment",
                "type": "rnaseq_counts"
            }
        ],
        "methodName": "ssgsea"
    }
    """

    worker = reactome_analysis_worker.ReactomeAnalysisWorker()
    request_obj = create_analysis_input_object(json.loads(test_json))
    # let the worker populate the datasets' ``df`` attribute
    worker._convert_datasets(request_obj)

    mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    expression_data = request_obj.datasets[0].df
    self.assertEqual(3, len(expression_data))

    # no design is passed (None); 0.5 is the fourth positional argument
    # (presumably the allowed fraction of missing values - confirm)
    filter_dataset = reactome_analysis_worker.ReactomeAnalysisWorker._filter_dataset
    filtered_df = filter_dataset(expression_data, mappings, None, 0.5)

    self.assertIsNotNone(filtered_df)
    self.assertEqual(2, len(filtered_df))
def _map_identifiers(self, request: AnalysisInput, reactome_server: str) -> dict:
    """
    Map all submitted identifiers using Reactome's mapping service.

    :param request: The analysis request
    :param reactome_server: The reactome server to use
    :returns: A dict with the original identifier as key and the mappings as value (list)
    :raises Exception: If one or fewer identifiers were submitted, if
                       ``util.map_identifiers`` raises a ``MappingException``
                       or any other error, or if no identifier was mapped.
    """
    # get all identifiers
    all_identifiers = ReactomeAnalysisWorker._extract_identifiers(request.datasets)

    # make sure more than one gene was submitted
    if len(all_identifiers) <= 1:
        LOGGER.debug(
            "Analysis request {} contains an insufficient number of genes ({})"
            .format(request.analysis_id, str(len(all_identifiers))))
        raise Exception("Analysis requires >1 genes.")

    # get the identifier mappings
    self._set_status(request.analysis_id,
                     status="running",
                     description="Mapping identifiers...",
                     completed=0.1)

    try:
        identifier_mappings = util.map_identifiers(
            all_identifiers, return_all=True, reactome_server=reactome_server)
    except util.MappingException as e:
        LOGGER.debug("Identifier mapping failed", exc_info=True)
        # chain the original error so the root cause stays in the traceback
        raise Exception("Invalid gene/protein identifiers submitted") from e
    except Exception as e:
        LOGGER.error("Failed to connect to mapping service: " + str(e))
        LOGGER.debug("Mapping failed", exc_info=True)
        # chain the original error so the root cause stays in the traceback
        raise Exception(
            "Failed to contact identifier mapping service. Please try again later."
        ) from e

    LOGGER.debug("Mapped {} of {} submitted identifiers".format(
        str(len(identifier_mappings)), str(len(all_identifiers))))

    # make sure that identifiers were mapped
    if len(identifier_mappings) < 1:
        raise Exception("Failed to map any submitted identifiers")

    return identifier_mappings
def testMapping(self):
    """Map every identifier listed in the R-HSA-1980143 fixture file and check the result sizes."""
    # use all genes from one pathway
    fixture_path = os.path.join(
        os.path.dirname(__file__), "testfiles", "R-HSA-1980143.uniprot.txt")

    with open(fixture_path) as reader:
        genes = {line.strip() for line in reader}

    mapped_identifiers = util.map_identifiers(genes, return_all=True)

    # every submitted identifier must have an entry in the result
    self.assertEqual(len(genes), len(mapped_identifiers))

    # identifiers should map to a single target; up to two are tolerated
    # because isoforms can produce a second mapping (one known case)
    for targets in mapped_identifiers.values():
        self.assertTrue(
            len(targets) < 3,
            msg="Multiple mappings for {}".format(",".join(targets)))
def test_analysis(self):
    """Run ``ReactomeRAnalyser`` on ``self.test_json`` and check the shape of the pathway table."""
    request = create_analysis_input_object(json.loads(self.test_json))
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    # get the mappings
    genes_to_map = {
        "MITF", "CD19", "MS4A1", "SDC1", "CD38", "EGFR", "IL10", "IL6",
        "GRB2", "GAB1", "SHC1"
    }
    mappings = util.map_identifiers(genes_to_map)

    # filter the dataset
    dataset.df = ReactomeAnalysisWorker._filter_dataset(
        dataset.df, mappings, dataset.design, 1)

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeRAnalyser()
    result = analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=mappings,
        gene_set=gene_set,
    )

    # test the result
    self.assertEqual(1, len(result))
    self.assertIsNotNone(result[0].pathways)

    pathway_lines = result[0].pathways.split("\n")
    self.assertEqual(233, len(pathway_lines))

    reader = csv.DictReader(pathway_lines, delimiter="\t")
    for field in ("Pathway", "Name", "Direction", "FDR", "PValue", "NGenes"):
        self.assertTrue(field in reader.fieldnames, "Missing field " + field)
def test_pathway_string(self):
    """Pass a "pathways" parameter naming two pathways and expect exactly two result rows."""
    json_obj = json.loads(self.test_json)

    # add the parameters
    json_obj["parameters"] = [
        {"name": "pathways", "value": "R-HSA-1280218,R-HSA-392499"},
        {"name": "create_reactome_visualization", "value": "False"},
    ]

    request = create_analysis_input_object(json_obj)
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    # get the mappings
    mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeGSVARAnalyser()
    result = analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=mappings,
        gene_set=gene_set,
    )

    # test the result
    self.assertEqual(1, len(result))
    self.assertIsNotNone(result[0].pathways)
    self.assertIsNotNone(result[0].fold_changes)

    # test the actual result
    reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
    self.assertEqual(5, len(reader.fieldnames))

    # there should only be two entries
    row_count = sum(1 for _ in reader)
    self.assertEqual(2, row_count)
def test_heartbeat(self):
    """Check that ``self.last_heartbeat`` is newer than the start time after an analysis run."""
    json_obj = json.loads(self.test_json)
    json_obj["parameters"].append({"name": "max_missing_values", "value": "1"})

    # remove the patient since this coefficient cannot be estimated
    json_obj["datasets"][0]["design"].pop("patient")

    request = create_analysis_input_object(json_obj)
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    # get the mappings
    mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    # filter the dataset
    dataset.df = ReactomeAnalysisWorker._filter_dataset(
        dataset.df, mappings, dataset.design, 1)

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeRAnalyser()
    # presumably self.update_heartbeat stores the time in self.last_heartbeat
    analyser.set_heartbeat_callback(self.update_heartbeat)

    # subtract one second so a heartbeat within the same second still counts
    start_time = int(time.time()) - 1

    analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=mappings,
        gene_set=gene_set,
    )

    # make sure the heartbeat was updated
    self.assertGreater(self.last_heartbeat, start_time)
def test_parameter_passing(self):
    """Add a "max_missing_values" parameter and verify the pathway and fold-change output."""
    json_obj = json.loads(self.test_json)
    json_obj["parameters"].append({"name": "max_missing_values", "value": "1"})

    # remove the patient since this coefficient cannot be estimated
    json_obj["datasets"][0]["design"].pop("patient")

    request = create_analysis_input_object(json_obj)
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    self.assertEqual(3, len(request.parameters))
    # default values inserted automatically
    self.assertEqual(6, len(request.parameter_dict))
    self.assertTrue("max_missing_values" in request.parameter_dict)

    # get the mappings
    mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    # filter the dataset
    dataset.df = ReactomeAnalysisWorker._filter_dataset(
        dataset.df, mappings, dataset.design, 1)

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeRAnalyser()
    result = analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=mappings,
        gene_set=gene_set,
    )

    # test the result
    self.assertEqual(1, len(result))
    self.assertIsNotNone(result[0].pathways)

    pathway_lines = result[0].pathways.split("\n")
    self.assertEqual(24, len(pathway_lines))

    reader = csv.DictReader(pathway_lines, delimiter="\t")
    for field in ("Pathway", "Name", "Direction", "FDR", "PValue", "NGenes"):
        self.assertTrue(field in reader.fieldnames, "Missing field " + field)

    # pathways for which a "Down" direction and a negative fold change are expected
    expected_down = ("R-HSA-392499", "R-HSA-597592", "R-HSA-2990846",
                     "R-HSA-3108232", "R-HSA-3232118")

    # pathway expected at selected positions, keyed by the reader's line number
    expected_at_line = {
        2: "R-HSA-392499",
        6: "R-HSA-3232118",
        15: "R-HSA-162582",
        24: "R-HSA-6811558",
    }

    for row in reader:
        expected_id = expected_at_line.get(reader.line_num)
        if expected_id is not None:
            self.assertTrue(row["Pathway"] == expected_id)

        if row["Pathway"] in expected_down:
            self.assertTrue(row["Direction"] == "Down")
            self.assertTrue(
                float(row["av_foldchange"]) < 0,
                "Incorrect regulation for " + row["Pathway"])
        else:
            self.assertTrue(row["Direction"] == "Up")
            self.assertTrue(float(row["av_foldchange"]) > 0)

    # test the FC result
    self.assertIsNotNone(result[0].fold_changes)

    fc_lines = result[0].fold_changes.split("\n")
    self.assertEqual(4, len(fc_lines))

    fc_reader = csv.DictReader(fc_lines, delimiter="\t")
    for field in ("logFC", "Identifier"):
        self.assertTrue(field in fc_reader.fieldnames, "Missing FC field " + field)

    mitf_found = False
    for fc_row in fc_reader:
        if fc_row["Identifier"] != "MITF":
            continue
        self.assertAlmostEqual(4.53, float(fc_row["logFC"]), delta=0.01)
        mitf_found = True

    self.assertTrue(mitf_found, "Failed to find MITF in FC data")
def test_no_design(self):
    """Run ``ReactomeGSVARAnalyser`` on a dataset that has no "design" entry."""
    # NOTE: the backslash ending the first "data" line is a Python line
    # continuation, so the MITF row directly follows the MS4A1 row.
    test_json = """
    {
        "analysisId": "test_01",
        "datasets": [
            {
                "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
MITF\\t10\\t0\\t0\\n",
                "name": "First experiment",
                "type": "rnaseq_counts"
            }
        ],
        "methodName": "ssgsea"
    }
    """

    request = create_analysis_input_object(json.loads(test_json))
    dataset = request.datasets[0]
    dataset.df = util.string_to_array(dataset.data)

    self.assertIsNotNone(request)

    # get the mappings
    mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

    gene_set = self._get_gene_set()
    # the first column of the structured array holds the submitted identifiers
    id_column = dataset.df.dtype.names[0]
    submitted_ids = dataset.df[:][id_column].tolist()
    dataset_mapping = GeneSetMapping.create_mapping(
        gene_set,
        identifier_mapping=mappings,
        identifiers=submitted_ids,
    )

    analyser = ReactomeGSVARAnalyser()
    result = analyser.analyse_request(
        request=request,
        gene_set_mappings={dataset.name: dataset_mapping},
        identifier_mappings=mappings,
        gene_set=gene_set,
    )

    # test the result
    self.assertEqual(1, len(result))
    self.assertIsNotNone(result[0].pathways)
    self.assertIsNotNone(result[0].fold_changes)

    # test the actual result
    reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
    self.assertEqual(5, len(reader.fieldnames))

    for column in ["Pathway", "Sample_1", "Sample2", "Sample_3"]:
        self.assertTrue(column in reader.fieldnames,
                        "Missing required field " + column)

    # test the pathways
    n_rows = 0

    for n_rows, row in enumerate(reader, start=1):
        pathway_id = row["Pathway"]

        if pathway_id == "R-HSA-1280218":
            self.assertEqual("0.0", row["Sample_1"].strip())
            self.assertEqual("0.02880908", row["Sample2"].strip())
            self.assertEqual("0.02880908", row["Sample_3"].strip())

        if pathway_id == "R-HSA-392499":
            self.assertEqual(-0.5, float(row["Sample_1"]))
            self.assertEqual(-0.5, float(row["Sample2"]))
            self.assertEqual(-0.5, float(row["Sample_3"]))

    self.assertEqual(143, n_rows)
def test_interactor_mapping(self):
    """Map "MS4A1" with ``return_all=True`` and expect the single target "P11836"."""
    mapping_result = util.map_identifiers(["MS4A1"], return_all=True)

    # exactly one submitted identifier comes back ...
    self.assertEqual(1, len(mapping_result))

    # ... and it maps to exactly one target
    ms4a1_targets = mapping_result["MS4A1"]
    self.assertEqual(1, len(ms4a1_targets))
    self.assertEqual("P11836", ms4a1_targets[0])