def test_affy_to_pcl(self):
    """Run ``affy_to_pcl`` on the job from ``prepare_ba_job`` and check its output.

    Verifies the platform accession code and Brainarray package stored in
    the returned job context, that the job is flagged successful with
    exactly one ComputationalResult and one ComputedFile, and that the
    generated PCL column mostly agrees with the stored reference file.
    """
    job = prepare_ba_job()
    # Make sure that a previous test didn't leave a directory around.
    shutil.rmtree(f"/home/user/data_store/processor_job_{job.id}", ignore_errors=True)

    job_context = array_express.affy_to_pcl(job.pk)
    self.assertEqual(job_context["platform_accession_code"], "hugene10st")
    self.assertEqual(job_context["brainarray_package"], "hugene10sthsensgprobe")

    updated_job = ProcessorJob.objects.get(pk=job.pk)
    self.assertTrue(updated_job.success)
    # count() asks the database for the number instead of loading every row.
    self.assertEqual(ComputationalResult.objects.count(), 1)
    self.assertEqual(ComputedFile.objects.count(), 1)

    # Exactly one row exists (asserted above); fetch it once and reuse it.
    computed_file = ComputedFile.objects.first()
    output_filename = computed_file.absolute_file_path
    sample_column = "GSM1426071_CD_colon_active_1.CEL"
    try:
        self.assertEqual(computed_file.filename, "GSM1426071_CD_colon_active_1.PCL")
        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM1426071_CD_colon_active_1.PCL", sep="\t"
        )[sample_column]
        actual_data = pd.read_csv(output_filename, sep="\t")[sample_column]
        assertMostlyAgrees(self, expected_data, actual_data)
    finally:
        # Remove the generated file even when an assertion fails, so a
        # failing run does not leave artifacts behind for later tests.
        if os.path.exists(output_filename):
            os.remove(output_filename)
def test_convert_simple_pcl_with_header(self):
    """Process a PCL file that carries a header row.

    Header looks like:
    > ID_REF, VALUE
    """
    sample_description = {
        "accession_code": "GSM1234847",
        "source_filename": "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-51013/",
        "filename": "GSM1234847_sample_table.txt",
        "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM1234847_sample_table.txt",
        "platform_accession_code": "A-AFFY-38",
    }
    job = prepare_job(sample_description)
    final_context = assertRunsSuccessfully(self, job)

    # Both files are tab-separated with the identifier in the first column.
    read_options = {"sep": "\t", "index_col": 0}
    reference_path = (
        "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM1234847-tbl-1.txt"
    )
    reference_frame = pd.read_csv(reference_path, **read_options)
    expected_data = reference_frame["VALUE"]

    produced_frame = pd.read_csv(final_context["output_file_path"], **read_options)
    actual_data = produced_frame["VALUE"]

    assertMostlyAgrees(self, expected_data, actual_data)
def test_affy_to_pcl_no_brainarray(self):
    """Run ``affy_to_pcl`` on the job from ``prepare_non_ba_job`` and check its output.

    Verifies that the job is flagged successful with exactly one
    ComputationalResult and one ComputedFile, and that the generated PCL
    column mostly agrees with the stored reference file.
    """
    job = prepare_non_ba_job()
    # Make sure that a previous test didn't leave a directory around.
    shutil.rmtree(f"/home/user/data_store/processor_job_{job.id}", ignore_errors=True)

    array_express.affy_to_pcl(job.pk)

    updated_job = ProcessorJob.objects.get(pk=job.pk)
    self.assertTrue(updated_job.success)
    # count() asks the database for the number instead of loading every row.
    self.assertEqual(ComputationalResult.objects.count(), 1)
    self.assertEqual(ComputedFile.objects.count(), 1)

    # Exactly one row exists (asserted above); fetch it once and reuse it.
    computed_file = ComputedFile.objects.first()
    output_filename = computed_file.absolute_file_path
    sample_column = "GSM45588.CEL"
    try:
        self.assertEqual(computed_file.filename, "GSM45588.PCL")
        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM45588.PCL", sep="\t"
        )[sample_column]
        actual_data = pd.read_csv(output_filename, sep="\t")[sample_column]
        assertMostlyAgrees(self, expected_data, actual_data)
    finally:
        # Remove the generated file even when an assertion fails, so a
        # failing run does not leave artifacts behind for later tests.
        if os.path.exists(output_filename):
            os.remove(output_filename)
def test_affy_to_pcl_huex_v1(self):
    """Special Case because there is no CDL for Huex V1.

    Runs ``affy_to_pcl`` on the job from ``prepare_huex_v1_job``, then
    verifies that the job is flagged successful with exactly one
    ComputationalResult and one ComputedFile, and that the generated PCL
    column mostly agrees with the stored reference file.
    """
    job = prepare_huex_v1_job()
    # Make sure that a previous test didn't leave a directory around.
    shutil.rmtree(f"/home/user/data_store/processor_job_{job.id}", ignore_errors=True)

    array_express.affy_to_pcl(job.pk)

    updated_job = ProcessorJob.objects.get(pk=job.pk)
    self.assertTrue(updated_job.success)
    # count() asks the database for the number instead of loading every row.
    self.assertEqual(ComputationalResult.objects.count(), 1)
    self.assertEqual(ComputedFile.objects.count(), 1)

    # Exactly one row exists (asserted above); fetch it once and reuse it,
    # including for cleanup, rather than re-querying the table each time.
    computed_file = ComputedFile.objects.first()
    output_filename = computed_file.absolute_file_path
    sample_column = "GSM1364667_U_110208_7-02-10_S2.CEL"
    try:
        self.assertEqual(computed_file.filename, "GSM1364667_U_110208_7-02-10_S2.PCL")
        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM1364667_U_110208_7-02-10_S2.PCL", sep="\t"
        )[sample_column]
        actual_data = pd.read_csv(output_filename, sep="\t")[sample_column]
        assertMostlyAgrees(self, expected_data, actual_data)
    finally:
        # Remove the generated file even when an assertion fails, so a
        # failing run does not leave artifacts behind for later tests.
        if os.path.exists(output_filename):
            os.remove(output_filename)
def test_convert_illumina_no_header(self):
    """Process an Illumina file that has no header row.

    Example input:
    > ILMN_1885639 10.0000 0.7931
    > ILMN_2209417 10.0000 0.2029
    > ILMN_1765401 152.0873 0.0000
    """
    sample_description = {
        "accession_code": "GSM1089291",
        "source_filename": "https://github.com/AlexsLemonade/refinebio/files/2255178/GSM1089291-tbl-1.txt",
        "filename": "GSM1089291-tbl-1.txt",
        "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM1089291-tbl-1.txt",
        "platform_accession_code": "A-MEXP-1171",
        "manufacturer": "ILLUMINA",
    }
    job = prepare_job(sample_description)
    final_context = assertRunsSuccessfully(self, job)

    output_path = final_context["output_file_path"]
    self.assertTrue(no_op.check_output_quality(output_path))

    # Expected converted output:
    #   ENSG00000105675 10
    #   ENSG00000085721 152.0873
    #   ENSG00000278494 152.0873
    # Both files are read as tab-separated with explicit column names and
    # the first column used as the index.
    read_options = {"sep": "\t", "names": ["", "VALUE"], "index_col": 0}
    reference_path = (
        "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM1089291-tbl-1.txt"
    )
    reference_frame = pd.read_csv(reference_path, **read_options)
    expected_data = reference_frame["VALUE"]

    produced_frame = pd.read_csv(output_path, **read_options)
    actual_data = produced_frame["VALUE"]

    assertMostlyAgrees(self, expected_data, actual_data)
def test_convert_processed_illumina(self):
    """Process an Illumina file that has a header row.

    Example input:
    > Reporter Identifier VALUE Detection Pval
    > ILMN_1343291 14.943602 0
    > ILMN_1343295 13.528082 0
    """
    sample_description = {
        "accession_code": "GSM557500",
        "source_filename": "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-22433/",
        "filename": "GSM557500-tbl-1.txt",
        "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM557500-tbl-1.txt",
        "platform_accession_code": "A-MEXP-1171",
        "manufacturer": "ILLUMINA",
    }
    job = prepare_job(sample_description)
    final_context = assertRunsSuccessfully(self, job)

    output_path = final_context["output_file_path"]
    self.assertTrue(no_op.check_output_quality(output_path))

    # Expected converted output:
    #   ENSG00000156508 14.943602
    #   ENSG00000111640 13.528082
    # Both files are read as tab-separated with explicit column names and
    # the first column used as the index.
    read_options = {"sep": "\t", "names": ["", "VALUE"], "index_col": 0}
    reference_path = (
        "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM557500-tbl-1.txt"
    )
    reference_frame = pd.read_csv(reference_path, **read_options)
    expected_data = reference_frame["VALUE"]

    produced_frame = pd.read_csv(output_path, **read_options)
    actual_data = produced_frame["VALUE"]

    assertMostlyAgrees(self, expected_data, actual_data)