def test_intersect(self):
    # GSM10 and GSM20 are both registered as passed samples of GSE0;
    # intersecting the series with the fake isamp is expected to hand
    # back exactly that list.
    isamp = self.gen_fake_isamp()
    series = Series('GSE0')
    expected = [Sample(gsm, series) for gsm in ('GSM10', 'GSM20')]
    for sample in expected:
        series.add_passed_sample(sample)

    intersected = ppr.intersect(series, isamp)
    self.assertEqual(intersected, expected)
def test_intersect_with_discrenpacy(self, L):
    # L is injected from outside this block (presumably a log-capture
    # decorator -- confirm against the enclosing class).
    isamp = self.gen_fake_isamp()
    series = Series('GSE0')

    # Only GSM10 is passed in the series, so the intersection should
    # return just that sample and an ERROR record should be logged.
    only_sample = Sample('GSM10', series)
    series.add_passed_sample(only_sample)
    expected = [only_sample]

    self.assertEqual(ppr.intersect(series, isamp), expected)

    expected_record = (
        'rsempipeline.utils.pre_pipeline_run',
        'ERROR',
        'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
    )
    L.check(expected_record)
def test_analyze_one(self, mock_parse, L):
    # mock_parse and L are injected from outside this block (presumably
    # a patch of the soft parser and a log capture -- confirm).
    fake_isamp = self.gen_fake_isamp()
    fake_series = Series('GSE0')

    # A single passed sample, tagged with its organism.
    sample = Sample('GSM10', fake_series)
    sample.organism = 'H**o Sapiens'
    fake_series.add_passed_sample(sample)
    expected = [sample]

    # The mocked parser hands back the prepared series.
    mock_parse.return_value = fake_series

    analyzed = ppr.analyze_one(
        'GSE0_family.soft.subset', fake_isamp, ['H**o sapiens'])
    self.assertEqual(analyzed, expected)

    expected_record = (
        'rsempipeline.utils.pre_pipeline_run',
        'ERROR',
        'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
    )
    L.check(expected_record)
def test_gen_orig_params_per_with_a_single_sra(self):
    # Set up a series holding one passed sample with a known outdir.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    sample = Sample('GSM1', series)
    sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
    series.add_passed_sample(sample)

    # Replace open() inside the download module so that reads return
    # the single-SRA YAML fixture.
    fake_open = mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA)
    with mock.patch('rsempipeline.utils.download.open', fake_open):
        vals = download.gen_orig_params_per(sample)

    # One entry is expected: [input, [sra file, completion flag], sample].
    sra_file = 'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra'
    flag_file = 'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE'
    self.assertEqual(vals, [[None, [sra_file, flag_file], sample]])
def test_gen_all_samples_from_soft_and_isamp(
        self, mock_get_isamp, mock_analyze_one, mock_sanity_check):
    # All three collaborators are mocks injected from outside this block:
    # the sanity check passes, get_isamp yields the fake isamp, and
    # analyze_one returns the prepared sample list.
    mock_sanity_check.return_value = True
    mock_get_isamp.return_value = self.gen_fake_isamp()

    fake_series = Series('GSE0')
    sample_list = [Sample(gsm, fake_series) for gsm in ('GSM10', 'GSM20')]
    for sample in sample_list:
        sample.organism = 'H**o Sapiens'
        fake_series.add_passed_sample(sample)
    mock_analyze_one.return_value = sample_list

    # With a single soft file the result should be exactly what
    # analyze_one returned for it.
    config = {'INTERESTED_ORGANISMS': ['H**o Sapiens']}
    all_samples = ppr.gen_all_samples_from_soft_and_isamp(
        ['soft1'], 'isamp_file_or_str', config)
    self.assertEqual(all_samples, sample_list)
def test_add(self, mock_is_info_complete):
    # mock_is_info_complete is injected from outside this block.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    current_sample = Sample('GSM1', series)

    # Starting from index 1: a complete sample yields 2, an incomplete
    # one leaves the index at 1.
    cases = ((True, 2), (False, 1))
    for info_complete, expected_index in cases:
        mock_is_info_complete.return_value = info_complete
        self.assertEqual(
            soft_parser.add(current_sample, series, 1), expected_index)
def test_gen_orig_params_per_with_a_single_sra(self):
    # Prepare a series with a single passed sample and a fixed outdir.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    sample = Sample('GSM1', series)
    sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
    series.add_passed_sample(sample)

    # open() in the download module is patched to serve the single-SRA
    # YAML fixture.
    patched_open = mock.patch(
        'rsempipeline.utils.download.open',
        mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA))
    with patched_open:
        vals = download.gen_orig_params_per(sample)

    # Each entry has the shape [input, outputs, sample].
    outputs = [
        'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra',
        'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE',
    ]
    expected = [[None, outputs, sample]]
    self.assertEqual(vals, expected)
def test_gen_orig_params_per_with_multiple_sras(self):
    # Set up a series holding one passed sample with a known outdir.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    sample = Sample('GSM1', series)
    sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
    series.add_passed_sample(sample)

    # Replace open() inside the download module so that reads return
    # the multiple-SRA YAML fixture.
    fake_open = mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS)
    with mock.patch('rsempipeline.utils.download.open', fake_open):
        vals = download.gen_orig_params_per(sample)

    # (sra file, completion flag) pairs, one per expected SRA.
    output_pairs = [
        ('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra',
         'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE'),
        ('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra',
         'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE'),
        ('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra',
         'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE'),
        ('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra',
         'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE'),
    ]
    # Each entry is in the format of input, outputs, other params.
    expected = [
        [None, [sra_file, flag_file], sample]
        for sra_file, flag_file in output_pairs
    ]
    self.assertEqual(vals, expected)
def test_gen_orig_params_per_with_multiple_sras(self):
    # Prepare a series with a single passed sample and a fixed outdir.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    sample = Sample('GSM1', series)
    sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
    series.add_passed_sample(sample)

    # open() in the download module is patched to serve the multiple-SRA
    # YAML fixture.
    patched_open = mock.patch(
        'rsempipeline.utils.download.open',
        mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS))
    with patched_open:
        vals = download.gen_orig_params_per(sample)

    # Expected entries, in the format of input, outputs, other params.
    expected = []
    expected.append([None, [
        'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra',
        'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE',
    ], sample])
    expected.append([None, [
        'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra',
        'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE',
    ], sample])
    expected.append([None, [
        'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra',
        'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE',
    ], sample])
    expected.append([None, [
        'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra',
        'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE',
    ], sample])

    self.assertEqual(vals, expected)
def parse(soft_file, interested_organisms):
    """Parse a soft file into a Series populated with its samples.

    One GSE (series) per soft file is assumed.  Every line is treated as a
    ``label = value`` pair, split on the first ``=`` only so that values
    which themselves contain ``=`` are kept intact.  Lines without any
    ``=`` (e.g. blank lines) are skipped.

    :param soft_file: path of the soft file to parse
    :param interested_organisms: a list of interested organisms, e.g.
        ['Homo sapiens', 'Mus musculus']; forwarded to ``update`` together
        with every label/value pair of the current sample
    :returns: the Series created from the ``^SERIES`` line, or None when
        the file contains no such line
    :raises ValueError: if the series name found in the file does not match
        the one derived from the filename
    """
    logger.info("Parsing file: {0} ...".format(soft_file))
    series_name_from_file = get_series_name_from(soft_file)

    # index: the index of all passed samples, unpassed samples are not
    # indexed
    index, series, current_sample = 1, None, None
    with open(soft_file, 'rb') as inf:
        for line in inf:
            # Split on the first '=' only: a plain split('=') breaks the
            # two-way unpacking as soon as the value contains '='.
            label, sep, value = line.partition('=')
            if not sep:
                # Not a "label = value" line, nothing to record.
                continue
            label, value = label.strip(), value.strip()

            if label == '^SERIES':
                series = Series(value, os.path.abspath(soft_file))
                if series.name != series_name_from_file:
                    msg = ('series contained in the soft file doesn\'t match '
                           'that in the filename: {0} != {1}'.format(
                               series, series_name_from_file))
                    raise ValueError(msg)
            elif label == '^SAMPLE':
                # A new sample starts: hand the previous one to add(),
                # which returns the index to use from here on.
                index = add(current_sample, series, index)
                current_sample = Sample(name=value, series=series)

            # Every line seen while a sample is open (including its own
            # ^SAMPLE line) is passed to update().
            if current_sample:
                current_sample = update(current_sample, label, value,
                                        interested_organisms)

    if series is not None:
        # add the last sample
        add(current_sample, series, index)
        logger.info("{0}: {1}/{2} samples passed".format(
            series.name, series.num_passed_samples(), series.num_samples()))
    logger.info('=' * 30)
    return series
def test_add_with_current_sample_being_None(self):
    # With no current sample, add() is expected to return the index it
    # was given.
    series = Series('GSE123456', 'GSE123456_family.soft.subset')
    returned_index = soft_parser.add(None, series, 1)
    self.assertEqual(returned_index, 1)
def setUp(self):
    # Build the series fixture stored on self.series.
    name, soft_file = "GSE123456", "GSE123456_family.soft.subset"
    self.series = Series(name, soft_file)
class SeriesTestCase(unittest.TestCase):
    # Exercises Series: construction, sample bookkeeping and its string
    # representations.

    def setUp(self):
        # Every test starts from a freshly built, empty series.
        name, soft_file = "GSE123456", "GSE123456_family.soft.subset"
        self.series = Series(name, soft_file)

    def test___init__(self):
        series = self.series
        self.assertEqual(series.name, "GSE123456")
        self.assertEqual(series.passed_samples, [])
        self.assertEqual(series.samples, [])
        self.assertEqual(series.soft_file, "GSE123456_family.soft.subset")
        self.assertEqual(series.num_passed_samples(), 0)
        self.assertEqual(series.num_samples(), 0)

    def test_add_sample(self):
        # A plainly added sample counts as a sample but not as passed.
        self.series.add_sample(Sample("GSM1", self.series))
        self.assertEqual(self.series.num_samples(), 1)
        self.assertEqual(self.series.num_passed_samples(), 0)

    def test_add_passed_sample(self):
        # One plain and one passed sample: both are counted as samples,
        # only the second one as passed.
        plain_sample = Sample("GSM1", self.series)
        passed_sample = Sample("GSM2", self.series)
        self.series.add_sample(plain_sample)
        self.series.add_passed_sample(passed_sample)
        self.assertEqual(self.series.num_samples(), 2)
        self.assertEqual(self.series.num_passed_samples(), 1)

    def test___str__(self):
        rendered = str(self.series)
        self.assertEqual(rendered, "GSE123456 (passed samples: 0/0)")

    def test___repr__(self):
        rendered = repr(self.series)
        self.assertEqual(rendered, "GSE123456 (passed samples: 0/0)")
def test_analyze_one_soft_series_name_not_in_isamp_series_names_list(self, mock_parse, L):
    # mock_parse and L are injected from outside this block; L is not
    # inspected here.
    fake_isamp = self.gen_fake_isamp()

    # The mocked parser returns series GSE9999; analyze_one is expected
    # to return None for it.
    mock_parse.return_value = Series('GSE9999')

    outcome = ppr.analyze_one('GSE9999_family.soft.subset', fake_isamp, [])
    self.assertIsNone(outcome)