예제 #1
0
 def test_intersect(self):
     # Every sample registered as passed on the series is expected back from
     # ppr.intersect, in the order it was added.
     fake_isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     expected = [Sample(gsm_name, gse) for gsm_name in ('GSM10', 'GSM20')]
     for sample in expected:
         gse.add_passed_sample(sample)
     self.assertEqual(ppr.intersect(gse, fake_isamp), expected)
예제 #2
0
    def test_gen_orig_params_per_with_a_single_sra(self):
        # Fixture: a series holding one passed sample with a preset outdir.
        gse = Series('GSE123456', 'GSE123456_family.soft.subset')
        gsm = Sample('GSM1', gse)
        gsm.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        gse.add_passed_sample(gsm)

        # Serve the single-SRA YAML fixture whenever the download module
        # calls open().
        fake_open = mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA)
        with mock.patch('rsempipeline.utils.download.open', fake_open):
            params = download.gen_orig_params_per(gsm)

        # One entry is expected: [None, [sra file, flag file], sample].
        sra_file = 'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra'
        flag_file = 'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE'
        self.assertEqual(params, [[None, [sra_file, flag_file], gsm]])
예제 #3
0
class SampleTestCase(unittest.TestCase):
    """Tests for ``Sample`` using one sample (GSM1) attached to one series."""

    # Fixture values shared by several tests below.
    SRX_URL = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra/SRX/SRX000/SRX000000"
    EXPECTED_OUTDIR = "some_outdir/GSE123456/mus_musculus/GSM1"
    EXPECTED_TEXT = "<GSM1 (0/0/0) of GSE123456 at some_outdir/GSE123456/mus_musculus/GSM1>"

    def setUp(self):
        self.series = Series("GSE123456", "GSE123456_family.soft.subset")
        self.sample = Sample("GSM1", self.series)

    def _fill_in_info(self):
        # Give the sample both an organism and a url, which is what
        # test_is_info_complete shows to be required for completeness.
        self.sample.organism = "Mus musculus"
        self.sample.url = self.SRX_URL

    def test_is_info_complete(self):
        # Incomplete with nothing set, still incomplete with only the
        # organism, complete once the url is set as well.
        self.assertFalse(self.sample.is_info_complete())
        self.sample.organism = "Mus musculus"
        self.assertFalse(self.sample.is_info_complete())
        self.sample.url = self.SRX_URL
        self.assertTrue(self.sample.is_info_complete())

    @mock.patch("rsempipeline.utils.objs.Sample.is_info_complete")
    def test_gen_outdir(self, mock_is_info_complete):
        self._fill_in_info()
        # Be explicit about the "complete" case instead of relying on the
        # truthiness of the default MagicMock return value.
        mock_is_info_complete.return_value = True
        self.assertEqual(self.sample.gen_outdir("some_outdir"), self.EXPECTED_OUTDIR)
        self.assertEqual(self.sample.outdir, self.EXPECTED_OUTDIR)
        # When the sample reports incomplete info, gen_outdir must refuse.
        mock_is_info_complete.return_value = False
        self.assertRaisesRegexp(ValueError, "not information complete", self.sample.gen_outdir, "some_outdir")

    def test___str__(self):
        self._fill_in_info()
        self.sample.gen_outdir("some_outdir")
        self.assertEqual(str(self.sample), self.EXPECTED_TEXT)

    def test___repr__(self):
        self._fill_in_info()
        self.sample.gen_outdir("some_outdir")
        # Call repr() so that __repr__ itself is exercised; the previous
        # version called str() and merely duplicated test___str__.
        # NOTE(review): assumes Sample.__repr__ yields the same text as
        # __str__, as the original expected value implies -- confirm.
        self.assertEqual(repr(self.sample), self.EXPECTED_TEXT)
예제 #4
0
    def test_gen_all_samples_from_soft_and_isamp(
            self, mock_get_isamp, mock_analyze_one, mock_sanity_check):
        # The three mock arguments are injected from outside this method
        # (presumably by mock.patch decorators -- not visible here).
        organism = 'H**o Sapiens'
        gse = Series('GSE0')
        expected = [Sample('GSM10', gse), Sample('GSM20', gse)]
        for sample in expected:
            sample.organism = organism
            gse.add_passed_sample(sample)

        # Wire up the mocked collaborators.
        mock_sanity_check.return_value = True
        mock_get_isamp.return_value = self.gen_fake_isamp()
        mock_analyze_one.return_value = expected

        config = {'INTERESTED_ORGANISMS': [organism]}
        result = ppr.gen_all_samples_from_soft_and_isamp(
            ['soft1'], 'isamp_file_or_str', config)
        self.assertEqual(result, expected)
예제 #5
0
 def test_add(self, mock_is_info_complete):
     # soft_parser.add should hand back index + 1 when the sample reports
     # complete info, and the unchanged index otherwise.
     gse = Series('GSE123456', 'GSE123456_family.soft.subset')
     gsm = Sample('GSM1', gse)
     for info_complete, expected_index in ((True, 2), (False, 1)):
         mock_is_info_complete.return_value = info_complete
         self.assertEqual(soft_parser.add(gsm, gse, 1), expected_index)
예제 #6
0
    def test_gen_orig_params_per_with_a_single_sra(self):
        # Set up one sample, registered as passed on its series.
        the_series = Series('GSE123456', 'GSE123456_family.soft.subset')
        the_sample = Sample('GSM1', the_series)
        the_sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        the_series.add_passed_sample(the_sample)

        # The single expected entry: no input, two output paths, the sample.
        expected_outputs = [
            'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra',
            'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE',
        ]
        expected = [[None, expected_outputs, the_sample]]

        # open() inside the download module yields the single-SRA fixture.
        patcher = mock.patch('rsempipeline.utils.download.open',
                             mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA))
        with patcher:
            actual = download.gen_orig_params_per(the_sample)
        self.assertEqual(actual, expected)
예제 #7
0
    def test_gen_orig_params_per_with_multiple_sras(self):
        # Fixture: a series holding one passed sample with a preset outdir.
        gse = Series('GSE123456', 'GSE123456_family.soft.subset')
        gsm = Sample('GSM1', gse)
        gsm.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        gse.add_passed_sample(gsm)

        # Serve the multi-SRA YAML fixture whenever the download module
        # calls open().
        fake_open = mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS)
        with mock.patch('rsempipeline.utils.download.open', fake_open):
            params = download.gen_orig_params_per(gsm)

        # One [sra file, flag file] pair per SRA listed in the fixture.
        expected_outputs = [
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra', 'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra', 'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra', 'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra', 'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE'],
        ]
        # in the format of input, outputs, other params
        expected = [[None, outputs, gsm] for outputs in expected_outputs]
        self.assertEqual(params, expected)
예제 #8
0
 def test_intersect_with_discrenpacy(self, L):
     # Only GSM10 is registered on the series. The intersection should
     # still return it, and one ERROR record describing the mismatch is
     # expected in the captured log.
     # NOTE(review): L is supplied from outside this method, presumably a
     # testfixtures LogCapture -- confirm.
     fake_isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     only_sample = Sample('GSM10', gse)
     gse.add_passed_sample(only_sample)
     self.assertEqual(ppr.intersect(gse, fake_isamp), [only_sample])

     expected_record = (
         'rsempipeline.utils.pre_pipeline_run',
         'ERROR',
         'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
     )
     L.check(expected_record)
예제 #9
0
    def test_gen_orig_params_per_with_multiple_sras(self):
        # Set up one sample, registered as passed on its series.
        the_series = Series('GSE123456', 'GSE123456_family.soft.subset')
        the_sample = Sample('GSM1', the_series)
        the_sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        the_series.add_passed_sample(the_sample)

        # Expected outputs, listed column-wise: the i-th sra file goes with
        # the i-th flag file.
        sra_files = [
            'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra',
            'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra',
            'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra',
            'some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra',
        ]
        flag_files = [
            'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE',
            'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE',
            'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE',
            'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE',
        ]
        # in the format of input, outputs, other params
        expected = [[None, [sra_file, flag_file], the_sample]
                    for sra_file, flag_file in zip(sra_files, flag_files)]

        # open() inside the download module yields the multi-SRA fixture.
        patcher = mock.patch('rsempipeline.utils.download.open',
                             mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS))
        with patcher:
            actual = download.gen_orig_params_per(the_sample)
        self.assertEqual(actual, expected)
예제 #10
0
 def test_analyze_one(self, mock_parse, L):
     # The mocked parse step returns a series with one passed sample.
     # analyze_one should return that sample, and one ERROR record about
     # the soft/isamp mismatch is expected in the captured log.
     # NOTE(review): mock_parse and L are supplied from outside this method
     # (presumably a mock.patch and a log-capture decorator) -- confirm.
     isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     gsm = Sample('GSM10', gse)
     gsm.organism = 'H**o Sapiens'
     gse.add_passed_sample(gsm)
     mock_parse.return_value = gse

     result = ppr.analyze_one('GSE0_family.soft.subset', isamp, ['H**o sapiens'])
     self.assertEqual(result, [gsm])

     expected_record = (
         'rsempipeline.utils.pre_pipeline_run',
         'ERROR',
         'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
     )
     L.check(expected_record)
예제 #11
0
def parse(soft_file, interested_organisms):
    """Parse the soft file and build a Series holding its samples.

    The file is read line by line and every line is expected to have the
    form ``label = value``. A ``^SERIES`` line creates the series, each
    ``^SAMPLE`` line starts a new sample, and every line seen while a sample
    is open is passed to ``update`` for that sample.

    :param soft_file: path to the soft file; one GSE per file is assumed
    :param interested_organisms: a list of interested organisms:
                                 ['Homo sapiens', 'Mus musculus']
    :returns: the parsed Series, or None if the file has no ``^SERIES`` line
    :raises ValueError: if the series named inside the file differs from the
                        one derived from the filename, or if a line contains
                        no '=' at all
    """
    logger.info("Parsing file: {0} ...".format(soft_file))
    series_name_from_file = get_series_name_from(soft_file)

    # Assume one GSE per soft file
    # index: the index of all passed samples, unpassed samples are not indexed
    index, series, current_sample = 1, None, None
    with open(soft_file, 'rb') as inf:
        for line in inf:
            # Split on the first '=' only, so that a value which itself
            # contains '=' (e.g. a URL with a query string) stays intact
            # instead of breaking the two-name unpacking.
            label, value = [__.strip() for __ in line.split('=', 1)]
            if label == '^SERIES':
                series = Series(value, os.path.abspath(soft_file))
                if series.name != series_name_from_file:
                    msg = ('series contained in the soft file doesn\'t match '
                           'that in the filename: {0} != {1}'.format(
                               series, series_name_from_file))
                    raise ValueError(msg)
            elif label == '^SAMPLE':
                # A new sample begins: hand the previous one to add() first.
                index = add(current_sample, series, index)
                current_sample = Sample(name=value, series=series)

            if current_sample:
                current_sample = update(current_sample, label, value,
                                        interested_organisms)
        if series is not None:
            # add the last sample
            add(current_sample, series, index)

            logger.info("{0}: {1}/{2} samples passed".format(
                series.name, series.num_passed_samples(),
                series.num_samples()))
            logger.info('=' * 30)
            return series
예제 #12
0
 def setUp(self):
     # Fresh fixture for every test: one series and one sample bound to it.
     series = Series("GSE123456", "GSE123456_family.soft.subset")
     self.series = series
     self.sample = Sample("GSM1", series)