예제 #1
0
 def test_job_conf_getters(self):
     """Exercise the typed ``get_<type>`` accessors of JobConf.

     Each key is named after the getter that should parse it ('int',
     'float', 'bool'), so the getter is looked up by name and its result
     compared with the expected Python value.  The test then asserts that
     ``get_bool`` returns the supplied ``default`` both for a missing key
     and for the value 'foo', and that it raises RuntimeError for 'foo'
     when no default is given.
     """
     raw = ['int', '1', 'float', '2.3', 'bool', 'false']
     typed_conf = JobConf(raw)
     # Keys sit at the even positions of the flat key/value list.
     for type_name, expected in zip(raw[::2], [1, 2.3, False]):
         typed_getter = getattr(typed_conf, 'get_%s' % type_name)
         self.assertEqual(typed_getter(type_name), expected)
     candidates = (JobConf([]), JobConf(['x', 'foo']))
     for conf in candidates:
         for fallback in (False, True):
             self.assertEqual(conf.get_bool('x', default=fallback), fallback)
     self.assertRaises(RuntimeError, JobConf(['x', 'foo']).get_bool, 'x')
예제 #2
0
    def setUp(self):
        """Prepare the job configuration, a scratch directory and the mapper.

        A fresh temporary directory is created, registered in
        ``self._things_to_clean_up`` and made the current working directory
        (the previous one is remembered in ``self._old_cwd``).  The map
        context, the reference data and the mapper are then built; if any of
        those steps fails, ``tearDown`` is invoked explicitly before the
        exception is re-raised.
        """
        self._map_batch_size = 6
        self._log = logging.getLogger(__name__)
        self._old_cwd = os.getcwd()
        # JobConf is given a flat list of alternating property names and values.
        self._jc = JobConf([
            'seal.seqal.log.level', 'DEBUG',
            'seal.seqal.fastq-subformat', 'fastq-sanger',
            #'seal.seqal.alignment.max.isize', None,
            #'seal.seqal.alignment.min.isize', None,
            'seal.seqal.pairing.batch.size', self._map_batch_size,
            'seal.seqal.min_hit_quality', 0,
            'seal.seqal.remove_unmapped', False,
            'seal.seqal.nthreads', 1,
            'seal.seqal.trim.qual', 0,
            'mapred.reduce.tasks', 0,
            'mapred.cache.archives',
                (os.path.join("file://", tseal_utils.MiniRefMemDir, "mini_ref_bwamem_0.7.8.tar") + "#reference"),
        ])
        self._things_to_clean_up = []

        # NOTE: the string is mkdtemp's first positional parameter, i.e. the
        # *suffix* of the generated directory name.
        workdir = tempfile.mkdtemp("seqal_mapper_test_workdir")
        self._things_to_clean_up.append(workdir)
        os.chdir(workdir)

        try:
            # The context is created inside the try block: the working
            # directory has already been changed, so a failure here must
            # also go through tearDown.
            self._ctx = sam_map_context(self._jc, [])
            self._setup_ref(self._jc.get('mapred.cache.archives'))
            self._mapper = mapper(self._ctx)
        except Exception:
            # call tearDown ourselves because unittest doesn't call it if setUp fails.
            # ``Exception`` is used because ``StandardError`` exists only on Python 2.
            self.tearDown()
            raise
예제 #3
0
파일: test_support.py 프로젝트: wtj/pydoop
 def test_job_conf(self):
     """Check JobConf lookups through the keys of ``mrv2_to_mrv1``.

     Every key of ``mrv1_to_mrv2`` is stored with itself as its value.
     For each key ``k`` of ``mrv2_to_mrv1`` the test then asserts that
     ``jc[k]`` equals the value stored under ``mrv2_to_mrv1[k]``.
     """
     job_conf = dict((k, k) for k in mrv1_to_mrv2)
     # JobConf is given a flat [key, value, key, value, ...] list.
     # dict.items() is used instead of iteritems() because it is available
     # on both Python 2 and Python 3 and yields the same pairs.
     flat = [item for pair in job_conf.items() for item in pair]
     jc = JobConf(flat)
     for k in mrv2_to_mrv1:
         self.assertEqual(jc[k], job_conf[mrv2_to_mrv1[k]])
예제 #4
0
    def setUp(self):
        """Prepare the job configuration, a scratch directory and the mapper.

        A fresh temporary directory is created, registered in
        ``self._things_to_clean_up`` and made the current working directory
        (the previous one is remembered in ``self._old_cwd``).  The map
        context, the reference data and the mapper are then built; if any of
        those steps fails, ``tearDown`` is invoked explicitly before the
        exception is re-raised.
        """
        self._map_batch_size = 6
        self._log = logging.getLogger(__name__)
        self._old_cwd = os.getcwd()
        # JobConf is given a flat list of alternating property names and values.
        self._jc = JobConf([
            'seal.seqal.log.level',
            'DEBUG',
            'seal.seqal.fastq-subformat',
            'fastq-sanger',
            #'seal.seqal.alignment.max.isize', None,
            #'seal.seqal.alignment.min.isize', None,
            'seal.seqal.pairing.batch.size',
            self._map_batch_size,
            'seal.seqal.min_hit_quality',
            0,
            'seal.seqal.remove_unmapped',
            False,
            'seal.seqal.nthreads',
            1,
            'seal.seqal.trim.qual',
            0,
            'mapred.reduce.tasks',
            0,
            'mapred.cache.archives',
            (os.path.join("file://", tseal_utils.MiniRefMemDir,
                          "mini_ref_bwamem_0.7.8.tar") + "#reference"),
        ])
        self._things_to_clean_up = []

        # NOTE: the string is mkdtemp's first positional parameter, i.e. the
        # *suffix* of the generated directory name.
        workdir = tempfile.mkdtemp("seqal_mapper_test_workdir")
        self._things_to_clean_up.append(workdir)
        os.chdir(workdir)

        try:
            # The context is created inside the try block: the working
            # directory has already been changed, so a failure here must
            # also go through tearDown.
            self._ctx = sam_map_context(self._jc, [])
            self._setup_ref(self._jc.get('mapred.cache.archives'))
            self._mapper = mapper(self._ctx)
        except Exception:
            # call tearDown ourselves because unittest doesn't call it if setUp fails.
            # ``Exception`` is used because ``StandardError`` exists only on Python 2.
            self.tearDown()
            raise
예제 #5
0
파일: test_support.py 프로젝트: wtj/pydoop
 def test_job_conf_getters(self):
     """Verify JobConf's ``get_int`` / ``get_float`` / ``get_bool`` accessors.

     Each case is (key, stored text, expected parsed value); the key doubles
     as the suffix of the getter under test.  The test also asserts that
     ``get_bool`` returns the given ``default`` for a missing key and for
     the value 'foo', and raises RuntimeError for 'foo' without a default.
     """
     cases = [('int', '1', 1), ('float', '2.3', 2.3), ('bool', 'false', False)]
     flat_pairs = []
     for key, text, _ in cases:
         flat_pairs.extend((key, text))
     parsed_conf = JobConf(flat_pairs)
     for key, _, wanted in cases:
         accessor = getattr(parsed_conf, 'get_%s' % key)
         self.assertEqual(accessor(key), wanted)
     empty_conf, bad_bool_conf = JobConf([]), JobConf(['x', 'foo'])
     for conf in (empty_conf, bad_bool_conf):
         for default_value in (False, True):
             self.assertEqual(
                 conf.get_bool('x', default=default_value), default_value)
     self.assertRaises(RuntimeError, JobConf(['x', 'foo']).get_bool, 'x')
예제 #6
0
class TestSeqalMapper(unittest.TestCase):
    """Run the seqal mapper on a small reference and check the SAM it emits.

    Each test runs inside its own temporary working directory, into which
    the reference archive named by 'mapred.cache.archives' is unpacked.
    """

    def setUp(self):
        """Prepare the job configuration, a scratch directory and the mapper.

        A fresh temporary directory is created, registered in
        ``self._things_to_clean_up`` and made the current working directory.
        If building the context, the reference or the mapper fails,
        ``tearDown`` is invoked explicitly before the exception is re-raised.
        """
        self._map_batch_size = 6
        self._log = logging.getLogger(__name__)
        self._old_cwd = os.getcwd()
        # JobConf is given a flat list of alternating property names and values.
        self._jc = JobConf([
            'seal.seqal.log.level', 'DEBUG',
            'seal.seqal.fastq-subformat', 'fastq-sanger',
            #'seal.seqal.alignment.max.isize', None,
            #'seal.seqal.alignment.min.isize', None,
            'seal.seqal.pairing.batch.size', self._map_batch_size,
            'seal.seqal.min_hit_quality', 0,
            'seal.seqal.remove_unmapped', False,
            'seal.seqal.nthreads', 1,
            'seal.seqal.trim.qual', 0,
            'mapred.reduce.tasks', 0,
            'mapred.cache.archives',
                (os.path.join("file://", tseal_utils.MiniRefMemDir, "mini_ref_bwamem_0.7.8.tar") + "#reference"),
        ])
        self._things_to_clean_up = []

        # NOTE: the string is mkdtemp's first positional parameter, i.e. the
        # *suffix* of the generated directory name.
        workdir = tempfile.mkdtemp("seqal_mapper_test_workdir")
        self._things_to_clean_up.append(workdir)
        os.chdir(workdir)

        try:
            # The context is created inside the try block: the working
            # directory has already been changed, so a failure here must
            # also restore it and remove the scratch directory.
            self._ctx = sam_map_context(self._jc, [])
            self._setup_ref(self._jc.get('mapred.cache.archives'))
            self._mapper = mapper(self._ctx)
        except Exception:
            # call tearDown ourselves because unittest doesn't call it if setUp fails.
            # ``Exception`` is used because ``StandardError`` exists only on Python 2.
            self.tearDown()
            raise

    def tearDown(self):
        """Remove everything registered for clean-up and restore the old cwd.

        Removal is best effort: a failure is logged and the remaining items
        are still processed.  The original working directory is restored
        even if the clean-up loop itself raises.
        """
        try:
            for item in self._things_to_clean_up:
                try:
                    shutil.rmtree(item)
                except Exception as e:
                    self._log.info("Failed to remove %s", item)
                    self._log.info("Error: %s", e)
        finally:
            os.chdir(self._old_cwd)

    def _setup_ref(self, mr_cache_archives):
        """Unpack the reference archive into the current directory.

        ``mr_cache_archives`` must have the form ``<archive path>#<link name>``.
        The archive is extracted into '.', and a symlink called
        ``<link name>`` pointing at '.' is created next to the extracted files.
        """
        self._log.info("Setting up reference using property value '%s'", mr_cache_archives)
        archive, link = mr_cache_archives.split('#')
        # The context manager closes the archive's file handle even when
        # extraction fails.
        with tarfile.TarFile(archive) as ar:
            ar.extractall('.')
        os.symlink('.', link)
        self._log.info("Here is the listing of the extraction directory: %s", ', '.join(os.listdir('.')))

    def test_simple_map(self):
        """Map one batch of reads and compare the SAM lines with the expected ones."""
        # get input data and expected output.  We keep exactly one map batch of reads
        # (as per self._map_batch_size).  Floor division keeps the slice bound an
        # integer regardless of the division semantics in effect.
        reads = tseal_utils.get_mini_ref_seqs()[0:(self._map_batch_size // 2)]
        expected_output = sorted(tseal_utils.rapi_mini_ref_seqs_sam_no_header().split('\n')[0:2*len(reads)])
        self._log.info("loaded %s fragments and %s lines of expected output", len(reads), len(expected_output))
        if len(reads) * 2 < self._mapper.batch_size:
            self.fail("batch size for test (%s) is set larger than the number of available "
                "reads (%s). Aligner won't run" % (self._mapper.batch_size, len(reads) * 2))
        for idx, fragment in enumerate(reads):
            self._ctx.set_input_key(idx * 100)
            self._ctx.set_input_value('\t'.join(fragment))
            self._mapper.map(self._ctx)

        # Both sides are sorted, so the comparison ignores emission order.
        produced_sam = sorted(self._ctx.sam_lines)
        self.assertEqual(len(expected_output), len(produced_sam))
        self.assertEqual(expected_output, produced_sam)
        self.assertEqual(len(reads) * 2, self._ctx.counters["SEQAL:EMITTED SAM RECORDS"])
예제 #7
0
 def test_missing_key(self):
     """Asking JobConf for a key that was never set must raise RuntimeError."""
     conf = JobConf(((1, 2), (3, 4)))
     with self.assertRaises(RuntimeError):
         conf.get('no_key')
예제 #8
0
 def test_missing_key(self):
     """Asking JobConf for a key that was never set must raise RuntimeError."""
     populated_conf = JobConf(('a', 'b', 'c', 'd'))
     lookup = populated_conf.get
     self.assertRaises(RuntimeError, lookup, 'no_key')
예제 #9
0
class TestSeqalMapper(unittest.TestCase):
    """Run the seqal mapper on a small reference and check the SAM it emits.

    Each test runs inside its own temporary working directory, into which
    the reference archive named by 'mapred.cache.archives' is unpacked.
    """

    def setUp(self):
        """Prepare the job configuration, a scratch directory and the mapper.

        A fresh temporary directory is created, registered in
        ``self._things_to_clean_up`` and made the current working directory.
        If building the context, the reference or the mapper fails,
        ``tearDown`` is invoked explicitly before the exception is re-raised.
        """
        self._map_batch_size = 6
        self._log = logging.getLogger(__name__)
        self._old_cwd = os.getcwd()
        # JobConf is given a flat list of alternating property names and values.
        self._jc = JobConf([
            'seal.seqal.log.level',
            'DEBUG',
            'seal.seqal.fastq-subformat',
            'fastq-sanger',
            #'seal.seqal.alignment.max.isize', None,
            #'seal.seqal.alignment.min.isize', None,
            'seal.seqal.pairing.batch.size',
            self._map_batch_size,
            'seal.seqal.min_hit_quality',
            0,
            'seal.seqal.remove_unmapped',
            False,
            'seal.seqal.nthreads',
            1,
            'seal.seqal.trim.qual',
            0,
            'mapred.reduce.tasks',
            0,
            'mapred.cache.archives',
            (os.path.join("file://", tseal_utils.MiniRefMemDir,
                          "mini_ref_bwamem_0.7.8.tar") + "#reference"),
        ])
        self._things_to_clean_up = []

        # NOTE: the string is mkdtemp's first positional parameter, i.e. the
        # *suffix* of the generated directory name.
        workdir = tempfile.mkdtemp("seqal_mapper_test_workdir")
        self._things_to_clean_up.append(workdir)
        os.chdir(workdir)

        try:
            # The context is created inside the try block: the working
            # directory has already been changed, so a failure here must
            # also restore it and remove the scratch directory.
            self._ctx = sam_map_context(self._jc, [])
            self._setup_ref(self._jc.get('mapred.cache.archives'))
            self._mapper = mapper(self._ctx)
        except Exception:
            # call tearDown ourselves because unittest doesn't call it if setUp fails.
            # ``Exception`` is used because ``StandardError`` exists only on Python 2.
            self.tearDown()
            raise

    def tearDown(self):
        """Remove everything registered for clean-up and restore the old cwd.

        Removal is best effort: a failure is logged and the remaining items
        are still processed.  The original working directory is restored
        even if the clean-up loop itself raises.
        """
        try:
            for item in self._things_to_clean_up:
                try:
                    shutil.rmtree(item)
                except Exception as e:
                    self._log.info("Failed to remove %s", item)
                    self._log.info("Error: %s", e)
        finally:
            os.chdir(self._old_cwd)

    def _setup_ref(self, mr_cache_archives):
        """Unpack the reference archive into the current directory.

        ``mr_cache_archives`` must have the form ``<archive path>#<link name>``.
        The archive is extracted into '.', and a symlink called
        ``<link name>`` pointing at '.' is created next to the extracted files.
        """
        self._log.info("Setting up reference using property value '%s'",
                       mr_cache_archives)
        archive, link = mr_cache_archives.split('#')
        # The context manager closes the archive's file handle even when
        # extraction fails.
        with tarfile.TarFile(archive) as ar:
            ar.extractall('.')
        os.symlink('.', link)
        self._log.info("Here is the listing of the extraction directory: %s",
                       ', '.join(os.listdir('.')))

    def test_simple_map(self):
        """Map one batch of reads and compare the SAM lines with the expected ones."""
        # get input data and expected output.  We keep exactly one map batch of reads
        # (as per self._map_batch_size).  Floor division keeps the slice bound an
        # integer regardless of the division semantics in effect.
        reads = tseal_utils.get_mini_ref_seqs()[0:(self._map_batch_size // 2)]
        expected_output = sorted(
            tseal_utils.rapi_mini_ref_seqs_sam_no_header().split(
                '\n')[0:2 * len(reads)])
        self._log.info("loaded %s fragments and %s lines of expected output",
                       len(reads), len(expected_output))
        if len(reads) * 2 < self._mapper.batch_size:
            self.fail(
                "batch size for test (%s) is set larger than the number of available "
                "reads (%s). Aligner won't run" %
                (self._mapper.batch_size, len(reads) * 2))
        for idx, fragment in enumerate(reads):
            self._ctx.set_input_key(idx * 100)
            self._ctx.set_input_value('\t'.join(fragment))
            self._mapper.map(self._ctx)

        # Both sides are sorted, so the comparison ignores emission order.
        produced_sam = sorted(self._ctx.sam_lines)
        self.assertEqual(len(expected_output), len(produced_sam))
        self.assertEqual(expected_output, produced_sam)
        self.assertEqual(
            len(reads) * 2, self._ctx.counters["SEQAL:EMITTED SAM RECORDS"])
예제 #10
0
 def setUp(self):
     """Create an empty JobConf, a reduce context and two reducers on it.

     ``self.__reducer`` has ``discard_duplicates`` switched on, while
     ``self.__clean_reducer`` is left exactly as constructed.
     """
     self.__jc = JobConf([])
     self.__ctx = reduce_context(self.__jc, [])
     # First reducer: built, then flagged to discard duplicates.
     dedup_reducer = reducer(self.__ctx)
     dedup_reducer.discard_duplicates = True
     self.__reducer = dedup_reducer
     # Second reducer: same context, no attributes altered.
     self.__clean_reducer = reducer(self.__ctx)