Example #1
0
def _create_runnable_task(task_id, input_names, output_names):
    """Write the necessary files and generate the cmds"""
    input_files = [get_temp_file(suffix=x) for x in input_names]
    output_files = [get_temp_file(suffix=x) for x in output_names]
    d = get_temp_dir("runnable-task-")
    msg = "MOCK DATA"
    cmds = [_to_cmd(msg, x) for x in output_files]
    return _to_runnable_task(task_id, input_files, output_files, cmds, d)
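All of the examples in this listing call a get_temp_file / get_temp_dir helper whose implementation is not shown. A minimal sketch of such a helper, assuming it simply wraps the standard tempfile module and returns the created path (an illustration only, not the project's actual code):

import os
import tempfile


def get_temp_file(suffix=""):
    # Assumed behaviour: create and close a named temp file, return its path.
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def get_temp_dir(prefix=""):
    # Assumed behaviour: create a temp directory and return its path.
    return tempfile.mkdtemp(prefix=prefix)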
Example #2
0
def _create_runnable_task(task_id, input_names, output_names):
    """Write the necessary files and generate the cmds"""
    input_files = [get_temp_file(suffix=x) for x in input_names]
    output_files = [get_temp_file(suffix=x) for x in output_names]
    d = get_temp_dir("runnable-task-")
    msg = "MOCK DATA"
    cmds = [_to_cmd(msg, x) for x in output_files]
    return _to_runnable_task(task_id, input_files, output_files, cmds, d)
Example #3
0
    def test_smoke(self):
        t = get_temp_file(suffix="-stats-1.json")
        _write_stats_to_json({"n_reads": 549, "n_zmws": 100}, t)
        t2 = get_temp_file(suffix="-stats-2.json")
        _write_stats_to_json({"n_reads": 733, "n_zmws": 100}, t2)

        tg = get_temp_file(suffix="stats-gather.json")
        G.gather_report([t, t2], tg)

        r = load_report_from_json(tg)
        stats = {a.id: a.value for a in r.attributes}
        self.assertEqual(stats["pb_n_reads"], 549 + 733)
        self.assertEqual(stats["pb_n_zmws"], 200)
Example #4
0
 def test_filter_dataset_bq(self):
     ds_in = get_temp_file(suffix=".subreadset.xml")
     ds = SubreadSet(pbtestdata.get_file("barcoded-subreadset"),
                     strict=True)
     ds.filters.addRequirement(bq=[('>=', 31)])
     assert len(ds) == 1
     ds.write(ds_in)
     ds_out = get_temp_file(suffix=".subreadset.xml")
     args = self.BASE_ARGS + [ds_in, ds_out, "length >= 10 AND bq >= 10"]
     self._check_call(args)
     n_expected = 2
     expected_filter_str = "( bq >= 10 AND length >= 10 )"
     self.run_after(ds_out, n_expected, expected_filter_str)
Example #5
0
 def test_filter_dataset_downsample(self):
     ds_in = get_temp_file(suffix=".subreadset.xml")
     with SubreadSet(pbtestdata.get_file("subreads-xml"),
                     strict=True) as ds:
         assert len(ds) == 117 and len(ds.filters) == 0
         ds.write(ds_in)
     ds_out = get_temp_file(suffix=".subreadset.xml")
     args = self.BASE_ARGS + [ds_in, ds_out, "", "--downsample", "2"]
     self._check_call(args)
     n_expected = 54
     expected_filter_str = "( Uint32Cast(zm) % 2 == 0 )"
     ds = self.run_after(ds_out, n_expected, expected_filter_str)
     assert "downsampled" in ds.tags
Example #6
0
    def test_smoke(self):
        t = get_temp_file(suffix="-stats-1.json")
        _write_stats_to_json({'n_reads': 549, 'n_zmws': 100}, t)
        t2 = get_temp_file(suffix="-stats-2.json")
        _write_stats_to_json({'n_reads': 733, 'n_zmws': 100}, t2)

        tg = get_temp_file(suffix="stats-gather.json")
        G.gather_report([t, t2], tg)

        r = load_report_from_json(tg)
        stats = {a.id: a.value for a in r.attributes}
        self.assertEqual(stats['pb_n_reads'], 549 + 733)
        self.assertEqual(stats['pb_n_zmws'], 200)
Example #7
0
class TestFilterDataSet(PbTestApp):
    TASK_ID = "pbcoretools.tasks.filterdataset"
    DRIVER_EMIT = 'python -m pbcoretools.tasks.filters emit-tool-contract {i} '.format(
        i=TASK_ID)
    DRIVER_RESOLVE = 'python -m pbcoretools.tasks.filters run-rtc '
    TASK_OPTIONS = {"pbcoretools.task_options.other_filters": "length <= 1400"}
    RESOLVED_TASK_OPTIONS = {
        "pbcoretools.task_options.other_filters": "length <= 1400"
    }
    INPUT_FILES = [get_temp_file(suffix=".subreadset.xml")]
    MAX_NPROC = 24
    RESOLVED_NPROC = 1
    IS_DISTRIBUTED = True
    RESOLVED_IS_DISTRIBUTED = True
    READER_CLASS = SubreadSet

    @classmethod
    def setUpClass(cls):
        ds = SubreadSet(data.getXml(10), strict=True)
        ds.write(cls.INPUT_FILES[0])

    def _get_counts(self, rtc):
        n_expected = 18
        with self.READER_CLASS(rtc.task.output_files[0]) as f:
            n_actual = len(f)
        return n_expected, n_actual

    def _get_filters(self, rtc):
        with self.READER_CLASS(rtc.task.output_files[0]) as f:
            return str(f.filters)

    def run_after(self, rtc, output_dir):
        n_expected, n_actual = self._get_counts(rtc)
        self.assertEqual(self._get_filters(rtc), "( length <= 1400 )")
        self.assertEqual(n_actual, n_expected)
Example #8
0
    def test_run(self):

        root_output_dir = self._get_root_temp_dir()
        i = random.randint(1, 10000)
        name = "{n}_{i}".format(n=self.JOB_NAME, i=i)
        output_dir = os.path.join(root_output_dir, name)
        os.mkdir(output_dir)

        ep_d = {ep_id: get_temp_file(suffix=name) for ep_id, name in self.ENTRY_POINTS.iteritems()}

        for ep_id, file_name in ep_d.iteritems():
            with open(file_name, 'w') as x:
                x.write("Mock data for {i} \n".format(i=ep_id))

        cmd = self.TO_CMD_FUNC(output_dir, self.WORKFLOW_XML, self.PRESET_XML, ep_d)
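        # cmd is the full shell command for this integration job, built from the workflow XML, presets, and entry-point files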

        stderr_path = os.path.join(output_dir, 'job.stderr')
        stdout_path = os.path.join(output_dir, 'job.stdout')
        log.debug(cmd)
        with open(stdout_path, 'w') as wo:
            with open(stderr_path, 'w') as we:
                rcode, stdout_results, stderr_results, run_time = run_command(cmd, wo, we)

        log.debug("Integration Job {i} state {s} in {t:.2f} sec.".format(i=self.JOB_NAME, s=rcode, t=run_time))

        if rcode != 0:
            log.error("Integration Job {i} failed.".format(i=self.JOB_NAME))
            log.error(stdout_results)
            log.error(stderr_results)
            if os.path.exists(stderr_path):
                with open(stderr_path, 'r') as f:
                    log.error(f.read())

        emsg = "Failed Integration Job {i} with exit code {r} in {d}. {w}".format(i=self.JOB_NAME, r=rcode, d=output_dir, w=self.WORKFLOW_XML)
        self.assertEqual(rcode, 0, emsg)
Example #9
0
    def test_run(self):

        root_output_dir = self._get_root_temp_dir()
        i = random.randint(1, 10000)
        name = "{n}_{i}".format(n=self.JOB_NAME, i=i)
        output_dir = os.path.join(root_output_dir, name)
        os.mkdir(output_dir)

        ep_d = {ep_id: get_temp_file(suffix=name) for ep_id, name in self.ENTRY_POINTS.iteritems()}

        for ep_id, file_name in ep_d.iteritems():
            with open(file_name, 'w') as x:
                x.write("Mock data for {i} \n".format(i=ep_id))

        cmd = self.TO_CMD_FUNC(output_dir, self.WORKFLOW_XML, self.PRESET_JSON, self.PRESET_XML, ep_d)

        stderr_path = os.path.join(output_dir, 'job.stderr')
        stdout_path = os.path.join(output_dir, 'job.stdout')
        log.debug(cmd)
        with open(stdout_path, 'w') as wo:
            with open(stderr_path, 'w') as we:
                rcode, stdout_results, stderr_results, run_time = run_command(cmd, wo, we)

        log.debug("Integration Job {i} state {s} in {t:.2f} sec.".format(i=self.JOB_NAME, s=rcode, t=run_time))

        if rcode != 0 and self.EXPECTED_EXIT_CODE == 0:
            log.error("Integration Job {i} failed.".format(i=self.JOB_NAME))
            log.error(stdout_results)
            log.error(stderr_results)
            if os.path.exists(stderr_path):
                with open(stderr_path, 'r') as f:
                    log.error(f.read())

        emsg = "Failed Integration Job {i} with exit code {r} in {d}. {w}".format(i=self.JOB_NAME, r=rcode, d=output_dir, w=self.WORKFLOW_XML)
        self.assertEqual(rcode, self.EXPECTED_EXIT_CODE, emsg)
Example #10
0
class _BaseTestBam2Fasta(PbTestApp):
    TASK_ID = "pbcoretools.tasks.bam2fasta"
    DRIVER_EMIT = 'python -m pbcoretools.tasks.converters emit-tool-contract {i} '.format(
        i=TASK_ID)
    DRIVER_RESOLVE = 'python -m pbcoretools.tasks.converters run-rtc '
    INPUT_FILES = [get_temp_file(suffix=".subreadset.xml")]
    MAX_NPROC = 24
    RESOLVED_NPROC = 1
    IS_DISTRIBUTED = True
    RESOLVED_IS_DISTRIBUTED = True
    READER_CLASS = FastaReader
    NRECORDS_EXPECTED = None

    def _get_output_file(self, rtc):
        return rtc.task.output_files[0]

    def _get_counts(self, rtc):
        with openDataSet(self.INPUT_FILES[0]) as ds:
            n_expected = len([rec for rec in ds])
        with self.READER_CLASS(self._get_output_file(rtc)) as f:
            n_actual = len([rec for rec in f])
        return n_expected, n_actual

    def run_after(self, rtc, output_dir):
        n_expected, n_actual = self._get_counts(rtc)
        self.assertEqual(n_actual, n_expected)
        if self.NRECORDS_EXPECTED is not None:
            self.assertEqual(n_actual, self.NRECORDS_EXPECTED)
Example #11
0
class TestGatherGFF(TextRecordsGatherBase,
                    pbcommand.testkit.core.PbTestGatherApp):
    """
    Test pbcoretools.tasks.gather_gff
    """
    RECORDS = [
        "contig1\tkinModCall\tmodified_base\t1\t1\t31\t+\t.\tcoverage=169",
        "contig1\tkinModCall\tmodified_base\t2\t2\t41\t-\t.\tcoverage=170",
        "contig1\tkinModCall\tmodified_base\t3\t3\t51\t+\t.\tcoverage=168",
        "contig1\tkinModCall\tmodified_base\t4\t4\t60\t-\t.\tcoverage=173",
    ]
    RECORD_HEADER = "##gff-version 3\n##source-id ipdSummary\n"
    EXTENSION = "gff"

    DRIVER_BASE = "python -m pbcoretools.tasks.gather_gff"
    INPUT_FILES = [get_temp_file(suffix=".chunks.json")]
    CHUNK_KEY = "$chunk.gff_id"

    @classmethod
    def _get_chunk_records(cls, i_chunk):
        if i_chunk == 0:
            return cls.RECORDS[2:]
        else:
            return cls.RECORDS[0:2]

    def _get_lines(self, lines):
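        # Keep only GFF record lines; drop "##" header/comment lines before comparison.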
        return [l.strip() for l in lines if l[0] != '#']

    def validate_content(self, lines):
        self.assertEqual(len(lines), 6)
        self.assertEqual(lines[1].strip(), "##source-id ipdSummary")
Example #12
0
    def setUpClass(cls):
        pipeline = REGISTERED_PIPELINES[cls.PB_PIPELINE_ID]
        log.debug(pipeline)

        cls.bindings = pipeline.all_bindings
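        # Map each entry-point ID to a temp file that stands in for the real input.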
        cls.EPOINTS_D = {
            k: get_temp_file(v)
            for k, v in cls.EPOINTS_NAMES.iteritems()
        }

        log.debug(pprint.pformat(cls.bindings, indent=4))
        log.debug(
            "Number of registered tasks {n}".format(n=len(REGISTERED_TASKS)))

        cls.bgraph = B.binding_strs_to_binding_graph(REGISTERED_TASKS,
                                                     cls.bindings)
        d = os.path.expanduser('~/scratch/tmp_pbsmrtpipe') if getpass.getuser(
        ) == 'mkocher' else None
        cls.output_dir = tempfile.mkdtemp(prefix='job_test_', dir=d)

        preset_record = IO.parse_pipeline_preset_xml(
            os.path.join(TEST_DATA_DIR, cls.PRESET_XML))
        cls.workflow_options = preset_record.to_workflow_level_opt()

        # leave this for now
        cls.envs = []
        cls.cluster_engine = C.load_installed_cluster_templates_by_name("sge")
Example #13
0
def _to_test_fofn(n, file_base_name):
    """return a path to fofn"""

    fofn_name = ".".join([file_base_name, ".fofn"])
    fofn = get_temp_file(suffix=fofn_name)

    fofn_files = []
    for i in xrange(n):
        name = "-".join([file_base_name, str(i)])
        f = get_temp_file(name)
        fofn_files.append(f)

    with open(fofn, 'w') as f:
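        # One member path per line, newline-joined (no trailing newline).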
        f.write("\n".join(fofn_files))

    return fofn
Example #14
0
 def test_01(self):
     nrecords = 15
     name = "example_fofn"
     f = get_temp_file(name)
     _ = M.write_random_fofn(f, nrecords)
     ds_metadata = dispatch_metadata_resolver(self.FILE_TYPE, f)
     self.assertEqual(ds_metadata.nrecords, nrecords)
     os.remove(f)
Example #15
0
 def test_bam2fastx_filtered(self):
     input_file = pbtestdata.get_file("subreads-xml")
     ds = SubreadSet(input_file, strict=True)
     ds.filters.addRequirement(length=[('>=', 1000)])
     input_tmp = get_temp_file(suffix=".subreadset.xml")
     ds.write(input_tmp)
     nrecords_expected = 13
     self.run_and_check_fastx(input_tmp, nrecords_expected)
Example #16
0
 def test_filter_dataset_combine_filters(self):
     ds_in, n_input = self._set_up_combine_filters()
     ds_out = get_temp_file(suffix=".subreadset.xml")
     args = self.BASE_ARGS + [ds_in, ds_out, "rq >= 0.901"]
     self._check_call(args)
     n_expected = 12
     expected_filter_str = "( length >= 1000 AND rq >= 0.901 )"
     self.run_after(ds_out, n_expected, expected_filter_str)
Example #17
0
 def test_filter_dataset_min_rq_null(self):
     ds_in, n_input = self._set_up_combine_filters()
     ds_out = get_temp_file(suffix=".subreadset.xml")
     args = self.BASE_ARGS + [ds_in, ds_out, "", "--min-qv", "-1"]
     self._check_call(args)
     n_expected = 13
     expected_filter_str = "( length >= 1000 )"
     self.run_after(ds_out, n_expected, expected_filter_str)
Example #18
0
 def test_filter_dataset_nofilter(self):
     ds_in, n_input = self._set_up_basic()
     ds_out = get_temp_file(suffix=".subreadset.xml")
     args = self.BASE_ARGS + [ds_in, ds_out, ""]
     self._check_call(args)
     n_expected = n_input
     expected_filter_str = ""
     self.run_after(ds_out, n_expected, expected_filter_str)
Example #19
0
    def test_smoke(self):
        t = get_temp_file(suffix="-records-1.csv")
        _write_records_to_csv(list(_to_n_records(100)), t)

        t2 = get_temp_file(suffix="-records-2.csv")
        _write_records_to_csv(list(_to_n_records(57)), t2)

        tg = get_temp_file(suffix="records-gather.csv")
        G.gather_csv([t, t2], tg)

        nrecords = 0
        with open(tg, "r") as r:
            reader = csv.DictReader(r)
            log.debug(reader.fieldnames)
            for _ in reader:
                nrecords += 1

        self.assertEqual(nrecords, 157)
Example #20
0
 def _set_up_combine_filters(self):
     ds_in = get_temp_file(suffix=".subreadset.xml")
     with SubreadSet(pbtestdata.get_file("subreads-xml"),
                     strict=True) as ds:
         assert len(ds) == 117 and len(ds.filters) == 0
         ds.filters.addRequirement(length=[('>=', 1000)])
         assert len(ds) == 13
         ds.write(ds_in)
     return ds_in, 13
Example #21
0
 def _set_up_basic(self):
     input_file = get_temp_file(suffix=".subreadset.xml")
     ds = SubreadSet(data.getXml(9), strict=True)
     ds.metadata.addParentDataSet(uuid.uuid4(),
                                  ds.datasetType,
                                  createdBy="AnalysisJob",
                                  timeStampedName="")
     ds.write(input_file)
     return input_file, len(ds)
Example #22
0
    def test_smoke(self):
        t = get_temp_file(suffix="-records-1.csv")
        _write_records_to_csv(list(_to_n_records(100)), t)

        t2 = get_temp_file(suffix="-records-2.csv")
        _write_records_to_csv(list(_to_n_records(57)), t2)

        tg = get_temp_file(suffix="records-gather.csv")
        G.gather_csv([t, t2], tg)

        nrecords = 0
        with open(tg, 'r') as r:
            reader = csv.DictReader(r)
            log.debug(reader.fieldnames)
            for _ in reader:
                nrecords += 1

        self.assertEqual(nrecords, 157)
Example #23
0
    def test_load_preset(self):
        xml = IO.schema_workflow_options_to_xml(self._to_opts())
        preset_xml = get_temp_file(suffix="_preset.xml")
        log.debug(preset_xml)
        with open(preset_xml, 'w') as w:
            w.write(str(xml))

        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        workflow_level_opts = preset_record.to_workflow_level_opt()
        self.assertEqual(len(workflow_level_opts), len(self._to_opts()))
Example #24
0
    def test_load_preset(self):
        xml = IO.schema_workflow_options_to_xml(self._to_opts())
        preset_xml = get_temp_file(suffix="_preset.xml")
        log.debug(preset_xml)
        with open(preset_xml, 'w') as w:
            w.write(str(xml))

        preset_record = IO.parse_pipeline_preset_xml(preset_xml)
        workflow_level_opts = preset_record.to_workflow_level_opt()
        self.assertEqual(len(workflow_level_opts), len(self._to_opts()))
Example #25
0
 def test_combine_filters_run_filter_dataset(self):
     ds_in, n_input = self._set_up_combine_filters()
     ds_out = get_temp_file(suffix=".subreadset.xml")
     my_filters = "rq >= 0.901"
     run_filter_dataset(ds_in, ds_out, 0, my_filters)
     with openDataSet(ds_out, strict=True) as ds:
         assert len(ds) == 12
     my_filters = "rq >= 0.8"
     run_filter_dataset(ds_in, ds_out, 500, my_filters)
     with openDataSet(ds_out, strict=True) as ds:
         assert len(ds) == 48
Example #26
0
class TestScatterContigSet(ScatterSequenceBase,
                           pbcommand.testkit.core.PbTestScatterApp):
    """
    Test pbsmrtpipe.tools_dev.scatter_contigset
    """
    READER_CLASS = ContigSet
    DRIVER_BASE = "python -m pbsmrtpipe.tools_dev.scatter_contigset"
    INPUT_FILES = [get_temp_file(suffix=".contigset.xml")]
    MAX_NCHUNKS = 12
    RESOLVED_MAX_NCHUNKS = 12
    CHUNK_KEYS = ("$chunk.contigset_id", )
Example #27
0
class TestScatterFilterFasta(ScatterSequenceBase,
                             pbcommand.testkit.core.PbTestScatterApp):
    """
    Test pbcoretools.tasks.scatter_filter_fasta
    """
    READER_CLASS = FastaReader
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_filter_fasta"
    INPUT_FILES = [get_temp_file(suffix=".fasta")]
    MAX_NCHUNKS = 12
    RESOLVED_MAX_NCHUNKS = 12
    CHUNK_KEYS = ("$chunk.fasta_id", )
Example #28
0
    def test_all_sane(self):
        """Test that all pipelines are well defined"""
        errors = []
        rtasks, rfiles_types, chunk_operators, pipelines = L.load_all()

        for pipeline_id, pipeline in pipelines.items():
            emsg = "Pipeline {p} is not valid.".format(p=pipeline_id)
            log.debug("Checking Sanity of registered Pipeline {i}".format(
                i=pipeline_id))
            log.info(pipeline_id)
            log.debug(pipeline)
            try:
                # Validate with Avro
                d = pipeline_template_to_dict(pipeline, rtasks)
                _ = validate_pipeline_template(d)
                name = pipeline_id + "_pipeline_template.avro"
                output_file = get_temp_file(suffix=name)
                log.info(
                    "{p} converted to avro successfully".format(p=pipeline_id))

                bg = BU.binding_strs_to_binding_graph(rtasks,
                                                      pipeline.all_bindings)
                BU.validate_binding_graph_integrity(bg)
                BU.validate_compatible_binding_file_types(bg)
                validate_entry_points(d)
                # pprint.pprint(d)

                # for debugging purposes
                output_json = output_file.replace(".avro", '.json')
                log.info("writing pipeline to {p}".format(p=output_json))
                with open(output_json, 'w') as j:
                    j.write(json.dumps(d, sort_keys=True, indent=4))

                log.info(
                    "writing pipeline template to {o}".format(o=output_file))

                # Test writing to avro if the pipeline is actually valid
                write_pipeline_template_to_avro(pipeline, rtasks, output_file)
                log.info("Pipeline {p} is valid.".format(p=pipeline_id))

                log.info("Loading avro {i} from {p}".format(i=pipeline_id,
                                                            p=output_file))
                pipeline_d = load_pipeline_template_from_avro(output_file)
                self.assertIsInstance(pipeline_d, dict)

            except Exception as e:
                m = emsg + " Error: " + str(e)
                log.error(m)
                errors.append(m)
                log.error(emsg)
                log.error(e)

        msg = "\n".join(errors) if errors else ""
        self.assertEqual([], errors, msg)
Example #29
0
class TestBam2FastaCCS(TestBam2FastqArchive):
    TASK_ID = "pbcoretools.tasks.bam2fasta_ccs"
    DRIVER_EMIT = 'python -m pbcoretools.tasks.converters emit-tool-contract {i} '.format(
        i=TASK_ID)
    INPUT_FILES = [get_temp_file(".consensusreadset.xml")]
    READER_CLASS = FastaReader
    NRECORDS_EXPECTED = None

    @classmethod
    def setUpClass(cls):
        ds = ConsensusReadSet(pbcore.data.getCCSBAM(), strict=True)
        ds.write(cls.INPUT_FILES[0])
Example #30
0
class TestScatterContigSet(TestScatterFilterFasta,
                           pbcommand.testkit.core.PbTestScatterApp):
    """
    Test pbcoretools.tasks.scatter_contigset
    """
    NRECORDS = 51
    READER_CLASS = ContigSet
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_contigset"
    INPUT_FILES = [get_temp_file(suffix=".contigset.xml")]
    MAX_NCHUNKS = 24
    RESOLVED_MAX_NCHUNKS = 24
    NCHUNKS_EXPECTED = 17
    CHUNK_KEYS = ("$chunk.contigset_id", )
Example #31
0
    def test_write_chunks(self):

        def f(i):
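            # Build the chunk-key -> path dict for chunk i (keys carry the PipelineChunk.CHUNK_KEY_PREFIX).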
            return {"{c}movie_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX): "/path/to_movie-{i}.fofn".format(i=i),
                    "{c}region_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX): "/path/rgn_{i}.fofn".format(i=i)}

        to_i = lambda i: "chunk-id-{i}".format(i=i)
        to_p = lambda i: PipelineChunk(to_i(i), **f(i))

        nchunks = 5
        pipeline_chunks = [to_p(i) for i in xrange(nchunks)]
        log.debug(pipeline_chunks)
        tmp_name = get_temp_file("_chunk.json")

        IO.write_pipeline_chunks(pipeline_chunks, tmp_name, "Example chunk file")

        pchunks = IO.load_pipeline_chunks_from_json(tmp_name)
        self.assertEqual(len(pchunks), nchunks)
Example #32
0
    def setUpClass(cls):
        pipeline = REGISTERED_PIPELINES[cls.PB_PIPELINE_ID]
        log.debug(pipeline)

        cls.bindings = pipeline.all_bindings
        cls.EPOINTS_D = {k: get_temp_file(v) for k, v in cls.EPOINTS_NAMES.iteritems()}

        log.debug(pprint.pformat(cls.bindings, indent=4))
        log.debug("Number of registered tasks {n}".format(n=len(REGISTERED_TASKS)))

        cls.bgraph = B.binding_strs_to_binding_graph(REGISTERED_TASKS, cls.bindings)
        d = os.path.expanduser('~/scratch/tmp_pbsmrtpipe') if getpass.getuser() == 'mkocher' else None
        cls.output_dir = tempfile.mkdtemp(prefix='job_test_', dir=d)

        preset_record = IO.parse_pipeline_preset_xml(os.path.join(TEST_DATA_DIR, cls.PRESET_XML))
        cls.workflow_options = preset_record.to_workflow_level_opt()

        # leave this for now
        cls.envs = []
        cls.cluster_engine = C.load_installed_cluster_templates_by_name("sge")
Example #33
0
class TestGatherCSV(TextRecordsGatherBase,
                    pbcommand.testkit.core.PbTestGatherApp):
    """
    Test pbcoretools.tasks.gather_csv
    """
    RECORDS = [
        "contig1,3000000,170",
        "contig2,90000,180",
        "contig3,58000,159",
        "contig4,20000,160",
    ]
    RECORD_HEADER = "contig_id,length,coverage\n"
    EXTENSION = "csv"

    DRIVER_BASE = "python -m pbcoretools.tasks.gather_csv"
    INPUT_FILES = [get_temp_file(suffix=".chunks.json")]
    CHUNK_KEY = "$chunk.csv_id"

    def _get_lines(self, lines):
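        # Drop the single CSV header row; compare record lines only.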
        return [l.strip() for l in lines[1:]]
Example #34
0
class TestGatherGFF(TextRecordsGatherBase,
                    pbcommand.testkit.core.PbTestGatherApp):
    """
    Test pbsmrtpipe.tools_dev.gather_gff
    """
    RECORDS = [
        "contig1\tkinModCall\tmodified_base\t1\t1\t31\t+\t.\tcoverage=169",
        "contig1\tkinModCall\tmodified_base\t2\t2\t41\t-\t.\tcoverage=170",
        "contig1\tkinModCall\tmodified_base\t3\t3\t51\t+\t.\tcoverage=168",
        "contig1\tkinModCall\tmodified_base\t4\t4\t60\t-\t.\tcoverage=173",
    ]
    RECORD_HEADER = None
    EXTENSION = "gff"

    DRIVER_BASE = "python -m pbsmrtpipe.tools_dev.gather_gff"
    INPUT_FILES = [get_temp_file(suffix=".chunks.json")]
    CHUNK_KEY = "$chunk.gff_id"

    def _get_lines(self, lines):
        return [l.strip() for l in lines if l[0] != '#']
Example #35
0
    def test_write_chunks(self):
        def f(i):
            return {
                "{c}movie_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX):
                "/path/to_movie-{i}.fofn".format(i=i),
                "{c}region_fofn_id".format(c=PipelineChunk.CHUNK_KEY_PREFIX):
                "/path/rgn_{i}.fofn".format(i=i)
            }

        to_i = lambda i: "chunk-id-{i}".format(i=i)
        to_p = lambda i: PipelineChunk(to_i(i), **f(i))

        nchunks = 5
        pipeline_chunks = [to_p(i) for i in xrange(nchunks)]
        log.debug(pipeline_chunks)
        tmp_name = get_temp_file("_chunk.json")

        IO.write_pipeline_chunks(pipeline_chunks, tmp_name,
                                 "Example chunk file")

        pchunks = IO.load_pipeline_chunks_from_json(tmp_name)
        self.assertEqual(len(pchunks), nchunks)
Example #36
0
 def test_01(self):
     p = get_temp_file(self.FILE_NAME)
     self._write_mock_file(p)
     ds_metadata = dispatch_metadata_resolver(self.FILE_TYPE, p)
     self.assertIsInstance(ds_metadata, DatasetMetadata)
     self.assertEqual(ds_metadata.nrecords, self.NRECORDS)