Example #1
0
 def setUp(self):
     """Create a scratch working directory, test assemblies and app config.

     A temporary directory is created and made the current working
     directory. One assembly is built from a sample rooted in that
     directory and another from a sample rooted at the remote URL. The
     application config is initialised if needed and pointed at both
     locations.
     """
     self.temp_d = tempfile.TemporaryDirectory()
     self.rurl = "gs://data"
     workdir = self.temp_d.name
     os.chdir(workdir)

     # Local and remote flavours of the same sample name.
     self.asm = TenxSample(name='TESTER', base_path=workdir).assembly()
     self.remote_asm = TenxSample(name='TESTER',
                                  base_path=self.rurl).assembly()

     if TenxApp.config is None:
         TenxApp()
     TenxApp.config['TENX_DATA_PATH'] = workdir
     TenxApp.config['TENX_REMOTE_URL'] = self.rurl
Example #2
0
    def test5_run_cleanup(self, call_patch, check_call_patch,
                          check_output_patch):
        """Verify run_cleanup refuses without fasta files, then succeeds.

        The three ``*_patch`` arguments are mocks supplied from outside this
        method (presumably by ``patch`` decorators on subprocess helpers --
        confirm against the decorators, which are not visible here).

        stderr is captured in a StringIO for the duration of the calls and
        is always restored afterwards, even when an assertion or the code
        under test raises, so a failure here cannot swallow the stderr
        output of later tests.
        """
        call_patch.return_value = 0
        check_call_patch.return_value = 0
        check_output_patch.return_value = b'0'
        pwd = os.getcwd()
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_REMOTE_URL"))
            asm = sample.assembly()
            with self.assertRaisesRegex(
                    Exception,
                    "Failed to find 4 mkoutput fasta files. Refusing to remove post assembly files"
            ):
                assembly.run_cleanup(asm)

            check_output_patch.return_value = b'a.fasta.gz\na.fasta.gz\na.fasta.gz\na.fasta.gz\n'
            # Rewind (without truncating) so the second run's output is
            # written over the output captured from the failed first run.
            err.seek(0, 0)
            assembly.run_cleanup(asm)
        finally:
            # Same restore target the sibling tests use.
            sys.stderr = sys.__stderr__

        self.maxDiff = 10000
        expected_err = "Cleanup assembly for TESTER ...\nAssembly remote URL: gs://data/TESTER/assembly\nChecking if gsutil is installed...\nRUNNING: which gsutil\nChecking mkfastq files exist.\nRUNNING: gsutil ls gs://data/TESTER/assembly/mkoutput/*fasta.gz\nRemoving ASSEMBLER_CS logs path.\nRUNNING: gsutil -m rm -r gs://data/TESTER/assembly/ASSEMBLER_CS\nMoving outs / assembly / stats to outs.\nRUNNING: gsutil -m mv gs://data/TESTER/assembly/outs/assembly/stats gs://data/TESTER/assembly/outs\nRemoving outs / assembly path\nRUNNING: gsutil -m rm -r gs://data/TESTER/assembly/outs/assembly\nCleanup assembly ... OK\n"
        self.assertEqual(err.getvalue(), expected_err)
        self.assertEqual(os.getcwd(), pwd)
Example #3
0
    def test11_is_successful(self):
        """is_successful() is false for a bare assembly dir, true once
        the outs/assembly directory exists beneath it."""
        data_path = TenxApp.config.get("TENX_DATA_PATH")
        asm = TenxSample(name="TESTER", base_path=data_path).assembly()

        # Only the top-level assembly directory exists at this point.
        os.makedirs(asm.path)
        self.assertFalse(asm.is_successful())

        # Creating outs/assembly flips the result.
        outs_assembly_d = os.path.join(asm.path, "outs", "assembly")
        os.makedirs(outs_assembly_d)
        self.assertTrue(asm.is_successful())
Example #4
0
 def setUp(self):
     """Build a ``__SAMPLE__`` assembly in a temp dir and install a config.

     The assembly directory is created on disk, then the application
     config is replaced with a dict pointing at that directory, a fixed
     remote URL, and the ``data/app`` directory next to this file.
     """
     self.temp_d = tempfile.TemporaryDirectory()
     scratch = self.temp_d.name
     self.asm = TenxSample(name="__SAMPLE__", base_path=scratch).assembly()
     os.makedirs(os.path.join(self.asm.path))

     data_path = os.path.join(scratch, "__SAMPLE__", "assembly")
     cromwell_path = os.path.join(os.path.dirname(__file__), "data", "app")
     tenx.app.TenxApp.config = {
         "TENX_DATA_PATH": data_path,
         "TENX_REMOTE_URL": "gs://data",
         "TENX_CROMWELL_PATH": cromwell_path,
     }
Example #5
0
    def test1_run_assemble_fails_without_supernova(self):
        """run_assemble raises when the supernova executable is missing.

        stderr is captured while the code under test runs and is restored
        in a ``finally`` so a failure here cannot leave ``sys.stderr``
        redirected for the rest of the test run.
        """
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            with self.assertRaisesRegex(Exception,
                                        "No such file or directory"):
                run_assemble(asm)
        finally:
            sys.stderr = sys.__stderr__

        # The sample directory must not have been created by the failed run.
        self.assertFalse(os.path.exists(sample.path))

        expected_err = "Checking if supernova is in PATH...\nRUNNING: supernova --help\n"
        self.assertEqual(err.getvalue(), expected_err)
Example #6
0
    def test31_run_mkoutput_fails_with_incomplete_assembly(
            self, check_call_patch):
        """run_mkoutput refuses to run when the assembly is not complete.

        ``check_call_patch`` is a mock supplied from outside this method
        (decorator not visible here). stderr is captured during the call
        and restored in a ``finally`` so a failing assertion cannot leave
        it redirected.
        """
        check_call_patch.return_value = '0'
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            with self.assertRaisesRegex(
                    Exception,
                    "Assembly is not complete! Cannot run mkoutput!"):
                assembly.run_mkoutput(asm)
        finally:
            sys.stderr = sys.__stderr__

        expected_err = "Running mkoutput for TESTER...\n"
        self.assertEqual(err.getvalue(), expected_err)
Example #7
0
    def test3_run_assemble_success(self, check_call_patch):
        """run_assemble succeeds when the outs/assembly directory exists.

        ``check_call_patch`` is a mock supplied from outside this method
        (decorator not visible here). stderr is captured during the call
        and restored in a ``finally`` so a failing assertion cannot leave
        it redirected.
        """
        check_call_patch.return_value = '0'
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            # Pre-create the directory run_assemble looks for after the
            # (mocked) supernova run.
            os.makedirs(asm.outs_assembly_path)
            run_assemble(asm)
        finally:
            sys.stderr = sys.__stderr__

        self.assertTrue(os.path.exists(sample.path))

        expected_err = "Checking if supernova is in PATH...\nRUNNING: supernova --help\nRUNNING: supernova run --id=assembly --fastqs={} --uiport=18080 --nodebugmem --localcores=2 --localmem=2\n".format(
            sample.reads_path)
        self.assertEqual(err.getvalue(), expected_err)
Example #8
0
    def test1_aln_asm(self):
        """A sample yields alignment and assembly entities with the
        expected class name, path and back-reference to the sample."""
        sample = TenxSample(base_path=TenxApp.config.get("TENX_DATA_PATH"),
                            name="TEST-001")
        ref = TenxReference(name='refdata-GRCh38-2.1.0')

        def check_entity(entity, class_name, dirname):
            # Checks shared by both entity kinds, in the original order.
            self.assertTrue(bool(entity))
            self.assertEqual(entity.__class__.__name__, class_name)
            self.assertEqual(os.path.join(sample.path, dirname), entity.path)
            self.assertEqual(sample, entity.sample)

        aln = sample.alignment(ref=ref)
        check_entity(aln, "TenxAlignment", "alignment")
        self.assertEqual(ref, aln.ref)

        asm = sample.assembly()
        check_entity(asm, "TenxAssembly", "assembly")
Example #9
0
    def test32_run_mkoutput_fails_without_all_fasta_files(
            self, check_call_patch):
        """run_mkoutput raises when no fasta.gz files appear after it runs.

        ``check_call_patch`` is a mock supplied from outside this method
        (decorator not visible here). stderr is captured during the call
        and restored in a ``finally`` so a failing assertion cannot leave
        it redirected.
        """
        check_call_patch.return_value = '0'
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            outs_asm_d = asm.outs_assembly_path
            os.makedirs(outs_asm_d)
            with self.assertRaisesRegex(
                    Exception, "Expected 4 assembly fasta.gz files in " +
                    asm.mkoutput_path +
                    " after running mkoutput, but found 0"):
                assembly.run_mkoutput(asm)
        finally:
            sys.stderr = sys.__stderr__

        expected_err = "Running mkoutput for TESTER...\nChecking if supernova is in PATH...\nRUNNING: supernova --help\nEntering {ASM_D}/mkoutput\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.raw --style=raw\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.megabubbles --style=megabubbles\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.pseudohap2 --style=pseudohap2\n".format(
            ASM_D=asm.path)
        self.assertEqual(err.getvalue(), expected_err)
Example #10
0
    def test2_run_assemble_fails_when_no_outs_assembly_dir(
            self, check_call_patch):
        """run_assemble raises when outs/assembly is absent after the run.

        Also checks that ``TENX_ASM_PARAMS`` from the config is appended
        to the supernova command line.

        ``check_call_patch`` is a mock supplied from outside this method
        (decorator not visible here). Both the stderr redirection and the
        ``TENX_ASM_PARAMS`` override are undone in a ``finally`` so neither
        can leak into other tests that share ``TenxApp.config``.
        """
        check_call_patch.return_value = '0'
        err = io.StringIO()

        # Remember the prior state of the key so it can be put back exactly.
        had_params = 'TENX_ASM_PARAMS' in TenxApp.config
        old_params = TenxApp.config.get('TENX_ASM_PARAMS')

        sys.stderr = err
        try:
            TenxApp.config['TENX_ASM_PARAMS'] = "--maxreads='all'"
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            with self.assertRaisesRegex(
                    Exception,
                    "Ran supernova script, but {} was not found".format(
                        asm.outs_assembly_path)):
                run_assemble(asm)
        finally:
            sys.stderr = sys.__stderr__
            if had_params:
                TenxApp.config['TENX_ASM_PARAMS'] = old_params
            else:
                TenxApp.config.pop('TENX_ASM_PARAMS', None)

        self.assertTrue(os.path.exists(sample.path))

        expected_err = "Checking if supernova is in PATH...\nRUNNING: supernova --help\nRUNNING: supernova run --id=assembly --fastqs={} --uiport=18080 --nodebugmem --localcores=2 --localmem=2 --maxreads='all'\n".format(
            sample.reads_path)
        self.assertEqual(err.getvalue(), expected_err)
Example #11
0
    def test33_run_mkoutput_success(self, check_call_patch):
        """run_mkoutput succeeds when four fasta.gz files are present.

        ``check_call_patch`` is a mock supplied from outside this method
        (decorator not visible here). stderr is captured during the call
        and restored in a ``finally`` so a failing assertion cannot leave
        it redirected.
        """
        check_call_patch.return_value = '0'
        err = io.StringIO()
        sys.stderr = err
        try:
            sample = TenxSample(
                name="TESTER",
                base_path=TenxApp.config.get("TENX_DATA_PATH"))
            asm = sample.assembly()
            outs_asm_d = asm.outs_assembly_path
            os.makedirs(outs_asm_d)
            mkoutput_d = asm.mkoutput_path
            os.makedirs(mkoutput_d)
            # Stand-ins for the four files the mocked mkoutput would write.
            for n in range(4):
                fasta_fn = os.path.join(mkoutput_d, "{}.fasta.gz".format(n))
                # Leaving the with-block closes (and so flushes) the file.
                with open(fasta_fn, "w") as f:
                    f.write(">SEQ1\nATGC")
            assembly.run_mkoutput(asm)
        finally:
            sys.stderr = sys.__stderr__

        expected_err = "Running mkoutput for TESTER...\nChecking if supernova is in PATH...\nRUNNING: supernova --help\nEntering {ASM_D}/mkoutput\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.raw --style=raw\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.megabubbles --style=megabubbles\nRUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.pseudohap2 --style=pseudohap2\n".format(
            ASM_D=asm.path)
        self.assertEqual(err.getvalue(), expected_err)
Example #12
0
 def test10_assembly(self):
     """Assembly path attributes are laid out under <base>/TESTER/assembly
     for both the local data path and the remote URL."""
     base_paths = (TenxApp.config["TENX_DATA_PATH"],
                   TenxApp.config["TENX_REMOTE_URL"])
     for base_path in base_paths:
         sample = TenxSample(name="TESTER", base_path=base_path)
         asm = sample.assembly()
         self.assertEqual(asm.path, os.path.join(sample.path, "assembly"))

         # Expected layout, built up one directory level at a time.
         asm_root = os.path.join(base_path, 'TESTER', 'assembly')
         outs_d = os.path.join(asm_root, 'outs')
         outs_asm_d = os.path.join(outs_d, 'assembly')
         expected_by_attr = (
             ("path", asm_root),
             ("mkoutput_path", os.path.join(asm_root, 'mkoutput')),
             ("outs_path", outs_d),
             ("outs_assembly_path", outs_asm_d),
             ("outs_assembly_stats_path", os.path.join(outs_asm_d, 'stats')),
         )
         for attr_name, expected in expected_by_attr:
             self.assertEqual(getattr(asm, attr_name), expected)
Example #13
0
def asm_pipeline_cmd(sample_name):
    """
    Run the Assembly Pipeline with Cromwell

    Process includes: downloading reads, running supernova, mkoutput, and then uploading the assembly.
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as CLI help text by decorators that are not visible here.
    #
    # sample_name: name of the sample under TENX_DATA_PATH to assemble.
    # Raises AssertionError when no config is loaded; re-raises whatever
    # run_pipeline raises after sending a FAILED notification.

    # Explicit raise rather than `assert`, so the check still runs under
    # `python -O`. The exception type is unchanged for existing callers.
    if not TenxApp.config:
        raise AssertionError("Must provide tenx yaml config file!")

    sys.stderr.write("Run assembly pipeline for {}\n".format(sample_name))
    hostname = socket.gethostname()
    notifications.slack("{} START {}".format(sample_name, hostname))
    sample = TenxSample(base_path=TenxApp.config["TENX_DATA_PATH"],
                        name=sample_name)
    asm = sample.assembly()
    try:
        run_pipeline(asm)
    except BaseException as ex:
        # BaseException (not Exception) so interrupts and SystemExit are
        # also reported before being re-raised unchanged.
        sys.stderr.write("Exception: {}\n".format(ex))
        sys.stderr.write(
            "Exception encountered, sending notifications if configured...\n")
        notifications.slack("{} FAILED {}".format(sample_name, hostname))
        raise
    notifications.slack("{} SUCCESS {}".format(sample_name, hostname))
Example #14
0
class TenxAppTest1(unittest.TestCase):
    """Tests for TenxApp initialisation and TenxCromwell configuration."""

    def setUp(self):
        """Create sample, assembly, reference and alignment fixtures.

        The fixtures live under a fresh temporary directory, and the
        application config is reset to ``None`` so each test starts
        unconfigured.
        """
        self.data_dn = os.path.join(os.path.dirname(__file__), "data", "app")
        self.temp_d = tempfile.TemporaryDirectory()
        self.sample = TenxSample(name="__TEST__", base_path=self.temp_d.name)
        self.asm = self.sample.assembly()
        self.ref = TenxReference(name="__REF__")
        self.aln = self.sample.alignment(ref=self.ref)
        TenxApp.config = None

    def tearDown(self):
        """Reset the application config and remove the temp directory."""
        TenxApp.config = None
        self.temp_d.cleanup()

    def test_init_fails(self):
        """TenxApp raises IOError for a config file that does not exist."""
        if TenxApp.config is None:
            TenxApp()
        TenxApp.config = None
        with self.assertRaisesRegex(IOError, "No such file or directory"):
            TenxApp("/tenx.yaml")

    def test_init(self):
        """TenxApp initialises with no file and loads a given YAML file."""
        # init w/o config
        TenxApp()
        self.assertIsNotNone(TenxApp.config)

        # re-init w/ config
        TenxApp.config = None
        self.assertIsNone(TenxApp.config)

        config = {
            "environment": "test",
            "TENX_SCRIPTS_PATH": "tests/test_app",
            "TENX_NOTIFICATIONS_SLACK": "https://slack.com",
        }
        # The context manager closes (and thereby deletes) the temp file
        # deterministically instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile() as conf_f:
            conf_f.write(yaml.dump(config).encode())
            # Flush so TenxApp sees the content when it opens the path.
            conf_f.flush()

            tenxapp = TenxApp(conf_f.name)
            self.assertIsNotNone(tenxapp)
            self.assertDictEqual(tenxapp.config, config)

    def test_cromwell_fails(self):
        """TenxCromwell rejects missing config, missing jar, bad entity."""
        with self.assertRaisesRegex(Exception,
                                    "Tenx config has not been initialized!"):
            TenxCromwell(entity=self.asm)

        TenxApp.config = {
            "TENX_CROMWELL_PATH": "/",
        }
        with self.assertRaisesRegex(
                Exception, "Cromwell jar not found at /cromwell.jar!"):
            TenxCromwell(entity=self.asm)

        with self.assertRaisesRegex(Exception, "Unknown entity:"):
            TenxCromwell(entity=self.sample)

    def test_cromwell(self):
        """TenxCromwell derives pipeline names, file names and inputs for
        assembly (supernova) and alignment (longranger) entities."""
        TenxApp.config = {
            "TENX_CROMWELL_PATH": self.data_dn,
        }

        test_params = [
            {
                "pipeline": "supernova",
                "entity": self.asm,
                "inputs": {
                    "SAMPLE_NAME": self.asm.sample.name
                },
            },
            {
                "pipeline": "longranger",
                "entity": self.aln,
                "inputs": {
                    "SAMPLE_NAME": self.aln.sample.name,
                    "REF_NAME": self.aln.ref.name
                },
            },
        ]
        for p in test_params:
            pipeline_name = p["pipeline"]
            entity = p["entity"]
            cromwell = TenxCromwell(entity=entity)
            self.assertTrue(cromwell)
            self.assertEqual(cromwell.entity, entity)
            self.assertEqual(cromwell.pipeline_name, pipeline_name)

            templates_dn = cromwell.templates_dn
            self.assertTrue(templates_dn)

            cromwell_dn = cromwell.cromwell_dn
            self.assertEqual(cromwell_dn,
                             TenxApp.config.get("TENX_CROMWELL_PATH"))
            self.assertEqual(cromwell.cromwell_jar,
                             os.path.join(cromwell_dn, "cromwell.jar"))

            inputs_bn = ".".join([pipeline_name, "inputs", "json"])
            self.assertEqual(cromwell.inputs_bn, inputs_bn)
            wdl_bn = ".".join([pipeline_name, "gcloud", "wdl"])
            self.assertEqual(cromwell.wdl_bn, wdl_bn)
            conf_bn = ".".join([pipeline_name, "conf"])
            self.assertEqual(cromwell.conf_bn, conf_bn)

            self.assertDictEqual(cromwell.inputs_for_entity(), p["inputs"])

            conf_fn = os.path.join(cromwell.pipeline_dn, conf_bn)
            inputs_fn = os.path.join(cromwell.pipeline_dn, inputs_bn)
            wdl_fn = os.path.join(cromwell.pipeline_dn, wdl_bn)

            # NOTE(review): cmd and expected_cmd are built but never
            # compared -- presumably an assertEqual was intended; confirm
            # against TenxCromwell.command() before adding it.
            cmd = cromwell.command()
            expected_cmd = [
                "java", "-Dconfig={}".format(conf_fn), "-jar",
                cromwell.cromwell_jar, wdl_fn, "-i", inputs_fn
            ]

            self.assertTrue(os.path.exists(conf_fn))
            self.assertTrue(os.path.exists(inputs_fn))
            self.assertTrue(os.path.exists(wdl_fn))