def setUp(self):
     """Prepare a GCF-backed ``project.Project`` in a fresh temp directory.

     The provider is switched to the 'basic' file-name pattern, and
     ``test_utils.create_fastq_files`` is then called for the same root
     (presumably to populate it with matching fixture files; the helper
     is defined outside this block).
     """
     root = tempfile.mkdtemp()
     self.tmp_root = root

     default_args = get_default_args()
     gcf_provider = gcf.GcfProvider(root)
     test_utils.set_pattern(gcf_provider, 'basic')

     self.proj = project.Project(default_args, gcf_provider)
     test_utils.create_fastq_files("gcf", "basic", root)
 def setUp(self):
     """Prepare an SLL-backed ``project.Project`` in a fresh temp directory.

     The provider is switched to the 'project_first' file-name pattern, and
     ``test_utils.create_fastq_files`` is then called for the same root
     (presumably to populate it with matching fixture files; the helper
     is defined outside this block).
     """
     root = tempfile.mkdtemp()
     self.tmp_root = root

     default_args = get_default_args()
     sll_provider = sll.SllProvider(root)
     test_utils.set_pattern(sll_provider, 'project_first')

     self.proj = project.Project(default_args, sll_provider)
     test_utils.create_fastq_files("sll", "project_first", root)
def test_verify_flowcell_dir():
    # A listing holding one well-formed flowcell directory name must verify
    # as True; a malformed name, or a listing with two entries, must raise
    # ValueError.
    provider = sll.SllProvider(".")
    test_utils.set_pattern(provider, 'project_first')

    valid_listings = (["150606_ABCDE12345"], ["000000_AAAAAAAAAA"])
    for listing in valid_listings:
        assert_equal(provider.verify_flowcell_dir(listing), True)

    malformed = ["1"]
    assert_raises(ValueError, provider.verify_flowcell_dir, malformed)

    # Two individually valid names together are rejected.
    two_entries = valid_listings[0] + valid_listings[1]
    assert_raises(ValueError, provider.verify_flowcell_dir, two_entries)
def test_verify_fastq_files_lane_first():
    # With the 'lane_first' pattern, verify_fastq_files must return True for
    # matching FASTQ names and for names without a FASTQ ending (presumably
    # filtered out before validation), and raise ValueError for a FASTQ name
    # that does not match the pattern.
    provider = sll.SllProvider(".")
    test_utils.set_pattern(provider, 'lane_first')

    matching = [
        '5_150702_BC6U56ANXX_P1954_101_1.fastq.gz',
        '5_150702_BC6U56ANXX_P1954_101_2.fastq.gz',
    ]
    not_fastq = ["abc123", "foo.txt"]
    malformed = ["abc123.fastq.gz"]

    for accepted in (matching, not_fastq):
        assert_equal(provider.verify_fastq_files(accepted), True)
    assert_raises(ValueError, provider.verify_fastq_files, malformed)
def test_verify_fastq_files_project_first():
    # With the 'project_first' pattern, verify_fastq_files must return True
    # for matching FASTQ names and for names without a FASTQ ending
    # (presumably filtered out before validation), and raise ValueError for
    # a FASTQ name that does not match the pattern.
    provider = sll.SllProvider(".")
    test_utils.set_pattern(provider, 'project_first')

    matching = [
        'P1777_101_GTTTCG_L001_R1_001.fastq.gz',
        'P1777_101_GTTTCG_L001_R2_001.fastq.gz',
    ]
    not_fastq = ["abc123", "foo.txt"]
    malformed = ["abc123.fastq.gz"]

    for accepted in (matching, not_fastq):
        assert_equal(provider.verify_fastq_files(accepted), True)
    assert_raises(ValueError, provider.verify_fastq_files, malformed)
 def setUp(self):
     """Create three sample/flowcell directory trees with bad-format files.

     Under a fresh temp root, the directories ``P1777_<n>/150305_AHCYF2ADXX``
     are created for n = 1, 2, 3, and ``test_utils.create_fastq_files`` is
     called on each with ``bad_format=True``. An SLL provider using the
     'project_first' pattern is stored on ``self.provider``.
     """
     self.tmp_root = tempfile.mkdtemp()
     self.provider = sll.SllProvider(self.tmp_root)
     test_utils.set_pattern(self.provider, 'project_first')

     flowcell_dir = "150305_AHCYF2ADXX"
     for sample_no in (1, 2, 3):
         sample_dir = "{0}_{1}".format("P1777", sample_no)
         fastq_root = os.path.join(self.tmp_root, sample_dir, flowcell_dir)
         os.makedirs(fastq_root)
         test_utils.create_fastq_files(
             "sll", "project_first", fastq_root, sample_no, bad_format=True)
    def test_get_samples_basic(self):
        # Expect samples S1..S4, each with lanes L001 and L002, each lane
        # holding reads 1 and 2 whose paths follow the 'basic' naming scheme
        # under self.tmp_root.
        fastq_format = "{3}/ABC012345_S{0}_{1}_R{2}_001.fastq.gz"
        sample_numbers = (1, 2, 3, 4)

        # Build every lane first (same order as writing them out by hand),
        # then wrap them into samples.
        lanes_by_sample = {
            number: [
                project.Lane(lane_id, reads=[
                    project.Read(fastq_format.format(
                        number, lane_id, read_no, self.tmp_root))
                    for read_no in (1, 2)
                ])
                for lane_id in ("L001", "L002")
            ]
            for number in sample_numbers
        }
        expected = [
            project.Sample("S{0}".format(number), lanes_by_sample[number])
            for number in sample_numbers
        ]

        provider = gcf.GcfProvider(self.tmp_root)
        test_utils.set_pattern(provider, "basic")
        result = provider.get_samples()

        # NOTE(review): zip() stops at the shorter sequence, so a missing or
        # extra sample/lane in `result` is not detected by these loops.
        for actual_sample, wanted_sample in zip(result, expected):
            assert_equal(actual_sample.sample_id, wanted_sample.sample_id)
            lane_pairs = zip(actual_sample.lanes, wanted_sample.lanes)
            for actual_lane, wanted_lane in lane_pairs:
                assert_equal(actual_lane, wanted_lane)
    def test_get_samples_lane_first(self):
        # Samples discovered under self.lane_first_dir with the 'lane_first'
        # pattern are compared, id by id and lane by lane, against what
        # self.create_expected builds for the same directory.
        root = self.lane_first_dir
        expected = self.create_expected(
            root, "{3}/{1}_151224_A1B2C3XYZ0_P1234_{0}_{2}.fastq.gz", "")

        provider = sll.SllProvider(root)
        test_utils.set_pattern(provider, "lane_first")
        result = provider.get_samples()

        # NOTE(review): zip() stops at the shorter sequence, so a missing or
        # extra sample/lane in `result` is not detected by these loops.
        for actual_sample, wanted_sample in zip(result, expected):
            assert_equal(actual_sample.sample_id, wanted_sample.sample_id)
            lane_pairs = zip(actual_sample.lanes, wanted_sample.lanes)
            for actual_lane, wanted_lane in lane_pairs:
                assert_equal(actual_lane, wanted_lane)
    def test_get_samples_project_first(self):
        # Samples discovered under self.project_first_dir with the
        # 'project_first' pattern are compared, id by id and lane by lane,
        # against what self.create_expected builds for the same directory.
        root = self.project_first_dir
        expected = self.create_expected(
            root, "{3}/P1234_{0}_TAGCTT_{1}_R{2}_001.fastq.gz", "L00")

        provider = sll.SllProvider(root)
        test_utils.set_pattern(provider, "project_first")
        result = provider.get_samples()

        # NOTE(review): zip() stops at the shorter sequence, so a missing or
        # extra sample/lane in `result` is not detected by these loops.
        for actual_sample, wanted_sample in zip(result, expected):
            assert_equal(actual_sample.sample_id, wanted_sample.sample_id)
            lane_pairs = zip(actual_sample.lanes, wanted_sample.lanes)
            for actual_lane, wanted_lane in lane_pairs:
                assert_equal(actual_lane, wanted_lane)
# Example 10
def test_basic_file_pattern():
    # Checks every named part that the 'basic' GCF pattern extracts from a
    # representative file name.
    name = "ABC012345_S1_L001_R1_001.fastq.gz"
    provider = gcf.GcfProvider(".")
    test_utils.set_pattern(provider, 'basic')

    # Without an explicit part, the whole file name is expected back.
    assert_equal(provider.get_filename_part(name),
                 "ABC012345_S1_L001_R1_001.fastq.gz")

    expected_parts = (
        ('project_id', "ABC012345"),
        ('sample_id', "S1"),
        ('lane', "L001"),
        ('read', "1"),
        ('file_ending', ".fastq.gz"),
        ('gzip', ".gz"),
    )
    for part, wanted in expected_parts:
        assert_equal(provider.get_filename_part(name, part), wanted)
# Example 11
def test_get_filename_part_lane_first():
    # Checks every named part that the SLL 'lane_first' pattern extracts
    # from a representative file name; 'all' is expected to give back the
    # full name.
    lane_first = '5_150702_BC6U56ANXX_P1954_101_1.fastq.gz'
    provider = sll.SllProvider(".")
    test_utils.set_pattern(provider, 'lane_first')

    expected_parts = (
        ('all', lane_first),
        ('sample_id', "P1954_101"),
        ('project_id', "P1954"),
        ('scilab_id', "101"),
        ('lane', "5"),
        ('read', "1"),
        ('file_ending', ".fastq.gz"),
    )
    for part, wanted in expected_parts:
        assert_equal(provider.get_filename_part(lane_first, part), wanted)
# Example 12
def test_get_filename_part_project_first():
    # Checks every named part that the SLL 'project_first' pattern extracts
    # from a representative (uncompressed) file name; 'all' is expected to
    # give back the full name.
    project_first = 'P1777_101_GTTTCG_L001_R2_001.fastq'
    provider = sll.SllProvider(".")
    test_utils.set_pattern(provider, 'project_first')

    expected_parts = (
        ('all', project_first),
        ('sample_id', "P1777_101"),
        ('project_id', "P1777"),
        ('scilab_id', "101"),
        ('lane', "L001"),
        ('read', "R2"),
        ('file_ending', ".fastq"),
    )
    for part, wanted in expected_parts:
        assert_equal(provider.get_filename_part(project_first, part), wanted)
def test_aligning_cmd():
    # aligning_cmd() is expected to return one STAR command line per lane.
    # L001 reads gzipped trimmed output and therefore carries
    # "--readFilesCommand zcat"; L002 reads plain .fastq output and does not.
    args = get_default_args()
    run_root = args['run_root']

    gzipped_cmd = ("STAR --runThreadN 8 --genomeDir ./genomes "
                   "--readFilesIn "
                   "{0}/{1}/output_paired_1.fastq.gz "
                   "{0}/{1}/output_paired_2.fastq.gz "
                   "--readFilesCommand zcat")
    plain_cmd = ("STAR --runThreadN 8 --genomeDir ./genomes "
                 "--readFilesIn "
                 "{0}/{1}/output_paired_1.fastq "
                 "{0}/{1}/output_paired_2.fastq")
    expected = {
        "L001": gzipped_cmd.format(run_root, "L001"),
        "L002": plain_cmd.format(run_root, "L002"),
    }

    trimmis = trimmostar.Trimmostar(args)
    test_utils.set_pattern(trimmis.provider, 'project_first')
    assert_equal(trimmis.aligning_cmd(), expected)
def test_trimming_cmd():
    # trimming_cmd() is expected to return one paired-end Trimmomatic command
    # line per lane: L001 works on .fastq.gz files, L002 on plain .fastq.
    args = get_default_args()
    overrides = {
        'trimmomatic_jar': "trimmomatic-0.33.jar",
        'threads': '8',
        'quality_type': "phred33",
        'trimmomatic_log_file': "trimmomatic.log",
        'mode': "paired",
        'trimming_steps': ["LEADING:20", "TRAILING:20"]
    }
    args.update(overrides)
    run_root = args['run_root']

    # {0} receives the two input files, {1} the four output files.
    command_template = ("java -jar trimmomatic-0.33.jar PE -threads 8 "
                        "-phred33 -trimlog trimmomatic.log "
                        "{0} "
                        "{1} "
                        "LEADING:20 TRAILING:20")

    gz_inputs = ("{0}/P1777_101_GTTTCG_L001_R1_001.fastq.gz "
                 "{0}/P1777_101_GTTTCG_L001_R2_001.fastq.gz").format(run_root)
    gz_outputs = ("{0}/{1}/output_paired_1.fastq.gz "
                  "{0}/{1}/output_unpaired_1.fastq.gz "
                  "{0}/{1}/output_paired_2.fastq.gz "
                  "{0}/{1}/output_unpaired_2.fastq.gz").format(run_root, "L001")

    plain_inputs = ("{0}/P1777_101_GTTTCG_L002_R1_001.fastq "
                    "{0}/P1777_101_GTTTCG_L002_R2_001.fastq").format(run_root)
    plain_outputs = ("{0}/{1}/output_paired_1.fastq "
                     "{0}/{1}/output_unpaired_1.fastq "
                     "{0}/{1}/output_paired_2.fastq "
                     "{0}/{1}/output_unpaired_2.fastq").format(run_root, "L002")

    expected = {
        "L001": command_template.format(gz_inputs, gz_outputs),
        "L002": command_template.format(plain_inputs, plain_outputs),
    }

    trimmis = trimmostar.Trimmostar(args)
    test_utils.set_pattern(trimmis.provider, 'project_first')
    assert_equal(trimmis.trimming_cmd(), expected)
# Example 15
 def test_verify_directory_structure_bad_directory(self):
     # Runs directory-structure verification against self.bad_root using the
     # 'basic' pattern and requires a truthy result.
     # NOTE(review): the test name says "bad directory" yet a truthy result
     # is asserted -- confirm this is the intended expectation.
     bad_provider = gcf.GcfProvider(self.bad_root)
     test_utils.set_pattern(bad_provider, 'basic')
     structure_ok = bad_provider.verify_directory_structure()
     assert structure_ok
 def test_generate_commands(self):
     # generate_commands() on a Trimmostar built from self.args (provider
     # switched to the 'project_first' pattern) must equal
     # self.expected_cmds.
     pipeline = trimmostar.Trimmostar(self.args)
     test_utils.set_pattern(pipeline.provider, 'project_first')
     generated = pipeline.generate_commands()
     assert_equal(generated, self.expected_cmds)
# Example 17
def test_get_filename_part_bad_name():
    # Asking for a part of a file name that does not fit the 'project_first'
    # pattern must raise ValueError.
    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'project_first')

    unparsable = '123.fastq.gz'
    extract = sll_provider.get_filename_part
    assert_raises(ValueError, extract, unparsable, 'all')