def setUp(self):
    """Prepare a project backed by a GcfProvider rooted in a new temp dir.

    The provider is switched to the 'basic' pattern before the project is
    built; test_utils.create_fastq_files is then invoked for the
    "gcf"/"basic" combination on the same directory.
    """
    tmp_dir = tempfile.mkdtemp()
    self.tmp_root = tmp_dir
    default_args = get_default_args()
    gcf_provider = gcf.GcfProvider(tmp_dir)
    test_utils.set_pattern(gcf_provider, 'basic')
    self.proj = project.Project(default_args, gcf_provider)
    test_utils.create_fastq_files("gcf", "basic", tmp_dir)
def setUp(self):
    """Prepare a project backed by an SllProvider rooted in a new temp dir.

    The provider is switched to the 'project_first' pattern before the
    project is built; test_utils.create_fastq_files is then invoked for the
    "sll"/"project_first" combination on the same directory.
    """
    tmp_dir = tempfile.mkdtemp()
    self.tmp_root = tmp_dir
    default_args = get_default_args()
    sll_provider = sll.SllProvider(tmp_dir)
    test_utils.set_pattern(sll_provider, 'project_first')
    self.proj = project.Project(default_args, sll_provider)
    test_utils.create_fastq_files("sll", "project_first", tmp_dir)
def test_verify_flowcell_dir():
    """Check SllProvider.verify_flowcell_dir on valid and invalid listings.

    A list holding one well-formed flowcell name must yield True; a
    malformed name, or a list holding two names, must raise ValueError.
    """
    valid_a = ["150606_ABCDE12345"]
    valid_b = ["000000_AAAAAAAAAA"]
    malformed = ["1"]

    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'project_first')

    for accepted in (valid_a, valid_b):
        assert_equal(sll_provider.verify_flowcell_dir(accepted), True)

    # Both a malformed entry and more than one entry are expected to fail.
    for rejected in (malformed, valid_a + valid_b):
        assert_raises(ValueError, sll_provider.verify_flowcell_dir, rejected)
def test_verify_fastq_files_lane_first():
    """Check SllProvider.verify_fastq_files under the 'lane_first' pattern.

    True is expected for a well-formed read pair and for a listing without
    any fastq file names; a badly named fastq file must raise ValueError.
    """
    read_pair = [
        '5_150702_BC6U56ANXX_P1954_101_1.fastq.gz',
        '5_150702_BC6U56ANXX_P1954_101_2.fastq.gz',
    ]
    non_fastq = ["abc123", "foo.txt"]
    malformed = ["abc123.fastq.gz"]

    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'lane_first')

    for accepted in (read_pair, non_fastq):
        assert_equal(sll_provider.verify_fastq_files(accepted), True)
    assert_raises(ValueError, sll_provider.verify_fastq_files, malformed)
def test_verify_fastq_files_project_first():
    """Check SllProvider.verify_fastq_files under the 'project_first' pattern.

    True is expected for a well-formed read pair and for a listing without
    any fastq file names; a badly named fastq file must raise ValueError.
    """
    read_pair = [
        'P1777_101_GTTTCG_L001_R1_001.fastq.gz',
        'P1777_101_GTTTCG_L001_R2_001.fastq.gz',
    ]
    non_fastq = ["abc123", "foo.txt"]
    malformed = ["abc123.fastq.gz"]

    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'project_first')

    for accepted in (read_pair, non_fastq):
        assert_equal(sll_provider.verify_fastq_files(accepted), True)
    assert_raises(ValueError, sll_provider.verify_fastq_files, malformed)
def setUp(self):
    """Create three sample directories with a flowcell sub-directory each.

    Directories are named P1777_1 .. P1777_3 under a new temp root, and
    test_utils.create_fastq_files is called with bad_format=True for every
    flowcell directory. The provider is stored on self.provider.
    """
    tmp_dir = tempfile.mkdtemp()
    self.tmp_root = tmp_dir
    self.provider = sll.SllProvider(tmp_dir)
    test_utils.set_pattern(self.provider, 'project_first')

    project_prefix = "P1777"
    name_template = "{0}_{1}"
    flowcell_name = "150305_AHCYF2ADXX"

    for index in (1, 2, 3):
        sample_dir = os.path.join(
            tmp_dir, name_template.format(project_prefix, index))
        flowcell_path = os.path.join(sample_dir, flowcell_name)
        os.makedirs(flowcell_path)
        test_utils.create_fastq_files(
            "sll", "project_first", flowcell_path, index, bad_format=True)
def test_get_samples_basic(self):
    """Compare GcfProvider.get_samples() with four hand-built samples.

    The expected samples S1..S4 each carry lanes L001 and L002, and every
    lane holds read 1 and read 2 built from a path under self.tmp_root.
    """
    path_template = "{3}/ABC012345_S{0}_{1}_R{2}_001.fastq.gz"
    lane_names = ("L001", "L002")
    read_numbers = (1, 2)

    expected = []
    for sample_no, sample_id in enumerate(("S1", "S2", "S3", "S4"), start=1):
        lanes = []
        for lane_name in lane_names:
            reads = [
                project.Read(path_template.format(
                    sample_no, lane_name, read_no, self.tmp_root))
                for read_no in read_numbers
            ]
            lanes.append(project.Lane(lane_name, reads=reads))
        expected.append(project.Sample(sample_id, lanes))

    gcf_provider = gcf.GcfProvider(self.tmp_root)
    test_utils.set_pattern(gcf_provider, "basic")
    actual = gcf_provider.get_samples()

    # NOTE(review): zip() stops at the shorter input, so a result with fewer
    # samples or lanes than expected is not detected by these loops.
    for got, want in zip(actual, expected):
        assert_equal(got.sample_id, want.sample_id)
        for got_lane, want_lane in zip(got.lanes, want.lanes):
            assert_equal(got_lane, want_lane)
def test_get_samples_lane_first(self):
    """Compare SllProvider.get_samples() with self.create_expected output.

    Uses the 'lane_first' pattern on self.lane_first_dir and an empty lane
    prefix for the expected data.
    """
    path_template = "{3}/{1}_151224_A1B2C3XYZ0_P1234_{0}_{2}.fastq.gz"
    expected = self.create_expected(self.lane_first_dir, path_template, "")

    sll_provider = sll.SllProvider(self.lane_first_dir)
    test_utils.set_pattern(sll_provider, "lane_first")
    actual = sll_provider.get_samples()

    # NOTE(review): zip() stops at the shorter input, so a result with fewer
    # samples or lanes than expected is not detected by these loops.
    for got, want in zip(actual, expected):
        assert_equal(got.sample_id, want.sample_id)
        for got_lane, want_lane in zip(got.lanes, want.lanes):
            assert_equal(got_lane, want_lane)
def test_get_samples_project_first(self):
    """Compare SllProvider.get_samples() with self.create_expected output.

    Uses the 'project_first' pattern on self.project_first_dir and the
    "L00" lane prefix for the expected data.
    """
    path_template = "{3}/P1234_{0}_TAGCTT_{1}_R{2}_001.fastq.gz"
    expected = self.create_expected(
        self.project_first_dir, path_template, "L00")

    sll_provider = sll.SllProvider(self.project_first_dir)
    test_utils.set_pattern(sll_provider, "project_first")
    actual = sll_provider.get_samples()

    # NOTE(review): zip() stops at the shorter input, so a result with fewer
    # samples or lanes than expected is not detected by these loops.
    for got, want in zip(actual, expected):
        assert_equal(got.sample_id, want.sample_id)
        for got_lane, want_lane in zip(got.lanes, want.lanes):
            assert_equal(got_lane, want_lane)
def test_basic_file_pattern():
    """Check every named part GcfProvider extracts with the 'basic' pattern."""
    filename = "ABC012345_S1_L001_R1_001.fastq.gz"
    gcf_provider = gcf.GcfProvider(".")
    test_utils.set_pattern(gcf_provider, 'basic')

    # Without a part argument the complete file name is expected back.
    assert_equal(gcf_provider.get_filename_part(filename),
                 "ABC012345_S1_L001_R1_001.fastq.gz")

    expected_parts = (
        ('project_id', "ABC012345"),
        ('sample_id', "S1"),
        ('lane', "L001"),
        ('read', "1"),
        ('file_ending', ".fastq.gz"),
        ('gzip', ".gz"),
    )
    for part, expected in expected_parts:
        assert_equal(gcf_provider.get_filename_part(filename, part), expected)
def test_get_filename_part_lane_first():
    """Check every named part SllProvider extracts with 'lane_first'."""
    filename = '5_150702_BC6U56ANXX_P1954_101_1.fastq.gz'
    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'lane_first')

    expected_parts = (
        ('all', filename),
        ('sample_id', "P1954_101"),
        ('project_id', "P1954"),
        ('scilab_id', "101"),
        ('lane', "5"),
        ('read', "1"),
        ('file_ending', ".fastq.gz"),
    )
    for part, expected in expected_parts:
        assert_equal(sll_provider.get_filename_part(filename, part), expected)
def test_get_filename_part_project_first():
    """Check every named part SllProvider extracts with 'project_first'."""
    filename = 'P1777_101_GTTTCG_L001_R2_001.fastq'
    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'project_first')

    expected_parts = (
        ('all', filename),
        ('sample_id', "P1777_101"),
        ('project_id', "P1777"),
        ('scilab_id', "101"),
        ('lane', "L001"),
        ('read', "R2"),
        ('file_ending', ".fastq"),
    )
    for part, expected in expected_parts:
        assert_equal(sll_provider.get_filename_part(filename, part), expected)
def test_aligning_cmd():
    """Compare Trimmostar.aligning_cmd() with the expected per-lane commands.

    The expected L001 command reads .fastq.gz files and appends
    "--readFilesCommand zcat"; the expected L002 command reads plain .fastq
    files and has no such option.
    """
    args = get_default_args()
    run_root = args['run_root']

    gzipped_template = ("STAR --runThreadN 8 --genomeDir ./genomes "
                        "--readFilesIn "
                        "{0}/{1}/output_paired_1.fastq.gz "
                        "{0}/{1}/output_paired_2.fastq.gz "
                        "--readFilesCommand zcat")
    plain_template = ("STAR --runThreadN 8 --genomeDir ./genomes "
                      "--readFilesIn "
                      "{0}/{1}/output_paired_1.fastq "
                      "{0}/{1}/output_paired_2.fastq")
    expected = {
        "L001": gzipped_template.format(run_root, "L001"),
        "L002": plain_template.format(run_root, "L002"),
    }

    runner = trimmostar.Trimmostar(args)
    test_utils.set_pattern(runner.provider, 'project_first')
    assert_equal(runner.aligning_cmd(), expected)
def test_trimming_cmd():
    """Compare Trimmostar.trimming_cmd() with the expected per-lane commands.

    The default args are extended with explicit trimmomatic settings; the
    expected command for each lane is the shared command template filled
    with that lane's input files and output files.
    """
    args = get_default_args()
    args.update({
        'trimmomatic_jar': "trimmomatic-0.33.jar",
        'threads': '8',
        'quality_type': "phred33",
        'trimmomatic_log_file': "trimmomatic.log",
        'mode': "paired",
        'trimming_steps': ["LEADING:20", "TRAILING:20"]
    })
    run_root = args['run_root']

    command_template = ("java -jar trimmomatic-0.33.jar PE -threads 8 "
                        "-phred33 -trimlog trimmomatic.log "
                        "{0} "
                        "{1} "
                        "LEADING:20 TRAILING:20")

    # Lane L001 works on gzipped files.
    inputs_l001 = ("{0}/P1777_101_GTTTCG_L001_R1_001.fastq.gz "
                   "{0}/P1777_101_GTTTCG_L001_R2_001.fastq.gz").format(run_root)
    outputs_l001 = ("{0}/{1}/output_paired_1.fastq.gz "
                    "{0}/{1}/output_unpaired_1.fastq.gz "
                    "{0}/{1}/output_paired_2.fastq.gz "
                    "{0}/{1}/output_unpaired_2.fastq.gz").format(run_root, "L001")

    # Lane L002 works on uncompressed files.
    inputs_l002 = ("{0}/P1777_101_GTTTCG_L002_R1_001.fastq "
                   "{0}/P1777_101_GTTTCG_L002_R2_001.fastq").format(run_root)
    outputs_l002 = ("{0}/{1}/output_paired_1.fastq "
                    "{0}/{1}/output_unpaired_1.fastq "
                    "{0}/{1}/output_paired_2.fastq "
                    "{0}/{1}/output_unpaired_2.fastq").format(run_root, "L002")

    expected = {
        "L001": command_template.format(inputs_l001, outputs_l001),
        "L002": command_template.format(inputs_l002, outputs_l002),
    }

    runner = trimmostar.Trimmostar(args)
    test_utils.set_pattern(runner.provider, 'project_first')
    assert_equal(runner.trimming_cmd(), expected)
def test_verify_directory_structure_bad_directory(self):
    """Assert verify_directory_structure() is truthy for self.bad_root.

    NOTE(review): the test name says "bad directory" yet a truthy result is
    asserted; confirm against the provider that this is the intended outcome.
    """
    gcf_provider = gcf.GcfProvider(self.bad_root)
    test_utils.set_pattern(gcf_provider, 'basic')
    structure_ok = gcf_provider.verify_directory_structure()
    assert structure_ok
def test_generate_commands(self):
    """Compare Trimmostar.generate_commands() with self.expected_cmds.

    The Trimmostar instance is built from self.args and its provider is
    switched to the 'project_first' pattern first.
    """
    runner = trimmostar.Trimmostar(self.args)
    test_utils.set_pattern(runner.provider, 'project_first')
    generated = runner.generate_commands()
    assert_equal(generated, self.expected_cmds)
def test_get_filename_part_bad_name():
    """get_filename_part must raise ValueError for a non-matching name."""
    malformed = '123.fastq.gz'
    sll_provider = sll.SllProvider(".")
    test_utils.set_pattern(sll_provider, 'project_first')
    assert_raises(
        ValueError, sll_provider.get_filename_part, malformed, 'all')