def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
        # A mismatch between the run prefixes stored in the prep template and
        # the raw file names must be reported as a ValueError.
        #
        # Fixture, written straight into the test database:
        #   * filepaths 19 ('1_new.sff') and 20 ('1_error.sff') are attached
        #     to raw data 3
        #   * every row of qiita.prep_1 gets run_prefix 'preprocess_test',
        #     then sample 1.SKB8.640193 is switched to 'new'
        # Presumably '1_error.sff' is the file left without a matching
        # run prefix -- confirm against _get_preprocess_fasta_cmd.
        SQLConnectionHandler().execute("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (20, '1_error.sff', 17, 852952723,
                1, 5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 20);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)

        raw = RawData(3)
        preprocess_params = Preprocessed454Params(1)
        template = PrepTemplate(1)

        # Callable form of assertRaises: only the command builder itself is
        # expected to raise.
        self.assertRaises(ValueError, _get_preprocess_fasta_cmd,
                          raw, template, preprocess_params)
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
        # Overwrite the run prefix of three samples ('test1', 'test2' and
        # 'error'), regenerate the template files and expect the command
        # builder to raise a ValueError.
        template = self.sff_prep_template_rp
        overrides = (
            ('1.SKB8.640193', 'test1'),
            ('1.SKD8.640184', 'test2'),
            ('1.SKB7.640196', 'error'),
        )
        for sample_id, run_prefix in overrides:
            template[sample_id]['run_prefix'] = run_prefix
        template.generate_files()

        # Track the generated files in files_to_remove (presumably consumed
        # by the test teardown -- not visible from here).
        self.files_to_remove.extend(
            path for _, path in template.get_filepaths())

        preprocess_params = Preprocessed454Params(1)
        with self.assertRaises(ValueError):
            _get_preprocess_fasta_cmd(
                self.raw_data_rp, self.sff_prep_template_rp,
                preprocess_params)
    def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
        # Alter the run_prefix of one sample so that the prep template holds
        # more than one run_prefix value and the multi-prefix code path of
        # _get_preprocess_fasta_cmd is exercised.
        conn_handler = SQLConnectionHandler()
        sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
               "sample_id = '1.SKM9.640192'")
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        # The generated command is a '; '-separated chain of sub-commands.
        obs_cmds = obs_cmd.split('; ')
        # Assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
        # working, we only need to test for the commands being run and
        # that n is valid.
        self.assertEqual(len(obs_cmds), 8)
        self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[2].startswith('split_libraries.py'))
        self.assertIn('-n 1', obs_cmds[2])
        self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
        self.assertIn('-n 800000', obs_cmds[3])
        self.assertTrue(obs_cmds[4].startswith('cat'))
        self.assertIn('split_library_log.txt', obs_cmds[4])
        self.assertTrue(obs_cmds[5].startswith('cat'))
        # assertIn, not assertTrue: assertTrue('seqs.fna', msg) only checks
        # that the non-empty literal is truthy and therefore can never fail.
        self.assertIn('seqs.fna', obs_cmds[5])
        self.assertTrue(obs_cmds[6].startswith('cat'))
        self.assertIn('seqs_filtered.qual', obs_cmds[6])
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
        # Add an extra 'new.sff' raw file to the run-prefix fixture and check
        # the shape of the resulting command chain.
        scratch_dir = mkdtemp()
        sff_path = join(scratch_dir, 'new.sff')
        with open(sff_path, 'w') as handle:
            handle.write('\n')
        self.files_to_remove.append(sff_path)
        self.dirs_to_remove.append(scratch_dir)
        self.raw_data_rp.add_filepaths([(sff_path, self.raw_sff_id)])
        preprocess_params = Preprocessed454Params(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)

        obs_cmds = obs_cmd.split('; ')
        # Assuming test_get_preprocess_fasta_cmd_sff_no_run_prefix works, it
        # is enough to check which commands are run and the value given to -n.
        self.assertEqual(len(obs_cmds), 9)

        # One row per checked sub-command:
        # (index, expected prefix, expected substring, expected token count);
        # None means "no such check for this sub-command".
        expectations = (
            (0, 'process_sff.py', None, None),
            (1, 'process_sff.py', None, None),
            (2, 'process_sff.py', None, None),
            (3, 'split_libraries.py', '-n 1', None),
            (4, 'split_libraries.py', '-n 800000', None),
            (5, 'cat', 'split_library_log.txt', None),
            (6, 'cat', 'seqs.fna', 5),
            (7, 'cat', 'seqs_filtered.qual', 5),
        )
        for index, prefix, fragment, token_count in expectations:
            command = obs_cmds[index]
            self.assertTrue(command.startswith(prefix))
            if fragment is not None:
                self.assertIn(fragment, command)
            if token_count is not None:
                self.assertEqual(len(command.split(' ')), token_count)
 def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
     # Attach two extra raw files ('new.sff' and 'error.sff') to the
     # run-prefix fixture and expect the command builder to raise a
     # ValueError -- presumably because 'error.sff' matches no run prefix
     # in the prep template (confirm against _get_preprocess_fasta_cmd).
     created = []
     for file_name in ('new.sff', 'error.sff'):
         path = self.path_builder(file_name)
         with open(path, 'w') as handle:
             handle.write('\n')
         self.files_to_remove.append(path)
         created.append(path)

     self.raw_data_rp.add_filepaths(
         [(path, self.raw_sff_id) for path in created])
     preprocess_params = Preprocessed454Params(1)
     with self.assertRaises(ValueError):
         _get_preprocess_fasta_cmd(
             self.raw_data_rp, self.sff_prep_template_rp,
             preprocess_params)
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
        # Rewrite the run prefixes of qiita.prep_1 directly in the database:
        # every row gets 'test1', then sample 1.SKB2.640194 gets 'test2' and
        # sample 1.SKB8.640193 gets 'error'. Building the command for raw
        # data 3 must then raise a ValueError.
        SQLConnectionHandler().execute("""
            UPDATE qiita.prep_1 SET run_prefix='test1';
            UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
                sample_id = '1.SKB2.640194';
            UPDATE qiita.prep_1 SET run_prefix='error' WHERE
                sample_id = '1.SKB8.640193';
        """)

        raw = RawData(3)
        preprocess_params = Preprocessed454Params(1)
        template = PrepTemplate(1)

        self.assertRaises(ValueError, _get_preprocess_fasta_cmd,
                          raw, template, preprocess_params)
    def test_get_preprocess_fasta_cmd_sff(self):
        # Compare the command chain generated for raw data 3 / prep template 1
        # against hand-built expected sub-commands.
        def as_cmd(*pieces):
            # Sub-commands are space-separated argument lists.
            return ' '.join(pieces)

        raw = RawData(3)
        params = Preprocessed454Params(1)
        template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw, template, params)

        in_raw_dir = partial(join, self.db_dir, 'raw_data')
        sff_paths = [in_raw_dir(name) for name in
                     ('preprocess_test1.sff', 'preprocess_test2.sff')]

        exp_sff_cmds = [
            as_cmd("process_sff.py",
                   "-i %s" % sff_path,
                   "-o %s" % obs_output_dir)
            for sff_path in sff_paths]

        fasta_files = ','.join(
            join(obs_output_dir, name) for name in
            ("preprocess_test1.fna", "preprocess_test2.fna"))
        qual_files = ','.join(
            join(obs_output_dir, name) for name in
            ("preprocess_test1.qual", "preprocess_test2.qual"))

        exp_split_head = as_cmd("split_libraries.py", "-f %s" % fasta_files)
        exp_split_tail = as_cmd("-q %s" % qual_files,
                                "-d",
                                "-o %s" % obs_output_dir,
                                params.to_str())
        exp_convert = as_cmd("convert_fastaqual_fastq.py",
                             "-f %s/seqs.fna" % obs_output_dir,
                             "-q %s/seqs_filtered.qual" % obs_output_dir,
                             "-o %s" % obs_output_dir,
                             "-F")

        obs_cmds = obs_cmd.split('; ')

        # The mapping file path passed through -m cannot be known in advance,
        # so the split_libraries command is cut around it: everything before
        # ' -m ' and everything after the path are checked separately.
        obs_split_head, after_flag = obs_cmds[2].split(' -m ', 1)
        obs_split_tail = after_flag.split(' ', 1)[1]

        self.assertEqual(obs_cmds[0], exp_sff_cmds[0])
        self.assertEqual(obs_cmds[1], exp_sff_cmds[1])
        self.assertEqual(obs_split_head, exp_split_head)
        self.assertEqual(obs_split_tail, exp_split_tail)
        self.assertEqual(obs_cmds[3], exp_convert)
    def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
        # Compare the command chain generated for the fixture raw data and
        # SFF prep template against hand-built expected sub-commands. The
        # expected file names are prefixed with the raw data id.
        def as_cmd(*pieces):
            # Sub-commands are space-separated argument lists.
            return ' '.join(pieces)

        params = Preprocessed454Params(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            self.raw_data, self.sff_prep_template, params)

        raw_id = self.raw_data.id
        in_raw_dir = partial(join, self.db_dir, 'raw_data')
        sff_paths = [in_raw_dir('%d_preprocess_test1.sff' % raw_id),
                     in_raw_dir('%d_preprocess_test2.sff' % raw_id)]

        exp_sff_cmds = [
            as_cmd("process_sff.py",
                   "-i %s" % sff_path,
                   "-o %s" % obs_output_dir)
            for sff_path in sff_paths]

        fasta_names = ["%s_preprocess_test1.fna" % raw_id,
                       "%s_preprocess_test2.fna" % raw_id]
        qual_names = ["%s_preprocess_test1.qual" % raw_id,
                      "%s_preprocess_test2.qual" % raw_id]
        fasta_files = ','.join(
            join(obs_output_dir, name) for name in fasta_names)
        qual_files = ','.join(
            join(obs_output_dir, name) for name in qual_names)

        exp_split_head = as_cmd("split_libraries.py", "-f %s" % fasta_files)
        exp_split_tail = as_cmd("-q %s" % qual_files,
                                "-d",
                                "-o %s" % obs_output_dir,
                                params.to_str())
        exp_convert = as_cmd("convert_fastaqual_fastq.py",
                             "-f %s/seqs.fna" % obs_output_dir,
                             "-q %s/seqs_filtered.qual" % obs_output_dir,
                             "-o %s" % obs_output_dir,
                             "-F")

        obs_cmds = obs_cmd.split('; ')

        # The mapping file path passed through -m cannot be known in advance,
        # so the split_libraries command is cut around it: everything before
        # ' -m ' and everything after the path are checked separately.
        obs_split_head, after_flag = obs_cmds[2].split(' -m ', 1)
        obs_split_tail = after_flag.split(' ', 1)[1]

        self.assertEqual(obs_cmds[0], exp_sff_cmds[0])
        self.assertEqual(obs_cmds[1], exp_sff_cmds[1])
        self.assertEqual(obs_split_head, exp_split_head)
        self.assertEqual(obs_split_tail, exp_split_tail)
        self.assertEqual(obs_cmds[3], exp_convert)
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
        # Attach an extra raw file to raw data 3 and adjust the run prefixes
        # so that the command builder is expected to succeed, then check the
        # shape of the resulting command chain.
        #
        # Fixture, written straight into the test database:
        #   * filepath 19 ('1_new.sff') is attached to raw data 3
        #   * every row of qiita.prep_1 gets run_prefix 'preprocess_test',
        #     then sample 1.SKB8.640193 is switched to 'new'
        SQLConnectionHandler().execute("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)

        raw = RawData(3)
        preprocess_params = Preprocessed454Params(1)
        template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw, template, preprocess_params)

        obs_cmds = obs_cmd.split('; ')
        # Assuming test_get_preprocess_fasta_cmd_sff_no_run_prefix works, it
        # is enough to check which commands are run and the value given to -n.
        self.assertEqual(len(obs_cmds), 9)

        # One row per checked sub-command:
        # (index, expected prefix, expected substring, expected token count);
        # None means "no such check for this sub-command".
        expectations = (
            (0, 'process_sff.py', None, None),
            (1, 'process_sff.py', None, None),
            (2, 'process_sff.py', None, None),
            (3, 'split_libraries.py', '-n 1', None),
            (4, 'split_libraries.py', '-n 800000', None),
            (5, 'cat', 'split_library_log.txt', None),
            (6, 'cat', 'seqs.fna', 5),
            (7, 'cat', 'seqs_filtered.qual', 5),
        )
        for index, prefix, fragment, token_count in expectations:
            command = obs_cmds[index]
            self.assertTrue(command.startswith(prefix))
            if fragment is not None:
                self.assertIn(fragment, command)
            if token_count is not None:
                self.assertEqual(len(command.split(' ')), token_count)
    def test_get_preprocess_sff_gz_cmd(self):
        # Test that the *.sff.gz files are handled correctly.
        params = Preprocessed454Params(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            self.raw_data_gz, self.sff_prep_template, params)

        # The generated command is a '; '-separated chain of sub-commands.
        obs_cmds = obs_cmd.split('; ')
        # Assuming that all the other tests pass, we only need to test
        # the gz file format.
        self.assertEqual(len(obs_cmds), 3)

        # Every pattern fragment is a raw string: '\s' inside a non-raw
        # literal is an invalid escape sequence that newer Python versions
        # warn about. The resulting pattern text is unchanged.
        self.assertRegexpMatches(obs_cmds[0], r'process_sff.py\s+.*'
                                 r'-i\s+.*.sff.gz\s+')
        self.assertRegexpMatches(obs_cmds[1], r'split_libraries.py\s+.*'
                                 r'-f\s+.*.fna\s+.*'
                                 r'-q\s+.*.qual\s+')
        self.assertRegexpMatches(obs_cmds[2], r'convert_fastaqual_fastq.py.*'
                                 r'-f\s+.*seqs.fna\s+.*'
                                 r'-q\s+.*seqs_filtered.qual\s+')
    def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
        # Build the command for the run-prefix fixtures and check the shape
        # of the resulting command chain.
        preprocess_params = Preprocessed454Params(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)

        obs_cmds = obs_cmd.split('; ')
        # Assuming test_get_preprocess_fasta_cmd_sff_no_run_prefix works, it
        # is enough to check which commands are run and the value given to -n.
        self.assertEqual(len(obs_cmds), 8)

        # One row per checked sub-command:
        # (index, expected prefix, expected substring);
        # None means "no substring check for this sub-command".
        expectations = (
            (0, 'process_sff.py', None),
            (1, 'process_sff.py', None),
            (2, 'split_libraries.py', '-n 1'),
            (3, 'split_libraries.py', '-n 800000'),
            (4, 'cat', 'split_library_log.txt'),
            (5, 'cat', 'seqs.fna'),
            (6, 'cat', 'seqs_filtered.qual'),
        )
        for index, prefix, fragment in expectations:
            command = obs_cmds[index]
            self.assertTrue(command.startswith(prefix))
            if fragment is not None:
                self.assertIn(fragment, command)