def test_validate_fasta(self):
        """Run validate_fasta end to end and check the report log it writes.

        validate_fasta is called twice on the sample fasta and mapping files:
        first with only the required arguments, then with a tree and every
        optional check enabled. After each run the report log in the output
        directory is compared, line by line, against the expected text.
        """

        validate_fasta(self.sample_fasta_fp, self.sample_mapping_fp,
                       self.output_dir)

        # The report is named after the input fasta file.
        expected_log_fp = join(self.output_dir,
                               split(self.sample_fasta_fp)[1] + "_report.log")

        # Read through a context manager so the handle is always closed.
        with open(expected_log_fp, "U") as log_f:
            # The first report line is excluded from the comparison
            # (presumably a run-specific header -- TODO confirm).
            actual_log_lines = [line.strip() for line in log_f][1:]

        expected_log_lines = """Percent duplicate labels: 0.000
Percent QIIME-incompatible fasta labels: 0.000
Percent of labels that fail to map to SampleIDs: 0.000
Percent of sequences with invalid characters: 0.000
Percent of sequences with barcodes detected: 0.000
Percent of sequences with barcodes detected at the beginning of the sequence: 0.000
Percent of sequences with primers detected: 0.000""".split('\n')

        self.assertEqual(actual_log_lines, expected_log_lines)

        # Check with all optional values included

        validate_fasta(self.sample_fasta_fp, self.sample_mapping_fp,
                       self.output_dir, tree_fp=self.sample_tree_5tips_fp,
                       tree_subset=True, tree_exact_match=True,
                       same_seq_lens=True, all_ids_found=True)

        # Same report path as the first run; the second run's log is read
        # from it again.
        expected_log_fp = join(self.output_dir,
                               split(self.sample_fasta_fp)[1] + "_report.log")

        with open(expected_log_fp, "U") as log_f:
            actual_log_lines = [line.strip() for line in log_f][1:]

        expected_log_lines = """Percent duplicate labels: 0.000
Percent QIIME-incompatible fasta labels: 0.000
Percent of labels that fail to map to SampleIDs: 0.000
Percent of sequences with invalid characters: 0.000
Percent of sequences with barcodes detected: 0.000
Percent of sequences with barcodes detected at the beginning of the sequence: 0.000
Percent of sequences with primers detected: 0.000
Sequence lengths report
Counts of sequences, followed by their sequence lengths:
1\t35
1\t32
1\t27
Sample ID in fasta sequences report
The following SampleIDs were not found:
seq2
Fasta label subset in tree tips report
All fasta labels were a subset of tree tips.
Fasta label/tree tip exact match report
All fasta labels found in tree tips.
The following tips were not in fasta labels:
seq2
seq5
seq4""".split('\n')

        self.assertEqual(actual_log_lines, expected_log_lines)
    def test_validate_fasta_suppress_primers_barcodes(self):
        """ Overall module test with primer/barcode check suppressed """
       
        # Should raise errors when both primer/barcode check not suppressed
        self.assertRaises(ValueError, validate_fasta, self.sample_fasta_fp,
         self.sample_mapping_file_errors_fp, self.output_dir)
         
        self.assertRaises(ValueError, validate_fasta, self.sample_fasta_fp,
         self.sample_mapping_file_errors_fp, self.output_dir,
         suppress_primer_checks=True)
        
        self.assertRaises(ValueError, validate_fasta, self.sample_fasta_fp,
         self.sample_mapping_file_errors_fp, self.output_dir,
         suppress_barcode_checks=True)
        
        # No errors when both suppressed
        validate_fasta(self.sample_fasta_fp, self.sample_mapping_file_errors_fp,
         self.output_dir, suppress_primer_checks = True,
         suppress_barcode_checks = True)
         
        expected_log_fp = join(self.output_dir,
         split(self.sample_fasta_fp)[1] + "_report.log")
         
        log_f = open(expected_log_fp, "U")
        actual_log_lines = [line.strip() for line in log_f][1:]
        
        expected_log_lines = """Percent duplicate labels: 0.000
Percent QIIME-incompatible fasta labels: 0.000
Percent of labels that fail to map to SampleIDs: 0.000
Percent of sequences with invalid characters: 0.000
Percent of sequences with barcodes detected: 0.000
Percent of sequences with barcodes detected at the beginning of the sequence: 0.000
Percent of sequences with primers detected: 0.000""".split('\n')

        self.assertEqual(actual_log_lines, expected_log_lines)
def main():
    """Parse the command line, sanity-check the inputs, run validate_fasta.

    The output directory is created first. The mapping file, and the tree
    file when one is supplied, are then opened once to confirm they are
    readable before validate_fasta is called with the parsed options.

    Raises:
        IOError: if the mapping file, or a supplied tree file, cannot be
            opened for reading.
        ValueError: if tree_subset or tree_exact_match is requested
            without a tree filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Test optional filepaths and requirements.
    # The files are only probed for readability, never read here, so plain
    # read mode is enough (the legacy "U" flag is rejected by Python 3.11+).
    try:
        with open(mapping_fp):
            pass
    except IOError:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")

    if tree_fp:
        try:
            with open(tree_fp):
                pass
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")

    # Both tree comparisons need a tree to compare against.
    if (tree_subset or tree_exact_match) and not tree_fp:
        raise ValueError('Must provide tree filepath if -s or -e options '
                         'are enabled.')

    validate_fasta(input_fasta_fp, mapping_fp, output_dir, tree_fp,
                   tree_subset, tree_exact_match, same_seq_lens,
                   all_ids_found, opts.suppress_barcode_checks,
                   opts.suppress_primer_checks)
def main():
    """Parse the command line, sanity-check the inputs, run validate_fasta.

    The output directory is created first. The mapping file, and the tree
    file when one is supplied, are then opened once to confirm they are
    readable before validate_fasta is called with the parsed options.

    Raises:
        IOError: if the mapping file, or a supplied tree file, cannot be
            opened for reading.
        ValueError: if tree_subset or tree_exact_match is requested
            without a tree filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Test optional filepaths and requirements.
    # The files are only probed for readability, never read here, so plain
    # read mode is enough (the legacy "U" flag is rejected by Python 3.11+).
    try:
        with open(mapping_fp):
            pass
    except IOError:
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")

    if tree_fp:
        try:
            with open(tree_fp):
                pass
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")

    # Both tree comparisons need a tree to compare against.
    if (tree_subset or tree_exact_match) and not tree_fp:
        raise ValueError('Must provide tree filepath if -s or -e options '
                         'are enabled.')

    validate_fasta(
        input_fasta_fp, mapping_fp, output_dir, tree_fp, tree_subset,
        tree_exact_match, same_seq_lens, all_ids_found,
        opts.suppress_barcode_checks, opts.suppress_primer_checks)
    def test_validate_fasta_with_invalid(self):
        """ Overall module runs properly """
                   
        validate_fasta(self.sample_fasta_invalid_fp, self.sample_mapping_fp,
         self.output_dir)
         
        expected_log_fp = join(self.output_dir,
         split(self.sample_fasta_invalid_fp)[1] + "_report.log")
         
        log_f = open(expected_log_fp, "U")
        actual_log_lines = [line.strip() for line in log_f][1:]
        
        expected_log_lines = """Percent duplicate labels: 0.250
Percent QIIME-incompatible fasta labels: 0.500
Percent of labels that fail to map to SampleIDs: 0.750
Percent of sequences with invalid characters: 0.500
Percent of sequences with barcodes detected: 0.250
Percent of sequences with barcodes detected at the beginning of the sequence: 0.000
Percent of sequences with primers detected: 0.250""".split('\n')

        self.assertEqual(actual_log_lines, expected_log_lines)
Exemple #6
0
    def test_validate_fasta_with_invalid(self):
        """ Overall module runs properly """

        validate_fasta(self.sample_fasta_invalid_fp, self.sample_mapping_fp,
                       self.output_dir)

        expected_log_fp = join(
            self.output_dir,
            split(self.sample_fasta_invalid_fp)[1] + "_report.log")

        log_f = open(expected_log_fp, "U")
        actual_log_lines = [line.strip() for line in log_f][1:]

        expected_log_lines = """Percent duplicate labels: 0.250
Percent QIIME-incompatible fasta labels: 0.500
Percent of labels that fail to map to SampleIDs: 0.750
Percent of sequences with invalid characters: 0.500
Percent of sequences with barcodes detected: 0.250
Percent of sequences with barcodes detected at the beginning of the sequence: 0.000
Percent of sequences with primers detected: 0.250""".split('\n')

        self.assertEqual(actual_log_lines, expected_log_lines)