Example No. 1
    def test_preprocess_coords_file_comparison(self):
        """Check the cases for comparisons plots and the special usages"""
        # shouldn't allow a comparison computation with only one file
        self.assertRaises(AssertionError, preprocess_coords_file,
                          self.coords_header, self.coords_data,
                          self.coords_eigenvalues, self.coords_pct,
                          self.mapping_file_headers_gradient,
                          self.mapping_file_data_gradient, None, None, True)

        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header, self.jk_coords_data,
                self.jk_coords_eigenvalues, self.jk_coords_pcts,
                self.jk_mapping_file_headers, self.jk_mapping_file_data,
                is_comparison=True, pct_variation_below_one=True)

        self.assertEqual(out_coords_header, [
            '1_0', '2_0', '3_0', '1_1', '2_1', '3_1', '1_2', '2_2', '3_2',
            '1_3', '2_3', '3_3'
        ])
        assert_almost_equal(
            out_coords_data,
            array([[1.2, 0.1, -1.2], [-2.5, -4., 4.5], [-1.4, 0.05, 1.3],
                   [2.6, 4.1, -4.7], [-1.5, 0.05, 1.6], [2.4, 4., -4.8],
                   [-1.5, 0.05, 1.6], [2.4, 4., -4.8]]))
        assert_almost_equal(out_eigenvals, self.jk_coords_eigenvalues[0])
        assert_almost_equal(out_pcts, self.jk_coords_pcts[0])
        self.assertEqual(out_coords_low, None)
        self.assertEqual(out_coords_high, None)
        self.assertEqual(o_clones, 4)
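
The assertions above pin down the comparison-mode contract: every sample
identifier comes back suffixed with the index of the input file it came from,
and o_clones reports the number of input files. A small illustrative sketch,
inferred from the assertions rather than taken from the preprocess_coords_file
implementation:

# illustrative only; n_files and sample_ids mirror the fixtures asserted above
n_files = 4                    # o_clones == 4 in the test
sample_ids = ['1', '2', '3']   # ids shared by all four input files
expected = ['%s_%d' % (sid, i)
            for i in range(n_files)
            for sid in sample_ids]
# ['1_0', '2_0', '3_0', '1_1', ..., '3_3'] -- matches out_coords_header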
Example No. 2
    def test_preprocess_coords_file_comparison(self):
        """Check the cases for comparisons plots and the special usages"""
        # shouldn't allow a comparison computation with only one file
        self.assertRaises(
            AssertionError, preprocess_coords_file,
            self.coords_header, self.coords_data, self.coords_eigenvalues,
            self.coords_pct, self.mapping_file_headers_gradient,
            self.mapping_file_data_gradient, None, None, True)

        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header, self.jk_coords_data,
                self.jk_coords_eigenvalues, self.jk_coords_pcts,
                self.jk_mapping_file_headers, self.jk_mapping_file_data,
                is_comparison=True, pct_variation_below_one=True)

        self.assertEqual(out_coords_header,
                         ['1_0', '2_0', '3_0', '1_1', '2_1', '3_1', '1_2',
                          '2_2', '3_2', '1_3', '2_3', '3_3'])
        assert_almost_equal(
            out_coords_data,
            array([[1.2, 0.1, -1.2],
                   [-2.5, -4., 4.5],
                   [-1.4, 0.05, 1.3],
                   [2.6, 4.1, -4.7],
                   [-1.5, 0.05, 1.6],
                   [2.4, 4., -4.8],
                   [-1.5, 0.05, 1.6],
                   [2.4, 4., -4.8]]))
        assert_almost_equal(out_eigenvals, self.jk_coords_eigenvalues[0])
        assert_almost_equal(out_pcts, self.jk_coords_pcts[0])
        self.assertEqual(out_coords_low, None)
        self.assertEqual(out_coords_high, None)
        self.assertEqual(o_clones, 4)
Example No. 3
def generate_pcoa_cloud_from_point_in_omega(map_headers, map_data, biom_object, metric, 
        sequences, iterations, axes, tree_object=None):
    """run the randomisations and get a WebGL PCoA plot string representation

    Input:
    mapping_file_tuple: data and headers tuple for representing the mapping file
    biom_object: otu table biom object
    metric: string of the name for the beta diversity metric, i. e. 'unifrac'
    sequences: number of sequences per sample
    iterations: number of iterations to generate the pcoa plot
    axes: number of axes to account for
    tree_object: tree to perform the beta diversity calculation

    Output:
    WebGL string representing the PCoA plot
    """
    
    pcoa_input = {'pcoa_headers': [], 'pcoa_values': [], 'eigenvalues': [],
                  'coords_pct': []}
    for i in range(iterations):
        rare_biom_table = get_rare_data(biom_object, sequences)
        beta_dm = single_object_beta(rare_biom_table, metric, tree_object)
        pcoa_results = pcoa(beta_dm)

        pcoa_file = StringIO()
        pcoa_file.write(pcoa_results)
        pcoa_file.seek(0)
        pcoa_headers, pcoa_values, eigenvalues, coords_pct = parse_coords(pcoa_file)
        pcoa_file.close()
        pcoa_input['pcoa_headers'].append(pcoa_headers)
        pcoa_input['pcoa_values'].append(pcoa_values)
        pcoa_input['eigenvalues'].append(eigenvalues)
        pcoa_input['coords_pct'].append(coords_pct)
    
    if iterations == 1:
        coords_headers = pcoa_input['pcoa_headers'][0]
        coords_data = pcoa_input['pcoa_values'][0]
        coords_eigenvalues = pcoa_input['eigenvalues'][0]
        coords_pct = pcoa_input['coords_pct'][0]
        coords_low, coords_high = None, None
    else:
        coords_headers, coords_data, coords_eigenvalues, coords_pct, coords_low,\
            coords_high, clones = preprocess_coords_file(pcoa_input['pcoa_headers'],
            pcoa_input['pcoa_values'], pcoa_input['eigenvalues'], 
            pcoa_input['coords_pct'], map_headers, map_data, custom_axes=None, 
            jackknifing_method='IQR', is_comparison=False)
    
    return make_pcoa_plot(coords_headers, coords_data, coords_eigenvalues,
                          coords_pct, map_headers, map_data, coords_low,
                          coords_high, True)
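
A minimal usage sketch for the function above. The loaders and the metric
choice are assumptions for illustration (a non-phylogenetic metric such as
'bray_curtis' is used so no tree is required); they are not part of the
original snippet:

# hypothetical driver code; parse_mapping_file and parse_biom_table are the
# QIIME-era loaders this code base relies on elsewhere
map_data, map_headers, _ = parse_mapping_file(open('map.txt', 'U'))
biom_object = parse_biom_table(open('otu_table.biom', 'U'))
webgl_plot = generate_pcoa_cloud_from_point_in_omega(
    map_headers, map_data, biom_object, 'bray_curtis',
    sequences=100, iterations=10, axes=3)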
Example No. 4
    # terminate the program for the cases where a mapping field was not found
    # or when a mapping field didn't meet the criteria of being numeric
    if offending_fields:
        option_parser.error("Invalid field(s) '%s'; the valid field(s) are:"
            " '%s'" % (', '.join(offending_fields), ', '.join(header)))
    if non_numeric_categories:
        option_parser.error(('The following field(s): \'%s\' contain values '
            'that are not numeric, hence not suitable for \'--custom_axes\' nor'
            ' for \'--add_vectors\'. Try the \'--missing_custom_axes_values\' '
            'option to fix these values.' % ', '.join(non_numeric_categories)))

    # process the coordinates file first, preventing the case where the custom
    # axes are not in the coloring categories, i.e. the --color_by categories
    coords_headers, coords_data, coords_eigenvalues, coords_pct, coords_low,\
        coords_high, clones = preprocess_coords_file(
            coords_headers, coords_data, coords_eigenvalues, coords_pct,
            header, mapping_data, custom_axes,
            jackknifing_method=jackknifing_method, is_comparison=compare_plots,
            pct_variation_below_one=pct_variation_below_one)

    # process the otu table after processing the coordinates to get custom axes
    # (when available) or any other change that occurred to the coordinates
    otu_coords, otu_table, otu_lineages, otu_prevalence, lines =\
        preprocess_otu_table(otu_sample_ids, otu_table, lineages,
        coords_data, coords_headers, n_taxa_to_keep)

    # remove the columns in the mapping file that are not informative taking
    # into account the header names that were already authorized to be used
    # and take care of concatenating the fields for the && merged columns
    mapping_data, header = preprocess_mapping_file(mapping_data, header,
        color_by_column_names, unique=not add_unique_columns, clones=clones)

    # create the output directory before creating any other output
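
The snippet is truncated at the directory-creation step; judging from the full
script in Example No. 9, it continues roughly as follows:

    # sketch of the omitted continuation, mirroring Example No. 9
    if not isdir(output_dir):
        makedirs(output_dir)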
Example No. 5
    # terminate the program for the cases where a mapping field was not found
    # or when a mapping field didn't meet the criteria of being numeric
    if offending_fields:
        option_parser.error("Invalid field(s) '%s'; the valid field(s) are:"
            " '%s'" % (', '.join(offending_fields), ', '.join(header)))
    if non_numeric_categories:
        option_parser.error(('The following field(s): \'%s\' contain values '
            'that are not numeric, hence not suitable for \'--custom_axes\' nor'
            ' for \'--add_vectors\'. Try the \'--missing_custom_axes_values\' '
            'option to fix these values.' % ', '.join(non_numeric_categories)))

    # process the coordinates file first, preventing the case where the custom
    # axes are not in the coloring categories, i.e. the --color_by categories
    coords_headers, coords_data, coords_eigenvalues, coords_pct, coords_low,\
        coords_high, clones = preprocess_coords_file(
            coords_headers, coords_data, coords_eigenvalues, coords_pct,
            header, mapping_data, custom_axes,
            jackknifing_method=jackknifing_method, is_comparison=compare_plots)

    # process the otu table after processing the coordinates to get custom axes
    # (when available) or any other change that occurred to the coordinates
    otu_coords, otu_table, otu_lineages, otu_prevalence, lines =\
        preprocess_otu_table(otu_sample_ids, otu_table, lineages,
        coords_data, coords_headers, n_taxa_to_keep)

    # remove the columns in the mapping file that are not informative taking
    # into account the header names that were already authorized to be used
    # and take care of concatenating the fields for the && merged columns
    mapping_data, header = preprocess_mapping_file(mapping_data, header,
        color_by_column_names, unique=not add_unique_columns, clones=clones)

    # create the output directory before creating any other output
Example No. 6
    def test_preprocess_coords_file(self):
        """Check correct processing is applied to the coords"""

        # case with custom axes
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.coords_header, self.coords_data, self.coords_eigenvalues,
                self.coords_pct, self.mapping_file_headers_gradient,
                self.mapping_file_data_gradient, ['Time'])

        expected_coords_data = array(
            [[0.03333333, -0.2, -0.1, 0.06, -0.06],
             [0.03333333, -0.3, 0.04, -0.1, 0.15],
             [0.2, 0.1, -0.1, -0.2, 0.08],
             [-0.3, 0.04, -0.01,  0.06, -0.34]])

        self.assertEqual(out_coords_header, self.coords_header)
        self.assertEqual(out_coords_high, None)
        self.assertEqual(out_coords_low, None)
        assert_almost_equal(self.coords_eigenvalues, array([1, 2, 3, 4]))
        assert_almost_equal(self.coords_pct, array([40, 30, 20, 10]))
        self.assertEqual(o_clones, 0)

        # check each individual value because currently cogent assertEquals
        # fails when comparing the whole matrix at once
        for out_el, exp_el in zip(out_coords_data, expected_coords_data):
            for out_el_sub, exp_el_sub in zip(out_el, exp_el):
                self.assertAlmostEqual(out_el_sub, exp_el_sub)

        # case for jackknifing, based on qiime/tests/test_util.summarize_pcoas
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header, self.jk_coords_data,
                self.jk_coords_eigenvalues, self.jk_coords_pcts,
                self.jk_mapping_file_headers, self.jk_mapping_file_data,
                jackknifing_method='sdev', pct_variation_below_one=True)

        self.assertEqual(out_coords_header, ['1', '2', '3'])
        assert_almost_equal(out_coords_data, array([[1.4, -0.0125, -1.425],
                                                    [-2.475, -4.025, 4.7]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05]))
        assert_almost_equal(out_pcts, array([0.8, 0.1, 0.1]))
        self.assertEqual(o_clones, 0)

        # check the jackknifed low/high (error bar) coordinate bounds
        assert_almost_equal(out_coords_low,
                            array([[-0.07071068, -0.0375, -0.10307764],
                                   [-0.04787136, -0.025, -0.07071068]]))
        assert_almost_equal(out_coords_high,
                            array([[0.07071068, 0.0375, 0.10307764],
                                   [0.04787136, 0.025, 0.07071068]]))

        # test custom axes and jackknifed plots
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header_gradient, self.jk_coords_data_gradient,
                self.jk_coords_eigenvalues_gradient,
                self.jk_coords_pcts_gradient,
                self.jk_mapping_file_headers_gradient,
                self.jk_mapping_file_data_gradient, custom_axes=['Time'],
                jackknifing_method='sdev', pct_variation_below_one=True)

        self.assertEqual(out_coords_header,
                         ['PC.354', 'PC.355', 'PC.635', 'PC.636'])
        assert_almost_equal(out_coords_data,
                            array([[-2.4, 1.15, 0.55, -0.95, 0.85],
                                   [0.73333333, -2.4, -3.5, 4.25, 1.025],
                                   [0.73333333, 0.5, 0.45, 3.5, 1.2505],
                                   [2.3, 0.6325, 0.2575, 1.0675, 2.125]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05, 0.]))
        assert_almost_equal(out_pcts, array([0.8, 0.1, 0.1, 0.]))

        # check the jackknifed low/high (error bar) coordinate bounds
        assert_almost_equal(
            out_coords_low,
            array([[0., -0.25980762, -0.25, -0.25],
                   [0., -0.5, -0.25, -0.725],
                   [0., -0.85, -0., -0.24983344],
                   [0., -0.02809953, -0.07877976, -0.04787136]]))
        assert_almost_equal(
            out_coords_high,
            array([[1.00000000e-05, 2.59807621e-01, 2.50000000e-01,
                    2.50000000e-01],
                   [1.00000000e-05, 5.00000000e-01, 2.50000000e-01,
                    7.25000000e-01],
                   [1.00000000e-05, 8.50000000e-01, 0.00000000e+00,
                    2.49833445e-01],
                   [1.00000000e-05, 2.80995255e-02, 7.87797563e-02,
                    4.78713554e-02]]))
        self.assertEqual(o_clones, 0)

        # test that pct_variation_below_one is working
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header_gradient, self.jk_coords_data_gradient,
                self.jk_coords_eigenvalues_gradient,
                self.jk_coords_pcts_gradient,
                self.jk_mapping_file_headers_gradient,
                self.jk_mapping_file_data_gradient, custom_axes=['Time'],
                jackknifing_method='sdev', pct_variation_below_one=False)

        self.assertEqual(out_coords_header,
                         ['PC.354', 'PC.355', 'PC.635', 'PC.636'])
        assert_almost_equal(
            out_coords_data,
            array([[-2.4, 1.15, 0.55, -0.95, 0.85],
                   [0.73333333, -2.4, -3.5, 4.25, 1.025],
                   [0.73333333, 0.5, 0.45, 3.5, 1.2505],
                   [2.3, 0.6325, 0.2575, 1.0675, 2.125]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05, 0.]))
        assert_almost_equal(out_pcts, array([80, 10, 10, 0]))
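
The last two calls differ only in pct_variation_below_one; comparing their
assertions, the flag keeps fraction-style percentages (0.8, 0.1, ...) as-is
when True, while False rescales them to conventional percentages. A sketch of
the apparent effect, inferred from the expected values rather than taken from
the implementation:

# inferred behavior only; the rescaling condition is a guess
pcts = array([0.8, 0.1, 0.1, 0.])
pct_variation_below_one = False
if not pct_variation_below_one and pcts.sum() <= 1.0:
    pcts = pcts * 100   # -> array([80., 10., 10., 0.])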
Example No. 7
    def test_preprocess_coords_file(self):
        """Check correct processing is applied to the coords"""

        # case with custom axes
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.coords_header, self.coords_data, self.coords_eigenvalues,
                self.coords_pct, self.mapping_file_headers_gradient,
                self.mapping_file_data_gradient, ['Time'])

        expected_coords_data = array([[0.03333333, -0.2, -0.1, 0.06, -0.06],
                                      [0.03333333, -0.3, 0.04, -0.1, 0.15],
                                      [0.2, 0.1, -0.1, -0.2, 0.08],
                                      [-0.3, 0.04, -0.01, 0.06, -0.34]])

        self.assertEqual(out_coords_header, self.coords_header)
        self.assertEqual(out_coords_high, None)
        self.assertEqual(out_coords_low, None)
        assert_almost_equal(self.coords_eigenvalues, array([1, 2, 3, 4]))
        assert_almost_equal(self.coords_pct, array([40, 30, 20, 10]))
        self.assertEqual(o_clones, 0)

        # check each individual value because currently cogent assertEquals
        # fails when comparing the whole matrix at once
        for out_el, exp_el in zip(out_coords_data, expected_coords_data):
            for out_el_sub, exp_el_sub in zip(out_el, exp_el):
                self.assertAlmostEqual(out_el_sub, exp_el_sub)

        # case for jackknifing, based on qiime/tests/test_util.summarize_pcoas
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header, self.jk_coords_data,
                self.jk_coords_eigenvalues, self.jk_coords_pcts,
                self.jk_mapping_file_headers, self.jk_mapping_file_data,
                jackknifing_method='sdev', pct_variation_below_one=True)

        self.assertEqual(out_coords_header, ['1', '2', '3'])
        assert_almost_equal(
            out_coords_data,
            array([[1.4, -0.0125, -1.425], [-2.475, -4.025, 4.7]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05]))
        assert_almost_equal(out_pcts, array([0.8, 0.1, 0.1]))
        self.assertEqual(o_clones, 0)

        # check the jackknifed low/high (error bar) coordinate bounds
        assert_almost_equal(
            out_coords_low,
            array([[-0.07071068, -0.0375, -0.10307764],
                   [-0.04787136, -0.025, -0.07071068]]))
        assert_almost_equal(
            out_coords_high,
            array([[0.07071068, 0.0375, 0.10307764],
                   [0.04787136, 0.025, 0.07071068]]))

        # test custom axes and jackknifed plots
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header_gradient, self.jk_coords_data_gradient,
                self.jk_coords_eigenvalues_gradient,
                self.jk_coords_pcts_gradient,
                self.jk_mapping_file_headers_gradient,
                self.jk_mapping_file_data_gradient, custom_axes=['Time'],
                jackknifing_method='sdev', pct_variation_below_one=True)

        self.assertEqual(out_coords_header,
                         ['PC.354', 'PC.355', 'PC.635', 'PC.636'])
        assert_almost_equal(
            out_coords_data,
            array([[-2.4, 1.15, 0.55, -0.95, 0.85],
                   [0.73333333, -2.4, -3.5, 4.25, 1.025],
                   [0.73333333, 0.5, 0.45, 3.5, 1.2505],
                   [2.3, 0.6325, 0.2575, 1.0675, 2.125]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05, 0.]))
        assert_almost_equal(out_pcts, array([0.8, 0.1, 0.1, 0.]))

        # check the jackknifed low/high (error bar) coordinate bounds
        assert_almost_equal(
            out_coords_low,
            array([[0., -0.05, -0.25980762, -0.25, -0.25],
                   [0., -0.1, -0.5, -0.25, -0.725],
                   [0., -0., -0.85, -0., -0.24983344],
                   [0., -0.0283945, -0.02809953, -0.07877976, -0.04787136]]))
        assert_almost_equal(
            out_coords_high,
            array([[1.00000000e-05, 5.0000000e-02, 2.59807621e-01,
                    2.50000000e-01, 2.50000000e-01],
                   [1.00000000e-05, 1.0000000e-01, 5.00000000e-01,
                    2.50000000e-01, 7.25000000e-01],
                   [1.00000000e-05, 0.0, 8.50000000e-01, 0.00000000e+00,
                    2.49833445e-01],
                   [1.00000000e-05, 2.83945417e-02, 2.80995255e-02,
                    7.87797563e-02, 4.78713554e-02]]))
        self.assertEqual(o_clones, 0)

        # test that pct_variation_below_one is working
        out_coords_header, out_coords_data, out_eigenvals, out_pcts,\
            out_coords_low, out_coords_high, o_clones = preprocess_coords_file(
                self.jk_coords_header_gradient, self.jk_coords_data_gradient,
                self.jk_coords_eigenvalues_gradient,
                self.jk_coords_pcts_gradient,
                self.jk_mapping_file_headers_gradient,
                self.jk_mapping_file_data_gradient, custom_axes=['Time'],
                jackknifing_method='sdev', pct_variation_below_one=False)

        self.assertEqual(out_coords_header,
                         ['PC.354', 'PC.355', 'PC.635', 'PC.636'])
        assert_almost_equal(
            out_coords_data,
            array([[-2.4, 1.15, 0.55, -0.95, 0.85],
                   [0.73333333, -2.4, -3.5, 4.25, 1.025],
                   [0.73333333, 0.5, 0.45, 3.5, 1.2505],
                   [2.3, 0.6325, 0.2575, 1.0675, 2.125]]))
        assert_almost_equal(out_eigenvals, array([0.81, 0.14, 0.05, 0.]))
        assert_almost_equal(out_pcts, array([80, 10, 10, 0]))
Example No. 8
        option_parser.error("Invalid field(s) '%s'; the valid field(s) are: "
                            "'%s'" %
                            (', '.join(offending_fields), ', '.join(header)))
    if non_numeric_categories:
        option_parser.error(
            ("The following field(s): '%s' contain values "
             "that are not numeric, hence not suitable for "
             "'--custom_axes' nor for '--add_vectors'. Try "
             "the '--missing_custom_axes_values' option to "
             "fix these values." % ', '.join(non_numeric_categories)))

    # process the coordinates file first, preventing the case where the custom
    # axes are not in the coloring categories, i.e. the --color_by
    # categories
    preprocessed_coords = preprocess_coords_file(
        coords_headers, coords_data, coords_eigenvalues, coords_pct, header,
        mapping_data, custom_axes, jackknifing_method, compare_plots,
        pct_variation_below_one)
    coords_headers = preprocessed_coords[0]
    coords_data = preprocessed_coords[1]
    coords_eigenvalues = preprocessed_coords[2]
    coords_pct = preprocessed_coords[3]
    coords_low = preprocessed_coords[4]
    coords_high = preprocessed_coords[5]
    clones = preprocessed_coords[6]

    # process the otu table after processing the coordinates to get custom axes
    # (when available) or any other change that occurred to the coordinates
    preprocessed_otu_table = preprocess_otu_table(otu_sample_ids, otu_table,
                                                  lineages, coords_data,
                                                  coords_headers,
                                                  n_taxa_to_keep)
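
The element-by-element indexing above is verbose; Examples No. 4 and No. 5
unpack the same seven return values in a single statement, which reads better:

    # equivalent single-statement unpacking of preprocess_coords_file
    (coords_headers, coords_data, coords_eigenvalues, coords_pct,
     coords_low, coords_high, clones) = preprocess_coords_file(
        coords_headers, coords_data, coords_eigenvalues, coords_pct, header,
        mapping_data, custom_axes, jackknifing_method, compare_plots,
        pct_variation_below_one)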
Example No. 9
def main():
    #option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_coords = args.input_coords
    map_fp = args.map_fp
    output_dir = args.output_dir
    
    color_by_column_names = None         # opts.color_by
    add_unique_columns = False           # opts.add_unique_columns
    custom_axes = None                   # opts.custom_axes
    ignore_missing_samples = False       # opts.ignore_missing_samples
    missing_custom_axes_values = None    # opts.missing_custom_axes_values
    jackknifing_method = 'IQR'           # opts.ellipsoid_method
    master_pcoa = None                   # opts.master_pcoa
    taxa_fp = None                       # opts.taxa_fp
    n_taxa_to_keep = False               # opts.n_taxa_to_keep
    biplot_fp = None                     # opts.biplot_fp
    add_vectors = [None, None]           # opts.add_vectors
    verbose_output = False               # opts.verbose
    number_of_axes = 10                  # opts.number_of_axes
    compare_plots = False                # opts.compare_plots
    number_of_segments = 8               # opts.number_of_segments
    pct_variation_below_one = True       # opts.pct_variation_below_one

    # add some metadata to the output
    emperor_autograph = format_emperor_autograph(map_fp, input_coords, 'HTML')

    # verify that at least 3 axes were requested
    if number_of_axes < 3:
        print('You need to plot at least 3 axes.')

    # verifying that the number of segments is between the desired range
    if not (4 <= number_of_segments <= 14):
        print('number_of_segments should be between 4 and 14.')

    # header names that the script doesn't find in the mapping file are
    # appended to the following variables, according to different criteria
    offending_fields = []
    non_numeric_categories = []

    serial_comparison = True

    # can't do averaged pcoa plots _and_ custom axes in the same plot
    if custom_axes is not None and isdir(input_coords):
        if custom_axes.count(',') > 0:
            print('Jackknifed plots are limited to one custom '
                  'axis, currently trying to use: %s. Make '
                  'sure you use only one.' % custom_axes)

    # make sure the flag is not misunderstood from the command line interface
    if not isdir(input_coords) and compare_plots:
        print("Cannot use the '--compare_plots' flag unless the "
                            "input path is a directory.")

    # before creating any output, check correct parsing of the main input files
    try:
        mapping_data, header, comments = parse_mapping_file(open(map_fp, 'U'))
    except Exception:
        sys.exit("The metadata mapping file '%s' does not seem "
                 "to be formatted correctly, verify the "
                 "formatting is QIIME compliant by using "
                 "validate_mapping_file.py" % map_fp)
    else:
        # use this set variable to make presence/absence checks faster
        lookup_header = set(header)
        mapping_ids = {row[0] for row in mapping_data}

    # dir means jackknifing or coordinate comparison type of processing
    if isdir(input_coords):
        offending_coords_fp = []
        coords_headers = []
        coords_data = []
        coords_eigenvalues = []
        coords_pct = []

        coord_fps = guess_coordinates_files(input_coords)

        # QIIME generates folders of transformed coordinates for the specific
        # purpose of connecting all coordinates to a set of origin coordinates.
        # The name of this file is suffixed as _transformed_reference.txt
        trans_suf = '_transformed_reference.txt'
        transformed = [f for f in coord_fps if f.endswith(trans_suf)]

        # this could happen and we'd rather catch it early
        if len(coord_fps) == 0:
            print('Could not use any of the files in the input '
                  'directory.')

        # the master pcoa must be the first in the list of coordinates; however
        # if the visualization is not a jackknifed plot this gets ignored
        if master_pcoa and not compare_plots:
            if master_pcoa in coord_fps:  # remove it if duplicated
                coord_fps.remove(master_pcoa)
            coord_fps = [master_pcoa] + coord_fps  # prepend it to the list
        # passing a master file means that the comparison is not serial
        elif master_pcoa and compare_plots:
            serial_comparison = False

            # guarantee that the master is the first and is not repeated
            if master_pcoa in coord_fps:
                coord_fps.remove(master_pcoa)
                sorted_filenames = sort_comparison_filenames(coord_fps)
                coord_fps = [master_pcoa] + sorted_filenames

        elif master_pcoa is None and len(transformed):
            master_pcoa = transformed[0]
            serial_comparison = False

            # Note: the following steps are to guarantee consistency.
            # remove the master from the list and re-add it as a first element
            # the rest of the files must be sorted alphabetically so the result
            # will be: ['unifrac_transformed_reference.txt',
            # 'unifrac_transformed_q1.txt', 'unifrac_transformed_q2.txt'] etc
            coord_fps.remove(master_pcoa)
            coord_fps = [master_pcoa] + sort_comparison_filenames(coord_fps)

        for fp in coord_fps:
            try:
                parsed = parse_coords(open(fp, 'U'))
            except (ValueError, QiimeParseError):
                offending_coords_fp.append(fp)

                # do not add any of the data and move along
                continue
            else:
                # pack all the data correspondingly only if it was correctly
                # parsed
                coords_headers.append(parsed[0])
                coords_data.append(parsed[1])
                coords_eigenvalues.append(parsed[2])
                coords_pct.append(parsed[3])

        # in case there were files that couldn't be parsed
        if offending_coords_fp:
            errout = ', '.join(offending_coords_fp)
            sys.exit(("The following file(s): '%s' could not be "
                                 "parsed properly. Make sure the input folder "
                                 "only contains coordinates files.") % errout)

        # check all files contain the same sample identifiers by flattening the
        # list of available sample ids and returning the sample ids that are
        # in one of the sets of sample ids but not in the globally shared ids
        _coords_headers = set(flatten(coords_headers))
        _per_file_missing = [_coords_headers - set(e) for e in coords_headers]
        non_shared_ids = set(flatten(_per_file_missing))
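        # worked example: headers [['a', 'b'], ['a', 'c']] give
        # _coords_headers {'a', 'b', 'c'}, _per_file_missing [{'c'}, {'b'}]
        # and thus non_shared_ids {'b', 'c'}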
        if non_shared_ids:
            errout = ', '.join(non_shared_ids)
            sys.exit(("The following sample identifier(s): '%s' "
                                 "are not shared between all the files. The "
                                 "files used to make a jackknifed PCoA plot "
                                 "or coordinate comparison plot (procustes "
                                 "plot) must share all the same sample "
                                 "identifiers between each other.") % errout)

        # number of samples ids that are shared between coords and mapping
        # files
        sids_intersection = mapping_ids.intersection(_coords_headers)

        # sample ids that are not mapped but are in the coords
        sids_difference = _coords_headers.difference(mapping_ids)

        # used to perform different validations in the script, very similar for
        # the case where the input is not a directory
        number_intersected_sids = len(sids_intersection)
        required_number_of_sids = len(coords_headers[0])

    else:
        try:
            parsed = parse_coords(open(input_coords, 'U'))
        # this exception was noticed when there were letters in the coords file
        # other exceptions should be caught here; code will be updated then
        except (ValueError, QiimeParseError):
            sys.exit("The PCoA file '%s' does not seem to be a "
                     "coordinates formatted file, verify by "
                     "manually inspecting the contents." % input_coords)
        else:
            coords_headers = parsed[0]
            coords_data = parsed[1]
            coords_eigenvalues = parsed[2]
            coords_pct = parsed[3]

        # number of samples ids that are shared between coords and mapping
        # files
        sids_intersection = mapping_ids.intersection(coords_headers)

        # sample ids that are not mapped but are in the coords
        sids_difference = set(coords_headers).difference(mapping_ids)

        number_intersected_sids = len(sids_intersection)
        required_number_of_sids = len(coords_headers)

    if taxa_fp:
        try:
            # This should really use BIOM's Table.from_tsv
            # for summarized tables the "otu_ids" are really the "lineages"
            parsed = parse_otu_table(open(taxa_fp, 'U'), count_map_f=float,
                                     remove_empty_rows=True)
        except ValueError as e:
            sys.exit("There was a problem parsing the --taxa_fp: "
                     "%s" % e)
        else:
            otu_sample_ids = parsed[0]
            lineages = parsed[1]
            otu_table = parsed[2]

        # make sure there are matching sample ids with the otu table
        if not sids_intersection.issuperset(otu_sample_ids):
            sys.exit("The sample identifiers in the OTU table must "
                                "have at least one match with the data in the "
                                "mapping file and with the coordinates file. "
                                "Verify you are using input files that belong "
                                "to the same dataset.")
        if len(lineages) <= 1:
            sys.exit("Contingency tables with one or fewer rows "
                                "are not supported, please try passing a "
                                "contingency table with more than one row.")
    else:
        # empty lists indicate that there was no taxa file passed in
        otu_sample_ids, lineages, otu_table = [], [], []

    # sample ids must be shared between files
    if number_intersected_sids <= 0:
        sys.exit('None of your sample identifiers match between the'
                 ' mapping file and the coordinates file. Verify '
                 'you are using a coordinates file and a mapping '
                 'file that belong to the same dataset.')

    # the intersection of the sample ids in the coords and the sample ids in
    # the mapping file must at the very least include all ids in the coords
    # file; otherwise it isn't valid, unless --ignore_missing_samples is set
    # to True
    if number_intersected_sids != required_number_of_sids:
        if ignore_missing_samples:
            # keep only the samples that are mapped in the mapping file
            coords_headers, coords_data = keep_samples_from_pcoa_data(
                coords_headers, coords_data, sids_intersection)
        else:
            message = ("The metadata mapping file has fewer sample "
                       "identifiers than the coordinates file. Verify you are "
                       "using a mapping file that contains at least all the "
                       "samples contained in the coordinates file(s). You can "
                       "force the script to ignore these samples by passing "
                       "the '--ignore_missing_samples' flag.")

            if verbose_output:
                missing_ids = ', '.join(sids_difference)
                message += ' Offending sample identifier(s): %s.' % missing_ids

            sys.exit(message)

    # ignore samples that exist in the coords but not in the mapping file,
    # note: we're using sids_intersection so if --ignore_missing_samples is
    # enabled we account for unmapped coords, else the program will exit before
    # this point
    header, mapping_data = filter_mapping_file(mapping_data, header,
                                               sids_intersection,
                                               include_repeat_cols=True)

    # catch the errors that could occur when filling the mapping file values
    if missing_custom_axes_values:
        # the fact that this uses parse_metadata_state_descriptions makes the
        # following option '-x Category:7;PH:12' to work as well as the
        # script-interface-documented '-x Category:7 -x PH:12' option
        for val in missing_custom_axes_values:
            if ':' not in val:
                sys.exit("Not valid missing value for custom "
                                    "axes: %s" % val)
        _mcav = ';'.join(missing_custom_axes_values)
        try:
            mapping_data = fill_mapping_field_from_mapping_file(mapping_data,
                                                                header, _mcav)
        except AssertionError as e:
            print(e)
        except EmperorInputFilesError as e:
            print(e)

    # check that all the required columns exist in the metadata mapping file
    if color_by_column_names:
        color_by_column_names = color_by_column_names.split(',')

        # check for all the mapping fields
        for col in color_by_column_names:
            # for concatenated columns check each individual field
            parts = col.split('&&')
            offending_fields.extend(p for p in parts if p not in lookup_header)
    else:
        # if the user didn't specify the header names display everything
        color_by_column_names = [None]

    # extract a list of the custom axes provided and each element is numeric
    if custom_axes:
        custom_axes = custom_axes.strip().strip("'").strip('"').split(',')

        # the MetadataMap object makes some checks easier
        map_object = MetadataMap(mapping_file_to_dict(mapping_data, header),
                                 [])
        for axis in custom_axes:
            # append the field to the error queue that it belongs to
            if axis not in lookup_header:
                offending_fields.append(axis)
                break
            # make sure this value is in the mapping file
            elif axis not in color_by_column_names:
                color_by_column_names.append(axis)
        # perform only if the for loop does not call break
        else:
            # make sure all these axes are numeric
            for axis in custom_axes:
                if not map_object.isNumericCategory(axis):
                    non_numeric_categories.append(axis)

    # make multiple checks for the add_vectors option
    if add_vectors != [None, None]:
        add_vectors = add_vectors.split(',')
        # check there are at the most two categories specified for this option
        if len(add_vectors) > 2:
            print("The '--add_vectors' option can accept up to "
                                "two different fields from the mapping file; "
                                "currently trying to use %d (%s)." %
                                (len(add_vectors), ', '.join(add_vectors)))
        # make sure the field(s) exist
        for col in add_vectors:
            # concatenated fields are allowed now so check for each field
            if '&&' in col:
                for _col in col.split('&&'):
                    if _col not in lookup_header:
                        offending_fields.append(col)
                        break
                # only execute this block of code if all checked fields exist
                else:
                    # make sure that if it's going to be used for vector
                    # creation it gets used for coloring and map postprocessing
                    if col not in color_by_column_names:
                        color_by_column_names.append(col)
            # if it's a column without concatenations
            elif col not in lookup_header:
                offending_fields.append(col)
                break
            else:
                # check this vector value is in the color by category
                if col not in color_by_column_names:
                    color_by_column_names.append(col)
        # perform only if the for loop does not call break
        else:
            # check that the second category is all with numeric values
            if len(add_vectors) == 2:
                map_object = MetadataMap(mapping_file_to_dict(mapping_data,
                                                              header),
                                         [])
                # if it has non-numeric values add it to the list of offenders
                if not map_object.isNumericCategory(add_vectors[1]):
                    msg = add_vectors[1] + '(used in --add_vectors)'
                    non_numeric_categories.append(msg)
            else:
                add_vectors.append(None)

    # terminate the program for the cases where a mapping field was not found
    # or when a mapping field didn't meet the criteria of being numeric
    if offending_fields:
        sys.exit("Invalid field(s) '%s'; the valid field(s) are: "
                            "'%s'" % (', '.join(offending_fields),
                                      ', '.join(header)))
    if non_numeric_categories:
        sys.exit(("The following field(s): '%s' contain values "
                             "that are not numeric, hence not suitable for "
                             "'--custom_axes' nor for '--add_vectors'. Try "
                             "the '--missing_custom_axes_values' option to "
                             "fix these values." %
                             ', '.join(non_numeric_categories)))

    # process the coordinates file first, preventing the case where the custom
    # axes are not in the coloring categories, i.e. the --color_by
    # categories
    preprocessed_coords = preprocess_coords_file(coords_headers, coords_data,
                                                 coords_eigenvalues,
                                                 coords_pct, header,
                                                 mapping_data, custom_axes,
                                                 jackknifing_method,
                                                 compare_plots,
                                                 pct_variation_below_one)
    coords_headers = preprocessed_coords[0]
    coords_data = preprocessed_coords[1]
    coords_eigenvalues = preprocessed_coords[2]
    coords_pct = preprocessed_coords[3]
    coords_low = preprocessed_coords[4]
    coords_high = preprocessed_coords[5]
    clones = preprocessed_coords[6]

    # process the otu table after processing the coordinates to get custom axes
    # (when available) or any other change that occurred to the coordinates
    preprocessed_otu_table = preprocess_otu_table(otu_sample_ids, otu_table,
                                                  lineages, coords_data,
                                                  coords_headers,
                                                  n_taxa_to_keep)
    otu_coords = preprocessed_otu_table[0]
    otu_table = preprocessed_otu_table[1]
    otu_lineages = preprocessed_otu_table[2]
    otu_prevalence = preprocessed_otu_table[3]
    lines = preprocessed_otu_table[4]

    # remove the columns in the mapping file that are not informative taking
    # into account the header names that were already authorized to be used
    # and take care of concatenating the fields for the && merged columns
    mapping_data, header = preprocess_mapping_file(mapping_data, header,
                                                   color_by_column_names,
                                                   not add_unique_columns,
                                                   clones=clones)

    # create the output directory before creating any other output
    if not isdir(output_dir):
        makedirs(output_dir)

    fp_out = open(join(output_dir, 'index.html'), 'w')
    fp_out.write(emperor_autograph+'\n')
    fp_out.write(EMPEROR_HEADER_HTML_STRING)

    # write the html file
    fp_out.write(format_mapping_file_to_js(mapping_data, header, header))

    # certain percents being explained cannot be displayed in the GUI
    try:
        fp_out.write(format_pcoa_to_js(coords_headers, coords_data,
                                       coords_eigenvalues, coords_pct,
                                       custom_axes, coords_low,
                                       coords_high,
                                       number_of_axes=number_of_axes,
                                       number_of_segments=number_of_segments))
    except EmperorLogicError as e:
        sys.exit(str(e))

    fp_out.write(format_taxa_to_js(otu_coords, otu_lineages, otu_prevalence))
    fp_out.write(format_vectors_to_js(mapping_data, header, coords_data,
                                      coords_headers, add_vectors[0],
                                      add_vectors[1]))
    fp_out.write(format_comparison_bars_to_js(coords_data, coords_headers,
                                              clones, serial_comparison))
    has_taxa = taxa_fp is not None
    has_input_coords = isdir(input_coords) and not compare_plots
    has_add_vectors = add_vectors != [None, None]
    has_clones = clones > 0
    fp_out.write(format_emperor_html_footer_string(has_taxa, has_input_coords,
                                                   has_add_vectors,
                                                   has_clones))
    fp_out.close()
    copy_support_files(output_dir)

    # write the biplot coords in the output file if a path is passed
    if biplot_fp and taxa_fp:
        if biplot_fp.endswith('/') or isdir(biplot_fp):
            print("Do not specify a path to a new (path ending "
                                "in a slash) or existing directory for "
                                "biplot_fp. The output file will be a "
                                "tab-delimited text file.")

        # make sure this file can be created
        try:
            fd = open(biplot_fp, 'w')
        except IOError:
            sys.exit("There was a problem creating the file with "
                                "the coordinates for the biplots (%s)."
                                % biplot_fp)
        else:
            fd.writelines(lines)
            fd.close()
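
As written, main() reads its inputs from a global args object; the original
option_parser call is commented out near the top. A minimal sketch of the
missing entry point, assuming argparse (the flag names are hypothetical):

# hypothetical entry point, not part of the original script
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Emperor PCoA plot builder')
    parser.add_argument('-i', '--input_coords', required=True)
    parser.add_argument('-m', '--map_fp', required=True)
    parser.add_argument('-o', '--output_dir', required=True)
    args = parser.parse_args()

    main()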