def test_get_log_transform(self):
    """get_log_transform output matches log10 of the eps-filled table."""
    pseudocount = .01
    observed = get_log_transform(self.otu_table, eps=pseudocount)

    # Build the expected values independently of the function under
    # test: zeros are replaced by the pseudocount, then log10 is taken.
    expected = asarray(self.otu_table, dtype=float64)
    expected[expected == 0] = pseudocount
    expected = log10(expected)

    for row_idx, observed_row in enumerate(observed):
        self.assertEqual(observed_row, expected[row_idx])
    def test_get_log_transform(self):
        """Non-zero entries are log10-transformed; zero entries stay zero."""
        transformed = get_log_transform(self.otu_table)

        rows = list(self.otu_table.iter_data(axis='observation'))
        expected = asarray(rows, dtype=float64)

        observed_rows = transformed.iter_data(axis='observation')
        for row_idx, observed_row in enumerate(observed_rows):
            # Take the log only where the count is non-zero, so the zero
            # entries of the expected row are left untouched.
            nonzero_cols = argwhere(expected[row_idx] != 0)
            expected[row_idx, nonzero_cols] = log10(
                expected[row_idx, nonzero_cols])
            assert_almost_equal(observed_row, expected[row_idx])
    def test_get_log_transform(self):
        """Zeros are replaced by eps before the log10 transform is applied."""
        pseudocount = .01
        transformed = get_log_transform(self.otu_table, eps=pseudocount)

        # Expected input to the log: the raw table with every zero
        # swapped for the pseudocount.
        expected = asarray(list(self.otu_table.iterObservationData()),
                           dtype=float64)
        expected[expected == 0] = pseudocount

        for row_idx, observed_row in enumerate(
                transformed.iterObservationData()):
            self.assertEqual(observed_row, log10(expected[row_idx]))
Exemple #4
0
    def test_get_log_transform(self):
        """Zeros are replaced by eps before the log10 transform is applied."""
        pseudocount = .01
        transformed = get_log_transform(self.otu_table, eps=pseudocount)

        # Expected input to the log: the raw table with every zero
        # swapped for the pseudocount.
        expected = asarray(list(self.otu_table.iterObservationData()),
                           dtype=float64)
        expected[expected == 0] = pseudocount

        for row_idx, observed_row in enumerate(
                transformed.iterObservationData()):
            assert_almost_equal(observed_row, log10(expected[row_idx]))
    def test_get_log_transform(self):
        """Non-zero entries are log10-transformed; zero entries stay zero."""
        transformed = get_log_transform(self.otu_table)

        rows = list(self.otu_table.iter_data(axis="observation"))
        expected = asarray(rows, dtype=float64)

        observed_rows = transformed.iter_data(axis="observation")
        for row_idx, observed_row in enumerate(observed_rows):
            # Take the log only where the count is non-zero, so the zero
            # entries of the expected row are left untouched.
            nonzero_cols = argwhere(expected[row_idx] != 0)
            expected[row_idx, nonzero_cols] = log10(
                expected[row_idx, nonzero_cols])
            assert_almost_equal(observed_row, expected[row_idx])
Exemple #6
0
def main():
    """Order an OTU table and hand it to ``plot_heatmap``.

    Steps, all driven by the parsed command-line options:

    1. Load the BIOM table at ``opts.otu_table_fp`` and build OTU labels
       from the 'taxonomy' observation metadata (blank lineages when the
       metadata is missing).
    2. Optionally convert to relative abundance and log-transform.
    3. Pick the output directory (created if needed, './' by default).
    4. Choose a sample order (sample tree, mapping file, clustering, or
       table order) and an OTU order (OTU tree, clustering, or table
       order).
    5. Re-order the table and labels and call ``plot_heatmap`` with
       ``<output dir>/heatmap.pdf`` as the filename.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if the output directory does not exist and cannot be
            created.
        SystemExit: if ``opts.log_eps`` is given and not positive.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)

    if (otu_table.ObservationMetadata is None
            or 'taxonomy' not in otu_table.ObservationMetadata[0]):
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]

    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % opts.log_eps)
            raise SystemExit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory up front so that a failure to create
    # it is reported here rather than as a NameError at plotting time.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Tolerate the directory having been created concurrently;
                # any other failure leaves us with nowhere to write.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.SampleIds,
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # The first column of every mapping row is the sample id.
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [row[0] for row in new_map
                                  if row[0] in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with otu_tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds,
                                            otu_tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate before sorting.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels was built from the unsorted table, so it still needs the
    # permutation applied.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is now in its final order, so its ids are the labels.
    # Indexing them by sample_order again would apply the permutation
    # twice and mislabel the heatmap.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table,
                 otu_labels,
                 sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
def main():
    """Order an OTU table and hand it to ``plot_heatmap``.

    Steps, all driven by the parsed command-line options:

    1. Load the table at ``opts.otu_table_fp`` and build OTU labels from
       one level of the ``opts.obs_md_category`` observation metadata
       (blank labels when that metadata is missing).
    2. Optionally convert to relative abundance and log-transform.
    3. Choose a sample order (sample tree, mapping file, clustering, or
       table order) and an OTU order (OTU tree, clustering, or table
       order).
    4. Re-order the table and labels and call ``plot_heatmap`` with
       ``opts.output_fp`` and the image options.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.otu_table_fp)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1
    obs_md = otu_table.metadata(axis='observation')

    # Only look at obs_md[0] after the None check: a table without
    # observation metadata must fall through to the blank labels instead
    # of failing on the subscript.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.ids(),
                                               sample_tree_f)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.ids(),
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # The first column of every mapping row is the sample id.
            ordered_sample_ids = [row[0] for row in new_map
                                  if otu_table.exists(row[0])]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError("Couldn't read tree file at path: %s" %
                                   opts.otu_tree)
        with otu_tree_f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), otu_tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # otu_order and sample_order are indices; sort_order takes ids, so
    # translate before sorting.
    otu_id_order = [otu_table.ids(axis='observation')[i] for i in otu_order]
    sample_id_order = [otu_table.ids()[i] for i in sample_order]

    # otu_labels was built from the unsorted table, so it still needs the
    # permutation applied.
    otu_labels = np.array(otu_labels)[otu_order]
    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    otu_table = otu_table.sort_order(sample_id_order)
    # The table is now in its final order, so its ids are the labels.
    # Indexing them by sample_order again would apply the permutation
    # twice and mislabel the heatmap.
    sample_ids = np.array(otu_table.ids())

    plot_heatmap(otu_table, otu_labels, sample_ids, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)
Exemple #8
0
def main():
    """Order an OTU table and hand it to ``plot_heatmap``.

    Steps, all driven by the parsed command-line options:

    1. Load the BIOM table at ``opts.otu_table_fp`` and build OTU labels
       from one level of the ``opts.obs_md_category`` observation
       metadata (blank labels when that metadata is missing).
    2. Optionally convert to relative abundance and log-transform.
    3. Pick the output directory (created if needed, './' by default).
    4. Choose a sample order (sample tree, mapping file, clustering, or
       table order) and an OTU order (OTU tree, clustering, or table
       order).
    5. Re-order the table and labels and call ``plot_heatmap`` with
       ``<output dir>/heatmap.pdf`` as the filename.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if the output directory does not exist and cannot be
            created.
        SystemExit: if ``opts.log_eps`` is given and not positive.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1
    obs_md = otu_table.ObservationMetadata

    # Only look at obs_md[0] after the None check: a table without
    # observation metadata must fall through to the blank labels instead
    # of failing on the subscript.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ObservationIds)
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iterObservations():
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ObservationIds,
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % opts.log_eps)
            raise SystemExit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory up front so that a failure to create
    # it is reported here rather than as a NameError at plotting time.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Tolerate the directory having been created concurrently;
                # any other failure leaves us with nowhere to write.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.SampleIds,
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # The first column of every mapping row is the sample id.
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [row[0] for row in new_map
                                  if row[0] in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" %
                opts.otu_tree)
        with otu_tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds,
                                            otu_tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate before sorting.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels was built from the unsorted table, so it still needs the
    # permutation applied.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is now in its final order, so its ids are the labels.
    # Indexing them by sample_order again would apply the permutation
    # twice and mislabel the heatmap.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Prepare inputs and output directories, then build the heatmap plots.

    Loads the OTU table at ``opts.otu_table_fp`` (log-transforming it on
    request), decides whether the table holds fractional values, creates
    ``opts.output_dir`` and its ``js`` sub-directory, copies the
    JavaScript support files into it, derives optional OTU and sample
    orderings from tree or mapping files, and finally calls
    ``generate_heatmap_plots``.

    Raises:
        TreeMissingError: if ``opts.tree`` or ``opts.sample_tree`` is given
            but cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open and get coord data
    otu_table = get_otu_counts(opts.otu_table_fp)
    num_otu_hits = opts.num_otu_hits
    if opts.log_transform:
        otu_table = get_log_transform(otu_table)
        num_otu_hits = 0

    # determine whether fractional values are present in OTU table
    max_val = -1
    for val in otu_table.iter_data(axis='observation'):
        max_val = maximum(max_val, val.max())

    # the data cannot be of mixed types: if one is float, all are float.
    # max_val is still the plain int -1 (no .dtype) when the table yields
    # no observations, hence the getattr guards.
    dtype_name = getattr(getattr(max_val, 'dtype', None), 'name', None)
    fractional_values = dtype_name in ('float32', 'float64')

    if fractional_values and max_val <= 1 and num_otu_hits > 0:
        print ("Warning: OTU table appears to be using relative "
               "abundances and num_otu_hits was set to %d. Setting "
               "num_otu_hits to 0." % num_otu_hits)
        num_otu_hits = 0

    filepath = opts.otu_table_fp
    filename = filepath.strip().split('/')[-1].split('.')[0]

    dir_path = opts.output_dir
    create_dir(dir_path)

    js_dir_path = os.path.join(dir_path, 'js')
    create_dir(js_dir_path)

    qiime_dir = get_qiime_project_dir()

    js_path = os.path.join(qiime_dir, 'qiime/support_files/js')

    # Copy the JavaScript support files into the output's js directory.
    for js_file in ('overlib.js', 'otu_count_display.js', 'jquery.js',
                    'jquery.tablednd_0_5.js'):
        shutil.copyfile(os.path.join(js_path, js_file),
                        os.path.join(js_dir_path, js_file))

    # load tree for sorting OTUs
    ordered_otu_names = None
    if opts.tree is not None:
        try:
            tree_f = open(opts.tree, 'U')
        except (TypeError, IOError):
            # Report the path that was actually tried; the previous
            # message referenced an undefined name and so raised
            # NameError instead of TreeMissingError.
            raise TreeMissingError(
                "Couldn't read tree file at path: %s" %
                opts.tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_otu_names = [tip.Name for tip in tree.iterTips()]

    # load tree for sorting Samples
    ordered_sample_names = None
    if opts.sample_tree is not None:
        try:
            tree_f = open(opts.sample_tree, 'U')
        except (TypeError, IOError):
            raise TreeMissingError(
                "Couldn't read tree file at path: %s" %
                opts.sample_tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_sample_names = [tip.Name for tip in tree.iterTips()]
    # if there's no sample tree, load sample map for sorting samples
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            mapping_rows = parse_mapping_file(map_f.readlines())[0]
        # The first column of every mapping row is the sample name.
        ordered_sample_names = [row[0] for row in mapping_rows]

    try:
        action = generate_heatmap_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            num_otu_hits, otu_table, ordered_otu_names, ordered_sample_names,
            dir_path, js_dir_path, filename, fractional_values)
def main():
    """Prepare inputs and output directories, then build the heatmap plots.

    Loads the OTU table at ``opts.otu_table_fp`` (log-transforming it on
    request), decides whether the table holds fractional values, creates
    ``opts.output_dir`` and its ``js`` sub-directory, copies the
    JavaScript support files into it, derives optional OTU and sample
    orderings from tree or mapping files, and finally calls
    ``generate_heatmap_plots``.

    Raises:
        TreeMissingError: if ``opts.tree`` or ``opts.sample_tree`` is given
            but cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open and get coord data
    otu_table = get_otu_counts(opts.otu_table_fp)
    num_otu_hits = opts.num_otu_hits
    if opts.log_transform:
        otu_table = get_log_transform(otu_table)
        num_otu_hits = 0

    # determine whether fractional values are present in OTU table
    max_val = -1
    for val in otu_table.iter_data(axis='observation'):
        max_val = maximum(max_val, val.max())

    # the data cannot be of mixed types: if one is float, all are float.
    # max_val is still the plain int -1 (no .dtype) when the table yields
    # no observations, hence the getattr guards.
    dtype_name = getattr(getattr(max_val, 'dtype', None), 'name', None)
    fractional_values = dtype_name in ('float32', 'float64')

    if fractional_values and max_val <= 1 and num_otu_hits > 0:
        print("Warning: OTU table appears to be using relative "
              "abundances and num_otu_hits was set to %d. Setting "
              "num_otu_hits to 0." % num_otu_hits)
        num_otu_hits = 0

    filepath = opts.otu_table_fp
    filename = filepath.strip().split('/')[-1].split('.')[0]

    dir_path = opts.output_dir
    create_dir(dir_path)

    js_dir_path = os.path.join(dir_path, 'js')
    create_dir(js_dir_path)

    qiime_dir = get_qiime_project_dir()

    js_path = os.path.join(qiime_dir, 'qiime/support_files/js')

    # Copy the JavaScript support files into the output's js directory.
    for js_file in ('overlib.js', 'otu_count_display.js', 'jquery.js',
                    'jquery.tablednd_0_5.js'):
        shutil.copyfile(os.path.join(js_path, js_file),
                        os.path.join(js_dir_path, js_file))

    # load tree for sorting OTUs
    ordered_otu_names = None
    if opts.tree is not None:
        try:
            tree_f = open(opts.tree, 'U')
        except (TypeError, IOError):
            # Report the path that was actually tried; the previous
            # message referenced an undefined name and so raised
            # NameError instead of TreeMissingError.
            raise TreeMissingError("Couldn't read tree file at path: %s" %
                                   opts.tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_otu_names = [tip.Name for tip in tree.iterTips()]

    # load tree for sorting Samples
    ordered_sample_names = None
    if opts.sample_tree is not None:
        try:
            tree_f = open(opts.sample_tree, 'U')
        except (TypeError, IOError):
            raise TreeMissingError("Couldn't read tree file at path: %s" %
                                   opts.sample_tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_sample_names = [tip.Name for tip in tree.iterTips()]
    # if there's no sample tree, load sample map for sorting samples
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            mapping_rows = parse_mapping_file(map_f.readlines())[0]
        # The first column of every mapping row is the sample name.
        ordered_sample_names = [row[0] for row in mapping_rows]

    try:
        action = generate_heatmap_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(num_otu_hits, otu_table, ordered_otu_names,
               ordered_sample_names, dir_path, js_dir_path, filename,
               fractional_values)
Exemple #11
0
def main():
    """Order an OTU table and hand it to ``plot_heatmap``.

    Steps, all driven by the parsed command-line options:

    1. Load the BIOM table at ``opts.otu_table_fp`` and build OTU labels
       from the 'taxonomy' observation metadata (blank lineages when the
       metadata is missing).
    2. Optionally convert to relative abundance and log-transform.
    3. Pick the output directory (created if needed, './' by default).
    4. Choose a sample order (sample tree, mapping file, clustering, or
       table order) and an OTU order (OTU tree, clustering, or table
       order).
    5. Re-order the table and labels and call ``plot_heatmap`` with
       ``<output dir>/heatmap.pdf`` as the filename.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if the output directory does not exist and cannot be
            created.
        SystemExit: if ``opts.log_eps`` is given and not positive.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)

    if (otu_table.ObservationMetadata is None
            or 'taxonomy' not in otu_table.ObservationMetadata[0]):
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]

    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % opts.log_eps)
            raise SystemExit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory up front so that a failure to create
    # it is reported here rather than as a NameError at plotting time.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Tolerate the directory having been created concurrently;
                # any other failure leaves us with nowhere to write.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.SampleIds,
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # The first column of every mapping row is the sample id.
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [row[0] for row in new_map
                                  if row[0] in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" %
                opts.otu_tree)
        with otu_tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds,
                                            otu_tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate before sorting.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels was built from the unsorted table, so it still needs the
    # permutation applied.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is now in its final order, so its ids are the labels.
    # Indexing them by sample_order again would apply the permutation
    # twice and mislabel the heatmap.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Order an OTU count matrix and hand it to ``plot_heatmap``.

    Steps, all driven by the parsed command-line options:

    1. Parse the OTU table at ``opts.otu_table_fp`` into sample ids, OTU
       ids, a count matrix and lineages (blank lineages when absent).
    2. Optionally convert each row to relative abundance and
       log-transform the matrix.
    3. Pick the output directory (created if needed, './' by default).
    4. Choose a sample order (sample tree, mapping file, or table order)
       and an OTU order (OTU tree or clustering).
    5. Re-order the matrix and labels and call ``plot_heatmap`` with
       ``<output dir>/heatmap.pdf`` as the filename.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if the output directory does not exist and cannot be
            created.
        SystemExit: if ``opts.log_eps`` is given and not positive.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Get OTU counts
    with open(opts.otu_table_fp, 'U') as otu_table_f:
        sample_ids, otu_ids, otus, lineages = \
            list(parse_otu_table(otu_table_f, count_map_f=float))

    # set 'blank' lineages if not supplied
    if lineages == []:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you pass the "--lineages_included" option.\n')
        lineages = [''] * len(otu_ids)
    otu_labels = make_otu_labels(otu_ids, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        for i, row in enumerate(otus):
            row_total = row.sum()
            # Rows that sum to zero are left as they are to avoid a
            # division by zero.
            if row_total > 0:
                otus[i] = row / row_total

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % opts.log_eps)
            raise SystemExit(1)
        otus = get_log_transform(otus, opts.log_eps)

    # Resolve the output directory up front so that a failure to create
    # it is reported here rather than as a NameError at plotting time.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Tolerate the directory having been created concurrently;
                # any other failure leaves us with nowhere to write.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # NOTE(review): this passes the tree *path*, whereas the OTU tree
        # branch below passes an open file -- confirm which form
        # get_order_from_tree expects.
        sample_order = get_order_from_tree(sample_ids, opts.sample_tree)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        sample_ids, new_map, otus = \
            get_overlapping_samples(sample_ids, metadata[0], otus)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(sample_ids,
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otus, category_labels)
        # else: just use the mapping file order
        else:
            # The first column of every mapping row is the sample id.
            known_sample_ids = set(sample_ids)
            ordered_sample_ids = [row[0] for row in new_map
                                  if row[0] in known_sample_ids]
            sample_order = names_to_indices(sample_ids, ordered_sample_ids)
    # if no tree or mapping file, keep the OTU table's sample order
    else:
        sample_order = arange(len(sample_ids))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with otu_tree_f:
            otu_order = get_order_from_tree(otu_ids, otu_tree_f)
    # if no tree, cluster the OTUs
    else:
        otu_order = get_clusters(otus, axis='row')

    # Re-order otu table, sampleids, etc. as necessary
    otus = otus[otu_order, :]
    otu_labels = array(otu_labels)[otu_order]
    otus = otus[:, sample_order]
    sample_ids = array(sample_ids)[sample_order]

    plot_heatmap(otus, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
Exemple #13
0
def main():
    """Draw an OTU heatmap from the table and options given on the command line.

    Steps, in order:
      1. Load the OTU table and build one label per observation from the
         requested observation-metadata category and level.
      2. Unless disabled by the options, normalize the table and apply a
         log transform.
      3. Choose the sample (column) order: sample tree if given, else the
         mapping file (clustered within a category if one was requested),
         else clustering, else the table's own order.
      4. Choose the OTU (row) order: OTU tree if given, else clustering,
         else the table's own order.
      5. Re-sort the table and labels and hand everything to plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.otu_table_fp)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1
    obs_md = otu_table.metadata(axis='observation')

    if obs_md is None or obs_md_category not in obs_md[0]:
        # No usable metadata: every observation gets an empty label. Build
        # independent inner lists so that they are not aliases of each other.
        obs_md_labels = [[''] for _ in otu_table.ids(axis='observation')]
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            # A level that does not exist for this observation yields an
            # empty label. Catching IndexError also covers the default
            # level (-1) on an empty metadata entry, which a plain
            # "level < len(...)" comparison lets through.
            try:
                current_md_at_level = current_md[obs_md_level]
            except IndexError:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # "with" guarantees the tree file is closed, even on error
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.ids(),
                                               sample_tree_f)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.ids(),
                                                      metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # The first column of each mapping row is the sample id. A
            # comprehension avoids subscripting zip(), which only works
            # where zip() returns a list and fails on an empty mapping.
            map_sample_ids = [row[0] for row in metadata[0]]
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        # open tree file
        try:
            otu_tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError("Couldn't read tree file at path: %s" %
                                   opts.otu_tree)
        # close the handle even if reading the tree raises
        with otu_tree_f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), otu_tree_f)
    # if no tree, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # sort_order takes ids rather than indices, so translate both orderings
    observation_ids = otu_table.ids(axis='observation')
    sample_ids = otu_table.ids()
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids[i] for i in sample_order]

    # Re-order otu table, sampleids, etc. as necessary
    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    otu_labels = np.array(otu_labels)[otu_order]
    otu_table = otu_table.sort_order(sample_id_order)
    sample_labels = otu_table.ids()

    plot_heatmap(otu_table,
                 otu_labels,
                 sample_labels,
                 opts.output_fp,
                 imagetype=opts.imagetype,
                 width=opts.width,
                 height=opts.height,
                 dpi=opts.dpi,
                 color_scheme=opts.color_scheme)