コード例 #1
0
 def test_filter_samples_from_distance_matrix(self):
     """Filtering a parsed distance matrix yields the expected matrices."""
     # Each case pairs the sample ids handed to the filter with the
     # expected result for self.input_dm1.
     cases = [
         (["GHI blah", "XYZ"], expected_dm1a),
         (["GHI", "DEF"], expected_dm1b),
     ]
     for sample_ids, expected in cases:
         parsed_dm = parse_distmat(self.input_dm1)
         observed = filter_samples_from_distance_matrix(parsed_dm, sample_ids)
         self.assertEqual(observed, expected)
コード例 #2
0
 def test_filter_samples_from_distance_matrix_file_input(self):
     """Filtering accepts the unparsed self.input_dm1 directly."""
     # Same expectations as the parsed-input test, but the raw input is
     # passed through without calling parse_distmat first.
     cases = [
         (["GHI blah", "XYZ"], expected_dm1a),
         (["GHI", "DEF"], expected_dm1b),
     ]
     for sample_ids, expected in cases:
         observed = filter_samples_from_distance_matrix(self.input_dm1,
                                                        sample_ids)
         self.assertEqual(observed, expected)
コード例 #3
0
 def test_filter_samples_from_distance_matrix_negate(self):
     """Filtering with negate=True yields the expected matrices."""
     # With negate=True the complementary id lists produce the same
     # expected matrices used by the non-negated test.
     cases = [
         (["ABC blah", "DEF"], expected_dm1a),
         (["ABC", "XYZ"], expected_dm1b),
     ]
     for sample_ids, expected in cases:
         parsed_dm = parse_distmat(self.input_dm1)
         observed = filter_samples_from_distance_matrix(parsed_dm,
                                                        sample_ids,
                                                        negate=True)
         self.assertEqual(observed, expected)
コード例 #4
0
def main():
    """Filter a distance matrix down to a chosen set of samples.

    The samples to keep come from exactly one of (checked in this order):
    the sample ids of a BIOM table (opts.otu_table_fp), a file of ids
    (opts.sample_id_fp), or a mapping file plus a metadata description
    (opts.mapping_fp and opts.valid_states). The filtered matrix is written
    to opts.output_distance_matrix.

    The output file is only opened once the filtered matrix has been
    computed, so an option error or a parsing failure no longer leaves a
    truncated output file behind, and every input handle is closed.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        with open(opts.otu_table_fp, 'U') as otu_table_f:
            samples_to_keep = parse_biom_table(otu_table_f).SampleIds
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # Report a bad metadata description as a usage error, matching
            # the other versions of this script.
            option_parser.error(str(e))
    else:
        option_parser.error(
            'must pass either --sample_id_fp, -t, or -m and -s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as input_f:
        d = filter_samples_from_distance_matrix(parse_distmat(input_f),
                                                samples_to_keep,
                                                negate=not opts.negate)

    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
コード例 #5
0
def reconcile_hosts_symbionts(otu_file, host_dist):
    """Trim a cOTU table and a host distance matrix to their shared samples.

    otu_file: the cOTU table, passed to filter_samples_from_otu_table.
    host_dist: host distance matrix; its first element is used as the list
        of host sample names.

    Returns a (StringIO of the formatted, filtered OTU table, filtered host
    distance matrix) pair.
    """
    host_samples = host_dist[0]

    # Restrict the cOTU table to the samples present in the host dm.
    cotu_in_hosts = filter_samples_from_otu_table(
        otu_file, host_samples, negate=True)

    samples, taxa, counts, lineage_info = parse_otu_table(cotu_in_hosts)

    # Filter once more by minimum count: skip_empty doesn't seem to be
    # working in format_otu_table as called from
    # filter_samples_from_otu_table.
    samples, taxa, counts, lineage_info = filter_otu_table_by_min(
        samples, taxa, counts, lineage_info, min=1)

    # Trim the host distances to the samples that survived. Note: this
    # requires the modified filter_dist method which returns a native dm
    # tuple rather than a string.
    trimmed_host_dist = filter_samples_from_distance_matrix(
        host_dist, samples, negate=True)

    otu_table_text = format_otu_table(samples, taxa, counts, lineage_info)

    return StringIO(otu_table_text), trimmed_host_dist
コード例 #6
0
def main():
    """Filter a distance matrix down to a chosen set of samples.

    The samples to keep come from exactly one of (checked in this order):
    the ids of a BIOM table (opts.otu_table_fp), a file of ids
    (opts.sample_id_fp), or a mapping file plus a metadata description
    (opts.mapping_fp and opts.valid_states). The filtered matrix is written
    to opts.output_distance_matrix.

    The output file is only opened once the filtered matrix has been
    computed, so an option error or a parsing failure no longer leaves a
    truncated output file behind, and every input handle is closed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        samples_to_keep = load_table(opts.otu_table_fp).ids()
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # str(e) instead of e.message: the .message attribute is not
            # available on Python 3 exceptions.
            option_parser.error(str(e))
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified.  So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as input_f:
        d = filter_samples_from_distance_matrix(
            parse_distmat(input_f),
            samples_to_keep,
            negate=not opts.negate)

    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
コード例 #7
0
ファイル: util.py プロジェクト: gregcaporaso/microbiogeo
def subset_groups(dm_f, map_f, category, max_group_size):
    """Subsample a distance matrix to at most max_group_size samples per group.

    dm_f: distance matrix input, passed to parse_distmat.
    map_f: mapping file input, passed to MetadataMap.parseMetadataMap.
    category: metadata category whose values define the groups.
    max_group_size: upper bound on the samples drawn from each group.

    Samples are grouped by their value for ``category``; from each group
    min(max_group_size, group size) ids are drawn with ``sample``
    (presumably random.sample -- confirm against the file's imports).
    Returns the result of filter_samples_from_distance_matrix restricted to
    the drawn ids.
    """
    dm_labels, dm_data = parse_distmat(dm_f)
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    # Build the lookup once so the per-sample membership test does not
    # rescan the whole label sequence.
    dm_label_set = set(dm_labels)

    category_map = defaultdict(list)
    for samp_id in metadata_map.SampleIds:
        # Mapping files can have more samples than distance matrices, which can
        # happen in this case since we are dealing with rarefied OTU tables
        # (samples get dropped).
        if samp_id in dm_label_set:
            category_val = metadata_map.getCategoryValue(samp_id, category)
            category_map[category_val].append(samp_id)

    samp_ids_to_keep = []
    for samp_ids in category_map.values():
        samp_ids_to_keep.extend(
            sample(samp_ids, min(max_group_size, len(samp_ids))))

    return filter_samples_from_distance_matrix((dm_labels, dm_data),
                                               samp_ids_to_keep, negate=True)
コード例 #8
0
def make_dists_and_tree(sample_names, host_fp):
    """Build a host tree and host distance matrix limited to known samples.

    This routine reads in your host information (tree, alignment, or distance
    matrix) and converts it to a distance matrix and a tree. These are
    subsetted to just the samples passed to the routine that are also tips
    of the host tree. Both the host subtree and the distance matrix are
    passed back to the caller for testing.

    sample_names: iterable of sample names to keep.
    host_fp: path to the host tree / alignment / distance matrix file.

    Returns (host_tree, host_dist).
    Raises ValueError if the host file is not recognised as a tree, an
    alignment or a distance matrix.
    """
    with open(host_fp, 'r') as hostf:
        host_str = hostf.read()

    # Attempt to parse the host tree/alignment/distance matrix
    if isTree(host_str):
        host_tree, host_dist = processTree(host_str)
        print("Input is tree")

    elif isAlignment(host_str):
        host_tree, host_dist = processAlignment(host_str)
        print("Input is alignment")

    elif isMatrix(host_str):
        host_tree, host_dist = processMatrix(host_str)
        print("Input is distance matrix")

    else:
        # Previously this only printed a message and then failed below on
        # the unbound host_tree; fail here with a meaningful error instead.
        raise ValueError(
            "Host information file could not be parsed: %s" % host_fp)

    # Remove any sample names not in host tree. The tip names are collected
    # once, and selection depends only on membership (not on the truthiness
    # of the name itself).
    tip_names = set(host_tree.getTipNames())
    sample_names = [name for name in sample_names if name in tip_names]
    print(sample_names)

    # Get host subtree and filter distance matrix so they only include samples
    # present in the pOTU table
    host_tree = host_tree.getSubTree(sample_names)

    host_dist = filter_samples_from_distance_matrix(
        host_dist, sample_names, negate=True)
    return host_tree, host_dist
コード例 #9
0
    if opts.otu_table_fp:
        otu_table = parse_biom_table(open(opts.otu_table_fp,'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
         get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp,'U'),opts.valid_states)
        except ValueError, e:
            option_parser.error(e.message)
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and -s')
    # note that negate gets a little weird here. The function we're calling removes the specified 
    # samples from the distance matrix, but the other QIIME filter scripts keep these samples specified. 
    # So, the interface of this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    d = filter_samples_from_distance_matrix(
                               parse_distmat(open(opts.input_distance_matrix,'U')),
                               samples_to_keep,
                               negate=not opts.negate)
    output_f.write(d)
    output_f.close()
    


if __name__ == "__main__":
    main()
コード例 #10
0
def _parse_binning(binning):
    """Turn a binning option string into a list of [increment, top_limit].

    binning: None, or a string shaped like
        [increment1,top_limit1][increment2,top_limit2]

    Returns [] when binning is None, otherwise a list of two-float lists in
    the order given. Raises ValueError when the string is malformed.
    """
    if binning is None:
        return []

    # simple ranges format validation: one ']' and one ',' per '['
    if binning.count('[') != binning.count(']') or\
            binning.count('[') != binning.count(','):
        raise ValueError(
            "The binning input has an error: '%s'; " % binning +
            "\nthe format should be "
            "[increment1,top_limit1][increment2,top_limit2]")

    # split into ranges, then drop the leading '[' and the trailing ']'
    rgn_txt = binning.split('][')
    rgn_txt[0] = rgn_txt[0][1:]
    rgn_txt[-1] = rgn_txt[-1][:-1]

    ranges = []
    prev_limit = 0
    for i, r in enumerate(rgn_txt):
        try:
            values = [float(v) for v in r.split(',')]
        except ValueError:
            raise ValueError("Not a valid format for binning %s" % binning)
        if len(values) != 2:
            raise ValueError("All ranges must have only 2 values: [%s]" % r)
        elif i + 1 != len(rgn_txt):
            # As in the original code, the ordering checks are applied to
            # every range except the final one.
            if values[0] > values[1]:
                raise ValueError(
                    "The bin value can't be greater than the max value: "
                    "[%s]" % r)
            elif values[0] < 0 or values[1] < 0:
                raise ValueError("This value can not be negative: [%s]" % r)
            elif prev_limit > values[1]:
                raise ValueError(
                    "This value can not smaller than the previous one: "
                    "[%s]" % r)
            else:
                prev_limit = values[1]

        ranges.append(values)

    return ranges


def main():
    """Fit and plot a semivariogram from two distance matrices.

    Reads the x and y distance matrices named in the options, optionally
    drops the samples that are not shared by both, fits them with
    fit_semivariogram and saves the figure to opts.output_path. When a
    metadata category and mapping file are supplied, one series is plotted
    per category value and a legend is produced with make_legend.

    Raises ValueError for a malformed binning option, or when the matrices
    differ in shape and --ignore_missing_samples was not passed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    category = opts.category
    mapping_fp = opts.mapping_fp

    colors_used = []

    if (category and mapping_fp is None) or (category is None and mapping_fp):
        option_parser.error('If coloring by a metadata category, both the '
                            'category and the mapping file must be supplied.')
    elif mapping_fp and category:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        if category not in mapping_headers:
            option_parser.error("The category supplied must exist in the "
                                "metadata mapping file, '%s' does not exist." %
                                category)
        index = mapping_headers.index(category)
        categories = list(set([line[index] for line in mapping_data]))

    ranges = _parse_binning(opts.binning)

    with open(opts.input_path_x, 'U') as x_f:
        x_samples, x_distmtx = parse_distmat(x_f)
    with open(opts.input_path_y, 'U') as y_f:
        y_samples, y_distmtx = parse_distmat(y_f)

    if opts.ignore_missing_samples:
        ignoring_from_x = list(set(x_samples) - set(y_samples))
        ignoring_from_y = list(set(y_samples) - set(x_samples))

        if opts.verbose:
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_x,
                                                       ignoring_from_x))
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_y,
                                                       ignoring_from_y))
            print('\nOnly using: %s\n' % (
                list(set(x_samples) & set(y_samples)),))

        # The filter result is wrapped in StringIO and parsed again to get
        # back the (samples, matrix) pair.
        x_file = StringIO(
            filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                ignoring_from_x))
        x_samples, x_distmtx = parse_distmat(x_file)

        y_file = StringIO(
            filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                ignoring_from_y))
        y_samples, y_distmtx = parse_distmat(y_file)
    else:
        if x_distmtx.shape != y_distmtx.shape:
            raise ValueError(
                'The distance matrices have different sizes. ' +
                'You can cancel this error by passing '
                '--ignore_missing_samples')

    figure()
    if category is None:
        x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
            (x_samples, x_distmtx), (y_samples, y_distmtx), opts.model, ranges)

        plot(x_val,
             y_val,
             color=opts.dot_color,
             marker=opts.dot_marker,
             linestyle="None",
             alpha=opts.dot_alpha)
        plot(x_fit,
             y_fit,
             linewidth=2.0,
             color=opts.line_color,
             alpha=opts.line_alpha)
    else:
        for index, single_category in enumerate(categories):
            with open(mapping_fp) as mapping_f:
                good_sample_ids = sample_ids_from_metadata_description(
                    mapping_f, '%s:%s' % (category, single_category))

            _y_samples, _y_distmtx = parse_distmat(
                StringIO(
                    filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                        good_sample_ids,
                                                        negate=True)))
            _x_samples, _x_distmtx = parse_distmat(
                StringIO(
                    filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                        good_sample_ids,
                                                        negate=True)))

            x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
                (_x_samples, _x_distmtx), (_y_samples, _y_distmtx), opts.model,
                ranges)

            # retrieve one of the colors the "QIIME" colors and add it to the
            # list of used colors for the creation of the legends in the plot
            color_only = get_qiime_hex_string_color(index)
            colors_used.append(color_only)

            plot(x_val,
                 y_val,
                 color=color_only,
                 marker=opts.dot_marker,
                 linestyle="None",
                 alpha=opts.dot_alpha)
            plot(x_fit,
                 y_fit,
                 linewidth=2.0,
                 color=color_only,
                 alpha=opts.line_alpha,
                 label=single_category)

    # axis limits are only applied when both ends of the axis were given
    if opts.x_min is not None and opts.x_max is not None:
        xlim([opts.x_min, opts.x_max])
    if opts.y_min is not None and opts.y_max is not None:
        ylim([opts.y_min, opts.y_max])

    x_label = opts.x_label
    y_label = opts.y_label
    fig_title = '%s (%s)' % (opts.fig_title, opts.model)

    xlabel(x_label)
    ylabel(y_label)
    if opts.print_model:
        title(fig_title + ' ' + func_text)
    else:
        title(fig_title)

    savefig(opts.output_path)

    # print the legends after the figure is exported to avoid conflicts
    if category:
        # if there's a desired format, use that, else default it to png
        _, extension = splitext(opts.output_path)

        # remove the dot, else, make_legend will add it to the filename
        extension = extension.replace('.', '')

        if extension == '':
            extension = 'png'
        make_legend(categories, colors_used, 0, 0, 'black', 'white',
                    opts.output_path, extension, 80)
コード例 #11
0
def _parse_binning(binning):
    """Turn a binning option string into a list of [increment, top_limit].

    binning: None, or a string shaped like
        [increment1,top_limit1][increment2,top_limit2]

    Returns [] when binning is None, otherwise a list of two-float lists in
    the order given. Raises ValueError when the string is malformed.
    """
    if binning is None:
        return []

    # simple ranges format validation: one ']' and one ',' per '['
    if binning.count('[') != binning.count(']') or\
            binning.count('[') != binning.count(','):
        raise ValueError(
            "The binning input has an error: '%s'; " % binning +
            "\nthe format should be "
            "[increment1,top_limit1][increment2,top_limit2]")

    # split into ranges, then drop the leading '[' and the trailing ']'
    rgn_txt = binning.split('][')
    rgn_txt[0] = rgn_txt[0][1:]
    rgn_txt[-1] = rgn_txt[-1][:-1]

    ranges = []
    prev_limit = 0
    for i, r in enumerate(rgn_txt):
        try:
            values = [float(v) for v in r.split(',')]
        except ValueError:
            raise ValueError("Not a valid format for binning %s" % binning)
        if len(values) != 2:
            raise ValueError("All ranges must have only 2 values: [%s]" % r)
        elif i + 1 != len(rgn_txt):
            # As in the original code, the ordering checks are applied to
            # every range except the final one.
            if values[0] > values[1]:
                raise ValueError(
                    "The bin value can't be greater than the max value: "
                    "[%s]" % r)
            elif values[0] < 0 or values[1] < 0:
                raise ValueError("This value can not be negative: [%s]" % r)
            elif prev_limit > values[1]:
                raise ValueError(
                    "This value can not smaller than the previous one: "
                    "[%s]" % r)
            else:
                prev_limit = values[1]

        ranges.append(values)

    return ranges


def main():
    """Fit and plot a semivariogram from two distance matrices.

    Reads the x and y distance matrices named in the options, optionally
    drops the samples that are not shared by both, fits them with
    fit_semivariogram and saves the figure to opts.output_path. When a
    metadata category and mapping file are supplied, one series is plotted
    per category value that has samples in the matrices, and a legend is
    produced with make_legend for the categories actually plotted.

    Raises ValueError for a malformed binning option, or when the matrices
    differ in shape and --ignore_missing_samples was not passed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    category = opts.category
    mapping_fp = opts.mapping_fp

    colors_used = []

    if (category and mapping_fp is None) or (category is None and mapping_fp):
        option_parser.error('If coloring by a metadata category, both the '
                            'category and the mapping file must be supplied.')
    elif mapping_fp and category:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        if category not in mapping_headers:
            option_parser.error("The category supplied must exist in the "
                                "metadata mapping file, '%s' does not exist." % category)
        index = mapping_headers.index(category)
        categories = list(set([line[index] for line in mapping_data]))

    ranges = _parse_binning(opts.binning)

    with open(opts.input_path_x, 'U') as x_f:
        x_samples, x_distmtx = parse_distmat(x_f)
    with open(opts.input_path_y, 'U') as y_f:
        y_samples, y_distmtx = parse_distmat(y_f)

    if opts.ignore_missing_samples:
        ignoring_from_x = list(set(x_samples) - set(y_samples))
        ignoring_from_y = list(set(y_samples) - set(x_samples))

        if opts.verbose:
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_x,
                                                       ignoring_from_x))
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_y,
                                                       ignoring_from_y))
            print('\nOnly using: %s\n' % (
                list(set(x_samples) & set(y_samples)),))

        # The filter result is wrapped in StringIO and parsed again to get
        # back the (samples, matrix) pair.
        x_file = StringIO(
            filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                ignoring_from_x))
        x_samples, x_distmtx = parse_distmat(x_file)

        y_file = StringIO(
            filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                ignoring_from_y))
        y_samples, y_distmtx = parse_distmat(y_file)
    else:
        if x_distmtx.shape != y_distmtx.shape:
            raise ValueError(
                'The distance matrices have different sizes. ' +
                'You can cancel this error by passing '
                '--ignore_missing_samples')

    figure()
    if category is None:
        x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
            (x_samples, x_distmtx), (y_samples, y_distmtx), opts.model, ranges)

        plot(
            x_val,
            y_val,
            color=opts.dot_color,
            marker=opts.dot_marker,
            linestyle="None",
            alpha=opts.dot_alpha)
        plot(
            x_fit,
            y_fit,
            linewidth=2.0,
            color=opts.line_color,
            alpha=opts.line_alpha)
    else:
        # not all the categories that are going to be enumerated are found in
        # the distance matrices i.e. the mapping file is a superset that can
        # contain more samples than the distance matrices
        used_categories = deepcopy(categories)

        # Keeps the title code below working even if every category ends up
        # being skipped and no model was fitted.
        func_text = ''

        for index, single_category in enumerate(categories):
            with open(mapping_fp) as mapping_f:
                good_sample_ids = sample_ids_from_metadata_description(
                    mapping_f, '%s:%s' % (category, single_category))

            try:
                _y_samples, _y_distmtx = parse_distmat(StringIO(
                    filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                        good_sample_ids, negate=True)))
                _x_samples, _x_distmtx = parse_distmat(StringIO(
                    filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                        good_sample_ids, negate=True)))
            except ValueError:
                # no samples found for this category
                used_categories.remove(single_category)
                continue

            x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
                (_x_samples, _x_distmtx), (_y_samples, _y_distmtx),
                opts.model, ranges)

            # retrieve one of the colors the "QIIME" colors and add it to the
            # list of used colors for the creation of the legends in the plot
            color_only = get_qiime_hex_string_color(index)
            colors_used.append(color_only)

            plot(x_val, y_val, color=color_only, marker=opts.dot_marker,
                 linestyle="None", alpha=opts.dot_alpha)
            plot(x_fit, y_fit, linewidth=2.0, color=color_only,
                 alpha=opts.line_alpha, label=single_category)

    # set plot limits if requested; any bound not given keeps the value
    # currently reported by xlim()/ylim()
    x_lb, x_ub = xlim()
    y_lb, y_ub = ylim()
    if opts.x_min is not None:
        x_lb = opts.x_min
    if opts.x_max is not None:
        x_ub = opts.x_max
    if opts.y_min is not None:
        y_lb = opts.y_min
    if opts.y_max is not None:
        y_ub = opts.y_max
    xlim(x_lb, x_ub)
    ylim(y_lb, y_ub)

    x_label = opts.x_label
    y_label = opts.y_label
    fig_title = '%s (%s)' % (opts.fig_title, opts.model)

    xlabel(x_label)
    ylabel(y_label)
    if opts.print_model:
        title(fig_title + ' ' + func_text)
    else:
        title(fig_title)

    savefig(opts.output_path)

    # print the legends after the figure is exported to avoid conflicts
    if category:
        # if there's a desired format, use that, else default it to png
        _, extension = splitext(opts.output_path)

        # remove the dot, else, make_legend will add it to the filename
        extension = extension.replace('.', '')

        if extension == '':
            extension = 'png'
        make_legend(used_categories, colors_used, 0, 0, 'black', 'white',
                    opts.output_path, extension, 80)
コード例 #12
0
    if opts.otu_table_fp:
        otu_table = parse_biom_table(open(opts.otu_table_fp, 'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
         get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp, 'U'), opts.valid_states)
        except ValueError, e:
            option_parser.error(e.message)
    else:
        option_parser.error(
            'must pass either --sample_id_fp, -t, or -m and -s')
    # note that negate gets a little weird here. The function we're calling removes the specified
    # samples from the distance matrix, but the other QIIME filter scripts keep these samples specified.
    # So, the interface of this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    d = filter_samples_from_distance_matrix(parse_distmat(
        open(opts.input_distance_matrix, 'U')),
                                            samples_to_keep,
                                            negate=not opts.negate)
    output_f.write(d)
    output_f.close()


if __name__ == "__main__":
    main()
コード例 #13
0
ファイル: plot_semivariogram.py プロジェクト: gxenomics/qiime
def _parse_binning(binning):
    """Turn a binning option string into a list of [increment, top_limit].

    binning: None, or a string shaped like
        [increment1,top_limit1][increment2,top_limit2]

    Returns [] when binning is None, otherwise a list of two-float lists in
    the order given. Raises ValueError when the string is malformed.
    """
    if binning is None:
        return []

    # simple ranges format validation: one ']' and one ',' per '['
    if binning.count('[') != binning.count(']') or\
            binning.count('[') != binning.count(','):
        raise ValueError(
            "The binning input has an error: '%s'; " % binning +
            "\nthe format should be "
            "[increment1,top_limit1][increment2,top_limit2]")

    # split into ranges, then drop the leading '[' and the trailing ']'
    rgn_txt = binning.split('][')
    rgn_txt[0] = rgn_txt[0][1:]
    rgn_txt[-1] = rgn_txt[-1][:-1]

    ranges = []
    prev_limit = 0
    for i, r in enumerate(rgn_txt):
        try:
            values = [float(v) for v in r.split(',')]
        except ValueError:
            raise ValueError("Not a valid format for binning %s" % binning)
        if len(values) != 2:
            raise ValueError("All ranges must have only 2 values: [%s]" % r)
        elif i + 1 != len(rgn_txt):
            # As in the original code, the ordering checks are applied to
            # every range except the final one.
            if values[0] > values[1]:
                raise ValueError(
                    "The bin value can't be greater than the max value: "
                    "[%s]" % r)
            elif values[0] < 0 or values[1] < 0:
                raise ValueError("This value can not be negative: [%s]" % r)
            elif prev_limit > values[1]:
                raise ValueError(
                    "This value can not smaller than the previous one: "
                    "[%s]" % r)
            else:
                prev_limit = values[1]

        ranges.append(values)

    return ranges


def main():
    """Fit and plot a semivariogram from two distance matrices.

    Reads the x and y distance matrices named in the options, optionally
    drops the samples that are not shared by both, fits them with
    fit_semivariogram, plots the points and the fitted line, and saves the
    figure to opts.output_path.

    Raises ValueError for a malformed binning option, or when the matrices
    differ in shape and --ignore_missing_samples was not passed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    ranges = _parse_binning(opts.binning)

    with open(opts.input_path_x, 'U') as x_f:
        x_samples, x_distmtx = parse_distmat(x_f)
    with open(opts.input_path_y, 'U') as y_f:
        y_samples, y_distmtx = parse_distmat(y_f)

    if opts.ignore_missing_samples:
        ignoring_from_x = list(set(x_samples) - set(y_samples))
        ignoring_from_y = list(set(y_samples) - set(x_samples))

        if opts.verbose:
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_x,
                                                       ignoring_from_x))
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_y,
                                                       ignoring_from_y))
            print('\nOnly using: %s\n' % (
                list(set(x_samples) & set(y_samples)),))

        # The filter result is wrapped in StringIO and parsed again to get
        # back the (samples, matrix) pair.
        x_file = StringIO(
            filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                ignoring_from_x))
        x_samples, x_distmtx = parse_distmat(x_file)

        y_file = StringIO(
            filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                ignoring_from_y))
        y_samples, y_distmtx = parse_distmat(y_file)
    else:
        if x_distmtx.shape != y_distmtx.shape:
            raise ValueError(
                'The distance matrices have different sizes. ' +
                'You can cancel this error by passing '
                '--ignore_missing_samples')

    (x_val, y_val, x_fit, y_fit, func_text) = fit_semivariogram(
        (x_samples, x_distmtx), (y_samples, y_distmtx), opts.model, ranges)

    plot(x_val, y_val, color=opts.dot_color, marker=opts.dot_marker,
         linestyle="None", alpha=opts.dot_alpha)
    plot(x_fit, y_fit, linewidth=2.0, color=opts.line_color,
         alpha=opts.line_alpha)

    # axis limits are only applied when both ends of the axis were given
    if opts.x_min is not None and opts.x_max is not None:
        xlim([opts.x_min, opts.x_max])
    if opts.y_min is not None and opts.y_max is not None:
        ylim([opts.y_min, opts.y_max])

    x_label = opts.x_label
    y_label = opts.y_label
    fig_title = '%s (%s)' % (opts.fig_title, opts.model)

    xlabel(x_label)
    ylabel(y_label)
    if opts.print_model:
        title(fig_title + ' ' + func_text)
    else:
        title(fig_title)

    savefig(opts.output_path)
コード例 #14
0
ファイル: plot_semivariogram.py プロジェクト: jb2263/qiime
def _parse_binning(binning):
    """Turn a binning option string into a list of [increment, top_limit].

    binning: None, or a string shaped like
        [increment1,top_limit1][increment2,top_limit2]

    Returns [] when binning is None, otherwise a list of two-float lists in
    the order given. Raises ValueError when the string is malformed.
    """
    if binning is None:
        return []

    # simple ranges format validation: one ']' and one ',' per '['
    if binning.count('[') != binning.count(']') or\
            binning.count('[') != binning.count(','):
        raise ValueError(
            "The binning input has an error: '%s'; " % binning +
            "\nthe format should be "
            "[increment1,top_limit1][increment2,top_limit2]")

    # split into ranges, then drop the leading '[' and the trailing ']'
    rgn_txt = binning.split('][')
    rgn_txt[0] = rgn_txt[0][1:]
    rgn_txt[-1] = rgn_txt[-1][:-1]

    ranges = []
    prev_limit = 0
    for i, r in enumerate(rgn_txt):
        try:
            values = [float(v) for v in r.split(',')]
        except ValueError:
            raise ValueError("Not a valid format for binning %s" % binning)
        if len(values) != 2:
            raise ValueError("All ranges must have only 2 values: [%s]" % r)
        elif i + 1 != len(rgn_txt):
            # As in the original code, the ordering checks are applied to
            # every range except the final one.
            if values[0] > values[1]:
                raise ValueError(
                    "The bin value can't be greater than the max value: "
                    "[%s]" % r)
            elif values[0] < 0 or values[1] < 0:
                raise ValueError("This value can not be negative: [%s]" % r)
            elif prev_limit > values[1]:
                raise ValueError(
                    "This value can not smaller than the previous one: "
                    "[%s]" % r)
            else:
                prev_limit = values[1]

        ranges.append(values)

    return ranges


def main():
    """Fit and plot a semivariogram from two distance matrices.

    Reads the x and y distance matrices named in the options, optionally
    drops the samples that are not shared by both, fits them with
    fit_semivariogram, plots the points and the fitted line, and saves the
    figure to opts.output_path.

    Raises ValueError for a malformed binning option, or when the matrices
    differ in shape and --ignore_missing_samples was not passed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    ranges = _parse_binning(opts.binning)

    with open(opts.input_path_x, 'U') as x_f:
        x_samples, x_distmtx = parse_distmat(x_f)
    with open(opts.input_path_y, 'U') as y_f:
        y_samples, y_distmtx = parse_distmat(y_f)

    if opts.ignore_missing_samples:
        ignoring_from_x = list(set(x_samples) - set(y_samples))
        ignoring_from_y = list(set(y_samples) - set(x_samples))

        if opts.verbose:
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_x,
                                                       ignoring_from_x))
            print('\nFrom %s we are ignoring: %s\n' % (opts.input_path_y,
                                                       ignoring_from_y))
            print('\nOnly using: %s\n' % (
                list(set(x_samples) & set(y_samples)),))

        # The filter result is wrapped in StringIO and parsed again to get
        # back the (samples, matrix) pair.
        x_file = StringIO(
            filter_samples_from_distance_matrix((x_samples, x_distmtx),
                                                ignoring_from_x))
        x_samples, x_distmtx = parse_distmat(x_file)

        y_file = StringIO(
            filter_samples_from_distance_matrix((y_samples, y_distmtx),
                                                ignoring_from_y))
        y_samples, y_distmtx = parse_distmat(y_file)
    else:
        if x_distmtx.shape != y_distmtx.shape:
            raise ValueError(
                'The distance matrices have different sizes. ' +
                'You can cancel this error by passing '
                '--ignore_missing_samples')

    (x_val, y_val, x_fit, y_fit, func_text) = fit_semivariogram(
        (x_samples, x_distmtx), (y_samples, y_distmtx), opts.model, ranges)

    plot(x_val, y_val, color=opts.dot_color, marker=opts.dot_marker,
         linestyle="None", alpha=opts.dot_alpha)
    plot(x_fit, y_fit, linewidth=2.0, color=opts.line_color,
         alpha=opts.line_alpha)

    # axis limits are only applied when both ends of the axis were given
    if opts.x_min is not None and opts.x_max is not None:
        xlim([opts.x_min, opts.x_max])
    if opts.y_min is not None and opts.y_max is not None:
        ylim([opts.y_min, opts.y_max])

    x_label = opts.x_label
    y_label = opts.y_label
    fig_title = '%s (%s)' % (opts.fig_title, opts.model)

    xlabel(x_label)
    ylabel(y_label)
    if opts.print_model:
        title(fig_title + ' ' + func_text)
    else:
        title(fig_title)

    savefig(opts.output_path)
コード例 #15
0
ファイル: util.py プロジェクト: gregcaporaso/microbiogeo
def subset_dm(dm_f, num_samps):
    """Filter a distance matrix to num_samps ids drawn from its labels.

    dm_f: distance matrix input, passed to parse_distmat.
    num_samps: how many labels to draw with ``sample`` (presumably
        random.sample -- confirm against the file's imports).

    Returns the result of filter_samples_from_distance_matrix for the drawn
    ids.
    """
    all_ids, matrix = parse_distmat(dm_f)
    chosen_ids = sample(all_ids, num_samps)
    parsed_dm = (all_ids, matrix)
    return filter_samples_from_distance_matrix(
        parsed_dm, chosen_ids, negate=True)