Example #1
def generate_alpha_rarefaction_data_from_point_in_omega(
        biom_object, metrics, sequences, iterations, tree_object=None):
    """generate alpha rarefaction data from a biom table and mapping file

    Inputs:
    biom_object: OTU table to be rarefied and used to compute alpha diversity
    metrics: list of metrics, phylogenetic or non phylogenetic
    sequences: maximum number of sequences for the rarefaction plots
    iterations: number of repetitions per rarefaction
    tree_object: tree to perform the phylogenetic operations, default is None

    Output:
    alpha_rarefaction_data: dictionary where the keys are alpha diversity
    metrics and the values are tuples; in these tuples the first element is a
    list of column headers for an alpha diversity file, the second element is a
    list of row headers for an alpha diversity file and the third element is a
    list of lists containing the alpha diversity data computed at multiple
    rarefaction depths and as many iterations as specified.
    """
    # the minimum rarefaction depth is the maximum depth divided by the number of steps
    steps = 4
    min_depth = int(ceil(sequences / steps))

    # get a rarefied biom with the proper identifiers
    rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,
                                           iterations, steps)

    alpha_rs = {}
    alpha_filenames = []
    # rarefy all the biom objects and get the alpha diversity values
    for rarefied_biom in rarefied_bioms_list:
        # this tag contains data about the iteration and the depth
        identifier = 'alpha_rare_%s_%s' % (str(
            rarefied_biom[0]), str(rarefied_biom[1]))
        alpha_values = single_object_alpha(rarefied_biom[2], metrics,
                                           tree_object)
        alpha_rs[identifier] = (rarefied_biom[0], rarefied_biom[1],
                                alpha_values.split('\n'))
        alpha_filenames.append(identifier)

    # use the rarefaction with the fewest sequences per sample as the reference
    ref_rare = single_object_alpha(rarefied_bioms_list[0][2], metrics,
                                   tree_object=tree_object).split('\n')
    all_metrics, all_samples, example_data = parse_matrix(ref_rare)

    # build a dictionary with the data for each of the metrics specified
    metrics_data = {}
    for metric in all_metrics:
        per_metric_data = []
        for filename in alpha_filenames:
            f_metrics, f_samples, f_data = parse_matrix(alpha_rs[filename][2])
            per_metric_data.append(make_output_row(f_metrics, metric, f_samples,
                                                   f_data, filename,
                                                   len(all_samples), all_samples))
        metrics_data[metric] = per_metric_data

    # now format the dictionary to make it compatible with make_averages
    alpha_rarefaction_data = _format_rarefactions(metrics_data, all_samples)

    return alpha_rarefaction_data
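
To make the bookkeeping in the function above concrete, here is a small runnable sketch of the depth arithmetic and the tag format it uses; the numbers are made up, and get_rarefactions itself is not reproduced, only the shape of what the loop expects from it.

from math import ceil

# hypothetical numbers: with steps fixed at 4 and a requested maximum of
# 400 sequences, the minimum rarefaction depth works out to a quarter of
# the maximum, exactly as computed at the top of the function
sequences, steps = 400, 4
min_depth = int(ceil(sequences / steps))
print(min_depth)  # 100

# each entry from get_rarefactions is assumed to carry the depth, the
# iteration and the rarefied table (in some order); the loop tags each one
# like this (the (depth, iteration) values below are invented)
for depth, iteration in [(100, 0), (100, 1), (400, 9)]:
    print('alpha_rare_%s_%s' % (depth, iteration))
# alpha_rare_100_0, alpha_rare_100_1, alpha_rare_400_9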
Example #2
def generate_alpha_rarefaction_data_from_point_in_omega(biom_object, metrics,
													sequences, iterations,
													tree_object=None):
	"""generate alpha rarefaction data from a biom table and mapping file

	Inputs:
	biom_object: OTU table to be rarefied and used to compute alpha diversity
	metrics: list of metrics, phylogenetic or non phylogenetic
	sequences: maximum number of sequences for the rarefaction plots
	iterations: number of repetitions per rarefaction
	tree_object: tree to perform the phylogenetic operations, default is None

	Output:
	alpha_rarefaction_data: dictionary where the keys are alpha diversity
	metrics and the values are tuples; in these tuples the first element is a
	list of column headers for an alpha diversity file, the second element is a
	list of row headers for an alpha diversity file and the third element is a
	list of lists containing the alpha diversity data computed at multiple
	rarefaction depths and as many iterations as specified.
	"""
	# the minimum rarefaction depth is the maximum depth divided by the number of steps
	steps = 4
	min_depth = int(ceil(sequences / steps))

	# get a rarefied biom with the proper identifiers
	rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,\
		iterations, steps)

	alpha_rs = {}
	alpha_filenames = []
	# rarefy all the biom objects and get the alpha diversity values
	for rarefied_biom in rarefied_bioms_list:
		# this tag contains data about the iteration and the depth
		identifier = 'alpha_rare_%s_%s' % (str(rarefied_biom[0]), str(rarefied_biom[1]))
		alpha_values = single_object_alpha(rarefied_biom[2], metrics, tree_object)
		alpha_rs[identifier] = (rarefied_biom[0], rarefied_biom[1], alpha_values.split('\n'))
		alpha_filenames.append(identifier)

	# use the rarefaction with the fewest sequences per sample as the reference
	ref_rare = single_object_alpha(rarefied_bioms_list[0][2], metrics,\
		tree_object=tree_object).split('\n')
	all_metrics, all_samples, example_data = parse_matrix(ref_rare)

	# build a dictionary with the data for each of the metrics specified
	metrics_data = {}
	for metric in all_metrics:
		per_metric_data = []
		for filename in alpha_filenames:
			f_metrics, f_samples, f_data = parse_matrix(alpha_rs[filename][2])
			per_metric_data.append(make_output_row(f_metrics, metric,\
				f_samples, f_data, filename, len(all_samples), all_samples))
		metrics_data[metric] = per_metric_data

	# now format the dictionary to make it compatible with make_averages
	alpha_rarefaction_data = _format_rarefactions(metrics_data, all_samples)

	return alpha_rarefaction_data
Example #3
def test_make_output_rows(self):
    f_metrics = ["met1"]
    metric = "met1"
    f_samples = ["s1", "s2"]
    f_data = numpy.array([[0.4], [0.8]])
    fname = "alpha_rarefaction_10_7"
    num_cols = 2
    all_samples = ["s1", "s2"]
    res = make_output_row(f_metrics, metric, f_samples, f_data, fname,
                          num_cols, all_samples)
    self.assertEqual(res, ["alpha_rarefaction_10_7", 10, 7, "0.4", "0.8"])
Example #4
def test_make_output_rows(self):
    f_metrics = ['met1']
    metric = 'met1'
    f_samples = ['s1', 's2']
    f_data = numpy.array([[.4], [.8]])
    fname = 'alpha_rarefaction_10_7'
    num_cols = 2
    all_samples = ['s1', 's2']
    res = make_output_row(f_metrics, metric, f_samples,
                          f_data, fname, num_cols, all_samples)
    self.assertEqual(res, ['alpha_rarefaction_10_7', 10, 7, '0.4', '0.8'])
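
Both tests pin down the same contract: the file name alpha_rarefaction_10_7 contributes a depth of 10 and an iteration of 7, followed by one stringified value per entry of all_samples. The sketch below is a hypothetical re-implementation written only from these tests (it is not the project's actual make_output_row), but it reproduces the asserted row.

import numpy

def make_output_row_sketch(f_metrics, metric, f_samples, f_data, fname,
                           num_cols, all_samples):
    # hypothetical helper inferred from the tests: fname is assumed to end
    # in "<depth>_<iteration>", e.g. "alpha_rarefaction_10_7"
    depth, iteration = (int(x) for x in fname.split('_')[-2:])
    metric_col = f_metrics.index(metric)
    # one slot per sample in all_samples; 'N/A' marks samples absent from
    # this particular file (a sketch-level choice, not taken from the tests)
    row = [fname, depth, iteration] + ['N/A'] * num_cols
    for sample, value in zip(f_samples, f_data[:, metric_col]):
        if sample in all_samples:
            row[3 + all_samples.index(sample)] = str(value)
    return row

print(make_output_row_sketch(['met1'], 'met1', ['s1', 's2'],
                             numpy.array([[0.4], [0.8]]),
                             'alpha_rarefaction_10_7', 2, ['s1', 's2']))
# ['alpha_rarefaction_10_7', 10, 7, '0.4', '0.8']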
Example #5
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(args) != 0:
        parser.error("Positional argument detected.  make sure all" +
                     ' parameters are identified.' +
                     '\ne.g.: include the \"-m\" in \"-m MINIMUM_LENGTH\"')

    input_dir = opts.input_path
    output_dir = opts.output_path
    example_filepath = opts.example_path

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    file_names = os.listdir(input_dir)
    file_names = [fname for fname in file_names if not fname.startswith('.')]

    if example_filepath is None:
        # table row is base_name, seqs_per_sam, iters, ext
        file_name_table = map(parse_rarefaction_fname, file_names)
        # sort on seqs/sam
        sorted_fname_table = sorted(
            file_name_table,
            key=operator.itemgetter(1))
        # now map back to file name
        example_fname = file_names[
            file_name_table.index(sorted_fname_table[0])]
        example_filepath = os.path.join(input_dir, example_fname)
    f = open(example_filepath, 'U')
    all_metrics, all_samples, example_data = parse_matrix(f)
    num_cols = len(all_samples)
    f.close()

    # build the table one row at a time: we are turning a set of
    # sample-by-metric matrices into one rarefaction-by-sample matrix
    # per metric; each metric becomes one output file
    for metric in all_metrics:
        metric_file_data = []
        for fname in file_names:
            # f_ here refers to the input file currently being processed
            # to distinguish from the output file we're building
            f = open(os.path.join(input_dir, fname), 'U')
            f_metrics, f_samples, f_data = parse_matrix(f)
            f.close()
            metric_file_data.append(
                make_output_row(f_metrics, metric, f_samples,
                                f_data, fname, num_cols, all_samples))

        write_output_file(metric_file_data, output_dir, metric, all_samples)
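
main() leans on parse_rarefaction_fname to recover the sequences-per-sample depth from each file name so that, when no example file is given, it can fall back to the shallowest rarefaction as the reference. The comment says each parsed row is (base_name, seqs_per_sam, iters, ext); the sketch below is an assumption built from that comment and from the alpha_rarefaction_10_7 naming used in the tests, not the project's own parser.

import os

def parse_rarefaction_fname_sketch(fname):
    # hypothetical: "alpha_rarefaction_10_7.txt" ->
    # ('alpha_rarefaction', 10, 7, '.txt')
    root, ext = os.path.splitext(fname)
    parts = root.split('_')
    return '_'.join(parts[:-2]), int(parts[-2]), int(parts[-1]), ext

# mirrors the example_filepath fallback above: parse every name, sort on
# the seqs-per-sample field, then map the smallest row back to its file
names = ['alpha_rarefaction_100_3.txt', 'alpha_rarefaction_10_7.txt']
table = [parse_rarefaction_fname_sketch(n) for n in names]
smallest = names[table.index(sorted(table, key=lambda row: row[1])[0])]
print(smallest)  # alpha_rarefaction_10_7.txt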