def test_format_otu_category_significance_tables_as_html(self):
    """Test formatting OTU category significance tables as HTML.

    Three things are checked:

    1. A ValueError is raised when the number passed as the second
       argument is not between 0 and 1 (10 is used here).
    2. The call with the gut and palm input files returns the filenames
       'gut.html' and 'palm.html', in that order.
    3. The contents written to 'gut.html' in the output directory match
       the expected HTML.
    """
    # Out-of-range value (10) must be rejected.
    self.assertRaises(ValueError,
                      format_otu_category_significance_tables_as_html,
                      otu_category_significance_text, 10, 'output_dir',
                      ['Self', 'Other'])

    obs = format_otu_category_significance_tables_as_html(
        [self.otu_cat_sig_gut_fp, self.otu_cat_sig_palm_fp], 0.05,
        self.output_dir, ['Self', 'Other'], rep_set_fp=self.rep_seqs_fp)
    self.assertEqual(obs, ['gut.html', 'palm.html'])

    # Use a context manager so the handle is released even if the read
    # fails.
    with open(join(self.output_dir, 'gut.html'), 'U') as out_f:
        obs = out_f.read()
    self.assertEqual(obs, exp_otu_cat_sig_gut)
def create_personal_results(output_dir,
                            mapping_fp,
                            coord_fp,
                            collated_dir,
                            otu_table_fp,
                            prefs_fp,
                            personal_id_column,
                            personal_ids=None,
                            column_title='Self',
                            individual_titles=None,
                            category_to_split='BodySite',
                            time_series_category='WeeksSinceStart',
                            rarefaction_depth=10000,
                            alpha=0.05,
                            rep_set_fp=None,
                            parameter_fp=None,
                            body_site_rarefied_otu_table_dir=None,
                            retain_raw_data=False,
                            suppress_alpha_rarefaction=False,
                            suppress_beta_diversity=False,
                            suppress_taxa_summary_plots=False,
                            suppress_alpha_diversity_boxplots=False,
                            suppress_otu_category_significance=False,
                            command_handler=call_commands_serially,
                            status_update_callback=no_status_updates):
    """Generate per-individual result pages under output_dir.

    For every personal ID a subdirectory of output_dir is created that
    holds a personal mapping file, the outputs of each analysis step that
    is not suppressed, and an index.html tying them together.

    Arguments:
        output_dir: directory to create; receives a 'support_files' copy
            (only if not already present), a log file, and one
            subdirectory per personal ID.
        mapping_fp: path to the mapping file; parsed here and also passed
            to split_otu_table.py via -m.
        coord_fp: passed to make_3d_plots.py via -i.
        collated_dir: passed to the alpha diversity boxplot generator and
            to make_rarefaction_plots.py via -i.
        otu_table_fp: OTU table passed to single_rarefaction.py and
            split_otu_table.py via -i.
        prefs_fp: passed to make_rarefaction_plots.py and
            make_3d_plots.py via -p.
        personal_id_column: mapping file column header that holds the
            personal IDs.
        personal_ids: personal IDs to process. If None, every ID returned
            by get_personal_ids for personal_id_column is processed.
        column_title: name of the column inserted into the personal
            mapping file header; also the category used to split OTU
            tables and to run otu_category_significance.py.
        individual_titles: forwarded unchanged to
            create_personal_mapping_file and
            format_otu_category_significance_tables_as_html.
        category_to_split: mapping file column header used to split OTU
            tables (body site by default).
        time_series_category: mapping file column header used for
            --custom_axes/--add_vectors in make_3d_plots.py and -c in
            summarize_taxa_through_plots.py.
        rarefaction_depth: depth passed to single_rarefaction.py via -d
            and to the alpha diversity boxplot generator.
        alpha: forwarded to
            format_otu_category_significance_tables_as_html.
        rep_set_fp: forwarded to
            format_otu_category_significance_tables_as_html.
        parameter_fp: if not None, passed to
            summarize_taxa_through_plots.py via -p.
        body_site_rarefied_otu_table_dir: if not None, used as the
            directory of per-body-site rarefied OTU tables instead of
            rarefying and splitting otu_table_fp here.
        retain_raw_data: if False, the intermediate files and directories
            recorded while running are removed before returning.
        suppress_alpha_rarefaction, suppress_beta_diversity,
        suppress_taxa_summary_plots, suppress_alpha_diversity_boxplots,
        suppress_otu_category_significance: skip the corresponding step
            when True.
        command_handler: callable invoked as
            command_handler(commands, status_update_callback, logger,
            close_logger_on_success=False) for each batch of commands.
        status_update_callback: forwarded to command_handler.

    Returns:
        A list of the per-individual analysis output directory paths that
        were recorded while running (in processing order).

    Raises:
        ValueError: if personal_id_column, category_to_split or
            time_series_category is not a mapping file column header, or
            if a requested personal ID is not present in
            personal_id_column.
    """
    # Create our output directory and copy over the resources the
    # personalized pages need (e.g. javascript, images, etc.).
    create_dir(output_dir)

    support_files_dir = join(output_dir, 'support_files')
    if not exists(support_files_dir):
        copytree(join(get_project_dir(), 'my_microbes', 'support_files'),
                 support_files_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))

    # Close the mapping file as soon as it has been parsed instead of
    # leaking the handle. The parsed results are reused many times below,
    # so they are presumably fully materialized by the parser.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, header, comments = parse_mapping_file(mapping_f)

    try:
        personal_id_index = header.index(personal_id_column)
    except ValueError:
        raise ValueError("Personal ID field '%s' is not a mapping file column "
                         "header." % personal_id_column)
    try:
        bodysite_index = header.index(category_to_split)
    except ValueError:
        raise ValueError("Category to split field '%s' is not a mapping file "
                         "column header." % category_to_split)

    # Insert the self/other column just before the last header column.
    header = header[:-1] + [column_title] + [header[-1]]

    # column that differentiates between body-sites within a single
    # individual used for the creation of the vectors in make_3d_plots.py,
    # this data is created by concatenating the two columns when writing the
    # mapping file
    site_id_category = '%s&&%s' % (personal_id_column, category_to_split)
    header.insert(len(header) - 1, site_id_category)

    all_personal_ids = get_personal_ids(mapping_data, personal_id_index)
    if personal_ids is None:
        personal_ids = all_personal_ids
    else:
        for pid in personal_ids:
            if pid not in all_personal_ids:
                raise ValueError("'%s' is not a personal ID in the mapping "
                                 "file column '%s'." %
                                 (pid, personal_id_column))

    if time_series_category not in header:
        raise ValueError("Time series field '%s' is not a mapping file column "
                         "header." % time_series_category)

    output_directories = []
    # Paths (possibly glob patterns) of intermediates to delete at the end
    # unless retain_raw_data is set.
    raw_data_files = []
    raw_data_dirs = []

    # Rarefy the OTU table and split by body site here (instead of on a
    # per-individual basis) as we can use the same rarefied and split tables
    # for each individual.
    if not suppress_otu_category_significance:
        rarefied_otu_table_fp = join(
            output_dir,
            add_filename_suffix(otu_table_fp,
                                '_even%d' % rarefaction_depth))

        if body_site_rarefied_otu_table_dir is None:
            commands = []
            cmd_title = 'Rarefying OTU table'
            cmd = 'single_rarefaction.py -i %s -o %s -d %s' % (
                otu_table_fp, rarefied_otu_table_fp, rarefaction_depth)
            commands.append([(cmd_title, cmd)])
            raw_data_files.append(rarefied_otu_table_fp)

            per_body_site_dir = join(output_dir, 'per_body_site_otu_tables')

            cmd_title = 'Splitting rarefied OTU table by body site'
            cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
                rarefied_otu_table_fp, mapping_fp, category_to_split,
                per_body_site_dir)
            commands.append([(cmd_title, cmd)])
            raw_data_dirs.append(per_body_site_dir)

            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)
        else:
            per_body_site_dir = body_site_rarefied_otu_table_dir

    for person_of_interest in personal_ids:
        create_dir(join(output_dir, person_of_interest))

        personal_mapping_file_fp = join(output_dir, person_of_interest,
                                        'mapping_file.txt')
        html_fp = join(output_dir, person_of_interest, 'index.html')

        personal_mapping_data = create_personal_mapping_file(
            mapping_data, person_of_interest, personal_id_index,
            bodysite_index, individual_titles)

        personal_mapping_f = open(personal_mapping_file_fp, 'w')
        personal_mapping_f.write(
            format_mapping_file(header, personal_mapping_data, comments))
        personal_mapping_f.close()
        raw_data_files.append(personal_mapping_file_fp)

        # Distinct values of the self/other column and of the split
        # category found in this individual's mapping data.
        column_title_index = header.index(column_title)
        column_title_values = set([e[column_title_index]
                                   for e in personal_mapping_data])
        cat_index = header.index(category_to_split)
        cat_values = set([e[cat_index] for e in personal_mapping_data])

        # Generate alpha diversity boxplots, split by body site, one per
        # metric. We run this one first because it completes relatively
        # quickly and it does not call any QIIME scripts.
        alpha_diversity_boxplots_html = ''
        if not suppress_alpha_diversity_boxplots:
            adiv_boxplots_dir = join(output_dir, person_of_interest,
                                     'adiv_boxplots')
            create_dir(adiv_boxplots_dir)
            output_directories.append(adiv_boxplots_dir)

            logger.write("\nGenerating alpha diversity boxplots (%s)\n\n" %
                         person_of_interest)

            plot_filenames = _generate_alpha_diversity_boxplots(
                collated_dir, personal_mapping_file_fp, category_to_split,
                column_title, rarefaction_depth, adiv_boxplots_dir)

            # Create relative paths for use with the index page.
            rel_boxplot_dir = basename(normpath(adiv_boxplots_dir))
            plot_fps = [join(rel_boxplot_dir, plot_filename)
                        for plot_filename in plot_filenames]

            alpha_diversity_boxplots_html = \
                create_alpha_diversity_boxplots_html(plot_fps)

        ## Alpha rarefaction steps
        if not suppress_alpha_rarefaction:
            rarefaction_dir = join(output_dir, person_of_interest,
                                   'alpha_rarefaction')
            output_directories.append(rarefaction_dir)

            commands = []
            cmd_title = 'Creating rarefaction plots (%s)' % person_of_interest
            cmd = 'make_rarefaction_plots.py -i %s -m %s -p %s -o %s' % (
                collated_dir, personal_mapping_file_fp, prefs_fp,
                rarefaction_dir)
            commands.append([(cmd_title, cmd)])

            raw_data_dirs.append(join(rarefaction_dir, 'average_plots'))
            raw_data_dirs.append(join(rarefaction_dir, 'average_tables'))

            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)

        ## Beta diversity steps
        if not suppress_beta_diversity:
            pcoa_dir = join(output_dir, person_of_interest, 'beta_diversity')
            pcoa_time_series_dir = join(output_dir, person_of_interest,
                                        'beta_diversity_time_series')
            output_directories.append(pcoa_dir)
            output_directories.append(pcoa_time_series_dir)

            commands = []
            cmd_title = 'Creating beta diversity time series plots (%s)' % \
                        person_of_interest
            # '%' binds tighter than '+', so each half is formatted on its
            # own before the two pieces are concatenated.
            cmd = ('make_3d_plots.py -m %s -p %s -i %s -o %s --custom_axes=' %
                   (personal_mapping_file_fp, prefs_fp, coord_fp,
                    pcoa_time_series_dir) +
                   '\'%s\' --add_vectors=\'%s,%s\'' %
                   (time_series_category, site_id_category,
                    time_series_category))
            commands.append([(cmd_title, cmd)])

            cmd_title = 'Creating beta diversity plots (%s)' % \
                        person_of_interest
            cmd = 'make_3d_plots.py -m %s -p %s -i %s -o %s' % (
                personal_mapping_file_fp, prefs_fp, coord_fp, pcoa_dir)
            commands.append([(cmd_title, cmd)])

            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)

        ## Time series taxa summary plots steps
        if not suppress_taxa_summary_plots:
            area_plots_dir = join(output_dir, person_of_interest,
                                  'time_series')
            create_dir(area_plots_dir)
            output_directories.append(area_plots_dir)

            ## Split OTU table into self/other per-body-site tables
            commands = []
            cmd_title = 'Splitting OTU table into self/other (%s)' % \
                        person_of_interest
            cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
                otu_table_fp, personal_mapping_file_fp, column_title,
                area_plots_dir)
            commands.append([(cmd_title, cmd)])

            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)

            for column_title_value in column_title_values:
                biom_fp = join(area_plots_dir,
                               add_filename_suffix(otu_table_fp,
                                                   '_%s' % column_title_value))
                column_title_map_fp = join(area_plots_dir,
                                           'mapping_%s.txt' %
                                           column_title_value)
                raw_data_files.append(biom_fp)
                raw_data_files.append(column_title_map_fp)

                body_site_dir = join(area_plots_dir, column_title_value)

                commands = []
                cmd_title = 'Splitting "%s" OTU table by body site (%s)' % \
                            (column_title_value, person_of_interest)
                cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
                    biom_fp, personal_mapping_file_fp, category_to_split,
                    body_site_dir)
                commands.append([(cmd_title, cmd)])
                raw_data_dirs.append(body_site_dir)

                # The split must finish before we can test which per-body-
                # site tables exist below.
                command_handler(commands, status_update_callback, logger,
                                close_logger_on_success=False)

                commands = []
                for cat_value in cat_values:
                    body_site_otu_table_fp = join(
                        body_site_dir,
                        add_filename_suffix(biom_fp, '_%s' % cat_value))

                    # We won't always get an OTU table if the mapping file
                    # category contains samples that aren't in the OTU table
                    # (e.g. the 'na' state for body site).
                    if exists(body_site_otu_table_fp):
                        plots = join(area_plots_dir, 'taxa_plots_%s_%s' % (
                            column_title_value, cat_value))

                        cmd_title = 'Creating taxa summary plots (%s)' % \
                                    person_of_interest
                        cmd = ('summarize_taxa_through_plots.py -i %s '
                               '-o %s -c %s -m %s -s' %
                               (body_site_otu_table_fp, plots,
                                time_series_category,
                                personal_mapping_file_fp))
                        if parameter_fp is not None:
                            cmd += ' -p %s' % parameter_fp
                        commands.append([(cmd_title, cmd)])

                        raw_data_files.append(join(plots, '*.biom'))
                        raw_data_files.append(join(plots, '*.txt'))

                        # NOTE(review): placed inside the exists() branch so
                        # a comparative page is only written for body sites
                        # that produced an OTU table -- confirm against the
                        # original layout.
                        create_comparative_taxa_plots_html(
                            cat_value,
                            join(area_plots_dir,
                                 '%s_comparative.html' % cat_value))

                command_handler(commands, status_update_callback, logger,
                                close_logger_on_success=False)

        # Generate OTU category significance tables (per body site).
        otu_cat_sig_output_fps = []
        otu_category_significance_html = ''
        if not suppress_otu_category_significance:
            otu_cat_sig_dir = join(output_dir, person_of_interest,
                                   'otu_category_significance')
            create_dir(otu_cat_sig_dir)
            output_directories.append(otu_cat_sig_dir)

            # For each body-site rarefied OTU table, run
            # otu_category_significance.py using self versus other category.
            # Keep track of each output file that is created because we need
            # to parse these later on.
            commands = []
            for cat_value in cat_values:
                body_site_otu_table_fp = join(
                    per_body_site_dir,
                    add_filename_suffix(rarefied_otu_table_fp,
                                        '_%s' % cat_value))

                if exists(body_site_otu_table_fp):
                    otu_cat_output_fp = join(otu_cat_sig_dir,
                                             'otu_cat_sig_%s.txt' % cat_value)

                    cmd_title = ('Testing for significant differences in '
                                 'OTU abundances in "%s" body site (%s)' % (
                                     cat_value, person_of_interest))
                    cmd = ('otu_category_significance.py -i %s -m %s -c %s '
                           '-o %s' % (body_site_otu_table_fp,
                                      personal_mapping_file_fp,
                                      column_title,
                                      otu_cat_output_fp))
                    commands.append([(cmd_title, cmd)])
                    raw_data_files.append(otu_cat_output_fp)
                    otu_cat_sig_output_fps.append(otu_cat_output_fp)

            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)

            # Reformat otu category significance tables.
            otu_cat_sig_html_filenames = \
                format_otu_category_significance_tables_as_html(
                    otu_cat_sig_output_fps, alpha, otu_cat_sig_dir,
                    individual_titles, rep_set_fp=rep_set_fp)

            # Create relative paths for use with the index page.
            rel_otu_cat_sig_dir = basename(normpath(otu_cat_sig_dir))
            otu_cat_sig_html_fps = [join(rel_otu_cat_sig_dir, html_filename)
                                    for html_filename in
                                    otu_cat_sig_html_filenames]

            otu_category_significance_html = \
                create_otu_category_significance_html(otu_cat_sig_html_fps)

        # Create the index.html file for the current individual.
        create_index_html(
            person_of_interest, html_fp,
            alpha_diversity_boxplots_html=alpha_diversity_boxplots_html,
            otu_category_significance_html=otu_category_significance_html)

    logger.close()

    # Clean up the unnecessary raw data files and directories. glob will only
    # grab paths that exist.
    if not retain_raw_data:
        for raw_data_fp_glob in raw_data_files:
            remove_files(glob(raw_data_fp_glob))

        for raw_data_dir_glob in raw_data_dirs:
            for dir_to_remove in glob(raw_data_dir_glob):
                rmtree(dir_to_remove)

    return output_directories