def plot(
    ranks: str,
    table: str,
    sample_metadata: str,
    feature_metadata: str,
    output_dir: str,
    extreme_feature_count: int,
    assume_gnps_feature_metadata: bool,
    debug: bool,
) -> None:
    """Generates a visualization of feature rankings and log-ratios.

    The resulting visualization contains two plots. The first plot shows
    how features are ranked, and the second plot shows the log-ratio of
    "selected" features' abundances within samples.

    The visualization is interactive, so which features are "selected" to
    construct log-ratios -- as well as various other properties of the
    visualization -- can be changed by the user.
    """
    # Parameter notes (kept out of the docstring so its text is unchanged):
    #   ranks: location of the rank file, handed to read_rank_file().
    #   table: location of the BIOM table, handed to load_table().
    #   sample_metadata: location of the sample metadata file.
    #   feature_metadata: location of the feature metadata file, or None to
    #       skip feature metadata entirely.
    #   output_dir: folder handed to process_and_generate() and reported to
    #       the user once the visualization has been generated.
    #   extreme_feature_count: forwarded unchanged to process_and_generate().
    #   assume_gnps_feature_metadata: if truthy, the feature metadata file is
    #       read with read_gnps_feature_metadata_file() instead of
    #       read_metadata_file().
    #   debug: if truthy, DEBUG-level log messages are enabled.

    # inspired by https://stackoverflow.com/a/14098306/10730311
    if debug:
        logging.basicConfig(level=logging.DEBUG)

    logging.debug("Starting the standalone Qurro script.")
    loaded_biom = load_table(table)
    logging.debug("Loaded BIOM table.")

    df_sample_metadata = escape_columns(
        read_metadata_file(sample_metadata), "sample metadata"
    )

    # read_rank_file() returns both the ranks DataFrame and a human-readable
    # rank type ("Differential" or "Feature Loading"); unpack both so that
    # the DataFrame (and not a tuple) is what gets passed along.
    feature_ranks, rank_type = read_rank_file(ranks)

    df_feature_metadata = None
    if feature_metadata is not None:
        if assume_gnps_feature_metadata:
            # The GNPS reader is given the ranks alongside the file path.
            df_feature_metadata = read_gnps_feature_metadata_file(
                feature_metadata, feature_ranks
            )
        else:
            df_feature_metadata = escape_columns(
                read_metadata_file(feature_metadata), "feature metadata"
            )
    logging.debug("Read in metadata.")

    # Same argument order as the call in create_q2_visualization(): the rank
    # type goes directly after the feature ranks.
    process_and_generate(
        feature_ranks,
        rank_type,
        df_sample_metadata,
        loaded_biom,
        output_dir,
        df_feature_metadata,
        extreme_feature_count,
    )
    print(
        "Successfully generated a visualization in the folder {}.".format(
            output_dir
        )
    )
def create_q2_visualization(
    output_dir,
    feature_ranks,
    rank_type,
    table,
    sample_metadata,
    feature_metadata,
    extreme_feature_count,
    debug,
):
    """Builds the visualization and renders it with q2templates.

    Feature metadata is optional (None is accepted); when given, it and the
    sample metadata are converted via their to_dataframe() methods. Column
    names of both metadata DataFrames and of the feature ranks are passed
    through escape_columns() before process_and_generate() is called. The
    path it returns is then rendered into output_dir.
    """
    # Debug messages are opt-in (--(p-)debug), mirroring the standalone
    # version of Qurro. As with there, this is inspired by
    # https://stackoverflow.com/a/14098306/10730311.
    if debug:
        logging.basicConfig(level=logging.DEBUG)
    logging.debug("Starting create_q2_visualization().")

    # Feature metadata first, then sample metadata -- same order as before.
    feature_md_df = (
        None
        if feature_metadata is None
        else escape_columns(
            feature_metadata.to_dataframe(), "feature metadata"
        )
    )
    sample_md_df = escape_columns(
        sample_metadata.to_dataframe(), "sample metadata"
    )
    logging.debug("Converted metadata to DataFrames.")

    escaped_ranks = escape_columns(feature_ranks, "feature ranks")

    generated_index = process_and_generate(
        escaped_ranks,
        rank_type,
        sample_md_df,
        table,
        output_dir,
        feature_md_df,
        extreme_feature_count,
    )

    # Hand the generated index over to q2templates.render().
    # TODO: do we need to specify plot_name in the context in this way? I'm
    # not sure where it is being used in the first place, honestly.
    # The plot name is whatever follows the final "/" in output_dir.
    render_context = {"plot_name": output_dir.rsplit("/", 1)[-1]}
    q2templates.render(generated_index, output_dir, context=render_context)
# NOTE(review): another read_rank_file() is defined further down in the
# visible source and returns (DataFrame, rank_type). If both really live in
# the same module, that later definition replaces this one -- confirm which
# is intended.
def read_rank_file(file_loc):
    """Loads a file of feature ranks as a DataFrame.

    Files whose name ends with ".tsv" are read with differentials_to_df();
    everything else is handed to ordination_to_df(). The resulting
    DataFrame is passed through escape_columns() before being returned.
    """
    # No validation is needed for the non-.tsv case: ordination_to_df() will
    # raise an appropriate error if it can't process the file.
    parse = (
        differentials_to_df
        if file_loc.endswith(".tsv")
        else ordination_to_df
    )
    return escape_columns(parse(file_loc), "feature ranks")
def read_rank_file(file_loc):
    """Loads a file of feature ranks and reports what kind of ranks it has.

    Returns a 2-tuple: the ranks DataFrame (after escape_columns()) and a
    human-readable "rank type" string. The rank type is "Differential" for
    files whose name ends with ".tsv" (read with differentials_to_df()), and
    "Feature Loading" for anything else (read with ordination_to_df()).
    """
    if file_loc.endswith(".tsv"):
        ranks, kind = differentials_to_df(file_loc), "Differential"
    else:
        # Anything that isn't a .tsv is treated as an ordination;
        # ordination_to_df() will raise an appropriate error if it can't
        # process this file.
        ranks, kind = ordination_to_df(file_loc), "Feature Loading"

    escaped_ranks = escape_columns(ranks, "feature ranks")
    return escaped_ranks, kind