Esempio n. 1
0
def compare_column_across_conditions(column, df):
    """Compare one column between WT and pin/pin samples and save a CSV.

    For every sample type found in ``df``, the values of ``column`` are split
    by condition and an independent two-sample t-test is run between the
    'WT' and 'pin/pin' groups.  The two group means and the p-value are
    collected in a table with one row per entry of ``SAMPLE_NAMES.values()``
    and written to ``<column>_comparison.csv`` (spaces in ``column`` become
    underscores) in the current working directory.  Sample types that do not
    occur in ``df`` keep empty (NaN) cells.

    Args:
        column: Name of the column in ``df`` to compare.  Must be a string,
            since it is also used to build the output file name.
        df: DataFrame holding the per-sample values.  It is copied first, so
            the caller's frame is not modified.

    Raises:
        KeyError: If a sample type has no 'WT' or no 'pin/pin' rows, or if a
            sample type is not one of ``SAMPLE_NAMES.values()``.
    """
    df = df.copy()  # Don't modify what was passed in
    # The return value is ignored, so the helper must annotate the copy in
    # place; the "sample_type" and "condition" columns grouped on below are
    # expected to come from this call.
    lim_pirna_common.add_conditions_and_sample_types(df)

    comparison_df = pd.DataFrame(
        index=list(SAMPLE_NAMES.values()),
        columns=['WT mean', 'pin/pin mean', 'p-value'])

    for (sample_type, sample_df) in df.groupby("sample_type"):
        condition_gb = sample_df.groupby("condition")[column]
        wt = condition_gb.get_group('WT')
        mut = condition_gb.get_group('pin/pin')

        (_, p) = stats.ttest_ind(wt, mut)

        # Assigning through .loc with an unknown label would silently append
        # a new row; keep failing loudly on unexpected sample types instead.
        if sample_type not in comparison_df.index:
            raise KeyError(sample_type)

        # Write straight into the frame.  The old `.ix` indexer no longer
        # exists in current pandas, and assigning into a row object pulled
        # out of the frame is not guaranteed to update the frame itself.
        comparison_df.loc[sample_type, 'WT mean'] = wt.mean()
        comparison_df.loc[sample_type, 'pin/pin mean'] = mut.mean()
        comparison_df.loc[sample_type, 'p-value'] = p

    comparison_df.to_csv("%s_comparison.csv" % column.replace(' ', '_'))
def plot_sequence_lengths(description, sequence_lengths_df, min_length, max_length):
    """Draw read-length boxplots for every sample type and colour scheme.

    Each row of ``sequence_lengths_df`` is converted to percentages of that
    row's total.  For every sample type, one boxplot is then produced per
    entry of ``colour_sets``, once without and once with the mean, via
    ``plot_greyscale_boxplot``.  Output paths are built inside the directory
    ``<description>_read_length_graphs``.

    Args:
        description: Label used for the output directory name and passed on
            to the plotting helper.
        sequence_lengths_df: DataFrame of read counts; presumably one row per
            sample and one column per read length -- TODO confirm against
            the caller.  It is not modified.
        min_length: Passed through unchanged to ``plot_greyscale_boxplot``.
        max_length: Passed through unchanged to ``plot_greyscale_boxplot``.
    """
    graph_dir = "%s_read_length_graphs" % description
    sacgf_utils.mk_path(graph_dir)  # presumably creates the directory if missing

    # Express every row as a percentage of its own total.
    total_reads = sequence_lengths_df.sum(axis=1)
    reads_percent = sequence_lengths_df.divide(total_reads / 100.0, axis=0)

    # Shared y-axis limit for all plots: the next multiple of 5 strictly
    # above the largest percentage.  This must be computed before the
    # annotation call below, which adds (at least) the SAMPLE_TYPE column
    # to the frame.
    max_percent = reads_percent.max().max()
    y_limit = np.floor(1 + max_percent / 5) * 5  # use multiples of 5
    add_conditions_and_sample_types(reads_percent)

    for (sample_type, sample_df) in reads_percent.groupby(SAMPLE_TYPE):
        # .items() instead of .iteritems(): the latter only exists on
        # Python 2 dicts, while .items() works on both Python 2 and 3.
        for (color_scheme, colors) in colour_sets.items():
            for plot_mean in [False, True]:
                mean_description = "_mean" if plot_mean else ""
                # No file extension is appended here; the path is handed to
                # the plotting helper as-is.
                graph_image = "read_lengths_%s%s_%s_boxplot" % (color_scheme, mean_description, sample_type)
                graph_image = os.path.join(graph_dir, graph_image)
                plot_greyscale_boxplot(graph_image, description, sample_type, sample_df, colors, y_limit, min_length, max_length, plot_mean)
Esempio n. 3
0
def save_df_with_condition_and_samples(filename, df):
    """Write an annotated copy of ``df`` to ``filename`` as CSV.

    The copy is passed through
    ``lim_pirna_common.add_conditions_and_sample_types`` before being saved,
    so the caller's DataFrame is left untouched.

    Args:
        filename: Destination handed to ``DataFrame.to_csv``.
        df: DataFrame to annotate and save.
    """
    annotated = df.copy()  # work on a copy; the helper's result is used in place
    lim_pirna_common.add_conditions_and_sample_types(annotated)
    annotated.to_csv(filename)