def get_mean_rates(treatment):
    '''
    Average the respiration rate over every pair of consecutive samplings.

    :param treatment: str
        either 't' (MRE treated) or 'c' (control); handed to get_stats to
        select which treatment is summarised.

    :return: Stats
        means: for each soil, the mean of the rates measured at the two ends
        of every interval in INTERVALS_LIST.
        stde: the standard errors of those means, combined in quadrature.
        Both frames are indexed by (week, t_initial, t_end).
    '''
    # week label of every interval: 5 intervals in week 1, 6 in week 2, 5 in week 3
    intervals_per_week = ((1, 5), (2, 6), (3, 5))
    week_labels = [
        week for week, n_intervals in intervals_per_week
        for _ in range(n_intervals)
    ]

    # result frames: one row per interval, one column per soil
    interval_index = MultiIndex.from_arrays(
        arrays=[week_labels, BEGININGS, ENDINGS],
        names=['week', 't_initial', 't_end'],
    )
    respiration_rates = DataFrame(index=interval_index, columns=SOILS)
    rates_stnd_errors = DataFrame(index=interval_index, columns=SOILS)

    # statistics at the individual sampling points
    sampled = get_stats(RAW_DATA, treatment)
    sampled_means = sampled.means
    sampled_stde = sampled.stde

    # (start, end) sampling points delimiting each interval
    endpoints = [(interval[0], interval[1]) for interval in INTERVALS_LIST]

    for soil in SOILS:
        soil_means = sampled_means[soil]
        soil_errors = sampled_stde[soil]

        averaged = [
            (soil_means.loc[start] + soil_means.loc[end]) / 2
            for start, end in endpoints
        ]
        # error of a two-point average: half the quadrature sum of both errors
        propagated = [
            (soil_errors.loc[start]**2 + soil_errors.loc[end]**2)**0.5 / 2
            for start, end in endpoints
        ]

        respiration_rates[soil] = averaged
        rates_stnd_errors[soil] = propagated

    return Stats(means=respiration_rates, stde=rates_stnd_errors)
def normalize_to_initial(raw_data, treatment='t', initial=None):
    '''
    Subtract the control mean of the first sampling (index label 0) from
    every replicate of the selected treatment.

    :param raw_data: DataFrame
        raw measurements; indexed with raw_data[treatment] and raw_data['c'].
    :param treatment: str
        key of the treatment to normalize. A falsy value means raw_data is
        used as it is.
    :param initial: str
        name of the data set (loaded with get_raw_data) whose control supplies
        the initial values. When omitted the control of raw_data is used.

    :return: DataFrame
        same index and columns as the selected treatment data.
    '''
    samples = raw_data[treatment] if treatment else raw_data

    # control replicates that define the starting point
    control_samples = get_raw_data(initial)['c'] if initial else raw_data['c']

    # per-soil control mean at the first sampling
    first_sampling = stats.get_stats(control_samples).means.loc[0]

    # frame shaped like the samples, filled cell by cell with the
    # first-sampling mean of the soil each column belongs to
    offsets = DataFrame().reindex_like(samples)
    for sampling in samples.index:
        for replicate in samples.columns:
            # columns carry a 'replicate' level, the soil is the first entry
            offsets.loc[sampling, replicate] = first_sampling[replicate[0]]

    return samples - offsets
def plot_total_increase(raw_data_sets: dict) -> Figure:
    '''
    For every data set, compute the increase of the last-day MRE mean
    relative to the baseline: (last_day - baseline) / baseline.

    NOTE(review): despite the name and the Figure annotation, this body does
    not draw or return anything (it implicitly returns None) and every
    computed value is discarded at the end of each iteration. The plotting
    part looks unfinished - confirm before relying on the return value.

    :param raw_data_sets: dict
        maps a data set name to its raw data, as accepted by
        get_baseline_stats and get_stats.
    '''
    for data_set_name, data_set in raw_data_sets.items():

        # baseline: evaluate the helper once and reuse both of its results
        baseline_stats = get_baseline_stats(data_set)
        baseline = baseline_stats[0]
        baseline_std_error = baseline_stats[1]

        # last day of incubation: likewise a single get_stats call
        data_set_stats = get_stats(data_set)
        MRE_means = data_set_stats.MRE
        MRE_std_error = data_set_stats.MRE_SE
        last_day_means = MRE_means.iloc[-1]
        last_day_std_error = MRE_std_error.iloc[-1]

        # total increase, expressed relative to the baseline
        baseline_increase = last_day_means - baseline
        normalized = baseline_increase / baseline
def get_carbon_efficiency(treatment):
    '''
    Weekly carbon use efficiency (CUE) over the first three weeks.

    CUE is the weekly change in MBC divided by the sum of that change and
    the weekly respiration.

    :param treatment: str
        key selecting the treatment in the raw MBC data; also passed to
        get_weekly_respiration.

    :return: Stats
        means: CUE for every week.
        stde: the error returned by propagate_error for the CUE values and
        the relative errors of respiration and MBC change.
    '''
    week_ends = [0, 7, 14, 21]  # start-finish of first 3 weeks
    final_day = week_ends[-1]

    # MBC statistics at the week boundaries only
    biomass_stats = get_stats(get_raw_data('MBC')[treatment].loc[week_ends])
    biomass_means = biomass_stats.means
    biomass_errors = biomass_stats.stde

    # change over each week, labelled by the day the week starts on;
    # the final day has no following week, so its row is dropped
    biomass_change = biomass_means.diff().shift(-1).drop(final_day)

    # error of each difference: quadrature sum of the errors at both ends
    squared_errors = biomass_errors**2
    summed_squares = squared_errors.add(squared_errors.shift(1))
    biomass_change_error = (summed_squares**0.5).shift(-1).drop(final_day)

    # weekly respiration
    respiration_stats = get_weekly_respiration(treatment)
    respiration = respiration_stats.means
    respiration_error = respiration_stats.stde

    # align the MBC frames with the respiration index
    shared_index = respiration.index
    biomass_change.index = shared_index
    biomass_change_error.index = shared_index

    # assimilation-to-consumption ratio
    efficiency = biomass_change / (respiration + biomass_change)

    # error propagation works on relative errors
    relative_growth_error = biomass_change_error / biomass_change
    relative_respiration_error = respiration_error / respiration
    efficiency_error = propagate_error(
        efficiency,
        relative_respiration_error,
        relative_growth_error,
    )

    return Stats(means=efficiency, stde=efficiency_error)
def control_normalize(raw_data, control=None):
    '''
    subtract from each treatment replicate the average of the corresponding
    control replicates.

    for every sampling (row) and every treatment replicate (column) the mean
    of the control replicates of the same soil at the same sampling is
    subtracted.

    NOTE(review): an earlier version of this docstring described a division
    returned as a percentage. the code has always computed a difference, so
    the description was aligned with the code - confirm that the difference
    is the intended normalization.

    parameter: raw_data: DataFrame
        the data to be normalized. treated samples are read from column
        key 't'.

    parameter: control: str
        the name of the data set from which control values will be taken.
        if this parameter is not given control values are taken from
        raw_data['c'].

    returns: DataFrame
        same index and columns as the treated samples.
    '''
    # raw data of treated samples
    treatment_raw = raw_data.loc[:, 't']

    # control raw data
    if control:
        control_raw = get_raw_data(control)['c']
    else:
        control_raw = raw_data['c']

    control_means = stats.get_stats(control_raw).means  # shape ->(10,3)

    # empty dataframe with the same shape and indexes as treatment_raw
    control_reindexed = DataFrame().reindex_like(
        treatment_raw)  # shape ->(10,12)

    # fill it with the control mean matching every set of replicates
    for row in treatment_raw.index:
        for column in treatment_raw.columns:
            soil = column[0]  # first column level is the soil
            control_reindexed.loc[row, column] = control_means.loc[row, soil]

    return treatment_raw - control_reindexed
def plot_control_composite(raw_data_sets):
    '''
    Plot, for every data set, the control means as a percentage of the
    treatment means.

    Each data set gets its own subplot in a 4x2 grid with shared axes and
    each soil in SOILS is drawn as one error-bar line.

    :param raw_data_sets: dict
        maps a data set name to its raw data, as accepted by get_stats.

    :return: Figure
        the figure holding all subplots.

    :raises IndexError: when more data sets are given than there are subplots.
    '''

    def configure_axes(axes: Axes):
        axes.margins(x=0.1, y=0.1)
        axes.xaxis.set_minor_locator(MINOR_LOCATOR)
        axes.xaxis.set_major_locator(MAJOR_LOCATOR)
        # the data set name was stored as the axes label while plotting
        data_name = axes.get_label()
        axes.text(27, 0.8, data_name)
        axes.label_outer()

    # results are collected under names that the per-data-set statistics
    # cannot overwrite (previously `control_means` was both the result dict
    # and the loop-local DataFrame, so nothing was ever collected)
    normalized_means = {}
    normalized_stde = {}
    for name, data in raw_data_sets.items():
        treatment_stats = get_stats(data, 't')
        treatment_means = treatment_stats.means
        treatment_relative_stde = treatment_stats.stde / treatment_means

        control_stats = get_stats(data, 'c')
        control_means = control_stats.means
        control_relative_stde = control_stats.stde / control_means

        # control as percent of treatment; scaled by 100 exactly once
        percent_of_treatment = (control_means / treatment_means) * 100

        # relative error of a ratio: quadrature sum of both relative errors
        ratio_relative_stde = (control_relative_stde**2
                               + treatment_relative_stde**2)**0.5

        normalized_means[name] = percent_of_treatment
        # errorbar() expects absolute errors in the units of y, so the
        # relative error is scaled by the plotted value
        normalized_stde[name] = ratio_relative_stde * percent_of_treatment.abs()

    # subplots rows & columns
    n_rows = 4
    n_columns = 2

    # make figure and subplots
    figure, axes = pyplot.subplots(
        n_rows,
        n_columns,
        sharex=True,
        sharey=True,
        figsize=(15, 20),
        gridspec_kw={
            'hspace': 0,
            'wspace': 0
        },
    )
    # flatten once so subplots can be addressed by a single position, also
    # when there is nothing to plot
    axes = axes.flatten()

    figure.text(0.5, 0.05, r'$incubation\ time\ \/\ days$', ha='center')
    figure.text(0.05,
                0.5,
                r'$%\ of treatment mean$',
                va='center',
                rotation=0.45)

    # plot: one subplot per data set, one line per soil
    for position, (name, data) in enumerate(normalized_means.items()):
        error = normalized_stde[name]
        x = data.index.values
        subplot = axes[position]
        for soil in SOILS:
            subplot.errorbar(x, data[soil].values, yerr=error[soil].values)
        subplot.set_label(name)

    # configure axes
    for ax in axes:
        configure_axes(ax)

    # legend: the lines of the first subplot, one per soil
    handles = axes[0].get_lines()
    labels = SOILS
    # loc is passed by keyword; positional use is deprecated in matplotlib
    figure.legend(handles, labels, loc='center right')

    return figure
COLORS = Constants.colors MARKERS = Constants.markers OUTPUT_FOLDER = Constants.output_folder # plotting parameters major_locator = MultipleLocator(7) # major ticks locations minor_locator = MultipleLocator(1) # minor ticks locations # which data set to load setup_arguments = get_setup_arguments() data_set_name = setup_arguments.sets[0] # get data set raw_data = get_raw_data(data_set_name) norm_raw = normalize_to_control(raw_data) stats = get_stats(norm_raw, 't') data_set = stats.means data_stdv = stats.stdv DAYS_TO_FIT = data_set.index.values[1:] def get_chi_square(fit_result): def exclude_nans(array): where_nan = numpy.isnan(array) # boolean array, True where NAN has_nan = numpy.any(where_nan) if has_nan: excluded = ma.masked_array(array, where_nan)