def distribution(excel_rows, item_name, items, file_name):
    """Estimate P(Category | Condition) for each item and export it to Excel.

    For every entry of ``items`` the matching rows are taken from
    ``monkey_df`` (``item_name == "Monkey"``) or ``gender_df``
    (``item_name == "gender"``), a ``BayesianEstimator`` is fitted on that
    subset using ``model``, and one row is appended to ``excel_rows`` for
    each (condition, category) pair whose estimated probability is a
    positive, non-NaN number.  ``monkey_df``, ``gender_df``, ``model``,
    ``conditions``, ``categories`` and ``isNaN`` are looked up in the
    enclosing scope.

    Args:
        excel_rows: List of rows, mutated in place.  ``excel_rows[0]`` is
            used as the column header of the resulting frame; it has to
            contain a ``'Probability'`` column and a column named
            ``item_name`` because both are referenced when writing the
            second sheet.
        item_name: ``"Monkey"`` or ``"gender"``; selects the source frame
            and the column used for de-duplication.
        items: Iterable of values of that column to process.
        file_name: Output path without extension; ``".xlsx"`` is appended.

    Returns:
        The ``pandas.DataFrame`` built from ``excel_rows[1:]``.

    Raises:
        ValueError: If ``item_name`` is not one of the supported values
            (only checked when ``items`` is non-empty).
    """
    # Two-sided 95% confidence level: z = |ppf((1 - 0.95) / 2)|.
    z_score = abs(st.norm.ppf(0.025))
    alpha = 1 - 0.95

    for item in items:
        if item_name == "Monkey":
            df = monkey_df[monkey_df.Monkey == item]
        elif item_name == "gender":
            df = gender_df[gender_df.gender == item]
        else:
            # Previously this fell through with ``df`` unbound (or still
            # pointing at the previous item's subset).
            raise ValueError(
                "Unsupported item_name {!r}; expected 'Monkey' or 'gender'"
                .format(item_name))

        estimator = BayesianEstimator(model, df)
        cat_cpd = estimator.estimate_cpd('Category',
                                         prior_type="bdeu",
                                         equivalent_sample_size=0)
        # The count table only depends on the item, so build it once
        # instead of once per (condition, category) pair.
        state_counts = estimator.state_counts('Category')
        n_obs = df.shape[0]

        for condition in conditions:
            for category in categories:
                try:
                    count = list(
                        state_counts[condition].to_dict().values()
                    )[0][category]
                    prob = cat_cpd.get_value(**{
                        'Condition': condition,
                        'Category': category
                    })
                except KeyError:
                    # Combination not present for this item: no row.
                    continue

                # Conservative margin: p_hat = q_hat = 0.5 because there is
                # no previous data.  The lower bound is clipped at 0.
                # NOTE(review): the upper bound is not clipped at 1 - confirm
                # whether that is intended.
                margin = z_score * math.sqrt((0.5 * 0.5) / n_obs)
                lower_ci = max(prob - margin, 0)
                upper_ci = prob + margin

                if not isNaN(prob) and prob > 0:
                    excel_rows.append([
                        item, condition, category, count, prob, lower_ci,
                        upper_ci, alpha
                    ])

    prob_df = pd.DataFrame.from_records(excel_rows[1:], columns=excel_rows[0])

    # The context manager saves and closes the workbook even if a sheet fails
    # to write; ``ExcelWriter.save()`` no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(file_name + ".xlsx") as writer:
        prob_df.to_excel(writer, sheet_name='Distribution')
        # One row per item: the lowest-probability entry of each item.
        prob_df.sort_values('Probability', ascending=True).drop_duplicates(
            [item_name]).to_excel(writer, sheet_name='prefference')

    return prob_df
def distribution(excel_rows, item_name, items, file_name, df_cols,
                 groupby_cols, bp_group):
    """Estimate P(Orientation | Left_categ, Right_categ) per item, then export and plot.

    For every entry of ``items`` the matching rows are taken from
    ``monkey_df`` (``item_name == "Monkey"``) or ``gender_df``
    (``item_name == "gender"``) and a ``BayesianEstimator`` is fitted on
    that subset using ``model``.  One row is appended to ``excel_rows`` for
    each (left, right, orientation) combination whose estimated probability
    is a positive, non-NaN number.  The resulting table is written to
    ``<file_name>.xlsx``; a box plot of the summed counts is saved to
    ``<file_name>.png`` and shown.  ``monkey_df``, ``gender_df``, ``model``,
    ``categories``, ``isNaN`` and ``plt`` are looked up in the enclosing
    scope.

    Args:
        excel_rows: List of rows, mutated in place.  ``excel_rows[0]`` is
            used as the column header of the resulting frame; it has to
            contain ``'Probability'``, ``'Count'`` and a column named
            ``item_name`` because all three are referenced below.
        item_name: ``"Monkey"`` or ``"gender"``; selects the source frame
            and the column used for de-duplication.
        items: Iterable of values of that column to process.
        file_name: Output path without extension; ``".xlsx"`` and
            ``".png"`` are appended.
        df_cols: Columns of the probability table kept for the plot
            (must include ``'Count'``).
        groupby_cols: Columns used to group before summing ``'Count'``.
        bp_group: Grouping passed as ``by`` to ``DataFrame.boxplot``.

    Returns:
        None.

    Raises:
        ValueError: If ``item_name`` is not one of the supported values
            (only checked when ``items`` is non-empty).
    """
    # Two-sided 95% confidence level: z = |ppf((1 - 0.95) / 2)|.
    z_score = abs(st.norm.ppf(0.025))
    alpha = 1 - 0.95
    orientations = ["left", "right"]

    for item in items:
        if item_name == "Monkey":
            df = monkey_df[monkey_df.Monkey == item]
        elif item_name == "gender":
            df = gender_df[gender_df.gender == item]
        else:
            # Previously this fell through with ``df`` unbound (or still
            # pointing at the previous item's subset).
            raise ValueError(
                "Unsupported item_name {!r}; expected 'Monkey' or 'gender'"
                .format(item_name))

        estimator = BayesianEstimator(model, df)
        cat_cpd = estimator.estimate_cpd('Orientation',
                                         prior_type="bdeu",
                                         equivalent_sample_size=6)
        # The count table only depends on the item, so build it once
        # instead of once per (left, right, orientation) combination.
        state_counts = estimator.state_counts('Orientation')
        n_obs = df.shape[0]

        for left in categories:
            for right in categories:
                for cat in orientations:
                    try:
                        count = state_counts[left][right][cat]
                        prob = cat_cpd.get_value(
                            **{
                                'Left_categ': left,
                                'Right_categ': right,
                                'Orientation': cat
                            })
                    except KeyError:
                        # Combination not present for this item: no row.
                        continue

                    # Conservative margin: p_hat = q_hat = 0.5 because there
                    # is no previous data.  The lower bound is clipped at 0.
                    # NOTE(review): the upper bound is not clipped at 1 -
                    # confirm whether that is intended.
                    margin = z_score * math.sqrt((0.5 * 0.5) / n_obs)
                    lower_ci = max(prob - margin, 0)
                    upper_ci = prob + margin

                    if not isNaN(prob) and prob > 0:
                        excel_rows.append([
                            item, left, right, cat, count, prob, lower_ci,
                            upper_ci, alpha
                        ])

    prob_df = pd.DataFrame.from_records(excel_rows[1:], columns=excel_rows[0])

    # Total count per group; the group keys stay in the index and are
    # referenced by name through ``by=bp_group``.
    gen_df = prob_df[df_cols].groupby(groupby_cols)['Count'].agg(['sum'])
    gen_df.boxplot(rot=90,
                   fontsize=12,
                   figsize=(16, 10),
                   column=['sum'],
                   by=bp_group,
                   return_type="both")
    plt.title(item_name.capitalize() + " Box plot grouped by : " +
              str(bp_group))
    plt.suptitle('')  # drop the automatic "grouped by" super-title
    plt.ylabel("sum")

    # The context manager saves and closes the workbook even if a sheet fails
    # to write; ``ExcelWriter.save()`` no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(file_name + ".xlsx") as writer:
        prob_df.to_excel(writer, sheet_name='Distribution')
        # One row per item: the highest-probability entry of each item.
        prob_df.sort_values('Probability', ascending=False).drop_duplicates(
            [item_name]).to_excel(writer, sheet_name='prefference')

    # Save before show(): the figure may be empty once the window is closed.
    plt.savefig(file_name + ".png", dpi=100)
    plt.show()
    plt.clf()