## Load data
# csv_to_mats (called with jrp=True) is unpacked into three per-party tables.
X_ldp, X_dpj, X_jrp = csv_to_mats('./utas12_ooc.csv', rtype="v", jrp=True)

# Recode 'policy00' identically for every party:
# 0 -> 1, 3 -> 2, 4/5/6 -> 3, 7/8 -> 4, 9/10 -> 5.
# Codes that are not listed are left untouched.
policy_codes_old = [0, 3, 4, 5, 6, 7, 8, 9, 10]
policy_codes_new = [1, 2, 3, 3, 3, 4, 4, 5, 5]
for party_frame in (X_ldp, X_dpj, X_jrp):
    party_frame['policy00'] = party_frame['policy00'].replace(
        policy_codes_old, policy_codes_new)

X = pd.concat([X_ldp, X_dpj, X_jrp])
print(X_ldp.shape, X_dpj.shape, X_jrp.shape, X.shape)

## Dictionary for Level and Party
party = {"LDP": "LDP", "DPJ": "DPJ", "JRP": "JRP"}

## Fitting cMCA and export plots
# Only the columns from position 6 onward are passed to cMCA.
feature_cols = slice(6, X.shape[1])

# LDP is the foreground (target) group, DPJ the background group.
cmca = CMCA(n_components=2, copy=True, check_input=True)
cmca = cmca.fit(fg=X_ldp.iloc[:, feature_cols],
                bg=X_dpj.iloc[:, feature_cols],
                alpha=1.5)
Y_fg = np.array(cmca.transform(X_ldp.iloc[:, feature_cols]))
Y_bg = np.array(cmca.transform(X_dpj.iloc[:, feature_cols]))

f = plt.figure()
plt.xlim([-1.5, 2])
plt.ylim([-1.5, 1.5])

# The party code of the first LDP row selects both the colour and the
# legend label of the foreground scatter.
fg_party = X_ldp["psup_short"].iloc[0]
plt.scatter(Y_fg[:, 0],
            Y_fg[:, 1],
            c=tableau10[fg_party],
            label=party[fg_party],
            alpha=0.8,
            linewidths=0)
print(X_con.shape, X_lab.shape, X_ldp.shape, X_snp.shape, X_gre.shape,
      X_uip.shape, X_oth.shape, X.shape)

## Dictionary for Level and Party
party = {
    1: "Con",
    2: "Lab",
    3: "LD",
    4: "SNP",
    5: "Green",
    6: "UKIP",
    7: "Other"
}

## Fitting cMCA and export plots
# The last three columns of each party frame are excluded from the analysis.
# The cut-off is computed once per frame and reused for both fit() and
# transform(), so transform() always sees exactly the columns the model was
# fitted on. Deriving it from the concatenated X instead can let the excluded
# trailing columns slip back in when X is wider than a party frame, because
# iloc silently clips an out-of-range slice end.
n_fg_items = X_con.shape[1] - 3
n_bg_items = X_uip.shape[1] - 3

# Con is the foreground (target) group, UKIP the background group.
cmca = CMCA(n_components=2, copy=True, check_input=True)
cmca = cmca.fit(fg=X_con.iloc[:, 0:n_fg_items],
                bg=X_uip.iloc[:, 0:n_bg_items],
                alpha=100)

# Row coordinates of both groups and column coordinates of the foreground.
Y_fg = np.array(cmca.transform(X_con.iloc[:, 0:n_fg_items]))
Y_bg = np.array(cmca.transform(X_uip.iloc[:, 0:n_bg_items]))
Y_fg_col = np.array(cmca.transform(X_con.iloc[:, 0:n_fg_items], axis='col'))

prefix_to_info = cmca.gen_prefix_to_info()

used_others_label = False
f = plt.figure()
for key in prefix_to_info.keys():
    # Per-prefix entries read here: 'indices' and the rank stored under
    # 'loading_ranks_norm_0'.
    indices = prefix_to_info[key]['indices']
    rank_1 = prefix_to_info[key]['loading_ranks_norm_0']
# Fragment of the issue-value (Dem vs. Rep) cMCA example; the original line
# breaks and indentation were lost, so the statements below sit on one line.
# NOTE(review): the leading fillna_based_on_dtype(...) calls and
# `return (X_d, X_r)` are the tail of a function whose `def` line is outside
# this view (presumably csv_to_mats_2, which is called right after) -- confirm
# and restore the layout before running.
# Visible script steps, in order:
#   1. load X_d / X_r via csv_to_mats_2('./issuevalue_short.csv');
#   2. recode 'partyid': 3 and 2 -> 1 in X_d; 5, 6 and 7 -> 2 in X_r;
#   3. concatenate both frames into X and define the party label dict;
#   4. fit CMCA with X_r as foreground and X_d as background (alpha=1.5) on
#      the column slice 1:(X.shape[1] - 1);
#   5. compute row coordinates for both groups (Y_fg, Y_bg) and column
#      coordinates for the foreground (Y_fg_col);
#   6. start a loop over cmca.gen_prefix_to_info() that reads 'indices' and
#      'loading_ranks_norm_1', mapping any rank of 9 or more to -1.
fillna_based_on_dtype(X_d) fillna_based_on_dtype(X_r) return (X_d, X_r) X_d, X_r = csv_to_mats_2('./issuevalue_short.csv') X_d['partyid'] = X_d['partyid'].replace([3, 2], 1) X_r['partyid'] = X_r['partyid'].replace([5, 6, 7], 2) X = pd.concat([X_d, X_r]) ##Disctionay for Level and Party party = {1: "Dem", 2: "Rep"} cmca = CMCA(n_components=2, copy=True, check_input=True) cmca = cmca.fit(fg=X_r.iloc[:, 1:(X.shape[1] - 1)], bg=X_d.iloc[:, 1:(X.shape[1] - 1)], alpha=1.5) Y_fg = np.array(cmca.transform(X_r.iloc[:, 1:(X.shape[1] - 1)])) Y_bg = np.array(cmca.transform(X_d.iloc[:, 1:(X.shape[1] - 1)])) Y_fg_col = np.array(cmca.transform(X_r.iloc[:, 1:(X.shape[1] - 1)], axis='col')) prefix_to_info = cmca.gen_prefix_to_info() used_others_label = False f = plt.figure() for key in prefix_to_info.keys(): indices = prefix_to_info[key]['indices'] rank = prefix_to_info[key]['loading_ranks_norm_1'] rank = rank if rank < 9 else -1
# Congressional Voting Records Data Set
# https://archive.ics.uci.edu/ml/datasets/Congressional+Voting+Records
df = pd.read_csv('./data/house-votes-84.data', header=None)
with open('./data/house-votes-84.col_names', 'r') as f:
    # chr(10) is the newline character; it is spelled this way so that no
    # literal newline escape appears when the docs are generated with sphinx.
    df.columns = [name.replace(chr(10), '') for name in f]

# Column 0 is used as the label; every remaining column is a feature.
X = df.iloc[:, 1:]
y = np.array(df.iloc[:, 0])

# Foreground (target) group: democrats. Background group: republicans.
fg = X.iloc[y == 'democrat']
bg = X.iloc[y == 'republican']

# alpha = 0 (normal MCA on fg)
# alpha = 10 (contrastive MCA fg vs bg)
# alpha = 'auto' (contrastive MCA with auto selection of alpha)
cmca = CMCA(n_components=2)
for alpha in (0, 10, 'auto'):
    ### cMCA
    # 'auto' is translated into alpha=None plus the auto-selection flag.
    auto_alpha = alpha == 'auto'
    if auto_alpha:
        alpha = None
    cmca.fit(fg, bg, alpha=alpha, auto_alpha_selection=auto_alpha)

    # row coordinates (cloud of individuals), fg first and then bg
    Y_fg_row, Y_bg_row = (np.array(cmca.transform(group, axis='row'))
                          for group in (fg, bg))

    # col coordinates (cloud of categories), fg first and then bg
    Y_fg_col, Y_bg_col = (np.array(cmca.transform(group, axis='col'))
                          for group in (fg, bg))