def genMIC(x, y, t):
    """Score every feature against the target per ticker, average, and plot.

    For each ticker in ``x`` every feature column is scored with MINE against
    the first column of ``y[ticker]``. The per-ticker score tables are summed,
    divided by the number of tickers, drawn as a bar chart and written to a
    PNG whose file name embeds ``t``.

    Args:
        x: Mapping of ticker -> feature DataFrame. The column labels of the
            first entry are used to label the averaged result.
        y: Mapping of ticker -> target DataFrame; only its first column is
            read.
        t: Tag converted with ``str`` and embedded in the output file name.

    Returns:
        DataFrame holding the ticker-averaged scores, one column per feature.
    """
    first_frame = x[list(x.keys())[0]]
    mic_mean = np.zeros((1, first_frame.shape[1]))
    f_names = list(first_frame.columns.values)

    mine = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    mic = {}
    for ticker in x:
        features = x[ticker]
        for i in range(features.shape[1]):
            feature_name = features.columns.values[i]
            mine.compute_score(features.iloc[:, i], y[ticker].iloc[:, 0])
            # NOTE(review): mine_stats is defined elsewhere; it presumably
            # returns a one-column DataFrame of statistics -- confirm.
            stats = mine_stats(mine)
            if i == 0:
                # The first feature creates the ticker's table and names its
                # first column after that feature.
                mic[ticker] = stats
                mic[ticker].rename(
                    columns={mic[ticker].columns[0]: feature_name},
                    inplace=True)
            else:
                mic[ticker][feature_name] = stats
        mic_mean += mic[ticker]
    mic_mean /= len(x)

    mic_mean = pd.DataFrame(mic_mean)
    mic_mean.columns = f_names

    plot = mic_mean.plot.bar(figsize=(22.0, 14.0))
    plot.set_xlabel('Features', fontsize=24)
    plot.set_ylabel('MIC', fontsize=24)
    plot.tick_params(labelsize=24)
    plot.get_figure().savefig(
        '../3_Deliverables/Final Paper/data/MICT' + str(t) + '.png')
    return mic_mean
def get_mic_co(lagrange_l, neuron_l, layer_ind, neuron_ind):
    """Compute the MIC_e between two value arrays and wrap it in a dict row.

    The MINE ``alpha`` is picked from a lookup table keyed on the number of
    points in ``lagrange_l``. The score is forced to 0 when ``neuron_l`` is
    (almost) constant or when MINE returns NaN.

    Args:
        lagrange_l: Array exposing ``shape``; first MINE input.
        neuron_l: Iterable of values; second MINE input.
        layer_ind: Passed through to the row builder.
        neuron_ind: Passed through to the row builder.

    Returns:
        Whatever ``NonLinearGlobalDictRow.get_non_linear_global_dict_row``
        builds from ``(score, layer_ind, neuron_ind)``.

    Raises:
        ValueError: If ``lagrange_l`` has fewer than one point.
    """

    def compute_alpha(npoints):
        """Look up the MINE alpha for a sample of ``npoints`` points."""
        if npoints < 1:
            raise ValueError("the number of points must be >=1")
        bin_edges = [1, 25, 50, 250, 500, 1000, 2500, 5000, 10000, 40000]
        alpha_table = [0.85, 0.80, 0.75, 0.70, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4]
        bin_pos = np.digitize([npoints], bin_edges)[0]
        return alpha_table[bin_pos - 1]

    estimator = minepy.MINE(
        alpha=compute_alpha(lagrange_l.shape[0]), c=5, est="mic_e")
    estimator.compute_score(lagrange_l, neuron_l)
    score = estimator.mic()

    # A near-constant neuron or a NaN score carries no usable association.
    spread = max(neuron_l) - min(neuron_l)
    if spread < 1e-5 or np.isnan(score):
        score = 0

    return NonLinearGlobalDictRow.get_non_linear_global_dict_row(
        score, layer_ind, neuron_ind)
def sstats(X, Y, alpha=0.6, c=15, est="mic_approx"):
    """Compute MIC and normalised TIC for matching rows of ``X`` and ``Y``.

    Row ``i`` of ``X`` is paired with row ``i`` of ``Y``; only the first
    ``min(X.shape[0], Y.shape[0])`` rows are used.

    Args:
        X: Indexable object exposing ``shape``.
        Y: Indexable object exposing ``shape``.
        alpha: Forwarded to ``minepy.MINE``.
        c: Forwarded to ``minepy.MINE``.
        est: Forwarded to ``minepy.MINE``.

    Returns:
        Tuple ``(mic, tic)`` of NumPy arrays, one entry per row pair.
    """
    estimator = minepy.MINE(alpha=alpha, c=c, est=est)
    n_pairs = min(X.shape[0], Y.shape[0])

    mic_scores = []
    tic_scores = []
    for row in range(n_pairs):
        # One MINE object is reused; each compute_score replaces the
        # previous pair's result.
        estimator.compute_score(X[row], Y[row])
        mic_scores.append(estimator.mic())
        tic_scores.append(estimator.tic(norm=True))

    return np.asarray(mic_scores), np.asarray(tic_scores)
def max_info_coef(x, y):
    """Return the MIC between ``x`` and ``y``.

    Both inputs are converted with ``np.asarray`` before being passed to a
    MINE estimator configured with ``alpha=0.6, c=15, est="mic_approx"``.
    """
    estimator = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    estimator.compute_score(np.asarray(x), np.asarray(y))
    return estimator.mic()
def mic(x, y):
    """Calculate the maximal information coefficient of ``x`` and ``y``.

    Both inputs are copied into NumPy arrays first; the estimator uses
    ``alpha=0.6, c=15, est="mic_approx"``.
    """
    x_arr = np.array(x)
    y_arr = np.array(y)
    estimator = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    estimator.compute_score(x_arr, y_arr)
    return estimator.mic()
def compute_null_oneclass(X, Y=None, rowwise=False, B=9, c=5, nperm=250000,
                          seed=0):
    """Build a histogram of normalised TIC values from permuted row pairs.

    On each of ``nperm`` iterations a pair of rows is drawn, the second row is
    shuffled, and the normalised TIC of the pair is added to a histogram with
    ``NULL_HIST_RES`` equal-width bins on [0, 1].

    Row selection:
        * ``Y is None``: two distinct rows of ``X``.
        * ``Y`` given, ``rowwise=True``: the same row index in ``X`` and ``Y``.
        * ``Y`` given, ``rowwise=False``: independent row indices in ``X``
          and ``Y``.

    Args:
        X: DataFrame checked by ``mictools.utils.check_data``.
        Y: Optional second DataFrame.
        rowwise: See row selection above; only consulted when ``Y`` is given.
        B: Passed to ``minepy.MINE`` as ``alpha``.
        c: Passed to ``minepy.MINE`` as ``c``.
        nperm: Number of permutations to draw.
        seed: Seed of the ``numpy.random.RandomState`` driving all draws.

    Returns:
        DataFrame indexed by ``(BinStart, BinEnd)`` with columns ``NullCount``
        (per-bin count) and ``NullCountCum`` (right-tailed cumulative count).
    """
    mictools.utils.check_data(X, Y=Y)

    bins = np.linspace(0, 1, NULL_HIST_RES + 1)
    hist = np.zeros(NULL_HIST_RES, dtype=np.int64)
    rs = np.random.RandomState(seed)
    mine = minepy.MINE(alpha=B, c=c, est="mic_e")

    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
    # same ndarray on both old and new pandas versions.
    Xa = X.values
    if Y is None:
        idx = np.arange(Xa.shape[0])
        Ya, max_idx_rowwise = None, None
    else:
        Ya = Y.values
        idx = None
        max_idx_rowwise = min(Xa.shape[0], Ya.shape[0]) if rowwise else None

    for _ in range(nperm):
        if Y is None:
            i, j = rs.choice(idx, size=2, replace=False)
            x, y = Xa[i], rs.permutation(Xa[j])
        else:
            if rowwise:
                i = j = rs.randint(max_idx_rowwise)
            else:
                i, j = rs.randint(Xa.shape[0]), rs.randint(Ya.shape[0])
            x, y = Xa[i], rs.permutation(Ya[j])

        mine.compute_score(x, y)
        tic = mine.tic(norm=True)
        # Clamp so a value equal to the last edge lands in the last bin.
        hist_idx = min(np.digitize([tic], bins)[0] - 1, NULL_HIST_RES - 1)
        hist[hist_idx] += 1

    # right-tailed area
    hist_cum = np.cumsum(hist[::-1])[::-1]
    index = pd.MultiIndex.from_arrays([bins[:-1], bins[1:]],
                                      names=('BinStart', 'BinEnd'))
    hist_df = pd.DataFrame({"NullCount": hist, "NullCountCum": hist_cum},
                           index=index,
                           columns=["NullCount", "NullCountCum"])
    return hist_df
def _feature_impact_(self):
    """Rank the columns of ``self.X`` by their association with ``self.y``.

    For each column both the MIC and the Pearson r against ``self.y`` are
    computed and rounded to four decimals; the larger of the two is kept as
    the column's score.

    Returns:
        Series indexed by column name, sorted by score in descending order.
    """
    X, y = self.X, self.y
    estimator = minepy.MINE()
    feature_names = X.columns
    corr_scores = pd.Series([None] * len(feature_names), index=feature_names)

    for name in feature_names:
        column = X[name]
        estimator.compute_score(column, y)
        mic_score = np.around(estimator.mic(), decimals=4)
        pearson_r = np.around(pearsonr(column, y)[0], decimals=4)
        # NOTE(review): the signed r is compared, so a strong negative
        # correlation is only reflected through the MIC -- confirm intended.
        corr_scores[name] = max(mic_score, pearson_r)

    return corr_scores.sort_values(ascending=False)
def corr_func(X1, X2, corr_type=None):
    """Return the absolute correlation between two arrays.

    Both inputs are flattened to one dimension before the correlation is
    computed.

    Args:
        X1: Array-like of values (any shape).
        X2: Array-like of values with the same number of elements as ``X1``.
        corr_type: One of ``'MIC'``, ``'pearson'``, ``'spearman'`` or
            ``'kendall'``. When ``None`` the value of ``cf.corr_type`` is
            used.

    Returns:
        Absolute value of the selected correlation measure.

    Raises:
        ValueError: If ``corr_type`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if corr_type is None:
        corr_type = cf.corr_type

    # np.asarray(...).reshape(-1) flattens exactly like the former
    # reshape(-1, 1).T[0], and also accepts inputs without a .reshape method.
    s1 = pd.Series(np.asarray(X1).reshape(-1))
    s2 = pd.Series(np.asarray(X2).reshape(-1))

    if corr_type == 'MIC':
        mine = minepy.MINE(alpha=0.6, c=15, est="mic_approx")
        mine.compute_score(s1, s2)
        corr = mine.mic()
    elif corr_type in ('pearson', 'spearman', 'kendall'):
        corr = s1.corr(s2, method=corr_type)
    else:
        raise ValueError(
            "unsupported corr_type: {!r}; expected 'MIC', 'pearson', "
            "'spearman' or 'kendall'".format(corr_type))
    return abs(corr)
def maximal_information_coefficient(series1, series2):
    """Return the maximal information coefficient (MIC) of two series.

    MIC captures a wide range of associations, functional or not; for
    functional relationships its value roughly equals the coefficient of
    determination (R^2) of the data relative to the regression function.

    Args:
        series1 (numpy.ndarray): First series.
        series2 (numpy.ndarray): Second series.

    Returns:
        The MIC between ``series1`` and ``series2``, computed by a
        ``minepy.MINE`` estimator with its default settings.
    """
    estimator = minepy.MINE()
    estimator.compute_score(series1, series2)
    score = estimator.mic()
    return score
def mic_of_simulation(trajectories):
    """Return the pairwise MIC values for one set of SCN trajectories.

    Column 0 of ``trajectories`` is skipped. Columns 1..160 are sampled with
    stride 4 and the remaining columns with stride 3; the selected columns
    are stacked side by side and the MIC is computed for every unordered
    pair of them.

    Args:
        trajectories: 2-D array with one simulated variable per column.
            NOTE(review): the strides presumably pick one state variable per
            cell -- confirm against the model layout.

    Returns:
        List of MIC values, ordered as ``itertools.combinations`` yields the
        column pairs.
    """
    split = 160 + 1
    avpvipsol = trajectories[:, 1:split]
    navsol = trajectories[:, split:]
    per2 = np.hstack([avpvipsol[:, ::4], navsol[:, ::3]])
    numcells = per2.shape[1]

    # One MIC calculator is shared by all pairs.
    estimator = mp.MINE(alpha=0.6, c=15, est='mic_approx')
    mic_values = []
    for first, second in combinations(range(numcells), 2):
        estimator.compute_score(per2[:, first], per2[:, second])
        mic_values.append(estimator.mic())
    return mic_values
def get_mic_df(df, **kwargs):
    """Tabulate MINE statistics for every ordered pair of numeric columns.

    Only columns selected by ``select_dtypes(include=['int', 'float'])`` are
    used. For each ordered pair (including a column with itself) the MIC,
    MAS, MEV, MCN(0) and TIC are computed, and ``MICR2`` is added as MIC
    minus the squared ``DataFrame.corr`` coefficient of the pair.

    Args:
        df: Input DataFrame.
        **kwargs: Forwarded to ``minepy.MINE``.

    Returns:
        DataFrame with columns ``col``, ``col2``, ``MIC``, ``MAS``, ``MEV``,
        ``MCN``, ``TIC`` and ``MICR2``.
    """
    numeric = df.select_dtypes(include=['int', 'float'])

    rows = []
    for left, right in itertools.product(numeric.columns, repeat=2):
        estimator = minepy.MINE(**kwargs)
        estimator.compute_score(numeric[left], numeric[right])
        rows.append([
            left, right,
            estimator.mic(), estimator.mas(), estimator.mev(),
            estimator.mcn(0), estimator.tic()
        ])
    df_mic = pd.DataFrame(rows)
    df_mic.columns = ['col', 'col2', 'MIC', 'MAS', 'MEV', 'MCN', 'TIC']

    # Long-format correlation table: one row per (col, col2) pair.
    df_corr = pd.DataFrame(numeric.corr().stack()).reset_index()
    df_corr = df_corr.rename(
        columns={'level_0': 'col', 'level_1': 'col2', 0: 'corr'})
    df_corr['R2'] = df_corr['corr'].apply(lambda r: r**2)
    df_r2 = df_corr.filter(items=['col', 'col2', 'R2'])

    res = df_mic.merge(df_r2, on=['col', 'col2'])
    res['MICR2'] = res['MIC'] - res['R2']
    return res.filter(
        items=['col', 'col2', 'MIC', 'MAS', 'MEV', 'MCN', 'TIC', 'MICR2'])
def scatter_the_nonlinear_significant_but_not_linear_ones(
        lagrangian_values, layer_values_list, linear_threshold,
        nonlinear_threshold, out_dir):
    """Save scatter plots of pairs that are nonlinearly but not linearly related.

    Every array in ``lagrangian_values`` is compared with every neuron of
    every layer. A pair is plotted when the absolute Pearson coefficient is
    below ``linear_threshold`` and the MIC_e is above ``nonlinear_threshold``.

    Args:
        lagrangian_values: Mapping of key -> iterable of value arrays.
        layer_values_list: Iterable of layers, each an iterable of neuron
            value arrays.
        linear_threshold: Upper bound (exclusive) on ``abs`` of the linear
            correlation.
        nonlinear_threshold: Lower bound (exclusive) on the MIC.
        out_dir: Directory prefix of the saved ``.jpg`` files.
    """
    for key, nda in lagrangian_values.items():
        for ind, lagrange_l in enumerate(nda):
            for layer_ind, layer in enumerate(layer_values_list):
                for neuron_ind, neuron_l in enumerate(layer):
                    linear_co = np.corrcoef(lagrange_l, neuron_l)[1, 0]

                    estimator = minepy.MINE(
                        alpha=compute_alpha(lagrange_l.shape[0]),
                        c=5, est="mic_e")
                    estimator.compute_score(lagrange_l, neuron_l)
                    mic = estimator.mic()

                    selected = (abs(linear_co) < linear_threshold
                                and mic > nonlinear_threshold)
                    if not selected:
                        continue

                    name = f"{out_dir}/{key}_index_{ind}_VS_layer_{layer_ind}_neuron_{neuron_ind}_" \
                           f"linear_correlation{linear_co}_nonlinear_correlation{mic}.jpg"
                    plt.figure()
                    plt.scatter(lagrange_l, neuron_l)
                    plt.xlabel("lagrange")
                    plt.ylabel("neuron")
                    plt.savefig(name)
                    plt.close()
def plot_betas(nodes1, nodes2, betas, **kwargs):
    """Draw a heatmap of significant p-values for each node pair.

    For every row of ``betas`` the second element of
    ``pearsonr(zscore(row), zd2)`` is taken; values below 0.05 are kept and
    all others are replaced with 0. The values are pivoted into an
    ``n1`` x ``n2`` table, missing cells are filled from the transposed
    position, and the table is drawn with ``seaborn.heatmap``.

    NOTE(review): ``zd2``, ``zscore`` and ``pearsonr`` come from the enclosing
    module; the second element is the p-value if ``pearsonr`` is
    ``scipy.stats.pearsonr`` -- confirm.

    A former loop that ran MINE on every row and discarded the result has been
    removed: it had no effect on the plot.

    Args:
        nodes1: Row labels, one per row of ``betas``.
        nodes2: Column labels, one per row of ``betas``.
        betas: DataFrame whose rows are the series to test.
        **kwargs: Accepted for call compatibility; not used.
    """

    def st_sign(p):
        """Keep ``p`` when it is below 0.05, otherwise return 0."""
        return p if p < 0.05 else 0

    acc = [st_sign(pearsonr(zscore(a), zd2)[1]) for a in betas.values]

    df = pd.DataFrame(dict(n1=nodes1, n2=nodes2, a=acc))
    # pandas 2.0 made the arguments of DataFrame.pivot keyword-only.
    pdf = df.pivot(index="n1", columns="n2", values="a")

    # Mirror values into the NaN cells on an explicit copy: writing through
    # ``pdf.values`` only works when pandas happens to return a writable view.
    filled = pdf.values.copy()
    nz = np.nonzero(np.isnan(filled))
    filled[nz] = filled[nz[::-1]]
    pdf = pd.DataFrame(filled, index=pdf.index, columns=pdf.columns)

    sns.heatmap(pdf, annot=True, cmap="RdBu_r", fmt=".4f", center=0.)
def mic(x, y):
    """Return the MIC_e of ``x`` and ``y``.

    The MINE ``alpha`` is obtained from ``compute_alpha`` using the number of
    points in ``x`` (``x.shape[0]``); ``c`` is fixed at 5.
    """
    estimator = minepy.MINE(
        alpha=compute_alpha(x.shape[0]), c=5, est="mic_e")
    estimator.compute_score(x, y)
    return estimator.mic()
def getMIC(self):
    """Run MINE on ``self.X`` and ``self.Y`` and return the estimator.

    Both attributes are replaced in place by float NumPy arrays before the
    score is computed.

    Returns:
        The ``minepy.MINE`` object after ``compute_score``; callers read the
        statistics they need from it.
    """
    # Convert X first, then Y, storing each back on the instance.
    for attr in ("X", "Y"):
        setattr(self, attr, np.asarray(getattr(self, attr), dtype='float'))

    estimator = minepy.MINE(alpha=0.6, c=15)
    estimator.compute_score(self.X, self.Y)
    return estimator
def __init__(self, alpha=0.6, c=15):
    """Create the underlying ``minepy.MINE`` estimator.

    Args:
        alpha: Converted with ``float``; must satisfy ``0 < alpha <= 1``.
        c: Converted with ``int``; must be positive.
    """
    alpha = float(alpha)
    c = int(c)
    assert 0 < alpha <= 1 and c > 0
    self.mine = minepy.MINE(alpha=alpha, c=c)
    super(MINEComputer, self).__init__()
def mic(x, y):
    """Return ``(MIC, 0.5)`` for the pair ``x``, ``y``.

    NOTE(review): the constant 0.5 is a fixed second element; presumably a
    placeholder for callers that expect a (score, p-value) pair -- confirm.
    """
    estimator = minepy.MINE()
    # Compute the maximal normalised mutual-information scores of x and y.
    estimator.compute_score(x, y)
    # mic() returns the maximal information coefficient.
    score = estimator.mic()
    return (score, 0.5)
def compute_strength(X, pval, output_fn, labels=None, Y=None, t=0.05,
                     alpha=None, c=5):
    """Write MIC_e, Pearson and Spearman strengths of significant pairs to a TSV.

    A variable pair is processed when at least one of its p-values in
    ``pval`` is below ``t``. For every class in ``labels`` and every such
    pair, one tab-separated row is written to ``output_fn``.

    Args:
        X: DataFrame; rows are selected with ``.loc[var]`` and columns are
            filtered by class label.
        pval: DataFrame of p-values indexed by ``(var1, var2)`` with one
            column per class.
        output_fn: Path of the output file (overwritten).
        labels: Series of class labels, one per column of ``X``. When
            ``None`` every column gets the single class ``'None'``.
        Y: Optional second DataFrame; when given, ``var2`` is read from it.
        t: Significance threshold applied to ``pval``.
        alpha: MINE ``alpha``. When ``None`` it is chosen per class by
            ``compute_alpha`` from the class size and reported on stdout.
        c: MINE ``c`` parameter.
    """
    mictools.utils.check_data(X, labels=labels, Y=Y)

    if labels is None:
        labels = pd.Series('None', index=X.columns)

    # compute MIC_e for pairs with at least one p-value < t
    index = pval.index[(pval < t).sum(axis=1) > 0]

    if alpha is None:
        sys.stdout.write("Automatically chosen alphas:\n")

    header = [
        "Class", "Var1", "Var2", "TICePVal", "PearsonR", "SpearmanRho", "MICe"
    ]

    # The context manager closes the file even if a computation below raises.
    with open(output_fn, 'w') as strength_handle:
        strength_writer = csv.writer(strength_handle, delimiter='\t',
                                     lineterminator='\n')
        strength_writer.writerow(header)

        for cl in sorted(labels.unique()):
            keep = (cl == labels)
            X_cl = X.loc[:, keep]
            Y_cl = Y.loc[:, keep] if Y is not None else None

            if alpha is None:
                alpha_cl = compute_alpha(X_cl.shape[1])
                sys.stdout.write("* {}: {:f}\n".format(cl, alpha_cl))
            else:
                alpha_cl = alpha

            mine = minepy.MINE(alpha=alpha_cl, c=c, est="mic_e")

            for var1, var2 in index:
                x = X_cl.loc[var1]
                y = X_cl.loc[var2] if Y is None else Y_cl.loc[var2]

                mine.compute_score(x, y)
                mic = mine.mic()
                p = pval.loc[(var1, var2), cl]
                R, _ = scipy.stats.pearsonr(x, y)
                rho, _ = scipy.stats.spearmanr(x, y)

                strength_writer.writerow([
                    cl, var1, var2, "{:e}".format(p), "{:.6f}".format(R),
                    "{:.6f}".format(rho), "{:.6f}".format(mic)
                ])
def coeff_agreement(obs_linear, sim_linear, obs_shps, sim_shps, poly,
                    weight_sim, obs_dt):
    """Collect agreement statistics between observed and simulated trends.

    Interpretation of the scores: kappa < 0 means agreement worse than
    random, kappa = 1 perfect agreement; ICC = 1 is perfect agreement.

    Args:
        obs_linear: Mapping poly -> dict with a ``'bp_params'`` dict that
            contains at least ``'slope'`` (observed linear fit).
        sim_linear: Same structure for the simulated linear fit.
        obs_shps: Mapping poly -> dict with a ``'bp_params'`` dict
            (observed).
        sim_shps: Same structure for the simulation.
        poly: Sequence of keys into the four mappings above.
        weight_sim: 2-D array of simulated values, one column per entry of
            ``poly``.
        obs_dt: 2-D array of observed values, same layout; NaN marks missing
            observations.

    Returns:
        Dict with four sub-dicts:
            ``'slope'``: slope sign pairs and Kendall/Spearman coefficients
                of the slope signs and of the slopes themselves.
            ``'slope_intercept'``: ICC, Kendall, Spearman and MIC over all
                ``bp_params`` values, plus Kendall/Spearman of their signs.
            ``'bp'``: ``'icc'`` and ``'mic'`` over the ``*_shps`` parameters.
            ``'points_diff'``: per-poly arrays ``'icc'``, ``'spearmanr'`` and
                ``'mic'`` comparing simulated and observed points.

    Fixes relative to the previous version:
        * ``_kp`` compared the simulated value with itself, so a negative
          simulation paired with a non-negative observation was mislabelled.
        * ``'bp'`` was reassigned when the MIC was stored, dropping ``'icc'``.
        * ``points_diff['spearmanr']`` was allocated but never computed.
    """
    mine = minepy.MINE()

    def _kp(sim, obs):
        """Binarise paired values by sign: 1 when >= 0, otherwise 0.

        Returns the observed flags first, then the simulated flags.
        """
        bp_s = []
        bp_o = []
        for i, s in enumerate(sim):
            bp_s.append(1 if s >= 0 else 0)
            bp_o.append(1 if obs[i] >= 0 else 0)
        return bp_o, bp_s

    def _slope_sign_pair(obs_slope, sim_slope):
        """Return (obs_flag, sim_flag); a flag is 1 when the slope, rounded
        to two decimals, is strictly positive."""
        o, s = round(obs_slope, 2), round(sim_slope, 2)
        if o > 0 and s > 0:
            return (1, 1)
        if o <= 0 and s <= 0:
            return (0, 0)
        if o <= 0 and s > 0:
            return (0, 1)
        return (1, 0)

    def _flat_params(source):
        """Flatten every ``bp_params`` value of ``source`` in ``poly`` order."""
        return [
            value for p in poly
            for value in list(source[p]['bp_params'].values())
        ]

    obs_bp_slp = [obs_linear[p]['bp_params']['slope'] for p in poly]
    sim_bp_slp = [sim_linear[p]['bp_params']['slope'] for p in poly]

    # --- slope -----------------------------------------------------------
    l_sign = [
        _slope_sign_pair(o, s) for o, s in zip(obs_bp_slp, sim_bp_slp)
    ]
    sign_obs = [pair[0] for pair in l_sign]
    sign_sim = [pair[1] for pair in l_sign]

    trend_agreement = {
        'slope': {},
        'slope_intercept': {},
        'bp': {},
        'points_diff': {}
    }
    trend_agreement['slope'] = {
        'slp_sign': l_sign,
        'sign_kendal': estad.kendalltau(sign_obs, sign_sim)[0],
        # Key spelling kept as-is: callers may read it by this name.
        'sign_spearmamanr': estad.spearmanr(sign_obs, sign_sim)[0],
        'bp_kendal': estad.kendalltau(obs_bp_slp, sim_bp_slp)[0],
        'bp_spearmanr': estad.spearmanr(obs_bp_slp, sim_bp_slp)[0],
    }

    # --- slope + intercept (all linear bp_params) --------------------------
    l_sim = _flat_params(sim_linear)
    l_obs = _flat_params(obs_linear)

    slope_intercept = trend_agreement['slope_intercept']
    slope_intercept['bp_icc'] = ICC_rep_anova(np.asarray([l_obs, l_sim]).T)[0]
    slope_intercept['bp_kendall'] = estad.kendalltau(l_obs, l_sim)[0]
    slope_intercept['bp_spearmanr'] = estad.spearmanr(l_obs, l_sim)[0]
    mine.compute_score(l_obs, l_sim)
    slope_intercept['bp_mic'] = mine.mic()

    kpx, kpy = _kp(l_sim, l_obs)
    slope_intercept['sign_kendall'] = estad.kendalltau(kpx, kpy)[0]
    slope_intercept['sign_spearmanr'] = estad.spearmanr(kpx, kpy)[0]

    # --- breakpoint parameters ---------------------------------------------
    b_sim = _flat_params(sim_shps)
    b_obs = _flat_params(obs_shps)

    bp_icc = ICC_rep_anova(np.asarray([b_obs, b_sim]).T)[0]
    mine.compute_score(b_obs, b_sim)
    # Store both statistics together so neither overwrites the other.
    trend_agreement['bp'] = {'icc': bp_icc, 'mic': mine.mic()}

    # --- point-wise comparison per poly ------------------------------------
    points_diff = trend_agreement['points_diff']
    for stat in ['icc', 'spearmanr', 'mic']:
        points_diff[stat] = np.zeros([len(poly)])

    for p in range(len(poly)):
        # Drop the positions where the observation is missing from both
        # series so they stay aligned.
        missing = np.where(np.isnan(obs_dt[:, p]))
        wt_sim = np.delete(weight_sim[:, p], missing)
        obs = obs_dt[:, p][np.where(~np.isnan(obs_dt[:, p]))]

        points_diff['icc'][p] = ICC_rep_anova(
            np.asarray([wt_sim, obs]).T)[0]
        points_diff['spearmanr'][p] = estad.spearmanr(wt_sim, obs)[0]
        mine.compute_score(wt_sim, obs)
        points_diff['mic'][p] = mine.mic()

    return trend_agreement
def mic(k, y_predict, y_obs):
    """Return the MIC between predictions and observations.

    Args:
        k: Not used by this function; kept in the signature for callers.
        y_predict: Predicted values.
        y_obs: Observed values.

    Returns:
        MIC computed by a default-configured ``minepy.MINE`` estimator.
    """
    estimator = minepy.MINE()
    estimator.compute_score(y_predict, y_obs)
    score = estimator.mic()
    return score