def genMIC(x, y, t):
    """Score every feature of every ticker against its target and plot the mean.

    Args:
        x: Mapping from ticker to a feature DataFrame. The first entry
            determines the number of features and their names.
        y: Mapping from ticker to a DataFrame; only its first column is used
            as the target.
        t: Value appended (via ``str``) to the output image file name.

    Returns:
        A pandas DataFrame holding the per-feature statistics averaged over
        all tickers, with the feature names as columns.

    Side effects:
        Writes a bar chart to
        ``../3_Deliverables/Final Paper/data/MICT<t>.png``.
    """
    first_frame = x[list(x.keys())[0]]
    f_names = list(first_frame.columns.values)
    mic_mean = np.zeros((1, first_frame.shape[1]))
    mine = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    mic = {}

    for ticker in x:
        features = x[ticker]
        for i, feature_name in enumerate(features.columns.values):
            # The score is computed the same way for every feature; only the
            # way the result is stored differs between the first and the rest.
            mine.compute_score(features.iloc[:, i], y[ticker].iloc[:, 0])
            stats = mine_stats(mine)

            if i != 0:
                mic[ticker][feature_name] = stats
                continue

            # First feature: the statistics frame becomes the per-ticker
            # container and its first column is renamed after the feature.
            # NOTE(review): presumably mine_stats returns a one-row DataFrame
            # -- confirm against its definition.
            mic[ticker] = stats
            mic[ticker].rename(
                columns={mic[ticker].columns[0]: feature_name},
                inplace=True)

        mic_mean += mic[ticker]
    mic_mean /= len(x)

    mic_mean = pd.DataFrame(mic_mean)
    mic_mean.columns = f_names

    axes = mic_mean.plot.bar(figsize=(22.0, 14.0))
    axes.set_xlabel('Features', fontsize=24)
    axes.set_ylabel('MIC', fontsize=24)
    axes.tick_params(labelsize=24)
    axes.get_figure().savefig(
        '../3_Deliverables/Final Paper/data/MICT' + str(t) + '.png')

    return mic_mean
Beispiel #2
0
    def get_mic_co(lagrange_l, neuron_l, layer_ind, neuron_ind):
        """Build a global-dict row holding the MIC between two value series.

        Args:
            lagrange_l: First series; its ``shape[0]`` selects the MINE alpha.
            neuron_l: Second series, iterable so that ``max``/``min`` work.
            layer_ind: Layer index forwarded to the row builder.
            neuron_ind: Neuron index forwarded to the row builder.

        Returns:
            Whatever ``NonLinearGlobalDictRow.get_non_linear_global_dict_row``
            returns for ``(mic, layer_ind, neuron_ind)``.

        Raises:
            ValueError: If ``lagrange_l`` has fewer than one point.
        """
        def compute_alpha(npoints):
            """Look up the MINE alpha for a sample size in a fixed table."""
            if npoints < 1:
                raise ValueError("the number of points must be >=1")

            bin_edges = [1, 25, 50, 250, 500, 1000, 2500, 5000, 10000, 40000]
            alphas = [0.85, 0.80, 0.75, 0.70, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4]
            bin_index = np.digitize([npoints], bin_edges)[0] - 1
            return alphas[bin_index]

        estimator = minepy.MINE(
            alpha=compute_alpha(lagrange_l.shape[0]), c=5, est="mic_e")
        estimator.compute_score(lagrange_l, neuron_l)
        mic = estimator.mic()

        # A (nearly) constant neuron or an undefined score is reported as 0.
        neuron_span = max(neuron_l) - min(neuron_l)
        if neuron_span < 1e-5 or np.isnan(mic):
            mic = 0

        return NonLinearGlobalDictRow.get_non_linear_global_dict_row(
            mic, layer_ind, neuron_ind)
Beispiel #3
0
def sstats(X, Y, alpha=0.6, c=15, est="mic_approx"):
    """Compute MIC and normalised TIC for paired entries of ``X`` and ``Y``.

    Entry ``i`` of ``X`` is scored against entry ``i`` of ``Y`` for every
    ``i`` below the smaller of the two first dimensions.

    Args:
        X: Indexable object with a ``shape`` attribute.
        Y: Indexable object with a ``shape`` attribute.
        alpha: Forwarded to ``minepy.MINE``.
        c: Forwarded to ``minepy.MINE``.
        est: Forwarded to ``minepy.MINE``.

    Returns:
        Tuple ``(mic, tic)`` of NumPy arrays, one value per scored pair.
    """
    estimator = minepy.MINE(alpha=alpha, c=c, est=est)
    n_pairs = min(X.shape[0], Y.shape[0])

    mic_scores = []
    tic_scores = []
    for row in range(n_pairs):
        estimator.compute_score(X[row], Y[row])
        mic_scores.append(estimator.mic())
        tic_scores.append(estimator.tic(norm=True))

    return np.asarray(mic_scores), np.asarray(tic_scores)
Beispiel #4
0
def max_info_coef(x, y):
    """Return the MIC between ``x`` and ``y``.

    Both inputs are converted with ``np.asarray`` before scoring with a
    ``MINE(alpha=0.6, c=15, est="mic_approx")`` estimator.
    """
    estimator = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    estimator.compute_score(np.asarray(x), np.asarray(y))
    return estimator.mic()
Beispiel #5
0
def mic(x, y):
    """Calculate the maximal information coefficient of ``x`` and ``y``.

    The inputs are copied into NumPy arrays and scored with a
    ``MINE(alpha=0.6, c=15, est="mic_approx")`` estimator.
    """
    x_values, y_values = np.array(x), np.array(y)

    estimator = mp.MINE(alpha=0.6, c=15, est="mic_approx")
    estimator.compute_score(x_values, y_values)
    return estimator.mic()
Beispiel #6
0
def compute_null_oneclass(X,
                          Y=None,
                          rowwise=False,
                          B=9,
                          c=5,
                          nperm=250000,
                          seed=0):
    """Build a histogram of normalised TIC values under random permutation.

    On each of ``nperm`` iterations a row ``x`` and a row ``y`` are drawn,
    ``y`` is randomly permuted, and the normalised TIC of the pair is binned
    into ``NULL_HIST_RES`` equal-width bins on ``[0, 1]``.

    Args:
        X: pandas DataFrame whose rows are the variables to sample from.
        Y: Optional second DataFrame. When omitted, both rows are drawn
            (without replacement) from ``X``.
        rowwise: With ``Y`` given, draw the same row index from ``X`` and
            ``Y`` instead of independent indices.
        B: Passed to ``minepy.MINE`` as its ``alpha`` argument.
        c: Passed to ``minepy.MINE``.
        nperm: Number of permutations to draw.
        seed: Seed for the ``numpy.random.RandomState`` used for all draws.

    Returns:
        DataFrame indexed by ``(BinStart, BinEnd)`` with columns
        ``NullCount`` (per-bin count) and ``NullCountCum`` (right-tailed
        cumulative count).
    """
    mictools.utils.check_data(X, Y=Y)

    bins = np.linspace(0, 1, NULL_HIST_RES + 1)
    hist = np.zeros(NULL_HIST_RES, dtype=np.int64)
    rs = np.random.RandomState(seed)
    mine = minepy.MINE(alpha=B, c=c, est="mic_e")

    # ``DataFrame.as_matrix()`` no longer exists in current pandas;
    # ``.values`` yields the same array on old and new versions.
    Xa = X.values
    Ya = None
    idx = None
    max_idx_rowwise = None
    if Y is None:
        idx = np.arange(Xa.shape[0])
    else:
        Ya = Y.values
        if rowwise:
            max_idx_rowwise = min(Xa.shape[0], Ya.shape[0])

    for _ in range(nperm):
        if Y is None:
            i, j = rs.choice(idx, size=2, replace=False)
            x, y = Xa[i], rs.permutation(Xa[j])
        else:
            if rowwise:
                i = j = rs.randint(max_idx_rowwise)
            else:
                i, j = rs.randint(Xa.shape[0]), rs.randint(Ya.shape[0])

            x, y = Xa[i], rs.permutation(Ya[j])

        mine.compute_score(x, y)
        tic = mine.tic(norm=True)
        # A value equal to the last edge would land past the final bin;
        # clamp it into the last bin.
        hist_idx = min(np.digitize([tic], bins)[0] - 1, NULL_HIST_RES - 1)
        hist[hist_idx] += 1

    # right-tailed area
    hist_cum = np.cumsum(hist[::-1])[::-1]

    index = pd.MultiIndex.from_arrays([bins[:-1], bins[1:]],
                                      names=('BinStart', 'BinEnd'))
    hist_df = pd.DataFrame({
        "NullCount": hist,
        "NullCountCum": hist_cum
    },
                           index=index,
                           columns=["NullCount", "NullCountCum"])

    return hist_df
    def _feature_impact_(self):
        """Rank the columns of ``self.X`` by their association with ``self.y``.

        For each column the score is the larger of the MIC and the Pearson
        correlation coefficient, both rounded to four decimals. The signed
        Pearson value is compared, not its absolute value.

        Returns:
            pandas Series indexed by column name, sorted in descending order.
        """
        features, target = self.X, self.y
        names = features.columns
        estimator = minepy.MINE()

        scores = pd.Series([None] * len(names), index=names)
        for name in names:
            column = features[name]
            estimator.compute_score(column, target)
            mic_score = np.around(estimator.mic(), decimals=4)
            pearson_score = np.around(pearsonr(column, target)[0], decimals=4)
            scores[name] = max(mic_score, pearson_score)

        return scores.sort_values(ascending=False)
def corr_func(X1, X2, corr_type=None):
    """Return the absolute correlation between two flattened inputs.

    Args:
        X1: Array-like; flattened to one dimension before use.
        X2: Array-like; flattened to one dimension before use.
        corr_type: One of ``'MIC'``, ``'pearson'``, ``'spearman'`` or
            ``'kendall'``. When ``None``, ``cf.corr_type`` is used.

    Returns:
        The absolute value of the selected correlation measure.

    Raises:
        ValueError: If ``corr_type`` is not one of the supported names.
    """
    if corr_type is None:
        corr_type = cf.corr_type

    # np.asarray lets lists and other array-likes through as well as
    # ndarrays; reshape(-1) flattens in the same row-major order.
    s1 = pd.Series(np.asarray(X1).reshape(-1))
    s2 = pd.Series(np.asarray(X2).reshape(-1))

    if corr_type == 'MIC':
        mine = minepy.MINE(alpha=0.6, c=15, est="mic_approx")
        mine.compute_score(s1, s2)
        corr = mine.mic()
    elif corr_type in ('pearson', 'spearman', 'kendall'):
        corr = s1.corr(s2, method=corr_type)
    else:
        # Fail with a clear message instead of hitting an unbound local.
        raise ValueError(
            "unsupported corr_type {!r}; expected 'MIC', 'pearson', "
            "'spearman' or 'kendall'".format(corr_type))

    return abs(corr)
def maximal_information_coefficient(series1, series2):
    """
    Return the maximal information coefficient (MIC) of two series.

    MIC picks up a broad range of associations, functional or otherwise.
    For functional relationships its value is roughly the coefficient of
    determination (R^2) of the data with respect to the regression function.

    Args:
        series1 (numpy.ndarray): First series
        series2 (numpy.ndarray): Second series

    Returns:
        The MIC computed by a default-configured ``minepy.MINE`` estimator
    """
    estimator = minepy.MINE()
    estimator.compute_score(series1, series2)
    score = estimator.mic()
    return score
    def mic_of_simulation(trajectories):
        """
        Return the pairwise MIC values for one set of SCN trajectories.

        Column 0 of ``trajectories`` is skipped. Columns 1..160 are sampled
        every 4th column and the remaining columns every 3rd column; MIC is
        then computed for every unordered pair of the sampled columns.
        """
        first_block = trajectories[:, 1:161]
        second_block = trajectories[:, 161:]
        per2 = np.hstack([first_block[:, ::4], second_block[:, ::3]])

        # One estimator is reused for every pair.
        estimator = mp.MINE(alpha=0.6, c=15, est='mic_approx')
        pair_scores = []
        for left, right in combinations(range(per2.shape[1]), 2):
            estimator.compute_score(per2[:, left], per2[:, right])
            pair_scores.append(estimator.mic())

        return pair_scores
Beispiel #11
0
def get_mic_df(df, **kwargs):
    """Tabulate MINE statistics for every ordered pair of numeric columns.

    Args:
        df: pandas DataFrame; only ``int`` and ``float`` columns are used.
        **kwargs: Forwarded to every ``minepy.MINE`` constructor call.

    Returns:
        DataFrame with one row per ordered column pair and the columns
        ``col``, ``col2``, ``MIC``, ``MAS``, ``MEV``, ``MCN``, ``TIC`` and
        ``MICR2`` (MIC minus the squared ``DataFrame.corr`` value).
    """
    numeric = df.select_dtypes(include=['int', 'float'])

    rows = []
    for left, right in itertools.product(numeric.columns, repeat=2):
        estimator = minepy.MINE(**kwargs)
        estimator.compute_score(numeric[left], numeric[right])
        rows.append([
            left, right,
            estimator.mic(),
            estimator.mas(),
            estimator.mev(),
            estimator.mcn(0),
            estimator.tic()
        ])

    df_mic = pd.DataFrame(rows)
    df_mic.columns = ['col', 'col2', 'MIC', 'MAS', 'MEV', 'MCN', 'TIC']

    # Long-format correlation table: one row per (col, col2) pair.
    df_corr = pd.DataFrame(numeric.corr().stack()).reset_index()
    df_corr = df_corr.rename(
        columns={'level_0': 'col', 'level_1': 'col2', 0: 'corr'})
    df_corr['R2'] = df_corr['corr'].apply(lambda r: r**2)
    df_r2 = df_corr.filter(items=['col', 'col2', 'R2'])

    res = df_mic.merge(df_r2, on=['col', 'col2'])
    res['MICR2'] = res['MIC'] - res['R2']

    return res.filter(
        items=['col', 'col2', 'MIC', 'MAS', 'MEV', 'MCN', 'TIC', 'MICR2'])
Beispiel #12
0
def scatter_the_nonlinear_significant_but_not_linear_ones(lagrangian_values,
                                                          layer_values_list, linear_threshold, nonlinear_threshold, out_dir):
    """Save scatter plots for pairs with low linear but high MIC association.

    Every series in ``lagrangian_values`` is compared with every neuron
    series in ``layer_values_list``. A plot is written when the absolute
    Pearson coefficient is below ``linear_threshold`` and the MIC is above
    ``nonlinear_threshold``.

    Args:
        lagrangian_values: Mapping from a key to an iterable of series.
        layer_values_list: Iterable of layers, each an iterable of series.
        linear_threshold: Upper bound (exclusive) on ``abs(pearson)``.
        nonlinear_threshold: Lower bound (exclusive) on the MIC.
        out_dir: Directory prefix for the generated ``.jpg`` files.
    """
    for key, nda in lagrangian_values.items():
        for ind, lagrange_l in enumerate(nda):
            for layer_ind, layer in enumerate(layer_values_list):
                for neuron_ind, neuron_l in enumerate(layer):
                    linear_co = np.corrcoef(lagrange_l, neuron_l)[1, 0]

                    estimator = minepy.MINE(
                        alpha=compute_alpha(lagrange_l.shape[0]),
                        c=5,
                        est="mic_e")
                    estimator.compute_score(lagrange_l, neuron_l)
                    mic = estimator.mic()

                    # Written as a negation so that NaN scores are skipped
                    # exactly as a failed positive test would skip them.
                    wanted = (abs(linear_co) < linear_threshold
                              and mic > nonlinear_threshold)
                    if not wanted:
                        continue

                    name = f"{out_dir}/{key}_index_{ind}_VS_layer_{layer_ind}_neuron_{neuron_ind}_" \
                           f"linear_correlation{linear_co}_nonlinear_correlation{mic}.jpg"
                    plt.figure()
                    plt.scatter(lagrange_l, neuron_l)
                    plt.xlabel("lagrange")
                    plt.ylabel("neuron")

                    plt.savefig(name)
                    plt.close()
def plot_betas(nodes1, nodes2, betas, **kwargs):
    """Draw a heatmap of significant Pearson p-values for node pairs.

    For each row of ``betas`` the p-value of the Pearson correlation between
    its z-score and the module-level ``zd2`` is computed; p-values of 0.05
    or more are replaced by 0. The values are pivoted into an ``n1`` x ``n2``
    grid, missing cells are filled from the transposed position, and the
    grid is drawn with ``seaborn.heatmap``.

    Args:
        nodes1: Labels used as the heatmap rows, one per row of ``betas``.
        nodes2: Labels used as the heatmap columns, one per row of ``betas``.
        betas: Object exposing ``.values`` that iterates over the series to
            test (presumably a DataFrame -- confirm against the caller).
        **kwargs: Accepted for call compatibility; not used.
    """
    def st_sign(p):
        """Keep a p-value only when it is significant at the 5% level."""
        return p if p < 0.05 else 0

    acc = [st_sign(pearsonr(zscore(a), zd2)[1]) for a in betas.values]

    df = pd.DataFrame(dict(n1=nodes1, n2=nodes2, a=acc))
    # Keyword arguments: recent pandas releases reject positional ones here.
    pdf = df.pivot(index="n1", columns="n2", values="a")

    # Fill on an explicit copy so the result does not depend on whether
    # ``.values`` happens to be a writable view of the frame.
    grid = np.array(pdf.values)
    nz = np.nonzero(np.isnan(grid))
    # Mirror: a missing (i, j) cell takes the value stored at (j, i).
    grid[nz] = grid[nz[::-1]]
    pdf = pd.DataFrame(grid, index=pdf.index, columns=pdf.columns)

    sns.heatmap(pdf, annot=True, cmap="RdBu_r", fmt=".4f", center=0.)
def mic(x,y):
    """Return the MIC_e of ``x`` and ``y``.

    The MINE alpha is obtained from ``compute_alpha`` using the number of
    points in ``x`` (``x.shape[0]``).
    """
    estimator = minepy.MINE(alpha=compute_alpha(x.shape[0]), c=5, est="mic_e")
    estimator.compute_score(x, y)
    return estimator.mic()
Beispiel #15
0
 def getMIC(self):
     """Score ``self.X`` against ``self.Y`` and return the MINE estimator.

     ``self.X`` and ``self.Y`` are replaced in place by their float NumPy
     array versions before scoring. The caller reads the statistics it
     needs from the returned ``minepy.MINE`` object.
     """
     # Convert X first, then Y, storing each result back on the instance.
     for attr in ('X', 'Y'):
         setattr(self, attr, np.asarray(getattr(self, attr), dtype='float'))

     estimator = minepy.MINE(alpha=0.6, c=15)
     estimator.compute_score(self.X, self.Y)
     return estimator
Beispiel #16
0
 def __init__(self, alpha=0.6, c=15):
     """Create the wrapped ``minepy.MINE`` estimator.

     Args:
         alpha: Coerced to ``float``; must lie in ``(0, 1]``.
         c: Coerced to ``int``; must be positive.
     """
     alpha = float(alpha)
     c = int(c)
     assert 0 < alpha <= 1 and c > 0
     self.mine = minepy.MINE(alpha=alpha, c=c)
     super(MINEComputer, self).__init__()
Beispiel #17
0
def mic(x, y):
    """Return ``(MIC, 0.5)`` for ``x`` and ``y``.

    The second element is the constant ``0.5``.
    NOTE(review): presumably a placeholder p-value so the function fits a
    ``(score, pvalue)`` scoring interface -- confirm against the caller.
    """
    estimator = minepy.MINE()
    # Compute the maximal normalized mutual information scores of x and y.
    estimator.compute_score(x, y)
    # mic() returns the maximal information coefficient.
    score = estimator.mic()
    return (score, 0.5)
Beispiel #18
0
def compute_strength(X,
                     pval,
                     output_fn,
                     labels=None,
                     Y=None,
                     t=0.05,
                     alpha=None,
                     c=5):
    """Write MIC_e, Pearson and Spearman strengths for significant pairs.

    Pairs are taken from the index of ``pval``; a pair is kept when at least
    one of its p-values is below ``t``. For each class in ``labels`` and each
    kept pair, one tab-separated row is written to ``output_fn``.

    Args:
        X: DataFrame with variables as rows and samples as columns.
        pval: DataFrame of p-values indexed by ``(var1, var2)`` with one
            column per class.
        output_fn: Path of the tab-separated output file (overwritten).
        labels: Series assigning a class to each column of ``X``. When
            omitted, every column is put in the single class ``'None'``.
        Y: Optional second DataFrame; when given, ``var2`` is looked up in
            ``Y`` instead of ``X``.
        t: Significance threshold applied to ``pval``.
        alpha: MINE alpha. When ``None`` it is chosen per class by
            ``compute_alpha`` from the class size and echoed to stdout.
        c: Passed to ``minepy.MINE``.
    """
    mictools.utils.check_data(X, labels=labels, Y=Y)

    if labels is None:
        labels = pd.Series('None', index=X.columns)

    # compute MIC_e for pairs with at least one p-value < t
    index = pval.index[(pval < t).sum(axis=1) > 0]

    if alpha is None:
        sys.stdout.write("Automatically chosen alphas:\n")

    header = [
        "Class", "Var1", "Var2", "TICePVal", "PearsonR", "SpearmanRho", "MICe"
    ]

    # The context manager closes the file even if a computation below raises.
    with open(output_fn, 'w') as strength_handle:
        strength_writer = csv.writer(strength_handle,
                                     delimiter='\t',
                                     lineterminator='\n')
        strength_writer.writerow(header)

        for cl in sorted(labels.unique()):
            keep = (cl == labels)
            X_cl = X.loc[:, keep]

            if Y is not None:
                Y_cl = Y.loc[:, keep]

            if alpha is None:
                alpha_cl = compute_alpha(X_cl.shape[1])
                sys.stdout.write("* {}: {:f}\n".format(cl, alpha_cl))
            else:
                alpha_cl = alpha

            mine = minepy.MINE(alpha=alpha_cl, c=c, est="mic_e")

            for var1, var2 in index:
                x = X_cl.loc[var1]
                y = X_cl.loc[var2] if Y is None else Y_cl.loc[var2]
                mine.compute_score(x, y)
                mic = mine.mic()
                p = pval.loc[(var1, var2), cl]
                R, _ = scipy.stats.pearsonr(x, y)
                rho, _ = scipy.stats.spearmanr(x, y)

                strength_writer.writerow([
                    cl, var1, var2, "{:e}".format(p), "{:.6f}".format(R),
                    "{:.6f}".format(rho), "{:.6f}".format(mic)
                ])
Beispiel #19
0
def coeff_agreement(obs_linear, sim_linear, obs_shps, sim_shps, poly,
                    weight_sim, obs_dt):
    """Collect agreement statistics between observed and simulated trends.

    Args:
        obs_linear: Mapping ``p -> {'bp_params': {...}}`` for the observed
            linear fits; ``'bp_params'`` must contain ``'slope'``.
        sim_linear: Same structure for the simulated linear fits.
        obs_shps: Mapping ``p -> {'bp_params': {...}}`` for observed shapes.
        sim_shps: Same structure for simulated shapes.
        poly: Iterable of keys ``p`` into the four mappings above.
        weight_sim: 2-D array of simulated values; column ``p`` is compared
            with column ``p`` of ``obs_dt``.
        obs_dt: 2-D array of observations; NaN rows are dropped per column.

    Returns:
        Dict with the keys ``'slope'``, ``'slope_intercept'``, ``'bp'`` and
        ``'points_diff'``, each holding the statistics computed below.
    """
    # kp<0: agreement worse than random; kp=1: perfect agreement. ICC=1 is
    # perfect as well.
    mine = minepy.MINE()

    def _kp(sim, obs):
        """Binarise paired values by sign; returns ``(obs_flags, sim_flags)``."""
        bp_s = []
        bp_o = []
        for i, s_val in enumerate(sim):
            o_val = obs[i]
            if s_val >= 0 and o_val >= 0:
                kp = (1, 1)
            elif s_val < 0 and o_val < 0:
                kp = (0, 0)
            elif s_val < 0 and o_val >= 0:
                # Negative simulation, non-negative observation. The second
                # operand must test the observation, otherwise this branch
                # can never be taken.
                kp = (0, 1)
            else:
                kp = (1, 0)

            bp_s.append(kp[0])
            bp_o.append(kp[1])
        return bp_o, bp_s

    def _slope_sign(obs_slope, sim_slope):
        """Binarise a pair of slopes by sign after rounding to 2 decimals."""
        obs_r, sim_r = round(obs_slope, 2), round(sim_slope, 2)
        if obs_r > 0 and sim_r > 0:
            return (1, 1)
        if obs_r <= 0 and sim_r <= 0:
            return (0, 0)
        if obs_r <= 0 and sim_r > 0:
            return (0, 1)
        return (1, 0)

    obs_bp_slp = [obs_linear[p]['bp_params']['slope'] for p in poly]
    sim_bp_slp = [sim_linear[p]['bp_params']['slope'] for p in poly]

    l_sign = [_slope_sign(o, s) for o, s in zip(obs_bp_slp, sim_bp_slp)]
    sign_obs = [pair[0] for pair in l_sign]
    sign_sim = [pair[1] for pair in l_sign]

    trend_agreement = {
        'slope': {},
        'slope_intercept': {},
        'bp': {},
        'points_diff': {}
    }

    # --- slope --------------------------------------------------------
    # The misspelled key 'sign_spearmamanr' is kept because callers may
    # already look it up under that name.
    trend_agreement['slope'] = {
        'slp_sign': l_sign,
        'sign_kendal': estad.kendalltau(sign_obs, sign_sim)[0],
        'sign_spearmamanr': estad.spearmanr(sign_obs, sign_sim)[0],
        'bp_kendal': estad.kendalltau(obs_bp_slp, sim_bp_slp)[0],
        'bp_spearmanr': estad.spearmanr(obs_bp_slp, sim_bp_slp)[0],
    }

    # --- slope and intercept, flattened over all polygons -------------
    l_sim = [
        bp for p in poly for bp in sim_linear[p]['bp_params'].values()
    ]
    l_obs = [
        bp for p in poly for bp in obs_linear[p]['bp_params'].values()
    ]

    slope_intercept = trend_agreement['slope_intercept']
    slope_intercept['bp_icc'] = ICC_rep_anova(np.asarray([l_obs, l_sim]).T)[0]
    slope_intercept['bp_kendall'] = estad.kendalltau(l_obs, l_sim)[0]
    slope_intercept['bp_spearmanr'] = estad.spearmanr(l_obs, l_sim)[0]
    mine.compute_score(l_obs, l_sim)
    slope_intercept['bp_mic'] = mine.mic()

    kpx, kpy = _kp(l_sim, l_obs)
    slope_intercept['sign_kendall'] = estad.kendalltau(kpx, kpy)[0]
    slope_intercept['sign_spearmanr'] = estad.spearmanr(kpx, kpy)[0]

    # --- shape break-point parameters ---------------------------------
    b_sim = [
        bp for p in poly for bp in sim_shps[p]['bp_params'].values()
    ]
    b_obs = [
        bp for p in poly for bp in obs_shps[p]['bp_params'].values()
    ]

    bp_icc = ICC_rep_anova(np.asarray([b_obs, b_sim]).T)[0]
    mine.compute_score(b_obs, b_sim)
    # Both statistics are stored together so that 'icc' is not lost when
    # 'mic' is added.
    trend_agreement['bp'] = {'icc': bp_icc, 'mic': mine.mic()}

    # --- point-wise differences per polygon ---------------------------
    for stat in ['icc', 'spearmanr', 'mic']:
        trend_agreement['points_diff'][stat] = np.zeros([len(poly)])

    # NOTE(review): 'spearmanr' is allocated above but never filled, so it
    # stays all zeros -- confirm whether it should be computed here.
    for p in range(len(poly)):
        missing = np.isnan(obs_dt[:, p])
        wt_sim = np.delete(weight_sim[:, p], np.where(missing))
        obs = obs_dt[:, p][np.where(~missing)]
        trend_agreement['points_diff']['icc'][p] = ICC_rep_anova(
            np.asarray([wt_sim, obs]).T)[0]
        mine.compute_score(wt_sim, obs)
        trend_agreement['points_diff']['mic'][p] = mine.mic()

    return trend_agreement
Beispiel #20
0
def mic(k, y_predict, y_obs):
    """Return the MIC between predictions and observations.

    Args:
        k: Not used by this function.
        y_predict: Predicted values.
        y_obs: Observed values.
    """
    estimator = minepy.MINE()
    estimator.compute_score(y_predict, y_obs)
    score = estimator.mic()
    return score