Esempi in Python per sign_test, esempi in Python per statsmodels.stats.descriptivestats.sign_test

Esempio n. 1

0

Mostra file

def test_nonparam(data, m, alpha, alternative='two-sided', verbose=False):
    """Run three non-parametric one-sample tests of ``data`` against ``m``.

    The sign test, the Wilcoxon signed-rank test (applied to ``data - m``)
    and a permutation test are evaluated in that order.  Nothing is
    returned; the results are only reported on stdout when ``verbose`` is
    true.

    Parameters
    ----------
    data : array_like
        Sample to test; converted with ``np.array``.
    m : float
        Reference value the sample is compared with.
    alpha : float
        Threshold the p-values are compared with.  The printed flag is
        ``True`` when ``p_value > alpha``.
    alternative : str, optional
        Accepted for interface compatibility; not used by this function.
    verbose : bool, optional
        Print a header and one line per test.
    """
    if verbose:
        print('Non-parametric mean test:')

    sample = np.array(data)

    # (report template, deferred test call), kept in the reporting order.
    # The calls are deferred so each test runs right before its own report.
    procedures = (
        # Sign test
        ('sign-test:\t{2} --> M={0}, p_value={1}',
         lambda: sign_test(sample, m)),
        # Wilcoxon signed-rank test
        ('sign-rank-test:\t{2} --> T={0}, p_value={1}',
         lambda: stats.wilcoxon(sample - m)),
        # Permutation test
        ('perm-test:\t{2} --> T={0}, p_value={1}',
         lambda: permutation_test(sample, m)),
    )

    for template, run in procedures:
        statistic, p_value = run()
        above_alpha = p_value > alpha
        if verbose:
            print(template.format(statistic, p_value, str(above_alpha)))

    return None

Esempio n. 2

0

Mostra file

File: myml.py Progetto: drimalla/ml-autism

def evaluate(y_true, predictions, asq, asq_index):
    """Score every classifier against a majority-class baseline and against ASQ.

    For each entry of ``predictions`` two statistics are computed:

    * a sign test on the per-sample difference between "baseline was
      correct" and "classifier was correct" (both coded as 0/1), and
    * the Pearson correlation between the raw predictions and ``asq`` on
      the samples selected by ``asq_index``.

    A regression plot per classifier is saved as ``Correlation<key>.png``
    and shown, and all statistics are written to ``RF_Stats.csv``.

    Parameters
    ----------
    y_true : array_like
        True labels.  The baseline below assumes the majority class is
        coded as 1 -- NOTE(review): confirm against the data set.
    predictions : dict
        Maps a classifier name (str) to its predicted scores; a score above
        0.5 counts as a positive prediction.
    asq : array_like
        Values correlated with the predictions (indexed by ``asq_index``).
    asq_index : index or mask
        Selects the samples used for the plot and the correlation.

    Returns
    -------
    None
    """
    stat_key = []
    stat_M = []
    stat_Mp = []
    stat_R = []
    stat_Rp = []

    # Baseline: always predict the majority class.  It does not depend on
    # the classifier, so it is computed once outside the loop.
    correct_majority_pred = (np.ones(len(y_true)) == y_true)

    for key in predictions:
        pred = predictions[key] > 0.5
        # Checks whether the classifier predicted each sample correctly.
        correct_pred = (y_true == pred)

        # NumPy refuses to subtract boolean arrays, so compare 0/1 integers.
        diff = correct_majority_pred.astype(int) - correct_pred.astype(int)
        # sign_test returns M = (N(+) - N(-)) / 2 and the p-value.
        M, p = sign_test(diff, mu0=0)

        plt.figure()
        # x/y are passed by keyword: newer seaborn no longer accepts them
        # positionally.
        sns.regplot(x=predictions[key][asq_index], y=asq[asq_index])
        plt.savefig('Correlation' + key + '.png')
        plt.show()
        plt.close()

        R, Rp = stats.pearsonr(predictions[key][asq_index], asq[asq_index])

        # Append only once every statistic for this key is available, so
        # the five lists always stay the same length.
        stat_key.append(key)
        stat_M.append(M)
        stat_Mp.append(p)
        stat_R.append(R)
        stat_Rp.append(Rp)

    results = pd.DataFrame([stat_key, stat_M, stat_Mp, stat_R, stat_Rp])
    results.to_csv('RF_Stats.csv')

Esempio n. 3

0

Mostra file

File: test_descriptivestats.py Progetto: quuhua911/statsmodels

def test_sign_test():
    """Check ``sign_test`` on a small sample against reference values."""
    sample = [7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8]
    statistic, pvalue = sign_test(sample, mu0=6.5)

    # Reference p-value obtained in R with SIGN.test(x, md=6.5).
    assert_almost_equal(pvalue, 0.02148, 5)

    # The statistic is not taken from R: a different convention is used here.
    assert_equal(statistic, 4)

Esempio n. 4

0

Mostra file

File: test_descriptivestats.py Progetto: B-Rich/statsmodels

def test_sign_test():
    """Verify the sign test statistic and p-value for a fixed sample."""
    observations = [7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8]
    hypothesised_location = 6.5
    m_stat, p_val = sign_test(observations, mu0=hypothesised_location)

    # p-value cross-checked with R: SIGN.test(x, md=6.5)
    assert_almost_equal(p_val, 0.02148, 5)

    # M follows this library's own convention, so it is not an R value.
    assert_equal(m_stat, 4)

Esempio n. 5

0

Mostra file

def sign_test_1sample(sample, mean):
    """One-sample sign test.

    Tests the hypothesis that the location of ``sample`` equals the given
    value ``mean``.  The original documentation describes this as a test of
    the mean.  NOTE(review): the sign test is usually stated for the
    median -- confirm the intended wording.

    Parameters
    ----------
    sample : array_like
        Array of observations.
    mean : float
        Value being tested.

    Returns
    -------
    statistic : float or array
        Test statistic.
    pvalue : float or array
        Two-tailed p-value.
    """
    # Forward whatever sign_test returns straight into the result type.
    return SigntestResult(*sign_test(sample, mean))

Esempio n. 6

0

Mostra file

File: descstats.py Progetto: yarikoptic/pystatsmodels

def descstats(data, cols=None, axis=0):
    '''
    Build a text report of descriptive statistics for one or more variables.

    Despite the historical "prints" wording, nothing is printed: the report
    is returned as a string.

    Parameters
    ----------
    data : array_like
        Observations.  1-D input is treated as a single variable, 2-D input
        as one variable per column.
    cols : optional
        Only used as the "Var. Name" label of the single-variable report.
        Selecting columns by number or field name is not implemented.
    axis : 1 or 0
        Currently unused; data is always read column-wise.

    Returns
    -------
    desc : str
        A univariate report (moments, percentiles and location tests with
        H0: Mu0=0) for a single column, or a one-line-per-column summary
        table for several columns.

    Raises
    ------
    ValueError
        If the data has no columns.

    Examples
    --------
    >>> print(descstats(data.exog))
    '''

    x = np.array(data)  # or rather, the data we're interested in

    # A single variable may arrive as a flat sequence.  Promote it to one
    # column regardless of `cols`, so that x.shape[1] is always defined.
    if not isinstance(x, np.recarray) and x.ndim == 1:
        x = x[:, None]

    if x.shape[1] == 1:
        # Compute on the flat column so each statistic is a plain scalar;
        # %-formatting size-1 arrays is deprecated in recent NumPy.
        col = x[:, 0]
        # Mode is taken on the 2-D array: the [0][0] / [1][0] indexing then
        # works whether or not SciPy keeps the reduced axis.
        mode_res = stats.mode(x)
        centred = col - col.mean()
        values = {
            'name': cols,
            'sum': 'N/A',
            'nobs': len(x),
            'mode': mode_res[0][0],
            'nmode': mode_res[1][0],
            'mean': col.mean(),
            'median': np.median(col),
            'range': '(' + str(col.min()) + ', ' + str(col.max()) + ')',
            'variance': col.var(),
            'stddev': col.std(),
            'coeffvar': stats.variation(col),
            'skewness': stats.skew(col),
            'kurtosis': stats.kurtosis(col),
            # scipy.stats.ss has been removed from SciPy; the sums of
            # squares are computed directly instead.
            'uss': np.sum(col * col),
            'ss': np.sum(centred * centred),
            'sobs': np.sum(col),
        }
        desc = '''
    ---------------------------------------------
    Univariate Descriptive Statistics
    ---------------------------------------------

    Var. Name   %(name)12s
    ----------
    Obs.          %(nobs)22i  Range                  %(range)22s
    Sum of Wts.   %(sum)22s  Coeff. of Variation     %(coeffvar)22.4g
    Mode          %(mode)22.4g  Skewness                %(skewness)22.4g
    Repeats       %(nmode)22i  Kurtosis                %(kurtosis)22.4g
    Mean          %(mean)22.4g  Uncorrected SS          %(uss)22.4g
    Median        %(median)22.4g  Corrected SS            %(ss)22.4g
    Variance      %(variance)22.4g  Sum Observations        %(sobs)22.4g
    Std. Dev.     %(stddev)22.4g
    ''' % values

        desc += '''

    Percentiles
    -------------
    1  %%          %12.4g
    5  %%          %12.4g
    10 %%          %12.4g
    25 %%          %12.4g

    50 %%          %12.4g

    75 %%          %12.4g
    90 %%          %12.4g
    95 %%          %12.4g
    99 %%          %12.4g
    ''' % tuple([
            stats.scoreatpercentile(col, per)
            for per in (1, 5, 10, 25, 50, 75, 90, 95, 99)
        ])

        # Location tests against zero.
        t, p_t = stats.ttest_1samp(col, 0)
        M, p_M = sign_test(col)
        S, p_S = stats.wilcoxon(col)

        desc += '''

    Tests of Location (H0: Mu0=0)
    -----------------------------
    Test                Statistic       Two-tailed probability
    -----------------+-----------------------------------------
    Student's t      |  t %7.5f   Pr > |t|   <%.4f
    Sign             |  M %8.2f   Pr >= |M|  <%.4f
    Signed Rank      |  S %8.2f   Pr >= |S|  <%.4f

    ''' % (t, p_t, M, p_M, S, p_S)
        # Should this be part of a 'descstats'?
        # In any event these should be split up, so that they can be called
        # individually and only returned together if someone calls summary
        # or something of the sort.

    elif x.shape[1] > 1:
        desc = '''
    Var. Name   |     Obs.        Mean    Std. Dev.           Range
    ------------+--------------------------------------------------------''' + \
            os.linesep

        # One summary row per column.  The line separator is part of the
        # range field (and therefore of its 20-character width), exactly as
        # in the historical output.
        row = "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g %(range)20s"
        for var in range(x.shape[1]):
            column = x[:, var]
            desc += row % {
                'name': var,
                'obs': len(column),
                'mean': column.mean(),
                'stddev': column.std(),
                'range': '(' + str(column.min()) + ', ' +
                         str(column.max()) + ')' + os.linesep,
            }
    else:
        raise ValueError("data not understood")

    return desc

Esempio n. 7

0

Mostra file

print("p-value is: " + str(Chi_pValue))

########################## End of chi-square test ##########################

# One-sample sign test
import statsmodels.stats.descriptivestats as sd
# Load the sign-test data set (Signtest.csv).
# NOTE(review): absolute, machine-specific path -- this only runs on the
# original author's machine.
data = pd.read_csv(
    "C:/Users/suri/Desktop/practice programs/Hypothesis testing/Signtest.csv")
# Normality test on the Scores column; element [1] of the result is used as
# the p-value below.
data_socres = stats.shapiro(data.Scores)
data_pValue = data_socres[1]
print("p-value is: " + str(data_pValue))

# One-sample sign test of Scores against mu0=0.
# NOTE(review): the returned result is discarded, so nothing is reported when
# this runs as a script (it only shows up in an interactive session) --
# confirm whether it should be stored or printed.
sd.sign_test(data.Scores, mu0=0)
########################## End of one-sample sign test ##########################

# Two-proportion test
two_prop_test = pd.read_csv(
    "C:/Users/suri/Desktop/practice programs/Hypothesis testing/JohnyTalkers.csv"
)
# Import what the two-proportion test needs.
from statsmodels.stats.proportion import proportions_ztest
# Cross table: number of rows for every (Person, Icecream) combination, to
# see how many adults and children are purchasing.
# NOTE(review): `tab` is not used below; the counts that follow are
# hard-coded, presumably read off this table -- verify.
tab = two_prop_test.groupby(['Person', 'Icecream']).size()
count = np.array([58, 152])  # how many adults and children are purchasing
nobs = np.array([480, 740])  # total number of adults and children

stat, pval = proportions_ztest(count, nobs, alternative='two-sided')
# `alternative`: the alternative hypothesis can be either two-sided or one of
# the one-sided tests.

Esempio n. 8

0

Mostra file

File: statistics.py Progetto: busetopcu/Python

"""T Testi UYGULANABİLİR"""

# APPLYING THE HYPOTHESIS TEST

st.ttest_1samp(olcumler, popmean=170)

# Output = Ttest_1sampResult(statistic=-2.2287204362493114, pvalue=0.030357490117026445)

# Because the p-value is smaller than alpha (0.05), the value accepted as the tolerable error level, the H0 hypothesis is rejected.
# The mean duration is less than 170.
"""Nonparametrik Örneklem Testi"""
# (The string above is Turkish for "Non-parametric sample test".)

# Used when the assumption does not hold - the sign test.

from statsmodels.stats.descriptivestats import sign_test
sign_test(olcumler, 170)

# The second value of the output being smaller than 0.05 indicates that the condition is met.
"""
Korelasyon Testi
İki değişken içinde normallik varsayımı
Varsayım sağlanıyorsa Korelasyon Katsayısı
Varsayım sağlanmıyorsa  Spearman Korelasyon Katsayısı

-> Bahşiş İle Ödenen Hesap Arasındaki İlişkinin İncelenmesi

"""
# English summary of the string above:
#   Correlation test
#   Normality assumption for both variables
#   If the assumption holds: correlation coefficient
#   If the assumption does not hold: Spearman correlation coefficient
#   -> Examining the relationship between the tip and the bill paid

import seaborn as sb
tips = sb.load_dataset("tips")
df = tips.copy()

Esempio n. 9

0

Mostra file

File: descstats.py Progetto: alfonsodiecko/PYTHON_DIST

def descstats(data, cols=None, axis=0):
    '''
    Return a formatted report of descriptive statistics.

    The report is built as a string and returned; nothing is printed.

    Parameters
    ----------
    data : array_like
        Observations.  A 1-D input is a single variable; a 2-D input holds
        one variable per column.
    cols : optional
        Shown as the "Var. Name" label in the single-variable report.
        Column selection by number or field name is not implemented.
    axis : 1 or 0
        Not used at present; columns are always taken as the variables.

    Returns
    -------
    desc : str
        For one column: moments, percentiles and three location tests
        (H0: Mu0=0).  For several columns: a table with one row per column.

    Raises
    ------
    ValueError
        If the data has zero columns.

    Examples
    --------
    >>> print(descstats(data.exog))
    '''

    x = np.array(data)  # or rather, the data we're interested in

    # Flat input means one variable.  Reshape it to a single column whether
    # or not `cols` was supplied; otherwise x.shape[1] below would fail.
    if not isinstance(x, np.recarray) and x.ndim == 1:
        x = x[:, None]

    n_vars = x.shape[1]

    if n_vars == 1:
        # Statistics are taken on the flat column so that they are scalars:
        # %-formatting one-element arrays is deprecated in recent NumPy.
        series = x[:, 0]
        # The mode is computed on the 2-D array so that [0][0] / [1][0] is
        # valid both when SciPy keeps the reduced axis and when it drops it.
        mode_value, mode_count = stats.mode(x)[0][0], stats.mode(x)[1][0]
        deviations = series - series.mean()

        desc = '''
    ---------------------------------------------
    Univariate Descriptive Statistics
    ---------------------------------------------

    Var. Name   %(name)12s
    ----------
    Obs.          %(nobs)22i  Range                  %(range)22s
    Sum of Wts.   %(sum)22s  Coeff. of Variation     %(coeffvar)22.4g
    Mode          %(mode)22.4g  Skewness                %(skewness)22.4g
    Repeats       %(nmode)22i  Kurtosis                %(kurtosis)22.4g
    Mean          %(mean)22.4g  Uncorrected SS          %(uss)22.4g
    Median        %(median)22.4g  Corrected SS            %(ss)22.4g
    Variance      %(variance)22.4g  Sum Observations        %(sobs)22.4g
    Std. Dev.     %(stddev)22.4g
    ''' % {
            'name': cols,
            'sum': 'N/A',
            'nobs': len(x),
            'mode': mode_value,
            'nmode': mode_count,
            'mean': series.mean(),
            'median': np.median(series),
            'range': '(' + str(series.min()) + ', ' + str(series.max()) + ')',
            'variance': series.var(),
            'stddev': series.std(),
            'coeffvar': stats.variation(series),
            'skewness': stats.skew(series),
            'kurtosis': stats.kurtosis(series),
            # scipy.stats.ss is gone from SciPy; sum the squares directly.
            'uss': np.sum(series * series),
            'ss': np.sum(deviations * deviations),
            'sobs': np.sum(series),
        }

        percentile_levels = (1, 5, 10, 25, 50, 75, 90, 95, 99)
        desc += '''

    Percentiles
    -------------
    1  %%          %12.4g
    5  %%          %12.4g
    10 %%          %12.4g
    25 %%          %12.4g

    50 %%          %12.4g

    75 %%          %12.4g
    90 %%          %12.4g
    95 %%          %12.4g
    99 %%          %12.4g
    ''' % tuple([stats.scoreatpercentile(series, per)
                 for per in percentile_levels])

        # Three tests of location against zero.
        t, p_t = stats.ttest_1samp(series, 0)
        M, p_M = sign_test(series)
        S, p_S = stats.wilcoxon(series)

        desc += '''

    Tests of Location (H0: Mu0=0)
    -----------------------------
    Test                Statistic       Two-tailed probability
    -----------------+-----------------------------------------
    Student's t      |  t %7.5f   Pr > |t|   <%.4f
    Sign             |  M %8.2f   Pr >= |M|  <%.4f
    Signed Rank      |  S %8.2f   Pr >= |S|  <%.4f

    ''' % (t, p_t, M, p_M, S, p_S)
        # Should this be part of a 'descstats'?
        # In any event these should be split up, so that they can be called
        # individually and only returned together if someone calls summary
        # or something of the sort.

    elif n_vars > 1:
        desc = '''
    Var. Name   |     Obs.        Mean    Std. Dev.           Range
    ------------+--------------------------------------------------------''' + \
            os.linesep

        # The trailing line separator belongs to the range field, so it is
        # counted in that field's 20-character width (historical layout).
        for var in range(n_vars):
            values = x[:, var]
            span = '(' + str(values.min()) + ', ' + str(values.max()) + ')'
            desc += ("%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g "
                     "%(range)20s") % {
                'name': var,
                'obs': len(values),
                'mean': values.mean(),
                'stddev': values.std(),
                'range': span + os.linesep,
            }
    else:
        raise ValueError("data not understood")

    return desc

Esempio n. 10

0

Mostra file

def _judge_rollout_pair(outcome_one, outcome_two, use_toxi_n_tsize):
    """Compare the outcomes of one pair of roll-outs (action 'one' vs 'two').

    Each outcome is a dict with the keys 'p_death', 'toxicity', 't_mass'
    and 'toxicity_n_tmass'.

    Returns
    -------
    (prefers_one, prefers_two, sign)
        Two booleans saying which action(s) earn a preference count, and
        the value to feed to the sign test: +1 (action one better), -1
        (action two better), 0 (tie / no preference) or ``None`` when the
        pair must be left out of the sign test altogether.
    """
    p_one = np.round(outcome_one['p_death'], 2)
    p_two = np.round(outcome_two['p_death'], 2)
    # "dies" and "survives" are tracked separately: for a NaN probability
    # both comparisons are False.
    one_dies, one_survives = p_one >= 1.0, p_one < 1.0
    two_dies, two_survives = p_two >= 1.0, p_two < 1.0

    if one_dies and two_dies:
        # Neither trajectory is informative; skip the pair.
        return False, False, None
    if one_survives and two_dies:
        # Patient survives only on the trajectory starting from action 'one'.
        return True, False, +1
    if one_dies and two_survives:
        # Patient survives only on the trajectory starting from action 'two'.
        return False, True, -1
    if not (one_survives and two_survives):
        # Only reachable with a NaN probability: no preference is generated.
        return False, False, 0

    # Patient survives on both trajectories: compare the health outcomes.
    if use_toxi_n_tsize:
        # Compare the sum (max toxicity + tumor size).
        total_one = np.round(outcome_one['toxicity_n_tmass'], 4)
        total_two = np.round(outcome_two['toxicity_n_tmass'], 4)
        one_better = total_one <= total_two
        two_better = total_one >= total_two
    else:
        # Compare max toxicity and tumor size separately; an action is only
        # preferred when it is at least as good on both.
        tox_one = np.round(outcome_one['toxicity'], 4)
        tox_two = np.round(outcome_two['toxicity'], 4)
        mass_one = np.round(outcome_one['t_mass'], 4)
        mass_two = np.round(outcome_two['t_mass'], 4)
        one_better = (tox_one <= tox_two) and (mass_one <= mass_two)
        two_better = (tox_one >= tox_two) and (mass_one >= mass_two)

    if not (one_better or two_better):
        # Mixed result (each action wins on one criterion): the pair
        # contributes nothing to the counts or to the sign test.
        return False, False, None

    # Equal outcomes satisfy both comparisons: both actions are counted and
    # the sign-test entry is a tie (0).
    return bool(one_better), bool(two_better), int(one_better) - int(two_better)


def evaluate_preference(
        starting_state,
        action_1,
        action_2,
        policy_in,
        environment_name='ChemoSimulation-v0',
        n_rollouts=10,
        max_rollout_len=6,
        label_ranker=False,
        modified_algo=False,
        p_sig=0.1,
        tracking=False,
        use_toxi_n_tsize=False):
    """
    Generate a preference between two actions at a state from roll-outs.

    Description:

        - For each of ``n_rollouts`` rounds, one roll-out per input action
          is generated: the environment is reset to ``starting_state``, the
          action is applied, and ``policy_in`` is followed afterwards.
        - For every round the two roll-outs are compared (probability of
          death first, then toxicity / tumor size) and the comparison is
          recorded as +1 / -1 / 0 for a sign test.
        - The sign-test result decides the preference that is returned, to
          be used to build a training dataset for the LabelRanker model.

    Parameters
    ----------
    starting_state
        Starting state of the roll-outs.
    action_1, action_2
        First / second action to execute at the starting state.
    policy_in
        Policy to follow after the first action.  Called as
        ``policy_in(env)``, or as
        ``policy_in.label_ranking_policy(last_observation)`` when
        ``label_ranker`` is true.
    environment_name : str
        Name of the environment passed to ``gym.make``.
    n_rollouts : int
        Number of roll-outs to generate per action.
    max_rollout_len : int
        Maximum number of policy steps in a roll-out.
    label_ranker : bool
        Whether to use the label-ranking model or not.
    modified_algo : bool
        Whether evaluations run for the modified algorithm or not.
        Accepted but not used by this function.
    p_sig : float
        Significance threshold for the sign-test p-value.
    tracking : bool
        Print a one-line summary of the evaluation.
    use_toxi_n_tsize : bool
        Compare roll-outs on sum(max toxicity, tumor size) instead of on
        the two quantities separately.

    Returns
    -------
    (preference, action_count) : (dict, int)
        ``preference`` has the keys 'state', 'a_j', 'a_k' and
        'preference_label' (1 when action_1 is significantly preferred, 0
        when action_2 is); all four values are NaN when there is no
        significant preference.  ``action_count`` is the number of policy
        steps executed (the initial action of each roll-out is not
        counted).
    """
    policy = policy_in
    s_init = starting_state
    max_traj_len = max_rollout_len

    # Number of actions executed within the evaluation (to measure complexity).
    action_count = 0

    actions = {'one': action_1, 'two': action_2}

    # Accumulated roll-out preferences for each action.
    accu_preferences = {'one': 0, 'two': 0}

    # Sign-test data; pairs that yield no usable comparison are not added.
    sign_test_vals = []

    for rollout in range(n_rollouts):

        # Outcome of this round's roll-out for each starting action.
        outcomes = {}

        for action_key, action_value in actions.items():

            # Fresh environment per roll-out, started from the input state.
            env = gym.make(environment_name)
            try:
                env.reset(init_state=s_init)

                # Apply the starting action.
                observation, reward, done, _ = env.step(
                    np.array([action_value]))

                # Last observed state (input of the label-ranking policy).
                hist = observation

                max_toxicity = -1_000
                last_t_mass = None
                last_p_death = None

                # Follow the given policy to generate the trajectory.
                traj_len = 1
                while traj_len <= max_traj_len and not done:
                    if label_ranker:
                        next_action = policy.label_ranking_policy(hist)
                    else:
                        next_action = policy(env)

                    observation, reward, done, p_death = env.step(next_action)
                    hist = observation
                    action_count += 1
                    traj_len += 1

                    last_t_mass = observation[0]
                    max_toxicity = max(max_toxicity, observation[1])
                    last_p_death = p_death

                # NOTE(review): if the episode is already done after the
                # starting action, last_t_mass / last_p_death are still None
                # and the sum below raises TypeError (unchanged behaviour).
                # Confirm whether the first step's outcome should be used.
                outcomes[action_key] = {
                    't_mass': last_t_mass,
                    'toxicity': max_toxicity,
                    'p_death': last_p_death,
                    'toxicity_n_tmass': last_t_mass + max_toxicity,
                }
            finally:
                # Release the environment even when a step fails.
                env.close()

        prefers_one, prefers_two, sign = _judge_rollout_pair(
            outcomes['one'], outcomes['two'], use_toxi_n_tsize)
        accu_preferences['one'] += int(prefers_one)
        accu_preferences['two'] += int(prefers_two)
        if sign is not None:
            sign_test_vals.append(sign)

    # Run the sign test.  With no non-zero entry there is nothing to test
    # (and the underlying binomial test rejects zero trials), so report
    # "no difference" directly.
    if any(sign_test_vals):
        m, p_val = sign_test(sign_test_vals)
    else:
        m, p_val = 0.0, 1.0

    # track output
    if tracking:
        print(
            f"state: {s_init} | a_j(R): {accu_preferences['one']} | a_k(R): {accu_preferences['two']} | sig: {'Yes' if (p_val <= p_sig) else '--'}"
        )

    significant = p_val <= p_sig

    if (m > 0) and significant:
        return {
            'state': s_init,
            'a_j': actions['one'],
            'a_k': actions['two'],
            'preference_label': 1
        }, action_count

    if (m < 0) and significant:
        return {
            'state': s_init,
            'a_j': actions['one'],
            'a_k': actions['two'],
            'preference_label': 0
        }, action_count

    # NaN when the actions are not significantly different OR are equal.
    return {
        'state': np.nan,
        'a_j': np.nan,
        'a_k': np.nan,
        'preference_label': np.nan
    }, action_count

Esempio n. 11

0

Mostra file

File: project.py Progetto: busetopcu/Python

### The first value is the test statistic, the second value is the p-value.
# Because the p-value came out smaller than 0.05, H0 is rejected.
""" ornek dagılımı ile teorik dagılımı arasında istatistiksel olarak bir fark VARDIR """
# (Turkish: "there IS a statistically significant difference between the
# sample distribution and the theoretical distribution".)

# NOTE(review): recent seaborn releases deprecate distplot in favour of
# histplot / displot -- consider migrating.
sb.distplot(mallCustomer["Annual Income (k$)"], kde=False)
sb.distplot(mallCustomer["Annual Income (k$)"])
""" 
T testi uygulanamaz 
T-testi ile varsayımın sağlanmadığı durumda Nonparametrik tek örneklem test kullanılır.
Bu testlerden biri olan Sign Testini uygulayacağım.
"""
# English summary of the string above: the t-test cannot be applied.  When
# the t-test assumption is not met, a non-parametric one-sample test is used.
# The sign test, one of these tests, is applied below.
""" SIGN TESTİ """
# (Turkish: "SIGN TEST".)

from statsmodels.stats.descriptivestats import sign_test

test_istatistigi, pvalue = sign_test(mallCustomer["Annual Income (k$)"], 61)
print("Test İstatistiği = %.7f, p-degeri = %.7f" % (test_istatistigi, pvalue))
# The p-value is greater than 0.05, so the H0 hypothesis is accepted (not rejected).

#------------------------------------------------------------------------------------------------------

dataFrame = pd.read_csv("online_shoppers_intention.csv")
shop = dataFrame.copy()
shop.columns
# English summary of the string below: "Is the mean site exit rate 0.04 s?
# H0: mu = 0.04, H1: mu != 0.04.  T-test."
""" Siteden çıkma oranı ortalama 0.04 sn midir? 

H0: mü = 0.04
H1: mü != 0.04

T-Testi
"""

Esempio n. 12

0

Mostra file

def _tg_outcome_stats(frame, threshold=11):
    """Aggregate final-trial outcomes per difficulty row and subject.

    Only rows with ``phase > 1`` and ``trial == 15`` are counted.  Result
    rows 0, 1 and 2 restrict ``start_condition`` to the pairs (5, 6), (3, 4)
    and (1, 2) respectively; row 3 pools every start condition.  Subjects
    are addressed as ``subject == s + 1`` for ``s`` in ``range(n_subjects)``,
    i.e. the code assumes ids 1..n_subjects.

    Args:
        frame: table with the columns 'subject', 'phase', 'trial',
            'start_condition', 'score_A_after' and 'score_B_after'
            (``.unique()`` is called on a column, so presumably a pandas
            DataFrame).
        threshold: score a goal must reach to count as achieved.

    Returns:
        Tuple ``(p_g1, p_g2, p_fail, accum)`` of ``(4, n_subjects)`` arrays:
        proportion of selected rows where exactly one score reached the
        threshold, where both did, where neither did, and the reward
        ``5 * n_one_goal + 10 * n_both_goals``.  Proportions are NaN when a
        subject has no selected rows (0 / 0).
    """
    n_subjects = len(frame['subject'].unique())
    p_g1 = np.zeros((4, n_subjects))
    p_g2 = np.zeros((4, n_subjects))
    p_fail = np.zeros((4, n_subjects))
    accum = np.zeros((4, n_subjects))

    # Masks that do not depend on the difficulty row or the subject are
    # built once instead of inside the double loop.
    final_trial = (frame['phase'] > 1) & (frame['trial'] == 15)
    one_goal = ((frame['score_A_after'] >= threshold) ^
                (frame['score_B_after'] >= threshold))
    both_goals = ((frame['score_A_after'] >= threshold) &
                  (frame['score_B_after'] >= threshold))
    no_goal = ((frame['score_A_after'] < threshold) &
               (frame['score_B_after'] < threshold))

    for row, j in enumerate(range(2, -2, -1)):
        if row < 3:
            selection = final_trial & (
                (frame['start_condition'] == 1 + 2 * j) |
                (frame['start_condition'] == 2 + 2 * j))
        else:
            # Last row: no restriction on the start condition.
            selection = final_trial

        for s in range(n_subjects):
            idx = selection & (frame['subject'] == s + 1)

            n_elements = np.nansum(idx)
            n_g1_success = np.nansum(idx & one_goal)
            n_g2_success = np.nansum(idx & both_goals)
            n_fail = np.nansum(idx & no_goal)

            p_g1[row, s] = n_g1_success / n_elements
            p_g2[row, s] = n_g2_success / n_elements
            p_fail[row, s] = n_fail / n_elements
            accum[row, s] = n_g1_success * 5 + n_g2_success * 10

    return p_g1, p_g2, p_fail, accum


def _tg_significance_stars(p_value):
    """Map a p-value to '', '*', '**' or '***' (cut-offs 0.05 / 0.01 / 0.001)."""
    if p_value <= 0.001:
        return '***'
    if p_value <= 0.01:
        return '**'
    if p_value <= 0.05:
        return '*'
    # Also reached for NaN, for which every comparison above is False.
    return ''


def _tg_mark_significance(axis, x, y, p_value):
    """Write the significance stars for ``p_value`` at ``(x, y)`` on ``axis``, if any."""
    stars = _tg_significance_stars(p_value)
    if stars:
        axis.text(x,
                  y,
                  stars,
                  fontsize=6,
                  fontweight='bold',
                  horizontalalignment='center',
                  verticalalignment='center')


def tg_performance_accum_success(dat, agdat, save=False, size='1col'):
    """Compare subject and agent performance and draw a 3x2 bar-chart figure.

    For both data sets the final-trial outcomes are aggregated per difficulty
    row (plotted as 'easy', 'medium', 'hard', 'all') and per subject.  The
    left column of the figure shows the accumulated reward, the right column
    the proportions of one-goal success (G1), two-goal success (G2) and
    failure; the rows show subjects, agent and their difference.  Sign tests
    of the subject values against the agent reference are run per difficulty
    row and significant differences are starred in the bottom panels.

    Args:
        dat: subject data; needs the columns 'subject', 'phase', 'trial',
            'start_condition', 'score_A_after' and 'score_B_after'.
        agdat: agent data with the same columns.
        save: if true, the figure is written to 'performance.png'.
        size: figure size preset, one of '1col', '2col' or 'big'.

    Returns:
        dict with the keys 'm_accum', 'p_accum', 'm_g1', 'p_g1', 'm_g2',
        'p_g2', 'm_fail' and 'p_fail'; each value is a ``(4, 1)`` array
        holding the sign-test statistic (``m_*``) or p-value (``p_*``) per
        difficulty row.

    Raises:
        ValueError: if ``size`` is not one of the supported presets.
    """
    if size == '1col':
        width = 3.5
        height = 4
    elif size == '2col':
        width = 7
        height = 10
    elif size == 'big':
        width = 7 * 3
        height = 10 * 3
    else:
        # Previously an unknown preset only surfaced later as an unbound
        # local when the figure was created.
        raise ValueError(
            "size must be '1col', '2col' or 'big', got %r" % (size,))

    ## Subjects
    (p_g1_success_1, p_g2_success_1, p_fail_1,
     accum_reward_1) = _tg_outcome_stats(dat)
    mean_g1_success_1 = np.nanmean(p_g1_success_1, 1)
    mean_g2_success_1 = np.nanmean(p_g2_success_1, 1)
    mean_fail_1 = np.nanmean(p_fail_1, 1)
    mean_accum_reward_1 = np.nanmean(accum_reward_1, 1)
    std_g1_success_1 = np.nanstd(p_g1_success_1, 1)
    std_g2_success_1 = np.nanstd(p_g2_success_1, 1)
    std_fail_1 = np.nanstd(p_fail_1, 1)
    std_accum_reward_1 = np.nanstd(accum_reward_1, 1)

    ## Agent
    (p_g1_success_2, p_g2_success_2, p_fail_2,
     accum_reward_2) = _tg_outcome_stats(agdat)
    mean_g1_success_2 = np.nanmean(p_g1_success_2, 1)
    mean_g2_success_2 = np.nanmean(p_g2_success_2, 1)
    mean_fail_2 = np.nanmean(p_fail_2, 1)
    mean_accum_reward_2 = np.nanmean(accum_reward_2, 1)
    std_g1_success_2 = np.nanstd(p_g1_success_2, 1)
    std_g2_success_2 = np.nanstd(p_g2_success_2, 1)
    std_fail_2 = np.nanstd(p_fail_2, 1)
    std_accum_reward_2 = np.nanstd(accum_reward_2, 1)

    ## Difference between groups (Subject - Agent)
    mean_diff_g1_success = mean_g1_success_1 - mean_g1_success_2
    mean_diff_g2_success = mean_g2_success_1 - mean_g2_success_2
    mean_diff_fail = mean_fail_1 - mean_fail_2
    mean_diff_accum = mean_accum_reward_1 - mean_accum_reward_2
    # Error bars of the difference are the sum of both standard deviations.
    std_diff_g1_success = std_g1_success_1 + std_g1_success_2
    std_diff_g2_success = std_g2_success_1 + std_g2_success_2
    std_diff_fail = std_fail_1 + std_fail_2
    std_diff_accum = std_accum_reward_1 + std_accum_reward_2

    #%% Plotting Accumulated Reward
    # Sign test per difficulty row.
    # NOTE(review): only the first agent column (index 0) is used as the
    # reference value; presumably all agent instances behave identically --
    # confirm.
    M_accum = np.zeros((4, 1))
    psign_accum = np.zeros((4, 1))
    for i in range(4):
        M_accum[i], psign_accum[i] = stats2.sign_test(accum_reward_1[i, :] -
                                                      accum_reward_2[i, 0])

    barx = np.array([1, 2, 3, 4])

    bary_1 = np.ravel(mean_accum_reward_1)
    bary_2 = np.ravel(mean_accum_reward_2)
    bary_diff = np.ravel(mean_diff_accum)

    barerr_1 = np.ravel(std_accum_reward_1)
    barerr_2 = np.ravel(std_accum_reward_2)
    barerr_diff = np.ravel(std_diff_accum)

    # Plotting specs
    my_colors = np.array(['red', 'green', 'blue', 'grey'])
    ylabel = 'Total Reward [Cents]'
    titles = ['Subjects', 'Optimal agent', 'Difference (subject-agent)']
    subplot_labels = ['A', 'C', 'E']

    xticks = barx
    red_patch = mpatches.Patch(color='red', label='easy')
    green_patch = mpatches.Patch(color='green', label='medium')
    blue_patch = mpatches.Patch(color='blue', label='hard')
    grey_patch = mpatches.Patch(color='grey', label='all')
    ylim = (-40, 40)
    tick_length = 2
    tick_width = 1
    alph = 0.5

    fig, ax = plt.subplots(nrows=3,
                           ncols=2,
                           figsize=(width, height),
                           gridspec_kw={'width_ratios': [1, 3]})
    plt.tight_layout()
    # Left column: panels 0 (subjects), 2 (agent) and 4 (difference).
    for i in range(np.size(barx)):
        for panel, heights, errors in ((0, bary_1, barerr_1),
                                       (2, bary_2, barerr_2),
                                       (4, bary_diff, barerr_diff)):
            ax.flat[panel].bar(barx[i],
                               heights[i],
                               width=0.7,
                               color=my_colors[i],
                               yerr=errors[i],
                               error_kw=dict(elinewidth=1),
                               alpha=alph)
        # Stars are placed a fixed offset below the lower end of the error bar.
        _tg_mark_significance(ax.flat[4], barx[i],
                              bary_diff[i] - barerr_diff[i] - 10,
                              psign_accum[i, 0])

    for i, axes in enumerate([ax.flat[0], ax.flat[2], ax.flat[4]]):

        axes.set_xticks(xticks)
        axes.set_xticklabels([])
        axes.text(-0.1,
                  1.12,
                  subplot_labels[i],
                  transform=axes.transAxes,
                  horizontalalignment='center',
                  verticalalignment='center',
                  fontsize=10,
                  fontweight='bold')
        axes.spines['top'].set_visible(False)
        axes.spines['right'].set_visible(False)
        # Boolean flags: the legacy 'off' string is not a documented value
        # for these parameters.
        axes.xaxis.set_tick_params(top=False, direction='out', width=1)
        axes.yaxis.set_tick_params(right=False, direction='out', width=1)
        axes.set_ylabel(ylabel, fontsize=8, labelpad=None)

        if i == 0:
            axes.legend(
                handles=[red_patch, green_patch, blue_patch, grey_patch],
                loc='upper center',
                bbox_to_anchor=(2.2, 1.75),
                ncol=4,
                frameon=False)

        if i < 2:
            axes.tick_params(length=tick_length, width=tick_width)
            axes.text(3.3,
                      1.30,
                      titles[i],
                      transform=axes.transAxes,
                      horizontalalignment='center',
                      verticalalignment='center',
                      fontsize=9)
            axes.set_ylim((0, 450))
        else:
            # General tick size first, then zero out the x ticks (same order
            # as the right-hand column below); the reverse order would undo
            # the x-axis setting.
            axes.tick_params(length=tick_length, width=tick_width)
            axes.tick_params(length=0, width=0, axis='x')
            axes.axhline(0, linewidth=0.5, color='black')
            axes.xaxis.set_tick_params(bottom=False)
            axes.spines['bottom'].set_visible(False)
            axes.set_ylim(ylim)

    #%% Plotting success / failure proportions
    # Sign test per difficulty row, again against agent column 0.
    M_g1 = np.zeros((4, 1))
    psign_g1 = np.zeros((4, 1))
    M_g2 = np.zeros((4, 1))
    psign_g2 = np.zeros((4, 1))
    M_fail = np.zeros((4, 1))
    psign_fail = np.zeros((4, 1))
    for i in range(4):
        M_g1[i], psign_g1[i] = stats2.sign_test(p_g1_success_1[i, :] -
                                                p_g1_success_2[i, 0])
        M_g2[i], psign_g2[i] = stats2.sign_test(p_g2_success_1[i, :] -
                                                p_g2_success_2[i, 0])
        M_fail[i], psign_fail[i] = stats2.sign_test(p_fail_1[i, :] -
                                                    p_fail_2[i, 0])

    # Additional test: subjects' G2 success, row 0 vs. row 1.
    # NOTE(review): the result is neither plotted nor returned.
    M_g2_easy_vs_medium, psign_g2_easy_vs_medium = stats2.sign_test(
        p_g2_success_1[0, :], p_g2_success_1[1, :])

    # data to be plotted: three bars (G1, G2, fail) per difficulty row,
    # with a gap between the groups
    barx = np.array([1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15])

    bary_1 = np.ravel(
        np.column_stack((mean_g1_success_1, mean_g2_success_1, mean_fail_1)))
    bary_2 = np.ravel(
        np.column_stack((mean_g1_success_2, mean_g2_success_2, mean_fail_2)))
    bary_diff = np.ravel(
        np.column_stack(
            (mean_diff_g1_success, mean_diff_g2_success, mean_diff_fail)))

    barerr_1 = np.ravel(
        np.column_stack((std_g1_success_1, std_g2_success_1, std_fail_1)))
    barerr_2 = np.ravel(
        np.column_stack((std_g1_success_2, std_g2_success_2, std_fail_2)))
    barerr_diff = np.ravel(
        np.column_stack(
            (std_diff_g1_success, std_diff_g2_success, std_diff_fail)))

    # plotting specs
    bar_p = np.ravel(np.column_stack((psign_g1, psign_g2, psign_fail)))
    # Star height: just above the error bar for positive differences, just
    # above zero for negative ones.
    tmp = barerr_diff.copy()
    tmp[bary_diff < 0] = 0
    tmp2 = bary_diff.copy()
    tmp2[bary_diff < 0] = 0
    bar_ppos = tmp2 + tmp + 0.02
    my_colors = np.array([
        'red', 'red', 'red', 'green', 'green', 'green', 'blue', 'blue', 'blue',
        'grey', 'grey', 'grey'
    ])
    ylabel = 'Proportion Success'
    titles = ['Subjects', 'Optimal agent', 'Difference (subject - agent)']
    subplot_labels = ['B', 'D', 'F']
    xticks = barx
    xticklabels = [
        'G1', 'G2', 'fail', 'G1', 'G2', 'fail', 'G1', 'G2', 'fail', 'G1', 'G2',
        'fail'
    ]
    yticks = np.arange(0, 1.2, 0.2)
    yticklabels = np.round(np.arange(0, 1.2, 0.2), 1)

    # Right column: panels 1 (subjects), 3 (agent) and 5 (difference).
    for i in range(np.size(barx)):
        for panel, heights, errors in ((1, bary_1, barerr_1),
                                       (3, bary_2, barerr_2),
                                       (5, bary_diff, barerr_diff)):
            ax.flat[panel].bar(barx[i],
                               heights[i],
                               width=0.8,
                               color=my_colors[i],
                               yerr=errors[i],
                               error_kw=dict(elinewidth=1),
                               alpha=alph)
        _tg_mark_significance(ax.flat[5], barx[i], bar_ppos[i], bar_p[i])

    for i, axes in enumerate([ax.flat[1], ax.flat[3], ax.flat[5]]):

        axes.set_xticks(xticks)
        axes.set_xticklabels(xticklabels)
        axes.text(-0.1,
                  1.12,
                  subplot_labels[i],
                  transform=axes.transAxes,
                  horizontalalignment='center',
                  verticalalignment='center',
                  fontsize=10,
                  fontweight='bold')
        axes.tick_params(axis='x', labelsize=5)
        axes.spines['top'].set_visible(False)
        axes.spines['right'].set_visible(False)
        axes.xaxis.set_tick_params(top=False, direction='out', width=1)
        axes.yaxis.set_tick_params(right=False, direction='out', width=1)
        axes.set_ylabel(ylabel, fontsize=8)

        if i < 2:
            axes.tick_params(length=tick_length, width=tick_width)
            axes.set_ylim((0, 1))
            axes.set_yticks(yticks)
            axes.set_yticklabels(yticklabels)
            axes.text(-0.5,
                      1.30,
                      titles[i],
                      transform=axes.transAxes,
                      horizontalalignment='center',
                      verticalalignment='center',
                      fontsize=9)
        else:
            axes.tick_params(length=tick_length, width=tick_width)
            axes.tick_params(length=0, width=0, axis='x')
            axes.axhline(0, linewidth=0.5, color='black')
            axes.xaxis.set_tick_params(bottom=False)
            axes.spines['bottom'].set_visible(False)
            axes.set_ylabel(ylabel, fontsize=8, labelpad=-2)
            axes.text(0,
                      1.3,
                      titles[i],
                      transform=axes.transAxes,
                      horizontalalignment='center',
                      verticalalignment='center',
                      fontsize=9)

    # Layout, saving and the result dict are done once, after every panel
    # has been styled (they used to sit inside the loop above, so the file
    # was written once per panel).
    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=None,
                        hspace=0.7)

    if save:
        fig.savefig('performance.png',
                    dpi=300,
                    bbox_inches='tight',
                    transparent=True)

    return_dict = {
        'm_accum': M_accum,
        'p_accum': psign_accum,
        'm_g1': M_g1,
        'p_g1': psign_g1,
        'm_g2': M_g2,
        'p_g2': psign_g2,
        'm_fail': M_fail,
        'p_fail': psign_fail
    }

    return return_dict

Esempio n. 13

0

Mostra file

File: discreetest.py Progetto: yanlirock/Benchmarking_TSDiscretizations

    else:
        for f in glob.glob(discretizeddirectory + prefix + '*.csv'):
            logger.info('loading data from %s', f)
            dict_data_discretized[f] = np.loadtxt(f, delimiter=',')

    mabc_values = {}
    for discretization, data_d in dict_data_discretized.items():
        scaler = MinMaxScaler()
        data_d_s = scaler.fit_transform(data_d.astype(float))
        if data_original.shape != data_d_s.shape:
            raise AssertionError('data dimensions do not match')
        if np.isnan(data_original).any() or np.isnan(data_d_s).any():
            raise AssertionError(
                'please make sure your data does not have nan values')
        statistic, pvalue = sign_test(
            data_original.flatten(), data_d_s.flatten()
        )  # this is different than the original code. flatten() turns the T x D matrix into one vector
        if pvalue < alpha:
            logger.info('%s failed the sign test, assigning inf mabc',
                        discretization)
            mabc_values[discretization] = float("inf")
        else:
            mabc_values[discretization] = calculateMABC(
                data_d_s, data_original)

    logger.info('mabc values: %s', mabc_values)
    np.save(writedir + 'mabc_values_trial_' + str(int(starttime)) + '.npy',
            mabc_values)
    best = min(mabc_values, key=mabc_values.get)
    logger.info('best discretization: %s with mabc = %s', best,
                mabc_values[best])

Esempio n. 14

0

Mostra file

import scipy 
from scipy import stats
import statsmodels.stats.descriptivestats as sd
# import plotly.plotly as py
# import plotly.graph_objs as go
# from plotly.tools import FigureFactory as FF

############## 1 Sample Sign Test (Student_Scores) ################
# Load the marks data; the path is machine-specific.
data = pd.read_excel("E:/Day Wise/Day 08 Hypothesis Testing/Data/Marks-1sample sign test.xlsx")
data  # bare expression: only displays the frame in an interactive session

############# Normality test ###############
# Shapiro-Wilk test on the Marks column.
print(stats.shapiro(data.Marks))

################### 1 Sample Sign Test #############
# Sign test of Marks against the hypothesised value 82.
# NOTE: the result is not printed; it is only shown in an interactive session.
sd.sign_test(data.Marks, mu0=82)
help(sd.sign_test)

############### Mann-Whitney test (vehicles with and without additive) ############
# Raw string: the backslashes in this Windows path are literal characters.
# Written as a normal string, "\E", "\R", "\H" and "\M" are invalid escape
# sequences that newer Python versions warn about.
data = pd.read_excel(r"E:\Excelr Data\RCodes\Hyothesis Testing\Mann_whitney.xlsx")
data
# Rename the two columns so they can be used as attributes below.
data.columns = "Without_additive", "With_additive"

############# Normality test ###############
print(stats.shapiro(data.Without_additive))
print(stats.shapiro(data.With_additive))

############## Mann-Whitney test #############
# Two-sample Mann-Whitney U test between the two columns (result not printed).
scipy.stats.mannwhitneyu(data.Without_additive, data.With_additive)

Esempio n. 15

0

Mostra file

 def nonparametrik_tek_orneklem_testi(self, beklenen_deger):
     """Run a one-sample sign test on ``self.choice_array`` and return its p-value.

     ``beklenen_deger`` ("expected value") is the reference value passed to
     ``sign_test``; the second element of the test result is returned as a
     plain ``float``.
     """
     test_result = sign_test(self.choice_array, beklenen_deger)
     p_value = test_result[1]
     return float(p_value)