コード例 #1
def determine_estimation_timing(seed=None):
    """
    Generate a dataframe with timing info for the event-related version of the task.

    Trial durations are drawn uniformly from ``DUR_RANGE`` and inter-trial
    intervals (ITIs) are sampled from a pool of Gumbel-distributed values.
    Candidate seeds are tried one after another until the summed durations
    and ITIs leave between 0 and 10 units of ``TASK_TIME`` unused.

    Parameters
    ----------
    seed : int or None, optional
        First seed to try for the ``RandomState`` that draws durations and
        ITIs. If None, a random starting seed in [1000, 9999) is chosen.

    Returns
    -------
    timing_df : pandas.DataFrame
        One row per trial with columns ``duration``, ``iti`` and
        ``trial_type``.
    seed : int
        The seed *following* the one that produced the accepted timings
        (the counter is incremented after every attempt, including the
        successful one), i.e. the next untried seed.
    """
    mu = 4  # mean of 4s
    # NOTE: the ITI pool is drawn from the global NumPy random state, before
    # the seeded RandomState below exists, so it is not controlled by `seed`.
    raw_itis = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_itis = np.round(raw_itis, 1)
    # crop to 2-8s
    possible_itis = possible_itis[(possible_itis >= 2) & (possible_itis <= 8)]

    # Start with an impossible amount of missing time so the loop runs at
    # least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly so that an explicit seed of 0 is honored.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying seeds until the trials fit in TASK_TIME (missing_time >= 0)
    # with at most 10 left over.
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        durations = state.uniform(DUR_RANGE[0], DUR_RANGE[1], N_TRIALS_TOTAL)
        durations = np.round(durations, 1)

        itis = state.choice(possible_itis, size=N_TRIALS_TOTAL, replace=True)
        missing_time = TASK_TIME - np.sum([durations.sum(), itis.sum()])
        seed += 1

    trial_types = randomize_carefully(CONDITIONS, N_TRIALS_PER_COND)
    timing_dict = {
        'duration': durations,
        'iti': itis,
        'trial_type': trial_types,
    }
    timing_df = pd.DataFrame(timing_dict)
    return timing_df, seed
コード例 #2
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Note that the spatial test data used in many of these tests comes from
        Efron, Bradley, and Robert J. Tibshirani. An Introduction to the
        Bootstrap. CRC press, 1994. Chapter 14.
        """
        # Determine the number of parameters and number of bootstrap replicates
        num_replicates = 100
        num_params = 5
        # Create a set of fake bootstrap replicates. Entry (i, j) equals
        # (i + 1) * (j + 1), giving a (num_replicates, num_params) array.
        self.bootstrap_replicates = (
            np.arange(1, 1 + num_replicates)[:, None] *
            np.arange(1, 1 + num_params)[None, :])
        # Create a fake maximum likelihood parameter estimate
        self.mle_params = self.bootstrap_replicates[50, :]
        # Create a set of fake jackknife replicates: ten Gumbel draws centered
        # on each parameter estimate, one column per parameter.
        array_container = [gumbel_r.rvs(loc=est, size=10)
                           for est in self.mle_params]
        self.jackknife_replicates = np.concatenate(
            [x[:, None] for x in array_container], axis=1)
        # Create a fake confidence percentage.
        self.conf_percentage = 94.88

        # Store the spatial test data from Efron and Tibshirani (1994)
        self.test_data = np.array(
            [48, 36, 20, 29, 42, 42, 20, 42, 22, 41, 45, 14, 6,
             0, 33, 28, 34, 4, 32, 24, 47, 41, 24, 26, 30, 41])

        # Note how many test data observations there are.
        num_test_obs = self.test_data.size

        # Create the function to calculate the jackknife replicates.
        def calc_theta(array):
            """Return the mean squared deviation of `array` from its mean."""
            return ((array - array.mean())**2).sum() / float(array.size)

        self.calc_theta = calc_theta
        self.test_theta_hat = np.array([calc_theta(self.test_data)])

        # Create the array of jackknife replicates: row `obs` holds the
        # statistic computed with observation `obs` deleted. `range` (rather
        # than the Python-2-only `xrange`) works on both Python 2 and 3.
        jackknife_replicates = np.empty((num_test_obs, 1), dtype=float)
        for obs in range(num_test_obs):
            current_data = np.delete(self.test_data, obs)
            jackknife_replicates[obs] = calc_theta(current_data)
        self.test_jackknife_replicates = jackknife_replicates
コード例 #3
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Note that the spatial test data used in many of these tests comes from
        Efron, Bradley, and Robert J. Tibshirani. An Introduction to the
        Bootstrap. CRC press, 1994. Chapter 14.
        """
        # Determine the number of parameters and number of bootstrap replicates
        num_replicates = 100
        num_params = 5
        # Create a set of fake bootstrap replicates. Entry (i, j) equals
        # (i + 1) * (j + 1), giving a (num_replicates, num_params) array.
        self.bootstrap_replicates = (
            np.arange(1, 1 + num_replicates)[:, None] *
            np.arange(1, 1 + num_params)[None, :])
        # Create a fake maximum likelihood parameter estimate
        self.mle_params = self.bootstrap_replicates[50, :]
        # Create a set of fake jackknife replicates: ten Gumbel draws centered
        # on each parameter estimate, one column per parameter.
        array_container = [gumbel_r.rvs(loc=est, size=10)
                           for est in self.mle_params]
        self.jackknife_replicates = np.concatenate(
            [x[:, None] for x in array_container], axis=1)
        # Create a fake confidence percentage.
        self.conf_percentage = 94.88

        # Store the spatial test data from Efron and Tibshirani (1994)
        self.test_data = np.array(
            [48, 36, 20, 29, 42, 42, 20, 42, 22, 41, 45, 14, 6,
             0, 33, 28, 34, 4, 32, 24, 47, 41, 24, 26, 30, 41])

        # Note how many test data observations there are.
        num_test_obs = self.test_data.size

        # Create the function to calculate the jackknife replicates.
        def calc_theta(array):
            """Return the mean squared deviation of `array` from its mean."""
            return ((array - array.mean())**2).sum() / float(array.size)

        self.calc_theta = calc_theta
        self.test_theta_hat = np.array([calc_theta(self.test_data)])

        # Create the array of jackknife replicates: row `obs` holds the
        # statistic computed with observation `obs` deleted. `range` (rather
        # than the Python-2-only `xrange`) works on both Python 2 and 3.
        jackknife_replicates = np.empty((num_test_obs, 1), dtype=float)
        for obs in range(num_test_obs):
            current_data = np.delete(self.test_data, obs)
            jackknife_replicates[obs] = calc_theta(current_data)
        self.test_jackknife_replicates = jackknife_replicates
コード例 #4
# Example usage of the ``gumbel_r`` distribution object.
# NOTE(review): `ax` is not created in this snippet; presumably it is a
# Matplotlib Axes obtained earlier (e.g. ``fig, ax = plt.subplots(1, 1)``) --
# confirm before running this on its own.

# Display the probability density function (``pdf``):

# Evaluate the pdf on 100 points between the 1st and 99th percentiles.
x = np.linspace(gumbel_r.ppf(0.01), gumbel_r.ppf(0.99), 100)
ax.plot(x, gumbel_r.pdf(x), 'r-', lw=5, alpha=0.6, label='gumbel_r pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = gumbel_r()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = gumbel_r.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], gumbel_r.cdf(vals))
# True

# Generate random numbers:

r = gumbel_r.rvs(size=1000)

# And compare the histogram:

# ``density=True`` replaces the ``normed`` keyword, which was removed from
# Matplotlib's ``hist``; it normalizes the histogram so it is comparable to
# the pdf curves above.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
コード例 #5
def determine_timing(n_trials=24, null_rate=0.33,
                     operators=['*', '+', '-', '/'],
                     num_types=['numeric', 'word'],
                     feedback_types=['informative', 'noninformative'],
                     seed=None):
    """
    Generate the trial-by-trial configuration (content and timing) for a run.

    Parameters
    ----------
    n_trials : int
        Number of trials
    null_rate : float
        Proportion of trials to set as null trial type.
        Default set to 1/3 per Dr. Aaron Mattfeld's recommendation.
    operators : list
        List of valid operations to include
    num_types : list
        Number representations to include. May include numeric, word, and/or analog.
    feedback_types : list
        Feedback types to include. May include informative and noninformative.
    seed : int or None, optional
        First seed to try for the ``RandomState`` that draws the durations
        and intervals. If None, a random starting seed in [1000, 9999) is
        chosen.

    Returns
    -------
    df : pandas.DataFrame
        One row per trial: trial type, equation, solution, comparison value,
        representations, feedback type, rounded difference score, and the
        duration/interval columns.
    seed : int
        The seed *following* the one that produced the accepted timings
        (the counter is incremented after every attempt, including the
        successful one), i.e. the next untried seed.

    Notes
    -----
    The list defaults are only read, never mutated, inside this function.
    Only the durations and intervals are drawn from the seeded
    ``RandomState``; everything else uses the global NumPy random state.
    """
    n_null_trials = int(np.ceil(n_trials * null_rate))
    n_math_trials = n_trials - n_null_trials

    # Timing
    mu = 4  # mean of 4s
    raw_intervals = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_intervals = np.round(raw_intervals, 1)
    # crop to INTERVAL_RANGE (the original comment gives this as 2-8s)
    possible_intervals = possible_intervals[possible_intervals >= INTERVAL_RANGE[0]]
    possible_intervals = possible_intervals[possible_intervals <= INTERVAL_RANGE[1]]

    # Start with an impossible amount of missing time so the loop runs at
    # least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly so that an explicit seed of 0 is honored.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying seeds until the trials fit in TASK_TIME (missing_time >= 0)
    # with at most 10 left over.
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        eq_durations = state.uniform(EQ_DUR_RANGE[0], EQ_DUR_RANGE[1], n_trials)
        eq_durations = np.round(eq_durations, 1)
        comp_durations = state.uniform(COMP_DUR_RANGE[0], COMP_DUR_RANGE[1], n_trials)
        comp_durations = np.round(comp_durations, 1)
        fdbk_durations = np.ones(n_trials) * FEEDBACK_DURATION
        isi1s = state.choice(possible_intervals, size=n_trials, replace=True)
        isi2s = state.choice(possible_intervals, size=n_trials, replace=True)
        itis = state.choice(possible_intervals, size=n_trials, replace=True)

        # NOTE(review): feedback durations are not subtracted here --
        # presumably TASK_TIME already excludes feedback; confirm.
        missing_time = TASK_TIME - np.sum([eq_durations.sum(), comp_durations.sum(),
                                           isi1s.sum(), isi2s.sum(), itis.sum()])
        seed += 1

    # Repeat each option list enough times to cover the trials it is sampled
    # for, so that sampling without replacement below stays balanced.
    full_operators = operators * int(np.ceil(n_math_trials / len(operators)))
    full_num_types = num_types * int(np.ceil(n_trials / len(num_types)))
    full_feedback_types = feedback_types * int(np.ceil(n_trials / len(feedback_types)))

    # Get distribution of difference scores to control math difficulty
    # We want a sort of flattened normal distribution for this
    value_range = 20
    raw_difference_scores = np.random.binomial(n=value_range, p=0.5, size=100000) - int(value_range / 2)
    x = np.arange(value_range+1, dtype=int) - int(value_range / 2)
    x, y = get_hist(raw_difference_scores, x)
    # Average the empirical histogram with a flat one to flatten the peak.
    uniform = np.ones(len(x)) * np.mean(y)
    updated_distribution = np.mean(np.vstack((y, uniform)), axis=0)
    probabilities = updated_distribution / np.sum(updated_distribution)

    # Slightly more complicated approach chosen over np.random.choice
    # to make numbers of trials with each type as balanced as possible
    chosen_operators = np.random.choice(full_operators, n_math_trials, replace=False)
    chosen_equation_num_types = np.random.choice(full_num_types, n_trials, replace=False)
    chosen_comparison_num_types = np.random.choice(full_num_types, n_trials, replace=False)
    chosen_feedback_types = np.random.choice(full_feedback_types, n_trials, replace=False)

    difference_scores = np.random.choice(x, size=n_trials, p=probabilities)
    difference_scores = [int(ds) for ds in difference_scores]

    equations, comparisons, solutions = [], [], []

    # Set order of trial types. 1 = math, 0 = baseline
    # NOTE(review): as written, all baseline trials come first; if a random
    # order is intended, a shuffle is missing here -- confirm.
    ttype_dict = {0: 'baseline', 1: 'math'}
    trial_types = np.ones(n_trials, int)
    trial_types[:n_null_trials] = 0
    math_counter = 0

    for j_trial in range(n_trials):
        if trial_types[j_trial] == 1:
            first_val = str(np.random.randint(1, 31))
            second_val = str(np.random.randint(1, 31))
            operator = chosen_operators[math_counter]
            # If the result of division would be less than 1, flip the values
            if (operator == '/') and (int(first_val) < int(second_val)):
                first_val, second_val = second_val, first_val
            elif operator == '*':
                # Keep products small by limiting the second factor to 1-9.
                second_val = str(np.random.randint(1, 10))
            equation = first_val + operator + second_val
            # SECURITY NOTE: eval() runs a string built from internally
            # generated integers and a caller-supplied operator; never pass
            # untrusted values in `operators`.
            solution = eval(equation)
            math_counter += 1
        else:
            # Baseline trial: the "equation" is just a number to display.
            solution = np.random.randint(1, 31)
            equation = str(solution)

        comparison = int(np.round(solution + difference_scores[j_trial]))

        # Record this trial so every column ends up with n_trials entries.
        equations.append(equation)
        solutions.append(solution)
        comparisons.append(comparison)

    timing_dict = {
        'trial_type': [ttype_dict[tt] for tt in trial_types],
        'equation': equations,
        'solution': solutions,
        'comparison': comparisons,
        'equation_representation': chosen_equation_num_types,
        'comparison_representation': chosen_comparison_num_types,
        'feedback': chosen_feedback_types,
        'rounded_difference': difference_scores,
        'equation_duration': eq_durations,
        'isi1': isi1s,
        'comparison_duration': comp_durations,
        'isi2': isi2s,
        'feedback_duration': fdbk_durations,
        'iti': itis,
    }
    df = pd.DataFrame(timing_dict)
    return df, seed
コード例 #6
def est_skewed_dist():
    """Draw 1000 samples from ``gumbel_r`` and plot them as a 20-bin histogram."""
    # Default loc/scale are used because only `size` is passed.
    r = gumbel_r.rvs(size=1000)
    # NOTE(review): `plt` is presumably matplotlib.pyplot -- confirm against
    # the file's imports.
    plt.hist(r, bins=20)