コード例 #1
0
def determine_estimation_timing(seed=None):
    """
    Generate a dataframe with timing info for the event-related task version.

    Trial durations are drawn uniformly from ``DUR_RANGE`` and inter-trial
    intervals (ITIs) are sampled, with replacement, from a pool of
    Gumbel-distributed values (location 4, scale 1) rounded to 0.1 and
    cropped to the 2-8 range. Candidate schedules are generated with
    successive seeds until the summed durations and ITIs fall short of
    ``TASK_TIME`` by an amount between 0 and 10.

    Parameters
    ----------
    seed : int or None, optional
        Seed used for the first candidate schedule. If None, a starting seed
        in [1000, 9999) is drawn from NumPy's global random state. A seed of
        0 is honored as a real seed.

    Returns
    -------
    timing_df : pandas.DataFrame
        One row per trial with columns ``duration``, ``iti`` and
        ``trial_type``.
    seed : int
        The seed *following* the one that produced the returned schedule;
        the counter is incremented after every attempt, including the
        successful one.

    Notes
    -----
    Only the durations and the selection of ITIs from the pool use the
    seeded ``RandomState``. The ITI pool itself is drawn without an explicit
    ``random_state``, so it is not controlled by ``seed``.
    """
    mu = 4  # location of the Gumbel distribution (nominally a 4s mean)
    raw_itis = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_itis = np.round(raw_itis, 1)
    # crop to 2-8s
    possible_itis = possible_itis[possible_itis >= 2]
    possible_itis = possible_itis[possible_itis <= 8]

    # Start from the largest float so the loop body runs at least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly: 0 is a legitimate seed.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying until the schedule under-fills the task by 0-10
    # (never over-fills it).
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        durations = state.uniform(DUR_RANGE[0], DUR_RANGE[1], N_TRIALS_TOTAL)
        durations = np.round(durations, 1)

        itis = state.choice(possible_itis, size=N_TRIALS_TOTAL, replace=True)
        missing_time = TASK_TIME - np.sum([durations.sum(), itis.sum()])
        seed += 1

    trial_types = randomize_carefully(CONDITIONS, N_TRIALS_PER_COND)
    timing_dict = {
        'duration': durations,
        'iti': itis,
        'trial_type': trial_types,
    }
    timing_df = pd.DataFrame(timing_dict)
    return timing_df, seed
コード例 #2
0
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Stores on ``self``: fake bootstrap replicates, a fake MLE parameter
        vector, fake jackknife replicates, a confidence percentage, the
        spatial test data, the statistic ``calc_theta`` with its value on the
        full test data, and the leave-one-out jackknife replicates of that
        statistic.

        Note that the spatial test data used in many of these tests comes from
        Efron, Bradley, and Robert J. Tibshirani. An Introduction to the
        Bootstrap. CRC press, 1994. Chapter 14.
        """
        # Determine the number of parameters and number of bootstrap replicates
        num_replicates = 100
        num_params = 5
        # Create a set of fake bootstrap replicates: entry (i, j) equals
        # (i + 1) * (j + 1), giving an array of shape (100, 5).
        self.bootstrap_replicates =\
            (np.arange(1, 1 + num_replicates)[:, None] *
             np.arange(1, 1 + num_params)[None, :])
        # Create a fake maximum likelihood parameter estimate
        self.mle_params = self.bootstrap_replicates[50, :]
        # Create a set of fake jackknife replicates: one column of 10 Gumbel
        # draws per parameter, centered on that parameter's fake estimate.
        self.jackknife_replicates =\
            np.concatenate([gumbel_r.rvs(loc=est, size=10)[:, None]
                            for est in self.mle_params], axis=1)
        # Create a fake confidence percentage.
        self.conf_percentage = 94.88

        # Store the spatial test data from Efron and Tibshirani (1994)
        self.test_data =\
            np.array([48, 36, 20, 29, 42, 42, 20, 42, 22, 41, 45, 14, 6,
                      0, 33, 28, 34, 4, 32, 24, 47, 41, 24, 26, 30, 41])

        # Note how many test data observations there are.
        num_test_obs = self.test_data.size

        # Statistic of interest: the mean squared deviation from the mean
        # (i.e. the variance with divisor n, not n - 1).
        def calc_theta(array):
            result = ((array - array.mean())**2).sum() / float(array.size)
            return result

        self.calc_theta = calc_theta
        self.test_theta_hat = np.array([calc_theta(self.test_data)])

        # Create the array of jackknife replicates: row `obs` holds the
        # statistic computed with observation `obs` deleted. `range` is used
        # (not `xrange`) so the fixture runs on Python 3 as well as Python 2.
        self.test_jackknife_replicates = np.array(
            [[calc_theta(np.delete(self.test_data, obs))]
             for obs in range(num_test_obs)],
            dtype=float)

        return None
コード例 #3
0
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Stores on ``self``: fake bootstrap replicates, a fake MLE parameter
        vector, fake jackknife replicates, a confidence percentage, the
        spatial test data, the statistic ``calc_theta`` with its value on the
        full test data, and the leave-one-out jackknife replicates of that
        statistic.

        Note that the spatial test data used in many of these tests comes from
        Efron, Bradley, and Robert J. Tibshirani. An Introduction to the
        Bootstrap. CRC press, 1994. Chapter 14.
        """
        # Determine the number of parameters and number of bootstrap replicates
        num_replicates = 100
        num_params = 5
        # Create a set of fake bootstrap replicates: entry (i, j) equals
        # (i + 1) * (j + 1), giving an array of shape (100, 5).
        self.bootstrap_replicates =\
            (np.arange(1, 1 + num_replicates)[:, None] *
             np.arange(1, 1 + num_params)[None, :])
        # Create a fake maximum likelihood parameter estimate
        self.mle_params = self.bootstrap_replicates[50, :]
        # Create a set of fake jackknife replicates: one column of 10 Gumbel
        # draws per parameter, centered on that parameter's fake estimate.
        self.jackknife_replicates =\
            np.concatenate([gumbel_r.rvs(loc=est, size=10)[:, None]
                            for est in self.mle_params], axis=1)
        # Create a fake confidence percentage.
        self.conf_percentage = 94.88

        # Store the spatial test data from Efron and Tibshirani (1994)
        self.test_data =\
            np.array([48, 36, 20, 29, 42, 42, 20, 42, 22, 41, 45, 14, 6,
                      0, 33, 28, 34, 4, 32, 24, 47, 41, 24, 26, 30, 41])

        # Note how many test data observations there are.
        num_test_obs = self.test_data.size

        # Statistic of interest: the mean squared deviation from the mean
        # (i.e. the variance with divisor n, not n - 1).
        def calc_theta(array):
            result = ((array - array.mean())**2).sum() / float(array.size)
            return result
        self.calc_theta = calc_theta
        self.test_theta_hat = np.array([calc_theta(self.test_data)])

        # Create the array of jackknife replicates: row `obs` holds the
        # statistic computed with observation `obs` deleted. `range` is used
        # (not `xrange`) so the fixture runs on Python 3 as well as Python 2.
        self.test_jackknife_replicates = np.array(
            [[calc_theta(np.delete(self.test_data, obs))]
             for obs in range(num_test_obs)],
            dtype=float)

        return None
コード例 #4
0
# NOTE(review): ``ax`` and ``plt`` are not created in this snippet; presumably
# they come from an earlier ``fig, ax = plt.subplots(1, 1)`` - confirm.

# Display the probability density function (``pdf``) between the 1st and
# 99th percentiles of the distribution:

x = np.linspace(gumbel_r.ppf(0.01), gumbel_r.ppf(0.99), 100)
ax.plot(x, gumbel_r.pdf(x), 'r-', lw=5, alpha=0.6, label='gumbel_r pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = gumbel_r()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf`` (they should be inverses):

vals = gumbel_r.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], gumbel_r.cdf(vals))
# True

# Generate random numbers:

r = gumbel_r.rvs(size=1000)

# And compare the histogram. ``density=True`` normalizes the histogram to
# unit area so it is comparable to the pdf; it replaces the ``normed``
# keyword, which has been removed from Matplotlib.

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
コード例 #5
0
def determine_timing(n_trials=24, null_rate=0.33,
                     operators=('*', '+', '-', '/'),
                     num_types=('numeric', 'word'),
                     feedback_types=('informative', 'noninformative'),
                     seed=None):
    """
    Generate the trial-by-trial configuration (timing and content) for a run.

    Parameters
    ----------
    n_trials : int
        Number of trials
    null_rate : float
        Proportion of trials to set as null (baseline) trial type.
        Default set to 1/3 per Dr. Aaron Mattfeld's recommendation.
    operators : sequence of str
        Valid operations to include. Each symbol is spliced into an
        expression string that is passed to ``eval``, so only trusted
        values may be supplied.
    num_types : sequence of str
        Number representations to include. May include numeric, word, and/or
        analog.
    feedback_types : sequence of str
        Feedback types to include. May include informative and
        noninformative.
    seed : int or None, optional
        Seed used for the first candidate timing schedule. If None, a
        starting seed in [1000, 9999) is drawn from NumPy's global random
        state. A seed of 0 is honored as a real seed.

    Returns
    -------
    df : pandas.DataFrame
        One row per trial, with the trial type, equation, solution,
        comparison value, representations, feedback type, rounded
        difference score, and all event durations/intervals.
    seed : int
        The seed *following* the one that produced the returned schedule;
        the counter is incremented after every attempt, including the
        successful one.

    Notes
    -----
    Only the durations and the selection of intervals from the pool use the
    seeded ``RandomState``. The interval pool, trial ordering, operands and
    difference scores are drawn from unseeded/global random state, so they
    are not controlled by ``seed``.
    """
    # Work on list copies so the sequence repetition below always builds
    # lists, whatever sequence type the caller passed.
    operators = list(operators)
    num_types = list(num_types)
    feedback_types = list(feedback_types)

    n_null_trials = int(np.ceil(n_trials * null_rate))
    n_math_trials = n_trials - n_null_trials

    # Timing
    mu = 4  # location of the Gumbel distribution (nominally a 4s mean)
    raw_intervals = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_intervals = np.round(raw_intervals, 1)
    # crop to the bounds given by INTERVAL_RANGE
    possible_intervals = possible_intervals[possible_intervals >= INTERVAL_RANGE[0]]
    possible_intervals = possible_intervals[possible_intervals <= INTERVAL_RANGE[1]]

    # Start from the largest float so the loop body runs at least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly: 0 is a legitimate seed.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying seeds until the schedule under-fills TASK_TIME by 0-10
    # (never over-fills it).
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        eq_durations = state.uniform(EQ_DUR_RANGE[0], EQ_DUR_RANGE[1], n_trials)
        eq_durations = np.round(eq_durations, 1)
        comp_durations = state.uniform(COMP_DUR_RANGE[0], COMP_DUR_RANGE[1], n_trials)
        comp_durations = np.round(comp_durations, 1)
        fdbk_durations = np.ones(n_trials) * FEEDBACK_DURATION
        isi1s = state.choice(possible_intervals, size=n_trials, replace=True)
        isi2s = state.choice(possible_intervals, size=n_trials, replace=True)
        itis = state.choice(possible_intervals, size=n_trials, replace=True)

        missing_time = TASK_TIME - np.sum([eq_durations.sum(), comp_durations.sum(),
                                           fdbk_durations.sum(),
                                           isi1s.sum(), isi2s.sum(), itis.sum()])
        seed += 1

    # Repeat each option list enough times to cover the required number of
    # trials; these pools are sampled without replacement further down.
    full_operators = operators * int(np.ceil(n_math_trials / len(operators)))
    full_num_types = num_types * int(np.ceil(n_trials / len(num_types)))
    full_feedback_types = feedback_types * int(np.ceil(n_trials / len(feedback_types)))

    # Get distribution of difference scores to control math difficulty
    # We want a sort of flattened normal distribution for this: the binomial
    # histogram is averaged with a uniform distribution of the same mean.
    value_range = 20
    raw_difference_scores = np.random.binomial(n=value_range, p=0.5, size=100000) - int(value_range / 2)
    x = np.arange(value_range+1, dtype=int) - int(value_range / 2)
    # NOTE(review): get_hist presumably returns the bin values and their
    # frequencies - confirm against its definition.
    x, y = get_hist(raw_difference_scores, x)
    uniform = np.ones(len(x)) * np.mean(y)
    updated_distribution = np.mean(np.vstack((y, uniform)), axis=0)
    probabilities = updated_distribution / np.sum(updated_distribution)

    # Sampling without replacement from the repeated pools (rather than
    # with replacement from the bare option lists) keeps the numbers of
    # trials with each type as balanced as possible.
    chosen_operators = np.random.choice(full_operators, n_math_trials, replace=False)
    chosen_equation_num_types = np.random.choice(full_num_types, n_trials, replace=False)
    chosen_comparison_num_types = np.random.choice(full_num_types, n_trials, replace=False)
    chosen_feedback_types = np.random.choice(full_feedback_types, n_trials, replace=False)

    difference_scores = np.random.choice(x, size=n_trials, p=probabilities)
    difference_scores = [int(ds) for ds in difference_scores]

    equations, comparisons, solutions = [], [], []

    # Set order of trial types. 1 = math, 0 = baseline
    ttype_dict = {0: 'baseline', 1: 'math'}
    trial_types = np.ones(n_trials, int)
    trial_types[:n_null_trials] = 0
    np.random.shuffle(trial_types)
    math_counter = 0

    for j_trial in range(n_trials):
        if trial_types[j_trial] == 1:
            first_val = str(np.random.randint(1, 31))
            second_val = str(np.random.randint(1, 31))
            operator = chosen_operators[math_counter]
            # If the result of division would be less than 1, flip the values
            if (operator == '/') and (int(first_val) < int(second_val)):
                first_val, second_val = second_val, first_val
            elif operator == '*':
                # Keep products small: redraw the multiplier from 1-9.
                second_val = str(np.random.randint(1, 10))
            equation = first_val + operator + second_val
            # SECURITY NOTE: eval runs on a string assembled from two
            # generated integers and an entry of `operators`; it is only
            # safe while `operators` comes from trusted code.
            solution = eval(equation)
            math_counter += 1
        else:
            # Baseline trials show a bare number instead of an equation.
            solution = np.random.randint(1, 31)
            equation = str(solution)

        comparison = int(np.round(solution + difference_scores[j_trial]))
        equations.append(equation)
        comparisons.append(comparison)
        solutions.append(solution)

    timing_dict = {
        'trial_type': [ttype_dict[tt] for tt in trial_types],
        'equation': equations,
        'solution': solutions,
        'comparison': comparisons,
        'equation_representation': chosen_equation_num_types,
        'comparison_representation': chosen_comparison_num_types,
        'feedback': chosen_feedback_types,
        'rounded_difference': difference_scores,
        'equation_duration': eq_durations,
        'isi1': isi1s,
        'comparison_duration': comp_durations,
        'isi2': isi2s,
        'feedback_duration': fdbk_durations,
        'iti': itis,
    }
    df = pd.DataFrame(timing_dict)
    return df, seed
コード例 #6
0
def est_skewed_dist():
    """Draw 1000 samples from ``gumbel_r`` and show them as a 20-bin histogram."""
    n_samples = 1000
    n_bins = 20
    samples = gumbel_r.rvs(size=n_samples)
    plt.hist(samples, bins=n_bins)
    plt.show()