def determine_estimation_timing(seed=None):
    """
    Generate a dataframe with timing info for event-related version of task.

    Trial durations are drawn uniformly from ``DUR_RANGE`` and inter-trial
    intervals (ITIs) are sampled from a pool of Gumbel-distributed values
    cropped to 2-8s. Candidate seeds are tried one after another until the
    time left over in ``TASK_TIME`` is between 0 and 10 (inclusive).

    Parameters
    ----------
    seed : int or None, optional
        First seed to try for the duration/ITI draws. If None, a random
        starting seed between 1000 and 9998 is chosen. A seed of 0 is
        honored as a real seed.

    Returns
    -------
    timing_df : pandas.DataFrame
        One row per trial with columns ``duration``, ``iti`` and
        ``trial_type``.
    seed : int
        The seed counter after the search. The counter is incremented after
        every attempt, so this is one greater than the seed that generated
        the returned durations and ITIs.

    Notes
    -----
    The ITI pool is drawn without the seeded generator (only the duration
    and ITI selection use ``seed``), so results are not fully determined by
    ``seed``. ``randomize_carefully`` is likewise called without ``seed``.
    """
    mu = 4  # location of the Gumbel distribution, in seconds
    raw_itis = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_itis = np.round(raw_itis, 1)
    # crop to 2-8s
    possible_itis = possible_itis[possible_itis >= 2]
    possible_itis = possible_itis[possible_itis <= 8]

    # Start with a huge value so the search loop runs at least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly: ``not seed`` would discard seed=0.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying seeds until the unused task time is in [0, 10].
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        durations = state.uniform(DUR_RANGE[0], DUR_RANGE[1], N_TRIALS_TOTAL)
        durations = np.round(durations, 1)
        itis = state.choice(possible_itis, size=N_TRIALS_TOTAL, replace=True)
        missing_time = TASK_TIME - np.sum([durations.sum(), itis.sum()])
        seed += 1

    trial_types = randomize_carefully(CONDITIONS, N_TRIALS_PER_COND)
    timing_dict = {
        'duration': durations,
        'iti': itis,
        'trial_type': trial_types,
    }
    timing_df = pd.DataFrame(timing_dict)
    return timing_df, seed
def setUp(self):
    """
    Create the fixtures used by the tests.

    Note that the spatial test data used in many of these tests comes from
    Efron, Bradley, and Robert J. Tibshirani. An Introduction to the
    Bootstrap. CRC press, 1994. Chapter 14.

    Attributes set on ``self``
    --------------------------
    bootstrap_replicates : 2D ndarray of shape (100, 5).
    mle_params : 1D ndarray, row 50 of ``bootstrap_replicates``.
    jackknife_replicates : 2D ndarray of shape (10, 5) of random draws.
    conf_percentage : float.
    test_data : 1D ndarray of the 26 spatial test observations.
    calc_theta : callable returning the mean squared deviation of an array.
    test_theta_hat : 1D ndarray with ``calc_theta(test_data)``.
    test_jackknife_replicates : 2D ndarray of shape (26, 1) holding the
        leave-one-out values of ``calc_theta``.
    """
    # Determine the number of parameters and number of bootstrap replicates
    num_replicates = 100
    num_params = 5
    # Create a set of fake bootstrap replicates
    self.bootstrap_replicates = (
        np.arange(1, 1 + num_replicates)[:, None] *
        np.arange(1, 1 + num_params)[None, :])
    # Create a fake maximum likelihood parameter estimate
    self.mle_params = self.bootstrap_replicates[50, :]
    # Create a set of fake jackknife replicates: one column of 10 random
    # draws per parameter, located at that parameter's estimate.
    array_container = [gumbel_r.rvs(loc=est, size=10)
                       for est in self.mle_params]
    self.jackknife_replicates = np.concatenate(
        [x[:, None] for x in array_container], axis=1)
    # Create a fake confidence percentage.
    self.conf_percentage = 94.88

    # Store the spatial test data from Efron and Tibshirani (1994)
    self.test_data = np.array(
        [48, 36, 20, 29, 42, 42, 20, 42, 22, 41, 45, 14, 6,
         0, 33, 28, 34, 4, 32, 24, 47, 41, 24, 26, 30, 41])
    # Note how many test data observations there are.
    num_test_obs = self.test_data.size

    # Create the function to calculate the jackknife replicates.
    def calc_theta(array):
        result = ((array - array.mean())**2).sum() / float(array.size)
        return result
    self.calc_theta = calc_theta
    self.test_theta_hat = np.array([calc_theta(self.test_data)])

    # Create a pandas series of the data. Allows for easy case deletion.
    raw_series = pd.Series(self.test_data)
    # Create the array of jackknife replicates
    jackknife_replicates = np.empty((num_test_obs, 1), dtype=float)
    # ``range`` (not ``xrange``) so this runs on both Python 2 and 3.
    for obs in range(num_test_obs):
        current_data = raw_series[raw_series.index != obs].values
        jackknife_replicates[obs] = calc_theta(current_data)
    self.test_jackknife_replicates = jackknife_replicates

    return None
# Example script: plot the ``gumbel_r`` pdf, a frozen copy of it, and a
# normalized histogram of random draws on the existing axes ``ax``.

# Display the probability density function (``pdf``):
x = np.linspace(gumbel_r.ppf(0.01), gumbel_r.ppf(0.99), 100)
ax.plot(x, gumbel_r.pdf(x), 'r-', lw=5, alpha=0.6, label='gumbel_r pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = gumbel_r()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = gumbel_r.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], gumbel_r.cdf(vals))  # True

# Generate random numbers:
r = gumbel_r.rvs(size=1000)

# And compare the histogram. ``density=True`` replaces the ``normed``
# keyword, which was removed from Matplotlib's ``hist``.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def determine_timing(n_trials=24, null_rate=0.33,
                     operators=['*', '+', '-', '/'],
                     num_types=['numeric', 'word'],
                     feedback_types=['informative', 'noninformative'],
                     seed=None):
    """
    Generate the trial-by-trial timing and content table for one run.

    Parameters
    ----------
    n_trials : int
        Number of trials
    null_rate : float
        Proportion of trials to set as null trial type.
        Default set to 1/3 per Dr. Aaron Mattfeld's recommendation.
    operators : list
        List of valid operations to include
    num_types : list
        Number representations to include. May include numeric, word,
        and/or analog.
    feedback_types : list
        Feedback types to include. May include informative and
        noninformative.
    seed : int or None
        First seed to try for the duration/interval draws. If None, a
        random starting seed between 1000 and 9998 is chosen. A seed of 0
        is honored as a real seed.

    Returns
    -------
    df : pandas.DataFrame
        One row per trial with columns ``trial_type``, ``equation``,
        ``solution``, ``comparison``, ``equation_representation``,
        ``comparison_representation``, ``feedback``, ``rounded_difference``,
        ``equation_duration``, ``isi1``, ``comparison_duration``, ``isi2``,
        ``feedback_duration`` and ``iti``.
    seed : int
        The seed counter after the search. The counter is incremented after
        every attempt, so this is one greater than the seed that generated
        the returned durations and intervals.

    Notes
    -----
    Only the durations and intervals use the seeded generator. The interval
    pool, trial order, operands and difference scores are drawn from
    NumPy's global random state and are therefore not controlled by
    ``seed``. The default list arguments are never mutated here.
    """
    n_null_trials = int(np.ceil(n_trials * null_rate))
    n_math_trials = n_trials - n_null_trials

    # Timing
    mu = 4  # location of the Gumbel distribution, in seconds
    raw_intervals = gumbel_r.rvs(size=100000, loc=mu, scale=1)
    possible_intervals = np.round(raw_intervals, 1)
    # crop to INTERVAL_RANGE
    possible_intervals = possible_intervals[
        possible_intervals >= INTERVAL_RANGE[0]]
    possible_intervals = possible_intervals[
        possible_intervals <= INTERVAL_RANGE[1]]

    # Start with a huge value so the search loop runs at least once.
    missing_time = np.finfo(dtype='float64').max
    # Compare against None explicitly: ``not seed`` would discard seed=0.
    if seed is None:
        seed = np.random.randint(1000, 9999)

    # Keep trying seeds until the unused task time is in [0, 10].
    while (not np.isclose(missing_time, 0.0, atol=10)) or (missing_time < 0):
        state = np.random.RandomState(seed=seed)
        eq_durations = state.uniform(EQ_DUR_RANGE[0], EQ_DUR_RANGE[1],
                                     n_trials)
        eq_durations = np.round(eq_durations, 1)
        comp_durations = state.uniform(COMP_DUR_RANGE[0], COMP_DUR_RANGE[1],
                                       n_trials)
        comp_durations = np.round(comp_durations, 1)
        fdbk_durations = np.ones(n_trials) * FEEDBACK_DURATION
        isi1s = state.choice(possible_intervals, size=n_trials, replace=True)
        isi2s = state.choice(possible_intervals, size=n_trials, replace=True)
        itis = state.choice(possible_intervals, size=n_trials, replace=True)
        missing_time = TASK_TIME - np.sum([eq_durations.sum(),
                                           comp_durations.sum(),
                                           fdbk_durations.sum(),
                                           isi1s.sum(),
                                           isi2s.sum(),
                                           itis.sum()])
        seed += 1

    # Repeat each option list enough times to cover all trials. float()
    # forces true division so the ceiling is correct on Python 2 as well.
    full_operators = operators * int(
        np.ceil(n_math_trials / float(len(operators))))
    full_num_types = num_types * int(
        np.ceil(n_trials / float(len(num_types))))
    full_feedback_types = feedback_types * int(
        np.ceil(n_trials / float(len(feedback_types))))

    # Get distribution of difference scores to control math difficulty
    # We want a sort of flattened normal distribution for this
    value_range = 20
    raw_difference_scores = np.random.binomial(
        n=value_range, p=0.5, size=100000) - int(value_range / 2)
    x = np.arange(value_range+1, dtype=int) - int(value_range / 2)
    # NOTE(review): get_hist presumably returns the bin values and their
    # counts -- confirm against its definition.
    x, y = get_hist(raw_difference_scores, x)
    # Average the histogram with a flat distribution of the same mean.
    uniform = np.ones(len(x)) * np.mean(y)
    updated_distribution = np.mean(np.vstack((y, uniform)), axis=0)
    probabilities = updated_distribution / np.sum(updated_distribution)

    # Slightly more complicated approach chosen over np.random.choice
    # to make numbers of trials with each type as balanced as possible
    chosen_operators = np.random.choice(full_operators, n_math_trials,
                                        replace=False)
    chosen_equation_num_types = np.random.choice(full_num_types, n_trials,
                                                 replace=False)
    chosen_comparison_num_types = np.random.choice(full_num_types, n_trials,
                                                   replace=False)
    chosen_feedback_types = np.random.choice(full_feedback_types, n_trials,
                                             replace=False)
    difference_scores = np.random.choice(x, size=n_trials, p=probabilities)
    difference_scores = [int(ds) for ds in difference_scores]

    equations, comparisons, solutions = [], [], []

    # Set order of trial types. 1 = math, 0 = baseline
    ttype_dict = {0: 'baseline', 1: 'math'}
    trial_types = np.ones(n_trials, int)
    trial_types[:n_null_trials] = 0
    np.random.shuffle(trial_types)

    math_counter = 0
    for j_trial in range(n_trials):
        if trial_types[j_trial] == 1:
            first_val = str(np.random.randint(1, 31))
            second_val = str(np.random.randint(1, 31))
            operator = chosen_operators[math_counter]
            # If the result of division would be less than 1, flip the values
            if (operator == '/') and (int(first_val) < int(second_val)):
                first_val, second_val = second_val, first_val
            elif operator == '*':
                # Redraw the multiplier from 1-9.
                second_val = str(np.random.randint(1, 10))
            equation = first_val + operator + second_val
            # NOTE: the operands are generated above, but ``operator`` comes
            # from the caller-supplied ``operators`` list; eval() therefore
            # must only ever see trusted operator strings.
            solution = eval(equation)
            math_counter += 1
        else:
            # Baseline trials show a single number instead of an equation.
            solution = np.random.randint(1, 31)
            equation = str(solution)
        comparison = int(np.round(solution + difference_scores[j_trial]))
        equations.append(equation)
        comparisons.append(comparison)
        solutions.append(solution)

    timing_dict = {
        'trial_type': [ttype_dict[tt] for tt in trial_types],
        'equation': equations,
        'solution': solutions,
        'comparison': comparisons,
        'equation_representation': chosen_equation_num_types,
        'comparison_representation': chosen_comparison_num_types,
        'feedback': chosen_feedback_types,
        'rounded_difference': difference_scores,
        'equation_duration': eq_durations,
        'isi1': isi1s,
        'comparison_duration': comp_durations,
        'isi2': isi2s,
        'feedback_duration': fdbk_durations,
        'iti': itis,
    }
    df = pd.DataFrame(timing_dict)
    return df, seed
def est_skewed_dist():
    """Show a 20-bin histogram of 1000 random draws from ``gumbel_r``."""
    n_samples = 1000
    n_bins = 20
    samples = gumbel_r.rvs(size=n_samples)
    plt.hist(samples, bins=n_bins)
    plt.show()