expert_project_short_name=gold_standard_data, expert_user_ids_to_include=expert_ids, ) # Create list of users list_of_users = create_list_of_users(filtered_dict=combined_dict) # Create list of number of citizen scientists no_citizen_scientists = [10, 15] # Define number of iterations no_iterations = 5 # Generate random samples random_samples = generate_random_samples( list_of_filtered_ids=list_of_users, no_of_iterations=no_iterations, number_cit_sci=no_citizen_scientists ) # dimension samples*images*user_configurations accuracy_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) sensitivity_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) specificity_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) f_measure_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) # precision_array = np.array(np.zeros([no_iterations, len(list_of_images), len(no_citizen_scientists)])) kappa_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) auc_array = np.array(np.zeros([no_iterations, len(no_citizen_scientists)])) count_subjects = 0 for outer_sample in random_samples: count_samples = 0
# Start from the user ids configured for this project.
include_ids = project["include_user_ids"]

# Users of this project that did not complete the required number of tasks.
exclude_id_based_on_task_count = create_list_of_users_not_completing_req_no_of_tasks(
    project_short_name,
    min_no_tasks=360,
)

# First filter: drop every id flagged by the task-count check.
include_ids = [
    user_id
    for user_id in include_ids
    if user_id not in exclude_id_based_on_task_count
]

# Users to exclude based on their marginal distribution.
exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(
    project_short_name
)

# Second filter: drop every id flagged by the marginal-distribution check.
include_ids = [
    user_id
    for user_id in include_ids
    if user_id not in exclude_id_based_on_marginal_distribution
]

# Build the per-user dictionary for the ids that survived both filters.
dict_of_user_ids_values = create_individual_dict(
    project_short_name=project_short_name,
    user_ids_to_include=include_ids,
)

list_of_users = create_list_of_users(filtered_dict=dict_of_user_ids_values)

# Pass no_of_iterations here as well if the default value of 10 is not wanted.
all_samples = generate_random_samples(list_of_filtered_ids=list_of_users)

# Compute the AUC for the remaining users and report it on stdout.
auc_result = calculate_auc(
    project_short_name=project_short_name,
    dict_of_user_ids_values=dict_of_user_ids_values,
    user_ids_to_include=include_ids,
)
print(auc_result)