def collate_individual_user_results():
    # A list to hold the per-project results
    project_by_project_results = []

    # Loop over the projects in the project_configuration above
    for project in project_configuration:

        # A list to hold this project's per-user results
        individual_project_results = []

        # Get the basic details we need from the project
        project_short_name = project["project_short_name"]

        # Get a list of all potential include ids for this project
        include_ids = project["include_user_ids"]

        # Create a list of user_ids for this project that have not completed the required number of tasks
        exclude_id_based_on_task_count = create_list_of_users_not_completing_req_no_of_tasks(project_short_name, min_no_tasks=324)

        # Filter the include_ids removing any that should be excluded based on task count
        include_ids = [uid for uid in include_ids if uid not in exclude_id_based_on_task_count]

        # Create a list of user_ids to exclude based on marginal distribution
        exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(project_short_name)

        # Filter the include_ids removing any that should be excluded based on marginal distributions
        include_ids = [uid for uid in include_ids if uid not in exclude_id_based_on_marginal_distribution]

        # Select the correct gold standard data set and associated expert_ids for each project
        gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
        expert_ids = define_gold_standard_ids(project_short_name=project_short_name)

        # Build the combined dict using the filtered ids and relevant gold standard data
        combined_dict = build_combined_dict_keyed_on_composite_key(
            project_short_name=project_short_name,
            user_ids_to_include=include_ids,
            expert_project_short_name=gold_standard_data,
            expert_user_ids_to_include=expert_ids,
        )

        # Run the analysis on the combined dict
        create_user_agreement_pre_processing_dict(combined_dict)
        individual_user_results = create_user_agreement_dict(combined_dict)

        # Compile the per-user results for this project
        for user_id, results_dict in individual_user_results.items():
            project_results = {
                "project_name": project_short_name.split("-")[-1],
                "user_id": user_id,
                "accuracy": results_dict["accuracy_based_on_excluding_tied"],
                "sensitivity": results_dict["sensitivity_excluding_tied"],
                "specificity": results_dict["specificity_excluding_tied"],
                "precision": results_dict["precision_excluding_tied"],
                "f-measure": results_dict["f_measure_excluding_tied"],
                "kappa": results_dict["inter_rater_agreement_excluding_tied"]
            }
            individual_project_results.append(project_results)

        project_by_project_results.append(individual_project_results)

    return project_by_project_results
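
A minimal usage sketch for the function above, assuming project_configuration and the helper functions it calls are defined in the same module: it flattens the nested per-project result lists into a single list of per-user rows and prints a few of the reported fields.

if __name__ == "__main__":
    # Collect the nested results: a list of per-project lists of per-user result dicts
    all_results = collate_individual_user_results()

    # Flatten into a single list of per-user rows
    flat_results = [row for project_rows in all_results for row in project_rows]

    for row in flat_results:
        print(row["project_name"], row["user_id"], row["accuracy"], row["kappa"])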
Code example #2
        # Filter the include_ids removing any that should be excluded based on task count
        include_ids = [uid for uid in include_ids if uid not in exclude_id_based_on_task_count]

        # Create a list of user_ids to exclude based on marginal distribution
        exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(project_short_name)

        # Filter the include_ids removing any that should be excluded based on marginal distributions
        include_ids = [uid for uid in include_ids if uid not in exclude_id_based_on_marginal_distribution]

        # Select the appropriate gold standard data and corresponding expert_ids for the project
        gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
        expert_ids = define_gold_standard_ids(project_short_name=project_short_name)

        # Build the complete dictionary using all of the above
        combined_dict = build_combined_dict_keyed_on_composite_key(
            project_short_name=project_short_name,
            user_ids_to_include=include_ids,
            expert_project_short_name=gold_standard_data,
            expert_user_ids_to_include=expert_ids,
        )

        # Create list of users
        list_of_users = create_list_of_users(filtered_dict=combined_dict)

        # Create a list of numbers of citizen scientists to sample
        no_citizen_scientists = [10, 15]

        # Define number of iterations
        no_iterations = 5

        # Generate random samples
        random_samples = generate_random_samples(
            list_of_filtered_ids=list_of_users, no_of_iterations=no_iterations, number_cit_sci=no_citizen_scientists
        )

        # Create list of images
        list_of_images = create_list_of_images(dict_with_composite_key=combined_dict)

        for image in list_of_images:

            image_dict = {}
            sensitivity_list = []
            specificity_list = []
            f_measure_list = []

            for composite_key, list_of_values in combined_dict.items():

                split_comp_ind = composite_key.split(":")
                image_ck = split_comp_ind[0]