Example #1
    def test_folder(self):
        test_config = self.test_conf_const.__class__
        data, _ = crowdtruth.load(directory=TEST_FILE_PREF + "dir/",
                                  config=test_config())
        self.assertEqual(data["workers"].shape[0], 7)
        self.assertEqual(data["units"].shape[0], 2)
        self.assertEqual(data["judgments"].shape[0], 12)
Example #2
    def test_empty_rows(self):
        test_without = self.test_conf_const.__class__
        data_without, _ = crowdtruth.load(file=TEST_FILE_PREF +
                                          "empty_rows.csv",
                                          config=test_without())
        self.assertEqual(data_without["judgments"].shape[0], 24)

        test_proc_judg = self.test_process_judg.__class__
        data_proc_judg, _ = crowdtruth.load(file=TEST_FILE_PREF +
                                            "empty_rows.csv",
                                            config=test_proc_judg())
        self.assertEqual(data_proc_judg["judgments"].shape[0], 24)

        test_with = self.test_keep_empty_rows.__class__
        data_with, _ = crowdtruth.load(file=TEST_FILE_PREF + "empty_rows.csv",
                                       config=test_with())
        self.assertEqual(data_with["judgments"].shape[0], 27)
Example #3
def main():
    runs = ['3', '4']
    #sources = ['raw_processed', 'clean_contradictions_batch_0.5']
    sources = [
        'data_processed', 'clean_contradictions_total_1',
        'clean_contradictions_pair_0.5', 'clean_contradictions_batch_0.5'
    ]
    group = 'experiment*'
    n_q = '*'
    n_lists = '*'
    batch = '*'

    #name = f'run{"_".join(runs)}-group_{group}-batch{batch}'.replace('*', '-all-')
    for source in sources:
        data_dict_list = []
        name = f'run{"_".join(runs)}-group_{group}-batch{batch}'.replace(
            '*', '-all-')
        name = f'{name}-{source}'
        print(name)
        for run in runs:
            print(run)
            data = load_processed_data(run, group, n_q, n_lists, batch, source)
            data_dict_list.extend(data)
            # data_dict_list.extend(load_experiment_data(run, group, n_q, n_lists, batch, remove_not_val = True))
        print(len(data_dict_list))
        print('checking if concepts are there:')
        check_data(data_dict_list)

        print('creating input')
        input_df = create_input_df(data_dict_list)
        print(input_df.columns)
        input_dir = '../analyses/crowdtruth/input/'
        input_path = f'{input_dir}{name}.csv'
        os.makedirs(input_dir, exist_ok=True)
        input_df.to_csv(input_path, index=False)

        res_dir = '../analyses/crowdtruth/results/'
        res_path = f'{res_dir}{name}'
        os.makedirs(res_dir, exist_ok=True)

        print('running crowdtruth')
        input_file = input_path
        data, config = crowdtruth.load(file=input_file, config=TestConfig())
        results = crowdtruth.run(data, config)
        print('crowdtruth done')
        unit_scores = results['units']
        split_unit_annotation_score(unit_scores)
        unit_scores.to_csv(f'{res_path}-units.csv')

        worker_scores = results['workers']
        worker_scores.to_csv(f'{res_path}-workers.csv')

        annotation_scores = results["annotations"]
        annotation_scores.to_csv(f'{res_path}-annotations.csv')
        print(f'results stored: {res_path}')
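The TestConfig passed to crowdtruth.load above is defined elsewhere in the script. As a rough, hypothetical sketch (not the author's actual class), a config following the DefaultConfig pattern that also appears in Example #8 could look like the following; the import path follows the crowdtruth tutorials, and the column names are placeholders that would have to match the CSV written by create_input_df:

# Hypothetical sketch of a crowdtruth config class; column names are placeholders.
from crowdtruth.configuration import DefaultConfig


class TestConfig(DefaultConfig):
    inputColumns = ["question", "options"]    # assumed input column names
    outputColumns = ["selected_answers"]      # assumed output column name

    open_ended_task = True       # no fixed set of annotations
    annotation_separator = ","   # multiple annotations per judgment, comma-separated
    annotation_vector = []

    def processJudgments(self, judgments):
        # light normalisation of the raw annotation strings
        for col in self.outputColumns:
            judgments[col] = judgments[col].apply(lambda x: str(x).strip())
        return judgments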
Example #4
    def test_data_frame(self):
        for w in range(1, 6):
            test_config_file = self.test_conf_const.__class__
            data_file, _ = crowdtruth.load(file=TEST_FILE_PREF +
                                           "platform_cf" + str(w) + ".csv",
                                           config=test_config_file())
            df = pd.read_csv(TEST_FILE_PREF + "platform_cf" + str(w) + ".csv")
            test_config_df = self.test_conf_const.__class__
            data_df, _ = crowdtruth.load(data_frame=df,
                                         config=test_config_df())
            self.assertEqual((set(data_df["units"]["duration"].keys()) -
                              set(data_file["units"]["duration"].keys())),
                             set([]))
            self.assertEqual((set(data_df["workers"]["judgment"].keys()) -
                              set(data_file["workers"]["judgment"].keys())),
                             set([]))
            self.assertEqual(
                set(data_df["workers"]["judgment"] -
                    data_file["workers"]["judgment"]), set([0]))
Example #5
    def test_platform(self):
        for w in range(1, 6):
            test_config_amt = self.test_conf_const.__class__
            data_amt, _ = crowdtruth.load(file=TEST_FILE_PREF +
                                          "platform_amt" + str(w) + ".csv",
                                          config=test_config_amt())
            test_config_cf = self.test_conf_const.__class__
            data_cf, _ = crowdtruth.load(file=TEST_FILE_PREF + "platform_cf" +
                                         str(w) + ".csv",
                                         config=test_config_cf())
            self.assertEqual((set(data_cf["units"]["duration"].keys()) -
                              set(data_amt["units"]["duration"].keys())),
                             set([]))
            self.assertEqual((set(data_cf["workers"]["judgment"].keys()) -
                              set(data_amt["workers"]["judgment"].keys())),
                             set([]))
            self.assertEqual(
                set(data_cf["workers"]["judgment"] -
                    data_amt["workers"]["judgment"]), set([0]))
Example #6
    def test_all_workers_agree(self):
        for w in range(2, 11):
            test_config = self.test_conf_const.__class__
            data, config = crowdtruth.load(
                file=TEST_FILE_PREF + str(w) + "work_agr.csv",
                config=test_config())
            results = crowdtruth.run(data, config)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for wid in range(w):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(wid + 1)], 1.0)
            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)
Example #7
    def test_metrics_correct_interval(self):
        test_conf_const = TutorialCustomizedConfig()
        test_config = test_conf_const.__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example_custom.csv",
            config=test_config())
        results = crowdtruth.run(data, config)
        for _, val_arr in results["units"]["unit_annotation_score"].items():
            for _, val in val_arr.items():
                self.assertGreaterEqual(val, 0.0)
                self.assertLessEqual(val, 1.0)
        for _, val in results["units"]["uqs"].items():
            self.assertGreaterEqual(val, 0.0)
            self.assertLessEqual(val, 1.0)
        for _, val in results["workers"]["wqs"].items():
            self.assertGreaterEqual(val, 0.0)
            self.assertLessEqual(val, 1.0)
        for _, val in results["annotations"]["aqs"].items():
            self.assertGreaterEqual(val, 0.0)
            self.assertLessEqual(val, 1.0)
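Outside of a test, the same metrics can be inspected directly, since each entry of the results dictionary holds a pandas Series. A quick sketch, assuming the same tutorial file and TutorialCustomizedConfig class used in Example #7 (both defined in the crowdtruth tutorial material, not here):

# Sketch: print the highest-quality units, workers and annotations for the
# tutorial dataset used in Example #7 (file path and config class assumed).
import crowdtruth

data, config = crowdtruth.load(file="tutorial/relex_example_custom.csv",
                               config=TutorialCustomizedConfig())
results = crowdtruth.run(data, config)

print(results["units"]["uqs"].sort_values(ascending=False).head())        # unit quality
print(results["workers"]["wqs"].sort_values(ascending=False).head())      # worker quality
print(results["annotations"]["aqs"].sort_values(ascending=False).head())  # annotation quality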
Example #8
def create_analysis_files(dataset, max_no_workers, max_runs, storing_folder):
    unique_unit_ids = get_uniq_unit_ids(dataset)

    for subset_size in range(3, max_no_workers):
        workers_directory = storing_folder + str(subset_size) + "workers"
        if not os.path.exists(workers_directory):
            os.makedirs(workers_directory)

        map_unit_id_combinations = {}
        for unit_id in range(0, len(unique_unit_ids)):
            (count,
             subset_unit_id) = get_no_work_unit_id(dataset,
                                                   unique_unit_ids[unit_id])
            combinations = gen_all_worker_combinations(subset_size, count,
                                                       subset_unit_id)
            map_unit_id_combinations[unique_unit_ids[unit_id]] = combinations

        import csv
        fields = [
            'F1_nr_binary', 'F1_n_binary', 'F1_nr_ternary', 'F1_r_ternary',
            'F1_hr_ternary'
        ]
        with open('F1_' + str(subset_size) + ".csv", 'w') as f:
            writer = csv.writer(f)
            writer.writerow(fields)

        for run_no in range(1, max_runs):
            unit_worker_set = {}
            for unit_id, worker_sets in map_unit_id_combinations.items():
                unit_worker_set[unit_id] = pick_random_worker_set(worker_sets)

            df_subset_size = pd.DataFrame()
            for unit_id, worker_set in unit_worker_set.items():
                df_subset = dataset[(dataset["_unit_id"] == unit_id)
                                    & (dataset["_worker_id"].isin(worker_set))]
                frames = [df_subset_size, df_subset]
                df_subset_size = pd.concat(frames)

            df_subset_size.to_csv(workers_directory + "/run.csv", index=False)

            filename = workers_directory + "/run.csv"
            results_with_newGT = pd.read_csv(
                "../ground_truth_data/reviewers_pilot_aggregated_judgments.csv"
            )

            class config(DefaultConfig):
                inputColumns = [
                    "index", "bin", "doc_len", "document_id", "document_body",
                    "document_title", "rel", "topic", "topic_description",
                    "topic_query"
                ]
                outputColumns = ["relevant_snippets"]

                # processing of an open-ended task
                open_ended_task = True
                annotation_separator = ","
                annotation_vector = []

                def processJudgments(self, judgments):
                    for col in self.outputColumns:
                        judgments[col] = judgments[col].apply(
                            lambda x: x.replace('[', ''))
                        judgments[col] = judgments[col].apply(
                            lambda x: x.replace(']', ''))
                        judgments[col] = judgments[col].apply(
                            lambda x: x.replace('"', ''))
                        judgments[col] = judgments[col].apply(
                            lambda x: x.replace(' ', ','))
                    return judgments

            # Read data
            data, config = crowdtruth.load(file=filename, config=config())

            data['judgments'].head()
            results = crowdtruth.run(data, config)

            results["units"]['max_relevance_score'] = pd.Series(
                np.random.randn(len(results["units"])),
                index=results["units"].index)
            for i in xrange(0, len(results["units"])):
                maxVal = 0.0
                for key, value in results["units"][
                        "unit_annotation_score"].iloc[i].items():
                    if key != "none":
                        if value > maxVal:
                            maxVal = value
                results["units"]['max_relevance_score'].iloc[i] = maxVal

            results["units"]["reviewers_rel"] = pd.Series(
                np.random.randn(len(results["units"])),
                index=results["units"].index)
            results["units"]["reviewers_rel_merged"] = pd.Series(
                np.random.randn(len(results["units"])),
                index=results["units"].index)
            for i in range(0, len(results_with_newGT.index)):
                for j in range(0, len(results["units"].index)):
                    if (results_with_newGT["topic"].iloc[i]
                            == results["units"]["input.topic"].iloc[j]) and (
                                results_with_newGT["document_id"].iloc[i] ==
                                results["units"]["input.document_id"].iloc[j]):
                        results["units"]["reviewers_rel"].iloc[
                            j] = results_with_newGT["reviewers_rel"].iloc[i]
                        results["units"]["reviewers_rel_merged"].iloc[
                            j] = results_with_newGT[
                                "reviewers_rel_merged"].iloc[i]

            F1_notrelevant_binary = compute_F1_score_not_relevant_binary(
                results["units"])
            F1_relevant_binary = compute_F1_score_relevant_binary(
                results["units"])

            F1_notrelevant_ternary = compute_F1_score_not_relevant_ternary(
                results["units"])
            F1_relevant_ternary = compute_F1_score_relevant_ternary(
                results["units"])
            F1_highlyrelevant_ternary = compute_F1_score_highly_relevant_ternary(
                results["units"])

            row = [
                F1_notrelevant_binary, F1_relevant_binary,
                F1_notrelevant_ternary, F1_relevant_ternary,
                F1_highlyrelevant_ternary
            ]
            with open('F1_' + str(subset_size) + ".csv", 'a') as f:
                writer = csv.writer(f)
                writer.writerow(row)
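Each run appends one row of F1 scores to F1_<subset_size>.csv, so the per-subset results can be aggregated afterwards. A small follow-up sketch (not part of the original script; max_no_workers is assumed to be the same value passed to create_analysis_files):

# Sketch: average the F1 scores collected above, one mean per column and subset size.
import pandas as pd

max_no_workers = 10  # hypothetical; reuse the value given to create_analysis_files

for subset_size in range(3, max_no_workers):
    scores = pd.read_csv("F1_" + str(subset_size) + ".csv")
    print(subset_size, scores.mean().to_dict())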
Example #9
    def test_incremental_worker_agreement(self):
        for w in range(4, 11):
            test_config = self.test_conf_const.__class__
            data, config = crowdtruth.load(
                file=TEST_FILE_PREF + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv",
                config=test_config())
            results = crowdtruth.run(data, config)

            # print str(config.open_ended_task)

            # check that workers that agree on the same unit have the same quality score
            for x in range(2, w):
                if x != (w - 1):
                    self.assertAlmostEqual(
                        results["workers"]["wqs"].at["W1"],
                        results["workers"]["wqs"].at["W" + str(x)],)
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(w)],
                    results["workers"]["wqs"].at["W" + str(w + x - 1)])

            # workers that agree have a greater WQS than the worker that disagrees
            self.assertGreater(
                results["workers"]["wqs"].at["W1"],
                results["workers"]["wqs"].at["W" + str(w - 1)])
            self.assertGreater(
                results["workers"]["wqs"].at["W" + str(w)],
                results["workers"]["wqs"].at["W" + str(2 * w - 1)])

            # the more workers agree on a unit, the higher the worker quality score
            self.assertGreater(
                results["workers"]["wqs"].at["W" + str(w)],
                results["workers"]["wqs"].at["W1"])
            # print "W" + str(w) + ": " + str(results["workers"]["wqs"].at["W" + str(w)])
            # print "W1: " + str(results["workers"]["wqs"].at["W1"])

            # the more workers agree on a unit, the higher the unit quality score
            self.assertLess(
                results["units"]["uqs"].at[1],
                results["units"]["uqs"].at[2])
            self.assertLess(
                results["units"]["uqs"].at[1],
                results["units"]["uqs"].at[3])
            self.assertLess(
                results["units"]["uqs"].at[2],
                results["units"]["uqs"].at[3])

            # the more workers agree on an annotation, the higher the unit quality score
            if not config.open_ended_task:
                self.assertLess(
                    results["annotations"]["aqs"].at["A"],
                    results["annotations"]["aqs"].at["C"])
                self.assertLess(
                    results["annotations"]["aqs"].at["B"],
                    results["annotations"]["aqs"].at["A"])
                self.assertLess(
                    results["annotations"]["aqs"].at["D"],
                    results["annotations"]["aqs"].at["C"])
                self.assertLess(
                    results["annotations"]["aqs"].at["A"],
                    results["annotations"]["aqs"].at["E"])
                self.assertLess(
                    results["annotations"]["aqs"].at["C"],
                    results["annotations"]["aqs"].at["E"])