def compute_scores(
        data: Table,
        genes: Table,
        p_threshold: float,
        p_value_fun: str,
        scoring: str,
        start: float,
        end: float,
        result: Result,
        state: TaskState,
    ):
        """Fill ``result.scores`` with annotation scores, reusing cached stages.

        The computation has three stages: Mann-Whitney z-values, annotation
        scores with their p-values, and the filtered score table. The first
        two stages are skipped when ``result.scores`` already holds a truthy
        value for them; the filtered table is always recomputed.

        Parameters
        ----------
        data, genes
            Input tables. If either is falsy, every field of
            ``result.scores`` is reset to ``None``.
        p_threshold
            Forwarded to ``AnnotateSamplesMeta.filter_annotations``.
        p_value_fun, scoring
            Forwarded to ``AnnotateSamplesMeta.assign_annotations``.
        start, end
            Bounds of the progress interval assigned to this function.
            Reported progress values lie within ``[start, end]``.
        result
            Object whose ``scores`` attribute is updated in place.
        state
            Task state used for the status message, progress values,
            partial results and interruption checks.

        Returns
        -------
        None
            Results are delivered through ``result`` and
            ``state.set_partial_result``. When an interruption is requested
            the function returns early, without computing the filtered table
            and without emitting the final partial result.
        """
        if not data or not genes:
            result.scores.z_vals = None
            result.scores.annotations = None
            result.scores.p_vals = None
            result.scores.table = None
        else:
            state.set_status("Computing scores...")
            # Share of the [start, end] interval taken by each stage.
            weights = np.array([15, 75, 10]) * (end - start) / 100
            # Progress is reported as an absolute, cumulative value: offset
            # by ``start`` and summed over the stages finished so far.
            after_z_values = start + weights[0]
            after_annotations = after_z_values + weights[1]

            if not result.scores.z_vals:
                result.scores.z_vals = AnnotateSamplesMeta.mann_whitney_test(
                    data)
                state.set_partial_result(("scores", result))
            state.set_progress_value(after_z_values)
            if state.is_interruption_requested():
                return

            if not result.scores.annotations or not result.scores.p_vals:
                annot, p_vals = AnnotateSamplesMeta.assign_annotations(
                    result.scores.z_vals,
                    genes,
                    data,
                    p_value_fun=p_value_fun,
                    scoring=scoring)
                result.scores.annotations = annot
                result.scores.p_vals = p_vals
                state.set_partial_result(("scores", result))
            state.set_progress_value(after_annotations)
            if state.is_interruption_requested():
                return

            result.scores.table = AnnotateSamplesMeta.filter_annotations(
                result.scores.annotations,
                result.scores.p_vals,
                p_threshold=p_threshold)

        state.set_partial_result(("scores", result))
    def setUp(self):
        """Create the marker table, expression data and annotator fixtures.

        ``self.markers`` lists four Entrez IDs for each of two cell types.
        ``self.data`` has one continuous column per Entrez ID; its first four
        rows are non-zero mainly in the "Type 1" genes and its last four rows
        only in the "Type 2" genes.
        """
        # Marker genes per cell type; insertion order fixes the row order.
        markers_by_type = {
            "Type 1": ["111", "112", "113", "114"],
            "Type 2": ["211", "212", "213", "214"],
        }
        marker_rows = [
            [cell_type, entrez_id]
            for cell_type, entrez_ids in markers_by_type.items()
            for entrez_id in entrez_ids
        ]
        marker_domain = Domain(
            [], None,
            [StringVariable("Cell Type"), StringVariable("Entrez ID")])
        # The marker table has no feature columns, only the two string metas.
        no_features = np.empty((len(marker_rows), 0))
        self.markers = Table(marker_domain, no_features, None, marker_rows)

        gene_ids = [entrez_id for _, entrez_id in marker_rows]
        gene_vars = [ContinuousVariable(str(gene_id)) for gene_id in gene_ids]
        self.domain = Domain(gene_vars)
        for variable, gene_id in zip(self.domain.attributes, gene_ids):
            variable.attributes = {"Entrez ID": gene_id}

        expression = np.array([
            [1, 1, 1, 1.1, 0, 0, 0, 0],
            [1, 0.8, 0.9, 1, 0, 0, 0, 0],
            [0.7, 1.1, 1, 1.2, 0, 0, 0, 0],
            [0.8, 0.7, 1.1, 1, 0, 0.1, 0, 0],
            [0, 0, 0, 0, 1.05, 1.05, 1.1, 1],
            [0, 0, 0, 0, 1.1, 1.0, 1.05, 1.1],
            [0, 0, 0, 0, 1.05, 0.9, 1.1, 1.1],
            [0, 0, 0, 0, 0.9, 0.9, 1.2, 1],
        ])
        self.data = Table(self.domain, expression)
        self.data.attributes[TAX_ID] = "9606"  # id for a human

        self.annotator = AnnotateSamplesMeta()
class TestAnnotateSamples(unittest.TestCase):
    """Tests for ``AnnotateSamplesMeta`` on a small artificial data set."""

    def setUp(self):
        """Create the marker table, expression data and annotator fixtures.

        ``self.markers`` lists four Entrez IDs for each of two cell types.
        ``self.data`` has one continuous column per Entrez ID; its first four
        rows are non-zero mainly in the "Type 1" genes and its last four rows
        only in the "Type 2" genes.
        """
        m_domain = Domain(
            [], None,
            [StringVariable("Cell Type"),
             StringVariable("Entrez ID")])
        m_data = [
            ["Type 1", "111"],
            ["Type 1", "112"],
            ["Type 1", "113"],
            ["Type 1", "114"],
            ["Type 2", "211"],
            ["Type 2", "212"],
            ["Type 2", "213"],
            ["Type 2", "214"],
        ]
        # The marker table has no feature columns, only the two string metas.
        self.markers = Table(m_domain, np.empty((len(m_data), 0)), None,
                             m_data)

        genes = ["111", "112", "113", "114", "211", "212", "213", "214"]
        self.domain = Domain([ContinuousVariable(str(g)) for g in genes])
        for v, g in zip(self.domain.attributes, genes):
            v.attributes = {"Entrez ID": g}
        self.data = Table(
            self.domain,
            np.array([
                [1, 1, 1, 1.1, 0, 0, 0, 0],
                [1, 0.8, 0.9, 1, 0, 0, 0, 0],
                [0.7, 1.1, 1, 1.2, 0, 0, 0, 0],
                [0.8, 0.7, 1.1, 1, 0, 0.1, 0, 0],
                [0, 0, 0, 0, 1.05, 1.05, 1.1, 1],
                [0, 0, 0, 0, 1.1, 1.0, 1.05, 1.1],
                [0, 0, 0, 0, 1.05, 0.9, 1.1, 1.1],
                [0, 0, 0, 0, 0.9, 0.9, 1.2, 1],
            ]),
        )
        self.data.attributes[TAX_ID] = "9606"  # id for a human

        self.annotator = AnnotateSamplesMeta()

    def _assert_annotations(self, annotations, n_types, bounded=True):
        """
        Check the shape of an annotation table and, optionally, its range.

        The table must have one row per row of ``self.data`` and ``n_types``
        columns. When ``bounded`` is true the values must additionally sum
        to more than zero and lie within [0, 1] (NaNs ignored).
        """
        self.assertIsInstance(annotations, Table)
        self.assertEqual(len(annotations), len(self.data))
        self.assertEqual(len(annotations[0]), n_types)
        if bounded:
            self.assertGreater(np.nansum(annotations.X), 0)
            self.assertLessEqual(np.nanmax(annotations.X), 1)
            self.assertGreaterEqual(np.nanmin(annotations.X), 0)

    def test_mann_whitney_test(self):
        """
        The z-value table must be a Table with the same shape as the data.
        """
        d = self.annotator.mann_whitney_test(self.data)
        self.assertIsInstance(d, Table)
        self.assertTupleEqual(self.data.X.shape, d.X.shape)

    def annotate_samples(self,
                         data,
                         markers,
                         return_nonzero_annotations=True,
                         scoring=SCORING_EXP_RATIO,
                         p_value_fun=PFUN_BINOMIAL):
        """
        Helper method that performs all operations and returns final results.
        """
        z_values = self.annotator.mann_whitney_test(data)
        scores, fdrs = self.annotator.assign_annotations(
            z_values, markers, data, scoring=scoring, p_value_fun=p_value_fun)
        return self.annotator.filter_annotations(scores, fdrs,
                                                 return_nonzero_annotations)

    def test_artificial_data(self):
        """
        Default settings must yield two bounded annotation columns.
        """
        annotations = self.annotate_samples(self.data, self.markers)

        self._assert_annotations(annotations, 2)  # two types in the data

    def test_remove_empty_column(self):
        """
        Type 3 column must be removed here
        """
        m_domain = Domain(
            [], None,
            [StringVariable("Cell Type"),
             StringVariable("Entrez ID")])
        m_data = [
            ["Type 1", "111"],
            ["Type 1", "112"],
            ["Type 1", "113"],
            ["Type 1", "114"],
            ["Type 2", "211"],
            ["Type 2", "212"],
            ["Type 2", "213"],
            ["Type 2", "214"],
            ["Type 3", "311"],
            ["Type 3", "312"],
            ["Type 3", "313"],
        ]
        markers = Table(m_domain, np.empty((len(m_data), 0)), None, m_data)

        annotations = self.annotate_samples(self.data, markers)

        self._assert_annotations(annotations, 2)  # two types in the data

        annotations = self.annotate_samples(self.data,
                                            markers,
                                            return_nonzero_annotations=False)
        # all three marker types are kept when empty columns are not removed
        self._assert_annotations(annotations, 3)

    def test_sf(self):
        """
        Test annotations with hypergeom.sf
        """
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            p_value_fun=PFUN_HYPERGEOMETRIC)

        self._assert_annotations(annotations, 2)  # two types in the data

    def test_two_example(self):
        """
        Annotation must also work when the data has only two rows.
        """
        self.data = self.data[:2]

        annotations = self.annotate_samples(self.data, self.markers)

        # the number of columns is deliberately not checked here
        self.assertIsInstance(annotations, Table)
        self.assertEqual(len(annotations), len(self.data))

    def test_markers_without_entrez_id(self):
        """
        A marker with an unknown ("?") Entrez ID must not break annotation.
        """
        self.markers[1, "Entrez ID"] = "?"
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            return_nonzero_annotations=False)

        self._assert_annotations(annotations, 2)  # two types in the data

    def test_select_attributes(self):
        """
        z-values keep the data shape and are at least 1 for selected genes
        of the first row.
        """
        z = self.annotator.mann_whitney_test(self.data)

        self.assertEqual(z.X.shape, self.data.X.shape)
        self.assertGreaterEqual(z.X[0, 0], 1)
        self.assertGreaterEqual(z.X[0, 1], 1)
        self.assertGreaterEqual(z.X[0, 3], 1)

    def test_assign_annotations(self):
        """
        Check scores and FDRs computed from a hand-written z-value table.
        """
        z = np.array([
            [1.1, 1.1, 1.1, 1.1, 0, 0, 0, 0],
            [1.1, 1.1, 0, 0, 1.1, 0, 0, 0],
            [1.1, 0, 0, 0, 1.1, 1.1, 0, 0],
            [0, 0, 0, 0, 1.1, 1.1, 1.1, 1.1],
        ])
        z_table = Table(self.domain, z)
        # genes whose z-value is 1.1 in each row of ``z``
        attrs = [
            {"111", "112", "113", "114"},
            {"111", "112", "211"},
            {"211", "212", "111"},
            {"211", "212", "213", "214"},
        ]
        # per row: share of the four Type 1 / Type 2 markers listed in attrs
        exp_ann = np.array([[1, 0], [1 / 2, 1 / 4], [1 / 4, 1 / 2], [0, 1]])
        annotations, fdrs = self.annotator.assign_annotations(
            z_table, self.markers, self.data[:4])

        self.assertEqual(len(attrs), len(annotations))
        self.assertEqual(len(attrs), len(fdrs))
        self.assertEqual(2,
                         annotations.X.shape[1])  # only two types in markers
        self.assertEqual(2, fdrs.X.shape[1])
        np.testing.assert_array_almost_equal(exp_ann, annotations)

        # upper bounds: below 0.05 where all four markers of a type match,
        # 2 acts as "no constraint" elsewhere
        exp_fdrs_smaller = np.array([[0.05, 2], [2, 2], [2, 2], [2, 0.05]])

        np.testing.assert_array_less(fdrs, exp_fdrs_smaller)

    def test_scoring(self):
        """
        Every scoring method must return a table of the expected shape.
        """
        # scoring SCORING_EXP_RATIO
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            scoring=SCORING_EXP_RATIO)

        self._assert_annotations(annotations, 2)  # two types in the data

        # scoring SCORING_MARKERS_SUM
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            scoring=SCORING_MARKERS_SUM)

        self._assert_annotations(annotations, 2, bounded=False)

        # Rows 0 and 5 are non-zero only in the markers of their own type,
        # so the score is expected to match the row total. The row at index
        # 3 is skipped: it also has 0.1 for a Type 2 marker.
        # Sums of floats are compared approximately, not for exact equality.
        self.assertAlmostEqual(annotations[0, 0].value, self.data.X[0].sum())
        self.assertAlmostEqual(annotations[5, 1].value, self.data.X[5].sum())

        # scoring SCORING_LOG_FDR
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            scoring=SCORING_LOG_FDR)

        self._assert_annotations(annotations, 2, bounded=False)

        # scoring SCORING_LOG_PVALUE
        annotations = self.annotate_samples(self.data,
                                            self.markers,
                                            scoring=SCORING_LOG_PVALUE)

        self._assert_annotations(annotations, 2, bounded=False)

    def test_entrez_id_not_string(self):
        """
        It seems that some datasets (e.g. AML dataset) have Entrez ID as int
        although they should be strings. Here we add the test for those cases.
        """
        # change Entrez IDs to int
        for att in self.data.domain.attributes:
            att.attributes["Entrez ID"] = int(att.attributes["Entrez ID"])

        annotations = self.annotate_samples(self.data, self.markers)

        self._assert_annotations(annotations, 2)  # two types in the data