def test_match_simple(self):
        # A single TOP_N criterion over the "odds_ratio" column with number=3
        # should yield a boolean mask with exactly three rows selected.
        frame = pd.DataFrame({
            "matching_specificity":
            ["flu", "ebv", "GAD", "PPI", "GAD", "PPI", "ebv"],
            "p_val": [0.01, 1e-5, 1e-6, 0.01, 0.01, 1e-7, 0.1],
            "odds_ratio": [0.51, 0.5] + [0] * 5,
            "a": ["yes"] + ["no"] * 6,
            "b": ["no", "yes"] + ["no"] * 5,
        })

        odds_ratio_column = {
            "type": DataType.COLUMN,
            "name": "odds_ratio",
        }
        top_three_criteria = {
            "type": OperationType.TOP_N,
            "value": odds_ratio_column,
            "number": 3,
        }

        # Rows 0 and 1 hold the two largest odds ratios; the remaining five
        # rows are tied at 0, and the expectation pins the last row as the
        # third pick.
        # NOTE(review): that choice presumably reflects the matcher's
        # tie-breaking order -- confirm against CriteriaMatcher.
        expected_mask = np.array(
            [True, True, False, False, False, False, True])

        actual_mask = CriteriaMatcher().match(top_three_criteria, frame)

        self.assertTrue(np.array_equal(actual_mask, expected_mask))
    def test_match(self):
        # Nested boolean criteria evaluated per row:
        #   (matching_specificity IN ["GAD", "PPI"] AND p_val LESS_THAN 0.001)
        #   OR
        #   (a IN ["yes"] AND odds_ratio GREATER_THAN 0.5)

        def column(name):
            # Fresh column-reference node for each use in the criteria tree.
            return {"type": DataType.COLUMN, "name": name}

        frame = pd.DataFrame({
            "matching_specificity":
            ["flu", "ebv", "GAD", "PPI", "GAD", "PPI", "ebv"],
            "p_val": [0.01, 1e-5, 1e-6, 0.01, 0.01, 1e-7, 0.1],
            "odds_ratio": [0.51] + [0] * 6,
            "a": ["yes"] + ["no"] * 6,
            "b": ["no", "yes"] + ["no"] * 5,
        })

        # First branch: specificity restricted to GAD/PPI with a small p-value.
        specificity_allowed = {
            "type": OperationType.IN,
            "allowed_values": ["GAD", "PPI"],
            "value": column("matching_specificity"),
        }
        p_val_below_threshold = {
            "type": OperationType.LESS_THAN,
            "threshold": 0.001,
            "value": column("p_val"),
        }
        significant_specificity = {
            "type": BooleanType.AND,
            "operands": [specificity_allowed, p_val_below_threshold],
        }

        # Second branch: column "a" flagged "yes" with odds ratio above 0.5.
        a_is_yes = {
            "type": OperationType.IN,
            "allowed_values": ["yes"],
            "value": column("a"),
        }
        odds_ratio_above_threshold = {
            "type": OperationType.GREATER_THAN,
            "threshold": 0.5,
            "value": column("odds_ratio"),
        }
        flagged_with_high_odds = {
            "type": BooleanType.AND,
            "operands": [a_is_yes, odds_ratio_above_threshold],
        }

        criteria = {
            "type": BooleanType.OR,
            "operands": [significant_specificity, flagged_with_high_odds],
        }

        # Rows 2 and 5 satisfy the first branch (GAD/PPI with p_val below
        # 0.001); row 0 satisfies the second ("yes" with odds_ratio 0.51).
        expected_mask = np.array(
            [True, False, True, False, False, True, False])

        actual_mask = CriteriaMatcher().match(criteria, frame)

        self.assertTrue(np.array_equal(actual_mask, expected_mask))
 def get_matching_indices(dataset: RepertoireDataset, criteria):
     """Return the positions of the dataset's metadata rows that match `criteria`.

     The result of `dataset.get_metadata(None)` is loaded into a pandas
     DataFrame, `CriteriaMatcher().match` is evaluated on it, and the
     positions of the truthy entries of the match result are returned.

     NOTE(review): the meaning of the `None` argument to `get_metadata` is
     not visible here (presumably "all fields") -- confirm against
     RepertoireDataset.
     """
     metadata_frame = pd.DataFrame(dataset.get_metadata(None))
     is_match = CriteriaMatcher().match(criteria, metadata_frame)
     # Single-argument np.where returns a tuple with one index array per
     # dimension; the first entry holds the row positions.
     return np.where(is_match)[0]