Exemple #1
0
    def test_init_all_fields(self):
        """Without an explicit selection, display names equal all flights columns."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        all_columns = self.pd_flights().columns.to_list()

        assert all_columns == mappings.display_names
    def test_non_existant(self):
        """Asking for the pandas dtype of the name "unknown" raises ``KeyError``."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )
        missing_name = "unknown"

        with pytest.raises(KeyError):
            mappings.field_name_pd_dtype(missing_name)
    def test_get_field_names_scripted(self):
        """A scripted field is listed by ``get_field_names`` only when requested."""
        selected = ["Carrier", "AvgTicketPrice"]
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
            display_names=selected,
        )
        pd_subset = self.pd_flights()[selected]

        # Before any scripted field is added, the flag makes no difference.
        plain_before = mappings.get_field_names(include_scripted_fields=False)
        scripted_before = mappings.get_field_names(include_scripted_fields=True)

        assert plain_before == scripted_before
        assert_index_equal(pd_subset.columns, pd.Index(plain_before))

        # Add a scripted field, then query both variants again.
        scripted_name = "scripted_field_None"
        mappings.add_scripted_field(scripted_name, None, np.dtype("int64"))

        plain_after = mappings.get_field_names(include_scripted_fields=False)
        scripted_after = mappings.get_field_names(include_scripted_fields=True)

        # The non-scripted listing is unchanged ...
        assert plain_before == plain_after
        # ... and the scripted listing is the same names plus the new field.
        plain_before.append(scripted_name)
        assert plain_before == scripted_after
Exemple #4
0
    def test_exists_and_non_exists_rename(self):
        """Renaming a mix of known and unknown names only records the known ones."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        column_series = self.pd_flights().columns.to_series()

        names_before = column_series.index.to_list()
        assert names_before == mappings.display_names

        rename_map = {
            "unknown": "renamed_unknown",
            "DestWeather": "renamed_DestWeather",
            "unknown2": "renamed_unknown2",
            "Carrier": "renamed_Carrier",
        }

        # The rename is applied in place; the pandas series rename is the reference.
        mappings.rename(rename_map)

        names_after = column_series.rename(rename_map).index.to_list()
        assert names_after == mappings.display_names

        recorded = mappings.get_renames()
        expected_recorded = {
            "Carrier": "renamed_Carrier",
            "DestWeather": "renamed_DestWeather",
        }

        assert expected_recorded == recorded
Exemple #5
0
    def test_multi_rename(self):
        """A chained rename (a -> b, b -> c) in one call applies only the first step."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        column_series = self.pd_flights().columns.to_series()

        names_before = column_series.index.to_list()
        assert names_before == mappings.display_names

        rename_map = {
            "DestWeather": "renamed_DestWeather",
            "renamed_DestWeather": "renamed_renamed_DestWeather",
        }

        # The rename is applied in place; the pandas series rename is the reference.
        mappings.rename(rename_map)

        names_after = column_series.rename(rename_map).index.to_list()
        assert names_after == mappings.display_names

        recorded = mappings.get_renames()
        expected_recorded = {"DestWeather": "renamed_DestWeather"}

        assert expected_recorded == recorded
    def test_ecommerce_selected_all_metric_source_fields(self):
        """An all-numeric ecommerce selection yields every field as a metric source."""
        # Note: all selected columns are metric.
        #   total_quantity           int64
        #   taxful_total_price     float64
        #   taxless_total_price    float64
        metric_names = ["total_quantity", "taxful_total_price", "taxless_total_price"]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=ECOMMERCE_INDEX_NAME,
            display_names=metric_names,
        )
        ecommerce_subset = self.pd_ecommerce()[metric_names]

        ed_dtypes, ed_fields, es_date_formats = mappings.metric_source_fields()
        numeric_only = ecommerce_subset.select_dtypes(include=np.number)

        assert numeric_only.dtypes.to_list() == ed_dtypes
        assert numeric_only.columns.to_list() == ed_fields
        # One date-format entry per field, and none of them carries a format.
        assert len(es_date_formats) == len(ed_dtypes)
        assert set(es_date_formats) == {None}
    def test_ecommerce_selected_non_metric_source_fields(self):
        """A selection without numeric columns yields no metric source fields."""
        # Note: none of these columns are metric.
        #   category                       object
        #   currency                       object
        #   customer_birth_date    datetime64[ns]
        #   customer_first_name            object
        #   user                           object
        non_metric_names = [
            "category",
            "currency",
            "customer_birth_date",
            "customer_first_name",
            "user",
        ]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=ECOMMERCE_INDEX_NAME,
            display_names=non_metric_names,
        )
        ecommerce_subset = self.pd_ecommerce()[non_metric_names]

        ed_dtypes, ed_fields, es_date_formats = mappings.metric_source_fields()
        numeric_only = ecommerce_subset.select_dtypes(include=np.number)

        assert numeric_only.dtypes.to_list() == ed_dtypes
        assert numeric_only.columns.to_list() == ed_fields
        # No fields are returned, so there are no date formats either.
        assert len(es_date_formats) == len(ed_dtypes)
        assert set(es_date_formats) == set()
Exemple #8
0
    def test_all_fields(self):
        """``dtypes()`` over the whole flights index equals the pandas dtypes."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        expected_dtypes = self.pd_flights().dtypes

        assert_series_equal(expected_dtypes, mappings.dtypes())
Exemple #9
0
    def test_perform_request(self):
        """Issue a raw ``_cat/indices`` request and check the flights row.

        The response is expected to be one whitespace-separated line such as::

            yellow open flights TNUv0iysQSi7F-N5ykWfWQ 1 1 13059 0 5.7mb 5.7mb
        """
        client = ed.Client(ES_TEST_CLIENT)

        response = client.perform_request("GET", "/_cat/indices/flights")

        # Split on any run of whitespace: a literal " " separator would produce
        # empty tokens (shifting the indexes below) if the columns are padded.
        tokens = response.split()

        assert tokens[2] == "flights"  # third column in the sample line above
        assert tokens[6] == "13059"  # seventh column in the sample line above
    def test_get_field_names_all(self):
        """With no scripted fields, both ``get_field_names`` variants list all columns."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )
        flights_df = self.pd_flights()

        without_scripted = mappings.get_field_names(include_scripted_fields=False)
        with_scripted = mappings.get_field_names(include_scripted_fields=True)

        assert without_scripted == with_scripted
        assert_index_equal(flights_df.columns, pd.Index(without_scripted))
Exemple #11
0
    def test_init_selected_fields(self):
        """Display names passed to the constructor are reported back unchanged."""
        selected = [
            "timestamp",
            "DestWeather",
            "DistanceKilometers",
            "AvgTicketPrice",
        ]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
            display_names=selected,
        )

        assert selected == mappings.display_names
    def test_flights_all_metric_source_fields_and_bool(self):
        """``include_bool=True`` returns the numeric and bool flights columns."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )
        flights_df = self.pd_flights()

        ed_dtypes, ed_fields, es_date_formats = mappings.metric_source_fields(
            include_bool=True
        )
        numeric_and_bool = flights_df.select_dtypes(include=[np.number, "bool"])

        assert numeric_and_bool.dtypes.to_list() == ed_dtypes
        assert numeric_and_bool.columns.to_list() == ed_fields
        # One date-format entry per field, and none of them carries a format.
        assert len(es_date_formats) == len(ed_dtypes)
        assert set(es_date_formats) == {None}
    def test_all_formats(self):
        """Per-field pandas dtypes agree with the flights DataFrame for every mapped field."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        flights_df = self.pd_flights()

        assert_series_equal(flights_df.dtypes, mappings.dtypes())

        # Check each field declared in the flights mapping individually.
        mapped_properties = FLIGHTS_MAPPING["mappings"]["properties"]
        for field in mapped_properties.keys():
            reported_dtype = mappings.field_name_pd_dtype(field)

            assert flights_df[field].dtype == reported_dtype
    def test_get_field_names_selected(self):
        """With a column selection, both ``get_field_names`` variants list that selection."""
        selected = ["Carrier", "AvgTicketPrice"]
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
            display_names=selected,
        )
        pd_subset = self.pd_flights()[selected]

        without_scripted = mappings.get_field_names(include_scripted_fields=False)
        with_scripted = mappings.get_field_names(include_scripted_fields=True)

        assert without_scripted == with_scripted
        assert_index_equal(pd_subset.columns, pd.Index(without_scripted))
Exemple #15
0
    def test_invalid_list_type_display_names(self):
        """Assigning a float to ``display_names`` raises; a tuple is accepted as a list."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        # A float is not list-like, so the assignment must fail.
        with pytest.raises(ValueError):
            mappings.display_names = 12.0

        # A tuple is list-like and is read back as a list.
        as_tuple = ("Cancelled", "DestWeather")
        mappings.display_names = as_tuple

        as_list = ["Cancelled", "DestWeather"]

        assert as_list == mappings.display_names
Exemple #16
0
    def test_selected_fields(self):
        """``dtypes()`` for a column selection equals the pandas dtypes of that selection."""
        selected = [
            "timestamp",
            "DestWeather",
            "DistanceKilometers",
            "AvgTicketPrice",
        ]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
            display_names=selected,
        )

        expected_dtypes = self.pd_flights()[selected].dtypes

        assert_series_equal(expected_dtypes, mappings.dtypes())
    def test_flights_all_metric_source_fields_bool_and_timestamp(self):
        """Bool and timestamp inclusion returns numeric, bool and datetime columns."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )
        flights_df = self.pd_flights()

        ed_dtypes, ed_fields, es_date_formats = mappings.metric_source_fields(
            include_bool=True,
            include_timestamp=True,
        )
        wanted_kinds = [np.number, "bool", "datetime"]
        pd_subset = flights_df.select_dtypes(include=wanted_kinds)

        assert pd_subset.dtypes.to_list() == ed_dtypes
        assert pd_subset.columns.to_list() == ed_fields
        assert len(es_date_formats) == len(ed_dtypes)
        # TODO - test position of date_format
        expected_formats = {"strict_date_hour_minute_second", None}
        assert set(es_date_formats) == expected_formats
    def test_add_new_scripted_field(self):
        """Adding a scripted field with display name ``None`` appends ``None`` to display names."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        scripted_dtype = np.dtype("int64")
        mappings.add_scripted_field("scripted_field_None", None, scripted_dtype)

        # note 'None' is printed as 'NaN' in index, but .index shows it is 'None'
        # (to inspect, write mappings.info_es(...) into a StringIO and print it)

        expected_names = self.pd_flights().columns.to_list()
        expected_names.append(None)

        assert expected_names == mappings.display_names
    def test_all_formats(self):
        """Each name in ``self.time_formats`` is reported back as its own date format."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=self.time_index_name,
        )

        # do a rename so display_name for a field is different to es_field_name
        rename_map = {"strict_year_month": "renamed_strict_year_month"}
        mappings.rename(rename_map)

        # (to inspect, write mappings.info_es(...) into a StringIO and print it)

        for name in self.time_formats.keys():
            reported_format = mappings.date_field_format(name)

            assert name == reported_format
Exemple #20
0
    def test_not_found_display_names(self):
        """A failed ``display_names`` assignment raises ``KeyError`` and changes nothing."""
        # "unknown" is the entry expected to trigger the failure.
        names_with_unknown = [
            "Cancelled",
            "timestamp",
            "DestWeather",
            "unknown",
            "DistanceKilometers",
            "AvgTicketPrice",
        ]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        with pytest.raises(KeyError):
            mappings.display_names = names_with_unknown

        # After the rejected assignment the full column list is still reported.
        all_columns = self.pd_flights().columns.to_list()

        assert all_columns == mappings.display_names
Exemple #21
0
    def test_set_display_names(self):
        """``display_names`` can be assigned, and then reassigned, and reads back as set."""
        first_selection = [
            "Cancelled",
            "timestamp",
            "DestWeather",
            "DistanceKilometers",
            "AvgTicketPrice",
        ]

        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        mappings.display_names = first_selection

        assert first_selection == mappings.display_names

        # A second assignment replaces the first one.
        second_selection = ["AvgTicketPrice", "timestamp"]

        mappings.display_names = second_selection
        assert second_selection == mappings.display_names
Exemple #22
0
    def test_non_exists_rename(self):
        """Renaming only the name "unknown" changes nothing and records no renames."""
        es_client = ed.Client(ES_TEST_CLIENT)
        mappings = ed.FieldMappings(
            client=es_client,
            index_pattern=FLIGHTS_INDEX_NAME,
        )

        column_series = self.pd_flights().columns.to_series()

        names_before = column_series.index.to_list()
        assert names_before == mappings.display_names

        rename_map = {"unknown": "renamed_unknown"}

        # The rename is applied in place; here it is expected to be a no-op.
        mappings.rename(rename_map)

        names_after = column_series.index.to_list()
        assert names_after == mappings.display_names

        recorded = mappings.get_renames()

        assert not recorded
Exemple #23
0
    def test_bad_perform_request(self):
        """A ``_cat/indices`` request for "non_existant_index" raises ``NotFoundError``."""
        es_client = ed.Client(ES_TEST_CLIENT)
        missing_index_url = "/_cat/indices/non_existant_index"

        with pytest.raises(elasticsearch.exceptions.NotFoundError):
            es_client.perform_request("GET", missing_index_url)