def test_str_add_ser(self):
        """Prepending a string literal to a column must match pandas.

        Builds ``"The last name is: " + customer_last_name`` from both the
        eland-side and the pandas-side ecommerce frames and compares the
        two resulting series.
        """
        prefix = "The last name is: "

        ed_result = prefix + self.ed_ecommerce()["customer_last_name"]
        pd_result = prefix + self.pd_ecommerce()["customer_last_name"]

        assert_pandas_eland_series_equal(pd_result, ed_result)
    def test_ser_add_str(self):
        """Appending a string literal to a column must match pandas.

        Builds ``customer_first_name + " is the first name."`` from both the
        eland-side and the pandas-side ecommerce frames and compares the
        two resulting series.
        """
        suffix = " is the first name."

        ed_result = self.ed_ecommerce()["customer_first_name"] + suffix
        pd_result = self.pd_ecommerce()["customer_first_name"] + suffix

        assert_pandas_eland_series_equal(pd_result, ed_result)
    def test_bad_str_add_ser(self):
        """Prepend a literal containing a special character (``" *"``).

        The same concatenation is performed on the eland-side and the
        pandas-side ``customer_last_name`` column and the results compared.
        """
        # TODO encode special characters better
        #      Elasticsearch accepts this, but it will cause problems
        awkward_prefix = " *"

        ed_result = awkward_prefix + self.ed_ecommerce()["customer_last_name"]
        pd_result = awkward_prefix + self.pd_ecommerce()["customer_last_name"]

        assert_pandas_eland_series_equal(pd_result, ed_result)
    def test_ser_add_str_add_ser(self):
        """Chain column + literal + column and compare against pandas.

        Joins ``customer_first_name`` and ``customer_last_name`` with a single
        space on both the pandas-side and the eland-side ecommerce frames.
        """

        def full_name(load_frame):
            # The frame is loaded once per operand, exactly as many times as
            # the expression references it.
            return (
                load_frame()["customer_first_name"]
                + " "
                + load_frame()["customer_last_name"]
            )

        pd_result = full_name(self.pd_ecommerce)
        ed_result = full_name(self.ed_ecommerce)

        assert_pandas_eland_series_equal(pd_result, ed_result)
Esempio n. 5
0
    def test_flights_filter_index_items(self, items):
        """Filter the ``FlightDelayType`` column by index labels.

        ``items`` is passed straight through to ``filter(items=..., axis=0)``
        on both the eland-side and the pandas-side column (presumably supplied
        by a pytest parametrization defined outside this block).
        """
        ed_column = self.ed_flights_small()["FlightDelayType"]
        pd_column = self.pd_flights_small()["FlightDelayType"]

        ed_filtered = ed_column.filter(items=items, axis=0)
        pd_filtered = pd_column.filter(items=items, axis=0)

        assert_pandas_eland_series_equal(pd_filtered, ed_filtered)
Esempio n. 6
0
    def test_ecommerce_series_simple_series_addition(self):
        """Add two numeric columns of the first 100 ecommerce rows.

        ``taxful_total_price + total_quantity`` is computed on the pandas-side
        and the eland-side frame and compared with ``check_less_precise=True``.
        """

        def price_plus_quantity(frame):
            return frame["taxful_total_price"] + frame["total_quantity"]

        pd_total = price_plus_quantity(self.pd_ecommerce().head(100))
        ed_total = price_plus_quantity(self.ed_ecommerce().head(100))

        assert_pandas_eland_series_equal(pd_total, ed_total, check_less_precise=True)
Esempio n. 7
0
    def test_getitem_one_argument(self):
        """Fetch ``OriginAirportID`` from the first 89 flights rows.

        Note: despite the method name, the column is read here through
        attribute access (``frame.OriginAirportID``), not ``frame[...]``.
        """
        ed_origin_ids = self.ed_flights().head(89).OriginAirportID
        pd_origin_ids = self.pd_flights().head(89).OriginAirportID

        assert_pandas_eland_series_equal(pd_origin_ids, ed_origin_ids)
Esempio n. 8
0
    def test_getitem_one_attribute(self):
        """Fetch ``OriginAirportID`` from the first 103 flights rows.

        Note: despite the method name, the column is read here through
        bracket access (``frame["OriginAirportID"]``), not attribute access.
        """
        ed_origin_ids = self.ed_flights().head(103)["OriginAirportID"]
        pd_origin_ids = self.pd_flights().head(103)["OriginAirportID"]

        assert_pandas_eland_series_equal(pd_origin_ids, ed_origin_ids)
Esempio n. 9
0
    def test_head_tail(self):
        """Compare ``head(10)`` and then ``tail(10)`` of the ``Carrier`` column.

        The pandas side comes from ``self.pd_flights()``; the eland side is an
        ``ed.Series`` built directly on the flights index.
        """
        pd_s = self.pd_flights()["Carrier"]
        ed_s = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

        # Same check for both ends of the series, head first.
        for slicer in ("head", "tail"):
            pd_slice = getattr(pd_s, slicer)(10)
            ed_slice = getattr(ed_s, slicer)(10)

            assert_pandas_eland_series_equal(pd_slice, ed_slice)
Esempio n. 10
0
    def test_ecommerce_series_simple_arithmetics(self):
        """Evaluate one mixed arithmetic expression on both frames.

        The expression combines ``+``, ``/``, ``-`` and ``*`` over four
        numeric columns and two scalar constants, using the first 100
        ecommerce rows. Results are compared with ``check_less_precise=True``.
        """

        def mixed_expression(frame):
            # Written once so both sides are guaranteed to evaluate the
            # same expression with the same operator precedence.
            return (
                frame["taxful_total_price"]
                + 5
                + frame["total_quantity"] / frame["taxless_total_price"]
                - frame["total_unique_products"] * 10.0
                + frame["total_quantity"]
            )

        pd_frame = self.pd_ecommerce().head(100)
        ed_frame = self.ed_ecommerce().head(100)

        pd_result = mixed_expression(pd_frame)
        ed_result = mixed_expression(ed_frame)

        assert_pandas_eland_series_equal(
            pd_result, ed_result, check_less_precise=True
        )
Esempio n. 11
0
    def test_getitem_multiple_calls(self):
        """Select a column subset, then select single columns from that subset.

        Two things are checked on the first 89 flights rows:

        * asking the subset for a column that was left out of it
          (``"Carrier"``) raises ``KeyError``;
        * asking it for a column that is part of it (``"DestCountry"``)
          yields a series equal to the pandas column of the same name.
        """
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)

        ed_subset = ed_flights[
            ["DestCityName", "DestCountry", "DestLocation", "DestRegion"]
        ]

        # Previously a bare ``except KeyError: pass`` meant this step passed
        # whether or not the lookup raised, so it verified nothing. Fail
        # explicitly when the excluded column is handed back.
        try:
            ed_subset["Carrier"]
        except KeyError:
            pass
        else:
            raise AssertionError(
                "selecting a column outside the subset did not raise KeyError"
            )

        pd_col1 = pd_flights["DestCountry"]
        ed_col1 = ed_subset["DestCountry"]

        assert_pandas_eland_series_equal(pd_col1, ed_col1)
Esempio n. 12
0
    def test_all_formats(self):
        """Compare every column of the time index against pandas-parsed values.

        For each ``(name, format)`` entry in ``self.time_formats`` the
        timestamps in ``self.times`` are re-rendered with that format, parsed
        back with ``pd.to_datetime`` using the same format, and compared with
        the eland column called ``name``. The expected series is indexed by
        the stringified positions ``"0"``, ``"1"``, ...
        """
        index_name = self.time_index_name
        ed_df = ed.DataFrame(ES_TEST_CLIENT, index_name)

        for format_name, time_format in self.time_formats.items():
            times = []
            for raw_time in self.times:
                # Source timestamps carry fractional seconds and a UTC offset.
                parsed = datetime.strptime(raw_time, "%Y-%m-%dT%H:%M:%S.%f%z")
                rendered = parsed.strftime(time_format)
                times.append(pd.to_datetime(rendered, format=time_format))

            expected_index = list(map(str, range(len(self.times))))

            ed_series = ed_df[format_name]
            pd_series = pd.Series(times, index=expected_index, name=format_name)

            assert_pandas_eland_series_equal(pd_series, ed_series)
Esempio n. 13
0
    def test_simple_lat_lon(self):
        """Compare the first ``DestLocation`` value between pandas and eland.

        Note on nested object key order: the order of the keys inside a
        nested object can change depending on how ``_source`` is requested
        (this could be a bug in ES). Example:

        PUT my_index/doc/1
        {
          "location": {
            "lat": "50.033333",
            "lon": "8.570556"
          }
        }

        A plain search returns the keys in the order they were indexed:

        GET my_index/_search

        "_source": {
          "location": {
            "lat": "50.033333",
            "lon": "8.570556"
          }
        }

        Restricting ``_source`` to the field returns them swapped:

        GET my_index/_search
        {
          "_source": "location"
        }

        "_source": {
          "location": {
            "lon": "8.570556",
            "lat": "50.033333"
          }
        }

        Hence we store the pandas df source json as 'lon', 'lat'
        """
        pd_first_location = self.pd_flights()["DestLocation"].head(1)
        ed_first_location = self.ed_flights()["DestLocation"].head(1)

        # NOTE(review): rtol=2 is a very loose relative tolerance if it is
        # forwarded to a numeric comparison - confirm this value is intended.
        assert_pandas_eland_series_equal(
            pd_first_location,
            ed_first_location,
            check_exact=False,
            rtol=2,
        )
Esempio n. 14
0
    def test_ecommerce_series_basic_rarithmetics(self):
        """Run every reflected arithmetic operator against four operand kinds.

        For each operator name (dunder and plain spelling) the
        ``taxful_total_price`` column of the first 10 ecommerce rows is
        combined with, in order: the ``total_quantity`` column, a Python
        float, a ``np.float32`` and a Python int. Each pandas result is
        compared with the eland result using ``check_less_precise=True``.
        """
        pd_df = self.pd_ecommerce().head(10)
        ed_df = self.ed_ecommerce().head(10)

        reflected_ops = [
            "__radd__",
            "__rtruediv__",
            "__rfloordiv__",
            "__rpow__",
            "__rmod__",
            "__rmul__",
            "__rsub__",
            "radd",
            "rtruediv",
            "rfloordiv",
            "rpow",
            "rmod",
            "rmul",
            "rsub",
        ]

        for op_name in reflected_ops:
            # (pandas operand, eland operand) pairs, rebuilt for every
            # operator so each check starts from freshly selected columns.
            operand_pairs = [
                (pd_df["total_quantity"], ed_df["total_quantity"]),
                (3.141, 3.141),
                (np.float32(2.879), np.float32(2.879)),
                (6, 6),
            ]

            for pd_operand, ed_operand in operand_pairs:
                pd_series = getattr(pd_df["taxful_total_price"], op_name)(pd_operand)
                ed_series = getattr(ed_df["taxful_total_price"], op_name)(ed_operand)

                assert_pandas_eland_series_equal(
                    pd_series, ed_series, check_less_precise=True
                )
Esempio n. 15
0
    def test_ecommerce_series_basic_arithmetics(self):
        """Run every forward arithmetic operator against four operand kinds.

        For each operator name (dunder and plain spelling) the
        ``taxful_total_price`` column of the first 100 ecommerce rows is
        combined with, in order: the ``total_quantity`` column, a Python
        float, a ``np.float32`` and a Python int. Each pandas result is
        compared with the eland result using ``check_less_precise=True``.
        """
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)

        forward_ops = [
            "__add__",
            "__truediv__",
            "__floordiv__",
            "__pow__",
            "__mod__",
            "__mul__",
            "__sub__",
            "add",
            "truediv",
            "floordiv",
            "pow",
            "mod",
            "mul",
            "sub",
        ]

        for op_name in forward_ops:
            # (pandas operand, eland operand) pairs, rebuilt for every
            # operator so each check starts from freshly selected columns.
            operand_pairs = [
                (pd_df["total_quantity"], ed_df["total_quantity"]),
                (10.56, 10.56),
                (np.float32(1.879), np.float32(1.879)),
                (8, 8),
            ]

            for pd_operand, ed_operand in operand_pairs:
                pd_series = getattr(pd_df["taxful_total_price"], op_name)(pd_operand)
                ed_series = getattr(ed_df["taxful_total_price"], op_name)(ed_operand)

                assert_pandas_eland_series_equal(
                    pd_series, ed_series, check_less_precise=True
                )
Esempio n. 16
0
    def test_name(self):
        """Assigning ``.name`` must keep the eland series in step with pandas.

        The ``Carrier`` series is compared as loaded, then again after each of
        two successive in-place renames; after every step both the series
        contents and the ``name`` attributes must agree.
        """
        # deep copy pandas DataFrame as .name alters this reference frame
        pd_series = self.pd_flights()["Carrier"].copy(deep=True)
        ed_series = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

        assert_pandas_eland_series_equal(pd_series, ed_series)
        assert ed_series.name == pd_series.name

        for new_name in ("renamed1", "renamed2"):
            pd_series.name = new_name
            ed_series.name = new_name

            assert_pandas_eland_series_equal(pd_series, ed_series)
            assert ed_series.name == pd_series.name
Esempio n. 17
0
    def test_rename(self):
        """``rename`` must produce series that match pandas, twice in a row.

        The ``Carrier`` series is renamed to ``"renamed"`` and that result is
        renamed again to ``"renamed2"``; the eland and pandas results are
        compared after each step. Intermediate values and ``info_es()`` output
        are printed along the way.
        """
        pd_original = self.pd_flights()["Carrier"]
        ed_original = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

        assert_pandas_eland_series_equal(pd_original, ed_original)

        # First rename.
        pd_first = pd_original.rename("renamed")
        ed_first = ed_original.rename("renamed")

        print(pd_first)
        print(ed_first)

        print(ed_first.info_es())

        assert_pandas_eland_series_equal(pd_first, ed_first)

        # Second rename, applied on top of the first one.
        pd_second = pd_first.rename("renamed2")
        ed_second = ed_first.rename("renamed2")

        print(ed_second.info_es())

        assert ed_second.name == "renamed2"

        assert_pandas_eland_series_equal(pd_second, ed_second)
Esempio n. 18
0
    def test_supported_series_dtypes_rops(self):
        """Check which reflected operators work for which column type pairs.

        Using the first 100 ecommerce rows:

        * every reflected operator is applied to three numeric column
          pairings and the eland result compared with pandas
          (``check_less_precise=True``);
        * every reflected operator except ``__rmul__`` is applied to
          pairings that involve the ``currency`` column and must raise
          ``TypeError`` on both the pandas and the eland side.
        """
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)

        # Test some specific operations that are and aren't supported
        numeric_ops = [
            "__radd__",
            "__rtruediv__",
            "__rfloordiv__",
            "__rpow__",
            "__rmod__",
            "__rmul__",
            "__rsub__",
        ]

        # __rmul__ is supported for int * str in pandas
        non_string_numeric_ops = [
            "__radd__",
            "__rtruediv__",
            "__rfloordiv__",
            "__rpow__",
            "__rmod__",
            "__rsub__",
        ]

        # (column the operator is looked up on, column passed as argument)
        numeric_column_pairs = [
            ("taxful_total_price", "taxless_total_price"),  # float op float
            ("total_quantity", "taxless_total_price"),  # int op float
            ("taxful_total_price", "total_quantity"),  # float op int
        ]

        for receiver, argument in numeric_column_pairs:
            for op in numeric_ops:
                pd_series = getattr(pd_df[receiver], op)(pd_df[argument])
                ed_series = getattr(ed_df[receiver], op)(ed_df[argument])

                assert_pandas_eland_series_equal(
                    pd_series, ed_series, check_less_precise=True
                )

        # str op int (throws)
        for op in non_string_numeric_ops:
            # Column argument first, then a float scalar; pandas before eland.
            for frame in (pd_df, ed_df):
                with pytest.raises(TypeError):
                    getattr(frame["currency"], op)(frame["total_quantity"])
            for frame in (pd_df, ed_df):
                with pytest.raises(TypeError):
                    getattr(frame["currency"], op)(10.0)

        # int op str (throws)
        for op in non_string_numeric_ops:
            for frame in (pd_df, ed_df):
                with pytest.raises(TypeError):
                    getattr(frame["total_quantity"], op)(frame["currency"])
Esempio n. 19
0
    def test_sample(self):
        """Sampling twice with the same ``random_state`` must give equal results.

        One sample of 10 ``Carrier`` values is converted through
        ``self.build_from_index`` (presumably into the matching pandas
        series - confirm against that helper) and compared with a second
        sample drawn with the same seed.
        """
        ed_s = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

        reference_sample = ed_s.sample(n=10, random_state=self.SEED)
        pd_s = self.build_from_index(reference_sample)

        ed_s_sample = ed_s.sample(n=10, random_state=self.SEED)

        assert_pandas_eland_series_equal(pd_s, ed_s_sample)