Beispiel #1
0
def test_random_from_query(get_dataframe):
    """
    A random sample taken from a custom query has the requested number of rows.
    """
    sample_size = 7
    sites_query = CustomQuery(
        "SELECT id, version FROM infrastructure.sites",
        column_names=["id", "version"],
    )
    sampled = sites_query.random_sample(size=sample_size)
    assert len(get_dataframe(sampled)) == sample_size
Beispiel #2
0
def test_random_sample(get_dataframe):
    """
    Sampling from a query keeps its columns and yields the requested row count.
    """
    sample_size = 6
    sites_query = CustomQuery(
        "SELECT id, version FROM infrastructure.sites", ["id", "version"]
    )
    sampled_df = get_dataframe(sites_query.random_sample(size=sample_size))
    # Column names are checked before the row count, so a schema problem
    # is reported first.
    assert list(sampled_df.columns) == ["id", "version"]
    assert len(sampled_df) == sample_size
Beispiel #3
0
def test_union(get_dataframe):
    """
    Unioning a query with itself using all=False removes the duplicated rows.
    """
    calls = CustomQuery("SELECT * FROM events.calls LIMIT 10")
    deduped_df = get_dataframe(calls.union(calls, all=False))
    id_matches = deduped_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
    # The sample call id is expected on exactly two rows after deduplication.
    assert len(deduped_df[id_matches]) == 2
Beispiel #4
0
def test_union_all(get_dataframe):
    """
    Unioning a query with itself using the default arguments keeps duplicates.
    """
    calls = CustomQuery("SELECT * FROM events.calls LIMIT 10")
    combined_df = get_dataframe(calls.union(calls))
    id_matches = combined_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
    # The sample call id is expected on four rows when duplicates are kept.
    assert len(combined_df[id_matches]) == 4
Beispiel #5
0
    def setUp(self):
        """
        Create the query objects and store them on the instance.
        """
        first_day, second_day = "2016-01-01", "2016-01-02"

        # Daily locations for two different dates.
        self.dl1 = daily_location(first_day)
        self.dl2 = daily_location(second_day)

        # Two `limit` queries for the same date, the second with offset=5.
        self.stub1 = limit(first_day)
        self.stub2 = limit(first_day, offset=5)

        # Custom query selecting msisdn from events.calls, limited to ten rows.
        self.subset_q = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")
Beispiel #6
0
def test_using_join_to_subset(get_dataframe):
    """
    Joining a query onto another query restricts it to that query's values.
    """
    locations = daily_location("2016-01-01")
    msisdn_subset = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")
    joined = locations.join(
        msisdn_subset, on_left=["subscriber"], on_right=["msisdn"]
    )
    joined_subscribers = set(get_dataframe(joined).subscriber)
    expected_subscribers = set(get_dataframe(msisdn_subset).msisdn)
    assert expected_subscribers == joined_subscribers
    assert len(joined_subscribers) == 10
Beispiel #7
0
class test_query_union(TestCase):
    """
    Checks the row counts produced when a query is unioned with itself.
    """

    def setUp(self):
        """
        Create the calls query, limited to ten rows, used by each test.
        """
        self.q1 = CustomQuery("SELECT * FROM events.calls LIMIT 10")

    def test_union_all(self):
        """
        The default union keeps duplicates: the sample id is on four rows.
        """
        combined_df = self.q1.union(self.q1).get_dataframe()
        id_matches = combined_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
        assert len(combined_df[id_matches]) == 4

    def test_union(self):
        """
        A union with all=False drops duplicates: the sample id is on two rows.
        """
        deduped_df = self.q1.union(self.q1, all=False).get_dataframe()
        id_matches = deduped_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
        assert len(deduped_df[id_matches]) == 2
Beispiel #8
0
 def setUp(self):
     """Create the calls query, limited to ten rows, and store it as q1."""
     calls_sql = "SELECT * FROM events.calls LIMIT 10"
     self.q1 = CustomQuery(calls_sql)
Beispiel #9
0
class test_join(TestCase):
    """
    Tests for joining one query onto another with the ``join`` method.
    """

    def setUp(self):
        """
        Create the query objects the tests join together.
        """
        self.dl1 = daily_location("2016-01-01")
        self.dl2 = daily_location("2016-01-02")

        # Two `limit` queries for the same date, the second with offset=5.
        self.stub1 = limit("2016-01-01")
        self.stub2 = limit("2016-01-01", offset=5)

        self.subset_q = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")

    def _query_has_values(self, Q, expected_values, column="subscriber"):
        """
        Assert that the distinct values in one column of a query's dataframe
        equal the distinct expected values.

        Parameters
        ----------
        Q
            Object with a ``get_dataframe()`` method.
        expected_values
            Iterable of the values expected in ``column``.
        column : str
            Name of the dataframe column to compare.
        """
        value_set = set(Q.get_dataframe()[column])
        self.assertEqual(set(expected_values), value_set)

    def test_can_join(self):
        """
        Two queries can be joined.
        """
        df = self.dl1.join(self.dl2, on_left="subscriber").get_dataframe()
        self.assertIsInstance(df, DataFrame)

    def test_name_append(self):
        """
        Can append a custom name to a join.
        """
        df = self.dl1.join(
            self.dl2, on_left="subscriber", left_append="_left", right_append="_right"
        ).get_dataframe()
        self.assertEqual(list(df.columns), ["subscriber", "name_left", "name_right"])

    def test_value_of_join(self):
        """
        The joined row for one fixed subscriber has the expected values.
        """
        df = self.dl1.join(
            self.dl2, on_left="subscriber", left_append="_day1", right_append="_day2"
        ).get_dataframe()
        # `.ix` was removed from pandas; `.loc` is the label-based lookup.
        row = df.set_index("subscriber").loc["ye8jQ0ovnGd9GlJa"]
        self.assertEqual(list(row), ["Rukum", "Baglung"])

    def test_left_join(self):
        """
        FlowMachine.Join can be done as a left join.
        """
        table = self.stub1.join(
            self.stub2, on_left="subscriber", how="left"
        ).get_dataframe()
        self.assertEqual(len(table), 10)
        self.assertEqual(table.subscriber.isnull().sum(), 0)

    def test_right_join(self):
        """
        FlowMachine.Join can be done as a right join.
        """
        table = self.stub1.join(
            self.stub2, on_left="subscriber", how="right"
        ).get_dataframe()
        self.assertEqual(len(table), 10)
        self.assertEqual(table.subscriber.isnull().sum(), 0)

    def test_join_multiple_columns(self):
        """
        flowmachine.Join can be done on more than one column
        """
        # Report the missing test as skipped so an empty body does not
        # count as a pass.
        self.skipTest("Joining on more than one column is not tested yet.")

    def test_raises_value_error(self):
        """
        flowmachine.Join raises value error when on_left and on_right are different lengths.
        """
        with self.assertRaises(ValueError):
            self.dl1.join(
                self.dl2, on_left=["subscriber", "location_id"], on_right="subscriber"
            )

    def test_using_join_to_subset(self):
        """
        Can we use the join method to subset with a query
        """
        sub = self.dl1.join(self.subset_q, on_left=["subscriber"], on_right=["msisdn"])
        self._query_has_values(sub, self.subset_q.get_dataframe()["msisdn"])