def test_random_from_query(get_dataframe):
    """
    Check that a random sample drawn from a custom query has the requested
    number of rows.
    """
    sites_query = CustomQuery(
        "SELECT id, version FROM infrastructure.sites",
        column_names=["id", "version"],
    )
    sample = sites_query.random_sample(size=7)
    sampled_rows = get_dataframe(sample)
    assert len(sampled_rows) == 7
def test_random_sample(get_dataframe):
    """
    Check that random_sample on a query keeps the query's columns and returns
    the requested number of rows.
    """
    expected_columns = ["id", "version"]
    sites_query = CustomQuery(
        "SELECT id, version FROM infrastructure.sites", ["id", "version"]
    )
    sample_df = get_dataframe(sites_query.random_sample(size=6))
    assert list(sample_df.columns) == expected_columns
    assert len(sample_df) == 6
def test_union(get_dataframe):
    """
    Union a query with itself using all=False and check how many rows carry
    one particular id.
    """
    calls = CustomQuery("SELECT * FROM events.calls LIMIT 10")
    deduped_df = get_dataframe(calls.union(calls, all=False))
    # Select the rows for a single, fixed call id.
    has_target_id = deduped_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
    matching_rows = deduped_df[has_target_id]
    assert len(matching_rows) == 2
def test_union_all(get_dataframe):
    """
    Union a query with itself using the default arguments and check that the
    rows for one particular id are kept twice over.
    """
    calls = CustomQuery("SELECT * FROM events.calls LIMIT 10")
    combined_df = get_dataframe(calls.union(calls))
    # Select the rows for a single, fixed call id.
    has_target_id = combined_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
    matching_rows = combined_df[has_target_id]
    assert len(matching_rows) == 4
def setUp(self):
    """
    Build the query objects stored on the instance: two daily locations,
    two limit queries (one with an offset of 5) and a ten-row msisdn query.
    """
    first_day = "2016-01-01"
    second_day = "2016-01-02"
    msisdn_sql = "SELECT msisdn FROM events.calls LIMIT 10"

    self.dl1 = daily_location(first_day)
    self.dl2 = daily_location(second_day)
    self.stub1 = limit(first_day)
    self.stub2 = limit(first_day, offset=5)
    self.subset_q = CustomQuery(msisdn_sql)
def test_using_join_to_subset(get_dataframe):
    """
    Joining a daily location onto a query of msisdns should restrict the
    result to exactly the subscribers present in that query.
    """
    locations = daily_location("2016-01-01")
    msisdn_query = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")
    joined = locations.join(msisdn_query, on_left=["subscriber"], on_right=["msisdn"])

    joined_subscribers = set(get_dataframe(joined).subscriber)
    expected_subscribers = set(get_dataframe(msisdn_query).msisdn)

    assert expected_subscribers == joined_subscribers
    assert len(joined_subscribers) == 10
class test_query_union(TestCase):
    """
    Tests for the union of a query with itself, with and without all=False.
    """

    def setUp(self):
        """Create the ten-row calls query that every test unions with itself."""
        calls_sql = "SELECT * FROM events.calls LIMIT 10"
        self.q1 = CustomQuery(calls_sql)

    def test_union_all(self):
        """
        Test union with all = True
        """
        combined_df = self.q1.union(self.q1).get_dataframe()
        # Select the rows for a single, fixed call id.
        has_target_id = combined_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
        matching_rows = combined_df[has_target_id]
        assert len(matching_rows) == 4

    def test_union(self):
        """
        Test union with all = False
        """
        deduped_df = self.q1.union(self.q1, all=False).get_dataframe()
        # Select the rows for a single, fixed call id.
        has_target_id = deduped_df.id == "5wNJA-PdRJ4-jxEdG-yOXpZ"
        matching_rows = deduped_df[has_target_id]
        assert len(matching_rows) == 2
def setUp(self):
    """Store a ten-row query over events.calls on the instance as ``q1``."""
    calls_sql = "SELECT * FROM events.calls LIMIT 10"
    self.q1 = CustomQuery(calls_sql)
class test_join(TestCase):
    """
    Tests for joining queries together with the ``join`` method.
    """

    def setUp(self):
        """
        Build the queries shared by the tests: two daily locations, two limit
        queries (the second offset by 5) and a ten-row msisdn query.
        """
        self.dl1 = daily_location("2016-01-01")
        self.dl2 = daily_location("2016-01-02")
        self.stub1 = limit("2016-01-01")
        self.stub2 = limit("2016-01-01", offset=5)
        self.subset_q = CustomQuery("SELECT msisdn FROM events.calls LIMIT 10")

    def _query_has_values(self, Q, expected_values, column="subscriber"):
        """
        Assert that the set of values in one column of a query's dataframe
        equals the set of expected values.

        Parameters
        ----------
        Q
            Query object exposing ``get_dataframe()``.
        expected_values
            Iterable of the values the column should contain.
        column : str
            Name of the dataframe column to compare, default "subscriber".
        """
        value_set = set(Q.get_dataframe()[column])
        self.assertEqual(set(expected_values), value_set)

    def test_can_join(self):
        """
        Two queries can be joined.
        """
        df = self.dl1.join(self.dl2, on_left="subscriber").get_dataframe()
        self.assertIsInstance(df, DataFrame)

    def test_name_append(self):
        """
        Can append a custom name to a join.
        """
        df = self.dl1.join(
            self.dl2, on_left="subscriber", left_append="_left", right_append="_right"
        ).get_dataframe()
        self.assertEqual(list(df.columns), ["subscriber", "name_left", "name_right"])

    def test_value_of_join(self):
        """
        One randomly chosen value is correct
        """
        df = self.dl1.join(
            self.dl2, on_left="subscriber", left_append="_day1", right_append="_day2"
        ).get_dataframe()
        # Label-based lookup with .loc; the .ix indexer no longer exists in
        # current pandas releases.
        self.assertEqual(
            list(df.set_index("subscriber").loc["ye8jQ0ovnGd9GlJa"]),
            ["Rukum", "Baglung"],
        )

    def test_left_join(self):
        """
        FlowMachine.Join can be done as a left join.
        """
        table = self.stub1.join(
            self.stub2, on_left="subscriber", how="left"
        ).get_dataframe()
        self.assertEqual(len(table), 10)
        self.assertEqual(table.subscriber.isnull().sum(), 0)

    def test_right_join(self):
        """
        FlowMachine.Join can be done as a right join.
        """
        table = self.stub1.join(
            self.stub2, on_left="subscriber", how="right"
        ).get_dataframe()
        self.assertEqual(len(table), 10)
        self.assertEqual(table.subscriber.isnull().sum(), 0)

    def test_join_multiple_columns(self):
        """
        flowmachine.Join can be done on more than one column
        """
        # TODO: placeholder - this test currently asserts nothing.
        pass

    def test_raises_value_error(self):
        """
        flowmachine.Join raises value error when on_left and on_right are different lengths.
        """
        with self.assertRaises(ValueError):
            self.dl1.join(
                self.dl2, on_left=["subscriber", "location_id"], on_right="subscriber"
            )

    def test_using_join_to_subset(self):
        """
        Can we use the join method to subset with a query
        """
        sub = self.dl1.join(self.subset_q, on_left=["subscriber"], on_right=["msisdn"])
        self._query_has_values(sub, self.subset_q.get_dataframe()["msisdn"])