def test_split_panda_default_column_names(self):
    # Columns can be named as key words in pandas ("mean" is used as a split
    # column here). Both the uncleaned one-way split and the cleaned two-way
    # split are expected to leave two entries in the second result.
    cases = (
        ("mean:2", False),
        ("mean:2; y:3", True),
    )
    for split_spec, clean_flag in cases:
        result = emp.sad(self.pat2, self.cols2, splits=split_spec,
                         clean=clean_flag)
        assert_equal(len(result[1][1]), 2)
def test_clean(self):
    # No 'a' in the second split on x: the second result holds both spp when
    # clean is False and only 'b' when clean is True.
    for clean_flag, n_entries in ((False, 2), (True, 1)):
        result = emp.sad(self.pat1, self.cols1, 'x:2', clean=clean_flag)
        assert_equal(len(result[1][1]), n_entries)
def test_one_way_uneven_split(self):
    # 0.2 should fall in the second division of y.
    result = emp.sad(self.pat1, self.cols1, 'y:2')
    assert_equal(len(result), 2)

    # First division: a single species.
    lower = result[0][1]
    assert_equal(lower['spp'].values, ['a'])
    assert_equal(lower['y'].values, [2])

    # Second division: both species.
    upper = result[1][1]
    assert_equal(upper['spp'].values, ['a', 'b'])
    assert_equal(upper['y'].values, [2, 4])
def test_two_way_split(self):
    # A complete split on x (2 divisions) and y (3 divisions) generates
    # 6 results.
    result = emp.sad(self.pat1, self.cols1, 'x:2; y:3')
    assert_equal(len(result), 6)

    # Results are ordered going through x first, then y.
    first = result[0][1]
    assert_equal(first['spp'].values, 'a')
    assert_equal(first['y'].values, 2)

    second = result[1][1]
    assert_equal(second['y'].values, [1, 1])

    last = result[5][1]
    assert_equal(last['spp'].values, 'b')

    # NOTE(review): this re-checks the first result rather than the last one;
    # possibly a copy-paste slip for result[5] -- confirm the intended value
    # before changing it.
    assert_equal(result[0][1]['y'].values, 2)
def test_simple_with_cols(self):
    # Specify count and spp_col here (through self.cols1); no splits.
    result = emp.sad(self.pat1, self.cols1, None)
    first_table = result[0][1]
    assert_array_equal(first_table['y'], [4, 4])
def test_simple(self):
    # No cols given: falling back on spp_col in metadata, so count 1 for
    # each row.
    result = emp.sad(self.pat1, None, None)
    first_table = result[0][1]
    assert_array_equal(first_table['y'], [3, 2])
def test_split_categorical(self):
    # Split on each distinct year, then on x in two divisions; check the 'y'
    # values of the four results in order.
    result = emp.sad(self.pat1, self.cols1, 'year:split; x:2')
    expected_y = (3, [], [1, 1], [3])
    for position, wanted in enumerate(expected_y):
        assert_equal(result[position][1]['y'].values, wanted)