def test_insert(self):
    """Exercise ``insert_row`` and ``insert_col`` on freshly built sheets.

    Each scenario starts from its own ``SeriesSet(TABLE_DATA, TABLE_COL,
    nan=None)`` so the insertions of one scenario cannot leak into the other.
    """

    def fresh_sheet():
        # A new sheet per scenario; the inserts below modify it in place.
        return SeriesSet(TABLE_DATA, TABLE_COL, nan=None)

    # Scenario 1: ROW_1 is inserted at row 0, then ROW_2 at row 1.
    by_row = fresh_sheet()
    for position, record in enumerate((ROW_1, ROW_2)):
        by_row.insert_row(position, record)

    self.assertEqual(tuple(by_row.shape), (5, 5))
    self.assertEqual(by_row.missing, [0, 0, 2, 0, 4])
    self.assertEqual(by_row.columns, ['A', 'B', 'C', 'D', 'C_4'])
    self.assertEqual(by_row[0], ['ROW1', 'ROW1', None, 'ROW1', None])
    self.assertEqual(by_row[1], ROW_2)
    self.assertEqual(by_row[-1], [6, 7, 8, 9, None])

    # Scenario 2: the same two sequences are inserted as columns 0 and 1.
    by_col = fresh_sheet()
    for position, series in enumerate((ROW_1, ROW_2)):
        by_col.insert_col(position, series)

    self.assertEqual(tuple(by_col.shape), (5, 6))
    self.assertEqual(by_col.missing, [2, 0, 2, 2, 3, 2])
    self.assertEqual(by_col.columns, ['C_4', 'C_5', 'A', 'B', 'C', 'D'])
    self.assertEqual(by_col[0], ['ROW1', 'ROW2', 1, 2, 3, 4])
    self.assertEqual(by_col[2], [None, 'ROW3', 6, 7, 8, 9])
    self.assertEqual(by_col[-1], [None, 'ROW5', None, None, None, None])
def test_pop(self):
    """Check what ``pop_row`` and ``pop_col`` hand back to the caller.

    Both scenarios run against their own six-record Name/Age sheet whose
    nan marker is the empty string.
    """

    def build_sheet():
        # Fresh lists on every call so the scenarios never share fixture data.
        records = [
            ['Alan', 35],
            ['', 3],
            ['Bob', 27],
            ['Charlie', 30],
            ['Daniel', 29],
            ['Daniel', 29],
        ]
        return SeriesSet(records, ['Name', 'Age'], '')

    # Scenario 1: pop the first two rows and inspect the returned sheet.
    row_source = build_sheet()
    popped_rows = row_source.pop_row([0, 1])
    self.assertEqual(tuple(popped_rows.shape), (2, 2))
    self.assertEqual(popped_rows.missing, [1, 0])
    self.assertEqual(popped_rows.columns, ['Name', 'Age'])
    self.assertEqual(popped_rows[0], ['Alan', 35])
    self.assertEqual(popped_rows[1], ['', 3])

    # Scenario 2: pop the first column; both the returned sheet and the
    # sheet it was taken from are inspected.
    col_source = build_sheet()
    popped_cols = col_source.pop_col([0])
    self.assertEqual(tuple(popped_cols.shape), (6, 1))
    self.assertEqual(popped_cols.missing, [1])
    self.assertEqual(popped_cols.columns, ['Name'])
    self.assertEqual(popped_cols[0], ['Alan'])
    self.assertEqual(tuple(col_source.shape), (6, 1))
    self.assertEqual(col_source.columns, ['Age'])
def test_merge(self):
    """Run ``merge`` in 'outer', 'inner', 'left' and 'right' mode on 'Name'.

    For every mode the resulting shape, ``missing`` list, column names and
    the first and last records are compared with fixed expectations.
    """
    left = SeriesSet(
        [['Alan', 35], ['Bob', 27], ['Charlie', 30], ['Daniel', 29]],
        ['Name', 'Age'],
        '',
    )
    right = SeriesSet(
        [
            ['Alan', 'M', 35],
            ['Bob', 'M', 27],
            ['Charlie', 'F', 30],
            ['Janny', 'F', 26],
        ],
        ['Name', 'gender', 'Age'],
        '',
    )

    # Column layout expected by the outer, inner and left checks; the right
    # check below expects a different order.
    left_first_columns = ['Name', 'Age', 'Name_1', 'gender', 'Age_1']

    outer = left.merge(right, 'outer', 'Name', 'Name')
    self.assertEqual(tuple(outer.shape), (5, 5))
    self.assertEqual(outer.missing, [1, 1, 1, 1, 1])
    self.assertEqual(outer.columns, left_first_columns)
    self.assertEqual(outer[0], ['Alan', 35, 'Alan', 'M', 35])
    self.assertEqual(outer[-1], ['', '', 'Janny', 'F', 26])

    inner = left.merge(right, 'inner', 'Name', 'Name')
    self.assertEqual(tuple(inner.shape), (3, 5))
    self.assertEqual(inner.missing, [0, 0, 0, 0, 0])
    self.assertEqual(inner.columns, left_first_columns)
    self.assertEqual(inner[0], ['Bob', 27, 'Bob', 'M', 27])
    self.assertEqual(inner[-1], ['Charlie', 30, 'Charlie', 'F', 30])

    left_join = left.merge(right, 'left', 'Name', 'Name')
    self.assertEqual(tuple(left_join.shape), (4, 5))
    self.assertEqual(left_join.missing, [0, 0, 1, 1, 1])
    self.assertEqual(left_join.columns, left_first_columns)
    self.assertEqual(left_join[0], ['Alan', 35, 'Alan', 'M', 35])
    self.assertEqual(left_join[-1], ['Daniel', 29, '', '', ''])

    right_join = left.merge(right, 'right', 'Name', 'Name')
    self.assertEqual(tuple(right_join.shape), (4, 5))
    self.assertEqual(right_join.missing, [0, 0, 0, 1, 1])
    self.assertEqual(
        right_join.columns, ['Name', 'gender', 'Age', 'Name_1', 'Age_1'])
    self.assertEqual(right_join[0], ['Alan', 'M', 35, 'Alan', 35])
    self.assertEqual(right_join[-1], ['Janny', 'F', 26, '', ''])
def test_init_seq(self):
    """Build a one-column sheet from ``SEQ_DATA`` and validate it.

    The trailing arguments handed to ``isinit_sheet_success`` are presumably
    the expected data, shape, columns, nan marker and missing list -- confirm
    against that helper's signature.
    """
    sheet = SeriesSet(SEQ_DATA, 'T1', None)
    expected_data = OrderedDict(T1=SEQ_DATA)
    self.isinit_sheet_success(
        sheet,
        expected_data,
        (5, 1),
        ['T1'],
        None,
        [1],
    )
def test_init_dict(self):
    """Build a sheet from ``DICT_DATA`` (with ``nan=None``) and validate it.

    The trailing arguments handed to ``isinit_sheet_success`` are presumably
    the expected data, shape, columns, nan marker and missing list -- confirm
    against that helper's signature.
    """
    sheet = SeriesSet(DICT_DATA, nan=None)
    self.isinit_sheet_success(
        sheet,
        DICT_DATA,
        (3, 4),
        TABLE_COL,
        None,
        [0, 0, 1, 0],
    )
def test_init_table(self):
    """Build a sheet from ``TABLE_DATA`` / ``TABLE_COL`` and validate it.

    The sheet is checked against ``DICT_DATA``. The trailing arguments handed
    to ``isinit_sheet_success`` are presumably the expected shape, columns,
    nan marker and missing list -- confirm against that helper's signature.
    """
    sheet = SeriesSet(TABLE_DATA, TABLE_COL, None)
    self.isinit_sheet_success(
        sheet,
        DICT_DATA,
        (3, 4),
        TABLE_COL,
        None,
        [0, 0, 1, 0],
    )
def test_count(self):
    """Check ``count`` for a single value, a list of values and a sub-area."""
    sheet = SeriesSet(TABLE_DATA, TABLE_COL, None)

    # Single value: the result is compared directly with a number.
    single = sheet.count(2)
    self.assertEqual(single, 1)

    # List of values: the result exposes ``values()`` and ``keys()``.
    value_tally = sheet.count([2, 3])
    self.assertEqual(sorted(value_tally.values()), [1, 2])

    # Counted afresh on purpose, so the sequence of calls on the sheet stays
    # exactly as it always was.
    key_tally = sheet.count([2, 3])
    self.assertEqual(sorted(key_tally.keys()), [2, 3])

    # ``None`` with two extra tuple arguments; presumably these bound the
    # searched region -- confirm against ``SeriesSet.count``.
    bounded = sheet.count(None, (1, 2), (0, 1))
    self.assertEqual(bounded, 1)
def test_get(self):
    """``get`` with the key 'TEST' is expected to compare equal to None."""
    sheet = SeriesSet(TABLE_DATA, TABLE_COL, None)
    looked_up = sheet.get('TEST')
    self.assertEqual(looked_up, None)
if str(top).lower() == 'all': top = self.n_features shannons = [(key, total / times) for key, (total, times) in self._shannon.items()] return sorted(shannons, key=lambda x: x[1], reverse=True)[:top] if __name__ == '__main__': test_data = SeriesSet({ 'color': ['green', 'dark', 'dark', 'green', 'white', 'green', 'dark', 'dark', 'dark', 'green', 'white', 'white', 'green', 'white', 'dark', 'white', 'green'], 'root': ['fully rolled', 'fully rolled', 'fully rolled', 'fully rolled', 'fully rolled', 'slightly rolled ','slightly rolled ', 'slightly rolled ', 'slightly rolled ', 'straight', 'slightly rolled ', 'fully rolled', 'slightly rolled ', 'slightly rolled ','slightly rolled ', 'fully rolled', 'fully rolled'], 'response': ['boom', 'low', 'boom', 'low', 'boom', 'boom', 'boom', 'boom', 'low', 'clear', 'clear', 'boom', 'boom', 'low', 'boom', 'boom', 'low'], 'texture': ['clear'] * 6 + ['slightly paste', 'clear', 'slightly paste', 'clear', 'paste', 'paste', 'slightly paste', 'slightly paste', 'clear', 'paste', 'slightly paste'], 'navel': ['dent'] * 5 + ['slightly dent'] * 4 + ['flat'] * 3 + ['dent'] * 2 + \ ['slightly dent', 'flat', 'slightly dent'], 'touch': ['hard slip'] * 5 + ['soft sticky ', 'soft sticky ', 'hard slip', 'hard slip', 'soft sticky ', 'hard slip', 'soft sticky ', 'hard slip', 'hard slip', 'soft sticky ', 'hard slip', 'hard slip'], 'good': ['good'] * 8 + ['bad'] * 9}) test_data = test_data[[ 'color', 'root', 'response', 'texture', 'navel', 'touch', 'good' ]] ## print(test_data.show()) X, Y = test_data[:'touch'], test_data['good'] mytree = DecisionTreeClassifier() mytree.fit(X, Y) import pydotplus