def test_invalid_path(self):
    """Verify that create_dataframe raises ValueError for an invalid path."""
    invalid_path = '../does/not/exist.db'
    self.assertRaises(ValueError, homework3.create_dataframe, invalid_path)
def test_invalid_path(self):
    """Check that create_dataframe raises ValueError for an invalid db path.

    The test fails if the call returns normally; any exception other than
    ValueError propagates and is reported as an error.
    """
    # assertRaises states the expectation directly instead of forcing a
    # failure with assertTrue(False) inside a try block.
    with self.assertRaises(ValueError):
        homework3.create_dataframe("C:/abracadabra.db")
def test_badpath(self):
    """Check that a ValueError is raised when a bad path is provided.

    Serves as a smoke test. The test fails when no exception is raised;
    exceptions other than ValueError are not caught and propagate.
    """
    # Previously the test passed silently when create_dataframe returned
    # normally; assertRaises turns that case into a failure.
    with self.assertRaises(ValueError):
        create_dataframe('badpath')
def test_ColsAreKey(self):
    """Check that no (video_id, category_id) pair repeats in the DataFrame."""
    frame = hw.create_dataframe(self.INPUT_PATH)
    key_columns = ['video_id', 'category_id']
    # The columns form a key when dropping repeated pairs removes no rows.
    distinct_pairs = frame[key_columns].drop_duplicates()
    is_key = len(frame) == len(distinct_pairs)
    self.assertTrue(is_key)
def test_row_count(self):
    """Verify that the loaded DataFrame has exactly 75005 rows."""
    expected_rows = 75005
    frame = homework3.create_dataframe(
        '../LectureNotes/Data-Essentials/class.db')
    self.assertEqual(len(frame), expected_rows)
def test_column_names(self):
    """Check that the DataFrame columns are exactly the expected names.

    The expected columns are video_id, category_id and language; the
    order of the columns is not checked.
    """
    expected_columns = {'video_id', 'category_id', 'language'}
    frame = create_dataframe('./class.db')
    # Set equality catches missing columns as well as unexpected ones; a
    # one-way difference would let a DataFrame with no columns pass.
    self.assertSetEqual(set(frame.columns), expected_columns)
def test_columns_are_key(self):
    """Verify that no (video_id, language) combination occurs twice."""
    key_columns = ['video_id', 'language']
    frame = create_dataframe(HomeworkThreeTest.correct_file_path)

    # Number of rows that share each distinct key combination.
    rows_per_key = frame.groupby(key_columns).size()
    repeated_keys = rows_per_key[rows_per_key > 1]

    # Any surviving entry means the columns do not form a key.
    self.assertFalse(repeated_keys.any())
def test_key1(self):
    """Test whether (video_id, language) can be a key of the DataFrame.

    The two columns form a key when the number of distinct value pairs
    equals the number of rows.
    """
    frame = homework3.create_dataframe(self.path)
    # Compare the columns as pairs instead of concatenating them into one
    # string: concatenation can merge distinct pairs ('ab' + 'c' equals
    # 'a' + 'bc') and produces missing values that nunique() skips.
    distinct_pairs = frame[['video_id', 'language']].drop_duplicates()
    self.assertEqual(len(distinct_pairs), len(frame))
def test_column_names(self):
    """Verify the DataFrame holds the expected columns and no others."""
    frame = homework3.create_dataframe(
        '../LectureNotes/Data-Essentials/class.db')
    expected_columns = {'language', 'video_id', 'category_id'}
    actual_columns = set(frame.columns)
    self.assertSetEqual(actual_columns, expected_columns)
def test_column_names(self):
    """Verify that only the expected column names are present."""
    frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
    # Written in sorted order so it compares directly with sorted columns.
    expected_sorted = ['category_id', 'language', 'video_id']
    actual_sorted = sorted(frame.columns)
    self.assertEqual(actual_sorted, expected_sorted)
def test_colnames(self):
    """Assert the DataFrame has three columns with the specified names.

    The names checked are video_id, category_id and language.
    """
    frame = create_dataframe("homework-3-iankirkman/class.db")
    columns = frame.columns
    required_names = ('video_id', 'category_id', 'language')
    matches_spec = (
        len(columns) == 3
        and all(name in columns for name in required_names)
    )
    self.assertTrue(matches_spec)
def test_ExpectedColumns(self):
    """Check that the column names, ignoring order, match the expected list."""
    frame = hw.create_dataframe(self.INPUT_PATH)
    wanted = sorted(['video_id', 'language', 'category_id'])
    found = sorted(frame.columns.tolist())
    self.assertTrue(found == wanted)
def test_valueError(self):
    """Check that each bad argument leads to a ValueError.

    The arguments tried are None, "blah.db" and the empty string. The
    test fails on the first argument that does not produce a ValueError.
    """
    # NOTE(review): raises_error is not defined in this snippet; it is only
    # reached when create_dataframe returns normally -- confirm its purpose.
    for bad_arg in (None, "blah.db", ""):
        try:
            raises_error(create_dataframe(bad_arg))
        except ValueError:
            continue
        self.fail('Did not see ValueError')
def test_num_rows(self):
    """Verify the number of records is the expected 35950.

    Per the original author, the expected count was taken from a query
    run in sqlite outside Python.
    """
    expected_rows = 35950
    frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
    self.assertEqual(len(frame), expected_rows)
def test_key2(self):
    """Test whether (video_id, language, category_id) can be a key.

    The three columns form a key when the number of distinct value
    triples equals the number of rows.
    """
    frame = homework3.create_dataframe(self.path)
    key_columns = ['video_id', 'language', 'category_id']
    # Compare the columns as tuples instead of concatenating their string
    # forms: without a separator, distinct triples can produce the same
    # concatenated text and be counted as one.
    distinct_triples = frame[key_columns].drop_duplicates()
    self.assertEqual(len(distinct_triples), len(frame))
def test_column_key(self):
    """Verify that ['category_id', 'video_id', 'language'] constitutes a key.

    The columns are treated as a key when grouping by them yields as
    many groups as the DataFrame has rows.
    """
    frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
    groups = frame.groupby(['category_id', 'video_id', 'language'])
    row_count = frame.shape[0]
    group_count = len(groups)
    self.assertEqual(row_count, group_count)
def test_column_names(self):
    """Verify every column in the DataFrame has one of the valid names.

    Only unexpected columns cause a failure; the test does not require
    all of the valid names to be present.
    """
    valid_names = ['video_id', 'category_id', 'language']
    frame = create_dataframe(HomeworkThreeTest.correct_file_path)

    # True only when no column falls outside the valid names.
    all_valid = all(name in valid_names for name in frame.columns)
    self.assertTrue(all_valid)
def test_column_not_key(self):
    """Verify that no smaller combination of columns constitutes a key.

    Every single column and every pair drawn from category_id, video_id
    and language must group into a number of groups that differs from
    the row count.
    """
    frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
    total_rows = frame.shape[0]
    non_key_sets = [
        ['video_id'],
        ['category_id'],
        ['language'],
        ['video_id', 'language'],
        ['category_id', 'video_id'],
        ['category_id', 'language'],
    ]
    for columns in non_key_sets:
        group_count = len(frame.groupby(columns))
        self.assertNotEqual(total_rows, group_count)
class Homework3Tests(unittest.TestCase):
    """Unit tests for the DataFrame built by create_dataframe (homework3.py)."""

    # Shared fixture: the DataFrame is built once, while the class body runs.
    test_file = 'class.db'
    test_df = create_dataframe(test_file)
    # Number of rows in test_df (shape[0]), despite the "col" in the name.
    col_length = test_df.shape[0]

    def test_column_names(self):
        """The columns are exactly video_id, category_id and language."""
        expected = set(['category_id', 'video_id', 'language'])
        found = set(self.test_df.columns)
        self.assertTrue(found == expected)

    def test_number_rows(self):
        """The DataFrame holds at least 10 rows."""
        has_enough_rows = self.col_length >= 10
        self.assertTrue(has_enough_rows)

    def test_category_id_key(self):
        """category_id is a key when its distinct values match the row count."""
        distinct_values = self.test_df['category_id'].unique()
        self.assertEqual(
            self.col_length, len(distinct_values), 'category_id is not a key')

    def test_video_id_key(self):
        """video_id is a key when its distinct values match the row count."""
        distinct_values = self.test_df['video_id'].unique()
        self.assertEqual(
            self.col_length, len(distinct_values), 'video_id is not a key')

    def test_language_key(self):
        """language is a key when its distinct values match the row count."""
        distinct_values = self.test_df['language'].unique()
        self.assertEqual(
            self.col_length, len(distinct_values), 'language is not a key')

    def test_path_exception(self):
        """An invalid path makes create_dataframe raise ValueError."""
        with self.assertRaises(ValueError):
            create_dataframe('nonexistant_file.db')
def test_check_key(self):
    """Test that no proper subset of the DataFrame's columns forms a key.

    Every combination of 1 to len(columns) - 1 columns is grouped, and
    the number of groups must differ from the number of rows. Per the
    original author, the data contains duplicate rows, so none of these
    combinations is expected to be a key.
    """
    df = homework3.create_dataframe('../LectureNotes/Data-Essentials/class.db')
    columns = list(df.columns)
    for size in range(1, len(columns)):
        for combo in itertools.combinations(columns, size):
            # groupby treats a tuple as one column label, so convert the
            # combination to a list to group by each of its columns.
            grouping = df.groupby(list(combo))
            self.assertNotEqual(len(grouping), len(df))
def test_key(self):
    """Confirm that all three columns are required for a unique key.

    The full rows must already be distinct, while leaving out any one of
    language, category_id or video_id must produce repeated rows.
    """
    frame = create_dataframe("homework-3-iankirkman/class.db")
    total_rows = frame.shape[0]

    # Distinct rows when every column is kept.
    distinct_full = frame.drop_duplicates().shape[0]

    # Distinct rows left after leaving out one column at a time.
    distinct_without = [
        frame.drop(columns=[omitted]).drop_duplicates().shape[0]
        for omitted in ('language', 'category_id', 'video_id')
    ]

    is_minimal_key = (
        total_rows == distinct_full
        and all(total_rows > count for count in distinct_without)
    )
    self.assertTrue(is_minimal_key)
def test_column_names(self):
    """Test for exactly and only the columns video_id, category_id, language.

    The test passes when the DataFrame has the same number of columns as
    there are known names and each known name appears among them; column
    order is not checked.
    """
    known_names = ('video_id', 'category_id', 'language')
    frame = hw3.create_dataframe(pathname)
    # assertCountEqual compares the two sequences as unordered collections,
    # covering both the column count and the presence of every name, and
    # reports the differing names on failure.
    self.assertCountEqual(list(frame.columns), list(known_names))
import homework3 as hw
import sqlite3
from sqlite3 import OperationalError
import pandas as pd
import os
import unittest

print("Unit tests to validate that we have the right column names, all five languages are present, the DF has at least 10K rows, and that Id and Language are a key:")

# The DataFrame is loaded once at import time; every test reads this
# module-level frame.
path = '../../LectureNotes/Data-Essentials/class.db'
df = hw.create_dataframe(path)


class UnitTests(unittest.TestCase):
    """Checks on the module-level DataFrame returned by hw.create_dataframe."""

    def test_ColNames(self):
        """The columns are exactly video_id, category_id and language."""
        expected_columns = {'video_id', 'category_id', 'language'}
        self.assertEqual(set(df.columns), expected_columns)

    def test_LanguagesPresent(self):
        """The language column holds exactly FR, DE, GB, CA and US."""
        expected_languages = {'FR', 'DE', 'GB', 'CA', 'US'}
        self.assertEqual(set(df.language.unique()), expected_languages)

    def test_DataFrameSize(self):
        """The DataFrame has more than 10000 rows."""
        self.assertTrue(len(df) > 10000)

    def test_IdLanIsKey(self):
        """Distinct (video_id, language) pairs match the distinct row count.

        The comparison is against the de-duplicated DataFrame, not the
        raw row count.
        """
        distinct_pairs = df[['video_id', 'language']].drop_duplicates()
        distinct_rows = df.drop_duplicates()
        self.assertTrue(len(distinct_pairs) == len(distinct_rows))

    def test_invalid_path_error(self):
        """A bad path makes create_dataframe raise ValueError."""
        with self.assertRaises(ValueError):
            hw.create_dataframe('badPath')


suite = unittest.TestLoader().loadTestsFromTestCase(UnitTests)
def testcolnamescheck(self):
    """Check that the column names, ignoring order, are the expected three."""
    frame = homework3.create_dataframe('class.db')
    expected_sorted = sorted(['video_id', 'category_id', 'language'])
    actual_sorted = sorted(frame.columns)
    self.assertEqual(actual_sorted, expected_sorted)
def testcheckkeys(self):
    """Check that grouping by (video_id, language) yields one group per row."""
    frame = homework3.create_dataframe('class.db')
    key_groups = frame.groupby(['video_id', 'language'])
    row_count = frame.shape[0]
    self.assertTrue(row_count == key_groups.ngroups)
def testnumrows(self):
    """Check that the DataFrame loaded from class.db has 35950 rows."""
    frame = homework3.create_dataframe('class.db')
    row_count = frame.shape[0]
    self.assertEqual(row_count, 35950)
def setUp(self):
    """Build the DataFrame from class.db and store it on ``self.df``."""
    db_path = (
        '/Users/Eric/Documents/UW/DATA515/Assignments/hw2-EPripstein/class.db'
    )
    self.df = create_dataframe(db_path)
def test_smoke(self):
    """Smoke test: the DataFrame loads and has more than 10 rows."""
    frame = homework3.create_dataframe("class.db")
    row_count = frame.shape[0]
    self.assertTrue(row_count > 10)
def testPossibleKey(self):
    """Check that the first and third columns together identify every row."""
    frame = homework3.create_dataframe("class.db")
    # Columns are picked by position (0 and 2) -- presumably video_id and
    # language; TODO confirm the column order create_dataframe returns.
    candidate_key = frame.iloc[:, [0, 2]]
    distinct_count = candidate_key.drop_duplicates().shape[0]
    self.assertTrue(distinct_count == frame.shape[0])
def testColumnNamesAndOrder(self):
    """Check the DataFrame has exactly three columns in the expected order.

    The expected order is video_id, category_id, language.
    """
    frame = homework3.create_dataframe("class.db")
    expected_columns = ['video_id', 'category_id', 'language']
    # Comparing the whole list checks names, order and count at once, and
    # fails cleanly (instead of raising IndexError from indexing a missing
    # position) when there are fewer than three columns.
    self.assertEqual(list(frame.columns), expected_columns)