예제 #1
0
 def test_invalid_path(self):
     """
     Verify that create_dataframe rejects a path that does not exist.

     The call is expected to raise ValueError; any other outcome makes
     the test fail or error.
     """
     missing_db = '../does/not/exist.db'
     self.assertRaises(ValueError, homework3.create_dataframe, missing_db)
 def test_invalid_path(self):
     """
     Check that create_dataframe raises ValueError for an invalid db path.
     """
     # assertRaises fails with an explicit "ValueError not raised" message
     # when the call unexpectedly succeeds.
     with self.assertRaises(ValueError):
         homework3.create_dataframe("C:/abracadabra.db")
 def test_badpath(self):
     '''
     Checks that a ValueError is raised when a bad path is provided.

     Use as smoke test: the test passes only if ValueError is raised.
     It fails when no exception is raised at all, and any other
     exception propagates and is reported as an error.
     '''
     # The context manager makes the "nothing was raised" case a failure
     # instead of a silent pass, and the test method returns None as
     # unittest expects.
     with self.assertRaises(ValueError):
         create_dataframe('badpath')
 def test_ColsAreKey(self):
     """Check that (video_id, category_id) uniquely identifies each row."""
     frame = hw.create_dataframe(self.INPUT_PATH)
     key_columns = ['video_id', 'category_id']
     # De-duplicating on the candidate key must not drop any rows.
     distinct_keys = frame[key_columns].drop_duplicates()
     self.assertTrue(len(frame) == len(distinct_keys))
예제 #5
0
 def test_row_count(self):
     """
     Verify that the dataframe has exactly the expected number of rows.
     """
     expected_rows = 75005
     frame = homework3.create_dataframe(
         '../LectureNotes/Data-Essentials/class.db')
     self.assertEqual(len(frame), expected_rows)
 def test_column_names(self):
     """Check that the dataframe has no columns outside the allowed names.

     Only unexpected extra columns cause a failure; a missing column is
     not detected by this check.
     """
     frame = create_dataframe('./class.db')
     allowed = {'video_id', 'category_id', 'language'}
     # Columns present in the dataframe but not in the allowed set.
     unexpected = set(frame.columns) - allowed
     self.assertTrue(not unexpected)
예제 #7
0
 def test_columns_are_key(self):
     """Check that no (video_id, language) pair occurs more than once."""
     key_columns = ['video_id', 'language']
     frame = create_dataframe(HomeworkThreeTest.correct_file_path)
     # Number of rows sharing each key combination.
     group_sizes = frame.groupby(key_columns).size()
     repeated = group_sizes[group_sizes > 1]
     # Any surviving entry means some key value is shared by several rows.
     self.assertFalse(repeated.any())
 def test_key1(self):
     """Test whether video_id and language together can be a key.

     The pair is a key when no two rows share the same
     (video_id, language) combination.
     """
     df = homework3.create_dataframe(self.path)
     key_columns = ['video_id', 'language']
     # Compare the columns as a pair rather than as one concatenated
     # string: concatenation can merge distinct pairs ('ab' + 'c' equals
     # 'a' + 'bc') and only works when both columns hold strings.
     has_duplicate_keys = df.duplicated(subset=key_columns).any()
     self.assertFalse(has_duplicate_keys)
예제 #9
0
 def test_column_names(self):
     """
     Verify that the dataframe's column set is exactly the expected one.
     """
     expected = {'language', 'video_id', 'category_id'}
     frame = homework3.create_dataframe(
         '../LectureNotes/Data-Essentials/class.db')
     self.assertSetEqual(set(frame.columns), expected)
예제 #10
0
 def test_column_names(self):
     """
     Verify that the sorted column names equal the expected sorted list.
     """
     expected_sorted = ['category_id', 'language', 'video_id']
     frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
     actual_sorted = sorted(frame.columns)
     self.assertEqual(actual_sorted, expected_sorted)
 def test_colnames(self):
     '''
     Asserts there are exactly three columns and that each of
     video_id, category_id and language is among them.
     '''
     columns = create_dataframe("homework-3-iankirkman/class.db").columns
     required = ('video_id', 'category_id', 'language')
     has_three = len(columns) == 3
     self.assertTrue(
         has_three and all(name in columns for name in required))
 def test_ExpectedColumns(self):
     """Check that the sorted column names match the expected names."""
     frame = hw.create_dataframe(self.INPUT_PATH)
     wanted = sorted(['video_id', 'language', 'category_id'])
     actual = sorted(frame.columns.tolist())
     self.assertTrue(actual == wanted)
예제 #13
0
 def test_valueError(self):
     """Check that each invalid path argument leads to a ValueError."""
     for bad_path in (None, "blah.db", ""):
         try:
             # NOTE(review): raises_error is not defined in the visible
             # code -- confirm it exists where this test lives. A
             # ValueError raised by this statement is the expected outcome.
             raises_error(create_dataframe(bad_path))
         except ValueError:
             continue
         # Reached only when the statement above completed normally.
         self.fail('Did not see ValueError')
예제 #14
0
 def test_num_rows(self):
     """
     Verify that the record count matches the expected value (the count
     was obtained by querying sqlite outside python).
     """
     expected_count = 35950
     frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
     row_count = frame.shape[0]
     self.assertEqual(row_count, expected_count)
 def test_key2(self):
     """Test whether video_id, language and category_id can be a key.

     The triple is a key when no two rows share the same combination
     of the three values.
     """
     df = homework3.create_dataframe(self.path)
     key_columns = ['video_id', 'language', 'category_id']
     # Compare the columns as a tuple rather than as one concatenated
     # string: without a separator, different triples can produce the
     # same text (e.g. ids '1' and '11' next to a neighbouring value).
     has_duplicate_keys = df.duplicated(subset=key_columns).any()
     self.assertFalse(has_duplicate_keys)
예제 #16
0
    def test_column_key(self):
        """
        Verify that grouping by ['category_id', 'video_id', 'language']
        yields one group per row, i.e. the three columns form a key.
        """
        frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
        key_columns = ['category_id', 'video_id', 'language']
        group_count = len(frame.groupby(key_columns))
        self.assertEqual(frame.shape[0], group_count)
예제 #17
0
 def test_column_names(self):
     """Check that every dataframe column is one of the valid names."""
     valid_names = ['video_id', 'category_id', 'language']
     frame = create_dataframe(HomeworkThreeTest.correct_file_path)
     # True only when no column falls outside the valid names.
     all_valid = all(name in valid_names for name in frame.columns)
     self.assertTrue(all_valid)
예제 #18
0
    def test_column_not_key(self):
        """
        Verify that no single column and no pair of columns drawn from
        category_id, video_id and language constitutes a key.
        """
        frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
        total_rows = frame.shape[0]

        candidates = (
            ['video_id'], ['category_id'], ['language'],
            ['video_id', 'language'],
            ['category_id', 'video_id'],
            ['category_id', 'language'],
        )

        for columns in candidates:
            # A key would produce exactly one group per row.
            group_count = len(frame.groupby(columns))
            self.assertNotEqual(total_rows, group_count)
class Homework3Tests(unittest.TestCase):
    """Unit tests for the DataFrame returned by create_dataframe."""

    # Shared fixture: the DataFrame is built once, when the class body runs.
    test_file = 'class.db'
    test_df = create_dataframe(test_file)
    # Despite the name, this holds the number of rows (shape[0]).
    col_length = test_df.shape[0]

    def test_column_names(self):
        """Check the columns are exactly category_id, video_id, language."""
        expected = set(['category_id', 'video_id', 'language'])
        actual = set(self.test_df.columns)
        self.assertTrue(actual == expected)

    def test_number_rows(self):
        """Check that the DataFrame holds at least 10 rows."""
        enough_rows = self.col_length >= 10
        self.assertTrue(enough_rows)

    def test_category_id_key(self):
        """Check that category_id has one distinct value per row."""
        distinct = self.test_df['category_id'].unique()
        self.assertEqual(
            self.col_length, len(distinct), 'category_id is not a key')

    def test_video_id_key(self):
        """Check that video_id has one distinct value per row."""
        distinct = self.test_df['video_id'].unique()
        self.assertEqual(
            self.col_length, len(distinct), 'video_id is not a key')

    def test_language_key(self):
        """Check that language has one distinct value per row."""
        distinct = self.test_df['language'].unique()
        self.assertEqual(
            self.col_length, len(distinct), 'language is not a key')

    def test_path_exception(self):
        """Check that an invalid path raises a ValueError exception."""
        with self.assertRaises(ValueError):
            create_dataframe('nonexistant_file.db')
예제 #20
0
    def test_check_key(self):
        """
        Test that no proper subset of the dataframe's columns forms a key.

        Every combination of 1 .. (number of columns - 1) columns is
        checked; the full column set itself is not.
        """
        path = '../LectureNotes/Data-Essentials/class.db'
        df = homework3.create_dataframe(path)
        total_rows = len(df)

        # Since there are duplicate rows in the df, we
        # would expect none of the column combinations to
        # be a key.
        for size in range(1, len(df.columns)):
            for combo in itertools.combinations(df.columns, size):
                # combinations() yields tuples, and groupby interprets a
                # tuple as ONE key (a single column label); convert to a
                # list so the columns are grouped on together.
                grouping = df.groupby(list(combo))
                self.assertNotEqual(len(grouping), total_rows)
    def test_key(self):
        '''
        Confirms that the full rows are unique while dropping any one of
        language, category_id or video_id leaves duplicate rows.
        '''
        frame = create_dataframe("homework-3-iankirkman/class.db")
        total_rows = frame.shape[0]

        # Row count once exact duplicate rows are removed.
        unique_rows = frame.drop_duplicates().shape[0]

        # Distinct-row counts after leaving out one column at a time.
        reduced_counts = [
            frame.drop(columns=[omitted]).drop_duplicates().shape[0]
            for omitted in ('language', 'category_id', 'video_id')
        ]

        full_is_key = total_rows == unique_rows
        subsets_fall_short = all(
            total_rows > count for count in reduced_counts)
        self.assertTrue(full_is_key and subsets_fall_short)
예제 #22
0
    def test_column_names(self):
        """Check for exactly and only video_id, category_id and language.

        If the number of columns equals the number of required names and
        every required name matches at least one column, the dataframe
        holds exactly the required columns.
        """
        frame = hw3.create_dataframe(pathname)
        required = ('video_id', 'category_id', 'language')
        present = frame.columns

        count_matches = frame.shape[1] == len(required)
        every_name_found = all(
            any(column == name for column in present) for name in required)

        self.assertTrue(count_matches and every_name_found)
예제 #23
0
import homework3 as hw
import sqlite3
from sqlite3 import OperationalError
import pandas as pd
import os
import unittest



# Announce what the tests below are meant to validate (runs at import time).
print("Unit tests to validate that we have the right column names, all five languages are present, the DF has at least 10K rows, and that Id and Language are a key:")
# Relative path of the database file handed to hw.create_dataframe.
path = '../../LectureNotes/Data-Essentials/class.db'
# Module-level DataFrame, built once at import time; the UnitTests methods
# read this global `df`.
df = hw.create_dataframe(path)
class UnitTests(unittest.TestCase):
    """Checks on the module-level DataFrame `df` and on bad-path handling."""

    def test_ColNames(self):
        """Check the column set is video_id, category_id and language."""
        expected = set(['video_id', 'category_id', 'language'])
        self.assertEqual(set(df.columns), expected)

    def test_LanguagesPresent(self):
        """Check the distinct language values are FR, DE, GB, CA and US."""
        expected = set(['FR', 'DE', 'GB', 'CA', 'US'])
        found = set(df['language'].unique())
        self.assertEqual(found, expected)

    def test_DataFrameSize(self):
        """Check the DataFrame has more than 10000 rows."""
        row_count = df.shape[0]
        self.assertTrue(row_count > 10000)

    def test_IdLanIsKey(self):
        """Check (video_id, language) has as many distinct pairs as
        there are distinct full rows."""
        distinct_pairs = df[['video_id', 'language']].drop_duplicates()
        distinct_rows = df.drop_duplicates()
        self.assertTrue(len(distinct_pairs) == len(distinct_rows))

    def test_invalid_path_error(self):
        """Check that a bad path makes create_dataframe raise ValueError."""
        with self.assertRaises(ValueError):
            hw.create_dataframe('badPath')

# Collect the test methods of UnitTests into a suite object; the code that
# runs the suite is not visible in this excerpt.
suite = unittest.TestLoader().loadTestsFromTestCase(UnitTests)
 def testcolnamescheck(self):
     """Check that the sorted column names equal the expected names."""
     expected = sorted(['video_id', 'category_id', 'language'])
     frame = homework3.create_dataframe('class.db')
     self.assertEqual(sorted(frame.columns), expected)
    def testcheckkeys(self):
        """Check that (video_id, language) yields one group per row."""
        frame = homework3.create_dataframe('class.db')
        group_count = frame.groupby(['video_id', 'language']).ngroups
        self.assertTrue(frame.shape[0] == group_count)
    def testnumrows(self):
        """Check that the dataframe has exactly 35950 rows."""
        frame = homework3.create_dataframe('class.db')
        self.assertEqual(frame.shape[0], 35950)
예제 #27
0
 def setUp(self):
     """Load the dataframe under test and store it on self.df."""
     db_location = (
         '/Users/Eric/Documents/UW/DATA515/Assignments/hw2-EPripstein/class.db'
     )
     self.df = create_dataframe(db_location)
 def test_smoke(self):
     """Smoke test: the dataframe loads and has more than 10 rows."""
     row_count = homework3.create_dataframe("class.db").shape[0]
     self.assertTrue(row_count > 10)
 def testPossibleKey(self):
     """Check that the first and third columns (by position) form a key."""
     frame = homework3.create_dataframe("class.db")
     # Positional selection: columns 0 and 2, whatever they are named.
     candidate_key = frame.iloc[:, [0, 2]]
     distinct_rows = candidate_key.drop_duplicates().shape[0]
     self.assertTrue(distinct_rows == frame.shape[0])
 def testColumnNamesAndOrder(self):
     """Check the columns are video_id, category_id, language, in that
     order, and that there are exactly three of them."""
     frame = homework3.create_dataframe("class.db")
     columns = frame.columns
     # Evaluate every positional lookup up front so that none of them is
     # skipped when an earlier comparison is already False.
     first_ok = columns[0] == 'video_id'
     second_ok = columns[1] == 'category_id'
     third_ok = columns[2] == 'language'
     width_ok = frame.shape[1] == 3
     self.assertTrue(first_ok and second_ok and third_ok and width_ok)