コード例 #1
0
 def test_invalid_path(self):
     """
     Verify that create_dataframe raises ValueError for a path that
     does not exist.
     """
     missing_db = '../does/not/exist.db'
     self.assertRaises(ValueError, homework3.create_dataframe, missing_db)
コード例 #2
0
 def test_invalid_path(self):
     """Check that create_dataframe rejects an invalid database path.

     The test fails if the call returns without raising ValueError.
     """
     # assertRaises reports "ValueError not raised" on failure, rather than
     # the opaque "False is not true" of a hand-written try/except.
     with self.assertRaises(ValueError):
         homework3.create_dataframe("C:/abracadabra.db")
コード例 #3
0
 def test_badpath(self):
     '''
     Check that a ValueError is raised when a bad path is provided.

     Use as smoke test -- returns True if ValueError is raised.
     Other exceptions are not caught.

     Raises:
         AssertionError: if create_dataframe returns without raising.
     '''
     try:
         create_dataframe('badpath')
     except ValueError:
         return True
     # Reaching this line means no exception was raised at all; without an
     # explicit failure the test would end normally and count as a pass.
     raise AssertionError(
         "create_dataframe('badpath') did not raise ValueError")
コード例 #4
0
 def test_ColsAreKey(self):
     """Check that (video_id, category_id) uniquely identifies each row.

     The number of distinct value pairs in the two columns must equal the
     number of rows in the DataFrame.
     """
     df = hw.create_dataframe(self.INPUT_PATH)
     key_cols = ['video_id', 'category_id']
     distinct_keys = len(df[key_cols].drop_duplicates())
     # assertEqual shows both counts on failure; assertTrue on a
     # precomputed boolean would only say "False is not true".
     self.assertEqual(len(df), distinct_keys)
コード例 #5
0
 def test_row_count(self):
     """
     Verify that the dataframe has exactly 75005 rows.
     """
     frame = homework3.create_dataframe(
         '../LectureNotes/Data-Essentials/class.db')
     self.assertEqual(len(frame), 75005)
コード例 #6
0
 def test_column_names(self):
     """Check that the DataFrame has no columns outside the expected three.

     Only unexpected columns are detected: a DataFrame that is missing one
     of the expected columns still passes this test.
     """
     allowed = {'video_id', 'category_id', 'language'}
     inputdf = create_dataframe('./class.db')
     unexpected = set(inputdf.columns) - allowed
     # Comparing with the empty set makes a failure list the offending
     # column names instead of just "False is not true".
     self.assertEqual(unexpected, set())
コード例 #7
0
 def test_columns_are_key(self):
     """Verify that no (video_id, language) pair labels more than one row."""
     frame = create_dataframe(HomeworkThreeTest.correct_file_path)
     # Number of rows sharing each (video_id, language) combination
     rows_per_key = frame.groupby(['video_id', 'language']).size()
     repeated = rows_per_key[rows_per_key > 1]
     # Any surviving entry is a combination that occurs more than once
     self.assertFalse(repeated.any())
コード例 #8
0
 def test_key1(self):
     """Check that video_id and language together can be a key.

     The number of distinct (video_id, language) pairs must equal the
     number of rows.
     """
     df = homework3.create_dataframe(self.path)
     # Deduplicate on the column pair itself rather than on concatenated
     # strings: without a delimiter 'ab' + 'c' and 'a' + 'bc' collide, and
     # concatenation only works when both columns hold strings.
     distinct_keys = len(df.drop_duplicates(subset=['video_id', 'language']))
     self.assertEqual(df.shape[0], distinct_keys)
コード例 #9
0
 def test_column_names(self):
     """
     Verify that the dataframe's columns are exactly the expected set.
     """
     expected = {'language', 'video_id', 'category_id'}
     frame = homework3.create_dataframe(
         '../LectureNotes/Data-Essentials/class.db')
     self.assertSetEqual(set(frame.columns), expected)
コード例 #10
0
 def test_column_names(self):
     """
     verify the dataframe's sorted column names equal the expected list
     """
     frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
     actual_cols = sorted(frame.columns)
     self.assertEqual(actual_cols, ['category_id', 'language', 'video_id'])
コード例 #11
0
 def test_colnames(self):
     '''
     Assert the dataframe has exactly the columns video_id, category_id
     and language, in any order.
     '''
     df = create_dataframe("homework-3-iankirkman/class.db")
     # assertCountEqual ignores order and, on failure, reports which names
     # are missing or unexpected; a single compound assertTrue cannot.
     self.assertCountEqual(
         df.columns, ['video_id', 'category_id', 'language'])
コード例 #12
0
 def test_ExpectedColumns(self):
     """Check that the DataFrame's columns are exactly the expected three.

     Both name lists are sorted first, so column order is ignored.
     """
     df = hw.create_dataframe(self.INPUT_PATH)
     expected_cols = sorted(['video_id', 'language', 'category_id'])
     input_cols = sorted(df.columns.tolist())
     # assertEqual prints both lists when they differ.
     self.assertEqual(input_cols, expected_cols)
コード例 #13
0
 def test_valueError(self):
     """Check that create_dataframe raises ValueError for each bad path.

     The inputs tried are None, a file name that is expected not to exist,
     and the empty string.
     """
     bad_args = [None, "blah.db", ""]
     for argv in bad_args:
         # subTest labels a failure with the offending argument and lets
         # the remaining inputs run instead of stopping at the first one.
         with self.subTest(path=argv):
             with self.assertRaises(ValueError):
                 create_dataframe(argv)
コード例 #14
0
 def test_num_rows(self):
     """
     verify the record count equals the expected value (the row count
     obtained by querying sqlite outside python)
     """
     expected_rows = 35950
     frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
     self.assertEqual(frame.shape[0], expected_rows)
コード例 #15
0
 def test_key2(self):
     """Check that video_id, language and category_id together can be a key.

     The number of distinct value triples must equal the number of rows.
     """
     df = homework3.create_dataframe(self.path)
     key_cols = ['video_id', 'language', 'category_id']
     # Deduplicate on the columns themselves: string concatenation without
     # a delimiter can make two different triples look identical.
     distinct_keys = len(df.drop_duplicates(subset=key_cols))
     self.assertEqual(df.shape[0], distinct_keys)
コード例 #16
0
    def test_column_key(self):
        """
        Verify that grouping by ['category_id', 'video_id', 'language']
        yields one group per row, i.e. the three columns constitute a key
        """
        frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
        groups = frame.groupby(['category_id', 'video_id', 'language'])
        row_count = frame.shape[0]
        self.assertEqual(row_count, len(groups))
コード例 #17
0
 def test_column_names(self):
     """Check that every DataFrame column is one of the valid names.

     Only unexpected columns are detected; a missing valid column does not
     fail this test.
     """
     # List of valid column names
     column_list = ['video_id', 'category_id', 'language']
     # Instantiate DataFrame
     df = create_dataframe(HomeworkThreeTest.correct_file_path)
     # Collect the offenders so a failure names them, instead of reducing
     # the check to a bare True/False flag.
     invalid = [name for name in df.columns if name not in column_list]
     self.assertEqual(invalid, [])
コード例 #18
0
    def test_column_not_key(self):
        """
        Verify that every single column and every pair of columns fails to
        be a key: grouping by it must not give one group per row
        """
        frame = homework3.create_dataframe("/Users/whamsy/Desktop/class.db")
        total_rows = frame.shape[0]

        candidate_sets = [
            ['video_id'],
            ['category_id'],
            ['language'],
            ['video_id', 'language'],
            ['category_id', 'video_id'],
            ['category_id', 'language'],
        ]

        for columns in candidate_sets:
            self.assertNotEqual(total_rows, len(frame.groupby(columns)))
コード例 #19
0
class Homework3Tests(unittest.TestCase):
    """Unit tests class for homework3.py."""

    # The database is read once, when the class body executes, and the
    # resulting DataFrame is shared by every test method.
    test_file = 'class.db'
    test_df = create_dataframe(test_file)
    # Despite its name, this is the number of rows (shape[0]).
    col_length = test_df.shape[0]

    def test_column_names(self):
        """Test that the DataFrame columns are exactly
        video_id, category_id and language.
        """
        # assertSetEqual reports the differing names on failure.
        self.assertSetEqual(
            set(self.test_df.columns),
            {'category_id', 'video_id', 'language'})

    def test_number_rows(self):
        """Test that there are at least 10 rows in the DataFrame."""
        self.assertGreaterEqual(self.col_length, 10)

    def test_category_id_key(self):
        """Test whether the category_id column constitutes a key."""
        self.assertEqual(
            self.col_length,
            len(self.test_df['category_id'].unique()),
            'category_id is not a key'
            )

    def test_video_id_key(self):
        """Test whether the video_id column constitutes a key."""
        self.assertEqual(
            self.col_length,
            len(self.test_df['video_id'].unique()),
            'video_id is not a key'
            )

    def test_language_key(self):
        """Test whether the language column constitutes a key."""
        self.assertEqual(
            self.col_length,
            len(self.test_df['language'].unique()),
            'language is not a key'
            )

    def test_path_exception(self):
        """Test that an invalid path raises a ValueError exception."""
        self.assertRaises(ValueError, create_dataframe, 'nonexistant_file.db')
コード例 #20
0
    def test_check_key(self):
        """
        Test that no proper subset of the dataframe's columns forms a key.

        Every combination of 1 up to (number of columns - 1) columns is
        grouped; the test fails if any grouping has as many groups as the
        dataframe has rows. The full column set itself is not checked.
        """
        path = '../LectureNotes/Data-Essentials/class.db'
        df = homework3.create_dataframe(path)

        # generate every combination smaller than the full column set
        combs = [
            cols
            for size in range(1, len(df.columns))
            for cols in itertools.combinations(df.columns, size)
        ]

        # Since there are duplicate rows in the df, we
        # would expect none of the column combinations to
        # be a key.
        for cols in combs:
            # groupby must receive a list: pandas interprets a tuple as one
            # single key (a column label), not as several columns.
            grouping = df.groupby(list(cols))
            self.assertNotEqual(len(grouping), len(df))
コード例 #21
0
    def test_key(self):
        '''
        Confirm that all three columns are required for a unique key.

        The full rows must be free of duplicates, and dropping any single
        column must leave fewer distinct rows than the dataframe has rows.
        '''
        df = create_dataframe("homework-3-iankirkman/class.db")
        n_rows = df.shape[0]

        # All three columns together: removing duplicates must remove nothing.
        self.assertEqual(n_rows, df.drop_duplicates().shape[0])

        # Each pair of columns (one column dropped) must contain duplicates.
        # Separate assertions show which pair broke the expectation.
        for dropped in ('language', 'category_id', 'video_id'):
            pair_rows = df.drop(columns=[dropped]).drop_duplicates().shape[0]
            self.assertGreater(
                n_rows, pair_rows,
                'columns other than %r already form a key' % dropped)
コード例 #22
0
    def test_column_names(self):
        """Test for exactly and only the column names video_id, category_id
        and language.

        If the number of columns matches the number of known names, and each
        known name is present, then the DataFrame has only the required
        columns.
        """
        dftest = hw3.create_dataframe(pathname)
        knownnames = ('video_id', 'category_id', 'language')

        # Separate assertions report the actual column count or the missing
        # name, which a single accumulated boolean cannot.
        self.assertEqual(dftest.shape[1], len(knownnames))
        for name in knownnames:
            self.assertIn(name, dftest.columns)
コード例 #23
0
import homework3 as hw
import sqlite3
from sqlite3 import OperationalError
import pandas as pd
import os
import unittest



# Announce what this module's tests cover; printed when the module is loaded.
print("Unit tests to validate that we have the right column names, all five languages are present, the DF has at least 10K rows, and that Id and Language are a key:")
# Relative path of the database file handed to hw.create_dataframe.
path = '../../LectureNotes/Data-Essentials/class.db'
# Built once at module load; the UnitTests methods read this module-level df.
df = hw.create_dataframe(path)
class UnitTests(unittest.TestCase):
    """Checks on the module-level DataFrame ``df`` and on hw.create_dataframe."""

    def test_ColNames(self):
        """The columns are exactly video_id, category_id and language."""
        self.assertEqual(set(df), {'video_id', 'category_id', 'language'})

    def test_LanguagesPresent(self):
        """The language column holds exactly FR, DE, GB, CA and US."""
        self.assertEqual(
            set(df.language.unique()), {'FR', 'DE', 'GB', 'CA', 'US'})

    def test_DataFrameSize(self):
        """The DataFrame has more than 10000 rows."""
        self.assertGreater(df.shape[0], 10000)

    def test_IdLanIsKey(self):
        """The count of distinct (video_id, language) pairs equals the count
        of distinct rows, so fully duplicated rows are tolerated.
        """
        # assertEqual shows both counts when they differ.
        self.assertEqual(
            len(df[['video_id', 'language']].drop_duplicates()),
            len(df.drop_duplicates()))

    def test_invalid_path_error(self):
        """create_dataframe raises ValueError for a bad path."""
        self.assertRaises(ValueError, hw.create_dataframe, 'badPath')

# Collect the test methods of UnitTests into a suite object; the visible code
# only builds it -- running it is presumably done elsewhere (TODO confirm).
suite = unittest.TestLoader().loadTestsFromTestCase(UnitTests)
コード例 #24
0
 def testcolnamescheck(self):
     """Compare the sorted column names with the sorted expected names."""
     actual = sorted(homework3.create_dataframe('class.db').columns)
     expected = sorted(['video_id', 'category_id', 'language'])
     self.assertEqual(actual, expected)
コード例 #25
0
    def testcheckkeys(self):
        """Check that (video_id, language) identifies each row uniquely.

        The number of groups formed by the two columns must equal the
        number of rows.
        """
        df = homework3.create_dataframe('class.db')
        key_groups = df.groupby(['video_id', 'language']).ngroups
        # assertEqual reports both counts on failure.
        self.assertEqual(df.shape[0], key_groups)
コード例 #26
0
    def testnumrows(self):
        """Verify that the DataFrame built from class.db has 35950 rows."""
        frame = homework3.create_dataframe('class.db')
        self.assertEqual(frame.shape[0], 35950)
コード例 #27
0
 def setUp(self):
     """Load the DataFrame from the hard-coded database path into self.df."""
     db_path = (
         '/Users/Eric/Documents/UW/DATA515/Assignments/hw2-EPripstein/class.db'
     )
     self.df = create_dataframe(db_path)
コード例 #28
0
 def test_smoke(self):
     """Smoke test: the DataFrame loads and has more than 10 rows."""
     df = homework3.create_dataframe("class.db")
     # assertGreater reports the actual row count on failure.
     self.assertGreater(df.shape[0], 10)
コード例 #29
0
 def testPossibleKey(self):
     """Check that the first and third columns together identify each row.

     The columns are selected by position (0 and 2), not by name, so the
     check depends on the DataFrame's column order.
     """
     df = homework3.create_dataframe("class.db")
     nodupe_df = df.iloc[:, [0, 2]].drop_duplicates()
     # assertEqual reports both row counts when they differ.
     self.assertEqual(nodupe_df.shape[0], df.shape[0])
コード例 #30
0
 def testColumnNamesAndOrder(self):
     """Check that the columns are video_id, category_id and language, in
     that order, with no other columns.
     """
     df = homework3.create_dataframe("class.db")
     # One ordered list comparison covers names, order and count, and a
     # short DataFrame yields a readable failure instead of an IndexError.
     self.assertEqual(
         list(df.columns), ['video_id', 'category_id', 'language'])