def test_unique_entries_neg(self):
        """
        Negative test case for the unique_entries function.

        Calls unique_entries on column 'A' twice, with the third argument
        set to False and then to True, and checks that neither result
        matches a deliberately wrong expectation.
        """
        columns = ['A', 'B', 'C', 'D', 'E']

        # Dataframe that we create; the value 4 appears three times in 'A'.
        df1 = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=columns)

        # Dataframe that is NOT the same as the one the function should return.
        df2 = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 12, 8, 22], [7, 8, 9, 12, 5]],
            columns=columns)

        # List that is NOT the same as the one the function should return
        # (it still contains the repeated 4s).
        list1 = [1, 4, 4, 4, 7]

        # Assume
        subsets = XbrlSubsets()

        # Assume 1
        tn_unique_entries1 = subsets.unique_entries(df1, 'A', False)
        # Assume 2
        tn_unique_entries2 = subsets.unique_entries(df1, 'A', True)

        # Assert 1 - DataFrame.equals already yields a boolean, so assert on
        # it directly instead of comparing it with True.
        self.assertFalse(
            tn_unique_entries1.reset_index(drop=True).equals(
                df2.reset_index(drop=True)))
        # Assert 2 - comparing the values themselves gives a useful message
        # on failure.
        self.assertNotEqual(tn_unique_entries2, list1)
    def test_types(self):
        """
        Type-validation test case for the unique_entries function.

        Each argument tuple below carries one value of a different type
        than the valid calls use, and every call is expected to raise a
        TypeError.
        """
        # Assume
        frame = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=['A', 'B', 'C', 'D', 'E'])

        # Assume
        subsets = XbrlSubsets()

        invalid_argument_sets = (
            (1.0, 'A', False),          # float in place of the dataframe
            (frame, None, False),       # None in place of the column name
            (frame, ['A', 'B'], True),  # list in place of the column name
            (frame, 'A', 'False'),      # string in place of the boolean
        )

        # Assert
        for arguments in invalid_argument_sets:
            with self.assertRaises(TypeError):
                subsets.unique_entries(*arguments)
    def test_tag_extraction_pos(self):
        """
        Positive test case for the tag_extraction function.

        Extracts rows from the 'Name' column, first for the single tag 'A'
        and then for the list of tags ['A', 'C'], and compares each result
        (with its index reset) to the expected dataframe.
        """
        columns = ['Name', 'Age']

        # Dataframe that we create.
        source = pd.DataFrame(
            [['A', '10'], ['B', '20'], ['A', '30'],
             ['C', '40'], ['D', '50'], ['C', '60']],
            columns=columns)

        # The dataframe the function should return for the tag 'A'.
        expected_single = pd.DataFrame(
            [['A', '10'], ['A', '30']],
            columns=columns)

        # The dataframe the function should return for the tags ['A', 'C'].
        expected_multiple = pd.DataFrame(
            [['A', '10'], ['A', '30'], ['C', '40'], ['C', '60']],
            columns=columns)

        # Assume
        subsets = XbrlSubsets()
        actual_single = subsets.tag_extraction(source, 'Name', 'A')
        actual_multiple = subsets.tag_extraction(source, 'Name', ['A', 'C'])

        # Assert 1
        assert_frame_equal(
            actual_single.reset_index(drop=True),
            expected_single.reset_index(drop=True))
        # Assert 2
        assert_frame_equal(
            actual_multiple.reset_index(drop=True),
            expected_multiple.reset_index(drop=True))
    def test_unique_entries_pos(self):
        """
        Positive test case for the unique_entries function.

        With the third argument False the result is compared to a dataframe
        holding one row per distinct value of column 'A'; with True it is
        compared to the list of distinct values of that column.
        """
        columns = ['A', 'B', 'C', 'D', 'E']

        # Dataframe that we create.
        source = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=columns)

        # The dataframe the function should return (third argument False).
        expected_frame = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [7, 8, 9, 12, 5]],
            columns=columns)

        # The list the function should return (third argument True).
        expected_values = [1, 4, 7]

        # Assume
        subsets = XbrlSubsets()
        actual_frame = subsets.unique_entries(source, 'A', False)
        actual_values = subsets.unique_entries(source, 'A', True)

        # Assert 1
        assert_frame_equal(
            actual_frame.reset_index(drop=True),
            expected_frame.reset_index(drop=True))
        # Assert 2
        self.assertListEqual(actual_values, expected_values)
    def test_tag_extraction_neg(self):
        """
        Negative test case for the tag_extraction function.

        Extracts rows from the 'Name' column for the single tag 'A' and for
        the list of tags ['A', 'C'], and checks that each result differs
        from its own deliberately wrong dataframe (df2 and df3).
        """

        # Dataframe that we create.
        df1 = pd.DataFrame([['A', '10'], ['B', '20'], ['A', '30'], ['C', '40'],
                            ['D', '50'], ['C', '60']],
                           columns=['Name', 'Age'])

        # Dataframe that is NOT the same as the one the function should return
        # for the tag 'A': it only has the 'Age' column.
        df2 = pd.DataFrame([['10'], ['30']], columns=['Age'])

        # Dataframe that is NOT the same as the one the function should return
        # for the tags ['A', 'C']: the rows are interleaved (A, C, A, C).
        df3 = pd.DataFrame(
            [['A', '10'], ['C', '40'], ['A', '30'], ['C', '60']],
            columns=['Name', 'Age'])

        # Assume
        subsets = XbrlSubsets()

        # Assume 1
        tn_tag_extraction1 = subsets.tag_extraction(df1, 'Name', 'A')
        # Assume 2
        tn_tag_extraction2 = subsets.tag_extraction(df1, 'Name', ['A', 'C'])

        # Assert 1
        self.assertFalse(
            tn_tag_extraction1.reset_index(drop=True).equals(
                df2.reset_index(drop=True)))
        # Assert 2 - the multi-tag result is checked against df3, the wrong
        # expectation built for it, not against df2 a second time.
        self.assertFalse(
            tn_tag_extraction2.reset_index(drop=True).equals(
                df3.reset_index(drop=True)))
    def test_aggregation_neg(self):
        """
        Negative test case for the aggregation function.

        Aggregates columns 'D' and 'E' with 'first', grouped by ['A'], once
        with the last argument False and once with True, and checks that
        neither result equals a deliberately wrong dataframe.
        """
        # Dataframe that we create.
        df1 = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=['A', 'B', 'C', 'D', 'E'])

        # Dataframe that is NOT the same as the one the function should return
        # (its last value is 6, whereas df1 holds 5 in 'E' for A == 7).
        df2 = pd.DataFrame(
            [[1, 3, 19], [4, 6, 30], [7, 12, 6]],
            columns=['A', 'first  D', 'first  E']).set_index('A')

        # Dataframe that is NOT the same as the one the function should return.
        df3 = pd.DataFrame(
            [[1, 3, 19], [4, 6, 30], [7, 12, 5]],
            columns=['A', 'first  D', 'first  E']).set_index('A')

        # Assume
        subsets = XbrlSubsets()

        # Assume 1
        tn_aggregation1 = subsets.aggregation(df1, ['A'], 'first', ['D', 'E'],
                                              False)
        # Assume 2
        tn_aggregation2 = subsets.aggregation(df1, ['A'], 'first', ['D', 'E'],
                                              True)

        # Assert 1 - DataFrame.equals already yields a boolean.
        self.assertFalse(tn_aggregation1.equals(df2))
        # Assert 2
        self.assertFalse(tn_aggregation2.equals(df3))
    def test_aggregation_pos(self):
        """
        Positive test case for the aggregation function.

        Aggregates columns 'D' and 'E' with 'first', grouped by ['A'].
        With the last argument False the expected columns are named
        'first  D' and 'first  E'; with True they are named 'D' and 'E'.
        Both expected dataframes are indexed by 'A'.
        """
        # Dataframe that we create.
        source = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=['A', 'B', 'C', 'D', 'E'])

        # First 'D' and 'E' values found for each distinct value of 'A'.
        expected_rows = [[1, 3, 19], [4, 6, 30], [7, 12, 5]]

        # The dataframe that the function should return (last argument False).
        expected_prefixed = pd.DataFrame(
            expected_rows, columns=['A', 'first  D', 'first  E'])
        expected_prefixed = pd.DataFrame(expected_prefixed).set_index('A')

        # The dataframe that the function should return (last argument True).
        expected_plain = pd.DataFrame(expected_rows, columns=['A', 'D', 'E'])
        expected_plain = pd.DataFrame(expected_plain).set_index('A')

        # Assume
        subsets = XbrlSubsets()
        actual_prefixed = subsets.aggregation(
            source, ['A'], 'first', ['D', 'E'], False)
        actual_plain = subsets.aggregation(
            source, ['A'], 'first', ['D', 'E'], True)

        # Assert 1
        assert_frame_equal(actual_prefixed, expected_prefixed)
        # Assert 2
        assert_frame_equal(actual_plain, expected_plain)
    def test_values(self):
        """
        Value-validation test case for the merge function.

        Both dataframes are merged on their 'Name' columns with several
        combinations of trailing arguments, each of which is expected to
        raise a ValueError.
        """
        # Assume
        left = pd.DataFrame(
            [['A', '10'], ['B', '20']],
            columns=['Name', 'Age'])
        right = pd.DataFrame(
            [['A', 'football'], ['G', 'swimming']],
            columns=['Name', 'Sport'])

        # Assume
        subsets = XbrlSubsets()

        # Trailing positional arguments passed after the two key columns.
        rejected_options = (
            ('merge',),
            ('inner', 'inner'),
            ('both', 'inner'),
            ('both',),
        )

        # Assert
        for options in rejected_options:
            with self.assertRaises(ValueError):
                subsets.merge(left, right, 'Name', 'Name', *options)
Ejemplo n.º 9
0
    def test_str_to_date_pos(self):
        """
        Positive test case for the str_to_date function.

        Case 1 calls str_to_date with replace="y" and expects the 'Date'
        column itself to hold datetimes. Case 2 calls it with replace="n"
        and col_name="New Date Col" and expects the datetimes in a new
        column of that name next to the untouched 'Date' strings.
        """

        def build_frame():
            # A fresh frame on every call, so inputs and expectations never
            # share the same object.
            return pd.DataFrame(
                [['A', '01/01/2001'],
                 ['B', '02/02/2002'],
                 ['C', '03/03/2003']],
                columns=['Name', 'Date'])

        input_replace = build_frame()
        input_new_col = build_frame()

        # Expected result when the 'Date' column is converted in place.
        expected_replace = build_frame()
        expected_replace['Date'] = pd.to_datetime(expected_replace['Date'])

        # Expected result when the conversion goes to 'New Date Col'.
        expected_new_col = build_frame()
        expected_new_col['New Date Col'] = pd.to_datetime(
            expected_new_col['Date'])

        # Assume
        subsets = XbrlSubsets()

        # Assume 1 - replace type of Date column with datetime instead of string.
        actual_replace = subsets.str_to_date(
            input_replace, 'Date', replace="y")

        # Assume 2 - create new Date column called 'New Date Col' of type datetime.
        actual_new_col = subsets.str_to_date(
            input_new_col, 'Date', replace="n", col_name="New Date Col")

        # Assert 1
        assert_frame_equal(
            actual_replace.reset_index(drop=True),
            expected_replace.reset_index(drop=True))

        # Assert 2
        assert_frame_equal(
            actual_new_col.reset_index(drop=True),
            expected_new_col.reset_index(drop=True))
Ejemplo n.º 10
0
    def test_str_to_date_neg(self):
        """
        Negative test case for the str_to_date function.

        Runs the same two conversions as the positive case (replace="y",
        and replace="n" with col_name="New Date Col") but compares each
        result with the dataframe built for the OTHER conversion, so
        neither comparison may report equality.
        """

        df1_str_date = pd.DataFrame(
            [['A', '01/01/2001'], ['B', '02/02/2002'], ['C', '03/03/2003']],
            columns=['Name', 'Date'])
        df2_str_date = pd.DataFrame(
            [['A', '01/01/2001'], ['B', '02/02/2002'], ['C', '03/03/2003']],
            columns=['Name', 'Date'])

        # 'Date' column converted to datetime in place.
        df3_datetime_inplace = pd.DataFrame(
            [['A', '01/01/2001'], ['B', '02/02/2002'], ['C', '03/03/2003']],
            columns=['Name', 'Date'])
        df3_datetime_inplace['Date'] = pd.to_datetime(
            df3_datetime_inplace['Date'])

        # 'Date' left as strings, datetimes added as 'New Date Col'.
        df4_datetime_new_col = pd.DataFrame(
            [['A', '01/01/2001'], ['B', '02/02/2002'], ['C', '03/03/2003']],
            columns=['Name', 'Date'])
        df4_datetime_new_col['New Date Col'] = pd.to_datetime(
            df4_datetime_new_col['Date'])

        # Assume
        subsets = XbrlSubsets()

        # Assume 1 - replace type of Date column with datetime instead of string.
        tn_str_to_date = subsets.str_to_date(df1_str_date, 'Date', replace="y")

        # Assume 2 - create new Date column called 'New Date Col' of type datetime.
        tn_str_to_date_2 = subsets.str_to_date(df2_str_date,
                                               'Date',
                                               replace="n",
                                               col_name="New Date Col")

        # Assert 1 - DataFrame.equals already yields a boolean, so assert on
        # it directly instead of comparing it with False.
        self.assertFalse(tn_str_to_date.equals(df4_datetime_new_col))

        # Assert 2
        self.assertFalse(tn_str_to_date_2.equals(df3_datetime_inplace))
    def test_merge_neg(self):
        """
        Negative test case for the merge function.

        Merges df1 and df2 on their 'Name' columns with the trailing
        arguments 'inner' and 'df1', and checks that the result does not
        equal a deliberately wrong dataframe.
        """

        df1 = pd.DataFrame([['A', '10'],
                            ['B', '20']],
                           columns=['Name', 'Age'])
        df2 = pd.DataFrame([['A', 'football'],
                            ['G', 'swimming']],
                           columns=['Name', 'Sport'])

        # Dataframe that is NOT the same as the one the function should
        # return: it pairs 'A' with age '20', while df1 holds '10' for 'A'.
        df3 = pd.DataFrame([['A', '20', 'football']],
                           columns=['Name', 'Age', 'Sport'])

        # Assume
        subsets = XbrlSubsets()

        # Assume
        tn_merge = subsets.merge(df1, df2, 'Name', 'Name', 'inner', 'df1')

        # Assert - DataFrame.equals already yields a boolean.
        self.assertFalse(tn_merge.equals(df3))
    def test_types(self):
        """
        Type-validation test case for the tag_extraction function.

        Each argument tuple below carries one value of a different type
        than the valid calls use, and every call is expected to raise a
        TypeError.
        """
        # Assume
        frame = pd.DataFrame(
            [['A', '10'], ['B', '20'], ['A', '30'], ['C', '40'], ['D', '50']],
            columns=['Name', 'Age'])
        # Assume
        subsets = XbrlSubsets()

        invalid_argument_sets = (
            (1.0, 'Name', 'A'),            # float in place of the dataframe
            (frame, 1, 'A'),               # int in place of the column name
            (frame, 'Name', {'A': 'a'}),   # dict in place of the tag(s)
            (frame, 'Name', all),          # the builtin function `all`
        )

        # Assert
        for arguments in invalid_argument_sets:
            with self.assertRaises(TypeError):
                subsets.tag_extraction(*arguments)
    def test_types(self):
        """
        Type-validation test case for the aggregation function.

        Every argument tuple below is expected to make aggregation raise a
        TypeError.
        """
        # Assume
        frame = pd.DataFrame(
            [[1, 6, 2, 3, 19], [4, 5, 8, 6, 30], [4, 5, 12, 8, 22],
             [4, 7, 9, 5, 21], [7, 8, 9, 12, 5]],
            columns=['A', 'B', 'C', 'D', 'E'])
        # Assume
        subsets = XbrlSubsets()

        invalid_argument_sets = (
            (frame, 'A', 'sum', 'B', 'False'),       # string as last argument
            (1, 'A', 'sum', 'B', False),             # int as first argument
            (frame, {'A': 'a'}, 'sum', 'B', False),  # dict as second argument
            (frame, 'A', 'sum', 6, True),            # int as fourth argument
        )

        # Assert
        for arguments in invalid_argument_sets:
            with self.assertRaises(TypeError):
                subsets.aggregation(*arguments)
Ejemplo n.º 14
0
    def test_types(self):
        """
        Test types for the str_to_date function.

        Every argument tuple below is expected to make str_to_date raise a
        TypeError.
        """
        # Assume
        frame = pd.DataFrame(
            [['A', '01/01/2001'],
             ['B', '02/02/2002'],
             ['C', '03/03/2003']],
            columns=['Name', 'Date'])

        # Assume
        subsets = XbrlSubsets()

        invalid_argument_sets = (
            (1.0, 'Date'),              # float as first argument
            (frame, 5),                 # int as second argument
            (frame, ['Name', 'Date']),  # list as second argument
        )

        # Assert
        for arguments in invalid_argument_sets:
            with self.assertRaises(TypeError):
                subsets.str_to_date(*arguments)
Ejemplo n.º 15
0
    def test_values(self):
        """
        Test values for the str_to_date function.

        Every call below passes arguments that are expected to make
        str_to_date raise a ValueError.
        """
        # Assume
        frame = pd.DataFrame(
            [['A', '01/01/2001'],
             ['B', '02/02/2002'],
             ['C', '03/03/2003']],
            columns=['Name', 'Date'])

        # Assume
        subsets = XbrlSubsets()

        # (column argument, keyword arguments) for each rejected call.
        rejected_calls = (
            # Check error is raised if string not present in column names.
            ('Yellow', {}),
            # Check error if user combines two column names into one string.
            ('Name, Date', {}),
            # Check error is raised if replace is not 'y' or 'n'.
            ('Name', {'replace': 'q'}),
        )

        for column, options in rejected_calls:
            with self.assertRaises(ValueError):
                subsets.str_to_date(frame, column, **options)