예제 #1
0
 def test_non_zero_df(self):
     tested_object = GeometricMeanNormalization(self.dummy_df,
                                                zero_threshold=80)
     data = [[255, 26, 48, 75], [955, 222, 46, 65], [89, 54, 145, 29]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", "Gen_3", 'Gen_4']
     expected_result = pd.DataFrame(data, columns=column_names,
                                    index=ind).astype('float')
     pd.testing.assert_frame_equal(tested_object.non_zero_df(self.dummy_df),
                                   expected_result)
예제 #2
0
 def test_scaling_factor_zero_thresh_70(self):
     tested_object = GeometricMeanNormalization(self.dummy_df,
                                                zero_threshold=70)
     data = [3.495248, 0.612726, 0.699505, 0.432524]
     ind = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     expected_result = pd.Series(data, index=ind)
     pd.testing.assert_series_equal(tested_object.scaling_factors,
                                    expected_result)
예제 #3
0
 def test_scaling_factor_zero_thresh_80(self):
     tested_object = GeometricMeanNormalization(self.dummy_df,
                                                zero_threshold=80)
     data = [3.648263, 0.805390, 0.686732, 0.432524]
     ind = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     expected_result = pd.Series(data, index=ind)
     pd.testing.assert_series_equal(tested_object.scaling_factors,
                                    expected_result)
예제 #4
0
 def test_removed_zero_df_None(self):
     data = [[255, 26, 48, 75], [366, 0, 78, 0], [955, 0, 46, 65],
             [89, 54, 145, 29]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", 'Gen_2', "Gen_3", 'Gen_4']
     dummy_df = pd.DataFrame(data, columns=column_names, index=ind)
     tested_object = GeometricMeanNormalization(dummy_df)
     expected_result = None
     self.assertEqual(tested_object.removed_zero_df, expected_result)
예제 #5
0
    def test_log_df(self):
        input_data = [[255, 26, 48, 75], [955, 222, 46, 65], [89, 54, 145, 29]]
        input_column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        input_ind = ["Gen_1", "Gen_3", 'Gen_4']
        input_df = pd.DataFrame(input_data,
                                columns=input_column_names,
                                index=input_ind)
        tested_object = GeometricMeanNormalization(self.dummy_df)

        data = [[5.541264, 3.258097, 3.871201, 4.317488],
                [6.861711, 5.402677, 3.828641, 4.174387],
                [4.488636, 3.988984, 4.976734, 3.367296]]
        column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        ind = ["Gen_1", "Gen_3", 'Gen_4']
        expected_result = pd.DataFrame(data, columns=column_names, index=ind)

        pd.testing.assert_frame_equal(tested_object.log_df(input_df),
                                      expected_result)
예제 #6
0
    def test_log_base_n_df(self):
        input_data = [[255, 26, 48, 75], [955, 222, 46, 65], [89, 54, 145, 29]]
        input_column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        input_ind = ["Gen_1", "Gen_3", 'Gen_4']
        input_df = pd.DataFrame(input_data,
                                columns=input_column_names,
                                index=input_ind)
        tested_object = GeometricMeanNormalization(self.dummy_df,
                                                   log_number=10)

        data = [[2.406540, 1.414973, 1.681241, 1.875061],
                [2.980003, 2.346353, 1.662758, 1.812913],
                [1.949390, 1.732394, 2.161368, 1.462398]]
        column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        ind = ["Gen_1", "Gen_3", 'Gen_4']
        expected_result = pd.DataFrame(data, columns=column_names, index=ind)

        pd.testing.assert_frame_equal(tested_object.log_df(input_df),
                                      expected_result)
예제 #7
0
    def test_calculating_and_substracting_mean_row(self):
        input_data = [[5.541264, 3.258097, 3.871201, 4.317488],
                      [6.861711, 5.402677, 3.828641, 4.174387],
                      [4.488636, 3.988984, 4.976734, 3.367296]]
        input_column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        input_ind = ["Gen_1", "Gen_3", 'Gen_4']
        input_df = pd.DataFrame(input_data,
                                columns=input_column_names,
                                index=input_ind)
        tested_object = GeometricMeanNormalization(self.dummy_df)
        data = [[1.294251, -0.988916, -0.375811, 0.070475],
                [1.794857, 0.335823, -1.238213, -0.892467],
                [0.283224, -0.216428, 0.771321, -0.838117]]
        column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
        ind = ["Gen_1", "Gen_3", 'Gen_4']
        expected_result = pd.DataFrame(data, columns=column_names, index=ind)

        pd.testing.assert_frame_equal(
            tested_object.calculating_and_substracting_mean_row(
                input_df).round(6), expected_result)
예제 #8
0
 def test_normalized_df(self):
     tested_object = GeometricMeanNormalization(self.dummy_df,
                                                zero_threshold=100)
     data = [[69.896271, 32.282490, 69.896271, 173.400644],
             [100.321707, 57.115175, 113.581441, 0.000000],
             [261.768388, 275.642802, 66.983926, 150.280558],
             [24.395169, 67.048249, 211.144986, 67.048249]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", "Gen_2", "Gen_3", 'Gen_4']
     expected_result = pd.DataFrame(data, columns=column_names, index=ind)
     pd.testing.assert_frame_equal(tested_object.normalized_df,
                                   expected_result)
예제 #9
0
 def test_normalized_df_replace_0_to_1(self):
     tested_object = GeometricMeanNormalization(self.dummy_df,
                                                zero_threshold=100,
                                                replace_0_to_1=True)
     data = [[52.757471, 24.026807, 33.691852, 178.111683],
             [75.722488, 42.508965, 54.749259, 0.000000],
             [197.581903, 205.151963, 32.288025, 154.363458],
             [18.413392, 49.901829, 101.777469, 68.869851]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", "Gen_2", "Gen_3", 'Gen_4']
     expected_result = pd.DataFrame(data, columns=column_names, index=ind)
     pd.testing.assert_frame_equal(tested_object.normalized_df,
                                   expected_result)
예제 #10
0
 def test_scaling_factor_zero_thresh_80_more_zeros(self):
     data = [[255, 26, 48, 75], [366, 0, 78, 0], [955, 0, 46, 65],
             [89, 54, 145, 29]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", 'Gen_2', "Gen_3", 'Gen_4']
     more_zero_example = pd.DataFrame(data, columns=column_names, index=ind)
     tested_object = GeometricMeanNormalization(more_zero_example,
                                                zero_threshold=80)
     data = [2.487833, 0.588685, 1.424677, 0.752771]
     ind = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     expected_result = pd.Series(data, index=ind)
     pd.testing.assert_series_equal(tested_object.scaling_factors,
                                    expected_result)
예제 #11
0
 def test_removed_zero_df(self):
     data = [[255, 26, 48, 75], [366, 0, 78, 0], [955, 0, 46, 65],
             [89, 54, 145, 29]]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ["Gen_1", 'Gen_2', "Gen_3", 'Gen_4']
     dummy_df = pd.DataFrame(data, columns=column_names, index=ind)
     tested_object = GeometricMeanNormalization(dummy_df)
     data = [
         [366, 0, 78, 0],
         [955, 0, 46, 65],
     ]
     column_names = ['Sample_1', 'Sample_2', 'Sample_3', 'Sample_4']
     ind = ['Gen_2', "Gen_3"]
     expected_result = pd.DataFrame(data, columns=column_names, index=ind)
     scaling_factors = tested_object.scaling_factors  # noqa
     pd.testing.assert_frame_equal(tested_object.removed_zero_df,
                                   expected_result)
예제 #12
0
 def test_multiindex_normalization_second_element(self):
     tested_object = pd.DataFrame.from_dict(
         {
             ('genus_1', 'specie_1'): {
                 '1': 3,
                 '2': 2,
                 '3': 1,
                 '4': 0
             },
             ('genus_1', 'specie_3'): {
                 '1': 5,
                 '2': 8,
                 '3': 4,
                 '4': 7
             },
             ('genus_2', 'specie_2'): {
                 '1': 25,
                 '2': 6,
                 '3': 3,
                 '4': 9
             },
             ('genus_2', 'specie_4'): {
                 '1': 9,
                 '2': 18,
                 '3': 6,
                 '4': 3
             },
             ('genus_3', 'specie_5'): {
                 '1': 5,
                 '2': 0,
                 '3': 3,
                 '4': 2
             },
             ('genus_3', 'specie_6'): {
                 '1': 19,
                 '2': 8,
                 '3': 0,
                 '4': 0
             },
         },
         orient='index')
     tested_object.columns.name = 'sample'
     tested_object.index.set_names(['genus', 'specie'], inplace=True)
     normalized_tested_object = GeometricMeanNormalization(
         tested_object, normalization_level=1)
     expected_result = pd.DataFrame.from_dict(
         {
             ('genus_1', 'specie_1'): {
                 '1': 2.449490,
                 '2': 1.446254,
                 '3': 1.446254,
                 '4': 0.000000
             },
             ('genus_1', 'specie_3'): {
                 '1': 4.082483,
                 '2': 5.785015,
                 '3': 5.785015,
                 '4': 6.204679
             },
             ('genus_2', 'specie_2'): {
                 '1': 20.412415,
                 '2': 4.338761,
                 '3': 4.338761,
                 '4': 7.977444
             },
             ('genus_2', 'specie_4'): {
                 '1': 7.348469,
                 '2': 13.016284,
                 '3': 8.677523,
                 '4': 2.659148
             },
             ('genus_3', 'specie_5'): {
                 '1': 4.082483,
                 '2': 0.000000,
                 '3': 4.338761,
                 '4': 1.772765
             },
             ('genus_3', 'specie_6'): {
                 '1': 15.513435,
                 '2': 5.785015,
                 '3': 0.000000,
                 '4': 0.000000
             },
         },
         orient='index')
     expected_result.columns.name = 'sample'
     expected_result.index.set_names(['genus', 'specie'], inplace=True)
     pd.testing.assert_frame_equal(normalized_tested_object.normalized_df,
                                   expected_result)
예제 #13
0
 def test_multiindex_normalization_first_element(self):
     tested_object = pd.DataFrame.from_dict(
         {
             ('genus_1', 'specie_1'): {
                 '1': 3,
                 '2': 2,
                 '3': 1,
                 '4': 0
             },
             ('genus_1', 'specie_3'): {
                 '1': 5,
                 '2': 8,
                 '3': 4,
                 '4': 7
             },
             ('genus_2', 'specie_2'): {
                 '1': 25,
                 '2': 6,
                 '3': 3,
                 '4': 9
             },
             ('genus_2', 'specie_4'): {
                 '1': 9,
                 '2': 18,
                 '3': 6,
                 '4': 3
             },
             ('genus_3', 'specie_5'): {
                 '1': 5,
                 '2': 0,
                 '3': 3,
                 '4': 2
             },
             ('genus_3', 'specie_6'): {
                 '1': 19,
                 '2': 8,
                 '3': 0,
                 '4': 0
             },
         },
         orient='index')
     tested_object.columns.name = 'sample'
     tested_object.index.set_names(['genus', 'specie'], inplace=True)
     normalized_tested_object = GeometricMeanNormalization(
         tested_object, normalization_level='genus')
     expected_result = pd.DataFrame.from_dict(
         {
             ('genus_1', 'specie_1'): {
                 '1': 1.520270,
                 '2': 1.454854,
                 '3': 1.914414,
                 '4': 0.000000
             },
             ('genus_1', 'specie_3'): {
                 '1': 2.533784,
                 '2': 5.819417,
                 '3': 7.657658,
                 '4': 10.050676
             },
             ('genus_2', 'specie_2'): {
                 '1': 12.668919,
                 '2': 4.364563,
                 '3': 5.743243,
                 '4': 12.922298
             },
             ('genus_2', 'specie_4'): {
                 '1': 4.560811,
                 '2': 13.093689,
                 '3': 11.486487,
                 '4': 4.307433
             },
             ('genus_3', 'specie_5'): {
                 '1': 2.533784,
                 '2': 0.000000,
                 '3': 5.7432433,
                 '4': 2.871622
             },
             ('genus_3', 'specie_6'): {
                 '1': 9.628379,
                 '2': 5.819417,
                 '3': 0.000000,
                 '4': 0.000000
             },
         },
         orient='index')
     expected_result.columns.name = 'sample'
     expected_result.index.set_names(['genus', 'specie'], inplace=True)
     pd.testing.assert_frame_equal(normalized_tested_object.normalized_df,
                                   expected_result)
예제 #14
0
 def test_check_format(self):
     tested_object = GeometricMeanNormalization(self.dummy_df)
     pd.testing.assert_frame_equal(tested_object.df, self.dummy_df)