def test_outlier_test():
    """Check ``outlier_test`` (method ``'b'``) against stored reference values.

    The same reference table is compared with the output for an array-based
    fit (explicit ``labels``), for a formula-based fit on a DataFrame, and
    for the ``outlier_test`` method of the fitted results, including the
    ``order`` and ``cutoff`` keywords.
    """
    endog, exog, labels = get_duncan_data()
    fit_arrays = OLS(endog, exog).fit()

    # Reference columns, already in the row order expected with order=True.
    ref_rstudent = [
        3.1345185839, -2.3970223990, 2.0438046359, -1.9309187757, 1.8870465798,
        -1.7604905300, -1.7040324156, 1.6024285876, -1.4332485037, -1.1044851583,
        1.0688582315, 1.0185271840, -0.9024219332, -0.9023876471, -0.8830953936,
        0.8265782334, 0.8089220547, 0.7682770197, 0.7319491074, -0.6665962829,
        0.5227352794, -0.5135016547, 0.5083881518, 0.4999224372, -0.4980818221,
        -0.4759717075, -0.4293565820, -0.4114056499, -0.3779540862, 0.3556874030,
        0.3409200462, 0.3062248646, 0.3038999429, -0.3030815773, -0.1873387893,
        0.1738050251, 0.1424246593, -0.1292266025, 0.1272066463, -0.0798902878,
        0.0788467222, 0.0722556991, 0.0505098280, 0.0233215136, 0.0007112055,
    ]
    ref_unadj_p = [
        0.003177202, 0.021170298, 0.047432955, 0.060427645, 0.066248120,
        0.085783008, 0.095943909, 0.116738318, 0.159368890, 0.275822623,
        0.291386358, 0.314400295, 0.372104049, 0.372122040, 0.382333561,
        0.413260793, 0.423229432, 0.446725370, 0.468363101, 0.508764039,
        0.603971990, 0.610356737, 0.613905871, 0.619802317, 0.621087703,
        0.636621083, 0.669911674, 0.682917818, 0.707414459, 0.723898263,
        0.734904667, 0.760983108, 0.762741124, 0.763360242, 0.852319039,
        0.862874018, 0.887442197, 0.897810225, 0.899398691, 0.936713197,
        0.937538115, 0.942749758, 0.959961394, 0.981506948, 0.999435989,
    ]
    raw_bonf_p = [
        0.1429741, 0.9526634, 2.1344830, 2.7192440, 2.9811654,
        3.8602354, 4.3174759, 5.2532243, 7.1716001, 12.4120180,
        13.1123861, 14.1480133, 16.7446822, 16.7454918, 17.2050103,
        18.5967357, 19.0453245, 20.1026416, 21.0763395, 22.8943818,
        27.1787396, 27.4660532, 27.6257642, 27.8911043, 27.9489466,
        28.6479487, 30.1460253, 30.7313018, 31.8336506, 32.5754218,
        33.0707100, 34.2442399, 34.3233506, 34.3512109, 38.3543568,
        38.8293308, 39.9348989, 40.4014601, 40.4729411, 42.1520939,
        42.1892152, 42.4237391, 43.1982627, 44.1678127, 44.9746195,
    ]
    # Stored values above 1 are capped at 1 before the comparison.
    ref_bonf_p = np.minimum(np.array(raw_bonf_p), 1)

    sorted_labels = [
        "minister", "reporter", "contractor",
        "insurance.agent", "machinist", "store.clerk",
        "conductor", "factory.owner", "mail.carrier",
        "streetcar.motorman", "carpenter", "coal.miner",
        "bartender", "bookkeeper", "soda.clerk",
        "chemist", "RR.engineer", "professor",
        "electrician", "gas.stn.attendant", "auto.repairman",
        "watchman", "banker", "machine.operator",
        "dentist", "waiter", "shoe.shiner",
        "welfare.worker", "plumber", "physician",
        "pilot", "engineer", "accountant",
        "lawyer", "undertaker", "barber",
        "store.manager", "truck.driver", "cook",
        "janitor", "policeman", "architect",
        "teacher", "taxi.driver", "author",
    ]

    expected = np.column_stack((ref_rstudent, ref_unadj_p, ref_bonf_p))

    # Array-based fit: labels have to be passed in explicitly.
    from_arrays = oi.outlier_test(
        fit_arrays, method="b", labels=labels, order=True
    )
    np.testing.assert_almost_equal(from_arrays.values, expected, 7)
    np.testing.assert_equal(
        from_arrays.index.tolist(), sorted_labels
    )  # pylint: disable-msg=E1103

    frame = pd.DataFrame(
        np.column_stack((endog, exog)),
        columns="y const var1 var2".split(),
        index=labels,
    )

    # check `order` with pandas bug in #3971
    fit_formula = OLS.from_formula("y ~ const + var1 + var2 - 0", frame).fit()

    via_function = oi.outlier_test(fit_formula, method="b", order=True)
    assert_almost_equal(via_function.values, expected, 7)
    assert_equal(via_function.index.tolist(), sorted_labels)

    # Unordered result sorted by hand has to agree with order=True.
    via_method = fit_formula.outlier_test(method="b").sort_values(
        ["unadj_p"], ascending=True
    )
    assert_almost_equal(via_method.values, expected, 7)
    assert_equal(via_method.index.tolist(), sorted_labels)
    assert_array_equal(via_function.index, via_method.index)

    # additional keywords in method
    method_ordered = fit_formula.outlier_test(method="b", order=True)
    assert_equal(method_ordered.index.tolist(), sorted_labels)

    method_cutoff = fit_formula.outlier_test(
        method="b", order=True, cutoff=0.15
    )
    assert_equal(method_cutoff.index.tolist(), sorted_labels[:1])
def test_outlier_test():
    """Check ``oi.outlier_test`` (method ``"b"``) against results from R.

    The reference results come from R with NA replaced by 1. Only the
    interface is exercised here because outlier_test is just a wrapper.
    """
    labels = [
        "accountant", "pilot", "architect",
        "author", "chemist", "minister",
        "professor", "dentist", "reporter",
        "engineer", "undertaker", "lawyer",
        "physician", "welfare.worker", "teacher",
        "conductor", "contractor", "factory.owner",
        "store.manager", "banker", "bookkeeper",
        "mail.carrier", "insurance.agent", "store.clerk",
        "carpenter", "electrician", "RR.engineer",
        "machinist", "auto.repairman", "plumber",
        "gas.stn.attendant", "coal.miner", "streetcar.motorman",
        "taxi.driver", "truck.driver", "machine.operator",
        "barber", "bartender", "shoe.shiner",
        "cook", "soda.clerk", "watchman",
        "janitor", "policeman", "waiter",
    ]

    # Duncan's prestige data from car: the two non-constant regressor
    # columns, one pair per observation.
    regressor_pairs = [
        (62.0, 86.0), (72.0, 76.0), (75.0, 92.0), (55.0, 90.0), (64.0, 86.0),
        (21.0, 84.0), (64.0, 93.0), (80.0, 100.0), (67.0, 87.0), (72.0, 86.0),
        (42.0, 74.0), (76.0, 98.0), (76.0, 97.0), (41.0, 84.0), (48.0, 91.0),
        (76.0, 34.0), (53.0, 45.0), (60.0, 56.0), (42.0, 44.0), (78.0, 82.0),
        (29.0, 72.0), (48.0, 55.0), (55.0, 71.0), (29.0, 50.0), (21.0, 23.0),
        (47.0, 39.0), (81.0, 28.0), (36.0, 32.0), (22.0, 22.0), (44.0, 25.0),
        (15.0, 29.0), (7.0, 7.0), (42.0, 26.0), (9.0, 19.0), (21.0, 15.0),
        (21.0, 20.0), (16.0, 26.0), (16.0, 28.0), (9.0, 17.0), (14.0, 22.0),
        (12.0, 30.0), (17.0, 25.0), (7.0, 20.0), (34.0, 47.0), (8.0, 32.0),
    ]
    # Each design row is a leading 1.0 followed by the two regressors.
    exog = [[1.0, first, second] for first, second in regressor_pairs]
    endog = [
        82.0, 83.0, 90.0, 76.0, 90.0,
        87.0, 93.0, 90.0, 52.0, 88.0,
        57.0, 89.0, 97.0, 59.0, 73.0,
        38.0, 76.0, 81.0, 45.0, 92.0,
        39.0, 34.0, 41.0, 16.0, 33.0,
        53.0, 67.0, 57.0, 26.0, 29.0,
        10.0, 15.0, 19.0, 10.0, 13.0,
        24.0, 20.0, 7.0, 3.0, 16.0,
        6.0, 11.0, 8.0, 41.0, 10.0,
    ]
    fit_arrays = OLS(endog, exog).fit()

    # Reference columns, already in the row order expected with order=True.
    ref_rstudent = [
        3.1345185839, -2.3970223990, 2.0438046359, -1.9309187757, 1.8870465798,
        -1.7604905300, -1.7040324156, 1.6024285876, -1.4332485037, -1.1044851583,
        1.0688582315, 1.0185271840, -0.9024219332, -0.9023876471, -0.8830953936,
        0.8265782334, 0.8089220547, 0.7682770197, 0.7319491074, -0.6665962829,
        0.5227352794, -0.5135016547, 0.5083881518, 0.4999224372, -0.4980818221,
        -0.4759717075, -0.4293565820, -0.4114056499, -0.3779540862, 0.3556874030,
        0.3409200462, 0.3062248646, 0.3038999429, -0.3030815773, -0.1873387893,
        0.1738050251, 0.1424246593, -0.1292266025, 0.1272066463, -0.0798902878,
        0.0788467222, 0.0722556991, 0.0505098280, 0.0233215136, 0.0007112055,
    ]
    ref_unadj_p = [
        0.003177202, 0.021170298, 0.047432955, 0.060427645, 0.066248120,
        0.085783008, 0.095943909, 0.116738318, 0.159368890, 0.275822623,
        0.291386358, 0.314400295, 0.372104049, 0.372122040, 0.382333561,
        0.413260793, 0.423229432, 0.446725370, 0.468363101, 0.508764039,
        0.603971990, 0.610356737, 0.613905871, 0.619802317, 0.621087703,
        0.636621083, 0.669911674, 0.682917818, 0.707414459, 0.723898263,
        0.734904667, 0.760983108, 0.762741124, 0.763360242, 0.852319039,
        0.862874018, 0.887442197, 0.897810225, 0.899398691, 0.936713197,
        0.937538115, 0.942749758, 0.959961394, 0.981506948, 0.999435989,
    ]
    raw_bonf_p = [
        0.1429741, 0.9526634, 2.1344830, 2.7192440, 2.9811654,
        3.8602354, 4.3174759, 5.2532243, 7.1716001, 12.4120180,
        13.1123861, 14.1480133, 16.7446822, 16.7454918, 17.2050103,
        18.5967357, 19.0453245, 20.1026416, 21.0763395, 22.8943818,
        27.1787396, 27.4660532, 27.6257642, 27.8911043, 27.9489466,
        28.6479487, 30.1460253, 30.7313018, 31.8336506, 32.5754218,
        33.0707100, 34.2442399, 34.3233506, 34.3512109, 38.3543568,
        38.8293308, 39.9348989, 40.4014601, 40.4729411, 42.1520939,
        42.1892152, 42.4237391, 43.1982627, 44.1678127, 44.9746195,
    ]
    # Stored values above 1 are capped at 1 before the comparison.
    ref_bonf_p = np.minimum(np.array(raw_bonf_p), 1)

    sorted_labels = [
        "minister", "reporter", "contractor",
        "insurance.agent", "machinist", "store.clerk",
        "conductor", "factory.owner", "mail.carrier",
        "streetcar.motorman", "carpenter", "coal.miner",
        "bartender", "bookkeeper", "soda.clerk",
        "chemist", "RR.engineer", "professor",
        "electrician", "gas.stn.attendant", "auto.repairman",
        "watchman", "banker", "machine.operator",
        "dentist", "waiter", "shoe.shiner",
        "welfare.worker", "plumber", "physician",
        "pilot", "engineer", "accountant",
        "lawyer", "undertaker", "barber",
        "store.manager", "truck.driver", "cook",
        "janitor", "policeman", "architect",
        "teacher", "taxi.driver", "author",
    ]

    expected = np.column_stack((ref_rstudent, ref_unadj_p, ref_bonf_p))
    ordered = oi.outlier_test(
        fit_arrays, method="b", labels=labels, order=True
    )
    np.testing.assert_almost_equal(ordered.values, expected, 7)
    np.testing.assert_equal(
        ordered.index.tolist(), sorted_labels
    )  # pylint: disable-msg=E1103
def test_outlier_test():
    """Check ``outlier_test`` (method ``'b'``) against stored reference values.

    The same reference table is compared with the output for an array-based
    fit (explicit ``labels``), for a formula-based fit on a DataFrame, and
    for the ``outlier_test`` method of the fitted results, including the
    ``order`` and ``cutoff`` keywords.
    """
    endog, exog, labels = get_duncan_data()
    fit_arrays = OLS(endog, exog).fit()

    # Reference columns, already in the row order expected with order=True.
    ref_rstudent = [
        3.1345185839, -2.3970223990, 2.0438046359, -1.9309187757, 1.8870465798,
        -1.7604905300, -1.7040324156, 1.6024285876, -1.4332485037, -1.1044851583,
        1.0688582315, 1.0185271840, -0.9024219332, -0.9023876471, -0.8830953936,
        0.8265782334, 0.8089220547, 0.7682770197, 0.7319491074, -0.6665962829,
        0.5227352794, -0.5135016547, 0.5083881518, 0.4999224372, -0.4980818221,
        -0.4759717075, -0.4293565820, -0.4114056499, -0.3779540862, 0.3556874030,
        0.3409200462, 0.3062248646, 0.3038999429, -0.3030815773, -0.1873387893,
        0.1738050251, 0.1424246593, -0.1292266025, 0.1272066463, -0.0798902878,
        0.0788467222, 0.0722556991, 0.0505098280, 0.0233215136, 0.0007112055,
    ]
    ref_unadj_p = [
        0.003177202, 0.021170298, 0.047432955, 0.060427645, 0.066248120,
        0.085783008, 0.095943909, 0.116738318, 0.159368890, 0.275822623,
        0.291386358, 0.314400295, 0.372104049, 0.372122040, 0.382333561,
        0.413260793, 0.423229432, 0.446725370, 0.468363101, 0.508764039,
        0.603971990, 0.610356737, 0.613905871, 0.619802317, 0.621087703,
        0.636621083, 0.669911674, 0.682917818, 0.707414459, 0.723898263,
        0.734904667, 0.760983108, 0.762741124, 0.763360242, 0.852319039,
        0.862874018, 0.887442197, 0.897810225, 0.899398691, 0.936713197,
        0.937538115, 0.942749758, 0.959961394, 0.981506948, 0.999435989,
    ]
    raw_bonf_p = [
        0.1429741, 0.9526634, 2.1344830, 2.7192440, 2.9811654,
        3.8602354, 4.3174759, 5.2532243, 7.1716001, 12.4120180,
        13.1123861, 14.1480133, 16.7446822, 16.7454918, 17.2050103,
        18.5967357, 19.0453245, 20.1026416, 21.0763395, 22.8943818,
        27.1787396, 27.4660532, 27.6257642, 27.8911043, 27.9489466,
        28.6479487, 30.1460253, 30.7313018, 31.8336506, 32.5754218,
        33.0707100, 34.2442399, 34.3233506, 34.3512109, 38.3543568,
        38.8293308, 39.9348989, 40.4014601, 40.4729411, 42.1520939,
        42.1892152, 42.4237391, 43.1982627, 44.1678127, 44.9746195,
    ]
    # Stored values above 1 are capped at 1 before the comparison.
    ref_bonf_p = np.minimum(np.array(raw_bonf_p), 1)

    sorted_labels = [
        "minister", "reporter", "contractor",
        "insurance.agent", "machinist", "store.clerk",
        "conductor", "factory.owner", "mail.carrier",
        "streetcar.motorman", "carpenter", "coal.miner",
        "bartender", "bookkeeper", "soda.clerk",
        "chemist", "RR.engineer", "professor",
        "electrician", "gas.stn.attendant", "auto.repairman",
        "watchman", "banker", "machine.operator",
        "dentist", "waiter", "shoe.shiner",
        "welfare.worker", "plumber", "physician",
        "pilot", "engineer", "accountant",
        "lawyer", "undertaker", "barber",
        "store.manager", "truck.driver", "cook",
        "janitor", "policeman", "architect",
        "teacher", "taxi.driver", "author",
    ]

    expected = np.column_stack((ref_rstudent, ref_unadj_p, ref_bonf_p))

    # Array-based fit: labels have to be passed in explicitly.
    from_arrays = oi.outlier_test(
        fit_arrays, method="b", labels=labels, order=True
    )
    np.testing.assert_almost_equal(from_arrays.values, expected, 7)
    np.testing.assert_equal(
        from_arrays.index.tolist(), sorted_labels
    )  # pylint: disable-msg=E1103

    frame = pd.DataFrame(
        np.column_stack((endog, exog)),
        columns="y const var1 var2".split(),
        index=labels,
    )

    # check `order` with pandas bug in #3971
    fit_formula = OLS.from_formula("y ~ const + var1 + var2 - 0", frame).fit()

    via_function = oi.outlier_test(fit_formula, method="b", order=True)
    assert_almost_equal(via_function.values, expected, 7)
    assert_equal(via_function.index.tolist(), sorted_labels)

    # Unordered result sorted by hand has to agree with order=True.
    via_method = fit_formula.outlier_test(method="b").sort_values(
        ["unadj_p"], ascending=True
    )
    assert_almost_equal(via_method.values, expected, 7)
    assert_equal(via_method.index.tolist(), sorted_labels)
    assert_array_equal(via_function.index, via_method.index)

    # additional keywords in method
    method_ordered = fit_formula.outlier_test(method="b", order=True)
    assert_equal(method_ordered.index.tolist(), sorted_labels)

    method_cutoff = fit_formula.outlier_test(
        method="b", order=True, cutoff=0.15
    )
    assert_equal(method_cutoff.index.tolist(), sorted_labels[:1])
def test_outlier_test():
    """Check ``oi.outlier_test`` (method ``'b'``) against results from R.

    The reference results come from R with NA replaced by 1. Only the
    interface is exercised here because outlier_test is just a wrapper.
    """
    labels = [
        'accountant', 'pilot', 'architect',
        'author', 'chemist', 'minister',
        'professor', 'dentist', 'reporter',
        'engineer', 'undertaker', 'lawyer',
        'physician', 'welfare.worker', 'teacher',
        'conductor', 'contractor', 'factory.owner',
        'store.manager', 'banker', 'bookkeeper',
        'mail.carrier', 'insurance.agent', 'store.clerk',
        'carpenter', 'electrician', 'RR.engineer',
        'machinist', 'auto.repairman', 'plumber',
        'gas.stn.attendant', 'coal.miner', 'streetcar.motorman',
        'taxi.driver', 'truck.driver', 'machine.operator',
        'barber', 'bartender', 'shoe.shiner',
        'cook', 'soda.clerk', 'watchman',
        'janitor', 'policeman', 'waiter',
    ]

    # Duncan's prestige data from car: the two non-constant regressor
    # columns, one pair per observation.
    regressor_pairs = [
        (62.0, 86.0), (72.0, 76.0), (75.0, 92.0), (55.0, 90.0), (64.0, 86.0),
        (21.0, 84.0), (64.0, 93.0), (80.0, 100.0), (67.0, 87.0), (72.0, 86.0),
        (42.0, 74.0), (76.0, 98.0), (76.0, 97.0), (41.0, 84.0), (48.0, 91.0),
        (76.0, 34.0), (53.0, 45.0), (60.0, 56.0), (42.0, 44.0), (78.0, 82.0),
        (29.0, 72.0), (48.0, 55.0), (55.0, 71.0), (29.0, 50.0), (21.0, 23.0),
        (47.0, 39.0), (81.0, 28.0), (36.0, 32.0), (22.0, 22.0), (44.0, 25.0),
        (15.0, 29.0), (7.0, 7.0), (42.0, 26.0), (9.0, 19.0), (21.0, 15.0),
        (21.0, 20.0), (16.0, 26.0), (16.0, 28.0), (9.0, 17.0), (14.0, 22.0),
        (12.0, 30.0), (17.0, 25.0), (7.0, 20.0), (34.0, 47.0), (8.0, 32.0),
    ]
    # Each design row is a leading 1.0 followed by the two regressors.
    exog = [[1.0, first, second] for first, second in regressor_pairs]
    endog = [
        82.0, 83.0, 90.0, 76.0, 90.0,
        87.0, 93.0, 90.0, 52.0, 88.0,
        57.0, 89.0, 97.0, 59.0, 73.0,
        38.0, 76.0, 81.0, 45.0, 92.0,
        39.0, 34.0, 41.0, 16.0, 33.0,
        53.0, 67.0, 57.0, 26.0, 29.0,
        10.0, 15.0, 19.0, 10.0, 13.0,
        24.0, 20.0, 7.0, 3.0, 16.0,
        6.0, 11.0, 8.0, 41.0, 10.0,
    ]
    fit_arrays = OLS(endog, exog).fit()

    # Reference columns, already in the row order expected with order=True.
    ref_rstudent = [
        3.1345185839, -2.3970223990, 2.0438046359, -1.9309187757, 1.8870465798,
        -1.7604905300, -1.7040324156, 1.6024285876, -1.4332485037, -1.1044851583,
        1.0688582315, 1.0185271840, -0.9024219332, -0.9023876471, -0.8830953936,
        0.8265782334, 0.8089220547, 0.7682770197, 0.7319491074, -0.6665962829,
        0.5227352794, -0.5135016547, 0.5083881518, 0.4999224372, -0.4980818221,
        -0.4759717075, -0.4293565820, -0.4114056499, -0.3779540862, 0.3556874030,
        0.3409200462, 0.3062248646, 0.3038999429, -0.3030815773, -0.1873387893,
        0.1738050251, 0.1424246593, -0.1292266025, 0.1272066463, -0.0798902878,
        0.0788467222, 0.0722556991, 0.0505098280, 0.0233215136, 0.0007112055,
    ]
    ref_unadj_p = [
        0.003177202, 0.021170298, 0.047432955, 0.060427645, 0.066248120,
        0.085783008, 0.095943909, 0.116738318, 0.159368890, 0.275822623,
        0.291386358, 0.314400295, 0.372104049, 0.372122040, 0.382333561,
        0.413260793, 0.423229432, 0.446725370, 0.468363101, 0.508764039,
        0.603971990, 0.610356737, 0.613905871, 0.619802317, 0.621087703,
        0.636621083, 0.669911674, 0.682917818, 0.707414459, 0.723898263,
        0.734904667, 0.760983108, 0.762741124, 0.763360242, 0.852319039,
        0.862874018, 0.887442197, 0.897810225, 0.899398691, 0.936713197,
        0.937538115, 0.942749758, 0.959961394, 0.981506948, 0.999435989,
    ]
    raw_bonf_p = [
        0.1429741, 0.9526634, 2.1344830, 2.7192440, 2.9811654,
        3.8602354, 4.3174759, 5.2532243, 7.1716001, 12.4120180,
        13.1123861, 14.1480133, 16.7446822, 16.7454918, 17.2050103,
        18.5967357, 19.0453245, 20.1026416, 21.0763395, 22.8943818,
        27.1787396, 27.4660532, 27.6257642, 27.8911043, 27.9489466,
        28.6479487, 30.1460253, 30.7313018, 31.8336506, 32.5754218,
        33.0707100, 34.2442399, 34.3233506, 34.3512109, 38.3543568,
        38.8293308, 39.9348989, 40.4014601, 40.4729411, 42.1520939,
        42.1892152, 42.4237391, 43.1982627, 44.1678127, 44.9746195,
    ]
    # Stored values above 1 are capped at 1 before the comparison.
    ref_bonf_p = np.minimum(np.array(raw_bonf_p), 1)

    sorted_labels = [
        "minister", "reporter", "contractor",
        "insurance.agent", "machinist", "store.clerk",
        "conductor", "factory.owner", "mail.carrier",
        "streetcar.motorman", "carpenter", "coal.miner",
        "bartender", "bookkeeper", "soda.clerk",
        "chemist", "RR.engineer", "professor",
        "electrician", "gas.stn.attendant", "auto.repairman",
        "watchman", "banker", "machine.operator",
        "dentist", "waiter", "shoe.shiner",
        "welfare.worker", "plumber", "physician",
        "pilot", "engineer", "accountant",
        "lawyer", "undertaker", "barber",
        "store.manager", "truck.driver", "cook",
        "janitor", "policeman", "architect",
        "teacher", "taxi.driver", "author",
    ]

    expected = np.column_stack((ref_rstudent, ref_unadj_p, ref_bonf_p))
    ordered = oi.outlier_test(
        fit_arrays, method='b', labels=labels, order=True
    )
    np.testing.assert_almost_equal(ordered.values, expected, 7)
    np.testing.assert_equal(
        ordered.index.tolist(), sorted_labels
    )  # pylint: disable-msg=E1103