def test_selector(self):
    """Check Selector(INDUSTRY_NEUTRAL).fit(...).predict(...) against a fixed result.

    Inputs are a two-date industry-weight frame and a container holding a
    ``score`` factor and an ``industry_code`` factor for five tickers on
    two dates. The expected frame lists the rows, in order, with their
    ``score``, ``industry_code`` and ``weight`` values.
    """
    jan_end, feb_end = dt(2014, 1, 30), dt(2014, 2, 28)

    # Industry weight table indexed by the project's industry-weight index names.
    weight_index = pd.MultiIndex.from_product(
        [[jan_end, feb_end], ['a', 'b', 'other']],
        names=INDEX_INDUSTRY_WEIGHT.full_index)
    industry_weight = pd.DataFrame([0.5, 0.4, 0.1, 0.5, 0.3, 0.2],
                                   index=weight_index)

    # Raw factor data: the dates are given as strings here.
    factor_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'],
         ['001', '002', '003', '004', '005']],
        names=INDEX_FACTOR.full_index)
    raw = pd.DataFrame(
        {
            'score': [2, 3, 3, 8, 4, 5, 9, 11, 2, 0],
            'industry_code': ['a', 'a', 'a', 'b', 'b',
                              'a', 'a', 'other', 'b', 'b'],
        },
        index=factor_index)

    score_factor = Factor(data=raw['score'],
                          name='score',
                          property_dict={'type': FactorType.SCORE})
    industry_factor = Factor(data=raw['industry_code'],
                             name='industry_code',
                             property_dict={'type': FactorType.INDUSTY_CODE})

    container = FactorContainer(start_date='2014-01-30',
                                end_date='2014-02-28')
    container.add_factor(score_factor)
    container.add_factor(industry_factor)

    selector = Selector(industry_weight=industry_weight,
                        method=SelectionMethod.INDUSTRY_NEUTRAL)
    calculated = selector.fit(container).predict(container)

    # Four rows are expected for the first date and five for the second.
    expected_index = pd.MultiIndex.from_arrays(
        [[jan_end] * 4 + [feb_end] * 5,
         ['002', '003', '004', '005', '002', '001', '004', '005', '003']],
        names=['trade_date', 'ticker'])
    expected = pd.DataFrame(
        {
            'score': [3, 3, 8, 4, 9, 5, 2, 0, 11],
            'industry_code': ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'other'],
            'weight': [0.25, 0.25, 0.2, 0.2, 0.25, 0.25, 0.15, 0.15, 0.2],
        },
        index=expected_index,
        dtype=object)
    # Pin the column order explicitly before comparing.
    expected = expected[['score', 'industry_code', 'weight']]

    assert_frame_equal(calculated, expected)
def test_imputer_2(self):
    """Check FactorImputer with the MEAN strategy on a single Factor.

    The imputer is run twice on the same factor: once with its default
    output, compared as a DataFrame, and once after
    ``set_out_container(True)``, where the result must be a
    ``FactorContainer`` whose ``property`` and ``data`` match a
    hand-built container.
    """
    index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])
    data = pd.DataFrame(index=index,
                        data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
    factor_test = Factor(data=data, name='test1')

    # The expected frames are indexed by datetime objects, not strings.
    index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])

    fi = FactorImputer(numerical_strategy=NAStrategy.MEAN)
    calculated = fi.fit_transform(factor_test)
    # The single NaN is expected to become 2.333... (mean of 1, 3, 3).
    expected = pd.DataFrame(
        {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
        index=index)
    assert_frame_equal(calculated, expected)

    # Second run: ask for a container as output.
    fi.set_out_container(True)
    calculated = fi.fit_transform(factor_test)
    expected = FactorContainer(start_date='2014-01-30',
                               end_date='2014-02-28')
    factor = Factor(
        data=pd.DataFrame(
            {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
            index=index),
        name='test1')
    expected.add_factor(factor)

    # assertIsInstance is not stripped under ``python -O`` and reports the
    # actual type on failure, unlike a bare ``assert isinstance(...)``.
    self.assertIsInstance(calculated, FactorContainer)
    self.assertEqual(calculated.property, expected.property)
    assert_frame_equal(calculated.data, expected.data)
def test_imputer_1(self):
    """Check FactorImputer on a container with MOST_FREQ, MEDIAN and CUSTOM strategies.

    Three numerical factors, each with one NaN, are imputed with
    MOST_FREQ and then MEDIAN; both runs are compared with the same
    expected values. An industry-code factor with one NaN is then added
    and imputed with the custom value ``'other'``.
    """
    index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])
    data1 = pd.DataFrame(index=index,
                         data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
    factor_test1 = Factor(data=data1, name='test1')
    data2 = pd.DataFrame(index=index,
                         data=[3.0, 2.0, 3.0, 7.0, 7.0, np.nan, 6.0, 6.0])
    factor_test2 = Factor(data=data2, name='test2')
    data3 = pd.DataFrame(index=index,
                         data=[3.0, 3.0, np.nan, 5.0, 6.0, 7.0, 6.0, 6.0])
    factor_test3 = Factor(data=data3, name='test3')
    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2, factor_test3])

    # The expected frames are indexed by datetime objects, not strings.
    index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])

    calculated = FactorImputer(
        numerical_strategy=NAStrategy.MOST_FREQ).fit_transform(fc)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
        },
        index=index)
    assert_frame_equal(calculated, expected)

    calculated = FactorImputer(
        numerical_strategy=NAStrategy.MEDIAN).fit_transform(fc)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
        },
        index=index)
    assert_frame_equal(calculated, expected)

    # Add a categorical factor with one missing entry.
    industry = pd.DataFrame(
        index=index, data=['a', 'a', 'a', 'a', 'a', 'a', np.nan, 'a'])
    factor_industry = Factor(data=industry,
                             name='industry',
                             property_dict={'type': FactorType.INDUSTY_CODE})
    fc.add_factor(factor=factor_industry)

    calculated = FactorImputer(numerical_strategy=NAStrategy.MEDIAN,
                               categorical_strategy=NAStrategy.CUSTOM,
                               custom_value='other').fit_transform(fc)
    calculated.sort_index(axis=1, inplace=True)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
            'industry': ['a', 'a', 'a', 'a', 'a', 'a', 'other', 'a'],
        },
        index=index,
        dtype=object)
    # ``calculated`` has its columns sorted, so sort ``expected`` too.
    # Recent pandas keeps dict insertion order ('industry' last), which
    # would otherwise make the column order differ from the sorted result.
    expected.sort_index(axis=1, inplace=True)
    assert_frame_equal(calculated, expected)
def test_selector(self):
    """Compare the INDUSTRY_NEUTRAL Selector output with a hand-written frame.

    The selector is fitted on, and then predicts from, a container with a
    ``score`` factor and an ``industry_code`` factor; the industry weights
    are supplied as a separate frame.
    """
    first_date = dt(2014, 1, 30)
    second_date = dt(2014, 2, 28)
    industries = ['a', 'b', 'other']
    tickers = ['001', '002', '003', '004', '005']

    industry_weight = pd.DataFrame(
        [0.5, 0.4, 0.1, 0.5, 0.3, 0.2],
        index=pd.MultiIndex.from_product(
            [[first_date, second_date], industries],
            names=INDEX_INDUSTRY_WEIGHT.full_index))

    source = pd.DataFrame(
        {
            'score': [2, 3, 3, 8, 4, 5, 9, 11, 2, 0],
            'industry_code': ['a', 'a', 'a', 'b', 'b',
                              'a', 'a', 'other', 'b', 'b'],
        },
        index=pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], tickers],
            names=INDEX_FACTOR.full_index))

    score = Factor(data=source['score'],
                   name='score',
                   property_dict={'type': FactorType.SCORE})
    industry_code = Factor(data=source['industry_code'],
                           name='industry_code',
                           property_dict={'type': FactorType.INDUSTY_CODE})

    fc = FactorContainer(start_date='2014-01-30', end_date='2014-02-28')
    for factor in (score, industry_code):
        fc.add_factor(factor)

    calculated = Selector(
        industry_weight=industry_weight,
        method=SelectionMethod.INDUSTRY_NEUTRAL).fit(fc).predict(fc)

    # Expected rows: (date, ticker) pairs in the exact order to be returned.
    expected_dates = [first_date, first_date, first_date, first_date,
                      second_date, second_date, second_date, second_date,
                      second_date]
    expected_tickers = ['002', '003', '004', '005',
                        '002', '001', '004', '005', '003']
    expected = pd.DataFrame(
        {
            'score': [3, 3, 8, 4, 9, 5, 2, 0, 11],
            'industry_code': ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'other'],
            'weight': [0.25, 0.25, 0.2, 0.2, 0.25, 0.25, 0.15, 0.15, 0.2],
        },
        index=pd.MultiIndex.from_arrays([expected_dates, expected_tickers],
                                        names=['trade_date', 'ticker']),
        dtype=object)
    expected = expected[['score', 'industry_code', 'weight']]

    assert_frame_equal(calculated, expected)
def test_imputer_2(self):
    """Check MEAN imputation of a single Factor, as a frame and as a container.

    First the default output of ``fit_transform`` is compared with an
    expected DataFrame. Then ``set_out_container(True)`` is called and
    the result is checked to be a ``FactorContainer`` with the same
    ``property`` and ``data`` as a hand-built container.
    """
    index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])
    data = pd.DataFrame(index=index,
                        data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
    factor_test = Factor(data=data, name='test1')

    # Expected results use a datetime-based index.
    index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])

    fi = FactorImputer(numerical_strategy=NAStrategy.MEAN)
    calculated = fi.fit_transform(factor_test)
    expected = pd.DataFrame(
        {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
        index=index)
    assert_frame_equal(calculated, expected)

    fi.set_out_container(True)
    calculated = fi.fit_transform(factor_test)
    expected = FactorContainer(start_date='2014-01-30',
                               end_date='2014-02-28')
    factor = Factor(
        data=pd.DataFrame(
            {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
            index=index),
        name='test1')
    expected.add_factor(factor)

    # Use the TestCase assertion (not a bare ``assert``) so the check
    # survives ``python -O`` and reports the offending type on failure.
    self.assertIsInstance(calculated, FactorContainer)
    self.assertEqual(calculated.property, expected.property)
    assert_frame_equal(calculated.data, expected.data)
def test_factor_simple_rank_1(self):
    """Check the ``'score'`` column produced by FactorSimpleRank for one factor.

    The single factor ``alpha1`` holds 1.0..6.0 over two dates and three
    tickers; the expected scores are 0.0, 1.0, 2.0 for each date.
    """
    level_names = ['trade_date', 'ticker']
    tickers = ['001', '002', '003']

    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], tickers], names=level_names)
    alpha = Factor(
        data=pd.DataFrame(index=raw_index,
                          data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
        name='alpha1')
    container = FactorContainer('2014-01-30', '2014-02-28', [alpha])

    ranker = FactorSimpleRank()
    ranker.fit(container)
    calculate = ranker.transform(container)['score']

    # The expected series is indexed by datetime objects, not strings.
    expected_index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], tickers], names=level_names)
    expected = pd.Series(index=expected_index,
                         data=[0.0, 1.0, 2.0, 0.0, 1.0, 2.0],
                         name='score')
    assert_series_equal(calculate, expected)
def setUp(self):
    """Build the shared four-factor container and store it on ``self.factor_container``.

    The container holds ``test1`` (Industry_Cap_Neutral norm type),
    ``test2`` (ALPHA_FACTOR_MV type), ``test3`` (INDUSTY_CODE type) and
    ``test4`` (Industry_Neutral norm type) over two dates and four tickers.
    """
    idx = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])

    frame1 = pd.DataFrame(index=idx,
                          data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
    f1 = Factor(
        data=frame1,
        name='test1',
        property_dict={'norm_type': FactorNormType.Industry_Cap_Neutral})

    frame2 = pd.DataFrame(index=idx,
                          data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, 5.0, 2.1])
    f2 = Factor(data=frame2,
                name='test2',
                property_dict={'type': FactorType.ALPHA_FACTOR_MV})

    frame3 = pd.DataFrame(index=idx,
                          data=['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b'])
    f3 = Factor(data=frame3,
                name='test3',
                property_dict={'type': FactorType.INDUSTY_CODE})

    # Same values as test1, but a separate frame and a different norm type.
    frame4 = pd.DataFrame(index=idx,
                          data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
    f4 = Factor(data=frame4,
                name='test4',
                property_dict={'norm_type': FactorNormType.Industry_Neutral})

    self.factor_container = FactorContainer('2014-01-30', '2014-02-28',
                                            [f1, f2, f3, f4])
def test_factor_container_1(self):
    """Exercise FactorContainer: construction, add/remove, property, replace_data.

    The input factors cover three dates, while the container is created
    for 2014-01-30..2014-02-28; every expected frame contains only the
    first two dates.
    """
    level_names = ['trade_date', 'ticker']
    tickers = ['001', '002']
    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28', '2014-03-31'], tickers],
        names=level_names)

    frame1 = pd.DataFrame(index=raw_index,
                          data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    factor_test1 = Factor(data=frame1, name='test1')
    frame2 = pd.DataFrame(index=raw_index,
                          data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0])
    factor_test2 = Factor(data=frame2, name='test2')
    frame3 = pd.DataFrame(index=raw_index,
                          data=[3.0, 4.0, 3.0, 5.0, 6.0, 7.0])
    factor_test3 = Factor(data=frame3, name='test3')

    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2])

    index_exp = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], tickers], names=level_names)

    # 1) Initial content: the two factors passed to the constructor.
    assert_frame_equal(
        fc.data,
        pd.DataFrame({'test1': [1.0, 2.0, 3.0, 4.0],
                      'test2': [3.0, 2.0, 3.0, 7.0]},
                     index=index_exp))

    # 2) After adding a third factor.
    fc.add_factor(factor_test3)
    assert_frame_equal(
        fc.data,
        pd.DataFrame({'test1': [1.0, 2.0, 3.0, 4.0],
                      'test2': [3.0, 2.0, 3.0, 7.0],
                      'test3': [3.0, 4.0, 3.0, 5.0]},
                     index=index_exp))

    # 3) After removing the second factor.
    fc.remove_factor(factor_test2)
    assert_frame_equal(
        fc.data,
        pd.DataFrame({'test1': [1.0, 2.0, 3.0, 4.0],
                      'test3': [3.0, 4.0, 3.0, 5.0]},
                     index=index_exp))

    # 4) Both remaining factors are expected to carry these property values.
    default_property = {
        'type': FactorType.ALPHA_FACTOR,
        'data_format': OutputDataFormat.MULTI_INDEX_DF,
        'norm_type': FactorNormType.Null,
        'freq': FreqType.EOM,
    }
    property_exp = {'test1': dict(default_property),
                    'test3': dict(default_property)}
    self.assertEqual(fc.property, property_exp)

    # 5) Replace the underlying values with a plain array (one column per factor).
    replacement = np.array([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 5.0]]).T
    fc.replace_data(replacement)
    assert_frame_equal(
        fc.data,
        pd.DataFrame({'test1': [1.0, 2.0, 3.0, 4.0],
                      'test3': [1.0, 2.0, 3.0, 5.0]},
                     index=index_exp))
    self.assertEqual(list(fc.alpha_factor_col), ['test1', 'test3'])
def test_standardizer(self):
    """Compare FactorStandardizer().fit_transform output with fixed values.

    The container holds two numerical factors and one INDUSTY_CODE
    factor; in the expected frame the industry column is unchanged.
    """
    level_names = ['trade_date', 'ticker']
    tickers = ['001', '002', '003', '004']
    industry_codes = ['a', 'b', 'a', 'd', 'a', 'b', 'c', 'b']

    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], tickers], names=level_names)

    frame1 = pd.DataFrame(index=raw_index,
                          data=[1.0, 1.0, 1.2, 200.0, 0.9, 5.0, 5.0, 5.1])
    factor_test1 = Factor(data=frame1, name='test1')
    frame2 = pd.DataFrame(index=raw_index,
                          data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, -10.0, 2.1])
    factor_test2 = Factor(data=frame2, name='test2')
    frame3 = pd.DataFrame(index=raw_index, data=list(industry_codes))
    factor_test3 = Factor(data=frame3,
                          name='test3',
                          property_dict={'type': FactorType.INDUSTY_CODE})

    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2, factor_test3])
    calculated = FactorStandardizer().fit_transform(fc)

    expected_index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], tickers], names=level_names)
    expected_test1 = [
        -0.578123937458, -0.578123937458, -0.575802154576, 1.73205002949,
        -1.73160039778, 0.558580773478, 0.558580773478, 0.614438850826,
    ]
    expected_test2 = [
        -0.632455532034, -1.26491106407, 0.632455532034, 1.26491106407,
        0.664422038189, 0.513631221012, -1.72938218451, 0.551328925306,
    ]
    expected = pd.DataFrame(
        {
            'test1': expected_test1,
            'test2': expected_test2,
            'test3': list(industry_codes),
        },
        index=expected_index,
        dtype=object)
    assert_frame_equal(calculated, expected)
def test_factor_winsorizer(self):
    """Compare FactorWinsorizer((1, 99)).fit_transform output with fixed values.

    Three numerical factors over two dates and five tickers are
    winsorized; the expected frame lists the resulting values.
    """
    level_names = ['trade_date', 'ticker']
    tickers = ['001', '002', '003', '004', '005']
    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], tickers], names=level_names)

    frame1 = pd.DataFrame(
        index=raw_index,
        data=[1.0, 1.0, 1.2, 200.0, 0.9, 5.0, 5.0, 5.1, 5.9, 5.0])
    factor_test1 = Factor(data=frame1, name='test1')
    frame2 = pd.DataFrame(
        index=raw_index,
        data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, -10.0, 2.1, 2.0, 1.9])
    factor_test2 = Factor(data=frame2, name='test2')
    frame3 = pd.DataFrame(
        index=raw_index,
        data=[3.0, 3.0, 30.0, 5.0, 4.0, 6.0, 7.0, 6.0, 6.0, 5.9])
    factor_test3 = Factor(data=frame3, name='test3')

    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2, factor_test3])

    # The range is passed positionally to the winsorizer.
    quantile_range = (1, 99)
    winsorizer = FactorWinsorizer(quantile_range)
    calculated = winsorizer.fit_transform(fc)

    expected_index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], tickers], names=level_names)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 1.0, 1.2, 192.048, 0.904,
                      5.0, 5.0, 5.1, 5.868, 5.0],
            'test2': [2.6, 2.504, 2.8, 2.896, 2.7,
                      1.9, -9.524, 2.096, 2.0, 1.9],
            'test3': [3.0, 3.0, 29.0, 5.0, 4.0,
                      6.0, 6.96, 6.0, 6.0, 5.904],
        },
        index=expected_index)
    assert_frame_equal(calculated, expected)
def test_factor_ic(self):
    """Compare FactorIC().predict output with fixed values for two alphas and two forward returns.

    ``alpha1``/``alpha2`` share one data frame and
    ``fwd_return1``/``fwd_return2`` share another; the expected frame has
    one column per alpha/forward-return pair and one row per date.
    """
    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28', '2014-03-31'], ['001', '002']],
        names=['trade_date', 'ticker'])

    # Both alpha factors are built from the same frame object.
    alpha_frame = pd.DataFrame(index=raw_index,
                               data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    factor_test1 = Factor(data=alpha_frame, name='alpha1')
    factor_test3 = Factor(data=alpha_frame, name='alpha2')

    # Both forward-return factors share the frame and the property dict.
    test2_property = {
        'type': FactorType.FWD_RETURN,
        'data_format': OutputDataFormat.MULTI_INDEX_DF,
        'norm_type': FactorNormType.Null,
        'freq': FreqType.EOM,
    }
    return_frame = pd.DataFrame(index=raw_index,
                                data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0])
    factor_test2 = Factor(data=return_frame,
                          name='fwd_return1',
                          property_dict=test2_property)
    factor_test4 = Factor(data=return_frame,
                          name='fwd_return2',
                          property_dict=test2_property)

    fc = FactorContainer(
        '2014-01-30', '2014-02-28',
        [factor_test1, factor_test2, factor_test3, factor_test4])

    calculate = FactorIC().predict(fc)

    expected_columns = [
        'alpha1_fwd_return1', 'alpha2_fwd_return1',
        'alpha1_fwd_return2', 'alpha2_fwd_return2',
    ]
    expected_dates = pd.DatetimeIndex(['2014-01-30', '2014-02-28'],
                                      freq=None)
    expected = pd.DataFrame(data=[[-1.0, -1.0, -1.0, -1.0],
                                  [1.0, 1.0, 1.0, 1.0]],
                            index=expected_dates,
                            columns=expected_columns)
    assert_frame_equal(calculate, expected)
def test_factor_container_1(self):
    """Walk a FactorContainer through add, remove, property lookup and replace_data.

    After each step ``fc.data`` is compared with an expected frame that
    is indexed by the two dates inside the container's date range.
    """
    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28', '2014-03-31'], ['001', '002']],
        names=['trade_date', 'ticker'])

    factor_test1 = Factor(
        data=pd.DataFrame(index=raw_index,
                          data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
        name='test1')
    factor_test2 = Factor(
        data=pd.DataFrame(index=raw_index,
                          data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0]),
        name='test2')
    factor_test3 = Factor(
        data=pd.DataFrame(index=raw_index,
                          data=[3.0, 4.0, 3.0, 5.0, 6.0, 7.0]),
        name='test3')

    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2])

    index_exp = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002']],
        names=['trade_date', 'ticker'])

    # Container as constructed.
    data_exp = pd.DataFrame(
        {
            'test1': [1.0, 2.0, 3.0, 4.0],
            'test2': [3.0, 2.0, 3.0, 7.0],
        },
        index=index_exp)
    assert_frame_equal(fc.data, data_exp)

    # After add_factor.
    fc.add_factor(factor_test3)
    data_exp = pd.DataFrame(
        {
            'test1': [1.0, 2.0, 3.0, 4.0],
            'test2': [3.0, 2.0, 3.0, 7.0],
            'test3': [3.0, 4.0, 3.0, 5.0],
        },
        index=index_exp)
    assert_frame_equal(fc.data, data_exp)

    # After remove_factor.
    fc.remove_factor(factor_test2)
    data_exp = pd.DataFrame(
        {
            'test1': [1.0, 2.0, 3.0, 4.0],
            'test3': [3.0, 4.0, 3.0, 5.0],
        },
        index=index_exp)
    assert_frame_equal(fc.data, data_exp)

    # Expected property entries for the two remaining factors.
    property_exp = {
        factor_name: {
            'type': FactorType.ALPHA_FACTOR,
            'data_format': OutputDataFormat.MULTI_INDEX_DF,
            'norm_type': FactorNormType.Null,
            'freq': FreqType.EOM,
        }
        for factor_name in ('test1', 'test3')
    }
    self.assertEqual(fc.property, property_exp)

    # After replace_data with a transposed array.
    new_values = np.array([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 5.0]]).T
    fc.replace_data(new_values)
    data_exp = pd.DataFrame(
        {
            'test1': [1.0, 2.0, 3.0, 4.0],
            'test3': [1.0, 2.0, 3.0, 5.0],
        },
        index=index_exp)
    assert_frame_equal(fc.data, data_exp)

    self.assertEqual(list(fc.alpha_factor_col), ['test1', 'test3'])
def test_factor_neutralizer(self):
    """Compare FactorNeutralizer output with fixed values, as a frame and as a container.

    The container holds ``test1`` (Industry_Cap_Neutral), ``test2``
    (ALPHA_FACTOR_MV type, Industry_Neutral), ``test3`` (INDUSTY_CODE)
    and ``test4`` (Industry_Neutral). With ``out_container=True`` the
    result's ``data`` and ``container_property`` are checked as well.
    """
    level_names = ['trade_date', 'ticker']
    tickers = ['001', '002', '003', '004']
    raw_index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], tickers], names=level_names)

    frame1 = pd.DataFrame(index=raw_index,
                          data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
    factor_test1 = Factor(
        data=frame1,
        name='test1',
        property_dict={'norm_type': FactorNormType.Industry_Cap_Neutral})

    frame2 = pd.DataFrame(index=raw_index,
                          data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, 5.0, 2.1])
    factor_test2 = Factor(
        data=frame2,
        name='test2',
        property_dict={
            'type': FactorType.ALPHA_FACTOR_MV,
            'norm_type': FactorNormType.Industry_Neutral,
        })

    frame3 = pd.DataFrame(index=raw_index,
                          data=['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b'])
    factor_test3 = Factor(data=frame3,
                          name='test3',
                          property_dict={'type': FactorType.INDUSTY_CODE})

    # Same values as test1, but a separate frame and a different norm type.
    frame4 = pd.DataFrame(index=raw_index,
                          data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
    factor_test4 = Factor(
        data=frame4,
        name='test4',
        property_dict={'norm_type': FactorNormType.Industry_Neutral})

    fc = FactorContainer(
        '2014-01-30', '2014-02-28',
        [factor_test1, factor_test2, factor_test3, factor_test4])

    calculated = FactorNeutralizer().fit_transform(fc)

    expected_index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], tickers], names=level_names)
    expected_test1 = [
        0.0983574180639, 8.881784197e-16, -0.306074564019, 0.207717145955,
        -2.10942374679e-15, 8.881784197e-16, -5.3290705182e-15, 0.0,
    ]
    expected_test2 = [
        -0.166666666667, 0.0, 0.0333333333333, 0.133333333333,
        0.0, -0.1, 0.0, 0.1,
    ]
    expected = pd.DataFrame(
        {
            'test1': expected_test1,
            'test2': expected_test2,
            'test3': ['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b'],
            'test4': [-0.4, 0.0, -0.2, 0.6, 0.0, -0.05, 0.0, 0.05],
        },
        index=expected_index,
        dtype=object)
    assert_frame_equal(calculated, expected)

    # Same transformation, but returning a container this time.
    calculated = FactorNeutralizer(out_container=True).fit_transform(fc)
    assert_frame_equal(calculated.data, expected)
    self.assertEqual(calculated.container_property, fc.container_property)
# Load the 'MV' factor and save it to mv.csv.
# NOTE(review): ``data_pb`` is used below but is defined outside this
# fragment -- confirm it is loaded earlier in the script.
data_mv = factor_load('2014-01-01', '2014-03-10', 'MV', sec_id='fullA', is_index=True, save_file='mv.csv')
# Alternative kept for reference: read previously saved CSV files instead.
# data_pb = pd.read_csv('pb.csv', encoding='gbk')
# data_mv = pd.read_csv('mv.csv', encoding='gbk')
# data_pb['date'] = pd.to_datetime(data_pb['date'])
# data_mv['date'] = pd.to_datetime(data_mv['date'])
# data_pb.set_index(['date', ' ticker'], inplace=True)
# data_mv.set_index(['date', ' ticker'], inplace=True)

# Create Factor instances that store the data and its related parameters.
factor_pb = Factor(data=data_pb, name='PB', property_dict={'type': FactorType.ALPHA_FACTOR, 'norm_type': FactorNormType.Industry_Neutral})
factor_mv = Factor(data=data_mv, name='MV', property_dict={'type': FactorType.ALPHA_FACTOR_MV})

# Create a FactorContainer instance and load all the factor information.
fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10')
fc.add_factor(factor_pb)
fc.add_factor(factor_mv)

# All factors can also be loaded in one go:
#
# fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10', factors=[factor_pb, factor_mv])
# print fc.tiaocang_date
#
# [datetime.datetime(2014, 1, 30, 0, 0), datetime.datetime(2014, 2, 28, 0, 0)]
# print fc.alpha_factor_col
#
# ['PB', 'MV']

# Fetch the industry data.
data_industry_code = factor_load('2014-01-01', '2014-03-10', 'SW_C1', sec_id='fullA', is_index=True, save_file='sw.csv')
# data_industry_code = pd.read_csv('sw.csv', encoding='gbk')
# data_pb.set_index(['date', ' ticker'], inplace=True) # data_mv.set_index(['date', ' ticker'], inplace=True) # 创建Factor实例,储存数据以及相关参数 factor_pb = Factor(data=data_pb, name='PB', property_dict={ 'type': FactorType.ALPHA_FACTOR, 'norm_type': FactorNormType.Industry_Neutral }) factor_mv = Factor(data=data_mv, name='MV', property_dict={'type': FactorType.ALPHA_FACTOR_MV}) # 创建FactorContainer实例,加载所有的因子信息 fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10') fc.add_factor(factor_pb) fc.add_factor(factor_mv) # 也可以一次性加载所有因子 # # fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10', factors=[factor_pb, factor_mv]) # print fc.tiaocang_date # # [datetime.datetime(2014, 1, 30, 0, 0), datetime.datetime(2014, 2, 28, 0, 0)] # print fc.alpha_factor_col # # ['PB', 'MV'] # 提取行业数据 data_industry_code = factor_load('2014-01-01', '2014-03-10', 'SW_C1', sec_id='fullA',
# Load the industry codes from CSV and wrap them in a Factor.
# NOTE(review): ``factor_mv`` and ``factor_pb`` are used below but are
# defined outside this fragment -- confirm against the full script.
data_industry_code = load_factor_data_from_csv('sw.csv')
factor_industry_code = Factor(data=data_industry_code, name='industry_code', property_dict={'type': FactorType.INDUSTY_CODE})

# Load the monthly return data.
data_return = load_factor_data_from_csv('return.csv')
# Convert the data into 1-month forward returns.
data_return = fwd_return(data_return)
factor_return = Factor(data=data_return, name='1_Fwd_Return', property_dict={'type': FactorType.FWD_RETURN, 'norm_type': FactorNormType.Industry_Neutral})

# Create a FactorContainer instance and load all the factor information.
fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-01', factors=[factor_mv, factor_pb, factor_return, factor_industry_code])

# Pipeline
# Step 1: handle the occasional N/A values (numerical: median; categorical: the custom value 'other').
step_1 = ('imputer', FactorImputer(numerical_strategy=NAStrategy.MEDIAN, categorical_strategy=NAStrategy.CUSTOM, custom_value='other'))
# Step 2: winsorize.
step_2 = ('winsorize', FactorWinsorizer(quantile_range=(5, 95)))
# Step 3: standardize.
step_3 = ('std', FactorStandardizer())
# Step 4: neutralize.
step_4 = ('neutralize', FactorNeutralizer())
def test_imputer_1(self):
    """Check container imputation with the MOST_FREQ, MEDIAN and CUSTOM strategies.

    Three numerical factors with one NaN each are imputed with MOST_FREQ
    and with MEDIAN (the expected values are the same for both). Then an
    industry-code factor with one NaN is added and imputed with the
    custom value ``'other'``.
    """
    index = pd.MultiIndex.from_product(
        [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])
    data1 = pd.DataFrame(index=index,
                         data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
    factor_test1 = Factor(data=data1, name='test1')
    data2 = pd.DataFrame(index=index,
                         data=[3.0, 2.0, 3.0, 7.0, 7.0, np.nan, 6.0, 6.0])
    factor_test2 = Factor(data=data2, name='test2')
    data3 = pd.DataFrame(index=index,
                         data=[3.0, 3.0, np.nan, 5.0, 6.0, 7.0, 6.0, 6.0])
    factor_test3 = Factor(data=data3, name='test3')
    fc = FactorContainer('2014-01-30', '2014-02-28',
                         [factor_test1, factor_test2, factor_test3])

    # Expected results use a datetime-based index.
    index = pd.MultiIndex.from_product(
        [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
        names=['trade_date', 'ticker'])

    calculated = FactorImputer(
        numerical_strategy=NAStrategy.MOST_FREQ).fit_transform(fc)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
        },
        index=index)
    assert_frame_equal(calculated, expected)

    calculated = FactorImputer(
        numerical_strategy=NAStrategy.MEDIAN).fit_transform(fc)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
        },
        index=index)
    assert_frame_equal(calculated, expected)

    # Add a categorical factor that has one missing entry.
    industry = pd.DataFrame(
        index=index, data=['a', 'a', 'a', 'a', 'a', 'a', np.nan, 'a'])
    factor_industry = Factor(
        data=industry,
        name='industry',
        property_dict={'type': FactorType.INDUSTY_CODE})
    fc.add_factor(factor=factor_industry)

    calculated = FactorImputer(numerical_strategy=NAStrategy.MEDIAN,
                               categorical_strategy=NAStrategy.CUSTOM,
                               custom_value='other').fit_transform(fc)
    calculated.sort_index(axis=1, inplace=True)
    expected = pd.DataFrame(
        {
            'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
            'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
            'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
            'industry': ['a', 'a', 'a', 'a', 'a', 'a', 'other', 'a'],
        },
        index=index,
        dtype=object)
    # Sort the expected columns the same way as ``calculated``. Recent
    # pandas preserves dict insertion order ('industry' last), so without
    # this the column order would not match the sorted result.
    expected.sort_index(axis=1, inplace=True)
    assert_frame_equal(calculated, expected)