Ejemplo n.º 1
0
    def test_selector(self):
        """Check INDUSTRY_NEUTRAL selection against a hand-built expected frame."""
        jan_end = dt(2014, 1, 30)
        feb_end = dt(2014, 2, 28)

        # Industry weights, one row per (date, industry) pair.
        weight_index = pd.MultiIndex.from_product([[jan_end, feb_end], ['a', 'b', 'other']],
                                                  names=INDEX_INDUSTRY_WEIGHT.full_index)
        industry_weight = pd.DataFrame([0.5, 0.4, 0.1, 0.5, 0.3, 0.2], index=weight_index)

        # Score and industry code for five tickers on each of the two dates.
        factor_index = pd.MultiIndex.from_product([['2014-01-30', '2014-02-28'],
                                                   ['001', '002', '003', '004', '005']],
                                                  names=INDEX_FACTOR.full_index)
        raw = pd.DataFrame({'score': [2, 3, 3, 8, 4, 5, 9, 11, 2, 0],
                            'industry_code': ['a', 'a', 'a', 'b', 'b', 'a', 'a', 'other', 'b', 'b']},
                           index=factor_index)

        score_factor = Factor(data=raw['score'], name='score',
                              property_dict={'type': FactorType.SCORE})
        industry_factor = Factor(data=raw['industry_code'], name='industry_code',
                                 property_dict={'type': FactorType.INDUSTY_CODE})
        container = FactorContainer(start_date='2014-01-30', end_date='2014-02-28')
        for factor in (score_factor, industry_factor):
            container.add_factor(factor)

        selector = Selector(industry_weight=industry_weight,
                            method=SelectionMethod.INDUSTRY_NEUTRAL)
        calculated = selector.fit(container).predict(container)

        # Four expected rows on the first date, five on the second.
        expected_dates = [jan_end] * 4 + [feb_end] * 5
        expected_tickers = ['002', '003', '004', '005', '002', '001', '004', '005', '003']
        expected_index = pd.MultiIndex.from_arrays([expected_dates, expected_tickers],
                                                   names=['trade_date', 'ticker'])
        expected = pd.DataFrame({'score': [3, 3, 8, 4, 9, 5, 2, 0, 11],
                                 'industry_code': ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'other'],
                                 'weight': [0.25, 0.25, 0.2, 0.2, 0.25, 0.25, 0.15, 0.15, 0.2]},
                                index=expected_index, dtype=object)
        # Pin the column order explicitly before comparing.
        expected = expected[['score', 'industry_code', 'weight']]
        assert_frame_equal(calculated, expected)
Ejemplo n.º 2
0
    def test_imputer_2(self):
        """Check FactorImputer with the MEAN strategy on a single factor.

        The same input is imputed twice: first the returned frame is compared
        directly, then, after ``set_out_container(True)``, the result must be
        a ``FactorContainer`` whose ``property`` and ``data`` match a
        container built by hand.
        """
        index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        data = pd.DataFrame(index=index,
                            data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
        factor_test = Factor(data=data, name='test1')

        # The expected results are indexed by datetime objects, not strings.
        index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        fi = FactorImputer(numerical_strategy=NAStrategy.MEAN)
        calculated = fi.fit_transform(factor_test)
        # The single NaN is expected to become 2.33333333333, the mean of the
        # other three 2014-01-30 values (1.0, 3.0, 3.0).
        expected = pd.DataFrame(
            {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
            index=index)
        assert_frame_equal(calculated, expected)

        fi.set_out_container(True)
        calculated = fi.fit_transform(factor_test)
        expected = FactorContainer(start_date='2014-01-30',
                                   end_date='2014-02-28')
        factor = Factor(data=pd.DataFrame(
            {'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]},
            index=index),
                        name='test1')
        expected.add_factor(factor)

        # Use the unittest assertion rather than a bare ``assert``: it is not
        # stripped under ``python -O`` and reports the actual type on failure.
        self.assertIsInstance(calculated, FactorContainer)
        self.assertEqual(calculated.property, expected.property)
        assert_frame_equal(calculated.data, expected.data)
Ejemplo n.º 3
0
    def test_imputer_1(self):
        """Check FactorImputer on a three-factor container.

        Covers the MOST_FREQ and MEDIAN numerical strategies, then adds an
        industry-code factor and checks the CUSTOM categorical strategy with
        ``custom_value='other'``.
        """
        index = pd.MultiIndex.from_product([['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
                                           names=['trade_date', 'ticker'])
        data1 = pd.DataFrame(index=index, data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
        factor_test1 = Factor(data=data1, name='test1')

        data2 = pd.DataFrame(index=index, data=[3.0, 2.0, 3.0, 7.0, 7.0, np.nan, 6.0, 6.0])
        factor_test2 = Factor(data=data2, name='test2')

        data3 = pd.DataFrame(index=index, data=[3.0, 3.0, np.nan, 5.0, 6.0, 7.0, 6.0, 6.0])
        factor_test3 = Factor(data=data3, name='test3')

        fc = FactorContainer('2014-01-30', '2014-02-28', [factor_test1, factor_test2, factor_test3])

        # The expected results are indexed by datetime objects, not strings.
        index = pd.MultiIndex.from_product([[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
                                           names=['trade_date', 'ticker'])
        calculated = FactorImputer(numerical_strategy=NAStrategy.MOST_FREQ).fit_transform(fc)
        expected = pd.DataFrame({'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                                 'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                                 'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0]},
                                index=index)
        assert_frame_equal(calculated, expected)

        # The same filled values are expected for the MEDIAN strategy.
        calculated = FactorImputer(numerical_strategy=NAStrategy.MEDIAN).fit_transform(fc)
        expected = pd.DataFrame({'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                                 'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                                 'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0]},
                                index=index)
        assert_frame_equal(calculated, expected)

        industry = pd.DataFrame(index=index, data=['a', 'a', 'a', 'a', 'a', 'a', np.nan, 'a'])
        factor_industry = Factor(data=industry, name='industry', property_dict={'type': FactorType.INDUSTY_CODE})
        fc.add_factor(factor=factor_industry)
        calculated = FactorImputer(numerical_strategy=NAStrategy.MEDIAN,
                                   categorical_strategy=NAStrategy.CUSTOM,
                                   custom_value='other').fit_transform(fc)
        calculated.sort_index(axis=1, inplace=True)
        expected = pd.DataFrame({'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                                 'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                                 'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
                                 'industry': ['a', 'a', 'a', 'a', 'a', 'a', 'other', 'a']},
                                index=index,
                                dtype=object)
        # Sort the expected columns the same way as ``calculated``. Without
        # this the comparison depends on how the DataFrame constructor orders
        # dict keys (sorted on old pandas, insertion order on newer ones).
        expected.sort_index(axis=1, inplace=True)
        assert_frame_equal(calculated, expected)
Ejemplo n.º 4
0
    def test_selector(self):
        """Run Selector with INDUSTRY_NEUTRAL and compare with the expected picks and weights."""
        # Weights per (date, industry); passed to the selector below.
        sector_weights = pd.DataFrame(
            [0.5, 0.4, 0.1, 0.5, 0.3, 0.2],
            index=pd.MultiIndex.from_product(
                [[dt(2014, 1, 30), dt(2014, 2, 28)], ['a', 'b', 'other']],
                names=INDEX_INDUSTRY_WEIGHT.full_index))

        # Input panel: two dates by five tickers.
        panel_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'],
             ['001', '002', '003', '004', '005']],
            names=INDEX_FACTOR.full_index)
        scores = [2, 3, 3, 8, 4, 5, 9, 11, 2, 0]
        codes = ['a', 'a', 'a', 'b', 'b', 'a', 'a', 'other', 'b', 'b']
        panel = pd.DataFrame({'score': scores, 'industry_code': codes},
                             index=panel_index)

        score_factor = Factor(data=panel['score'],
                              name='score',
                              property_dict={'type': FactorType.SCORE})
        code_factor = Factor(data=panel['industry_code'],
                             name='industry_code',
                             property_dict={'type': FactorType.INDUSTY_CODE})
        factors = FactorContainer(start_date='2014-01-30',
                                  end_date='2014-02-28')
        factors.add_factor(score_factor)
        factors.add_factor(code_factor)

        model = Selector(industry_weight=sector_weights,
                         method=SelectionMethod.INDUSTRY_NEUTRAL)
        model = model.fit(factors)
        calculated = model.predict(factors)

        # Expected selection: (date, ticker) pairs in the listed order.
        dates_exp = [dt(2014, 1, 30) for _ in range(4)]
        dates_exp += [dt(2014, 2, 28) for _ in range(5)]
        tickers_exp = ['002', '003', '004', '005',
                       '002', '001', '004', '005', '003']
        index_exp = pd.MultiIndex.from_arrays(
            [dates_exp, tickers_exp], names=['trade_date', 'ticker'])

        column_order = ['score', 'industry_code', 'weight']
        expected = pd.DataFrame(
            {
                'score': [3, 3, 8, 4, 9, 5, 2, 0, 11],
                'industry_code':
                ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'other'],
                'weight': [0.25, 0.25, 0.2, 0.2, 0.25, 0.25, 0.15, 0.15, 0.2]
            },
            index=index_exp,
            dtype=object)[column_order]
        assert_frame_equal(calculated, expected)
Ejemplo n.º 5
0
    def test_imputer_2(self):
        """Check MEAN imputation of one factor, as a frame and as a container.

        The first ``fit_transform`` result is compared as a DataFrame. After
        ``set_out_container(True)`` the second result must be a
        ``FactorContainer`` matching one assembled by hand.
        """
        index = pd.MultiIndex.from_product([['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
                                           names=['trade_date', 'ticker'])
        data = pd.DataFrame(index=index, data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
        factor_test = Factor(data=data, name='test1')

        # The expected results are indexed by datetime objects, not strings.
        index = pd.MultiIndex.from_product([[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
                                           names=['trade_date', 'ticker'])
        fi = FactorImputer(numerical_strategy=NAStrategy.MEAN)
        calculated = fi.fit_transform(factor_test)
        # The NaN slot is expected to hold 2.33333333333, i.e. mean(1.0, 3.0, 3.0).
        expected = pd.DataFrame({'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]}, index=index)
        assert_frame_equal(calculated, expected)

        fi.set_out_container(True)
        calculated = fi.fit_transform(factor_test)
        expected = FactorContainer(start_date='2014-01-30',
                                   end_date='2014-02-28')
        factor = Factor(data=pd.DataFrame({'test1': [1.0, 3.0, 3.0, 2.33333333333, 5.0, 5.0, 6.0, 8.0]}, index=index),
                        name='test1')
        expected.add_factor(factor)

        # unittest assertion instead of a bare ``assert``: it survives
        # ``python -O`` and prints the actual type when it fails.
        self.assertIsInstance(calculated, FactorContainer)
        self.assertEqual(calculated.property, expected.property)
        assert_frame_equal(calculated.data, expected.data)
Ejemplo n.º 6
0
    def test_factor_simple_rank_1(self):
        """Fit FactorSimpleRank on one factor and check the 'score' column it returns."""
        raw_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], ['001', '002', '003']],
            names=['trade_date', 'ticker'])
        alpha_frame = pd.DataFrame(index=raw_index,
                                   data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        alpha = Factor(data=alpha_frame, name='alpha1')
        container = FactorContainer('2014-01-30', '2014-02-28', [alpha])

        ranker = FactorSimpleRank()
        ranker.fit(container)
        transformed = ranker.transform(container)
        calculate = transformed['score']

        # The expected series is indexed by datetime objects, not strings.
        expected_index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003']],
            names=['trade_date', 'ticker'])
        expected = pd.Series(data=[0.0, 1.0, 2.0, 0.0, 1.0, 2.0],
                             index=expected_index,
                             name='score')
        assert_series_equal(calculate, expected)
Ejemplo n.º 7
0
    def setUp(self):
        """Build a four-factor container and store it as ``self.factor_container``."""
        panel_index = pd.MultiIndex.from_product([['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
                                                 names=['trade_date', 'ticker'])

        def column(values):
            # Single-column frame on the shared (trade_date, ticker) index.
            return pd.DataFrame(index=panel_index, data=values)

        factors = [
            Factor(data=column([1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1]), name='test1',
                   property_dict={'norm_type': FactorNormType.Industry_Cap_Neutral}),
            Factor(data=column([2.6, 2.5, 2.8, 2.9, 2.7, 1.9, 5.0, 2.1]), name='test2',
                   property_dict={'type': FactorType.ALPHA_FACTOR_MV}),
            Factor(data=column(['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b']), name='test3',
                   property_dict={'type': FactorType.INDUSTY_CODE}),
            Factor(data=column([1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1]), name='test4',
                   property_dict={'norm_type': FactorNormType.Industry_Neutral}),
        ]

        self.factor_container = FactorContainer('2014-01-30', '2014-02-28', factors)
Ejemplo n.º 8
0
    def test_factor_container_1(self):
        """Exercise FactorContainer: construction, add/remove, property, replace_data."""
        raw_index = pd.MultiIndex.from_product([['2014-01-30', '2014-02-28', '2014-03-31'], ['001', '002']],
                                               names=['trade_date', 'ticker'])
        first = Factor(data=pd.DataFrame(index=raw_index, data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), name='test1')
        second = Factor(data=pd.DataFrame(index=raw_index, data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0]), name='test2')
        third = Factor(data=pd.DataFrame(index=raw_index, data=[3.0, 4.0, 3.0, 5.0, 6.0, 7.0]), name='test3')

        container = FactorContainer('2014-01-30', '2014-02-28', [first, second])

        # The expected frames hold only the 2014-01-30 and 2014-02-28 rows.
        kept_index = pd.MultiIndex.from_product([[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002']],
                                                names=['trade_date', 'ticker'])
        col_1 = [1.0, 2.0, 3.0, 4.0]
        col_2 = [3.0, 2.0, 3.0, 7.0]
        col_3 = [3.0, 4.0, 3.0, 5.0]

        # Initial content: the two factors passed to the constructor.
        assert_frame_equal(container.data,
                           pd.DataFrame({'test1': col_1, 'test2': col_2}, index=kept_index))

        # After adding the third factor.
        container.add_factor(third)
        assert_frame_equal(container.data,
                           pd.DataFrame({'test1': col_1, 'test2': col_2, 'test3': col_3}, index=kept_index))

        # After removing the second factor.
        container.remove_factor(second)
        assert_frame_equal(container.data,
                           pd.DataFrame({'test1': col_1, 'test3': col_3}, index=kept_index))

        # Both remaining factors were created without a property_dict.
        property_exp = {}
        for factor_name in ('test1', 'test3'):
            property_exp[factor_name] = {'type': FactorType.ALPHA_FACTOR,
                                         'data_format': OutputDataFormat.MULTI_INDEX_DF,
                                         'norm_type': FactorNormType.Null,
                                         'freq': FreqType.EOM}
        self.assertEqual(container.property, property_exp)

        # Replace the underlying values with a 4x2 array.
        replacement = np.array([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 5.0]]).T
        container.replace_data(replacement)
        replaced_exp = pd.DataFrame({'test1': [1.0, 2.0, 3.0, 4.0], 'test3': [1.0, 2.0, 3.0, 5.0]},
                                    index=kept_index)
        assert_frame_equal(container.data, replaced_exp)

        self.assertEqual(list(container.alpha_factor_col), ['test1', 'test3'])
Ejemplo n.º 9
0
    def test_standardizer(self):
        """Compare FactorStandardizer output with precomputed values."""
        raw_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])

        numeric_1 = Factor(
            data=pd.DataFrame(index=raw_index,
                              data=[1.0, 1.0, 1.2, 200.0, 0.9, 5.0, 5.0, 5.1]),
            name='test1')
        numeric_2 = Factor(
            data=pd.DataFrame(index=raw_index,
                              data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, -10.0, 2.1]),
            name='test2')
        # Industry-code factor; its values reappear unchanged in the expected frame.
        industry_values = ['a', 'b', 'a', 'd', 'a', 'b', 'c', 'b']
        industry = Factor(
            data=pd.DataFrame(index=raw_index, data=industry_values),
            name='test3',
            property_dict={'type': FactorType.INDUSTY_CODE})

        container = FactorContainer('2014-01-30', '2014-02-28',
                                    [numeric_1, numeric_2, industry])
        standardizer = FactorStandardizer()
        calculated = standardizer.fit_transform(container)

        expected_index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        expected_test1 = [
            -0.578123937458, -0.578123937458, -0.575802154576, 1.73205002949,
            -1.73160039778, 0.558580773478, 0.558580773478, 0.614438850826
        ]
        expected_test2 = [
            -0.632455532034, -1.26491106407, 0.632455532034, 1.26491106407,
            0.664422038189, 0.513631221012, -1.72938218451, 0.551328925306
        ]
        expected = pd.DataFrame(
            {
                'test1': expected_test1,
                'test2': expected_test2,
                'test3': ['a', 'b', 'a', 'd', 'a', 'b', 'c', 'b']
            },
            index=expected_index,
            dtype=object)
        assert_frame_equal(calculated, expected)
Ejemplo n.º 10
0
    def test_factor_winsorizer(self):
        """Compare FactorWinsorizer((1, 99)) output with precomputed values for three factors."""
        raw_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'],
             ['001', '002', '003', '004', '005']],
            names=['trade_date', 'ticker'])

        # (factor name, raw values) in the order they enter the container.
        inputs = [
            ('test1', [1.0, 1.0, 1.2, 200.0, 0.9, 5.0, 5.0, 5.1, 5.9, 5.0]),
            ('test2', [2.6, 2.5, 2.8, 2.9, 2.7, 1.9, -10.0, 2.1, 2.0, 1.9]),
            ('test3', [3.0, 3.0, 30.0, 5.0, 4.0, 6.0, 7.0, 6.0, 6.0, 5.9]),
        ]
        factors = []
        for label, values in inputs:
            frame = pd.DataFrame(index=raw_index, data=values)
            factors.append(Factor(data=frame, name=label))

        container = FactorContainer('2014-01-30', '2014-02-28', factors)
        winsorizer = FactorWinsorizer((1, 99))
        calculated = winsorizer.fit_transform(container)

        expected_index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)],
             ['001', '002', '003', '004', '005']],
            names=['trade_date', 'ticker'])
        expected_columns = {
            'test1': [1.0, 1.0, 1.2, 192.048, 0.904,
                      5.0, 5.0, 5.1, 5.868, 5.0],
            'test2': [2.6, 2.504, 2.8, 2.896, 2.7,
                      1.9, -9.524, 2.096, 2.0, 1.9],
            'test3': [3.0, 3.0, 29.0, 5.0, 4.0,
                      6.0, 6.96, 6.0, 6.0, 5.904]
        }
        expected = pd.DataFrame(expected_columns, index=expected_index)
        assert_frame_equal(calculated, expected)
Ejemplo n.º 11
0
    def test_factor_ic(self):
        """Check FactorIC.predict on two alpha factors paired with two forward-return factors."""
        raw_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28', '2014-03-31'], ['001', '002']],
            names=['trade_date', 'ticker'])

        # Both alpha factors wrap the same frame.
        alpha_frame = pd.DataFrame(index=raw_index,
                                   data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        alpha_1 = Factor(data=alpha_frame, name='alpha1')
        alpha_2 = Factor(data=alpha_frame, name='alpha2')

        # Both forward-return factors share one frame and one property dict.
        return_property = {
            'type': FactorType.FWD_RETURN,
            'data_format': OutputDataFormat.MULTI_INDEX_DF,
            'norm_type': FactorNormType.Null,
            'freq': FreqType.EOM
        }
        return_frame = pd.DataFrame(index=raw_index,
                                    data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0])
        fwd_1 = Factor(data=return_frame,
                       name='fwd_return1',
                       property_dict=return_property)
        fwd_2 = Factor(data=return_frame,
                       name='fwd_return2',
                       property_dict=return_property)

        container = FactorContainer('2014-01-30', '2014-02-28',
                                    [alpha_1, fwd_1, alpha_2, fwd_2])
        ic_model = FactorIC()
        calculate = ic_model.predict(container)

        # One expected row per date, one column per alpha/return pair.
        expected_columns = [
            'alpha1_fwd_return1', 'alpha2_fwd_return1',
            'alpha1_fwd_return2', 'alpha2_fwd_return2'
        ]
        expected_dates = pd.DatetimeIndex(['2014-01-30', '2014-02-28'],
                                          freq=None)
        expected = pd.DataFrame(data=[[-1.0, -1.0, -1.0, -1.0],
                                      [1.0, 1.0, 1.0, 1.0]],
                                index=expected_dates,
                                columns=expected_columns)
        assert_frame_equal(calculate, expected)
Ejemplo n.º 12
0
    def test_factor_container_1(self):
        """Walk a FactorContainer through add, remove and replace_data, checking its state each time."""
        source_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28', '2014-03-31'], ['001', '002']],
            names=['trade_date', 'ticker'])

        frame_a = pd.DataFrame(index=source_index,
                               data=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        factor_a = Factor(data=frame_a, name='test1')
        frame_b = pd.DataFrame(index=source_index,
                               data=[3.0, 2.0, 3.0, 7.0, 8.0, 9.0])
        factor_b = Factor(data=frame_b, name='test2')
        frame_c = pd.DataFrame(index=source_index,
                               data=[3.0, 4.0, 3.0, 5.0, 6.0, 7.0])
        factor_c = Factor(data=frame_c, name='test3')

        fc = FactorContainer('2014-01-30', '2014-02-28',
                             [factor_a, factor_b])

        # Expected data is indexed by the first two dates only.
        index_exp = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002']],
            names=['trade_date', 'ticker'])

        # Step 1: container as constructed.
        state_initial = pd.DataFrame(
            {'test1': [1.0, 2.0, 3.0, 4.0],
             'test2': [3.0, 2.0, 3.0, 7.0]},
            index=index_exp)
        assert_frame_equal(fc.data, state_initial)

        # Step 2: third factor added.
        fc.add_factor(factor_c)
        state_added = pd.DataFrame(
            {'test1': [1.0, 2.0, 3.0, 4.0],
             'test2': [3.0, 2.0, 3.0, 7.0],
             'test3': [3.0, 4.0, 3.0, 5.0]},
            index=index_exp)
        assert_frame_equal(fc.data, state_added)

        # Step 3: second factor removed.
        fc.remove_factor(factor_b)
        state_removed = pd.DataFrame(
            {'test1': [1.0, 2.0, 3.0, 4.0],
             'test3': [3.0, 4.0, 3.0, 5.0]},
            index=index_exp)
        assert_frame_equal(fc.data, state_removed)

        # The property mapping expected for each remaining factor.
        default_property = {
            'type': FactorType.ALPHA_FACTOR,
            'data_format': OutputDataFormat.MULTI_INDEX_DF,
            'norm_type': FactorNormType.Null,
            'freq': FreqType.EOM
        }
        property_exp = {
            'test1': dict(default_property),
            'test3': dict(default_property)
        }
        self.assertEqual(fc.property, property_exp)

        # Step 4: overwrite the values with a transposed 2x4 array.
        new_values = np.array([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 5.0]])
        fc.replace_data(new_values.T)
        state_replaced = pd.DataFrame(
            {'test1': [1.0, 2.0, 3.0, 4.0],
             'test3': [1.0, 2.0, 3.0, 5.0]},
            index=index_exp)
        assert_frame_equal(fc.data, state_replaced)

        self.assertEqual(list(fc.alpha_factor_col), ['test1', 'test3'])
Ejemplo n.º 13
0
    def test_factor_neutralizer(self):
        """Check FactorNeutralizer output, both as a frame and with ``out_container=True``."""
        raw_index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])

        cap_neutral_frame = pd.DataFrame(
            index=raw_index, data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
        cap_neutral = Factor(
            data=cap_neutral_frame,
            name='test1',
            property_dict={'norm_type': FactorNormType.Industry_Cap_Neutral})

        mv_frame = pd.DataFrame(
            index=raw_index, data=[2.6, 2.5, 2.8, 2.9, 2.7, 1.9, 5.0, 2.1])
        mv_property = {
            'type': FactorType.ALPHA_FACTOR_MV,
            'norm_type': FactorNormType.Industry_Neutral
        }
        mv_factor = Factor(data=mv_frame,
                           name='test2',
                           property_dict=mv_property)

        industry_frame = pd.DataFrame(
            index=raw_index, data=['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b'])
        industry = Factor(data=industry_frame,
                          name='test3',
                          property_dict={'type': FactorType.INDUSTY_CODE})

        industry_neutral_frame = pd.DataFrame(
            index=raw_index, data=[1.0, 1.0, 1.2, 2.0, 0.9, 5.0, 5.0, 5.1])
        industry_neutral = Factor(
            data=industry_neutral_frame,
            name='test4',
            property_dict={'norm_type': FactorNormType.Industry_Neutral})

        container = FactorContainer(
            '2014-01-30', '2014-02-28',
            [cap_neutral, mv_factor, industry, industry_neutral])

        # Default mode: the result is compared directly as a DataFrame.
        calculated = FactorNeutralizer().fit_transform(container)

        expected_index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        expected_test1 = [
            0.0983574180639, 8.881784197e-16, -0.306074564019, 0.207717145955,
            -2.10942374679e-15, 8.881784197e-16, -5.3290705182e-15, 0.0
        ]
        expected_test2 = [
            -0.166666666667, 0.0, 0.0333333333333, 0.133333333333,
            0.0, -0.1, 0.0, 0.1
        ]
        expected = pd.DataFrame(
            {
                'test1': expected_test1,
                'test2': expected_test2,
                'test3': ['a', 'b', 'a', 'a', 'a', 'b', 'c', 'b'],
                'test4': [-0.4, 0.0, -0.2, 0.6, 0.0, -0.05, 0.0, 0.05]
            },
            index=expected_index,
            dtype=object)
        assert_frame_equal(calculated, expected)

        # Container mode: same data, and the container property matches the input's.
        as_container = FactorNeutralizer(out_container=True).fit_transform(container)
        assert_frame_equal(as_container.data, expected)
        self.assertEqual(as_container.container_property,
                         container.container_property)
Ejemplo n.º 14
0
# Load the 'MV' factor for 2014-01-01..2014-03-10 and save it to mv.csv.
data_mv = factor_load('2014-01-01', '2014-03-10', 'MV', sec_id='fullA', is_index=True, save_file='mv.csv')

# Alternative (disabled): read previously saved CSV files instead.
# data_pb = pd.read_csv('pb.csv', encoding='gbk')
# data_mv = pd.read_csv('mv.csv', encoding='gbk')
# data_pb['date'] = pd.to_datetime(data_pb['date'])
# data_mv['date'] = pd.to_datetime(data_mv['date'])
# data_pb.set_index(['date', ' ticker'], inplace=True)
# data_mv.set_index(['date', ' ticker'], inplace=True)

# Create Factor instances that hold the data together with their properties.
# NOTE(review): data_pb is not defined in this excerpt -- presumably loaded
# earlier in the script; confirm.
factor_pb = Factor(data=data_pb, name='PB',
                   property_dict={'type': FactorType.ALPHA_FACTOR, 'norm_type': FactorNormType.Industry_Neutral})
factor_mv = Factor(data=data_mv, name='MV', property_dict={'type': FactorType.ALPHA_FACTOR_MV})

# Create a FactorContainer instance and load all the factor information.
fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10')
fc.add_factor(factor_pb)
fc.add_factor(factor_mv)

# All factors can also be loaded in one go:
# # fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10', factors=[factor_pb, factor_mv])
# print fc.tiaocang_date
# # [datetime.datetime(2014, 1, 30, 0, 0), datetime.datetime(2014, 2, 28, 0, 0)]
# print fc.alpha_factor_col
# # ['PB', 'MV']


# Load the industry data ('SW_C1') and save it to sw.csv.
data_industry_code = factor_load('2014-01-01', '2014-03-10', 'SW_C1', sec_id='fullA', is_index=True, save_file='sw.csv')

# data_industry_code = pd.read_csv('sw.csv', encoding='gbk')
Ejemplo n.º 15
0
# data_pb.set_index(['date', ' ticker'], inplace=True)
# data_mv.set_index(['date', ' ticker'], inplace=True)

# Create Factor instances that hold the data together with their properties.
# NOTE(review): data_pb and data_mv are not defined in this excerpt --
# presumably loaded earlier in the script; confirm.
factor_pb = Factor(data=data_pb,
                   name='PB',
                   property_dict={
                       'type': FactorType.ALPHA_FACTOR,
                       'norm_type': FactorNormType.Industry_Neutral
                   })
factor_mv = Factor(data=data_mv,
                   name='MV',
                   property_dict={'type': FactorType.ALPHA_FACTOR_MV})

# Create a FactorContainer instance and load all the factor information.
fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10')
fc.add_factor(factor_pb)
fc.add_factor(factor_mv)

# All factors can also be loaded in one go:
# # fc = FactorContainer(start_date='2014-01-01', end_date='2014-03-10', factors=[factor_pb, factor_mv])
# print fc.tiaocang_date
# # [datetime.datetime(2014, 1, 30, 0, 0), datetime.datetime(2014, 2, 28, 0, 0)]
# print fc.alpha_factor_col
# # ['PB', 'MV']

# Load the industry data.
data_industry_code = factor_load('2014-01-01',
                                 '2014-03-10',
                                 'SW_C1',
                                 sec_id='fullA',
Ejemplo n.º 16
0
# Load the industry codes from sw.csv and wrap them in an INDUSTY_CODE factor.
data_industry_code = load_factor_data_from_csv('sw.csv')
factor_industry_code = Factor(data=data_industry_code,
                              name='industry_code',
                              property_dict={'type': FactorType.INDUSTY_CODE})

# Load the monthly return data.
data_return = load_factor_data_from_csv('return.csv')
# Convert the data into 1-month forward returns.
data_return = fwd_return(data_return)
factor_return = Factor(data=data_return,
                       name='1_Fwd_Return',
                       property_dict={'type': FactorType.FWD_RETURN, 'norm_type': FactorNormType.Industry_Neutral})

# Create a FactorContainer instance and load all the factor information.
# NOTE(review): factor_mv and factor_pb are not defined in this excerpt --
# presumably created earlier in the script; confirm.
fc = FactorContainer(start_date='2014-01-01',
                     end_date='2014-03-01',
                     factors=[factor_mv, factor_pb, factor_return, factor_industry_code])

# Pipeline
# Step 1: handle the few N/A values by replacing them with the median
# (categorical N/A values get the custom value 'other').
step_1 = ('imputer', FactorImputer(numerical_strategy=NAStrategy.MEDIAN,
                                   categorical_strategy=NAStrategy.CUSTOM,
                                   custom_value='other'))
# Step 2: winsorize (remove extreme values).
step_2 = ('winsorize', FactorWinsorizer(quantile_range=(5, 95)))

# Step 3: standardize.
step_3 = ('std', FactorStandardizer())

# Step 4: neutralize.
step_4 = ('neutralize', FactorNeutralizer())
Ejemplo n.º 17
0
    def test_imputer_1(self):
        """Check FactorImputer on a container of three numerical factors.

        Runs the MOST_FREQ and MEDIAN numerical strategies, then adds an
        industry-code factor containing a NaN and checks the CUSTOM
        categorical strategy with ``custom_value='other'``.
        """
        index = pd.MultiIndex.from_product(
            [['2014-01-30', '2014-02-28'], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        data1 = pd.DataFrame(index=index,
                             data=[1.0, 3.0, 3.0, np.nan, 5.0, 5.0, 6.0, 8.0])
        factor_test1 = Factor(data=data1, name='test1')

        data2 = pd.DataFrame(index=index,
                             data=[3.0, 2.0, 3.0, 7.0, 7.0, np.nan, 6.0, 6.0])
        factor_test2 = Factor(data=data2, name='test2')

        data3 = pd.DataFrame(index=index,
                             data=[3.0, 3.0, np.nan, 5.0, 6.0, 7.0, 6.0, 6.0])
        factor_test3 = Factor(data=data3, name='test3')

        fc = FactorContainer('2014-01-30', '2014-02-28',
                             [factor_test1, factor_test2, factor_test3])

        # The expected results are indexed by datetime objects, not strings.
        index = pd.MultiIndex.from_product(
            [[dt(2014, 1, 30), dt(2014, 2, 28)], ['001', '002', '003', '004']],
            names=['trade_date', 'ticker'])
        calculated = FactorImputer(
            numerical_strategy=NAStrategy.MOST_FREQ).fit_transform(fc)
        expected = pd.DataFrame(
            {
                'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0]
            },
            index=index)
        assert_frame_equal(calculated, expected)

        # The same filled values are expected for the MEDIAN strategy.
        calculated = FactorImputer(
            numerical_strategy=NAStrategy.MEDIAN).fit_transform(fc)
        expected = pd.DataFrame(
            {
                'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0]
            },
            index=index)
        assert_frame_equal(calculated, expected)

        industry = pd.DataFrame(
            index=index, data=['a', 'a', 'a', 'a', 'a', 'a', np.nan, 'a'])
        factor_industry = Factor(
            data=industry,
            name='industry',
            property_dict={'type': FactorType.INDUSTY_CODE})
        fc.add_factor(factor=factor_industry)
        calculated = FactorImputer(numerical_strategy=NAStrategy.MEDIAN,
                                   categorical_strategy=NAStrategy.CUSTOM,
                                   custom_value='other').fit_transform(fc)
        calculated.sort_index(axis=1, inplace=True)
        expected = pd.DataFrame(
            {
                'test1': [1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 8.0],
                'test2': [3.0, 2.0, 3.0, 7.0, 7.0, 6.0, 6.0, 6.0],
                'test3': [3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 6.0, 6.0],
                'industry': ['a', 'a', 'a', 'a', 'a', 'a', 'other', 'a']
            },
            index=index,
            dtype=object)
        # Sort the expected columns the same way as ``calculated``. Without
        # this the comparison depends on how the DataFrame constructor orders
        # dict keys (sorted on old pandas, insertion order on newer ones).
        expected.sort_index(axis=1, inplace=True)
        assert_frame_equal(calculated, expected)