def test_idiosyncratic_var_matrix(fn):
    dates = generate_random_dates(4)
    assets = get_assets(3)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [ 0.02769242,  1.34872387,  0.23460972],
                [-0.94728692,  0.68386883, -1.23987235],
                [ 1.93769376, -0.48275934,  0.34957348],
                [ 0.23985234,  0.35897345,  0.34598734]],
            dates, assets),
        'factor_returns': pd.DataFrame([
                [-0.49503261,  1.45332369, -0.08980631],
                [-1.87563271,  0.67894147, -1.11984992],
                [-0.13027172, -0.49001128,  1.67259298],
                [-0.25392567,  0.47320133,  0.04528734]],
                dates),
        'factor_betas': pd.DataFrame([
            [ 0.00590170, -0.07759542, 0.99696746],
            [-0.13077609,  0.98836246, 0.07769983],
            [ 0.99139436,  0.13083807, 0.00431461]]),
        'ann_factor': 250}
    fn_correct_outputs = OrderedDict([
        (
            'idiosyncratic_var_matrix', pd.DataFrame(np.full([3,3], 0.0), assets, assets))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
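# A minimal sketch (an assumption, not the original solution) of a function this test
# would accept: subtract the returns explained by the factor model and keep only the
# diagonal of the annualized residual variances. With as many factors as assets the
# residuals are constant, which is why the expected matrix here is all zeros.
def idiosyncratic_var_matrix_sketch(returns, factor_returns, factor_betas, ann_factor):
    common_returns = pd.DataFrame(
        np.dot(factor_returns, factor_betas.T), returns.index, returns.columns)
    residuals = returns - common_returns
    return pd.DataFrame(
        np.diag(residuals.var(axis=0)) * ann_factor, returns.columns, returns.columns)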
def test_get_return_lookahead(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(5)

    fn_inputs = {
        'close': pd.DataFrame(
            [
                [25.6788, 35.1392, 34.0527],
                [25.1884, 14.3453, 39.9373],
                [62.3457, 92.2524, 65.7893],
                [78.2803, 34.3854, 23.2932],
                [88.8725, 52.223, 34.4107]],
            dates, tickers),
        'lookahead_prices': pd.DataFrame(
            [
                [62.34570000, 92.25240000, 65.78930000],
                [78.28030000, 34.38540000, 23.29320000],
                [88.87250000, 52.22300000, 34.41070000],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'lookahead_returns',
            pd.DataFrame(
                [
                    [0.88702896,  0.96521098,  0.65854789],
                    [1.13391240,  0.87420969, -0.53914925],
                    [0.35450805, -0.56900529, -0.64808965],
                    [np.nan, np.nan, np.nan],
                    [np.nan, np.nan, np.nan]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
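# A minimal sketch (an assumption, not the original solution): the expected values
# are log returns from the current close to the lookahead price, e.g.
# np.log(62.3457 / 25.6788) ~= 0.887.
def get_return_lookahead_sketch(close, lookahead_prices):
    return np.log(lookahead_prices) - np.log(close)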
def test_get_signal_return(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(5)

    fn_inputs = {
        'signal': pd.DataFrame(
            [
                [0, 0, 0],
                [-1, -1, -1],
                [1, 0, 0],
                [0, 0, 0],
                [0, 1, 0]],
            dates, tickers),
        'lookahead_returns': pd.DataFrame(
            [
                [0.88702896, 0.96521098, 0.65854789],
                [1.13391240, 0.87420969, -0.53914925],
                [0.35450805, -0.56900529, -0.64808965],
                [0.38572896, -0.94655617, 0.123564379],
                [np.nan, np.nan, np.nan]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'signal_return',
            pd.DataFrame(
                [
                    [0, 0, 0],
                    [-1.13391240, -0.87420969,  0.53914925],
                    [0.35450805, 0, 0],
                    [0, 0, 0],
                    [np.nan, np.nan, np.nan]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
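# A minimal sketch (an assumption): the signal return is the signed signal applied
# element-wise to the forward return it captures.
def get_signal_return_sketch(signal, lookahead_returns):
    return signal * lookahead_returns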
def test_shift_returns(fn):
    tickers = generate_random_tickers(5)
    dates = pd.DatetimeIndex(['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan, np.nan, np.nan],
                [3.13172138, 0.72709204, 5.76874778, 1.77557845, 0.04098317],
                [-3.78816218, -0.67583590, -4.95433863, -1.67093250, -0.24929051],
                [0.05579709, 0.29199789, 0.00697116, 1.05956179, 0.30686995]],
            dates, tickers),
        'shift_n': 1}
    fn_correct_outputs = OrderedDict([
        (
            'shifted_returns',
            pd.DataFrame(
                [
                    [np.nan, np.nan, np.nan, np.nan, np.nan],
                    [np.nan, np.nan, np.nan, np.nan, np.nan],
                    [3.13172138, 0.72709204, 5.76874778, 1.77557845, 0.04098317],
                    [-3.78816218, -0.67583590, -4.95433863, -1.67093250, -0.24929051]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
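# A minimal sketch (an assumption): shift every ticker's return series down by
# shift_n rows, leaving NaNs at the start.
def shift_returns_sketch(returns, shift_n):
    return returns.shift(shift_n)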
def test_optimal_holdings_strict_factor_get_obj(cl):
    optimal_holdings_strict_factor = cl()
    alpha_vector = pd.DataFrame(
        [-0.58642457, -0.45333845, -0.69993898, -0.06790952],
        get_assets(4),
        ['alpha_vector'])

    fn_inputs = {
        'weights': cvx.Variable(len(alpha_vector)),
        'alpha_vector': alpha_vector}
    fn_correct_outputs = OrderedDict([
        (
            'solution', np.array([-0.07441958, -0.00079418, -0.13721759, 0.21243135]))])

    def solve_problem(weights, alpha_vector):
        constraints = [sum(weights) == 0.0, sum(cvx.abs(weights)) <= 1.0]
        obj = optimal_holdings_strict_factor._get_obj(weights, alpha_vector)
        prob = cvx.Problem(obj, constraints)
        prob.solve(max_iters=500)

        return np.asarray(weights.value).flatten()

    print('Running Integration Test on Problem.solve:')
    print('> constraints = [sum(weights) == 0.0, sum(cvx.abs(weights)) <= 1.0]')
    print('> obj = optimal_holdings_strict_factor._get_obj(weights, alpha_vector)')
    print('> prob = cvx.Problem(obj, constraints)')
    print('> prob.solve(max_iters=500)')
    print('> solution = np.asarray(weights.value).flatten()')
    print('')

    assert_output(solve_problem, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
def test_generate_weighted_returns(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [1.59904743, 1.66397210, 1.67345829],
                [-0.37065629, -0.36541822, -0.36015840],
                [-0.41055669, 0.60004777, 0.00536958]],
            dates, tickers),
        'weights': pd.DataFrame(
            [
                [0.03777059, 0.04733924, 0.05197790],
                [0.82074874, 0.48533938, 0.75792752],
                [0.10196420, 0.05866016, 0.09578226],
                [0.03951647, 0.40866122, 0.09431233]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'weighted_returns',
                pd.DataFrame(
                    [
                        [np.nan, np.nan, np.nan],
                        [1.31241616, 0.80759119, 1.26836009],
                        [-0.03779367, -0.02143549, -0.03449679],
                        [-0.01622375, 0.24521625, 0.00050642]],
                    dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
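# A minimal sketch (an assumption): weighted returns are the element-wise product
# of returns and portfolio weights.
def generate_weighted_returns_sketch(returns, weights):
    return returns * weights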
def test_generate_dollar_volume_weights(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'close': pd.DataFrame(
            [
                [35.4411, 34.1799, 34.0223],
                [92.1131, 91.0543, 90.9572],
                [57.9708, 57.7814, 58.1982],
                [34.1705, 92.453, 58.5107]],
            dates, tickers),
        'volume': pd.DataFrame(
            [
                [9.83683e+06, 1.78072e+07, 8.82982e+06],
                [8.22427e+07, 6.85315e+07, 4.81601e+07],
                [1.62348e+07, 1.30527e+07, 9.51201e+06],
                [1.06742e+07, 5.68313e+07, 9.31601e+06]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'dollar_volume_weights',
            pd.DataFrame(
                [
                    [0.27719777, 0.48394253, 0.23885970],
                     [0.41632975, 0.34293308, 0.24073717],
                     [0.41848548, 0.33536102, 0.24615350],
                     [0.05917255, 0.85239760, 0.08842984]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
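# A minimal sketch (an assumption): weight each ticker by its share of the total
# dollar volume (close * volume) traded that day.
def generate_dollar_volume_weights_sketch(close, volume):
    dollar_volume = close * volume
    return dollar_volume.div(dollar_volume.sum(axis=1), axis=0)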
def test_factor_betas(fn):
    n_components = 3
    dates = generate_random_dates(4)
    assets = get_assets(3)

    pca = PCA(n_components)
    pca.fit(pd.DataFrame(
        [
            [0.21487253,  0.12342312, -0.13245215],
            [0.23423439, -0.23434532,  1.67834324],
            [0.23432445, -0.23563226,  0.23423523],
            [0.24824535, -0.23523435,  0.36235236]],
        dates, assets))

    fn_inputs = {
        'pca': pca,
        'factor_beta_indices': np.array(assets),
        'factor_beta_columns': np.arange(n_components)}
    fn_correct_outputs = OrderedDict([
        (
            'factor_betas', pd.DataFrame(
                [
                    [ 0.00590170, -0.07759542, 0.99696746],
                    [-0.13077609,  0.98836246, 0.07769983],
                    [ 0.99139436,  0.13083807, 0.00431461]],
                fn_inputs['factor_beta_indices'],
                fn_inputs['factor_beta_columns']))])

    assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
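# A minimal sketch (an assumption): the factor betas are the transposed PCA
# components, re-labelled with the requested index and columns.
def factor_betas_sketch(pca, factor_beta_indices, factor_beta_columns):
    return pd.DataFrame(pca.components_.T, factor_beta_indices, factor_beta_columns)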
def test_get_lookahead_prices(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(5)

    fn_inputs = {
        'close': pd.DataFrame(
            [
                [25.6788, 35.1392, 34.0527],
                [25.1884, 14.3453, 39.9373],
                [62.3457, 92.2524, 65.7893],
                [78.2803, 34.3854, 23.2932],
                [88.8725, 52.223, 34.4107]],
            dates, tickers),
        'lookahead_days': 2}
    fn_correct_outputs = OrderedDict([
        (
            'lookahead_prices',
            pd.DataFrame(
                [
                    [62.34570000, 92.25240000, 65.78930000],
                    [78.28030000, 34.38540000, 23.29320000],
                    [88.87250000, 52.22300000, 34.41070000],
                    [np.nan, np.nan, np.nan],
                    [np.nan, np.nan, np.nan]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
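# A minimal sketch (an assumption): the lookahead price is the close shifted back
# by lookahead_days, so the last lookahead_days rows become NaN.
def get_lookahead_prices_sketch(close, lookahead_days):
    return close.shift(-lookahead_days)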
def test_get_top_n(fn):
    tickers = generate_random_tickers(5)
    dates = pd.DatetimeIndex(['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'prev_returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, np.nan],
                [3.13172138, 0.72709204, 5.76874778, 1.77557845, 0.04098317],
                [-3.78816218, -0.67583590, -4.95433863, -1.67093250, -0.24929051]],
            dates, tickers),
        'top_n': 3}
    fn_correct_outputs = OrderedDict([
        (
            'top_stocks',
            pd.DataFrame(
                [
                    [0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0],
                    [1, 0, 1, 1, 0],
                    [0, 1, 0, 1, 1]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
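# A minimal sketch (an assumption): flag the top_n largest previous returns on each
# date with 1, everything else (including all-NaN rows) with 0.
def get_top_n_sketch(prev_returns, top_n):
    top_stocks = pd.DataFrame(0, prev_returns.index, prev_returns.columns)
    for date, row in prev_returns.iterrows():
        top_stocks.loc[date, row.nlargest(top_n).index] = 1
    return top_stocks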
def test_resample_prices(fn):
    tickers = generate_random_tickers(5)
    dates = pd.DatetimeIndex(['2008-08-19', '2008-09-08', '2008-09-28', '2008-10-18', '2008-11-07', '2008-11-27'])
    resampled_dates = pd.DatetimeIndex(['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'close_prices': pd.DataFrame(
            [
                [21.050810483942833, 17.013843810658827, 10.984503755486879, 11.248093428369392, 12.961712733997235],
                [15.63570258751384, 14.69054309070934, 11.353027688995159, 475.74195118202061, 11.959640427803022],
                [482.34539247360806, 35.202580592515041, 3516.5416782257166, 66.405314327318209, 13.503960481087077],
                [10.918933017418304, 17.9086438675435, 24.801265417692324, 12.488954191854916, 10.52435923388642],
                [10.675971965144655, 12.749401436636365, 11.805257579935713, 21.539039489843024, 19.99766036804861],
                [11.545495378369814, 23.981468434099405, 24.974763062186504, 36.031962102997689, 14.304332320024963]],
            dates, tickers),
        'freq': 'M'}
    fn_correct_outputs = OrderedDict([
        (
            'prices_resampled',
            pd.DataFrame(
                [
                        [21.05081048, 17.01384381, 10.98450376, 11.24809343, 12.96171273],
                        [482.34539247, 35.20258059, 3516.54167823, 66.40531433, 13.50396048],
                        [10.91893302, 17.90864387, 24.80126542, 12.48895419, 10.52435923],
                        [11.54549538, 23.98146843, 24.97476306, 36.03196210, 14.30433232]],
                resampled_dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
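# A minimal sketch (an assumption): resample to the requested frequency and keep
# the last observed price in each period.
def resample_prices_sketch(close_prices, freq='M'):
    return close_prices.resample(freq).last()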
def test_optimal_holdings_regualization_get_obj(cl):
    optimal_holdings_regualization = cl()
    alpha_vector = pd.DataFrame(
        [-0.58642457, -0.45333845, -0.69993898, -0.06790952],
        get_assets(4),
        ['alpha_vector'])

    fn_inputs = {
        'weights': cvx.Variable(len(alpha_vector)),
        'alpha_vector': alpha_vector}
    fn_correct_outputs = OrderedDict([
        (
            'solution', np.array([-2.80288449e-10, -4.73562710e-12, -5.12563104e-10, 7.97632862e-10]))])

    def solve_problem(weights, alpha_vector):
        constraints = [sum(weights) == 0.0, sum(cvx.abs(weights)) <= 1.0]
        obj = optimal_holdings_regualization._get_obj(weights, alpha_vector)
        prob = cvx.Problem(obj, constraints)
        prob.solve(max_iters=500)

        return np.asarray(weights.value).flatten()

    print('Running Integration Test on Problem.solve:')
    print('> constraints = [sum(weights) == 0.0, sum(cvx.abs(weights)) <= 1.0]')
    print('> obj = optimal_holdings_regualization._get_obj(weights, alpha_vector)')
    print('> prob = cvx.Problem(obj, constraints)')
    print('> prob.solve(max_iters=500)')
    print('> solution = np.asarray(weights.value).flatten()')
    print('')

    assert_output(solve_problem, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
def test_factor_returns(fn):
    n_components = 3
    dates = generate_random_dates(4)
    assets = get_assets(3)

    pca = PCA(n_components)
    pca.fit(
        pd.DataFrame(
            [
                [0.21487253, 0.12342312, -0.13245215],
                [0.23423439, -0.23434532, 1.67834324],
                [0.23432445, -0.23563226, 0.23423523],
                [0.24824535, -0.23523435, 0.36235236],
            ],
            dates,
            assets,
        ))

    fn_inputs = {
        "pca":
        pca,
        "returns":
        pd.DataFrame(
            [
                [0.02769242, 1.34872387, 0.23460972],
                [-0.94728692, 0.68386883, -1.23987235],
                [1.93769376, -0.48275934, 0.34957348],
                [0.23985234, 0.35897345, 0.34598734],
            ],
            dates,
            assets,
        ),
        "factor_return_indices":
        np.array(dates),
        "factor_return_columns":
        np.arange(n_components),
    }
    fn_correct_outputs = OrderedDict([(
        "factor_returns",
        pd.DataFrame(
            [
                [-0.49503261, 1.45332369, -0.08980631],
                [-1.87563271, 0.67894147, -1.11984992],
                [-0.13027172, -0.49001128, 1.67259298],
                [-0.25392567, 0.47320133, 0.04528734],
            ],
            fn_inputs["factor_return_indices"],
            fn_inputs["factor_return_columns"],
        ),
    )])

    assert_output(fn,
                  fn_inputs,
                  fn_correct_outputs,
                  check_parameter_changes=False)
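# A minimal sketch (an assumption): factor returns come from projecting the raw
# returns onto the fitted PCA components.
def factor_returns_sketch(pca, returns, factor_return_indices, factor_return_columns):
    return pd.DataFrame(
        pca.transform(returns), factor_return_indices, factor_return_columns)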
def test_get_cosine_similarity(fn):
    fn_inputs = {
        'tfidf_matrix': np.array([
                [0.0,           0.57735027, 0.57735027, 0.0,        0.0,        0.0,        0.57735027],
                [0.0,           0.32516555, 0.6503311,  0.0,        0.42755362, 0.42755362, 0.32516555],
                [0.70710678,    0.0,        0.0,        0.70710678, 0.0,        0.0,        0.0]])}
    fn_correct_outputs = OrderedDict([
        (
            'cosine_similarities', [0.75093766927060945, 0.0])])

    assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
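# A minimal sketch (an assumption): cosine similarity between each document's
# TF-IDF row and the next document's row.
def get_cosine_similarity_sketch(tfidf_matrix):
    return [
        np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
        for u, v in zip(tfidf_matrix[:-1], tfidf_matrix[1:])]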
def test_get_jaccard_similarity(fn):
    fn_inputs = {
        'bag_of_words_matrix': np.array([
                [0, 1, 1, 0, 0, 0, 1],
                [0, 1, 2, 0, 1, 1, 1],
                [1, 0, 0, 1, 0, 0, 0]])}
    fn_correct_outputs = OrderedDict([
        (
            'jaccard_similarities', [0.7142857142857143, 0.0])])

    assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
def test_get_portfolio_turnover(fn):
    fn_inputs = {
        'all_rebalance_weights': [
            np.array([0.00012205033508460705, 0.0003019915743383353, 0.999575958090577]),
            np.array([1.305709815242165e-05, 8.112998801084706e-06, 0.9999788299030465]),
            np.array([0.3917481750142896, 0.5607687848565064, 0.0474830401292039])],
        'shift_size': 3,
        'rebalance_count': 2}
    fn_correct_outputs = OrderedDict([('portfolio_turnover', 80.0434875733)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_portfolio_turnover(fn):
    fn_inputs = {
        'all_rebalance_weights': [
            np.array([0.00012205033508460705, 0.0003019915743383353, 0.999575958090577]),
            np.array([1.305709815242165e-05, 8.112998801084706e-06, 0.9999788299030465]),
            np.array([0.3917481750142896, 0.5607687848565064, 0.0474830401292039])],
        'shift_size': 3,
        'rebalance_count': 11}
    fn_correct_outputs = OrderedDict([('portfolio_turnover', 14.553361377)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
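# A minimal sketch (an assumption): total absolute weight change across successive
# rebalances, averaged per rebalance and annualized by 252 trading days / shift_size.
def get_portfolio_turnover_sketch(all_rebalance_weights, shift_size, rebalance_count,
                                  n_trading_days_in_year=252):
    total_turnover = sum(
        np.abs(new_weights - old_weights).sum()
        for old_weights, new_weights in zip(all_rebalance_weights[:-1],
                                            all_rebalance_weights[1:]))
    return total_turnover / rebalance_count * n_trading_days_in_year / shift_size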
def test_find_outliers(fn):
    tickers = generate_random_tickers(3)

    fn_inputs = {
        "ks_values": pd.Series([0.20326939, 0.34826827, 0.60256811], tickers),
        "p_values": pd.Series([0.98593727, 0.48009144, 0.02898631], tickers),
        "ks_threshold": 0.5,
        "pvalue_threshold": 0.05,
    }
    fn_correct_outputs = OrderedDict([("outliers", set([tickers[2]]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
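# A minimal sketch (an assumption): an outlier is any ticker whose KS statistic is
# above the KS threshold while its p-value is below the p-value threshold.
def find_outliers_sketch(ks_values, p_values, ks_threshold, pvalue_threshold):
    outlier_mask = (ks_values > ks_threshold) & (p_values < pvalue_threshold)
    return set(ks_values[outlier_mask].index)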
def test_analyze_returns(filename='net_returns.csv'):
    """Test run analyze_returns() with net strategy returns from a file."""
    net_returns = pd.read_csv(filename, header=0, index_col=0, parse_dates=True, squeeze=True)
    fn_inputs = {'net_returns': net_returns}
    fn_outputs = OrderedDict([
        (
            '(t, p)',
            (0.760, 0.226606)
        )
    ])

    assert_output(analyze_returns, fn_inputs, fn_outputs)
def test_tracking_error(fn):
    dates = generate_random_dates(4)

    fn_inputs = {
        'benchmark_returns_by_date':
        pd.Series([np.nan, 0.99880148, 0.99876653, 1.00024411], dates),
        'etf_returns_by_date':
        pd.Series([np.nan, 0.63859274, 0.93475823, 2.57295727], dates)
    }
    fn_correct_outputs = OrderedDict([('tracking_error', 16.5262431971)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
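# A minimal sketch (an assumption): tracking error as the annualized standard
# deviation of the daily active return (ETF minus benchmark), using sqrt(252).
def tracking_error_sketch(benchmark_returns_by_date, etf_returns_by_date):
    return np.sqrt(252) * (etf_returns_by_date - benchmark_returns_by_date).std()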
def test_estimate_volatility(fn=estimate_volatility):
    """Test run get_most_volatile() with stock prices from a file."""
    prices = pd.read_csv('data.csv',
                         parse_dates=['date'],
                         index_col='date',
                         squeeze=True)
    fn_inputs = {'prices': prices, 'l': 0.7}
    fn_correct_outputs = OrderedDict([
        ('Last estimate of log return volatility', 0.004940582044719361)
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_find_outliers(fn=utils.find_outliers):
    tickers = generate_random_tickers(3)

    fn_inputs = {
        'ks_values': pd.Series([0.20326939, 0.34826827, 0.60256811], tickers),
        'p_values': pd.Series([0.98593727, 0.48009144, 0.02898631], tickers),
        'ks_threshold': 0.5,
        'pvalue_threshold': 0.05
    }
    fn_correct_outputs = OrderedDict([('outliers', set([tickers[2]]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_analyze_alpha(fn=utils.analyze_alpha):
    dates = pd.DatetimeIndex(
        ['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'expected_portfolio_returns_by_date':
        pd.Series([0.00000000, 0.00000000, 0.01859903, -0.41819699], dates)
    }
    fn_correct_outputs = OrderedDict([('t_value', -0.940764456618),
                                      ('p_value', 0.208114098207)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
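# A minimal sketch (an assumption): a one-sample t-test of the portfolio returns
# against a zero-mean null, reporting the one-sided p-value.
from scipy import stats

def analyze_alpha_sketch(expected_portfolio_returns_by_date):
    t_value, p_value = stats.ttest_1samp(expected_portfolio_returns_by_date, 0)
    return t_value, p_value / 2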
def test_resample_prices(fn=utils.resample_prices):
    tickers = generate_random_tickers(5)
    dates = pd.DatetimeIndex([
        '2008-08-19', '2008-09-08', '2008-09-28', '2008-10-18', '2008-11-07',
        '2008-11-27'
    ])
    resampled_dates = pd.DatetimeIndex(
        ['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'close_prices':
        pd.DataFrame(
            [[
                21.050810483942833, 17.013843810658827, 10.984503755486879,
                11.248093428369392, 12.961712733997235
            ],
             [
                 15.63570258751384, 14.69054309070934, 11.353027688995159,
                 475.74195118202061, 11.959640427803022
             ],
             [
                 482.34539247360806, 35.202580592515041, 3516.5416782257166,
                 66.405314327318209, 13.503960481087077
             ],
             [
                 10.918933017418304, 17.9086438675435, 24.801265417692324,
                 12.488954191854916, 10.52435923388642
             ],
             [
                 10.675971965144655, 12.749401436636365, 11.805257579935713,
                 21.539039489843024, 19.99766036804861
             ],
             [
                 11.545495378369814, 23.981468434099405, 24.974763062186504,
                 36.031962102997689, 14.304332320024963
             ]], dates, tickers),
        'freq':
        'M'
    }
    fn_correct_outputs = OrderedDict([
        ('prices_resampled',
         pd.DataFrame([[
             21.05081048, 17.01384381, 10.98450376, 11.24809343, 12.96171273
         ], [
             482.34539247, 35.20258059, 3516.54167823, 66.40531433, 13.50396048
         ], [
             10.91893302, 17.90864387, 24.80126542, 12.48895419, 10.52435923
         ], [11.54549538, 23.98146843, 24.97476306, 36.03196210, 14.30433232]],
                      resampled_dates, tickers))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_optimal_weights(fn):
    fn_inputs = {
        'covariance_returns': np.array(
            [
                [0.143123, 0.0216755, 0.014273],
                [0.0216755, 0.0401826, 0.00663152],
                [0.014273, 0.00663152, 0.044963]]),
        'index_weights': pd.Series([0.23623892, 0.0125628, 0.7511982], ['A', 'B', 'C'])}
    fn_correct_outputs = OrderedDict([
        (
            'x',
            np.array([0.23623897, 0.01256285, 0.75119817]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_lemmatize_words(fn):
    fn_inputs = {
        'words':
        ['cow', 'running', 'jeep', 'swimmers', 'tackle', 'throw', 'driven']
    }
    fn_correct_outputs = OrderedDict([
        ('lemmatized_words',
         ['cow', 'run', 'jeep', 'swimmers', 'tackle', 'throw', 'drive'])
    ])

    assert_output(fn,
                  fn_inputs,
                  fn_correct_outputs,
                  check_parameter_changes=False)
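# A minimal sketch (an assumption): lemmatize every word as a verb with NLTK's
# WordNetLemmatizer, which is why a noun like 'swimmers' passes through unchanged.
from nltk.stem import WordNetLemmatizer

def lemmatize_words_sketch(words):
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word, 'v') for word in words]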
def test_lemmatize_words(fn):
    fn_inputs = {
        "words":
        ["cow", "running", "jeep", "swimmers", "tackle", "throw", "driven"]
    }
    fn_correct_outputs = OrderedDict([(
        "lemmatized_words",
        ["cow", "run", "jeep", "swimmers", "tackle", "throw", "drive"],
    )])

    assert_output(fn,
                  fn_inputs,
                  fn_correct_outputs,
                  check_parameter_changes=False)
def test_get_optimal_weights(fn):
    fn_inputs = {
        'covariance_returns':
        np.array([[0.143123, 0.0216755, 0.014273],
                  [0.0216755, 0.0401826, 0.00663152],
                  [0.014273, 0.00663152, 0.044963]]),
        'index_weights':
        pd.Series([0.23623892, 0.0125628, 0.7511982], ['A', 'B', 'C'])
    }
    fn_correct_outputs = OrderedDict([
        ('x', np.array([0.23623897, 0.01256285, 0.75119817]))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_rebalance_portfolio(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(11)

    fn_inputs = {
        'returns':
        pd.DataFrame(
            [[np.nan, np.nan, np.nan], [-0.02202381, 0.02265285, 0.01441961],
             [0.01947657, 0.00551985, 0.00047382],
             [0.00537313, -0.00803232, 0.01160313],
             [0.00593824, -0.00567773, 0.02247191],
             [0.02479339, 0.01758824, -0.00824176],
             [-0.0109447, -0.00383568, 0.01361958],
             [0.01164822, 0.01558719, 0.00614894],
             [0.0109384, -0.00182079, 0.02900868],
             [0.01138952, 0.00218049, -0.00954495],
             [0.0106982, 0.00644535, -0.01815329]], dates, tickers),
        'index_weights':
        pd.DataFrame([[0.00449404, 0.11586048, 0.00359727],
                      [
                          0.00403487,
                          0.12534048,
                          0.0034428,
                      ], [0.00423485, 0.12854258, 0.00347404],
                      [0.00395679, 0.1243466, 0.00335064],
                      [0.00368729, 0.11750295, 0.00333929],
                      [0.00369562, 0.11447422, 0.00325973],
                      [
                          0.00379612,
                          0.11088075,
                          0.0031734,
                      ], [0.00366501, 0.10806014, 0.00314648],
                      [0.00361268, 0.10376514, 0.00323257],
                      [0.00358844, 0.10097531, 0.00319009],
                      [0.00362045, 0.09791232, 0.00318071]], dates, tickers),
        'shift_size':
        2,
        'chunk_size':
        4
    }
    fn_correct_outputs = OrderedDict([('all_rebalance_weights', [
        np.array([0.29341237, 0.41378419, 0.29280344]),
        np.array([0.29654088, 0.40731481, 0.29614432]),
        np.array([0.29868214, 0.40308791, 0.29822995]),
        np.array([0.30100044, 0.39839644, 0.30060312])
    ])])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_mean_reversion_5day_sector_neutral_smoothed(fn):
    column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed'
    start_date_str = '2015-01-05'
    end_date_str = '2015-01-07'

    # Build engine
    trading_calendar = get_calendar('NYSE')
    bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME)
    engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

    # Build pipeline
    universe_window_length = 2
    universe_asset_count = 4
    universe = AverageDollarVolume(window_length=universe_window_length).top(universe_asset_count)
    pipeline = Pipeline(screen=universe)

    run_pipeline_args = {
        'pipeline': pipeline,
        'start_date': pd.Timestamp(start_date_str, tz='utc'),
        'end_date': pd.Timestamp(end_date_str, tz='utc')}
    fn_inputs = {
        'window_length': 3,
        'universe': universe,
        'sector': project_helper.Sector()}
    fn_correct_outputs = OrderedDict([
        (
            'pipeline_out', pd.DataFrame(
                [0.44721360, 1.34164079, -1.34164079, -0.44721360,
                 1.34164079, 0.44721360, -1.34164079, -0.44721360,
                 0.44721360, 1.34164079, -1.34164079, -0.44721360],
                engine.run_pipeline(**run_pipeline_args).index,
                [column_name]))])

    print('Running Integration Test on pipeline:')
    print('> start_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(start_date_str))
    print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str))
    print('> universe = AverageDollarVolume(window_length={}).top({})'.format(
        universe_window_length, universe_asset_count))
    print('> factor = {}('.format(fn.__name__))
    print('    window_length={},'.format(fn_inputs['window_length']))
    print('    universe=universe,')
    print('    sector=project_helper.Sector())')
    print('> pipeline.add(factor, \'{}\')'.format(column_name))
    print('> engine.run_pipeline(pipeline, start_date, end_date)')
    print('')

    pipeline.add(fn(**fn_inputs), column_name)
    assert_output(engine.run_pipeline, run_pipeline_args, fn_correct_outputs, check_parameter_changes=False)
def test_get_optimal_weights(fn):
    fn_inputs = {
        "covariance_returns": np.array(
            [
                [0.143123, 0.0216755, 0.014273],
                [0.0216755, 0.0401826, 0.00663152],
                [0.014273, 0.00663152, 0.044963],
            ]
        ),
        "index_weights": pd.Series([0.23623892, 0.0125628, 0.7511982], ["A", "B", "C"]),
    }
    fn_correct_outputs = OrderedDict(
        [("x", np.array([0.23623897, 0.01256285, 0.75119817]))]
    )

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_analyze_alpha(fn):
    dates = pd.DatetimeIndex(['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'expected_portfolio_returns_by_date': pd.Series(
            [0.00000000, 0.00000000, 0.01859903, -0.41819699],
            dates)}
    fn_correct_outputs = OrderedDict([
        (
            't_value',
            -0.940764456618),
        (
            'p_value',
            0.208114098207)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_tracking_error(fn):
    dates = generate_random_dates(4)

    fn_inputs = {
        'benchmark_returns_by_date': pd.Series(
                [np.nan, 0.99880148, 0.99876653, 1.00024411],
                dates),
        'etf_returns_by_date': pd.Series(
                [np.nan, 0.63859274, 0.93475823, 2.57295727],
                dates)}
    fn_correct_outputs = OrderedDict([
        (
            'tracking_error',
            16.5262431971)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_optimal_holdings_get_constraints(cl):
    optimal_holdings = cl()
    x_size = 3
    weights_size = 4

    fn_inputs = {'weights': cvx.Variable(weights_size)}
    fn_correct_outputs = OrderedDict([
        ('solution',
         np.array([-0.01095207, 0.0027576, 0.02684978, -0.01865519]))
    ])

    def solve_problem(weights):
        x = np.diag(np.arange(x_size))
        s = np.diag(np.arange(weights_size))
        factor_betas = np.arange(weights_size * x_size).reshape(
            [weights_size, x_size])
        risk = cvx.quad_form(weights * factor_betas, x) + cvx.quad_form(
            weights, s)
        constraints = optimal_holdings._get_constraints(weights, factor_betas,
                                                         risk)
        obj = cvx.Maximize([0, 1, 5, -1] * weights)
        prob = cvx.Problem(obj, constraints)
        prob.solve(max_iters=500)

        return np.asarray(weights.value).flatten()

    print('\nRunning Integration Test on Problem.solve:')
    print('> x = np.diag(np.arange({}))'.format(x_size))
    print('> s = np.diag(np.arange({}))'.format(weights_size))
    print('> factor_betas = np.arange({} * {}).reshape([{}, {}])'.format(
        weights_size, x_size, weights_size, x_size))
    print(
        '> risk = cvx.quad_form(weights * factor_betas, x) + cvx.quad_form(weights, s)'
    )
    print(
        '> constraints = optimal_holdings._get_constraints(weights, factor_betas, risk)'
    )
    print('> obj = cvx.Maximize([0, 1, 5, -1] * weights)')
    print('> prob = cvx.Problem(obj, constraints)')
    print('> prob.solve(max_iters=500)')
    print('> solution = np.asarray(weights.value).flatten()')
    print('')

    assert_output(solve_problem,
                  fn_inputs,
                  fn_correct_outputs,
                  check_parameter_changes=False)
def test_optimize_twoasset_portfolio(fn=quiz.optimize_twoasset_portfolio):
    varA, varB, rAB = 0.1, 0.05, 0.25
    cov = np.sqrt(varA) * np.sqrt(varB) * rAB
    x = cvx.Variable(2)
    P = np.array([[varA, cov], [cov, varB]])
    quad_form = cvx.quad_form(x, P)
    objective = cvx.Minimize(quad_form)
    constraints = [sum(x) == 1]
    problem = cvx.Problem(objective, constraints)
    _ = problem.solve()
    xA, xB = x.value

    fn_inputs = {'varA': varA, 'varB': varB, 'rAB': rAB}

    fn_correct_outputs = OrderedDict([('xA', xA), ('xB', xB)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_is_normal_ks(fn=is_normal_ks):
    sample_normal = stats.norm.rvs(loc=0.0, scale=1.0, size=(1000, ))
    fn_inputs = {'sample': sample_normal}

    fn_correct_outputs = OrderedDict([('normal', np.True_)])

    assert_output(fn, fn_inputs, fn_correct_outputs)

    sample_not_normal = stats.lognorm.rvs(s=0.5,
                                          loc=0.0,
                                          scale=1.0,
                                          size=(1000, ))
    fn_inputs = {'sample': sample_not_normal}

    fn_correct_outputs = OrderedDict([('not_normal', np.False_)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_document_type(fn):
    doc = '\n' \
        '<TYPE>10-K\n' \
        '<SEQUENCE>1\n' \
        '<FILENAME>test-20171231x10k.htm\n' \
        '<DESCRIPTION>10-K\n' \
        '<TEXT>\n' \
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
        '...'

    fn_inputs = {'doc': doc}
    fn_correct_outputs = OrderedDict([('doc_type', '10-k')])

    assert_output(fn,
                  fn_inputs,
                  fn_correct_outputs,
                  check_parameter_changes=False)
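# A minimal sketch (an assumption): pull the value that follows the <TYPE> tag and
# lower-case it.
import re

def get_document_type_sketch(doc):
    doc_type = re.search(r'<TYPE>[^\n]+', doc).group()[len('<TYPE>'):]
    return doc_type.lower()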
def test_filter_signals(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(10)

    fn_inputs = {
        "signal":
        pd.DataFrame(
            [
                [0, 0, 0],
                [-1, -1, -1],
                [1, 0, -1],
                [0, 0, 0],
                [1, 0, 0],
                [0, 1, 0],
                [0, 0, 1],
                [0, -1, 1],
                [-1, 0, 0],
                [0, 0, 0],
            ],
            dates,
            tickers,
        ),
        "lookahead_days":
        3,
    }
    fn_correct_outputs = OrderedDict([(
        "filtered_signal",
        pd.DataFrame(
            [
                [0, 0, 0],
                [-1, -1, -1],
                [1, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                [0, 1, 0],
                [0, 0, 1],
                [0, -1, 0],
                [-1, 0, 0],
                [0, 0, 0],
            ],
            dates,
            tickers,
        ),
    )])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_rebalance_portfolio(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(11)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [-0.02202381, 0.02265285, 0.01441961],
                [0.01947657, 0.00551985, 0.00047382],
                [0.00537313, -0.00803232, 0.01160313],
                [0.00593824, -0.00567773, 0.02247191],
                [0.02479339, 0.01758824, -0.00824176],
                [-0.0109447, -0.00383568, 0.01361958],
                [0.01164822, 0.01558719, 0.00614894],
                [0.0109384, -0.00182079, 0.02900868],
                [0.01138952, 0.00218049, -0.00954495],
                [0.0106982, 0.00644535, -0.01815329]],
            dates, tickers),
        'index_weights': pd.DataFrame(
            [
                [0.00449404, 0.11586048, 0.00359727],
                [0.00403487, 0.12534048, 0.0034428, ],
                [0.00423485, 0.12854258, 0.00347404],
                [0.00395679, 0.1243466, 0.00335064],
                [0.00368729, 0.11750295, 0.00333929],
                [0.00369562, 0.11447422, 0.00325973],
                [0.00379612, 0.11088075, 0.0031734, ],
                [0.00366501, 0.10806014, 0.00314648],
                [0.00361268, 0.10376514, 0.00323257],
                [0.00358844, 0.10097531, 0.00319009],
                [0.00362045, 0.09791232, 0.00318071]],
            dates, tickers),
        'shift_size': 2,
        'chunk_size': 4}
    fn_correct_outputs = OrderedDict([
        (
            'all_rebalance_weights',
            [
                np.array([0.29341237, 0.41378419, 0.29280344]),
                np.array([0.29654088, 0.40731481, 0.29614432]),
                np.array([0.29868214, 0.40308791, 0.29822995]),
                np.array([0.30100044, 0.39839644, 0.30060312])]
        )])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_return_lookahead(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(5)

    fn_inputs = {
        "close":
        pd.DataFrame(
            [
                [25.6788, 35.1392, 34.0527],
                [25.1884, 14.3453, 39.9373],
                [62.3457, 92.2524, 65.7893],
                [78.2803, 34.3854, 23.2932],
                [88.8725, 52.223, 34.4107],
            ],
            dates,
            tickers,
        ),
        "lookahead_prices":
        pd.DataFrame(
            [
                [62.34570000, 92.25240000, 65.78930000],
                [78.28030000, 34.38540000, 23.29320000],
                [88.87250000, 52.22300000, 34.41070000],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
            ],
            dates,
            tickers,
        ),
    }
    fn_correct_outputs = OrderedDict([(
        "lookahead_returns",
        pd.DataFrame(
            [
                [0.88702896, 0.96521098, 0.65854789],
                [1.13391240, 0.87420969, -0.53914925],
                [0.35450805, -0.56900529, -0.64808965],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
            ],
            dates,
            tickers,
        ),
    )])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_long_short(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        "close":
        pd.DataFrame(
            [
                [25.6788, 35.1392, 34.0527],
                [25.1884, 14.3453, 39.9373],
                [78.2803, 34.3854, 23.2932],
                [88.8725, 52.223, 34.4107],
            ],
            dates,
            tickers,
        ),
        "lookback_high":
        pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [92.11310000, 91.05430000, 90.95720000],
                [35.4411, 34.1799, 34.0223],
                [92.11310000, 91.05430000, 90.95720000],
            ],
            dates,
            tickers,
        ),
        "lookback_low":
        pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [34.1705, 92.453, 58.5107],
                [15.67180000, 12.34530000, 34.05270000],
                [27.18340000, 12.34530000, 23.29320000],
            ],
            dates,
            tickers,
        ),
    }
    fn_correct_outputs = OrderedDict([(
        "long_short",
        pd.DataFrame([[0, 0, 0], [-1, -1, -1], [1, 1, -1], [0, 0, 0]], dates,
                     tickers),
    )])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_predict_portfolio_risk(fn):
    assets = get_assets(3)

    fn_inputs = {
        'factor_betas': pd.DataFrame([
            [-0.04316847, 0.01955111, -0.00993375,  0.01054038],
            [-0.05874471, 0.19637679,  0.07868756,  0.08209582],
            [-0.03433256, 0.03451503,  0.01133839, -0.02543666]],
            assets),
        'factor_cov_matrix': np.diag([14.01830425, 1.10591127, 0.77099145, 0.18725609]),
        'idiosyncratic_var_matrix': pd.DataFrame(np.diag([0.02272535, 0.05190083, 0.03040361]), assets, assets),
        'weights': pd.DataFrame([0.0, 0.0, 0.25], assets)}
    fn_correct_outputs = OrderedDict([
        (
            'portfolio_risk_prediction', 0.0550369570517)])

    assert_output(fn, fn_inputs, fn_correct_outputs)
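# A minimal sketch (an assumption): portfolio volatility under the factor model,
# sqrt(w' (B F B' + S) w).
def predict_portfolio_risk_sketch(factor_betas, factor_cov_matrix,
                                  idiosyncratic_var_matrix, weights):
    covariance = (factor_betas.values @ factor_cov_matrix @ factor_betas.values.T
                  + idiosyncratic_var_matrix.values)
    return np.sqrt(weights.values.T @ covariance @ weights.values).item()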
def test_calculate_oob_score(fn):
    n_estimators = 3
    n_features = 2
    n_samples = 1000

    noise = np.random.RandomState(0).random_sample([n_estimators, n_samples]) * n_samples
    x = np.arange(n_estimators * n_samples * n_features).reshape([n_estimators, n_samples, n_features])
    y = np.sum(x, axis=-1) + noise
    estimators = [
        RandomForestRegressor(300, oob_score=True, n_jobs=-1, random_state=101).fit(x[estimator_i], y[estimator_i])
        for estimator_i in range(n_estimators)]

    fn_inputs = {
        'classifiers': estimators}
    fn_correct_outputs = OrderedDict([('oob_score', 0.911755651666)])

    assert_output(fn, fn_inputs, fn_correct_outputs, check_parameter_changes=False)
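# A minimal sketch (an assumption): the ensemble OOB score is the mean of the
# individual forests' out-of-bag scores.
def calculate_oob_score_sketch(classifiers):
    return np.mean([clf.oob_score_ for clf in classifiers])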
def test_calculate_cumulative_returns(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'returns':
        pd.DataFrame(
            [[np.nan, np.nan, np.nan], [1.59904743, 1.66397210, 1.67345829],
             [-0.37065629, -0.36541822, -0.36015840],
             [-0.41055669, 0.60004777, 0.00536958]], dates, tickers)
    }
    fn_correct_outputs = OrderedDict([
        ('cumulative_returns',
         pd.Series([np.nan, 5.93647782, -0.57128454, -0.68260542], dates))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
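# A minimal sketch (an assumption): treat each row as already-weighted asset
# returns, sum them into a portfolio return per date, and compound over time.
def calculate_cumulative_returns_sketch(returns):
    portfolio_returns = returns.sum(axis=1, min_count=1)
    return (portfolio_returns + 1).cumprod()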
def test_generate_weighted_returns(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        "returns": pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [1.59904743, 1.66397210, 1.67345829],
                [-0.37065629, -0.36541822, -0.36015840],
                [-0.41055669, 0.60004777, 0.00536958],
            ],
            dates,
            tickers,
        ),
        "weights": pd.DataFrame(
            [
                [0.03777059, 0.04733924, 0.05197790],
                [0.82074874, 0.48533938, 0.75792752],
                [0.10196420, 0.05866016, 0.09578226],
                [0.03951647, 0.40866122, 0.09431233],
            ],
            dates,
            tickers,
        ),
    }
    fn_correct_outputs = OrderedDict(
        [
            (
                "weighted_returns",
                pd.DataFrame(
                    [
                        [np.nan, np.nan, np.nan],
                        [1.31241616, 0.80759119, 1.26836009],
                        [-0.03779367, -0.02143549, -0.03449679],
                        [-0.01622375, 0.24521625, 0.00050642],
                    ],
                    dates,
                    tickers,
                ),
            )
        ]
    )

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_calculate_returns(fn):
    tickers = generate_random_tickers(5)
    dates = generate_random_dates(6)

    fn_inputs = {
        'close':
        pd.DataFrame(
            [[
                21.050810483942833, 17.013843810658827, 10.984503755486879,
                11.248093428369392, 12.961712733997235
            ],
             [
                 15.63570258751384, 14.69054309070934, 11.353027688995159,
                 475.74195118202061, 11.959640427803022
             ],
             [
                 482.34539247360806, 35.202580592515041, 3516.5416782257166,
                 66.405314327318209, 13.503960481087077
             ],
             [
                 10.918933017418304, 17.9086438675435, 24.801265417692324,
                 12.488954191854916, 10.52435923388642
             ],
             [
                 10.675971965144655, 12.749401436636365, 11.805257579935713,
                 21.539039489843024, 19.99766036804861
             ],
             [
                 11.545495378369814, 23.981468434099405, 24.974763062186504,
                 36.031962102997689, 14.304332320024963
             ]], dates, tickers)
    }
    fn_correct_outputs = OrderedDict([
        ('returns',
         pd.DataFrame([
             [np.nan, np.nan, np.nan, np.nan, np.nan],
             [-0.25723988, -0.13655355, 0.03354944, 41.29534136, -0.07731018],
             [29.84897463, 1.39627496, 308.74483411, -0.86041737, 0.12912763],
             [-0.97736283, -0.49126900, -0.99294726, -0.81192839, -0.22064647],
             [-0.02225135, -0.28808672, -0.52400584, 0.72464717, 0.90013092],
             [0.08144677, 0.88098779, 1.11556274, 0.67286764, -0.28469971]
         ], dates, tickers))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
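# A minimal sketch (an assumption): simple daily returns; the first row has no
# prior close, so it stays NaN.
def calculate_returns_sketch(close):
    return close / close.shift(1) - 1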
def test_generate_dollar_volume_weights(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        "close": pd.DataFrame(
            [
                [35.4411, 34.1799, 34.0223],
                [92.1131, 91.0543, 90.9572],
                [57.9708, 57.7814, 58.1982],
                [34.1705, 92.453, 58.5107],
            ],
            dates,
            tickers,
        ),
        "volume": pd.DataFrame(
            [
                [9.83683e06, 1.78072e07, 8.82982e06],
                [8.22427e07, 6.85315e07, 4.81601e07],
                [1.62348e07, 1.30527e07, 9.51201e06],
                [1.06742e07, 5.68313e07, 9.31601e06],
            ],
            dates,
            tickers,
        ),
    }
    fn_correct_outputs = OrderedDict(
        [
            (
                "dollar_volume_weights",
                pd.DataFrame(
                    [
                        [0.27719777, 0.48394253, 0.23885970],
                        [0.41632975, 0.34293308, 0.24073717],
                        [0.41848548, 0.33536102, 0.24615350],
                        [0.05917255, 0.85239760, 0.08842984],
                    ],
                    dates,
                    tickers,
                ),
            )
        ]
    )

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_sharpe_ratio(fn):
    dates = generate_random_dates(4)
    factor_names = ['Factor {}'.format(i) for i in range(3)]

    fn_inputs = {
        'factor_returns': pd.DataFrame(
            [
                [ 0.00069242,  0.00072387,  0.00002972],
                [-0.00028692,  0.00086883, -0.00007235],
                [-0.00066376, -0.00045934,  0.00007348],
                [ 0.00085234,  0.00093345,  0.00008734]],
            dates, factor_names),
        'annualization_factor': 16.0}
    fn_correct_outputs = OrderedDict([
        (
            'sharpe_ratio', pd.Series([3.21339895, 12.59157330, 6.54485802], factor_names))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
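# A minimal sketch (an assumption): the annualization_factor passed in is already
# the square-root factor (16 ~ sqrt(252)), so it scales mean/std directly.
def sharpe_ratio_sketch(factor_returns, annualization_factor):
    return annualization_factor * factor_returns.mean() / factor_returns.std()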
def test_find_outliers(fn):
    tickers = generate_random_tickers(3)

    fn_inputs = {
        'ks_values': pd.Series(
            [0.20326939, 0.34826827, 0.60256811],
            tickers),
        'p_values': pd.Series(
            [0.98593727, 0.48009144, 0.02898631],
            tickers),
        'ks_threshold': 0.5,
        'pvalue_threshold': 0.05}
    fn_correct_outputs = OrderedDict([
        (
            'outliers',
            set([tickers[2]]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_factor_cov_matrix(fn):
    dates = generate_random_dates(4)

    fn_inputs = {
        'factor_returns': pd.DataFrame([
                [-0.49503261,  1.45332369, -0.08980631],
                [-1.87563271,  0.67894147, -1.11984992],
                [-0.13027172, -0.49001128,  1.67259298],
                [-0.25392567,  0.47320133,  0.04528734]],
                dates),
        'ann_factor': 250}
    fn_correct_outputs = OrderedDict([
        (
            'factor_cov_matrix', np.array([
                [162.26559808, 0.0, 0.0],
                [0.0, 159.86284454, 0.0],
                [0.0, 0.0, 333.09785876]]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
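# A minimal sketch (an assumption): PCA factors are uncorrelated, so the factor
# covariance is just the annualized variances on the diagonal.
def factor_cov_matrix_sketch(factor_returns, ann_factor):
    return np.diag(factor_returns.var(axis=0, ddof=1) * ann_factor)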
def test_get_high_lows_lookback(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'high': pd.DataFrame(
            [
                [35.4411, 34.1799, 34.0223],
                [92.1131, 91.0543, 90.9572],
                [57.9708, 57.7814, 58.1982],
                [34.1705, 92.453, 58.5107]],
            dates, tickers),
        'low': pd.DataFrame(
            [
                [15.6718, 75.1392, 34.0527],
                [27.1834, 12.3453, 95.9373],
                [28.2503, 24.2854, 23.2932],
                [86.3725, 32.223, 38.4107]],
            dates, tickers),
        'lookback_days': 2}
    fn_correct_outputs = OrderedDict([
        (
            'lookback_high',
            pd.DataFrame(
                [
                    [np.nan, np.nan, np.nan],
                    [np.nan, np.nan, np.nan],
                    [92.11310000, 91.05430000, 90.95720000],
                    [92.11310000, 91.05430000, 90.95720000]],
                dates, tickers)),
        (
            'lookback_low',
            pd.DataFrame(
                [
                    [np.nan, np.nan, np.nan],
                    [np.nan, np.nan, np.nan],
                    [15.67180000, 12.34530000, 34.05270000],
                    [27.18340000, 12.34530000, 23.29320000]],
                dates, tickers))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
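# A minimal sketch (an assumption): rolling extremes over the previous
# lookback_days, excluding the current day via the one-day shift.
def get_high_lows_lookback_sketch(high, low, lookback_days):
    lookback_high = high.shift(1).rolling(lookback_days).max()
    lookback_low = low.shift(1).rolling(lookback_days).min()
    return lookback_high, lookback_low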
def test_calculate_cumulative_returns(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [1.59904743, 1.66397210, 1.67345829],
                [-0.37065629, -0.36541822, -0.36015840],
                [-0.41055669, 0.60004777, 0.00536958]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'cumulative_returns',
            pd.Series(
                [np.nan, 5.93647782, -0.57128454, -0.68260542],
                dates))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_covariance_returns(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [1.59904743, 1.66397210, 1.67345829],
                [-0.37065629, -0.36541822, -0.36015840],
                [-0.41055669, 0.60004777, 0.00536958]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([(
        'returns_covariance',
        np.array(
            [
                [0.89856076, 0.7205586, 0.8458721],
                [0.7205586, 0.78707297, 0.76450378],
                [0.8458721, 0.76450378, 0.83182775]]))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
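# A minimal sketch (an assumption): fill missing returns with zero, then take the
# sample covariance across tickers.
def get_covariance_returns_sketch(returns):
    return np.cov(returns.fillna(0), rowvar=False)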
def test_idiosyncratic_var_vector(fn):
    dates = generate_random_dates(4)
    assets = get_assets(3)

    fn_inputs = {
        'returns': pd.DataFrame(
            [
                [ 0.02769242,  1.34872387,  0.23460972],
                [-0.94728692,  0.68386883, -1.23987235],
                [ 1.93769376, -0.48275934,  0.34957348],
                [ 0.23985234,  0.35897345,  0.34598734]],
            dates, assets),
        'idiosyncratic_var_matrix': pd.DataFrame([
                [0.02272535,  0.0, 0.0],
                [0.0,  0.05190083, 0.0],
                [0.0, -0.49001128,  0.05431181]],
            assets, assets),}
    fn_correct_outputs = OrderedDict([
        (
            'idiosyncratic_var_vector', pd.DataFrame([0.02272535, 0.05190083, 0.05431181], assets))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
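# A minimal sketch (an assumption): each asset's idiosyncratic variance is the
# corresponding diagonal entry of the idiosyncratic covariance matrix.
def idiosyncratic_var_vector_sketch(returns, idiosyncratic_var_matrix):
    return pd.DataFrame(np.diag(idiosyncratic_var_matrix), returns.columns)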
def test_portfolio_returns(fn):
    tickers = generate_random_tickers(5)
    dates = pd.DatetimeIndex(['2008-08-31', '2008-09-30', '2008-10-31', '2008-11-30'])

    fn_inputs = {
        'df_long': pd.DataFrame(
            [
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [1, 0, 1, 1, 0],
                [0, 1, 0, 1, 1]],
            dates, tickers),
        'df_short': pd.DataFrame(
            [
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 1, 0, 1, 1],
                [1, 1, 1, 0, 0]],
            dates, tickers),
        'lookahead_returns': pd.DataFrame(
            [
                [3.13172138, 0.72709204, 5.76874778, 1.77557845, 0.04098317],
                [-3.78816218, -0.67583590, -4.95433863, -1.67093250, -0.24929051],
                [0.05579709, 0.29199789, 0.00697116, 1.05956179, 0.30686995],
                [1.25459098, 6.87369275, 2.58265839, 6.92676837, 0.84632677]],
            dates, tickers),
        'n_stocks': 3}
    fn_correct_outputs = OrderedDict([
        (
            'portfolio_returns',
            pd.DataFrame(
                [
                    [0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000],
                    [-0.00000000, -0.00000000, -0.00000000, -0.00000000, -0.00000000],
                    [0.01859903, -0.09733263, 0.00232372, 0.00000000, -0.10228998],
                    [-0.41819699, 0.00000000, -0.86088613, 2.30892279, 0.28210892]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
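# A minimal sketch (an assumption): longs earn the forward return and shorts its
# negative, each position sized equally at 1 / n_stocks.
def portfolio_returns_sketch(df_long, df_short, lookahead_returns, n_stocks):
    return (df_long - df_short) * lookahead_returns / n_stocks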
def test_calculate_kstest(fn):
    tickers = generate_random_tickers(3)

    fn_inputs = {
        'long_short_signal_returns': pd.DataFrame(
            {
                'ticker': tickers * 5,
                'signal_return': [0.12, -0.83, 0.37, 0.83, -0.34, 0.27, -0.68, 0.29, 0.69,
                                  0.57, 0.39, 0.56, -0.97, -0.72, 0.26]})}
    fn_correct_outputs = OrderedDict([
        (
            'ks_values',
            pd.Series(
                [0.29787827, 0.35221525, 0.63919407],
                tickers)),
        (
            'p_values',
            pd.Series(
                [0.69536353, 0.46493498, 0.01650327],
                tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_get_long_short(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(4)

    fn_inputs = {
        'close': pd.DataFrame(
            [
                [25.6788, 35.1392, 34.0527],
                [25.1884, 14.3453, 39.9373],
                [78.2803, 34.3854, 23.2932],
                [88.8725, 52.223, 34.4107]],
            dates, tickers),
        'lookback_high': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [92.11310000, 91.05430000, 90.95720000],
                [35.4411, 34.1799, 34.0223],
                [92.11310000, 91.05430000, 90.95720000]],
            dates, tickers),
        'lookback_low': pd.DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [34.1705, 92.453, 58.5107],
                [15.67180000, 12.34530000, 34.05270000],
                [27.18340000, 12.34530000, 23.29320000]],
            dates, tickers)}
    fn_correct_outputs = OrderedDict([
        (
            'long_short',
            pd.DataFrame(
                [
                    [0, 0, 0],
                    [-1, -1, -1],
                    [1, 1, -1],
                    [0, 0, 0]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)
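# A minimal sketch (an assumption): a breakout signal, +1 when the close exceeds
# the lookback high, -1 when it breaks below the lookback low, 0 otherwise.
def get_long_short_sketch(close, lookback_high, lookback_low):
    long_signal = (close > lookback_high).astype(np.int64)
    short_signal = (close < lookback_low).astype(np.int64)
    return long_signal - short_signal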
def test_filter_signals(fn):
    tickers = generate_random_tickers(3)
    dates = generate_random_dates(10)

    fn_inputs = {
        'signal': pd.DataFrame(
            [
                [0, 0, 0],
                [-1, -1, -1],
                [1, 0, -1],
                [0, 0, 0],
                [1, 0, 0],
                [0, 1, 0],
                [0, 0, 1],
                [0, -1, 1],
                [-1, 0, 0],
                [0, 0, 0]],
            dates, tickers),
        'lookahead_days': 3}
    fn_correct_outputs = OrderedDict([
        (
            'filtered_signal',
            pd.DataFrame(
                [
                    [0, 0, 0],
                    [-1, -1, -1],
                    [1, 0, 0],
                    [0, 0, 0],
                    [0, 0, 0],
                    [0, 1, 0],
                    [0, 0, 1],
                    [0, -1, 0],
                    [-1, 0, 0],
                    [0, 0, 0]],
                dates, tickers))])

    assert_output(fn, fn_inputs, fn_correct_outputs)