Example #1
def chi2normal_transformation(df):
    risk_factor = pd.DataFrame(chi2.cdf(df, pd.rolling_mean(df, 150)),
                               columns=df.columns,
                               index=df.index)
    risk_factor = (risk_factor - pd.expanding_mean(risk_factor)) / \
        pd.expanding_std(risk_factor)
    return risk_factor
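Note: the top-level helpers used throughout these examples (pd.rolling_mean, pd.expanding_mean, pd.expanding_std, ...) were deprecated in pandas 0.18 and removed in 1.0. A minimal sketch of Example #1 against the current method-based API (assuming df is a numeric DataFrame) might look like:

import pandas as pd
from scipy.stats import chi2

def chi2normal_transformation(df):
    # rolling 150-period mean supplies the chi-square degrees of freedom, as above
    risk_factor = pd.DataFrame(chi2.cdf(df, df.rolling(150).mean()),
                               columns=df.columns,
                               index=df.index)
    # standardize against the expanding mean and standard deviation
    return (risk_factor - risk_factor.expanding().mean()) / risk_factor.expanding().std()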
Example #2
def json_series(request, pk):
    screen = get_object_or_404(Screen,pk=pk)
    what = request.GET.get('mode','comp') # choices: comp, hand
    ref = request.GET.get('ref','nap') # choices: nap, bkb, mv, cm
#     filters = [
#         RangeRule(name = 'range', lower = -5, upper = 5),
#         RollingRule(name = 'spike', count = 3, tolerance = 3, comp ='LT')
#         ]
    # determine resampling rule
    rule = request.GET.get('rule', 'H')
#     if rule is None:
#         series = screen.find_series()
#         rule = 'H' if series.aantal() < 10000 else 'D'
    series = screen.get_series(ref,what,rule=rule)#,filters=filters)
    if series is None or series.empty:
        values = []
    else:
        values = zip(series.index, series.values)
        
    data = {'screen%s'%screen.nr: values}
    stats = request.GET.get('stats','0')
    try:
        stats = int(stats)
        if stats:
            mean = pd.expanding_mean(series)
            std = pd.expanding_std(series)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            data.update({'stats%s'%screen.nr: ranges})
    except:
        pass
    return HttpResponse(json.dumps(data,ignore_nan=True,default=to_millis),content_type='application/json')
Example #3
def rolling_tstat(x):
    emean = pd.expanding_mean(x)
    estd = pd.expanding_std(x)
    t = np.arange(1, len(x) + 1)
    esqr = np.sqrt(t)
    rtstat = (emean / estd) * esqr
    return rtstat
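The same expanding t-statistic with the removed helpers swapped for the .expanding() accessor (a sketch assuming x is a pandas Series):

import numpy as np

def rolling_tstat(x):
    emean = x.expanding().mean()      # cumulative mean
    estd = x.expanding().std()        # cumulative sample std
    n = np.arange(1, len(x) + 1)      # observation count at each point
    return (emean / estd) * np.sqrt(n)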
Example #4
 def featurize(self, H):
     X = pd.DataFrame({
         'last_sh': H.shift(1).stack(),
         'history_sh': pd.expanding_mean(H).shift(1).stack(),
         'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
         'nr_days': H.notnull().cumsum().stack()
     })
     return X
Example #5
 def featurize(self, H):
     X = pd.DataFrame({
         'last_sh': H.shift(1).stack(),
         'history_sh': pd.expanding_mean(H).shift(1).stack(),
         'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
         'nr_days': H.notnull().cumsum().stack()
     })
     return X
Example #6
def VaR_norm(data, alpha=0.99, n=252):
    Z = stats.norm(0, 1).ppf(1 - alpha)  # inverse CDF (percent point function) of the standard normal
    data['mean'] = pd.rolling_mean(data['return'], n)
    data['std'] = pd.rolling_std(data['return'], n)
    if math.isnan(data.tail(1).iat[0, 3]):
        data['mean'] = pd.expanding_mean(data['return'])
        data['std'] = pd.expanding_std(data['return'])
    data['delta'] = data['mean'] + Z * data['std']
    return data.tail(1).iat[0, 4]
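A hedged rewrite of the same parametric VaR against current pandas; the column positions used in iat are assumed to match the original frame layout:

import math
from scipy import stats

def VaR_norm(data, alpha=0.99, n=252):
    Z = stats.norm(0, 1).ppf(1 - alpha)                 # standard-normal quantile
    data['mean'] = data['return'].rolling(n).mean()
    data['std'] = data['return'].rolling(n).std()
    if math.isnan(data.tail(1).iat[0, 3]):              # not enough history for a full window
        data['mean'] = data['return'].expanding().mean()
        data['std'] = data['return'].expanding().std()
    data['delta'] = data['mean'] + Z * data['std']
    return data.tail(1).iat[0, 4]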
Example #7
File: smoothers.py Project: est/ggplot
def mavg(x,y, span=SPAN):
    "compute moving average"
    x, y = map(_plot_friendly, [x,y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    std_err = pd.expanding_std(y, span)
    y = pd.rolling_mean(y, span)
    y1 = y - std_err
    y2 = y + std_err
    return (y, y1, y2)
Example #8
def mavg(x, y, span=SPAN):
    "compute moving average"
    x, y = map(_plot_friendly, [x, y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    std_err = pd.expanding_std(y, span)
    y = pd.rolling_mean(y, span)
    y1 = y - std_err
    y2 = y + std_err
    return (y, y1, y2)
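In current pandas the smoother from Examples #7 and #8 reduces to the sketch below; the second positional argument of pd.expanding_std was min_periods, so that is what span maps to (the default span of 50 here is only illustrative):

def mavg(y, span=50):
    # y is assumed to be a pandas Series of the smoothed variable
    std_err = y.expanding(min_periods=span).std()
    y_hat = y.rolling(span).mean()
    return y_hat, y_hat - std_err, y_hat + std_err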
Example #9
 def plotSecondMomentConvergence(self, x):
     """
     Plots the convergence of the second moment E(X^2), or
     more precisely, the standard deviation over MCMC iterations.
     :param x: MCMC samples from distribution
     :return: E(X^2) convergence plot
     """
     x_in = np.transpose(x)
     cumstd = pd.expanding_std(x_in, min_periods=1)
     plt.plot(cumstd, label = u'E(X^2) Convergence', color='k', linewidth=1.5)
     return cumstd
Example #10
File: perf.py Project: ychaim/tia
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return string
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
Example #11
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return string
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
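An equivalent expanding Sharpe ratio without the removed helpers (a sketch; excess_returns is the module's own helper, and the original's choice of dividing by the expanding std of returns rather than of the excess is kept):

def sharpe(returns, rfr=0, expanding=0):
    excess = excess_returns(returns, rfr)  # helper assumed from the surrounding module
    if expanding:
        return excess.expanding().mean() / returns.expanding().std()
    return excess.mean() / returns.std()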
Example #12
    def std(self, window=0, rebalanced=True, from_date=None, to_date=None):
        ret = None
        returns = self.returns(rebalanced, from_date, to_date)
        
        if window == 0:
            ret = np.asscalar(np.std(returns))
        if window > 0:
            ret = pd.rolling_std(returns, window)
        if window == -1:
            ret = pd.expanding_std(returns)

        return ret
Example #13
def hurst(channel):

    x = np.array(channel)
    x = x - x.mean()
    z = np.cumsum(x)
    r = np.array((np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:])
    s = pd.expanding_std(x)[1:]
    s[np.where(s == 0)] = 1e-12
    r += 1e-12
    y_axis = np.log(r / s)
    x_axis = np.log(np.arange(1, len(y_axis) + 1))
    x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T
    m, b = np.linalg.lstsq(x_axis, y_axis)[0]

    return (m)
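The same rescaled-range (R/S) Hurst estimate works on current numpy/pandas with only the deprecated calls replaced; a sketch:

import numpy as np
import pandas as pd

def hurst(channel):
    x = np.asarray(channel, dtype=float)
    x = x - x.mean()
    z = np.cumsum(x)
    r = (np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:]  # range of cumulative deviations
    s = pd.Series(x).expanding().std().values[1:]                  # expanding std, skipping the leading NaN
    s[s == 0] = 1e-12                                              # avoid division by zero
    r = r + 1e-12
    y_axis = np.log(r / s)
    x_axis = np.log(np.arange(1, len(y_axis) + 1))
    x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T
    m, b = np.linalg.lstsq(x_axis, y_axis, rcond=None)[0]
    return m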
Example #14
        def apply_one(x):
            x -= x.mean()
            z = np.cumsum(x)
            r = (np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:]
            s = pd.expanding_std(x)[1:]

            # prevent division by 0
            s[np.where(s == 0)] = 1e-12
            r += 1e-12

            y_axis = np.log(r / s)
            x_axis = np.log(np.arange(1, len(y_axis) + 1))
            x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T

            m, b = np.linalg.lstsq(x_axis, y_axis)[0]
            return m
Example #15
        def apply_one(x):
            x -= x.mean()
            z = np.cumsum(x)
            r = (np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:]
            s = pd.expanding_std(x)[1:]

            # prevent division by 0
            s[np.where(s == 0)] = 1e-12
            r += 1e-12

            y_axis = np.log(r / s)
            x_axis = np.log(np.arange(1, len(y_axis) + 1))
            x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T

            m, b = np.linalg.lstsq(x_axis, y_axis)[0]
            return m
Example #16
File: cf.py Project: epifanio/ecoop-binder
 def expanding_smoother(self, data, stype='rolling_mean', min_periods=None, freq=None):
     """
     
     Perform an expanding smoothing on the data; for complete help refer to http://pandas.pydata.org/pandas-docs/dev/computation.html

     :param data: pandas dataframe input data
     :param stype: smoothing type
     :param min_periods: periods
     :param freq: frequency
     smoothing types:
     expanding_count	Number of non-null observations
     expanding_sum	Sum of values
     expanding_mean	Mean of values
     expanding_median	Arithmetic median of values
     expanding_min	Minimum
     expanding_max	Maximum
     expanding_std	Unbiased standard deviation
     expanding_var	Unbiased variance
     expanding_skew	Unbiased skewness (3rd moment)
     expanding_kurt	Unbiased kurtosis (4th moment)
     
     """
     if stype == 'count':
         newy = pd.expanding_count(data, min_periods=min_periods, freq=freq)
     if stype == 'sum':
         newy = pd.expanding_sum(data, min_periods=min_periods, freq=freq)
     if stype == 'mean':
         newy = pd.expanding_mean(data, min_periods=min_periods, freq=freq)
     if stype == 'median':
         newy = pd.expanding_median(data, min_periods=min_periods, freq=freq)
     if stype == 'min':
         newy = pd.expanding_min(data, min_periods=min_periods, freq=freq)
     if stype == 'max':
         newy = pd.expanding_max(data, min_periods=min_periods, freq=freq)
     if stype == 'std':
         newy = pd.expanding_std(data, min_periods=min_periods, freq=freq)
     if stype == 'var':
         newy = pd.expanding_var(data, min_periods=min_periods, freq=freq)
     if stype == 'skew':
         newy = pd.expanding_skew(data, min_periods=min_periods, freq=freq)
     if stype == 'kurt':
         newy = pd.expanding_kurt(data, min_periods=min_periods, freq=freq)
     return newy
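Because the expanding_* helpers (and their freq keyword) are gone, the same dispatch can be written once against the Expanding object; a sketch assuming data is a Series or DataFrame:

def expanding_smoother(data, stype='mean', min_periods=None):
    # stype: one of count, sum, mean, median, min, max, std, var, skew, kurt
    expanding = data.expanding(min_periods=min_periods or 1)
    return getattr(expanding, stype)()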
Example #17
def equity_mm_test(data_df):
    
    test_data = data_df[['S&P 500 Return','Cash Return']].add(1).cumprod()
    
    rolling_period = 3
    
    
    rolling_change = pd.DataFrame.pct_change(test_data,periods=rolling_period)

    column_one = test_data.columns.values[0]
    column_two = test_data.columns.values[1]
    
    
    data_diff = rolling_change[column_one] - rolling_change[column_two]
    
    data_diff['rolling_z'] = (data_diff - pd.expanding_mean(data_diff, min_periods=24))/  pd.expanding_std(data_diff, min_periods=24)
    
    
    weights = pd.DataFrame(index=data_diff.index)
    
    
    weights['bond_wght'] = data_diff['rolling_z']
    weights['treasury_wght'] = data_diff['rolling_z'] * -1
    
    weights = weights / 0.5
    weights.dropna(inplace=True)
    
    
    weights = weights.clip(-1, 1)

    #weights['bond_wght'] = np.where(data_diff > 0, 1.0, np.where(data_diff< 0,-1.0, np.nan))
    
    
    #weights['treasury_wght'] = np.where(data_diff > 0, -1.0, np.where(data_diff < 0,1.0, np.nan))
    
    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)    
    
    
    weighted_returns = combined_wghts * data_df[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    eq_mm = long_only_ew(portfolio_return, name='Equity Momentum')
    
    return eq_mm, combined_wghts
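The expanding z-score with a 24-observation burn-in used here (and again in Examples #19, #25, #32 and #33) maps directly onto the method API; a small sketch for an arbitrary Series s:

def expanding_zscore(s, min_periods=24):
    # standardize each point against all history available up to that point
    mean = s.expanding(min_periods=min_periods).mean()
    std = s.expanding(min_periods=min_periods).std()
    return (s - mean) / std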
Example #18
File: perf.py Project: ychaim/tia
def std_annualized(returns, scale=None, expanding=0):
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        return np.sqrt(scale) * pd.expanding_std(returns)
    else:
        return np.sqrt(scale) * returns.std()
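An equivalent annualised-volatility sketch without pd.expanding_std; here scale is passed explicitly rather than resolved through _resolve_periods_in_year:

import numpy as np

def std_annualized(returns, scale, expanding=0):
    if expanding:
        return np.sqrt(scale) * returns.expanding().std()
    return np.sqrt(scale) * returns.std()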
Example #19
def equity_vol_test(data_frame):
    
    rolling_period = 1
    
    rolling_change = pd.DataFrame.pct_change(np.log(data_frame['Equity Volatility']),periods=rolling_period)
    
    
    rolling_change['rolling_z'] = (rolling_change - pd.expanding_mean(rolling_change, min_periods=24))/  pd.expanding_std(rolling_change, min_periods=24)
    rolling_change['rolling_z'] = rolling_change['rolling_z'].to_frame()
    
    weights = pd.DataFrame(index=rolling_change['rolling_z'].index)
    
    weights['bond_wght'] = rolling_change['rolling_z'] * -1
    
    weights['treasury_wght'] = rolling_change['rolling_z']
    
    weights = weights / 1.5
    weights.dropna(inplace=True)
    weights = weights.clip(-1, 1)

    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)    

    weighted_returns = combined_wghts * data_frame[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    eq_vol = long_only_ew(portfolio_return, name='Equity Volatility')

    
    return eq_vol, combined_wghts
Example #20
def cum_avg(data):
    data = pandas.DataFrame({'data': data})
    means = pandas.expanding_mean(data)
    stds = pandas.expanding_std(data)
    return numpy.array([i[0] for i in means.values
                        ]), numpy.array([i[0] for i in stds.values])
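A current-pandas version of the cumulative mean/std helper, returning flat numpy arrays as the original does (a sketch):

import pandas

def cum_avg(data):
    s = pandas.Series(data)
    return s.expanding().mean().to_numpy(), s.expanding().std().to_numpy()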
Example #21
File: fin2.py Project: G-back-iiitk/UI
          degree=3,
          gamma='auto',
          kernel='rbf',
          max_iter=1000,
          probability=False,
          random_state=None,
          shrinking=True,
          tol=0.001,
          verbose=False).fit(x_train, y_train)
y_predict = reg.predict(x[split:])
df = df.assign(p_trend=pd.Series(np.zeros(len(x))).values)
df['p_trend'][split:] = y_predict
accuracy = scorer.accuracy_score(df['Signal'][split:], df['p_trend'][split:])
df = df.assign(ret=pd.Series(np.zeros(len(x))).values)
df['ret'] = np.log(df['Open'].shift(-1) / df['Open'])
df = df.assign(ret1=pd.Series(np.zeros(len(x))).values)
df['ret1'] = df['p_trend'] * df['ret']
df = df.assign(cu_ret1=pd.Series(np.zeros(len(x))).values)
df['cu_ret1'] = np.cumsum(df['ret1'][split:])
df = df.assign(cu_ret=pd.Series(np.zeros(len(x))).values)
df['cu_ret'] = np.cumsum(df['ret'][split:])
std = pd.expanding_std(df['cu_ret1'])
sharpe = (df['cu_ret1'] - df['cu_ret']) / std
sharpe = sharpe[split:].mean()
print("\n\n ACCURACY :", accuracy)
plt.plot(df['cu_ret1'], color='b', label='Strategy Returns')
plt.plot(df['cu_ret'], color='g', label='Market Returns')
plt.figtext(0.14, 0.7, s='Sharpe ratio: %.2f' % sharpe)
plt.legend(loc='best')
plt.show()
Example #22
def build_model():

    # Load SCDB CSV data.
    scdb_case_data = pandas.DataFrame.from_csv('data/SCDB_2013_01_caseCentered_Citation.csv')
    scdb_justice_data = pandas.DataFrame.from_csv('data/SCDB_2013_01_justiceCentered_Citation.csv')

    # Apply date transforms to the data.
    scdb_case_data['dateDecision'] = scdb_case_data['dateDecision'].apply(get_date)
    scdb_justice_data['dateDecision'] = scdb_justice_data['dateDecision'].apply(get_date)
    scdb_case_data['dateArgument'] = scdb_case_data['dateArgument'].apply(get_date)
    scdb_justice_data['dateArgument'] = scdb_justice_data['dateArgument'].apply(get_date)
    scdb_case_data['dateRearg'] = scdb_case_data['dateRearg'].apply(get_date)
    scdb_justice_data['dateRearg'] = scdb_justice_data['dateRearg'].apply(get_date)
    scdb_case_data['monthDecision'] = scdb_case_data['dateDecision'].apply(get_month)
    scdb_justice_data['monthDecision'] = scdb_justice_data['dateDecision'].apply(get_month)
    scdb_case_data['monthArgument'] = scdb_case_data['dateArgument'].apply(get_month)
    scdb_justice_data['monthArgument'] = scdb_justice_data['dateArgument'].apply(get_month)

    # Apply other basic transforms to the data.

    # Set unspecified decision directions to the middle of the range, 1.5
    scdb_case_data.loc[scdb_case_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5

    # Map case origin and source to the Circuit within which it belongs.
    scdb_case_data['caseOrigin_circuit'] = scdb_case_data['caseOrigin'].apply(map_circuit)
    scdb_justice_data['caseOrigin_circuit'] = scdb_justice_data['caseOrigin'].apply(map_circuit)
    scdb_case_data['caseSource_circuit'] = scdb_case_data['caseSource'].apply(map_circuit)
    scdb_justice_data['caseSource_circuit'] = scdb_justice_data['caseSource'].apply(map_circuit)

    # Map party type (e.g., petitioner or respondent) to our mapping table in constants.
    scdb_case_data['petitioner_dk'] = scdb_case_data['petitioner'].apply(map_party)
    scdb_case_data['respondent_dk'] = scdb_case_data['respondent'].apply(map_party)
    scdb_justice_data['petitioner_dk'] = scdb_justice_data['petitioner'].apply(map_party)
    scdb_justice_data['respondent_dk'] = scdb_justice_data['respondent'].apply(map_party)

    # Generate the overturn variable by comparing Supreme Court direction with lower court direction.
    scdb_case_data['decisionOverturn'] = numpy.abs(numpy.sign(scdb_case_data['decisionDirection'] - scdb_case_data['lcDispositionDirection']))
    scdb_justice_data['decisionOverturn'] = numpy.abs(numpy.sign(scdb_justice_data['direction'] - scdb_justice_data['lcDispositionDirection']))

    # Handle the agreement field, i.e., does this particular Justice's vote match the Court's direction.
    scdb_justice_data['agree'] = (scdb_justice_data['direction'] == scdb_justice_data['decisionDirection'])

    # Map Justice data, some of which comes from the justice_list.csv file in data/
    scdb_justice_data['gender'] = scdb_justice_data['justice'].apply(get_gender)
    scdb_justice_data['year_of_birth'] = scdb_justice_data['justice'].apply(get_year_of_birth)
    scdb_justice_data['party_president'] = scdb_justice_data['justice'].apply(get_party_president)
    scdb_justice_data['segal_cover'] = scdb_justice_data['justice'].apply(get_segal_cover)
    scdb_justice_data['is_chief'] = [int(x.endswith(y)) for x, y, in zip(scdb_justice_data['justiceName'].tolist(), scdb_justice_data['chief'].tolist())]

    # Sort cases by decision date and set into case list
    docket_list = scdb_case_data.sort('dateDecision')['docketId'].tolist()

    # Clean up unspecifiable/direction=3 values by setting them to the middle of the range.
    scdb_case_data.loc[scdb_case_data['lcDispositionDirection'] == 3, 'lcDispositionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['lcDispositionDirection'] == 3, 'lcDispositionDirection'] = 1.5
    scdb_case_data.loc[scdb_case_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5

    # Set minimum record count prior to training and max records to predict.
    min_record_count = 100
    max_record_count = 99999

    # Setup total feature and target data
    feature_data = pandas.DataFrame()
    target_data = pandas.DataFrame()

    # Setup the model
    model = None
    bad_feature_labels = ['docket', 'outcome', 'docket_outcome', 'case_outcome', 'disposition_outcome', 'direction']

    feature_labels = []
    feature_weights = []

    # Outcome data
    outcome_data = pandas.DataFrame()
    case_outcome_data = pandas.DataFrame()

    # Track the less likely label
    min_label = 1.0

    # Iterate over all dockets
    num_dockets = 0

    for docket_id in docket_list:

        # Increment dockets seen
        num_dockets += 1

        if max_record_count != None and num_dockets > max_record_count:
            break

        # Get rows of feature and target data
        feature_rows, target_rows = get_ml_row(docket_id, scdb_case_data, scdb_justice_data)

        # Now append to the feature and target lists
        feature_data = feature_data.append(feature_rows.copy())
        target_data = target_data.append(target_rows.copy())

        # Now re-calculate all the z-scaled values
        feature_data['justice_direction_mean_z'] = (feature_data['justice_direction_mean'] - pandas.expanding_mean(
            feature_data['justice_direction_mean'])) / pandas.expanding_std(feature_data['justice_direction_mean'])
        feature_data['diff_justice_lc_direction_abs_z'] = (feature_data[
                                                               'diff_justice_lc_direction_abs'] - pandas.expanding_mean(
            feature_data['diff_justice_lc_direction_abs'])) / pandas.expanding_std(
            feature_data['diff_justice_lc_direction_abs'])
        feature_data['diff_justice_lc_direction_z'] = (feature_data['diff_justice_lc_direction'] - pandas.expanding_mean(
            feature_data['diff_justice_lc_direction'])) / pandas.expanding_std(feature_data['diff_justice_lc_direction'])
        feature_data['diff_court_lc_direction_abs_z'] = (
                                                            feature_data[
                                                                'diff_court_lc_direction_abs'] - pandas.expanding_mean(
                                                                feature_data[
                                                                    'diff_court_lc_direction_abs'])) / pandas.expanding_std(
            feature_data['diff_court_lc_direction_abs'])
        feature_data['justice_direction_issue_mean_z'] = (feature_data[
                                                              'justice_direction_issue_mean'] - pandas.expanding_mean(
            feature_data['justice_direction_issue_mean'])) / pandas.expanding_std(
            feature_data['justice_direction_issue_mean'])
        feature_data['current_court_direction_issue_mean_z'] = (feature_data[
                                                                    'current_court_direction_issue_mean'] - pandas.expanding_mean(
            feature_data['current_court_direction_issue_mean'])) / pandas.expanding_std(
            feature_data['current_court_direction_issue_mean'])
        feature_data = feature_data.replace(-numpy.inf, -98)
        feature_data = feature_data.replace(numpy.inf, -98)
        feature_data = feature_data.fillna(-99)

        # Update any missing columns in E-block
        feature_rows = feature_data.ix[feature_data['docket'] == docket_id].sort('justice').copy()
        target_rows = feature_rows['outcome']

        # Check to see if we've trained a model yet.
        if model != None:

            # If so, let's test it.
            docket_outcome_data = feature_rows.copy()
            docket_outcome_data['prediction'] = model.predict(feature_rows[feature_labels])
            docket_outcome_data['target'] = target_rows.copy()

            # Get the vote of the court aggregated
            vote_mean_outcome = docket_outcome_data['prediction'].value_counts().idxmax()

            docket_outcome_data['docket_vote_mean'] = vote_mean_outcome
            docket_outcome_data['docket_vote_sum'] = docket_outcome_data['prediction'].sum()

            # Append data to the case outcome data frame
            case_record = scdb_case_data.ix[scdb_case_data['docketId'] == docket_id]
            case_outcome_record = docket_outcome_data.ix[0][
                ['docket', 'docket_outcome', 'docket_vote_mean', 'docket_vote_sum']]
            case_outcome_record['docket_outcome'] = int(
                (case_record['lcDispositionDirection'] == case_record['decisionDirection']).tolist().pop())
            case_outcome_data = case_outcome_data.append(case_outcome_record)

            # Append feature weights
            feature_weights.append(copy.deepcopy(model.best_estimator_.steps[-1][1].feature_importances_.tolist()))

            # Aggregate all data
            outcome_data = outcome_data.append(copy.deepcopy(docket_outcome_data))

            if num_dockets % 100 == 0:
                # Output the rolling confusion matrix every few ticks
                print(sklearn.metrics.classification_report(outcome_data['target'].tolist(),
                                                            outcome_data['prediction'].tolist()))

                print(sklearn.metrics.accuracy_score(outcome_data['target'].tolist(),
                                                     outcome_data['prediction'].tolist()))

        # Relabel indices for feature and target data
        record_count = int(feature_data.shape[0])

        # Ensure that we have enough records
        if record_count < min_record_count:
            continue


        # If we have at least that many records, let's actually train a model.
        feature_data.index = range(record_count)
        target_data.index = range(record_count)

        # Subset feature labels to exclude our indices
        if num_dockets > min_record_count and model == None:

            # Set the excluded feature labels
            feature_labels = [label for label in feature_data.columns.tolist() if label not in bad_feature_labels]

            # Train the model on the data
            model = train_model(feature_data[feature_labels], target_data[0].apply(int).tolist(),
                                search_parameters)

        elif num_dockets > min_record_count and num_dockets % 100 == 0:

            print((docket_id, num_dockets))

            # Train the model on the data
            model = train_model(feature_data[feature_labels], target_data[0].apply(int).tolist(), search_parameters)

    # Output the feature weight data
    feature_weight_df = pandas.DataFrame(feature_weights, columns=feature_labels)

    # Track the case assessment
    case_assessment = []

    # Try to calculate case outcomes accurately.
    for case_id, case_data in outcome_data.groupby('docket'):

        # Get the vote data
        vote_data = (case_data[['docket', 'justice', 'is_chief', 'justice_direction_mean', 'prediction', 'target']].sort('justice_direction_mean'))

        overturn_predicted = vote_data['prediction'].mean()

        overturn_actual = vote_data['target'].mean()

        row = [
            case_id,
            get_year_from_docket(case_id),
            case_data['issue'].tail(1).tolist().pop(),
            case_data['issue_area'].tail(1).tolist().pop(),
            case_data['case_source_circuit'].tail(1).tolist().pop(),
            case_data['case_origin_circuit'].tail(1).tolist().pop(),
            case_data['lc_direction'].tail(1).tolist().pop(),
            case_data['lc_disposition'].tail(1).tolist().pop(),
            overturn_predicted,
            overturn_actual,
            overturn_predicted > 0.5,
            overturn_actual > 0.5
        ]

        # Get the votes aligned
        [row.append(value) for value in vote_data['prediction']]
        [row.append(value) for value in vote_data['justice_direction_mean']]

        # Pad if fewer than nine justices voting
        if vote_data['prediction'].shape[0] < 9:
            for i in range((9 - vote_data['prediction'].shape[0])):
                row.append(numpy.nan)

        row.append(vote_data.ix[vote_data['is_chief'] == 1]['prediction'].tolist().pop())

        # Append to the case assessment dataframe.
        case_assessment.append(row)

    # Setup the column list and final case assessment DF
    column_list = [
        'docket', 'year', 'issue', 'issue_area', 'case_source_circuit',
        'case_origin_circuit', 'lc_direction', 'lc_disposition',
        'overturn_count_predict', 'overturn_count_actual', 'overturn_predict',
        'overturn_actual', 'justice_1', 'justice_2', 'justice_3', 'justice_4',
        'justice_5', 'justice_6', 'justice_7', 'justice_8', 'justice_9',
        'justice_1_dir', 'justice_2_dir', 'justice_3_dir', 'justice_4_dir',
        'justice_5_dir', 'justice_6_dir', 'justice_7_dir', 'justice_8_dir',
        'justice_9_dir', 'justice_chief'
    ]

    case_assessment_df = pandas.DataFrame(case_assessment, columns=column_list)
    case_assessment_df['correct'] = (case_assessment_df['overturn_predict'] == case_assessment_df['overturn_actual'])

    outcome_data['correct'] = (outcome_data['prediction'] == outcome_data['target'])

    # Get the annual accuracy figures
    outcome_data['year'] = outcome_data['docket'].apply(get_year_from_docket)
    case_assessment_df['year'] = case_assessment_df['docket'].apply(get_year_from_docket)

    x_case_assessment_df = case_assessment_df.ix[case_assessment_df['year'] >= 1946]

    print "Case Assessment"

    print pandas.DataFrame(sklearn.metrics.confusion_matrix(x_case_assessment_df['overturn_actual'].tolist(), x_case_assessment_df['overturn_predict'].tolist()))
    print sklearn.metrics.classification_report(x_case_assessment_df['overturn_actual'].tolist(), x_case_assessment_df['overturn_predict'].tolist())
    print sklearn.metrics.accuracy_score(x_case_assessment_df['overturn_actual'].tolist(), x_case_assessment_df['overturn_predict'].tolist())

    print "Justice Assessment"

    x_outcome_data = outcome_data.loc[outcome_data['year'] >= 1946]
    print pandas.DataFrame(sklearn.metrics.confusion_matrix(x_outcome_data['target'].tolist(), x_outcome_data['prediction'].tolist()))
    print sklearn.metrics.classification_report(x_outcome_data['target'].tolist(), x_outcome_data['prediction'].tolist())
    print sklearn.metrics.accuracy_score(x_outcome_data['target'].tolist(), x_outcome_data['prediction'].tolist())

    # Setup vars
    output_folder = 'model_output'
    timestamp_suffix = time.strftime("%Y%m%d%H%M%S")

    # Create path
    run_output_folder = os.path.join(output_folder, timestamp_suffix)
    os.makedirs(run_output_folder)

    # Output data
    outcome_data.to_csv(os.path.join(run_output_folder, 'justice_outcome_data.csv'))
    case_assessment_df.to_csv(os.path.join(run_output_folder, 'case_outcome_data.csv'))
    feature_weight_df.to_csv(os.path.join(run_output_folder, 'feature_weights.csv'))

    # Make a ZIP
    os.system('zip -9 {0}.zip {1}'.format(os.path.join(output_folder, timestamp_suffix), os.path.join(run_output_folder, '*.csv')))
Example #23
File: utils.py Project: luoli413/Cornell
def comput_idicators(df,
                     trading_days,
                     required,
                     save_file,
                     save_address,
                     whole=1):
    # TODO:net_value has some problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.ix[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]

    # daily return
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark_net_value
    df_valid[
        'benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0]
    # benchmark_return
    df_valid['benchmark_return'] = (df_valid['benchmark']-
                                           df_valid['benchmark'].shift(1))/\
                                   df_valid['benchmark'].shift(1)
    # Annualized return
    df_valid['Annu_return'] = pd.expanding_mean(
        df_valid['return']) * trading_days
    # Volatility
    df_valid.loc[:, 'algo_volatility'] = pd.expanding_std(
        df_valid['return']) * np.sqrt(trading_days)
    df_valid.loc[:, 'xret'] = df_valid[
        'return'] - df_valid['Interest_rate'] / trading_days / 100
    df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid[
        'benchmark_return']

    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # sharpe ratio
    df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio)\
                                * np.sqrt(trading_days)
    # information ratio
    df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio)\
                                * np.sqrt(trading_days)

    # Sortino ratio
    def modify_ratio(x, re):
        re /= trading_days
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio,
        args=(required, )) * np.sqrt(trading_days)
    # Transfer infs to NA
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit_rate
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0,
                    0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR
    df_valid['VaR'] = -pd.expanding_quantile(df_valid['return'], 0.05)*\
                      np.sqrt(trading_days)
    # 95% CVaR
    df_valid['CVaR'] = -pd.expanding_apply(df_valid['return'],
                                          lambda x: x[x < np.nanpercentile(x, 5)].mean())\
                       * np.sqrt(trading_days)

    if whole == 1:
        # max_drawdown
        def exp_diff(x, type):
            if type == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()))
            else:
                xret = pd.expanding_apply(
                    x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret
    # dollar
    #     xret = exp_diff(df_valid['cum_profit'],'dollar')
    #     df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
    # percentage

        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max_drawdown_duration:
        # drawdown_enddate is the first time for restoring the max
        def drawdown_end(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if never recovering,then return nan
            try:
                return x_sub[x_sub == 0].index[0]
            except:
                return np.nan

        def drawdown_start(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            try:
                return x_sub[x_sub == 0].index[-1]
            except:
                return np.nan

        df_valid['max_drawdown_start'] = pd.Series()
        df_valid['max_drawdown_end'] = pd.Series()
        df_valid['max_drawdown_start'].ix[-1] = drawdown_start(
            df_valid['nav'], 'percentage')
        df_valid['max_drawdown_end'].ix[-1] = drawdown_end(
            df_valid['nav'], 'percentage')
    df_valid.to_csv(save_address)
    # =====result visualization=====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
    plt.xlabel('Date')
    plt.legend(loc=0, shadow=True)
    plt.ylabel('Nav')
    plt.title('Nav of ' + save_file + ' & SP500')

    # plt.subplot(223)
    # plt.plot(df_valid['cum_profit'],label = 'strategy')
    # plt.xlabel('Date')
    # plt.ylabel('Cum_profit')
    # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid[df_valid['return'].notna()].loc[:, 'return']
    y_return = df_valid[
        df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return']
    mu = x_return.mean()
    sigma = x_return.std()
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    count_x, _, _ = plt.hist(x_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='strategy')
    count_y, _, _ = plt.hist(y_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # add normal distribution line
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
Example #24
    def get_context_data(self, **kwargs):
        context = super(WellChartView, self).get_context_data(**kwargs)
        well = Well.objects.get(pk=context['pk'])
        name = unicode(well)
        options = {
            'rangeSelector': {
                'enabled': True,
                'inputEnabled': True,
            },
            'navigator': {
                'adaptToUpdatedData': True,
                'enabled': True
            },
            'chart': {
                'type': 'arearange',
                'zoomType': 'x'
            },
            'title': {
                'text': name
            },
            'xAxis': {
                'type': 'datetime'
            },
            'yAxis': [{
                'title': {
                    'text': 'm tov NAP'
                }
            }],
            'tooltip': {
                'valueSuffix': ' m',
                'valueDecimals': 2,
                'shared': True,
            },
            'legend': {
                'enabled': True
            },
            'plotOptions': {
                'line': {
                    'marker': {
                        'enabled': False
                    }
                }
            },
            'credits': {
                'enabled': True,
                'text': 'acaciawater.com',
                'href': 'http://www.acaciawater.com',
            },
        }
        series = []
        xydata = []
        for screen in well.screen_set.all():
            name = unicode(screen)
            data = screen.to_pandas(ref='nap')
            xydata = zip(data.index.to_pydatetime(), data.values)
            series.append({
                'name': name,
                'type': 'line',
                'data': xydata,
                'zIndex': 1,
            })
            mean = pd.expanding_mean(data)
            #             series.append({'name': 'gemiddelde',
            #                         'type': 'line',
            #                         'data': zip(mean.index.to_pydatetime(), mean.values),
            #                         'linkedTo' : ':previous',
            #                         })
            std = pd.expanding_std(data)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            series.append({
                'name': 'spreiding',
                'data': ranges,
                'type': 'arearange',
                'lineWidth': 0,
                'fillOpacity': 0.2,
                'linkedTo': ':previous',
                'zIndex': 0,
            })

        if len(xydata) > 0:
            mv = []
            for i in range(len(xydata)):
                mv.append((xydata[i][0], screen.well.maaiveld))
            series.append({'name': 'maaiveld', 'type': 'line', 'data': mv})

        options['series'] = series
        context['options'] = json.dumps(
            options, default=lambda x: int(time.mktime(x.timetuple()) * 1000))
        context['object'] = well
        return context
Example #25
def return_reversal(data_df):
 
    data_df[['HY Tot Index','US Trs Tot Index']] = data_df[['US HY Return','US Int. Trsy Return']].add(1).cumprod()
    
    data_df[['HY Rolling Return','Rolling Return']] = pd.DataFrame.pct_change(data_df[['HY Tot Index','US Trs Tot Index']],periods=36)

    data_df['diff'] =  data_df['HY Rolling Return'] - data_df['Rolling Return']

    data_df['Return Z'] = (data_df['diff']- pd.expanding_mean(data_df['diff'], min_periods=24))/  pd.expanding_std(data_df['diff'], min_periods=24)
    
    
    data_df['Return Z'].dropna(inplace=True)
    
    data_df['Return Z'].plot()
    
    hp_months_cheap = 24
    hp_months_rich = 24

    counter = 0
    
    date_array = data_df.index.values
    
    signal = []
    
    signal_abs_threshold_cheap= 2
    signal_abs_threshold_rich= 2
    
  
    for date in range(0,len(date_array)):
  
        

        if len(data_df['Return Z']) - 1 >= counter:
            
            score = data_df['Return Z'].ix[counter]

            if score > signal_abs_threshold_cheap:
                counter = counter+hp_months_cheap
                temp_list = [-1] * hp_months_cheap
                signal.extend(temp_list)
                
            elif score < (signal_abs_threshold_rich * -1):
                counter = counter+hp_months_rich
                temp_list = [1] * hp_months_rich 
                signal.extend(temp_list)
                
            elif (score <= signal_abs_threshold_cheap) and score >= (signal_abs_threshold_rich * -1):
                counter = counter + 1
                signal.extend([0])
   
    
    signal = signal[:len(date_array)] 

    weights = pd.DataFrame(signal,index=data_df['Return Z'].index,columns=['US HY Return'])


    weights['bond_wght'] = weights
    
       
    weights['treasury_wght'] = weights['US HY Return'] * -1

    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)    

    weighted_returns = combined_wghts * data_df[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    return_relative_test = long_only_ew(portfolio_return, name='Reversal')

    return return_relative_test, combined_wghts
Example #26
# -*- coding: utf-8 -*-
"""

Created on 05/10/15

@author: Carlos Eduardo Barbosa

Test convergence of Lick errors as a function of the number of simulation.

"""
import os

import numpy as np
from pandas import expanding_std
import matplotlib.pyplot as plt

from config import *

if __name__ == "__main__":
    os.chdir(os.path.join(home, "single2/mc_logs"))
    cols = np.array([12, 13, 16, 17, 18, 19, 20])
    logs = os.listdir(".")
    fig = plt.figure(1, figsize=(5, 15))
    for log in logs:
        data = np.loadtxt(log).T[cols]
        for i, d in enumerate(data):
            ax = plt.subplot(7, 1, i + 1)
            ax.plot(expanding_std(d, min_periods=1) / d.std(), "-k")
        plt.pause(1)
        plt.show(block=False)
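The convergence ratio plotted above can be computed without the removed expanding_std import; a sketch for a 1-D numpy array d:

import pandas as pd

def expanding_std_ratio(d):
    # expanding std (from the first sample) relative to the full-sample std
    return pd.Series(d).expanding(min_periods=1).std() / d.std()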
Example #27
reg = SVC(C=1,cache_size=200,class_weight=None,coef0=0,decision_function_shape=None,degree=3,gamma='auto',kernel='rbf',max_iter=1000,probability=False,random_state=None,shrinking=True,tol=0.001,verbose=False)
reg.fit(X[:split],y[:split])
y_predict = reg.predict(X[split:])

Df = Df.assign(P_Trend=pd.Series(np.zeros(len(X))).values)
Df['P_Trend'][split:] = y_predict
accuracy = scorer.accuracy_score(Df['Signal'][split:],Df['P_Trend'][split:])

Df = Df.assign(Ret=pd.Series(np.zeros(len(X))).values)
Df['Ret'] = np.log(Df['Open'].shift(-1)/Df['Open'])

Df = Df.assign(Ret1=pd.Series(np.zeros(len(X))).values)
Df['Ret1'] = Df['P_Trend']*Df['Ret']

Df = Df.assign(Cu_Ret1=pd.Series(np.zeros(len(X))).values)
Df['Cu_Ret1'] = np.cumsum(Df['Ret1'][split:])

Df = Df.assign(Cu_Ret=pd.Series(np.zeros(len(X))).values)
Df['Cu_Ret'] = np.cumsum(Df['Ret'][split:])



Std = pd.expanding_std(Df['Cu_Ret1'])
Sharpe = (Df['Cu_Ret1']-Df['Cu_Ret'])/Std
Sharpe = Sharpe[split:].mean()
print('\n\nAccuracy:',accuracy)
plt.plot(Df['Cu_Ret1'],color='r',label='Strategy Returns')
plt.plot(Df['Cu_Ret'],color='g',label='Market Returns')
plt.figtext(0.14,0.7,s='Sharpe ratio: %.2f'%Sharpe)
plt.legend(loc='best')
Example #28
    # Increment dockets seen
    num_dockets += 1

    if max_record_count != None and num_dockets > max_record_count:
        break

    # Get rows of feature and target data
    feature_rows, target_rows = get_ml_row(docket_id, scdb_case_data, scdb_justice_data)

    # Now append to the feature and target lists
    feature_data = feature_data.append(feature_rows.copy())
    target_data = target_data.append(target_rows.copy())

    # Now re-calculate all the z-scaled values
    feature_data['justice_direction_mean_z'] = (feature_data['justice_direction_mean'] - pandas.expanding_mean(
        feature_data['justice_direction_mean'])) / pandas.expanding_std(feature_data['justice_direction_mean'])
    feature_data['diff_justice_lc_direction_abs_z'] = (feature_data[
                                                           'diff_justice_lc_direction_abs'] - pandas.expanding_mean(
        feature_data['diff_justice_lc_direction_abs'])) / pandas.expanding_std(
        feature_data['diff_justice_lc_direction_abs'])
    feature_data['diff_justice_lc_direction_z'] = (feature_data['diff_justice_lc_direction'] - pandas.expanding_mean(
        feature_data['diff_justice_lc_direction'])) / pandas.expanding_std(feature_data['diff_justice_lc_direction'])
    feature_data['diff_court_lc_direction_abs_z'] = (
                                                        feature_data[
                                                            'diff_court_lc_direction_abs'] - pandas.expanding_mean(
                                                            feature_data[
                                                                'diff_court_lc_direction_abs'])) / pandas.expanding_std(
        feature_data['diff_court_lc_direction_abs'])
    feature_data['justice_direction_issue_mean_z'] = (feature_data[
                                                          'justice_direction_issue_mean'] - pandas.expanding_mean(
        feature_data['justice_direction_issue_mean'])) / pandas.expanding_std(
        feature_data['justice_direction_issue_mean'])
Example #29
def std_annualized(returns, scale=None, expanding=0):
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        return np.sqrt(scale) * pd.expanding_std(returns)
    else:
        return np.sqrt(scale) * returns.std()
Example #30
File: ret.py Project: xie3ge/tia
class CumulativeRets(object):
    def __init__(self, rets=None, ltd_rets=None):
        if rets is None and ltd_rets is None:
            raise ValueError('rets or ltd_rets must be specified')

        if rets is None:
            if ltd_rets.empty:
                rets = ltd_rets
            else:
                rets = (1. + ltd_rets).pct_change()
                rets.iloc[0] = ltd_rets.iloc[0]

        if ltd_rets is None:
            if rets.empty:
                ltd_rets = rets
            else:
                ltd_rets = (1. + rets).cumprod() - 1.

        self.rets = rets
        self.ltd_rets = ltd_rets

    pds_per_year = property(lambda self: periodicity(self.rets))

    def asfreq(self, freq):
        other_pds_per_year = periodicity(freq)
        if self.pds_per_year < other_pds_per_year:
            msg = 'Cannot downsample returns. Cannot convert from %s periods/year to %s'
            raise ValueError(msg % (self.pds_per_year, other_pds_per_year))

        if freq == 'B':
            rets = (1. + self.rets).groupby(self.rets.index.date).apply(lambda s: s.prod()) - 1.
            # If you do not do this, it will be an object index
            rets.index = pd.DatetimeIndex([i for i in rets.index])
            return CumulativeRets(rets)
        else:
            rets = (1. + self.rets).resample(freq, how='prod') - 1.
            return CumulativeRets(rets)

    # -----------------------------------------------------------
    # Resampled data
    dly = lazy_property(lambda self: self.asfreq('B'), 'dly')
    weekly = lazy_property(lambda self: self.asfreq('W'), 'weekly')
    monthly = lazy_property(lambda self: self.asfreq('M'), 'monthly')
    quarterly = lazy_property(lambda self: self.asfreq('Q'), 'quarterly')
    annual = lazy_property(lambda self: self.asfreq('A'), 'annual')

    # -----------------------------------------------------------
    # Basic Metrics
    @lazy_property
    def ltd_rets_ann(self):
        return (1. + self.ltd_rets) ** (self.pds_per_year / pd.expanding_count(self.rets)) - 1.

    cnt = property(lambda self: self.rets.notnull().astype(int).sum())
    mean = lazy_property(lambda self: self.rets.mean(), 'avg')
    mean_ann = lazy_property(lambda self: self.mean * self.pds_per_year, 'avg_ann')
    ltd = lazy_property(lambda self: self.ltd_rets.iloc[-1], name='ltd')
    ltd_ann = lazy_property(lambda self: self.ltd_rets_ann.iloc[-1], name='ltd_ann')
    std = lazy_property(lambda self: self.rets.std(), 'std')
    std_ann = lazy_property(lambda self: self.std * np.sqrt(self.pds_per_year), 'std_ann')
    drawdown_info = lazy_property(lambda self: drawdown_info(self.rets), 'drawdown_info')
    drawdowns = lazy_property(lambda self: drawdowns(self.rets), 'drawdowns')
    maxdd = lazy_property(lambda self: self.drawdown_info['maxdd'].min(), 'maxdd')
    dd_avg = lazy_property(lambda self: self.drawdown_info['maxdd'].mean(), 'dd_avg')
    kurtosis = lazy_property(lambda self: self.rets.kurtosis(), 'kurtosis')
    skew = lazy_property(lambda self: self.rets.skew(), 'skew')

    sharpe_ann = lazy_property(lambda self: np.divide(self.ltd_ann, self.std_ann), 'sharpe_ann')
    downside_deviation = lazy_property(lambda self: downside_deviation(self.rets, mar=0, full=0, ann=1),
                                       'downside_deviation')
    sortino = lazy_property(lambda self: self.ltd_ann / self.downside_deviation, 'sortino')

    @lazy_property
    def maxdd_dt(self):
        ddinfo = self.drawdown_info
        if ddinfo.empty:
            return None
        else:
            return self.drawdown_info['maxdd dt'].ix[self.drawdown_info['maxdd'].idxmin()]

    # -----------------------------------------------------------
    # Expanding metrics
    expanding_mean = property(lambda self: pd.expanding_mean(self.rets), 'expanding_avg')
    expanding_mean_ann = property(lambda self: self.expanding_mean * self.pds_per_year, 'expanding_avg_ann')
    expanding_std = lazy_property(lambda self: pd.expanding_std(self.rets), 'expanding_std')
    expanding_std_ann = lazy_property(lambda self: self.expanding_std * np.sqrt(self.pds_per_year), 'expanding_std_ann')
    expanding_sharpe_ann = property(lambda self: np.divide(self.ltd_rets_ann, self.expanding_std_ann))

    # -----------------------------------------------------------
    # Rolling metrics
    rolling_mean = property(lambda self: pd.rolling_mean(self.rets), 'rolling_avg')
    rolling_mean_ann = property(lambda self: self.rolling_mean * self.pds_per_year, 'rolling_avg_ann')

    def rolling_ltd_rets(self, n):
        return pd.rolling_apply(self.rets, n, lambda s: (1. + s).prod() - 1.)

    def rolling_ltd_rets_ann(self, n):
        tot = self.rolling_ltd_rets(n)
        return tot ** (self.pds_per_year / n)

    def rolling_std(self, n):
        return pd.rolling_std(self.rets, n)

    def rolling_std_ann(self, n):
        return self.rolling_std(n) * np.sqrt(self.pds_per_year)

    def rolling_sharpe_ann(self, n):
        return self.rolling_ltd_rets_ann(n) / self.rolling_std_ann(n)

    def iter_by_year(self):
        """Split the return objects by year and iterate"""
        for key, grp in self.rets.groupby(lambda x: x.year):
            yield key, CumulativeRets(rets=grp)

    def truncate(self, before=None, after=None):
        rets = self.rets.truncate(before=before, after=after)
        return CumulativeRets(rets=rets)

    @lazy_property
    def summary(self):
        d = OrderedDict()
        d['ltd'] = self.ltd
        d['ltd ann'] = self.ltd_ann
        d['mean'] = self.mean
        d['mean ann'] = self.mean_ann
        d['std'] = self.std
        d['std ann'] = self.std_ann
        d['sharpe ann'] = self.sharpe_ann
        d['sortino'] = self.sortino
        d['maxdd'] = self.maxdd
        d['maxdd dt'] = self.maxdd_dt
        d['dd avg'] = self.dd_avg
        d['cnt'] = self.cnt
        return pd.Series(d, name=self.rets.index.freq or guess_freq(self.rets.index))

    def _repr_html_(self):
        from tia.util.fmt import new_dynamic_formatter

        fmt = new_dynamic_formatter(method='row', precision=2, pcts=1, trunc_dot_zeros=1, parens=1)
        df = self.summary.to_frame()
        return fmt(df)._repr_html_()

    def get_alpha_beta(self, bm_rets):
        if isinstance(bm_rets, pd.Series):
            bm = CumulativeRets(bm_rets)
        elif isinstance(bm_rets, CumulativeRets):
            bm = bm_rets
        else:
            raise ValueError('bm_rets must be series or CumulativeRetPerformace not %s' % (type(bm_rets)))

        bm_freq = guess_freq(bm_rets)
        if self.pds_per_year != bm.pds_per_year:
            tgt = {'B': 'dly', 'W': 'weekly', 'M': 'monthly', 'Q': 'quarterly', 'A': 'annual'}.get(bm_freq, None)
            if tgt is None:
                raise ValueError('No mapping for handling benchmark with frequency: %s' % bm_freq)
            tmp = getattr(self, tgt)
            y = tmp.rets
            y_ann = tmp.ltd_ann
        else:
            y = self.rets
            y_ann = self.ltd_ann

        x = bm.rets.truncate(y.index[0], y.index[-1])
        x_ann = bm.ltd_ann

        model = pd.ols(x=x, y=y)
        beta = model.beta[0]
        alpha = y_ann - beta * x_ann
        return pd.Series({'alpha': alpha, 'beta': beta}, name=bm_freq)

    def plot_ltd(self, ax=None, style='k', label='ltd', show_dd=1, title=True, legend=1):
        ltd = self.ltd_rets
        ax = ltd.plot(ax=ax, style=style, label=label)
        if show_dd:
            dd = self.drawdowns
            dd.plot(style='r', label='drawdowns', alpha=.5, ax=ax)
            ax.fill_between(dd.index, 0, dd.values, facecolor='red', alpha=.25)
            fmt = PercentFormatter

            AxesFormat().Y.percent().X.label("").apply(ax)
            legend and ax.legend(loc='upper left', prop={'size': 12})

            # show the actual date and value
            mdt, mdd = self.maxdd_dt, self.maxdd
            bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.25)
            try:
                dtstr = '{0}'.format(mdt.to_period())
            except:
                # assume daily
                dtstr = '{0}'.format(hasattr(mdt, 'date') and mdt.date() or mdt)
            ax.text(mdt, dd[mdt], "{1} \n {0}".format(fmt(mdd), dtstr).strip(), ha="center", va="top", size=8,
                    bbox=bbox_props)

        if title is True:
            pf = new_percent_formatter(1, parens=False, trunc_dot_zeros=True)
            ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=True)
            total = pf(self.ltd_ann)
            vol = pf(self.std_ann)
            sh = ff(self.sharpe_ann)
            mdd = pf(self.maxdd)
            title = 'ret$\mathregular{_{ann}}$ %s     vol$\mathregular{_{ann}}$ %s     sharpe %s     maxdd %s' % (
            total, vol, sh, mdd)

        title and ax.set_title(title, fontdict=dict(fontsize=10, fontweight='bold'))
        return ax

    def plot_ret_on_dollar(self, title=None, show_maxdd=1, figsize=None, ax=None, append=0, label=None, **plot_args):
        plot_return_on_dollar(self.rets, title=title, show_maxdd=show_maxdd, figsize=figsize, ax=ax, append=append,
                              label=label, **plot_args)

    def plot_hist(self, ax=None, **histplot_kwargs):
        pf = new_percent_formatter(precision=1, parens=False, trunc_dot_zeros=1)
        ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=1)

        ax = self.rets.hist(ax=ax, **histplot_kwargs)
        AxesFormat().X.percent(1).apply(ax)
        m, s, sk, ku = pf(self.mean), pf(self.std), ff(self.skew), ff(self.kurtosis)
        txt = '$\mathregular{\mu}$=%s   $\mathregular{\sigma}$=%s   skew=%s   kurt=%s' % (m, s, sk, ku)
        bbox = dict(facecolor='white', alpha=0.5)
        ax.text(0, 1, txt, fontdict={'fontweight': 'bold'}, bbox=bbox, ha='left', va='top', transform=ax.transAxes)
        return ax

    def filter(self, mask, keep_ltd=0):
        if isinstance(mask, pd.Series):
            mask = mask.values
        rets = self.rets.ix[mask]
        ltd = None
        if keep_ltd:
            ltd = self.ltd_rets.ix[mask]
        return CumulativeRets(rets=rets, ltd_rets=ltd)
Example #31
raw = requests.get("http://www.google.com/finance/getprices?i="+interval+"&p="+lookback+"d&f=c&df=cpct&q="+symbol).text

# Take the data and put it into a DataFrame
raw = raw.split()[7:]

data = pd.DataFrame(raw)
data = data.astype("float")
data["price"] =  data[0]
del data[0]

# We only need 60 minutes worth of data
if len(data["price"] >= 60):  data["price"] = data["price"][-60:]

# Columns for expanding mean and standard deviation
data["mean"] = pd.expanding_mean(data["price"])
data["vol"] = pd.expanding_std(data["price"])

# Linear regression on price data
x = range(len(data["price"][-60:]))
y = data["price"][-60:].values

A,B = curve_fit(f,x,y)

# Print the trend to the console
if A[0] < 0 : print("downtrend")
else: print("uptrend")

# Plot window
plt.figure(1)

# Plot for the price and its moving average
Example #32
def spread_crossover(data_df, slow=1, fast=12):
    # valuation signal: expanding and exponentially weighted z-scores of the log spread
    spread_log = pd.DataFrame(np.log(data_df.ix[:, 0] * 100))

    data_df['spread_z_ma'] = (spread_log - pd.expanding_mean(spread_log, min_periods=24)) / pd.expanding_std(spread_log, min_periods=24)
    data_df['spread_z_ema'] = (spread_log - pd.ewma(spread_log, min_periods=24, halflife=12)) / pd.ewmstd(spread_log, halflife=12)

    # smooth the EWM z-score with a 3-period rolling mean
    data_df['spread_z_ema'] = pd.rolling_mean(data_df['spread_z_ema'], window=3)

    # trend signal: slow/fast moving-average crossover of the spread, z-scored
    data_df['slow'] = pd.rolling_mean(data_df['US HY Spread'], slow)
    data_df['fast'] = pd.rolling_mean(data_df['US HY Spread'], fast)

    data_df['diff'] = (data_df['slow'] - data_df['fast']) * -1
    data_df['diff'] = data_df['diff'] + 1
    data_df['diff'] = np.log(data_df['diff'])

    data_df['trend_z_ma'] = (data_df['diff'] - pd.expanding_mean(data_df['diff'], min_periods=24)) / pd.expanding_std(data_df['diff'], min_periods=24)
    data_df['trend_z_ma'] = pd.rolling_mean(data_df['trend_z_ma'], window=3)
    trend_valuation_df = pd.concat([data_df['spread_z_ema'], data_df['trend_z_ma']], axis=1)

    
    trend_valuation_df.dropna(inplace=True)
    trend_valuation_df.plot()
    plt.show()
    
    algo_wghts_df = pd.DataFrame()
    wghts_array = []
    
    valuation_threshold_cheap = 1
    valuation_threshold_rich = -1.0
    trend_threshold_tightening = 0.1
    trend_threshold_widening = -0.1
    
    data_df['spread_z_ma'].plot()
    plt.show()
    
    

    for score in trend_valuation_df.values:
        valuation_score = score[0]
        trend_score = score[1]
        
        if (trend_score >= -0.2 and valuation_score >= -1):
            wghts_array.append(min(1,abs(trend_score-valuation_score) / 1))
        else:
            wghts_array.append(0)
        #elif trend_score <= -0.1 and valuation_score <= valuation_threshold_cheap:
        #    wghts_array.append(-1)
        #elif valuation_score >= valuation_threshold_cheap:
        #    wghts_array.append(1)
        #else:
        #    wghts_array.append(0)   
    
    wghts_df = pd.DataFrame(wghts_array, index=trend_valuation_df.index)

    # fraction of periods spent long, neutral and short
    long = wghts_df[wghts_df == 1].count()[0] / len(trend_valuation_df)
    neutral = wghts_df[wghts_df == 0].count()[0] / len(trend_valuation_df)
    short = wghts_df[wghts_df == -1].count()[0] / len(trend_valuation_df)
    
    wghts_df.columns = [data_df.columns.values[1]]
    
    wghts_df = wghts_df.shift(1)
    
    
    s1 = bt.Strategy('Valuation & Trend ', [bt.algos.WeighTarget(wghts_df),
                               bt.algos.Rebalance()])
    
    return_data = data_df.ix[:,1].to_frame()
    return_data.columns = [data_df.columns.values[1]]

    strategy = bt.Backtest(s1, return_data)
    
    res = bt.run(strategy)
    
    res.plot(logy=True)
    res.display()
    print(long,neutral,short)
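
# The pd.expanding_*, pd.rolling_mean, pd.ewma and pd.ewmstd calls used in
# spread_crossover were deprecated in pandas 0.18 and removed in later
# versions. A minimal sketch of the same z-scores with the current API;
# `spread_log` here is a synthetic stand-in for the log-spread series built above.
import numpy as np
import pandas as pd

spread_log = pd.Series(np.log(np.linspace(300.0, 600.0, 120)))  # illustrative only

z_ma = (spread_log - spread_log.expanding(min_periods=24).mean()) / \
    spread_log.expanding(min_periods=24).std()
z_ema = (spread_log - spread_log.ewm(halflife=12, min_periods=24).mean()) / \
    spread_log.ewm(halflife=12).std()
z_ema_smooth = z_ema.rolling(window=3).mean()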
예제 #33
0
def spread_holding_test(data_df):

    # expanding z-score of the log spread (NaN for the first 23 observations)
    data_df['lg_spread'] = np.log(data_df['US HY Spread'] * 100)
    data_df['spread_z_ema'] = (data_df['lg_spread'] - pd.expanding_mean(data_df['lg_spread'], min_periods=24)) / pd.expanding_std(data_df['lg_spread'], min_periods=24)

    # drop the warm-up NaNs; Series.dropna(inplace=True) on a column selection
    # does not modify the frame, so drop the rows from the frame itself
    data_df.dropna(subset=['spread_z_ema'], inplace=True)

    hp_months_cheap = 12
    hp_months_rich = 12

    counter = 0
    date_array = data_df.index.values
    signal = []

    signal_abs_threshold_cheap = 1.5
    signal_abs_threshold_rich = 1.5

    for date in range(0, len(date_array)):

        if len(data_df['spread_z_ema']) - 1 >= counter:

            score = data_df['spread_z_ema'].ix[counter]

            if score > signal_abs_threshold_cheap:
                counter = counter+hp_months_cheap
                temp_list = [1] * hp_months_cheap
                signal.extend(temp_list)
                
            elif score < (signal_abs_threshold_rich * -1):
                counter = counter+hp_months_rich
                temp_list = [-1] * hp_months_rich 
                signal.extend(temp_list)
                
            elif (score <= signal_abs_threshold_cheap) and score >= (signal_abs_threshold_rich * -1):
                counter = counter + 1
                signal.extend([1])
   
    
    # trim the signal to the sample length and build long/short weights
    signal = signal[:len(date_array)]

    weights = pd.DataFrame(signal, index=data_df['spread_z_ema'].index, columns=['US HY Return'])

    weights['bond_wght'] = weights['US HY Return']
    weights['treasury_wght'] = weights['US HY Return'] * -1

    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)    

    weighted_returns = combined_wghts * data_df[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    risk_premia = long_only_ew(portfolio_return, name='Risk Premia')

    return risk_premia, combined_wghts
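
# Hedged usage sketch for spread_holding_test: it expects a monthly frame with
# 'US HY Spread', 'US HY Return' and 'US Int. Trsy Return' columns plus the
# long_only_ew helper used above. The frame below is synthetic and only shows
# the expected shape; the call is left commented out because the function
# itself uses the legacy pd.expanding_* API.
import numpy as np
import pandas as pd

idx = pd.date_range('2000-01-31', periods=120, freq='M')
example_df = pd.DataFrame({
    'US HY Spread': np.random.uniform(3.0, 8.0, len(idx)),          # spread level
    'US HY Return': np.random.normal(0.005, 0.02, len(idx)),        # monthly total returns
    'US Int. Trsy Return': np.random.normal(0.003, 0.01, len(idx)),
}, index=idx)

# risk_premia, combined_wghts = spread_holding_test(example_df)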
예제 #34
0
"""

Created on 05/10/15

@author: Carlos Eduardo Barbosa

Test convergence of Lick errors as a function of the number of simulations.

"""
import os

import numpy as np
from pandas import expanding_std
import matplotlib.pyplot as plt

from config import *

if __name__ == "__main__":
    os.chdir(os.path.join(home, "single2/mc_logs"))
    cols = np.array([12, 13,16,17,18,19,20])
    logs = os.listdir(".")
    fig = plt.figure(1, figsize=(5,15))
    for log in logs:
        data = np.loadtxt(log).T[cols]
        for i,d in enumerate(data):
            ax = plt.subplot(7,1,i+1)
            ax.plot(expanding_std(d, min_periods=1) / d.std(), "-k")
        plt.pause(1)
        plt.show(block=False)
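
# A self-contained sketch of the convergence check plotted above: the expanding
# standard deviation of n simulated draws, divided by the full-sample standard
# deviation, should approach 1 as n grows. The draws are synthetic; only the
# ratio construction mirrors the script (expanding_std is spelled
# Series.expanding().std() in current pandas).
import numpy as np
import pandas as pd

draws = pd.Series(np.random.normal(size=500))
ratio = draws.expanding(min_periods=1).std() / draws.std()
print(ratio.iloc[[10, 100, 499]])  # drifts toward 1.0 as more draws accumulate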

예제 #35
0
                   "&p=" + lookback + "d&f=c&df=cpct&q=" + symbol).text

# Take the data and put it into a DataFrame
raw = raw.split()[7:]

data = pd.DataFrame(raw)
data = data.astype("float")
data["price"] = data[0]
del data[0]

# We only need 60 minutes worth of data
if len(data["price"] >= 60): data["price"] = data["price"][-60:]

# Columns for expanding mean and standard deviation
data["mean"] = pd.expanding_mean(data["price"])
data["vol"] = pd.expanding_std(data["price"])

# Linear regression on price data
x = range(len(data["price"][-60:]))
y = data["price"][-60:].values

A, B = curve_fit(f, x, y)

# Print the trend to the console
if A[0] < 0: print("downtrend")
else: print("uptrend")

# Plot window
plt.figure(1)

# Plot for the price and its moving average
예제 #36
0
    def get_context_data(self, **kwargs):
        context = super(WellChartView, self).get_context_data(**kwargs)
        well = Well.objects.get(pk=context['pk'])
        name = unicode(well)
        options = {
             'rangeSelector': { 'enabled': True,
                               'inputEnabled': True,
                               },
            'navigator': {'adaptToUpdatedData': True, 'enabled': True},
            'chart': {'type': 'arearange', 'zoomType': 'x'},
            'title': {'text': name},
            'xAxis': {'type': 'datetime'},
            'yAxis': [{'title': {'text': 'Grondwaterstand\n(m tov NAP)'}}
                      ],
            'tooltip': {'valueSuffix': ' m',
                        'valueDecimals': 2,
                        'shared': True,
                       }, 
            'legend': {'enabled': True},
            'plotOptions': {'line': {'marker': {'enabled': False}}},            
            'credits': {'enabled': True, 
                        'text': 'acaciawater.com', 
                        'href': 'http://www.acaciawater.com',
                       },
            }
        series = []
        xydata = []
        for screen in well.screen_set.all():
            name = unicode(screen)
            data = screen.to_pandas(ref='nap')
            if data.size > 0:
                xydata = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': name,
                            'type': 'line',
                            'data': xydata,
                            'lineWidth': 1,
                            'color': '#0066FF',
                            'zIndex': 2,
                            })
                mean = pd.expanding_mean(data)
                std = pd.expanding_std(data)
                a = (mean - std).dropna()
                b = (mean + std).dropna()
                ranges = zip(a.index.to_pydatetime(), a.values, b.values)
                series.append({'name': 'spreiding',
                            'data': ranges,
                            'type': 'arearange',
                            'lineWidth': 0,
                            'color': '#0066FF',
                            'fillOpacity': 0.2,
                            'linkedTo' : ':previous',
                            'zIndex': 0,
                            })

            data = screen.to_pandas(ref='nap',kind='HAND')
            if data.size > 0:
                hand = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': 'handpeiling',
                            'type': 'scatter',
                            'data': hand,
                            'zIndex': 3,
                            'marker': {'symbol': 'circle', 'radius': 6, 'lineColor': 'white', 'lineWidth': 2, 'fillColor': 'red'},
                            })

        if len(xydata)>0:
            mv = []
            mv.append((xydata[0][0], screen.well.maaiveld))
            mv.append((xydata[-1][0], screen.well.maaiveld))
            series.append({'name': 'maaiveld',
                        'type': 'line',
                        'lineWidth': 2,
                        'color': '#009900',
                        'dashStyle': 'Dash',
                        'data': mv,
                        'zIndex': 4,
                        })

        # add precipitation series from the nearest weather station
        try:
            closest = Station.closest(well.location)
            name = 'Meteostation {} (dagwaarden)'.format(closest.naam)
            neerslag = Series.objects.get(name='RH',mlocatie__name=name)
            data = neerslag.to_pandas(start=xydata[0][0], stop=xydata[-1][0]) / 10.0 # 0.1 mm -> mm
            data = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': 'Neerslag '+ closest.naam,
                        'type': 'column',
                        'data': data,
                        'yAxis': 1,
                        'pointRange': 24 * 3600 * 1000, # 1 day
                        'pointPadding': 0.01,
                        'pointPlacement': 0.5,
                        'zIndex': 1,
                        'color': 'orange', 
                        'borderColor': '#cc6600', 
                        })
            options['yAxis'].append({'title': {'text': 'Neerslag (mm)'},
                                     'opposite': 1,
                                     'min': 0,
                                     })
        except:
            pass
        options['series'] = series
        context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple())*1000))
        context['object'] = well
        return context
예제 #37
0
    def get_context_data(self, **kwargs):
        context = super(WellChartView, self).get_context_data(**kwargs)
        well = Well.objects.get(pk=context['pk'])
        name = unicode(well)
        options = {
             'rangeSelector': { 'enabled': True,
                               'inputEnabled': True,
                               },
            'navigator': {'adaptToUpdatedData': True, 'enabled': True},
            'chart': {'type': 'arearange', 'zoomType': 'x'},
            'title': {'text': name},
            'xAxis': {'type': 'datetime'},
            'yAxis': [{'title': {'text': 'm tov NAP'}}
                      ],
            'tooltip': {'valueSuffix': ' m',
                        'valueDecimals': 2,
                        'shared': True,
                       }, 
            'legend': {'enabled': True},
            'plotOptions': {'line': {'marker': {'enabled': False}}},            
            'credits': {'enabled': True, 
                        'text': 'acaciawater.com', 
                        'href': 'http://www.acaciawater.com',
                       },
            }
        series = []
        xydata = []
        start = datetime.datetime(2013,1,1)
        stop = datetime.datetime(2016,1,1)
        for screen in well.screen_set.all():
            name = unicode(screen)
            data = screen.to_pandas(ref='nap')[start:stop]
            if data.size > 0:
                xydata = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': name,
                            'type': 'line',
                            'data': xydata,
                            'lineWidth': 1,
                            'zIndex': 1,
                            })
                mean = pd.expanding_mean(data)
    #             series.append({'name': 'gemiddelde',
    #                         'type': 'line',
    #                         'data': zip(mean.index.to_pydatetime(), mean.values),
    #                         'linkedTo' : ':previous',
    #                         })
                std = pd.expanding_std(data)
                a = (mean - std).dropna()
                b = (mean + std).dropna()
                ranges = zip(a.index.to_pydatetime(), a.values, b.values)
                series.append({'name': 'spreiding',
                            'data': ranges,
                            'type': 'arearange',
                            'lineWidth': 0,
                            'fillOpacity': 0.2,
                            'linkedTo' : ':previous',
                            'zIndex': 0,
                            })
            data = screen.to_pandas(ref='nap',kind='HAND')[start:stop]
            if data.size > 0:
                hand = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': 'handpeiling',
                            'type': 'scatter',
                            'data': hand,
                            'zIndex': 2,
                            'marker': {'symbol': 'circle', 'radius': 6, 'lineColor': 'white', 'lineWidth': 2, 'fillColor': 'blue'},
                            })

        if len(xydata)>0:
            mv = []
            mv.append((xydata[0][0], screen.well.maaiveld))
            mv.append((xydata[-1][0], screen.well.maaiveld))
            series.append({'name': 'maaiveld',
                        'type': 'line',
                        'lineWidth': 1,
                        'dashStyle': 'Dash',
                        'color': 'white',
                        'data': mv
                        })
        
        options['series'] = series
        context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple())*1000))
        context['object'] = well
        return context