def chi2normal_transformation(df):
    risk_factor = pd.DataFrame(chi2.cdf(df, pd.rolling_mean(df, 150)),
                               columns=df.columns, index=df.index)
    risk_factor = (risk_factor - pd.expanding_mean(risk_factor)) / \
        pd.expanding_std(risk_factor)
    return risk_factor
def json_series(request, pk):
    screen = get_object_or_404(Screen, pk=pk)
    what = request.GET.get('mode', 'comp')  # choices: comp, hand
    ref = request.GET.get('ref', 'nap')  # choices: nap, bkb, mv, cm
    # filters = [
    #     RangeRule(name='range', lower=-5, upper=5),
    #     RollingRule(name='spike', count=3, tolerance=3, comp='LT')
    # ]
    # determine resampling rule
    rule = request.GET.get('rule', 'H')
    # if rule is None:
    #     series = screen.find_series()
    #     rule = 'H' if series.aantal() < 10000 else 'D'
    series = screen.get_series(ref, what, rule=rule)  # , filters=filters)
    if series is None or series.empty:
        values = []
    else:
        values = zip(series.index, series.values)
    data = {'screen%s' % screen.nr: values}
    stats = request.GET.get('stats', '0')
    try:
        stats = int(stats)
        if stats:
            mean = pd.expanding_mean(series)
            std = pd.expanding_std(series)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            data.update({'stats%s' % screen.nr: ranges})
    except:
        pass
    return HttpResponse(json.dumps(data, ignore_nan=True, default=to_millis),
                        content_type='application/json')
def rolling_tstat(x):
    emean = pd.expanding_mean(x)
    estd = pd.expanding_std(x)
    t = np.arange(1, len(x) + 1)
    esqr = np.sqrt(t)
    rtstat = (emean / estd) * esqr
    return rtstat
def featurize(self, H):
    X = pd.DataFrame({
        'last_sh': H.shift(1).stack(),
        'history_sh': pd.expanding_mean(H).shift(1).stack(),
        'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
        'nr_days': H.notnull().cumsum().stack()
    })
    return X
def VaR_norm(data, alpha=0.99, n=252):
    Z = stats.norm(0, 1).ppf(1 - alpha)  # inverse CDF (percent point function) of the standard normal
    data['mean'] = pd.rolling_mean(data['return'], n)
    data['std'] = pd.rolling_std(data['return'], n)
    if math.isnan(data.tail(1).iat[0, 3]):
        data['mean'] = pd.expanding_mean(data['return'])
        data['std'] = pd.expanding_std(data['return'])
    data['delta'] = data['mean'] + Z * data['std']
    return data.tail(1).iat[0, 4]
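# Worked check of the parametric (variance-covariance) VaR formula used above.
# This is an illustrative sketch with hypothetical numbers, not part of the original script:
# Z = norm.ppf(1 - alpha) is negative for alpha > 0.5, so 'delta' = mean + Z * std is the
# lower return quantile of the fitted normal distribution.
from scipy import stats

z_99 = stats.norm(0, 1).ppf(1 - 0.99)   # roughly -2.326
var_99 = -(0.0 + z_99 * 0.02)           # with mean 0 and std 2%: roughly 0.0465, i.e. a 4.65% one-period VaR
print(round(z_99, 3), round(var_99, 4))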
def mavg(x, y, span=SPAN):
    "compute moving average"
    x, y = map(_plot_friendly, [x, y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    std_err = pd.expanding_std(y, span)
    y = pd.rolling_mean(y, span)
    y1 = y - std_err
    y2 = y + std_err
    return (y, y1, y2)
def plotSecondMomentConvergence(self, x):
    """
    Plots the convergence of the second moment E(X^2), or more precisely,
    the standard deviation over MCMC iterations.

    :param x: MCMC samples from distribution
    :return: E(X^2) convergence plot
    """
    x_in = np.transpose(x)
    cumstd = pd.expanding_std(x_in, min_periods=1)
    plt.plot(cumstd, label=u'E(X^2) Convergence', color='k', linewidth=1.5)
    return cumstd
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return series
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
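# A minimal, self-contained sketch of the expanding Sharpe idea above on a toy series
# (illustrative only; excess_returns() from the original module is assumed to subtract a
# per-period risk-free rate, so with rfr=0 the excess returns equal the raw returns).
# Note that in the expanding branch above the denominator is the expanding std of the raw
# returns rather than of the excess returns, which only differs when rfr varies over time.
import numpy as np
import pandas as pd

toy = pd.Series(np.random.default_rng(0).normal(0.0005, 0.01, 500))
expanding_sharpe = toy.expanding().mean() / toy.expanding().std()  # modern pandas spelling
full_sample_sharpe = toy.mean() / toy.std()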
def std(self, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)
    if window == 0:
        ret = np.asscalar(np.std(returns))
    if window > 0:
        ret = pd.rolling_std(returns, window)
    if window == -1:
        ret = pd.expanding_std(returns)
    return ret
def hurst(channel):
    x = np.array(channel)
    x = x - x.mean()
    z = np.cumsum(x)
    r = np.array((np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:])
    s = pd.expanding_std(x)[1:]
    s[np.where(s == 0)] = 1e-12
    r += 1e-12
    y_axis = np.log(r / s)
    x_axis = np.log(np.arange(1, len(y_axis) + 1))
    x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T
    m, b = np.linalg.lstsq(x_axis, y_axis)[0]
    return m
def apply_one(x):
    x -= x.mean()
    z = np.cumsum(x)
    r = (np.maximum.accumulate(z) - np.minimum.accumulate(z))[1:]
    s = pd.expanding_std(x)[1:]
    # prevent division by 0
    s[np.where(s == 0)] = 1e-12
    r += 1e-12
    y_axis = np.log(r / s)
    x_axis = np.log(np.arange(1, len(y_axis) + 1))
    x_axis = np.vstack([x_axis, np.ones(len(x_axis))]).T
    m, b = np.linalg.lstsq(x_axis, y_axis)[0]
    return m
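# Context for the two rescaled-range (R/S) helpers above: the least-squares slope m of
# log(R/S) versus log(n) estimates the Hurst exponent H. Roughly, H close to 0.5 indicates a
# random walk, H > 0.5 a persistent (trending) series and H < 0.5 a mean-reverting one.
# Sanity check on white noise (hypothetical data; requires the same legacy pandas API):
#     apply_one(pd.Series(np.random.randn(2000)))   # expect a value near 0.5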
def expanding_smoother(self, data, stype='rolling_mean', min_periods=None, freq=None):
    """
    Perform an expanding smoothing on the data; for complete help refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data: pandas dataframe input data
    :param stype: smoothing type
    :param min_periods: periods
    :param freq: frequency

    smoothing types:
        expanding_count   Number of non-null observations
        expanding_sum     Sum of values
        expanding_mean    Mean of values
        expanding_median  Arithmetic median of values
        expanding_min     Minimum
        expanding_max     Maximum
        expanding_std     Unbiased standard deviation
        expanding_var     Unbiased variance
        expanding_skew    Unbiased skewness (3rd moment)
        expanding_kurt    Unbiased kurtosis (4th moment)
    """
    if stype == 'count':
        newy = pd.expanding_count(data, min_periods=min_periods, freq=freq)
    if stype == 'sum':
        newy = pd.expanding_sum(data, min_periods=min_periods, freq=freq)
    if stype == 'mean':
        newy = pd.expanding_mean(data, min_periods=min_periods, freq=freq)
    if stype == 'median':
        newy = pd.expanding_median(data, min_periods=min_periods, freq=freq)
    if stype == 'min':
        newy = pd.expanding_min(data, min_periods=min_periods, freq=freq)
    if stype == 'max':
        newy = pd.expanding_max(data, min_periods=min_periods, freq=freq)
    if stype == 'std':
        newy = pd.expanding_std(data, min_periods=min_periods, freq=freq)
    if stype == 'var':
        newy = pd.expanding_var(data, min_periods=min_periods, freq=freq)
    if stype == 'skew':
        newy = pd.expanding_skew(data, min_periods=min_periods, freq=freq)
    if stype == 'kurt':
        newy = pd.expanding_kurt(data, min_periods=min_periods, freq=freq)
    return newy
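# The module-level pd.expanding_* functions used throughout these snippets were deprecated in
# pandas 0.18 and removed in later releases. A minimal sketch of the equivalent modern spelling,
# assuming `data` is a Series or DataFrame (the method names mirror the stype argument above):
#     data.expanding(min_periods=min_periods).mean()
#     data.expanding(min_periods=min_periods).std()
#     data.expanding(min_periods=min_periods).kurt()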
def equity_mm_test(data_df):
    test_data = data_df[['S&P 500 Return', 'Cash Return']].add(1).cumprod()
    rolling_period = 3
    rolling_change = pd.DataFrame.pct_change(test_data, periods=rolling_period)
    column_one = test_data.columns.values[0]
    column_two = test_data.columns.values[1]
    data_diff = rolling_change[column_one] - rolling_change[column_two]
    data_diff['rolling_z'] = (data_diff - pd.expanding_mean(data_diff, min_periods=24)) / \
        pd.expanding_std(data_diff, min_periods=24)
    weights = pd.DataFrame(index=data_diff.index)
    weights['bond_wght'] = data_diff['rolling_z']
    weights['treasury_wght'] = data_diff['rolling_z'] * -1
    weights = weights / 0.5
    weights.dropna(inplace=True)
    weights = weights.clip(-1, 1)
    # weights['bond_wght'] = np.where(data_diff > 0, 1.0, np.where(data_diff < 0, -1.0, np.nan))
    # weights['treasury_wght'] = np.where(data_diff > 0, -1.0, np.where(data_diff < 0, 1.0, np.nan))
    bond_wght = weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    combined_wghts = pd.concat([bond_wght, treasury_wght], axis=1)
    combined_wghts = combined_wghts.shift(1)
    combined_wghts.dropna(inplace=True)
    weighted_returns = combined_wghts * data_df[['US HY Return', 'US Int. Trsy Return']]
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    portfolio_return = portfolio_return.add(1).cumprod()
    eq_mm = long_only_ew(portfolio_return, name='Equity Momentum')
    return eq_mm, combined_wghts
def std_annualized(returns, scale=None, expanding=0):
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        return np.sqrt(scale) * pd.expanding_std(returns)
    else:
        return np.sqrt(scale) * returns.std()
def equity_vol_test(data_frame):
    rolling_period = 1
    rolling_change = pd.DataFrame.pct_change(np.log(data_frame['Equity Volatility']), periods=rolling_period)
    rolling_change['rolling_z'] = (rolling_change - pd.expanding_mean(rolling_change, min_periods=24)) / \
        pd.expanding_std(rolling_change, min_periods=24)
    rolling_change['rolling_z'] = rolling_change['rolling_z'].to_frame()
    weights = pd.DataFrame(index=rolling_change['rolling_z'].index)
    weights['bond_wght'] = rolling_change['rolling_z'] * -1
    weights['treasury_wght'] = rolling_change['rolling_z']
    weights = weights / 1.5
    weights.dropna(inplace=True)
    weights = weights.clip(-1, 1)
    bond_wght = weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    combined_wghts = pd.concat([bond_wght, treasury_wght], axis=1)
    combined_wghts = combined_wghts.shift(1)
    combined_wghts.dropna(inplace=True)
    weighted_returns = combined_wghts * data_frame[['US HY Return', 'US Int. Trsy Return']]
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    portfolio_return = portfolio_return.add(1).cumprod()
    eq_vol = long_only_ew(portfolio_return, name='Equity Volatility')
    return eq_vol, combined_wghts
def cum_avg(data):
    data = pandas.DataFrame({'data': data})
    means = pandas.expanding_mean(data)
    stds = pandas.expanding_std(data)
    return (numpy.array([i[0] for i in means.values]),
            numpy.array([i[0] for i in stds.values]))
          degree=3, gamma='auto', kernel='rbf', max_iter=1000, probability=False,
          random_state=None, shrinking=True, tol=0.001, verbose=False).fit(x_train, y_train)
y_predict = reg.predict(x[split:])
df = df.assign(p_trend=pd.Series(np.zeros(len(x))).values)
df['p_trend'][split:] = y_predict
accuracy = scorer.accuracy_score(df['Signal'][split:], df['p_trend'][split:])
df = df.assign(ret=pd.Series(np.zeros(len(x))).values)
df['ret'] = np.log(df['Open'].shift(-1) / df['Open'])
df = df.assign(ret1=pd.Series(np.zeros(len(x))).values)
df['ret1'] = df['p_trend'] * df['ret']
df = df.assign(cu_ret1=pd.Series(np.zeros(len(x))).values)
df['cu_ret1'] = np.cumsum(df['ret1'][split:])
df = df.assign(cu_ret=pd.Series(np.zeros(len(x))).values)
df['cu_ret'] = np.cumsum(df['ret'][split:])
std = pd.expanding_std(df['cu_ret1'])
sharpe = (df['cu_ret1'] - df['cu_ret']) / std
sharpe = sharpe[split:].mean()
print("\n\n ACCURACY :", accuracy)
plt.plot(df['cu_ret1'], color='b', label='Strategy Returns')
plt.plot(df['cu_ret'], color='g', label='Market Returns')
plt.figtext(0.14, 0.7, s='Sharpe ratio: %.2f' % sharpe)
plt.legend(loc='best')
plt.show()
def build_model():
    # Load SCDB CSV data.
    scdb_case_data = pandas.DataFrame.from_csv('data/SCDB_2013_01_caseCentered_Citation.csv')
    scdb_justice_data = pandas.DataFrame.from_csv('data/SCDB_2013_01_justiceCentered_Citation.csv')

    # Apply date transforms to the data.
    scdb_case_data['dateDecision'] = scdb_case_data['dateDecision'].apply(get_date)
    scdb_justice_data['dateDecision'] = scdb_justice_data['dateDecision'].apply(get_date)
    scdb_case_data['dateArgument'] = scdb_case_data['dateArgument'].apply(get_date)
    scdb_justice_data['dateArgument'] = scdb_justice_data['dateArgument'].apply(get_date)
    scdb_case_data['dateRearg'] = scdb_case_data['dateRearg'].apply(get_date)
    scdb_justice_data['dateRearg'] = scdb_justice_data['dateRearg'].apply(get_date)
    scdb_case_data['monthDecision'] = scdb_case_data['dateDecision'].apply(get_month)
    scdb_justice_data['monthDecision'] = scdb_justice_data['dateDecision'].apply(get_month)
    scdb_case_data['monthArgument'] = scdb_case_data['dateArgument'].apply(get_month)
    scdb_justice_data['monthArgument'] = scdb_justice_data['dateArgument'].apply(get_month)

    # Apply other basic transforms to the data.
    # Set unspecified decision directions to the middle of the range, 1.5
    scdb_case_data.loc[scdb_case_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5

    # Map case origin and source to the Circuit within which it belongs.
    scdb_case_data['caseOrigin_circuit'] = scdb_case_data['caseOrigin'].apply(map_circuit)
    scdb_justice_data['caseOrigin_circuit'] = scdb_justice_data['caseOrigin'].apply(map_circuit)
    scdb_case_data['caseSource_circuit'] = scdb_case_data['caseSource'].apply(map_circuit)
    scdb_justice_data['caseSource_circuit'] = scdb_justice_data['caseSource'].apply(map_circuit)

    # Map party type (e.g., petitioner or respondent) to our mapping table in constants.
    scdb_case_data['petitioner_dk'] = scdb_case_data['petitioner'].apply(map_party)
    scdb_case_data['respondent_dk'] = scdb_case_data['respondent'].apply(map_party)
    scdb_justice_data['petitioner_dk'] = scdb_justice_data['petitioner'].apply(map_party)
    scdb_justice_data['respondent_dk'] = scdb_justice_data['respondent'].apply(map_party)

    # Generate the overturn variable by comparing Supreme Court direction with lower court direction.
    scdb_case_data['decisionOverturn'] = numpy.abs(numpy.sign(scdb_case_data['decisionDirection'] -
                                                              scdb_case_data['lcDispositionDirection']))
    scdb_justice_data['decisionOverturn'] = numpy.abs(numpy.sign(scdb_justice_data['direction'] -
                                                                 scdb_justice_data['lcDispositionDirection']))

    # Handle the agreement field, i.e., does this particular Justice's vote match the Court's direction.
    scdb_justice_data['agree'] = (scdb_justice_data['direction'] == scdb_justice_data['decisionDirection'])

    # Map Justice data, some of which comes from the justice_list.csv file in data/
    scdb_justice_data['gender'] = scdb_justice_data['justice'].apply(get_gender)
    scdb_justice_data['year_of_birth'] = scdb_justice_data['justice'].apply(get_year_of_birth)
    scdb_justice_data['party_president'] = scdb_justice_data['justice'].apply(get_party_president)
    scdb_justice_data['segal_cover'] = scdb_justice_data['justice'].apply(get_segal_cover)
    scdb_justice_data['is_chief'] = [int(x.endswith(y)) for x, y in zip(scdb_justice_data['justiceName'].tolist(),
                                                                        scdb_justice_data['chief'].tolist())]

    # Sort cases by decision date and set into case list
    docket_list = scdb_case_data.sort('dateDecision')['docketId'].tolist()

    # Clean up unspecifiable/direction=3 values by setting them to the middle of the range.
    scdb_case_data.loc[scdb_case_data['lcDispositionDirection'] == 3, 'lcDispositionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['lcDispositionDirection'] == 3, 'lcDispositionDirection'] = 1.5
    scdb_case_data.loc[scdb_case_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5
    scdb_justice_data.loc[scdb_justice_data['decisionDirection'] == 3, 'decisionDirection'] = 1.5

    # Set minimum record count prior to training and max records to predict.
    min_record_count = 100
    max_record_count = 99999

    # Setup total feature and target data
    feature_data = pandas.DataFrame()
    target_data = pandas.DataFrame()

    # Setup the model
    model = None
    bad_feature_labels = ['docket', 'outcome', 'docket_outcome', 'case_outcome', 'disposition_outcome', 'direction']
    feature_labels = []
    feature_weights = []

    # Outcome data
    outcome_data = pandas.DataFrame()
    case_outcome_data = pandas.DataFrame()

    # Track the less likely label
    min_label = 1.0

    # Iterate over all dockets
    num_dockets = 0
    for docket_id in docket_list:
        # Increment dockets seen
        num_dockets += 1
        if max_record_count != None and num_dockets > max_record_count:
            break

        # Get rows of feature and target data
        feature_rows, target_rows = get_ml_row(docket_id, scdb_case_data, scdb_justice_data)

        # Now append to the feature and target lists
        feature_data = feature_data.append(feature_rows.copy())
        target_data = target_data.append(target_rows.copy())

        # Now re-calculate all the z-scaled values
        feature_data['justice_direction_mean_z'] = \
            (feature_data['justice_direction_mean'] - pandas.expanding_mean(feature_data['justice_direction_mean'])) / \
            pandas.expanding_std(feature_data['justice_direction_mean'])
        feature_data['diff_justice_lc_direction_abs_z'] = \
            (feature_data['diff_justice_lc_direction_abs'] -
             pandas.expanding_mean(feature_data['diff_justice_lc_direction_abs'])) / \
            pandas.expanding_std(feature_data['diff_justice_lc_direction_abs'])
        feature_data['diff_justice_lc_direction_z'] = \
            (feature_data['diff_justice_lc_direction'] -
             pandas.expanding_mean(feature_data['diff_justice_lc_direction'])) / \
            pandas.expanding_std(feature_data['diff_justice_lc_direction'])
        feature_data['diff_court_lc_direction_abs_z'] = \
            (feature_data['diff_court_lc_direction_abs'] -
             pandas.expanding_mean(feature_data['diff_court_lc_direction_abs'])) / \
            pandas.expanding_std(feature_data['diff_court_lc_direction_abs'])
        feature_data['justice_direction_issue_mean_z'] = \
            (feature_data['justice_direction_issue_mean'] -
             pandas.expanding_mean(feature_data['justice_direction_issue_mean'])) / \
            pandas.expanding_std(feature_data['justice_direction_issue_mean'])
        feature_data['current_court_direction_issue_mean_z'] = \
            (feature_data['current_court_direction_issue_mean'] -
             pandas.expanding_mean(feature_data['current_court_direction_issue_mean'])) / \
            pandas.expanding_std(feature_data['current_court_direction_issue_mean'])

        feature_data = feature_data.replace(-numpy.inf, -98)
        feature_data = feature_data.replace(numpy.inf, -98)
        feature_data = feature_data.fillna(-99)

        # Update any missing columns in E-block
        feature_rows = feature_data.ix[feature_data['docket'] == docket_id].sort('justice').copy()
        target_rows = feature_rows['outcome']

        # Check to see if we've trained a model yet.
        if model != None:
            # If so, let's test it.
            docket_outcome_data = feature_rows.copy()
            docket_outcome_data['prediction'] = model.predict(feature_rows[feature_labels])
            docket_outcome_data['target'] = target_rows.copy()

            # Get the vote of the court aggregated
            vote_mean_outcome = docket_outcome_data['prediction'].value_counts().idxmax()
            docket_outcome_data['docket_vote_mean'] = vote_mean_outcome
            docket_outcome_data['docket_vote_sum'] = docket_outcome_data['prediction'].sum()

            # Append data to the case outcome data frame
            case_record = scdb_case_data.ix[scdb_case_data['docketId'] == docket_id]
            case_outcome_record = docket_outcome_data.ix[0][['docket', 'docket_outcome',
                                                             'docket_vote_mean', 'docket_vote_sum']]
            case_outcome_record['docket_outcome'] = int(
                (case_record['lcDispositionDirection'] == case_record['decisionDirection']).tolist().pop())
            case_outcome_data = case_outcome_data.append(case_outcome_record)

            # Append feature weights
            feature_weights.append(copy.deepcopy(model.best_estimator_.steps[-1][1].feature_importances_.tolist()))

            # Aggregate all data
            outcome_data = outcome_data.append(copy.deepcopy(docket_outcome_data))

            if num_dockets % 100 == 0:
                # Output the rolling confusion matrix every few ticks
                print(sklearn.metrics.classification_report(outcome_data['target'].tolist(),
                                                            outcome_data['prediction'].tolist()))
                print(sklearn.metrics.accuracy_score(outcome_data['target'].tolist(),
                                                     outcome_data['prediction'].tolist()))

        # Relabel indices for feature and target data
        record_count = int(feature_data.shape[0])

        # Ensure that we have enough records
        if record_count < min_record_count:
            continue

        # If we have at least that many records, let's actually train a model.
        feature_data.index = range(record_count)
        target_data.index = range(record_count)

        # Subset feature labels to exclude our indices
        if num_dockets > min_record_count and model == None:
            # Set the excluded feature labels
            feature_labels = [label for label in feature_data.columns.tolist() if label not in bad_feature_labels]

            # Train the model on the data
            model = train_model(feature_data[feature_labels], target_data[0].apply(int).tolist(), search_parameters)
        elif num_dockets > min_record_count and num_dockets % 100 == 0:
            print((docket_id, num_dockets))

            # Train the model on the data
            model = train_model(feature_data[feature_labels], target_data[0].apply(int).tolist(), search_parameters)

    # Output the feature weight data
    feature_weight_df = pandas.DataFrame(feature_weights, columns=feature_labels)

    # Track the case assessment
    case_assessment = []

    # Try to calculate case outcomes accurately.
    for case_id, case_data in outcome_data.groupby('docket'):
        # Get the vote data
        vote_data = (case_data[['docket', 'justice', 'is_chief', 'justice_direction_mean',
                                'prediction', 'target']].sort('justice_direction_mean'))
        overturn_predicted = vote_data['prediction'].mean()
        overturn_actual = vote_data['target'].mean()
        row = [
            case_id,
            get_year_from_docket(case_id),
            case_data['issue'].tail(1).tolist().pop(),
            case_data['issue_area'].tail(1).tolist().pop(),
            case_data['case_source_circuit'].tail(1).tolist().pop(),
            case_data['case_origin_circuit'].tail(1).tolist().pop(),
            case_data['lc_direction'].tail(1).tolist().pop(),
            case_data['lc_disposition'].tail(1).tolist().pop(),
            overturn_predicted,
            overturn_actual,
            overturn_predicted > 0.5,
            overturn_actual > 0.5
        ]

        # Get the votes aligned
        [row.append(value) for value in vote_data['prediction']]
        [row.append(value) for value in vote_data['justice_direction_mean']]

        # Pad if fewer than nine justices voting
        if vote_data['prediction'].shape[0] < 9:
            for i in range((9 - vote_data['prediction'].shape[0])):
                row.append(numpy.nan)

        row.append(vote_data.ix[vote_data['is_chief'] == 1]['prediction'].tolist().pop())

        # Append to the case assessment dataframe.
        case_assessment.append(row)

    # Setup the column list and final case assessment DF
    column_list = [
        'docket', 'year', 'issue', 'issue_area', 'case_source_circuit', 'case_origin_circuit',
        'lc_direction', 'lc_disposition', 'overturn_count_predict', 'overturn_count_actual',
        'overturn_predict', 'overturn_actual',
        'justice_1', 'justice_2', 'justice_3', 'justice_4', 'justice_5', 'justice_6',
        'justice_7', 'justice_8', 'justice_9',
        'justice_1_dir', 'justice_2_dir', 'justice_3_dir', 'justice_4_dir', 'justice_5_dir',
        'justice_6_dir', 'justice_7_dir', 'justice_8_dir', 'justice_9_dir',
        'justice_chief'
    ]
    case_assessment_df = pandas.DataFrame(case_assessment, columns=column_list)
    case_assessment_df['correct'] = (case_assessment_df['overturn_predict'] == case_assessment_df['overturn_actual'])
    outcome_data['correct'] = (outcome_data['prediction'] == outcome_data['target'])

    # Get the annual accuracy figures
    outcome_data['year'] = outcome_data['docket'].apply(get_year_from_docket)
    case_assessment_df['year'] = case_assessment_df['docket'].apply(get_year_from_docket)

    x_case_assessment_df = case_assessment_df.ix[case_assessment_df['year'] >= 1946]
    print("Case Assessment")
    print(pandas.DataFrame(sklearn.metrics.confusion_matrix(x_case_assessment_df['overturn_actual'].tolist(),
                                                            x_case_assessment_df['overturn_predict'].tolist())))
    print(sklearn.metrics.classification_report(x_case_assessment_df['overturn_actual'].tolist(),
                                                x_case_assessment_df['overturn_predict'].tolist()))
    print(sklearn.metrics.accuracy_score(x_case_assessment_df['overturn_actual'].tolist(),
                                         x_case_assessment_df['overturn_predict'].tolist()))

    print("Justice Assessment")
    x_outcome_data = outcome_data.loc[outcome_data['year'] >= 1946]
    print(pandas.DataFrame(sklearn.metrics.confusion_matrix(x_outcome_data['target'].tolist(),
                                                            x_outcome_data['prediction'].tolist())))
    print(sklearn.metrics.classification_report(x_outcome_data['target'].tolist(),
                                                x_outcome_data['prediction'].tolist()))
    print(sklearn.metrics.accuracy_score(x_outcome_data['target'].tolist(),
                                         x_outcome_data['prediction'].tolist()))

    # Setup vars
    output_folder = 'model_output'
    timestamp_suffix = time.strftime("%Y%m%d%H%M%S")

    # Create path
    run_output_folder = os.path.join(output_folder, timestamp_suffix)
    os.makedirs(run_output_folder)

    # Output data
    outcome_data.to_csv(os.path.join(run_output_folder, 'justice_outcome_data.csv'))
    case_assessment_df.to_csv(os.path.join(run_output_folder, 'case_outcome_data.csv'))
    feature_weight_df.to_csv(os.path.join(run_output_folder, 'feature_weights.csv'))

    # Make a ZIP
    os.system('zip -9 {0}.zip {1}'.format(os.path.join(output_folder, timestamp_suffix),
                                          os.path.join(run_output_folder, '*.csv')))
def comput_idicators(df, trading_days, required, save_file, save_address, whole=1):
    # TODO: net_value has some problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.ix[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]
    # daily return
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark_net_value
    df_valid['benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0]
    # benchmark_return
    df_valid['benchmark_return'] = (df_valid['benchmark'] - df_valid['benchmark'].shift(1)) / \
        df_valid['benchmark'].shift(1)
    # Annualized return
    df_valid['Annu_return'] = pd.expanding_mean(df_valid['return']) * trading_days
    # Volatility
    df_valid.loc[:, 'algo_volatility'] = pd.expanding_std(df_valid['return']) * np.sqrt(trading_days)
    df_valid.loc[:, 'xret'] = df_valid['return'] - df_valid['Interest_rate'] / trading_days / 100
    df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid['benchmark_return']

    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # sharpe ratio
    df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio) * np.sqrt(trading_days)
    # information ratio
    df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio) * np.sqrt(trading_days)

    # Sortino ratio
    def modify_ratio(x, re):
        re /= trading_days
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio, args=(required,)) * np.sqrt(trading_days)
    # Transfer infs to NA
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit_rate
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0, 0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR
    df_valid['VaR'] = -pd.expanding_quantile(df_valid['return'], 0.05) * np.sqrt(trading_days)
    # 95% CVaR
    df_valid['CVaR'] = -pd.expanding_apply(
        df_valid['return'],
        lambda x: x[x < np.nanpercentile(x, 5)].mean()) * np.sqrt(trading_days)

    if whole == 1:
        # max_drawdown
        def exp_diff(x, type):
            if type == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()))
            else:
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret

        # dollar
        # xret = exp_diff(df_valid['cum_profit'], 'dollar')
        # df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
        # percentage
        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max_drawdown_duration:
        # drawdown_enddate is the first time for restoring the max
        def drawdown_end(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if never recovering, then return nan
            try:
                return x_sub[x_sub == 0].index[0]
            except:
                return np.nan

        def drawdown_start(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            try:
                return x_sub[x_sub == 0].index[-1]
            except:
                return np.nan

        df_valid['max_drawdown_start'] = pd.Series()
        df_valid['max_drawdown_end'] = pd.Series()
        df_valid['max_drawdown_start'].ix[-1] = drawdown_start(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_end'].ix[-1] = drawdown_end(df_valid['nav'], 'percentage')

    df_valid.to_csv(save_address)

    # =====result visualization=====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
        plt.xlabel('Date')
        plt.legend(loc=0, shadow=True)
        plt.ylabel('Nav')
        plt.title('Nav of ' + save_file + ' & SP500')

    # plt.subplot(223)
    # plt.plot(df_valid['cum_profit'], label='strategy')
    # plt.xlabel('Date')
    # plt.ylabel('Cum_profit')
    # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid[df_valid['return'].notna()].loc[:, 'return']
    y_return = df_valid[df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return']
    mu = x_return.mean()
    sigma = x_return.std()
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    count_x, _, _ = plt.hist(x_return, mybins, normed=1, alpha=0.5, label='strategy')
    count_y, _, _ = plt.hist(y_return, mybins, normed=1, alpha=0.5, label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # add normal distribution line
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
def get_context_data(self, **kwargs):
    context = super(WellChartView, self).get_context_data(**kwargs)
    well = Well.objects.get(pk=context['pk'])
    name = unicode(well)
    options = {
        'rangeSelector': {'enabled': True, 'inputEnabled': True},
        'navigator': {'adaptToUpdatedData': True, 'enabled': True},
        'chart': {'type': 'arearange', 'zoomType': 'x'},
        'title': {'text': name},
        'xAxis': {'type': 'datetime'},
        'yAxis': [{'title': {'text': 'm tov NAP'}}],
        'tooltip': {'valueSuffix': ' m', 'valueDecimals': 2, 'shared': True},
        'legend': {'enabled': True},
        'plotOptions': {'line': {'marker': {'enabled': False}}},
        'credits': {'enabled': True,
                    'text': 'acaciawater.com',
                    'href': 'http://www.acaciawater.com'},
    }
    series = []
    xydata = []
    for screen in well.screen_set.all():
        name = unicode(screen)
        data = screen.to_pandas(ref='nap')
        xydata = zip(data.index.to_pydatetime(), data.values)
        series.append({'name': name, 'type': 'line', 'data': xydata, 'zIndex': 1})
        mean = pd.expanding_mean(data)
        # series.append({'name': 'gemiddelde',
        #                'type': 'line',
        #                'data': zip(mean.index.to_pydatetime(), mean.values),
        #                'linkedTo': ':previous',
        #                })
        std = pd.expanding_std(data)
        a = (mean - std).dropna()
        b = (mean + std).dropna()
        ranges = zip(a.index.to_pydatetime(), a.values, b.values)
        series.append({'name': 'spreiding',
                       'data': ranges,
                       'type': 'arearange',
                       'lineWidth': 0,
                       'fillOpacity': 0.2,
                       'linkedTo': ':previous',
                       'zIndex': 0})
    if len(xydata) > 0:
        mv = []
        for i in range(len(xydata)):
            mv.append((xydata[i][0], screen.well.maaiveld))
        series.append({'name': 'maaiveld', 'type': 'line', 'data': mv})
    options['series'] = series
    context['options'] = json.dumps(
        options, default=lambda x: int(time.mktime(x.timetuple()) * 1000))
    context['object'] = well
    return context
def return_reversal(data_df):
    data_df[['HY Tot Index', 'US Trs Tot Index']] = data_df[['US HY Return', 'US Int. Trsy Return']].add(1).cumprod()
    data_df[['HY Rolling Return', 'Rolling Return']] = pd.DataFrame.pct_change(
        data_df[['HY Tot Index', 'US Trs Tot Index']], periods=36)
    data_df['diff'] = data_df['HY Rolling Return'] - data_df['Rolling Return']
    data_df['Return Z'] = (data_df['diff'] - pd.expanding_mean(data_df['diff'], min_periods=24)) / \
        pd.expanding_std(data_df['diff'], min_periods=24)
    data_df['Return Z'].dropna(inplace=True)
    data_df['Return Z'].plot()
    hp_months_cheap = 24
    hp_months_rich = 24
    counter = 0
    date_array = data_df.index.values
    signal = []
    signal_abs_threshold_cheap = 2
    signal_abs_threshold_rich = 2
    for date in range(0, len(date_array)):
        if len(data_df['Return Z']) - 1 >= counter:
            score = data_df['Return Z'].ix[counter]
            if score > signal_abs_threshold_cheap:
                counter = counter + hp_months_cheap
                temp_list = [-1] * hp_months_cheap
                signal.extend(temp_list)
            elif score < (signal_abs_threshold_rich * -1):
                counter = counter + hp_months_rich
                temp_list = [1] * hp_months_rich
                signal.extend(temp_list)
            elif (score <= signal_abs_threshold_cheap) and score >= (signal_abs_threshold_rich * -1):
                counter = counter + 1
                signal.extend([0])
    signal = signal[:len(date_array)]
    weights = pd.DataFrame(signal, index=data_df['Return Z'].index, columns=['US HY Return'])
    weights['bond_wght'] = weights
    weights['treasury_wght'] = weights['US HY Return'] * -1
    bond_wght = weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    combined_wghts = pd.concat([bond_wght, treasury_wght], axis=1)
    combined_wghts = combined_wghts.shift(1)
    combined_wghts.dropna(inplace=True)
    weighted_returns = combined_wghts * data_df[['US HY Return', 'US Int. Trsy Return']]
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    portfolio_return = portfolio_return.add(1).cumprod()
    return_relative_test = long_only_ew(portfolio_return, name='Reversal')
    return return_relative_test, combined_wghts
# -*- coding: utf-8 -*-
"""
Created on 05/10/15

@author: Carlos Eduardo Barbosa

Test convergence of Lick errors as a function of the number of simulations.
"""
import os

import numpy as np
from pandas import expanding_std
import matplotlib.pyplot as plt

from config import *

if __name__ == "__main__":
    os.chdir(os.path.join(home, "single2/mc_logs"))
    cols = np.array([12, 13, 16, 17, 18, 19, 20])
    logs = os.listdir(".")
    fig = plt.figure(1, figsize=(5, 15))
    for log in logs:
        data = np.loadtxt(log).T[cols]
        for i, d in enumerate(data):
            ax = plt.subplot(7, 1, i + 1)
            ax.plot(expanding_std(d, min_periods=1) / d.std(), "-k")
        plt.pause(1)
    plt.show(block=False)
reg = SVC(C=1, cache_size=200, class_weight=None, coef0=0, decision_function_shape=None,
          degree=3, gamma='auto', kernel='rbf', max_iter=1000, probability=False,
          random_state=None, shrinking=True, tol=0.001, verbose=False)
reg.fit(X[:split], y[:split])
y_predict = reg.predict(X[split:])
Df = Df.assign(P_Trend=pd.Series(np.zeros(len(X))).values)
Df['P_Trend'][split:] = y_predict
accuracy = scorer.accuracy_score(Df['Signal'][split:], Df['P_Trend'][split:])
Df = Df.assign(Ret=pd.Series(np.zeros(len(X))).values)
Df['Ret'] = np.log(Df['Open'].shift(-1) / Df['Open'])
Df = Df.assign(Ret1=pd.Series(np.zeros(len(X))).values)
Df['Ret1'] = Df['P_Trend'] * Df['Ret']
Df = Df.assign(Cu_Ret1=pd.Series(np.zeros(len(X))).values)
Df['Cu_Ret1'] = np.cumsum(Df['Ret1'][split:])
Df = Df.assign(Cu_Ret=pd.Series(np.zeros(len(X))).values)
Df['Cu_Ret'] = np.cumsum(Df['Ret'][split:])
Std = pd.expanding_std(Df['Cu_Ret1'])
Sharpe = (Df['Cu_Ret1'] - Df['Cu_Ret']) / Std
Sharpe = Sharpe[split:].mean()
print('\n\nAccuracy:', accuracy)
plt.plot(Df['Cu_Ret1'], color='r', label='Strategy Returns')
plt.plot(Df['Cu_Ret'], color='g', label='Market Returns')
plt.figtext(0.14, 0.7, s='Sharpe ratio: %.2f' % Sharpe)
plt.legend(loc='best')
class CumulativeRets(object):
    def __init__(self, rets=None, ltd_rets=None):
        if rets is None and ltd_rets is None:
            raise ValueError('rets or ltd_rets must be specified')
        if rets is None:
            if ltd_rets.empty:
                rets = ltd_rets
            else:
                rets = (1. + ltd_rets).pct_change()
                rets.iloc[0] = ltd_rets.iloc[0]
        if ltd_rets is None:
            if rets.empty:
                ltd_rets = rets
            else:
                ltd_rets = (1. + rets).cumprod() - 1.
        self.rets = rets
        self.ltd_rets = ltd_rets

    pds_per_year = property(lambda self: periodicity(self.rets))

    def asfreq(self, freq):
        other_pds_per_year = periodicity(freq)
        if self.pds_per_year < other_pds_per_year:
            msg = 'Cannot downsample returns. Cannot convert from %s periods/year to %s'
            raise ValueError(msg % (self.pds_per_year, other_pds_per_year))

        if freq == 'B':
            rets = (1. + self.rets).groupby(self.rets.index.date).apply(lambda s: s.prod()) - 1.
            # If you do not do this, it will be an object index
            rets.index = pd.DatetimeIndex([i for i in rets.index])
            return CumulativeRets(rets)
        else:
            rets = (1. + self.rets).resample(freq, how='prod') - 1.
            return CumulativeRets(rets)

    # -----------------------------------------------------------
    # Resampled data
    dly = lazy_property(lambda self: self.asfreq('B'), 'dly')
    weekly = lazy_property(lambda self: self.asfreq('W'), 'weekly')
    monthly = lazy_property(lambda self: self.asfreq('M'), 'monthly')
    quarterly = lazy_property(lambda self: self.asfreq('Q'), 'quarterly')
    annual = lazy_property(lambda self: self.asfreq('A'), 'annual')

    # -----------------------------------------------------------
    # Basic Metrics
    @lazy_property
    def ltd_rets_ann(self):
        return (1. + self.ltd_rets) ** (self.pds_per_year / pd.expanding_count(self.rets)) - 1.

    cnt = property(lambda self: self.rets.notnull().astype(int).sum())
    mean = lazy_property(lambda self: self.rets.mean(), 'avg')
    mean_ann = lazy_property(lambda self: self.mean * self.pds_per_year, 'avg_ann')
    ltd = lazy_property(lambda self: self.ltd_rets.iloc[-1], name='ltd')
    ltd_ann = lazy_property(lambda self: self.ltd_rets_ann.iloc[-1], name='ltd_ann')
    std = lazy_property(lambda self: self.rets.std(), 'std')
    std_ann = lazy_property(lambda self: self.std * np.sqrt(self.pds_per_year), 'std_ann')
    drawdown_info = lazy_property(lambda self: drawdown_info(self.rets), 'drawdown_info')
    drawdowns = lazy_property(lambda self: drawdowns(self.rets), 'drawdowns')
    maxdd = lazy_property(lambda self: self.drawdown_info['maxdd'].min(), 'maxdd')
    dd_avg = lazy_property(lambda self: self.drawdown_info['maxdd'].mean(), 'dd_avg')
    kurtosis = lazy_property(lambda self: self.rets.kurtosis(), 'kurtosis')
    skew = lazy_property(lambda self: self.rets.skew(), 'skew')
    sharpe_ann = lazy_property(lambda self: np.divide(self.ltd_ann, self.std_ann), 'sharpe_ann')
    downside_deviation = lazy_property(lambda self: downside_deviation(self.rets, mar=0, full=0, ann=1),
                                       'downside_deviation')
    sortino = lazy_property(lambda self: self.ltd_ann / self.downside_deviation, 'sortino')

    @lazy_property
    def maxdd_dt(self):
        ddinfo = self.drawdown_info
        if ddinfo.empty:
            return None
        else:
            return self.drawdown_info['maxdd dt'].ix[self.drawdown_info['maxdd'].idxmin()]

    # -----------------------------------------------------------
    # Expanding metrics
    expanding_mean = property(lambda self: pd.expanding_mean(self.rets), 'expanding_avg')
    expanding_mean_ann = property(lambda self: self.expanding_mean * self.pds_per_year, 'expanding_avg_ann')
    expanding_std = lazy_property(lambda self: pd.expanding_std(self.rets), 'expanding_std')
    expanding_std_ann = lazy_property(lambda self: self.expanding_std * np.sqrt(self.pds_per_year),
                                      'expanding_std_ann')
    expanding_sharpe_ann = property(lambda self: np.divide(self.ltd_rets_ann, self.expanding_std_ann))

    # -----------------------------------------------------------
    # Rolling metrics
    rolling_mean = property(lambda self: pd.rolling_mean(self.rets), 'rolling_avg')
    rolling_mean_ann = property(lambda self: self.rolling_mean * self.pds_per_year, 'rolling_avg_ann')

    def rolling_ltd_rets(self, n):
        return pd.rolling_apply(self.rets, n, lambda s: (1. + s).prod() - 1.)

    def rolling_ltd_rets_ann(self, n):
        tot = self.rolling_ltd_rets(n)
        return tot ** (self.pds_per_year / n)

    def rolling_std(self, n):
        return pd.rolling_std(self.rets, n)

    def rolling_std_ann(self, n):
        return self.rolling_std(n) * np.sqrt(self.pds_per_year)

    def rolling_sharpe_ann(self, n):
        return self.rolling_ltd_rets_ann(n) / self.rolling_std_ann(n)

    def iter_by_year(self):
        """Split the return objects by year and iterate"""
        for key, grp in self.rets.groupby(lambda x: x.year):
            yield key, CumulativeRets(rets=grp)

    def truncate(self, before=None, after=None):
        rets = self.rets.truncate(before=before, after=after)
        return CumulativeRets(rets=rets)

    @lazy_property
    def summary(self):
        d = OrderedDict()
        d['ltd'] = self.ltd
        d['ltd ann'] = self.ltd_ann
        d['mean'] = self.mean
        d['mean ann'] = self.mean_ann
        d['std'] = self.std
        d['std ann'] = self.std_ann
        d['sharpe ann'] = self.sharpe_ann
        d['sortino'] = self.sortino
        d['maxdd'] = self.maxdd
        d['maxdd dt'] = self.maxdd_dt
        d['dd avg'] = self.dd_avg
        d['cnt'] = self.cnt
        return pd.Series(d, name=self.rets.index.freq or guess_freq(self.rets.index))

    def _repr_html_(self):
        from tia.util.fmt import new_dynamic_formatter
        fmt = new_dynamic_formatter(method='row', precision=2, pcts=1, trunc_dot_zeros=1, parens=1)
        df = self.summary.to_frame()
        return fmt(df)._repr_html_()

    def get_alpha_beta(self, bm_rets):
        if isinstance(bm_rets, pd.Series):
            bm = CumulativeRets(bm_rets)
        elif isinstance(bm_rets, CumulativeRets):
            bm = bm_rets
        else:
            raise ValueError('bm_rets must be series or CumulativeRetPerformace not %s' % (type(bm_rets)))

        bm_freq = guess_freq(bm_rets)
        if self.pds_per_year != bm.pds_per_year:
            tgt = {'B': 'dly', 'W': 'weekly', 'M': 'monthly', 'Q': 'quarterly', 'A': 'annual'}.get(bm_freq, None)
            if tgt is None:
                raise ValueError('No mapping for handling benchmark with frequency: %s' % bm_freq)
            tmp = getattr(self, tgt)
            y = tmp.rets
            y_ann = tmp.ltd_ann
        else:
            y = self.rets
            y_ann = self.ltd_ann

        x = bm.rets.truncate(y.index[0], y.index[-1])
        x_ann = bm.ltd_ann

        model = pd.ols(x=x, y=y)
        beta = model.beta[0]
        alpha = y_ann - beta * x_ann
        return pd.Series({'alpha': alpha, 'beta': beta}, name=bm_freq)

    def plot_ltd(self, ax=None, style='k', label='ltd', show_dd=1, title=True, legend=1):
        ltd = self.ltd_rets
        ax = ltd.plot(ax=ax, style=style, label=label)
        if show_dd:
            dd = self.drawdowns
            dd.plot(style='r', label='drawdowns', alpha=.5, ax=ax)
            ax.fill_between(dd.index, 0, dd.values, facecolor='red', alpha=.25)
            fmt = PercentFormatter
            AxesFormat().Y.percent().X.label("").apply(ax)
            legend and ax.legend(loc='upper left', prop={'size': 12})
            # show the actual date and value
            mdt, mdd = self.maxdd_dt, self.maxdd
            bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.25)
            try:
                dtstr = '{0}'.format(mdt.to_period())
            except:
                # assume daily
                dtstr = '{0}'.format(hasattr(mdt, 'date') and mdt.date() or mdt)
            ax.text(mdt, dd[mdt], "{1} \n {0}".format(fmt(mdd), dtstr).strip(),
                    ha="center", va="top", size=8, bbox=bbox_props)

        if title is True:
            pf = new_percent_formatter(1, parens=False, trunc_dot_zeros=True)
            ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=True)
            total = pf(self.ltd_ann)
            vol = pf(self.std_ann)
            sh = ff(self.sharpe_ann)
            mdd = pf(self.maxdd)
            title = 'ret$\mathregular{_{ann}}$ %s vol$\mathregular{_{ann}}$ %s sharpe %s maxdd %s' % (
                total, vol, sh, mdd)

        title and ax.set_title(title, fontdict=dict(fontsize=10, fontweight='bold'))
        return ax

    def plot_ret_on_dollar(self, title=None, show_maxdd=1, figsize=None, ax=None, append=0, label=None, **plot_args):
        plot_return_on_dollar(self.rets, title=title, show_maxdd=show_maxdd, figsize=figsize, ax=ax,
                              append=append, label=label, **plot_args)

    def plot_hist(self, ax=None, **histplot_kwargs):
        pf = new_percent_formatter(precision=1, parens=False, trunc_dot_zeros=1)
        ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=1)
        ax = self.rets.hist(ax=ax, **histplot_kwargs)
        AxesFormat().X.percent(1).apply(ax)
        m, s, sk, ku = pf(self.mean), pf(self.std), ff(self.skew), ff(self.kurtosis)
        txt = '$\mathregular{\mu}$=%s $\mathregular{\sigma}$=%s skew=%s kurt=%s' % (m, s, sk, ku)
        bbox = dict(facecolor='white', alpha=0.5)
        ax.text(0, 1, txt, fontdict={'fontweight': 'bold'}, bbox=bbox, ha='left', va='top', transform=ax.transAxes)
        return ax

    def filter(self, mask, keep_ltd=0):
        if isinstance(mask, pd.Series):
            mask = mask.values
        rets = self.rets.ix[mask]
        ltd = None
        if keep_ltd:
            ltd = self.ltd_rets.ix[mask]
        return CumulativeRets(rets=rets, ltd_rets=ltd)
raw = requests.get("http://www.google.com/finance/getprices?i=" + interval +
                   "&p=" + lookback + "d&f=c&df=cpct&q=" + symbol).text

# Take the data and put it into a DataFrame
raw = raw.split()[7:]
data = pd.DataFrame(raw)
data = data.astype("float")
data["price"] = data[0]
del data[0]

# We only need 60 minutes worth of data
if len(data["price"]) >= 60:
    data["price"] = data["price"][-60:]

# Columns for expanding mean and standard deviation
data["mean"] = pd.expanding_mean(data["price"])
data["vol"] = pd.expanding_std(data["price"])

# Linear regression on price data
x = range(len(data["price"][-60:]))
y = data["price"][-60:].values
A, B = curve_fit(f, x, y)

# Print the trend to the console
if A[0] < 0:
    print("downtrend")
else:
    print("uptrend")

# Plot window
plt.figure(1)

# Plot for the price and its moving average
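# The model function f passed to curve_fit above is not defined in this excerpt; the
# trend test on A[0] only makes sense if f is a straight line whose first parameter is
# the slope. A hypothetical definition consistent with that usage would be:
#     def f(x, a, b):
#         return a * np.asarray(x) + b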
def spread_crossover(data_df, slow=1, fast=12):
    spread_log = pd.DataFrame(np.log(data_df.ix[:, 0] * 100))
    data_df['spread_z_ma'] = (spread_log - pd.expanding_mean(spread_log, min_periods=24)) / \
        pd.expanding_std(spread_log, min_periods=24)
    data_df['spread_z_ema'] = (spread_log - pd.ewma(spread_log, min_periods=24, halflife=12)) / \
        pd.ewmstd(spread_log, halflife=12)
    data_df['spread_z_ema'] = pd.rolling_mean(data_df['spread_z_ema'], window=3)
    data_df['slow'] = pd.rolling_mean(data_df['US HY Spread'], slow)
    data_df['fast'] = pd.rolling_mean(data_df['US HY Spread'], fast)
    data_df['diff'] = (data_df['slow'] - data_df['fast']) * -1
    data_df['diff'] = data_df['diff'] + 1
    data_df['diff'] = np.log(data_df['diff'])
    data_df['tren_z_ma'] = (data_df['diff'] - pd.expanding_mean(data_df['diff'], min_periods=24)) / \
        pd.expanding_std(data_df['diff'], min_periods=24)
    data_df['tren_z_ma'] = pd.rolling_mean(data_df['tren_z_ma'], window=3)
    trend_valuation_df = pd.concat([data_df['spread_z_ema'], data_df['tren_z_ma']], axis=1)
    trend_valuation_df.dropna(inplace=True)
    trend_valuation_df.plot()
    plt.show()
    algo_wghts_df = pd.DataFrame()
    wghts_array = []
    valuation_threshold_cheap = 1
    valuation_threshold_rich = -1.0
    trend_threshold_tightening = 0.1
    trend_threshold_widening = -0.1
    data_df['spread_z_ma'].plot()
    plt.show()
    for score in trend_valuation_df.values:
        valuation_score = score[0]
        trend_score = score[1]
        if (trend_score >= -0.2 and valuation_score >= -1):
            wghts_array.append(min(1, abs(trend_score - valuation_score) / 1))
        else:
            wghts_array.append(0)
        # elif trend_score <= -0.1 and valuation_score <= valuation_threshold_cheap:
        #     wghts_array.append(-1)
        # elif valuation_score >= valuation_threshold_cheap:
        #     wghts_array.append(1)
        # else:
        #     wghts_array.append(0)
    wghts_df = pd.DataFrame(wghts_array, index=trend_valuation_df.index)
    long = wghts_df[wghts_df == 1].count()[0] / len(trend_valuation_df)
    neutral = wghts_df[wghts_df == 0].count()[0] / len(trend_valuation_df)
    short = wghts_df[wghts_df == -1].count()[0] / len(trend_valuation_df)
    wghts_df.columns = [data_df.columns.values[1]]
    wghts_df = wghts_df.shift(1)
    s1 = bt.Strategy('Valuation & Trend ', [bt.algos.WeighTarget(wghts_df), bt.algos.Rebalance()])
    return_data = data_df.ix[:, 1].to_frame()
    return_data.columns = [data_df.columns.values[1]]
    strategy = bt.Backtest(s1, return_data)
    res = bt.run(strategy)
    res.plot(logy=True)
    res.display()
    print(long, neutral, short)
def spread_holding_test(data_df):
    data_df['lg_spread'] = np.log(data_df['US HY Spread'] * 100)
    data_df['spread_z_ema'] = (data_df['lg_spread'] - pd.expanding_mean(data_df['lg_spread'], min_periods=24)) / \
        pd.expanding_std(data_df['lg_spread'], min_periods=24)
    data_df['spread_z_ema'].dropna(inplace=True)
    hp_months_cheap = 12
    hp_months_rich = 12
    counter = 0
    date_array = data_df.index.values
    signal = []
    signal_abs_threshold_cheap = 1.5
    signal_abs_threshold_rich = 1.5
    for date in range(0, len(date_array)):
        if len(data_df['spread_z_ema']) - 1 >= counter:
            score = data_df['spread_z_ema'].ix[counter]
            if score > signal_abs_threshold_cheap:
                counter = counter + hp_months_cheap
                temp_list = [1] * hp_months_cheap
                signal.extend(temp_list)
            elif score < (signal_abs_threshold_rich * -1):
                counter = counter + hp_months_rich
                temp_list = [-1] * hp_months_rich
                signal.extend(temp_list)
            elif (score <= signal_abs_threshold_cheap) and score >= (signal_abs_threshold_rich * -1):
                counter = counter + 1
                signal.extend([1])
    signal = signal[:len(date_array)]
    weights = pd.DataFrame(signal, index=data_df['spread_z_ema'].index, columns=['US HY Return'])
    weights['bond_wght'] = weights
    weights['treasury_wght'] = weights['US HY Return'] * -1
    bond_wght = weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    combined_wghts = pd.concat([bond_wght, treasury_wght], axis=1)
    combined_wghts = combined_wghts.shift(1)
    combined_wghts.dropna(inplace=True)
    weighted_returns = combined_wghts * data_df[['US HY Return', 'US Int. Trsy Return']]
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    portfolio_return = portfolio_return.add(1).cumprod()
    risk_premia = long_only_ew(portfolio_return, name='Risk Premia')
    return risk_premia, combined_wghts
""" Created on 05/10/15 @author: Carlos Eduardo Barbosa Test convergence of Lick errors as a function of the number of simulation. """ import os import numpy as np from pandas import expanding_std import matplotlib.pyplot as plt from config import * if __name__ == "__main__": os.chdir(os.path.join(home, "single2/mc_logs")) cols = np.array([12, 13,16,17,18,19,20]) logs = os.listdir(".") fig = plt.figure(1, figsize=(5,15)) for log in logs: data = np.loadtxt(log).T[cols] for i,d in enumerate(data): ax = plt.subplot(7,1,i+1) ax.plot(expanding_std(d, min_periods=1) / d.std(), "-k") plt.pause(1) plt.show(block=False)
"&p=" + lookback + "d&f=c&df=cpct&q=" + symbol).text # Take the data and put it into a DataFrame raw = raw.split()[7:] data = pd.DataFrame(raw) data = data.astype("float") data["price"] = data[0] del data[0] # We only need 60 minutes worth of data if len(data["price"] >= 60): data["price"] = data["price"][-60:] # Columns for expanding mean and standard deviation data["mean"] = pd.expanding_mean(data["price"]) data["vol"] = pd.expanding_std(data["price"]) # Linear regression on price data x = range(len(data["price"][-60:])) y = data["price"][-60:].values A, B = curve_fit(f, x, y) # Print the trend to the console if A[0] < 0: print("downtrend") else: print("uptrend") # Plot window plt.figure(1) # Plot for the price and its moving average
def get_context_data(self, **kwargs):
    context = super(WellChartView, self).get_context_data(**kwargs)
    well = Well.objects.get(pk=context['pk'])
    name = unicode(well)
    options = {
        'rangeSelector': {'enabled': True, 'inputEnabled': True},
        'navigator': {'adaptToUpdatedData': True, 'enabled': True},
        'chart': {'type': 'arearange', 'zoomType': 'x'},
        'title': {'text': name},
        'xAxis': {'type': 'datetime'},
        'yAxis': [{'title': {'text': 'Grondwaterstand\n(m tov NAP)'}}],
        'tooltip': {'valueSuffix': ' m', 'valueDecimals': 2, 'shared': True},
        'legend': {'enabled': True},
        'plotOptions': {'line': {'marker': {'enabled': False}}},
        'credits': {'enabled': True,
                    'text': 'acaciawater.com',
                    'href': 'http://www.acaciawater.com'},
    }
    series = []
    xydata = []
    for screen in well.screen_set.all():
        name = unicode(screen)
        data = screen.to_pandas(ref='nap')
        if data.size > 0:
            xydata = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': name,
                           'type': 'line',
                           'data': xydata,
                           'lineWidth': 1,
                           'color': '#0066FF',
                           'zIndex': 2})
            mean = pd.expanding_mean(data)
            std = pd.expanding_std(data)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            series.append({'name': 'spreiding',
                           'data': ranges,
                           'type': 'arearange',
                           'lineWidth': 0,
                           'color': '#0066FF',
                           'fillOpacity': 0.2,
                           'linkedTo': ':previous',
                           'zIndex': 0})
        data = screen.to_pandas(ref='nap', kind='HAND')
        if data.size > 0:
            hand = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': 'handpeiling',
                           'type': 'scatter',
                           'data': hand,
                           'zIndex': 3,
                           'marker': {'symbol': 'circle',
                                      'radius': 6,
                                      'lineColor': 'white',
                                      'lineWidth': 2,
                                      'fillColor': 'red'}})
    if len(xydata) > 0:
        mv = []
        mv.append((xydata[0][0], screen.well.maaiveld))
        mv.append((xydata[-1][0], screen.well.maaiveld))
        series.append({'name': 'maaiveld',
                       'type': 'line',
                       'lineWidth': 2,
                       'color': '#009900',
                       'dashStyle': 'Dash',
                       'data': mv,
                       'zIndex': 4})
    # add precipitation
    try:
        closest = Station.closest(well.location)
        name = 'Meteostation {} (dagwaarden)'.format(closest.naam)
        neerslag = Series.objects.get(name='RH', mlocatie__name=name)
        data = neerslag.to_pandas(start=xydata[0][0], stop=xydata[-1][0]) / 10.0  # 0.1 mm -> mm
        data = zip(data.index.to_pydatetime(), data.values)
        series.append({'name': 'Neerslag ' + closest.naam,
                       'type': 'column',
                       'data': data,
                       'yAxis': 1,
                       'pointRange': 24 * 3600 * 1000,  # 1 day
                       'pointPadding': 0.01,
                       'pointPlacement': 0.5,
                       'zIndex': 1,
                       'color': 'orange',
                       'borderColor': '#cc6600'})
        options['yAxis'].append({'title': {'text': 'Neerslag (mm)'}, 'opposite': 1, 'min': 0})
    except:
        pass
    options['series'] = series
    context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple()) * 1000))
    context['object'] = well
    return context
def get_context_data(self, **kwargs):
    context = super(WellChartView, self).get_context_data(**kwargs)
    well = Well.objects.get(pk=context['pk'])
    name = unicode(well)
    options = {
        'rangeSelector': {'enabled': True, 'inputEnabled': True},
        'navigator': {'adaptToUpdatedData': True, 'enabled': True},
        'chart': {'type': 'arearange', 'zoomType': 'x'},
        'title': {'text': name},
        'xAxis': {'type': 'datetime'},
        'yAxis': [{'title': {'text': 'm tov NAP'}}],
        'tooltip': {'valueSuffix': ' m', 'valueDecimals': 2, 'shared': True},
        'legend': {'enabled': True},
        'plotOptions': {'line': {'marker': {'enabled': False}}},
        'credits': {'enabled': True,
                    'text': 'acaciawater.com',
                    'href': 'http://www.acaciawater.com'},
    }
    series = []
    xydata = []
    start = datetime.datetime(2013, 1, 1)
    stop = datetime.datetime(2016, 1, 1)
    for screen in well.screen_set.all():
        name = unicode(screen)
        data = screen.to_pandas(ref='nap')[start:stop]
        if data.size > 0:
            xydata = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': name,
                           'type': 'line',
                           'data': xydata,
                           'lineWidth': 1,
                           'zIndex': 1})
            mean = pd.expanding_mean(data)
            # series.append({'name': 'gemiddelde',
            #                'type': 'line',
            #                'data': zip(mean.index.to_pydatetime(), mean.values),
            #                'linkedTo': ':previous',
            #                })
            std = pd.expanding_std(data)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            series.append({'name': 'spreiding',
                           'data': ranges,
                           'type': 'arearange',
                           'lineWidth': 0,
                           'fillOpacity': 0.2,
                           'linkedTo': ':previous',
                           'zIndex': 0})
        data = screen.to_pandas(ref='nap', kind='HAND')[start:stop]
        if data.size > 0:
            hand = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': 'handpeiling',
                           'type': 'scatter',
                           'data': hand,
                           'zIndex': 2,
                           'marker': {'symbol': 'circle',
                                      'radius': 6,
                                      'lineColor': 'white',
                                      'lineWidth': 2,
                                      'fillColor': 'blue'}})
    if len(xydata) > 0:
        mv = []
        mv.append((xydata[0][0], screen.well.maaiveld))
        mv.append((xydata[-1][0], screen.well.maaiveld))
        series.append({'name': 'maaiveld',
                       'type': 'line',
                       'lineWidth': 1,
                       'dashStyle': 'Dash',
                       'color': 'white',
                       'data': mv})
    options['series'] = series
    context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple()) * 1000))
    context['object'] = well
    return context