def register_converter_cb(key):
    """Register or deregister pandas' matplotlib converters based on an option.

    Parameters
    ----------
    key : str
        Option name passed to ``cf.get_option``. A truthy option value
        registers the converters; a falsy one deregisters them.
    """
    # Imported inside the callback rather than at module import time.
    from pandas.plotting import (
        deregister_matplotlib_converters,
        register_matplotlib_converters,
    )

    action = (
        register_matplotlib_converters
        if cf.get_option(key)
        else deregister_matplotlib_converters
    )
    action()
def test_registering_no_warning(self):
    """Plotting datetime data after explicit registration emits no warning."""
    plt = pytest.importorskip("matplotlib.pyplot")
    index = date_range('2017', periods=12)
    s = Series(range(12), index=index)
    _, ax = plt.subplots()

    # Force the "warn" state in case an earlier test already changed it.
    converter._WARN = True
    register_matplotlib_converters()

    with tm.assert_produces_warning(None) as recorded:
        ax.plot(s.index, s.values)

    assert len(recorded) == 0
def register_pandas_datetime_converter_if_needed():
    """Register pandas' datetime converters with matplotlib at most once.

    The module-level ``_registered`` flag records that registration already
    happened; when it is set the function returns without doing anything.
    """
    # based on https://github.com/pandas-dev/pandas/pull/17710
    global _registered
    if _registered:
        return

    try:
        from pandas.plotting import register_matplotlib_converters as _register
        _register()
    except ImportError:
        # register_matplotlib_converters new in pandas 0.22
        from pandas.tseries import converter as _legacy_converter
        _legacy_converter.register()
    _registered = True
def test_plots(close_figures):
    """Smoke-test the RecursiveLS plotting methods with pandas and NumPy data."""
    regressors = add_constant(dta[['m1', 'pop']])
    res = RecursiveLS(endog, regressors).fit()

    # Date-indexed plots need pandas' converters where they are available.
    try:
        from pandas.plotting import register_matplotlib_converters
        register_matplotlib_converters()
    except ImportError:
        pass

    # Recursive coefficients: default, one named variable, mixed selectors.
    res.plot_recursive_coefficient()
    res.plot_recursive_coefficient(variables=['m1'])
    res.plot_recursive_coefficient(variables=[0, 'm1', 'pop'])

    # CUSUM: default significance level, then alternative ones.
    res.plot_cusum()
    for level in (0.01, 0.10):
        res.plot_cusum(alpha=level)

    # An unsupported significance level is rejected.
    assert_raises(ValueError, res.plot_cusum, alpha=0.123)

    res.plot_cusum_squares()

    # Same plots again with plain NumPy input (no dates).
    res = RecursiveLS(endog.values, regressors.values).fit()
    res.plot_recursive_coefficient()
    res.plot_cusum()
    res.plot_cusum_squares()
def test_registry_resets(self):
    """Deregistering puts back the converter that was registered before."""
    units = pytest.importorskip("matplotlib.units")
    dates = pytest.importorskip("matplotlib.dates")

    # Snapshot the registry so it can be restored afterwards.
    saved_registry = dict(units.registry)

    try:
        # Known starting state: only matplotlib's own DateConverter.
        units.registry.clear()
        mpl_converter = dates.DateConverter()
        for key in (datetime, date):
            units.registry[key] = mpl_converter

        register_matplotlib_converters()
        assert units.registry[date] is not mpl_converter

        deregister_matplotlib_converters()
        assert units.registry[date] is mpl_converter
    finally:
        # Restore the original state.
        units.registry.clear()
        units.registry.update(saved_registry)
def test_option_no_warning(self):
    """No warning is produced while the register_converters option is False.

    The check runs twice inside the option context: once before the
    converters were explicitly registered and once after.
    """
    # A single importorskip both skips the test without matplotlib and
    # provides the pyplot module (the original called it twice).
    plt = pytest.importorskip("matplotlib.pyplot")
    ctx = cf.option_context("plotting.matplotlib.register_converters", False)
    s = Series(range(12), index=date_range('2017', periods=12))
    _, ax = plt.subplots()

    # Test without registering first, no warning
    converter._WARN = True
    with ctx:
        with tm.assert_produces_warning(None) as w:
            ax.plot(s.index, s.values)
    assert len(w) == 0

    # Now test with registering
    converter._WARN = True
    register_matplotlib_converters()
    with ctx:
        with tm.assert_produces_warning(None) as w:
            ax.plot(s.index, s.values)
    assert len(w) == 0
Created on Tuesday, March 26, 2019 WIFA Quant Tutorial Answer Example ''' # 导入所需模块。 import os # 系统模块 path = os.getcwd() # 当前工作路径 import pandas as pd # 数据表操作 # 导入绘图工具包。 import seaborn as sns # 绘图包 sns.set(style="darkgrid") # 设置绘图风格 import matplotlib.pyplot as plt # 绘图包 plt.rcParams['font.sans-serif'] = ['SimHei'] # 显示简体中文 plt.rcParams['axes.unicode_minus'] = False # 显示负号 from pandas.plotting import register_matplotlib_converters # 自动在绘图时转换日期 register_matplotlib_converters() # 注册日期转换器 # 读取本地数据。 data = pd.read_csv(path + "\\Data\\example.csv", index_col=0) # 重命名列名,看起来舒服些。 data.columns = ["Stock Codes", "Month", "Monthly Return"] # 将字符串格式的日期转换为日期格式,方便进行时间序列操作。 data["Month"] = pd.to_datetime(data["Month"], format='%m/%d/%Y') # 只取2006年之后的数据。 data = data[data["Month"] > pd.to_datetime("2006", format='%Y')] # 生成一个包含每个月的数据表,这一步也可通过pd.date_range手工生成所需日期,这里采用原始数据取集合得到。 months_data = pd.DataFrame(data["Month"].unique(), columns=["Month"]).sort_values(by="Month") # 生成最终报告的数据表,方便后续填写。 report_dataframe = pd.DataFrame( index = months_data["Month"],
def Run():
    """Train the model for one stock file and plot actual vs. predicted values.

    Loads the data for ``005930.csv``, fits and saves the model, then plots
    the last ten actual values together with one predicted curve per test
    window.
    """
    import datetime

    model_path = 'Section_batch512_epoch100.h5'
    code_csv = '005930.csv'

    raw_x_train, raw_y_train, raw_x_test, raw_y_test = LoadData(50, code_csv, 10)
    x_train = nanToZero(raw_x_train, True)
    y_train = nanToZero(raw_y_train, False)
    x_test = nanToZero(raw_x_test, True)
    y_test = nanToZero(raw_y_test, False)
    pivots = nanToZero(np.array(pivotDatas), False)

    model = BuildModel()
    model.fit(x_train,
              y_train,
              batch_size=512,
              epochs=100,
              validation_split=0.05,
              verbose=2)
    model.save(model_path)
    # To reuse a saved model instead of training:
    #   from keras.models import load_model
    #   model = load_model(model_path)

    window = 10
    price_table = pd.read_csv(
        os.path.join('/home/chlee/KOSPI_Prediction/PriceChangedData', code_csv))
    raw_dates = price_table['날짜'][-window:].values
    # Parse the YYYYMMDD stamps into day-resolution datetime64 values.
    dates = np.array([
        np.datetime64(datetime.datetime.strptime(str(stamp), "%Y%m%d"), 'D')
        for stamp in raw_dates
    ])

    # Turn the relative targets back into values by scaling with the pivots.
    leading = y_test[:-1, 0].copy()
    actual = (leading.astype(np.float64) + 1) * pivots[-(len(leading) + 11):-11]
    final = y_test[-1]
    actual = np.append(actual, (final.astype(np.float64) + 1) * pivots[-11])

    from pandas.plotting import register_matplotlib_converters
    register_matplotlib_converters()

    x_recent = x_test[-window:]
    actual_recent = actual[-window:]
    pred = model.predict(x_recent)
    result_predict = [(pred[i] + 1) * pivots[i] for i in range(-len(pred), 0)]

    plt.figure(facecolor='white')
    plt.plot(dates, actual_recent, label='actual')
    print(result_predict[-1])
    print(actual_recent[-1])

    one_day = datetime.timedelta(days=1)
    for forecast in result_predict:
        plt.plot(dates[:10], forecast)
        # Slide the date window forward by one day for the next forecast.
        following_day = np.datetime64(
            dates[-1].astype(datetime.datetime) + one_day, 'D')
        dates = np.append(dates[1:], following_day)

    plt.xticks(rotation=-45)
    plt.legend()
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import quandl
from datetime import datetime
from statsmodels.tsa.arima_model import ARIMA
import pandas as pd
import os
from pandas.plotting import register_matplotlib_converters

# pandas' datetime converters have to be registered explicitly, otherwise
# matplotlib complains when it is handed datetime data.
register_matplotlib_converters()


def validateDateString(date_string):
    """Return True if *date_string* parses with the ``%Y%m%d`` format, else False."""
    try:
        datetime.strptime(date_string, "%Y%m%d")
    except ValueError:
        return False
    return True


# Set quandl API key
# NOTE(review): this credential is hard-coded in source; consider loading it
# from the environment or a secrets store instead.
quandl.ApiConfig.api_key = "NszGhwY_Qh8Ubj1BWhVt"
def plot_1_prediction(self,
                      date,
                      dataset,
                      TARGET_ORIG,
                      USE_TEST,
                      NORMALIZE,
                      train,
                      felist,
                      filler=0):
    """Plot the true target and the model's prediction around one timestamp.

    ``date`` is a ``'%Y-%m-%d %H:%M:%S'`` string that must be present in
    ``dataset.index``. The ``past_history`` rows before it are fed to
    ``self.model``; the prediction is left-padded with ``filler`` for those
    history rows and drawn over the true ``TARGET_ORIG`` values.
    """
    # date="2018-04-01 00:06:00"
    import matplotlib.pyplot as plt
    import datetime
    import utils
    from pandas.plotting import register_matplotlib_converters
    register_matplotlib_converters()

    model = self.model
    conf_i = self.conf

    def denorm(normed):
        # Undo the train-based normalisation (identity when disabled).
        if not NORMALIZE:
            return normed
        restored, _, _ = utils.denormalize_by_train(
            normed, normed, normed, train, TARGET_ORIG, USE_TEST)
        return restored

    def norm(unnormed):
        # Apply the train-based normalisation (identity when disabled).
        if not NORMALIZE:
            return unnormed
        scaled, _, _, _, _ = utils.normalize_by_train(
            unnormed, unnormed, unnormed, unnormed, unnormed, train,
            TARGET_ORIG, USE_TEST)
        return scaled

    # Position of the requested timestamp within the dataset index.
    idx = dataset.index.tolist().index(
        datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S'))
    h_s = int(conf_i['past_history'])
    f_t = int(conf_i['future_target'])
    features = felist[0]
    print('idx=', idx, 'h_s=', h_s, 'f_t=', f_t, 'features=', features)

    # Model input: the history window as a single-sample batch.
    history = dataset[features].iloc[idx - h_s:idx, :].values
    model_input = norm(history).reshape(1, h_s, len(features))
    predicted = denorm(model.predict(model_input))
    predicted = np.pad(predicted, [(0, 0), (h_s, 0)],
                       'constant',
                       constant_values=(filler)).reshape(-1)

    # Ground truth over the history and future window.
    span = dataset.iloc[idx - h_s:idx + f_t, :]
    span_dates = span.index
    true_values = span[TARGET_ORIG]

    fig, axarr = plt.subplots()
    axarr.plot(span_dates, true_values, label='true')
    axarr.plot(span_dates, predicted, label='predicted')
    axarr.legend(loc="upper right")
    plt.show()
def manage_matplotlib_context():
    """Temporarily apply the custom plotting style and pandas converters.

    Registers pandas' matplotlib converters, updates ``matplotlib.rcParams``
    with the style below and sets the seaborn "white" style, then yields.
    On exit the converters are deregistered and the rcParams captured at
    entry are written back.
    """
    saved_rc_params = matplotlib.rcParams.copy()

    # Credits for this style go to the ggplot and seaborn packages.
    # We copied the style file to remove dependencies on the Seaborn package.
    # Check it out, it's an awesome library for plotting
    base_style = {
        "patch.facecolor": "#348ABD",  # blue
        "patch.antialiased": True,
        "font.size": 10.0,
        "figure.edgecolor": "0.50",
    }
    # Seaborn common parameters
    seaborn_common = {
        "figure.facecolor": "white",
        "text.color": ".15",
        "axes.labelcolor": ".15",
        "legend.numpoints": 1,
        "legend.scatterpoints": 1,
        "xtick.direction": "out",
        "ytick.direction": "out",
        "xtick.color": ".15",
        "ytick.color": ".15",
        "axes.axisbelow": True,
        "image.cmap": "Greys",
        "font.family": ["sans-serif"],
        "font.sans-serif": [
            "Arial",
            "Liberation Sans",
            "Bitstream Vera Sans",
            "sans-serif",
        ],
        "grid.linestyle": "-",
        "lines.solid_capstyle": "round",
    }
    # Seaborn darkgrid parameters
    # .15 = dark_gray
    # .8 = light_gray
    seaborn_darkgrid = {
        "axes.grid": True,
        "axes.facecolor": "#EAEAF2",
        "axes.edgecolor": "white",
        "axes.linewidth": 0,
        "grid.color": "white",
    }
    # Seaborn notebook context
    seaborn_notebook = {
        "figure.figsize": [8.0, 5.5],
        "axes.labelsize": 11,
        "axes.titlesize": 12,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "legend.fontsize": 10,
        "grid.linewidth": 1,
        "lines.linewidth": 1.75,
        "patch.linewidth": 0.3,
        "lines.markersize": 7,
        "lines.markeredgewidth": 0,
        "xtick.major.width": 1,
        "ytick.major.width": 1,
        "xtick.minor.width": 0.5,
        "ytick.minor.width": 0.5,
        "xtick.major.pad": 7,
        "ytick.major.pad": 7,
    }
    style_overrides = {
        **base_style,
        **seaborn_common,
        **seaborn_darkgrid,
        **seaborn_notebook,
    }

    try:
        register_matplotlib_converters()
        matplotlib.rcParams.update(style_overrides)
        sns.set_style(style="white")
        yield
    finally:
        # revert to original unit registries
        deregister_matplotlib_converters()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=matplotlib.cbook.mplDeprecation)
            # revert to original rcParams
            matplotlib.rcParams.update(saved_rc_params)
def __init__(self, stdout=None, stderr=None, no_color=False):
    """Set ``self.rarities``, register the converters, then run the base initialiser.

    ``stdout``, ``stderr`` and ``no_color`` are forwarded unchanged to the
    parent class.
    """
    self.rarities = list('CURM')
    register_matplotlib_converters()
    super().__init__(stdout=stdout, stderr=stderr, no_color=no_color)
def Join(self, report=False):
    """Wait for the native result and optionally return it as a DataFrame.

    The native ``api_Join`` call is made only once; its raw result is cached
    in ``self._joinResult``. ``time.time`` is then set to ``self.realTime``.

    Parameters
    ----------
    report : bool
        When false, the cached raw result is returned. When true, the result
        is parsed as JSON and its ``'Snapshorts'`` entries are turned into a
        DataFrame indexed by timestamp (Asia/Shanghai).

    Returns
    -------
    The raw result, or a ``pandas.DataFrame`` whose columns depend on the
    exchange id of the last snapshot: ``balance/fee/net`` for
    ``Futures_CTP``/``Futures_LTS``, ``stocks/fee/net`` for other
    ``Futures_*`` ids, and ``close/balance/stocks/fee/net`` otherwise
    (including when there are no snapshots at all).
    """
    self.gs.acquire()
    try:
        if self._joinResult is None:
            self.lib.api_Join.restype = ctypes.c_char_p
            r = self.lib.api_Join(self.ctx)
            self.lib.api_Release(self.ctx)
            self._joinResult = r
    finally:
        # Always release, even if the native call raises.
        self.gs.release()
    time.time = self.realTime
    if not report:
        return self._joinResult

    import pandas as pd
    try:
        from pandas.plotting import register_matplotlib_converters
        register_matplotlib_converters()
    except Exception:
        # Best effort only: the report does not need the converters, but
        # KeyboardInterrupt/SystemExit must not be swallowed.
        pass

    cash_futures = ('Futures_CTP', 'Futures_LTS')
    ret = json.loads(self._joinResult)
    pnl = []
    index = []
    symbol = None
    eid = None
    for ele in ret['Snapshorts']:
        acc = ele[1][0]
        close = float('nan')
        eid = acc['Id']
        balance = acc['Balance'] + acc['FrozenBalance']
        stocks = acc['Stocks'] + acc['FrozenStocks']
        commission = acc.get('Commission', 0)
        symbols = acc['Symbols']
        if eid in cash_futures:
            # Net value = cash plus margin and profit of every open position.
            if symbols:
                for s in symbols:
                    pos = symbols[s]
                    for side in ('Long', 'Short'):
                        if side in pos:
                            balance += pos[side]['Margin'] + pos[side]['Profit']
            pnl.append([acc['Balance'] + acc['FrozenBalance'], commission, balance])
        elif 'Futures_' in eid:
            # Same accumulation, but accounted in stocks.
            if symbols:
                for s in symbols:
                    pos = symbols[s]
                    for side in ('Long', 'Short'):
                        if side in pos:
                            stocks += pos[side]['Margin'] + pos[side]['Profit']
            pnl.append([acc['Stocks'] + acc['FrozenStocks'], commission, stocks])
        else:
            # The first symbol ever seen is used for the close price.
            if symbol is None and symbols:
                symbol = next(iter(symbols))
            if symbol is not None:
                close = acc['Symbols'][symbol]['Last']
            pnl.append([close, balance, stocks, commission, balance + (stocks * close)])
        index.append(pd.Timestamp(ele[0], unit='ms', tz='Asia/Shanghai'))

    if eid in cash_futures:
        columns = ["balance", "fee", "net"]
    elif eid is not None and 'Futures_' in eid:
        # eid stays None when there were no snapshots; the original
        # membership test raised TypeError in that case.
        columns = ["stocks", "fee", "net"]
    else:
        columns = ["close", "balance", "stocks", "fee", "net"]
    return pd.DataFrame(pnl, index=index, columns=columns)
import pandas as pd from pandas.plotting import register_matplotlib_converters register_matplotlib_converters() # register converters import tempfile import itertools as IT import os def uniquePath(path, sep=''): def name_sequence(): count = IT.count() yield '' while True: yield '{s}{n:d}'.format(s=sep, n=next(count)) orig = tempfile._name_sequence with tempfile._once_lock: tempfile._name_sequence = name_sequence() path = os.path.normpath(path) dirname, basename = os.path.split(path) filename, ext = os.path.splitext(basename) fd, filename = tempfile.mkstemp(dir=dirname, prefix=filename, suffix=ext) tempfile._name_sequence = orig return filename def setDatetimeIndex(df, what='impression'): ''' Sets the index as datetime, can be either by publicationTime or impressionTime
# license: Creative Commons License # Title: Data management with pandas www.iaac.net # Created by: Diego Pajarito # # is licensed under a license Creative Commons Attribution 4.0 International License. # http://creativecommons.org/licenses/by/4.0/ # This script uses pandas for data management for more information visit; pandas.pydata.org/ # This script uses geopandas for data management for more information visit; geopandas.org/ import pandas as pd import matplotlib.pyplot as plt from pandas import plotting plotting.register_matplotlib_converters() ###################################################### # Read a csv file tweets = pd.read_csv('../data/tweets_b.csv') # Counting tweets per user and showing the count in a bar chart tweets_per_user = tweets['user_screen_name'].value_counts() # fig, ax = plt.subplots() # tweets_per_user.plot(kind='barh', title="Tweets per user") # plt.show() # top users top_users = tweets_per_user.nlargest(20)
def main():
    """Plot the smoothed boiler forward-pipe temperature next to selected homes.

    Data are restricted to the configured time range, smoothed with
    ``average_values`` and drawn on a single axes with one line per dataset.
    """
    tz = config.DEFAULT_TIMEZONE
    start_datetime = datetime.datetime(2018, 12, 1, 23, 0, 0, tzinfo=gettz(tz))
    end_datetime = datetime.datetime(2019, 3, 1, 0, 0, 0, tzinfo=gettz(tz))

    homes_in_temp_smooth_size = 100
    boiler_temp_smooth_size = 100

    # Only these home datasets are plotted; the commented entries are disabled.
    # noinspection SpellCheckingInspection
    allowed_homes = [
        # "engelsa_35.csv.pickle",
        # "engelsa_37.csv.pickle",
        # "gaydara_1.csv.pickle",
        # "gaydara_22.csv.pickle",
        # "gaydara_26.csv.pickle",
        # "gaydara_28.csv.pickle",
        # "gaydara_30.csv.pickle",
        # "gaydara_32.csv.pickle",
        # "kuibysheva_10.csv.pickle",
        "kuibysheva_14.csv.pickle",
        "kuibysheva_16.csv.pickle",
        "kuibysheva_8.csv.pickle",
    ]

    register_matplotlib_converters()
    ax = plt.axes()

    # Boiler curve.
    boiler_df = filter_by_timestamp_closed(
        pd.read_pickle(config.BOILER_PREPROCESSED_HEATING_CIRCUIT_DATASET_PATH),
        start_datetime,
        end_datetime,
    )
    boiler_temp = average_values(
        boiler_df[column_names.FORWARD_PIPE_COOLANT_TEMP],
        boiler_temp_smooth_size,
    )
    ax.plot(boiler_df[column_names.TIMESTAMP],
            boiler_temp,
            label="real boiler temp")

    # One curve per allowed home dataset found in the directory.
    for home_dataset_name in os.listdir(
            config.HOMES_PREPROCESSED_HEATING_CIRCUIT_DATASETS_DIR):
        if home_dataset_name not in allowed_homes:
            continue
        home_df = filter_by_timestamp_closed(
            pd.read_pickle(
                f"{config.HOMES_PREPROCESSED_HEATING_CIRCUIT_DATASETS_DIR}/{home_dataset_name}"
            ),
            start_datetime,
            end_datetime,
        )
        home_in_temp = average_values(
            home_df[column_names.FORWARD_PIPE_COOLANT_TEMP],
            homes_in_temp_smooth_size,
        )
        ax.plot(home_df[column_names.TIMESTAMP],
                home_in_temp,
                label=home_dataset_name)

    ax.grid(True)
    ax.legend()
    plt.show()
def __init__(self, dataset: DataFrame):
    """Store *dataset* on the instance and register pandas' matplotlib converters."""
    register_matplotlib_converters()
    self._dataset = dataset
# pie charts from matplotlib import pyplot as plt from pandas.plotting import register_matplotlib_converters register_matplotlib_converters() plt.style.use("fivethirtyeight") # Language Popularity slices = [ 59219, 55466, 47544, 36443, 35917, 31991, 27097, 23030, 20524, 18523, 18017, 7920, 7331, 7201, 5833 ] labels = [ 'JavaScript', 'HTML/CSS', 'SQL', 'Python', 'Java', 'Bash/Shell/PowerShell', 'C#', 'PHP', 'C++', 'TypeScript', 'C', 'Other(s):', 'Ruby', 'Go', 'Assembly' ] # colors = ['#008fd5', '#fc4f30', '#e5ae37', '#6d904f'] explode = [0, 0, 0, 0.1, 0] # Make the dataset smaller! slices = [59219, 55466, 47544, 36443, 35917] labels = ['JavaScript', 'HTML/CSS', 'SQL', 'Python', 'Java'] plt.pie(slices, labels=labels, wedgeprops={'edgecolor': 'black'}, explode=explode, shadow=True,
import tweepy as tw import os import pandas as pd from pandas.plotting import register_matplotlib_converters #import twitter_codes as codes from google.cloud import language_v1 from google.cloud.language_v1 import enums import six import matplotlib.pyplot as plt import datetime from tweepy.auth import OAuthHandler register_matplotlib_converters(explicit=True) #Twitter API credentials consumer_key = "73tXNweN3wM5a23VuvdZChLX4" consumer_secret = "D2CwwA5CJMxGatQDeyxy9lp0afE3522MFJZwAvcASAwaIKvl64" access_key = "1171550663857332226-sVhwnq1LsLc2lzHOvSGTkvcOqVmBaR" access_secret = "etj66ATMtYBNecHTcqa2vczgrgfWExkv51p3c114Lw9Cp" def get_tweet(username, num_tweets): twitter_feed = [] twitter_feed_time = [] # Authorize auth = tw.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_key, access_secret) auth_tweet = tw.API(auth) Someone_tweets = auth_tweet.user_timeline(
def run_ucm(name):
    """Check UnobservedComponents against the stored results called *name*.

    For every model specification in the stored results this verifies the
    parameter transform round trip, the cycle frequency bound, the
    log-likelihood at the stored parameters and the MLE fit, and
    smoke-tests plotting (when matplotlib is importable) and ``summary``.
    """
    true = getattr(results_structural, name)
    shared_kwargs = true['kwargs']

    # Default cycle period bounds per frequency code.
    bounds_by_freq = {
        'A': (1.5, 12),
        'Q': (1.5*4, 12*4),
        'M': (1.5*12, 12*12),
    }

    for spec in true['models']:
        kwargs = spec.copy()
        kwargs.update(shared_kwargs)

        # Work on a copy of the data
        values = dta.copy()
        freq = kwargs.pop('freq', None)
        if freq is not None:
            values.index = pd.date_range(start='1959-01-01',
                                         periods=len(dta),
                                         freq=freq)

        # Test pandas exog
        if 'exog' in kwargs:
            # A pd.Series by default ...
            exog = np.log(values['realgdp'])
            # ... or a 1-dim numpy array when requested
            if kwargs['exog'] == 'numpy':
                exog = exog.values.squeeze()
            kwargs['exog'] = exog

        mod = UnobservedComponents(values['unemp'], **kwargs)

        # Smoke test for starting parameters; transform and untransform
        # must be inverses of each other.
        mod.start_params
        roundtrip = mod.transform_params(
            mod.untransform_params(mod.start_params))
        assert_allclose(mod.start_params, roundtrip)

        # Filter at the true parameters
        res_true = mod.filter(true['params'])

        # Expected cycle bounds
        freqstr = freq[0] if freq is not None else values.index.freqstr[0]
        if 'cycle_period_bounds' in kwargs:
            cycle_period_bounds = kwargs['cycle_period_bounds']
        else:
            # Without information on data frequency, the cycle frequency
            # is required to be between 0 and pi.
            cycle_period_bounds = bounds_by_freq.get(freqstr, (2, np.inf))
        shortest, longest = cycle_period_bounds[0], cycle_period_bounds[1]

        # Test that the cycle frequency bound is correct
        assert_equal(mod.cycle_frequency_bound,
                     (2*np.pi / longest, 2*np.pi / shortest))

        # Test that the likelihood is correct
        assert_allclose(res_true.llf,
                        true['llf'],
                        rtol=true.get('rtol', 1e-7),
                        atol=true.get('atol', 0))

        # Optional smoke test for plot_components
        try:
            import matplotlib.pyplot as plt
            try:
                from pandas.plotting import register_matplotlib_converters
                register_matplotlib_converters()
            except ImportError:
                pass
            res_true.plot_components(fig=plt.figure())
        except ImportError:
            pass

        # Now fit the model via MLE
        with warnings.catch_warnings(record=True):
            res = mod.fit(disp=-1)

        # A higher likelihood is fine; otherwise it must be very close to
        # the one found by R.
        if res.llf <= true['llf']:
            assert_allclose(res.llf, true['llf'], rtol=1e-4)

        # Smoke test for summary
        res.summary()
def _benchmark_trades(symbol, start_date, end_date):
    """Return the one-row trades frame: BUY 1000 shares on the first date with data."""
    ticker = symbol[0]
    prices = get_data(symbol, pd.date_range(start_date, end_date))[ticker]
    # Built directly instead of filling an 'Order' column with NaN (np.NaN no
    # longer exists in NumPy 2.0), writing 'BUY' into row 0 and dropping the
    # rest; the resulting frame is the same single row.
    return pd.DataFrame(
        {'Symbol': ticker, 'Order': 'BUY', 'Shares': 1000.0},
        index=prices.index[:1],
    )


def _plot_against_benchmark(portvals, bench_portvals, outfile, trades=None):
    """Save the strategy-vs-benchmark chart; mark trades with vertical lines if given."""
    fig, ax = plt.subplots()
    plt.title("Manual Rule-based Strategy vs. Benchmark")
    plt.xlabel("Date")
    plt.xticks(rotation=13)
    plt.ylabel("Normalized Portfolio Value")
    plt.plot(portvals, 'r', label="Manual Rule-based Strategy")
    plt.plot(bench_portvals, 'g', label="Benchmark")
    plt.legend()
    if trades is not None:
        for index, item in trades.iterrows():
            if item['Order'] == 'SELL':
                ax.axvline(index, color="blue")
            elif item['Order'] == 'BUY':
                ax.axvline(index, color="black")
    plt.savefig(outfile)
    # The figure is only needed for the saved file.
    plt.close(fig)


def _print_statistics(heading, portvals, bench_portvals):
    """Print cumulative return, std and mean of daily returns for both series."""
    print(heading)
    # cumulative return
    cr_port = portvals.iloc[-1]/portvals.iloc[0] - 1
    cr_bench = bench_portvals.iloc[-1]/bench_portvals.iloc[0] - 1
    print("Cumulative return of Manual Rule-Based Strategy: " + str(cr_port))
    print("Cumulative return of Benchmark: " + str(cr_bench))

    daily_ret_port = (portvals/portvals.shift(1)) - 1
    daily_ret_bench = (bench_portvals/bench_portvals.shift(1)) - 1

    # standard deviation
    print("Standard Deviation of Daily Returns Manual Rule-Based Strategy: "
          + str(daily_ret_port.std()))
    print("Standard Deviation of Daily Returns of Benchmark: "
          + str(daily_ret_bench.std()))

    # mean (the benchmark line used to be mislabelled "Standard Deviation")
    print("Mean of Daily Returns of Manual Rule-Based Strategy: "
          + str(daily_ret_port.mean()))
    print("Mean of Daily Returns of Benchmark: " + str(daily_ret_bench.mean()))


def call_test_policy():
    """Run the manual strategy in and out of sample and compare it to a benchmark.

    For each period the strategy trades come from ``testPolicy`` and the
    benchmark buys 1000 shares of JPM on the first date and holds. Both
    portfolios start with 100000 cash, are normalised to their first value,
    plotted to a PNG file and summarised on stdout. Only the in-sample chart
    marks the strategy's trades (blue for SELL, black for BUY).
    """
    register_matplotlib_converters()
    symbol = ['JPM']
    periods = [
        ("In-sample Statistics", dt.datetime(2008, 1, 1),
         dt.datetime(2009, 12, 31), "In_sample_manual.png", True),
        ("Out-sample Statistics", dt.datetime(2010, 1, 1),
         dt.datetime(2011, 12, 31), "Out_sample_manual.png", False),
    ]

    for heading, start_date, end_date, outfile, mark_trades in periods:
        trades_df = testPolicy(symbol, start_date, end_date, 100000.0)
        portvals = msc.compute_portvals(trades_df, 100000, 9.95, 0.005,
                                        start_date, end_date)

        # Benchmark: Starting cash: $100,000, investing in 1000 shares of
        # JPM and holding that position.
        bench_trades = _benchmark_trades(symbol, start_date, end_date)
        bench_portvals = msc.compute_portvals(bench_trades, 100000, 9.95,
                                              0.005, start_date, end_date)

        # normalize
        portvals = portvals/portvals.iloc[0]
        bench_portvals = bench_portvals/bench_portvals.iloc[0]

        _plot_against_benchmark(portvals, bench_portvals, outfile,
                                trades_df if mark_trades else None)
        _print_statistics(heading, portvals, bench_portvals)
"""A utility to load matplotlib and set the backend to AGG Example: from pyiem.plot.use_agg import plt """ import os from pandas.plotting import register_matplotlib_converters import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt # noqa: # pylint: disable # Workaround a pandas dataframe to matplotlib issue register_matplotlib_converters() # work around warning coming from pooch if 'TEST_DATA_DIR' not in os.environ: os.environ['TEST_DATA_DIR'] = '/tmp'
def predict_to_gif(data_source,
                   model_source,
                   predict_start_time,
                   filename,
                   ticks_step=15,
                   size=(15, 7)):
    """Render one forecast frame per start time and save them as a GIF.

    Parameters
    ----------
    data_source : str or DataFrame
        CSV path or an already loaded frame. It is also passed unchanged to
        ``predict``.
    model_source : str or dict
        Path given to ``torch.load`` or the loaded model dictionary.
    predict_start_time : iterable
        Start times; one frame is drawn for each.
    filename : str
        Output path handed to ``imageio.mimsave``.
    ticks_step : int
        Every ``ticks_step``-th timestamp gets an x tick.
    size : sequence of two numbers
        Figure size in inches.
    """
    if isinstance(model_source, str):
        # NOTE(review): torch.load unpickles the file; only load model files
        # from trusted sources.
        model = torch.load(model_source)
    else:
        model = model_source
    if isinstance(data_source, str):
        # Close the file once parsed (the handle used to be leaked).
        with open(data_source) as csv_file:
            df = pd.read_csv(csv_file)
    else:
        df = data_source

    features = model['features']
    target_column = model['target_column']
    freq = model['freq']
    Quantile = model['Quantile']
    n_splits = model['n_splits']
    mean_CV_RMSE = model['mean_CV_RMSE']
    test_RMSE = model['test_RMSE']
    cv_mode = model['cv_mode']

    if 'time' in df.columns.tolist():
        df.index = df.time.astype('datetime64[ns]')
        df >>= drop('time')
    df = df[features].astype(float).resample(freq).mean()
    if df.isnull().values.sum() > 0:
        print(
            "The data source has missing value after aggregated by freq = '{}'"
            .format(freq))
        print("Filling missing value use method = 'pad'")
        # ffill() is the non-deprecated spelling of fillna(method='pad').
        df = df.ffill().dropna()

    fname = pkg_resources.resource_filename(__name__, '../Fonts/kaiu.ttf')
    font = FontProperties(fname=fname, size=15)
    label_cv_mode = 'fold' if cv_mode == 'kfold' else 'ts'
    # (minimum step in seconds, characters of the timestamp kept in labels)
    label_widths = (
        (360 * 86400, 4),
        (20 * 86400, 7),
        (86400, 10),
        (3600, 13),
        (60, 16),
    )

    image_list = []
    register_matplotlib_converters()
    fig, ax = plt.subplots()
    try:
        fig.set_size_inches(*size)
        for i in predict_start_time:
            pred_result = predict(data_source, model_source, i)
            pred_times = pred_result['time']
            forecast = pred_result['predict_result']
            time_index = pd.date_range(df.index[0],
                                       periods=len(df) + len(pred_times),
                                       freq=freq)
            time_index_pred = pd.date_range(pred_times[0],
                                            periods=len(pred_times),
                                            freq=freq)

            # Truncate labels according to the spacing of the index.
            diff = (time_index[1] - time_index[0]).total_seconds()
            width = next((w for limit, w in label_widths if diff >= limit),
                         None)
            time_index_label = time_index.astype(str)
            first_time = pred_times[0]
            if width is not None:
                time_index_label = time_index_label.str.slice(0, width)
                first_time = first_time[:width]
            title = '{} 未來走勢預測 (預測開始於 {})'.format(target_column,
                                                    first_time)

            p1 = ax.plot(df[target_column], 'b')[0]
            p2 = ax.plot(time_index_pred, forecast[str(Quantile[0])], 'r')[0]
            p3 = ax.fill_between(time_index_pred,
                                 forecast[str(Quantile[1])],
                                 forecast[str(Quantile[2])],
                                 color='c')
            # Invisible artist used for the text-only legend rows.
            p4 = ax.plot([], [], ' ')[0]
            ax.legend(
                [p1, p2, p3, p4, p4],
                ('實際值', '預測值',
                 '{:.0%} 預測區間'.format(Quantile[2] - Quantile[1]),
                 '{}-{} CV mean RMSE = {:.4f}'.format(
                     n_splits, label_cv_mode, mean_CV_RMSE),
                 'test RMSE = {:.4f}'.format(test_RMSE)),
                loc='best',
                prop=font)
            ax.set_xticks(time_index[::ticks_step])
            ax.set_xticklabels(time_index_label[::ticks_step])
            ax.set_ylabel(target_column, fontproperties=font, fontsize=20)
            ax.set_xlabel('時間', fontproperties=font, fontsize=20)
            ax.set_title(title, fontproperties=font, fontsize=30, y=1.03)

            buf = io.BytesIO()
            fig.savefig(buf)
            # Clear this figure's axes explicitly rather than whatever
            # pyplot currently considers the active axes.
            ax.cla()
            buf.seek(0)
            image_list.append(imageio.imread(buf))
    finally:
        # Release the figure even if a frame fails.
        plt.close(fig)

    imageio.mimsave(filename, image_list, duration=1.5)
def historical_command(signal: str = "", start=""):
    """Displays historical price comparison between similar companies [Yahoo Finance]

    Parameters
    ----------
    signal : str
        Key of ``so.d_signals_desc``: either a finviz signal or the name of
        a preset ``.ini`` file under ``so.presets_path``.
    start : str
        Start date in ``cfg.DATE_FORMAT``; empty means 365 days ago.

    Returns
    -------
    dict
        ``title`` and ``description``, plus ``imagefile`` on success. On a
        download error, or when no stock returned data, an error title and
        message are returned instead.

    Raises
    ------
    Exception
        If *signal* is not a known signal or preset.
    """
    # Debug user input
    if cfg.DEBUG:
        logger.debug("scr-historical %s %s", signal, start)

    # Check for argument
    if signal not in so.d_signals_desc:
        raise Exception("Invalid preset selected!")

    register_matplotlib_converters()

    screen = ticker.Ticker()
    is_finviz_signal = signal in finviz_model.d_signals
    if is_finviz_signal:
        screen.set_filter(signal=finviz_model.d_signals[signal])
    else:
        preset_filter = configparser.RawConfigParser()
        preset_filter.optionxform = str  # type: ignore
        preset_filter.read(so.presets_path + signal + ".ini")

        d_general = preset_filter["General"]
        d_filters = {
            **preset_filter["Descriptive"],
            **preset_filter["Fundamental"],
            **preset_filter["Technical"],
        }
        d_filters = {k: v for k, v in d_filters.items() if v}

        if d_general["Signal"]:
            screen.set_filter(filters_dict=d_filters, signal=d_general["Signal"])
        else:
            screen.set_filter(filters_dict=d_filters)

    if start == "":
        start = datetime.now() - timedelta(days=365)
    else:
        start = datetime.strptime(start, cfg.DATE_FORMAT)

    # Output Data
    l_min = []
    l_leg = []
    last_date = None
    # Always bound: it is returned even when no more than 10 stocks matched
    # (the original raised UnboundLocalError in that case).
    description = ""
    l_stocks = screen.ScreenerView(verbose=0)

    if len(l_stocks) > 10:
        description = (
            "\nThe limit of stocks to compare with are 10. Hence, 10 random similar stocks will be displayed."
            "\nThe selected list will be: ")
        random.shuffle(l_stocks)
        l_stocks = sorted(l_stocks[:10])
        description = description + (", ".join(l_stocks))
        logger.debug(description)

    plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

    # Each symbol is tried exactly once; a symbol without data is skipped so
    # it cannot keep the download loop running forever.
    for symbol in l_stocks:
        try:
            df_similar_stock = yf.download(
                symbol,
                start=datetime.strftime(start, "%Y-%m-%d"),
                progress=False,
                threads=False,
            )
            if not df_similar_stock.empty:
                plt.plot(
                    df_similar_stock.index,
                    df_similar_stock["Adj Close"].values,
                )
                l_min.append(df_similar_stock.index[0])
                l_leg.append(symbol)
                last_date = df_similar_stock.index[-1]
        except Exception as e:
            plt.close()
            error = (
                f"{e}\nDisregard previous error, which is due to API Rate limits from Yahoo Finance. "
                f"Because we like '{symbol}', and we won't leave without getting data from it."
            )
            return {
                "title": "ERROR Stocks: [Yahoo Finance] Historical Screener",
                "description": error,
            }

    if not l_min:
        # Nothing was plotted, so there is no range to show.
        plt.close()
        return {
            "title": "ERROR Stocks: [Yahoo Finance] Historical Screener",
            "description": "No historical data found for the selected stocks.",
        }

    # `signal` is always truthy here, so choose the wording by what it names;
    # indexing d_signals with a preset name raised KeyError.
    if is_finviz_signal:
        plt.title(
            f"Screener Historical Price using {finviz_model.d_signals[signal]} signal"
        )
    else:
        plt.title(f"Screener Historical Price using {signal} preset")

    plt.xlabel("Time")
    plt.ylabel("Share Price ($)")
    plt.legend(l_leg)
    # The visibility flag is passed positionally: its keyword was renamed
    # from `b` to `visible` in newer matplotlib releases.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # ensures that the historical data starts from same datapoint
    plt.xlim([max(l_min), last_date])

    imagefile = "scr_historical.png"
    plt.savefig(imagefile)
    # The figure is only needed for the saved file.
    plt.close()
    imagefile = image_border(imagefile)

    return {
        "title": "Stocks: [Yahoo Finance] Historical Screener",
        "description": description,
        "imagefile": imagefile,
    }
def plot(codes,
         t0='min',
         t1='max',
         dt=None,
         how={},
         plottype='linear',
         outfile='',
         indexcode=None,
         quiet=True,
         fig=None,
         figsize=[7, 5.25],
         figopt=None,
         lineopt=None,
         colors=None,
         offset=[],
         multiplier=[],
         minvalue=None,
         maxvalue=None,
         zlog=False,
         returnfig=False,
         slope=False,
         **kwargs):
    """Fetch time series via ``getdata`` and draw them with one of the plot backends.

    Parameters
    ----------
    codes : str or list of str
        Series code(s) handed to ``getdata``. A single string is wrapped in
        a list.
    t0, t1, dt, how
        Forwarded unchanged to ``getdata``. If both ``t0`` and ``t1`` are
        ``datetime.datetime`` they also set the x-limits of time-series
        plots. If ``dt`` is None it is taken as the smallest spacing of the
        returned index (used for the legend title only).
    plottype : str
        Case-insensitive; must be one of the names in ``types`` below
        (linear/xy, profile, iso, wind map, station map, mesh).
    outfile : str
        If truthy, the figure is saved to this path.
    indexcode : str, optional
        For ``'xy'`` plots: the column that becomes the x-axis. Defaults to
        the first code.
    quiet : bool
        If False, announce the automatic ``indexcode`` selection.
    fig : sequence, optional
        Existing ``(figure, axes)`` pair as returned by ``plt.subplots``.
        Created when None and ``figsize`` is truthy.
    figsize
        Only its truthiness is checked; the actual size comes from the figure
        options (``figsize`` / ``figdpi`` keys).
    figopt, lineopt : dict, optional
        Overrides for ``defaultfigopt()`` / ``defaultlineopt()``. Both may be
        modified in place when returned column names differ from ``codes``.
    colors : dict, list, str or number, optional
        Per-code colours (dict), colours cycled over the requested codes
        (list), one ``'#...'`` colour for all, or an index into
        ``defaultct()``.
    offset, multiplier : number or list
        Added to / multiplied with the data. A list must have length 1 or
        one entry per code, otherwise it is ignored with a message.
    minvalue, maxvalue : number, optional
        Values below / above are set to NaN. Falsy values (including 0)
        disable the filter.
    zlog : bool
        If truthy, stored as the ``zlog`` figure option.
    returnfig : bool
        Return the figure pair instead of closing it.
    slope : bool
        If True, the data is replaced by ``data.diff(axis=1)``.
    **kwargs
        Forwarded to the profile, iso, station map, wind map and mesh
        backends.

    Returns
    -------
    list or tuple or None
        The ``(figure, axes)`` pair when ``returnfig`` is true (axes is None
        for map plots), otherwise None after closing the figure.
    """
    # NOTE(review): ``how``, ``figsize``, ``offset`` and ``multiplier`` use
    # mutable defaults. This function does not mutate them, but ``how`` is
    # passed on to ``getdata`` -- confirm it is not modified there.
    import numpy as np
    import datetime
    import pandas as pd
    import matplotlib as mpl
    import html
    import socket
    mpl.use('Agg')
    from sql.util import getdata
    from plot.dolueg2plots.windmap import windmap
    from plot.dolueg2plots.stationmap import stationmap
    from plot.dolueg2plots.iso import iso
    from plot.dolueg2plots.profiles import profiles
    from plot.dolueg2plots.mesh import mesh
    from plot.dolueg2plots.linear import linear
    from plot.dolueg2plots.watermark import watermark
    from plot.dolueg2plots.gropt import defaultfigopt, defaultlineopt, \
        acceptedopt, extendopt, updateopt, \
        sunpos, defaultct, defaultwindcolor, \
        unituppercase
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from pandas.plotting import register_matplotlib_converters
    register_matplotlib_converters()

    # Start from the default figure options and overlay the caller's values;
    # containers are copied so the caller's objects are not shared.
    if figopt is None:
        _figopt = defaultfigopt()
    else:
        _figopt = defaultfigopt()
        for key in figopt:
            if type(figopt[key]) in [dict, list]:
                _figopt[key] = figopt[key].copy()
            else:
                _figopt[key] = figopt[key]

    if fig is None and figsize:
        fig = plt.subplots(figsize=_figopt['figsize'],
                           dpi=_figopt['figdpi']
                           # constrained_layout=True,
                           )
        fig[0].tight_layout()

    if zlog:
        _figopt['zlog'] = zlog

    plottype = plottype.lower()
    # Accepted plot type names, grouped by backend (index matters below).
    types = [
        ['xy', 'timeseries', 'linear'],
        ['profile', 'profiles'],
        ['iso', 'ispohypses', 'isolines'],
        ['wind', 'windrose', 'windmap'],
        ['stationnetwork', 'stationnetworks', 'station', 'stationmap'],
        [
            'contour',
            'mesh',
        ],
    ]

    # windmap / stationnetwork: force a square figure
    if plottype in types[3] or plottype in types[4]:
        if fig[0].get_figwidth() != fig[0].get_figheight():
            fmax = max([fig[0].get_figwidth(), fig[0].get_figheight()])
            fig[0].set_figwidth(fmax)
            fig[0].set_figheight(fmax)

    if plottype in types[5]:  # or plottype in types[2]:
        includestart = True
    else:
        includestart = False

    if type(codes) == str:
        codes = [codes]

    data, meta = getdata(
        codes,
        t0=t0,
        t1=t1,
        dt=dt,
        how=how,
        includestart=includestart,
    )

    # Non-positive values cannot be drawn on a logarithmic y-axis
    if not (data is None or data is False) and _figopt['ylog']:
        data[data.le(0)] = np.nan

    # need at least some data to decide on the barwidth, if this is not given
    # report missing data (only for barcodes!)
    if not (data is None or data is False):
        missingbarcodes = [
            c for c in _figopt['barcodes'] if c not in data.columns
        ]
        if missingbarcodes:
            print(
                'These barcodes were given wrong in the call and have been removed',
                missingbarcodes)
            _figopt['barcodes'] = [
                c for c in _figopt['barcodes'] if c in data.columns
            ]

        if _figopt['barcodes'] and len(
                data[_figopt['barcodes']].dropna(how='all').index) <= 2:
            data[data.columns] = np.nan

        # too few data to be sensible, forget it
        if _figopt['barcodes'] and len(
                data[_figopt['barcodes']].dropna(how='all').index) <= 3:
            print('Not enough data for barcodes plot. Parameters were:')
            print(t0, t1, dt, codes)
            data = None

    # check that we didnt get invalid data; if so, render the reason as text
    # into the figure and stop
    if data is None or data is False or meta is None or data.dropna(
            how='all').empty:
        if data is None or data is False or data.dropna(how='all').empty:
            errormsg = 'NO VALID VALUES YET IN DATABASE FOR \n'
            for codeno, code in enumerate(codes):
                errormsg += code
                if meta and code in meta:
                    errormsg += ' in ' + meta[code]['sqldb'] + '\n'
                else:
                    errormsg += ' does not exist in the database\n'
                if codeno > 10:
                    errormsg += ' and more codes'
                    break
            errormsg += '\nTimestamp t0:' + str(t0) + '\n'
            errormsg += 'Timestamp t1:' + str(t1)
        else:
            print('Passed codes do not exist, please fix your call:', codes)
            errormsg = 'CODES DO NOT EXIST \n\n'
            errormsg += '\n'.join(codes)

        if data is None or data is False:
            pass
        else:
            errormsg += '\n\nGiven parameters were\n'
            errormsg += 'Timestamp t0: ' + data.index[0].strftime(
                '%d %b %Y %H:%M:%S') + '\n'
            errormsg += 'Timestamp t1: ' + data.index[-1].strftime(
                '%d %b %Y %H:%M:%S') + '\n'

        fig[0].text(0.5, 0.5, errormsg, ha='center', va='center', fontsize=16)
        plt.axis('off')
        if outfile:
            fig[0].savefig(outfile, rasterized=True)
        if returnfig:
            return fig
        else:
            plt.close(fig[0])
            return

    # Convert to the machine's local timezone, remember its name for the
    # x-label and continue with naive timestamps
    localtz = datetime.datetime.now().astimezone().tzinfo
    datatz = str(data.tz_convert(localtz).index.tzinfo)
    data = data.tz_convert(localtz).tz_localize(None)

    origcodes = codes.copy()
    if sorted(codes) == sorted(data.columns.tolist()):
        pass
    else:
        codes = data.columns

    # Options keyed by the part of a column name before the first '_' are
    # moved to the full column name
    if figopt is None:
        pass
    else:
        figremove = []
        for newcode in codes:
            if newcode.split('_')[0] in figopt:
                figopt[newcode] = figopt[newcode.split('_')[0]]
                figremove.append(newcode.split('_')[0])
        if figremove:
            for i in figremove:
                if i in figopt:
                    del figopt[i]

    if lineopt is None:
        pass
    else:
        lineoptremove = []
        for newcode in codes:
            if '_' in newcode and newcode.split('_')[0] in lineopt:
                lineopt[newcode] = lineopt[newcode.split('_')[0]]
                lineoptremove.append(newcode.split('_')[0])
        if lineoptremove:
            for i in lineoptremove:
                if i in lineopt:
                    del lineopt[i]

    if minvalue:
        data[data.lt(minvalue)] = np.nan
    if maxvalue:
        data[data.gt(maxvalue)] = np.nan

    if _figopt['cumulativecodes']:
        misscumulativecodes = []
        for code in _figopt['cumulativecodes']:
            # several aggregations chosen
            if code in data:
                data[code] = data[code].cumsum()
            else:
                misscumulativecodes.append(code)
        if misscumulativecodes:
            print(
                'These cumulative codes were given but are non existing in the selected data',
                misscumulativecodes)

    if slope:
        data = data.diff(axis=1)

    _lineopt = defaultlineopt(keys=data.columns)
    if lineopt is None:
        pass
    else:
        # update the specific codes now
        _lineopt = updateopt(_lineopt, lineopt)

    if colors is not None:
        if type(colors) is dict:
            for c in colors:
                if c in _lineopt:
                    _lineopt[c]['color'] = colors[c]
        elif type(colors) is list:
            for cno, c in enumerate(origcodes):
                if c in _lineopt:
                    _lineopt[c]['color'] = colors[cno % len(colors)]
        elif type(colors) == str and '#' in colors:
            for c in origcodes:
                if c in _lineopt:
                    _lineopt[c]['color'] = colors
        elif type(colors) in [int, float]:
            for c in origcodes:
                if c in _lineopt:
                    _lineopt[c]['color'] = colors

    # in case we did a mathoperation, the meta will contain all codes
    # which makes sense despite the operation, i.e. urban - rural
    # but we have to limit it to be the same length, this may cause problems
    if sorted(list(meta.keys())) != sorted(codes):
        newmeta = {}
        for codeno, code in enumerate(codes):
            ops = ['+', '-', '/', '*']
            opword = {
                '+': 'sum',
                '-': 'difference',
                '/': 'ratio',
                '*': 'product'
            }
            # find the first operator that splits the code into operands
            for op in ops:
                _code = code.split(op)
                if len(_code) > 1:
                    break

            # a code without operator has a single element in _code, so the
            # operand branches are only entered when two operands exist
            has_operands = len(_code) > 1

            if code in meta and len(_code) == 1:
                newmeta[code] = meta[code].copy()
            elif has_operands and _code[0] in meta and _code[1] in meta:
                # both are found, combine relevant information
                # i.e. what we need for legend, consisting of
                # the below, excluding the numeric types
                # 'measurementheight', 'lat', 'lon'
                # as it doesnt make sense to have two coordinates when
                # math operation took place
                # work on a copy so the first operand's own entry is not
                # altered for other codes that reuse it
                newmeta[code] = meta[_code[0]].copy()
                newmeta[code]['variable'] += ' ' + opword[op]
                newmeta[code]['locationname'] += ' and ' + meta[
                    _code[1]]['locationname']
                newmeta[code]['device'] += ' and ' + meta[_code[1]]['device']
                newmeta[code]['aggregation'] += ' and ' + meta[
                    _code[1]]['aggregation']
                if meta[_code[1]]['unit'] != meta[_code[0]]['unit']:
                    newmeta[code]['unit'] += ' ' + opword[op] + ' ' + meta[
                        _code[1]]['unit']
            elif has_operands and (_code[0] in meta or _code[1] in meta):
                # just take whichever we found at least
                if _code[0] in meta:
                    newmeta[_code[0]] = meta[_code[0]]
                else:
                    newmeta[_code[1]] = meta[_code[1]]
            else:
                print(code, 'not in meta')

            if len(newmeta) == len(codes):
                break
        meta = newmeta

    if _lineopt is not None:
        # ensure we handle indexed colors
        defaultcolors = defaultct()  #[2:]
        checkcol = [
            'markerfacecolor',
            'color',
        ]
        for code in codes:
            for ccol in checkcol:
                if type(_lineopt[code][ccol]) in [int, float]:
                    _lineopt[code][ccol] = defaultcolors[_lineopt[code][ccol] %
                                                         len(defaultcolors)]
                elif type(_lineopt[code][ccol]) == list and len(
                        _lineopt[code][ccol]) == 1:
                    _lineopt[code][ccol] = defaultcolors[
                        _lineopt[code][ccol][0] % len(defaultcolors)]

                if mpl.colors.is_color_like(_lineopt[code][ccol]):
                    pass
                else:
                    print('Not a valid color code for', code,
                          _lineopt[code][ccol], 'fallback to black')
                    _lineopt[code][ccol] = '#000000'

    if offset:
        if type(offset) == list:
            if len(offset) != len(codes) and not len(offset) == 1:
                print(
                    'Ignoring offset as it\'s length is not matching the codes'
                )
                print(codes, offset)
            else:
                if len(offset) == 1:
                    offset = offset * len(codes)
                for codeno, code in enumerate(codes):
                    data[code] += offset[codeno]
        else:
            data += offset

    if multiplier:
        if type(multiplier) == list:
            if len(multiplier) != len(codes) and not len(multiplier) == 1:
                print(
                    'Ignoring multiplier as it\'s length is not matching the codes'
                )
                print(codes, multiplier)
            else:
                if len(multiplier) == 1:
                    multiplier = multiplier * len(codes)
                for codeno, code in enumerate(codes):
                    data[code] *= multiplier[codeno]
        else:
            data *= multiplier

    if dt is None:
        dt = min(data.index[1:] - data.index[:-1])

    # Express the aggregation interval in the largest fitting unit
    _dt = pd.to_timedelta(dt).total_seconds()
    if _dt >= datetime.timedelta(days=1).total_seconds():
        strdt = _dt / datetime.timedelta(days=1).total_seconds()
        strunit = ' Days'
    elif _dt >= datetime.timedelta(hours=1).total_seconds():
        strdt = _dt / datetime.timedelta(hours=1).total_seconds()
        strunit = ' Hours'
    else:
        strdt = _dt / datetime.timedelta(minutes=1).total_seconds()
        strunit = ' Minutes'

    if strdt % 1 == 0:
        strdt = int(strdt)
    else:
        strdt = round(strdt, 2)
    strdt = str(strdt) + strunit

    # ``onetitle`` means the generated legend title already names the single
    # shared variable. With a caller-supplied title that is unknown, so fall
    # back to full per-series labels.
    onetitle = False
    if _figopt['legtitle']:
        pass
    else:
        possiblelegtitle = np.unique(
            [meta[key]['variablename'] for key in data.columns])
        if len(possiblelegtitle) == 1:
            _figopt['legtitle'] = possiblelegtitle[0] + ', '
            onetitle = True
        else:
            _figopt['legtitle'] = ''
            onetitle = False

        if plottype not in types[4]:
            _figopt['legtitle'] += 'Agg. to ' + strdt
        else:
            _figopt['legtitle'] += 'Stations:'

    uniqstats = np.unique(
        [meta[key]['locationname'] for key in data.columns])
    if len(uniqstats) == 1:
        onestat = True
        _figopt['legtitle'] = uniqstats[0] + ' ' + _figopt['legtitle']
    else:
        onestat = False

    # all the same variables at the same place means we can create a slimmed
    # down version of the legend and still give information
    if onetitle and len(data.columns) > 10 and plottype in types[5]:
        if _figopt['legtitle']:
            _figopt['legtitle'] += '\n'
        key = data.columns[0]
        # position of the first digit in the first column name
        ix = np.nanargmax([i.isnumeric() for i in key])
        varlist = [int(key[ix:]) for key in data.columns]
        # get the maximum and mininum numbered codes
        mincode, maxcode = np.nanargmin(varlist), np.nanargmax(varlist)
        mincode, maxcode = data.columns[mincode], data.columns[maxcode]
        _figopt['legtitle'] += mincode.upper(
        ) + ' - ' + maxcode[ix:] + ' with '
        _figopt['legtitle'] += meta[key]['device'] + ' '
        for key in [mincode, maxcode]:
            if meta[key]['measurementheight'] != -9999:
                if meta[key]['measurementheight'] < 0:
                    word = ' m below ground'
                elif meta[key]['measurementheight'] > 0:
                    word = ' m above ground'
                else:
                    word = 'on the ground'
                if meta[key]['measurementheight'] % 1 == 0:
                    number = str(int(meta[key]['measurementheight']))
                else:
                    number = str(
                        np.round(meta[key]['measurementheight'], 2))
                if key == mincode:
                    _figopt['legtitle'] += number + ' - '
                else:
                    _figopt['legtitle'] += number + word + ', '
        _figopt['legtitle'] += meta[key]['aggregation']
    else:
        # otherwise create each legendentry based on the singular time-series
        for key in data.columns:
            if not onestat:
                _lineopt[key]['label'] = meta[key]['locationname'] + ' '
            if onetitle:
                pass
            else:
                _lineopt[key]['label'] += ' ' + meta[key]['variablename']
                _lineopt[key]['label'] += ' ('
            _lineopt[key]['label'] += key + ' with ' + meta[key]['device']
            _lineopt[key]['label'] += ', '
            if meta[key]['measurementheight'] != -9999:
                if meta[key]['measurementheight'] < 0:
                    word = ' m below ground'
                elif meta[key]['measurementheight'] > 0:
                    word = ' m above ground'
                else:
                    word = 'on the ground'
                if meta[key]['measurementheight'] % 1 == 0:
                    number = str(int(meta[key]['measurementheight']))
                else:
                    number = str(
                        np.round(meta[key]['measurementheight'], 2))
                _lineopt[key]['label'] += number + ' ' + word + ', '
            _lineopt[key]['label'] += meta[key]['aggregation']
            # only add the closing bracket if it is not onetitle
            if onetitle:
                pass
            else:
                _lineopt[key]['label'] += ')'

    # some warning for user in case there is no secondaryaxis set and
    # we have "non-matching" timeseries in a strict sense (unequal units, names)
    primcodes = [c for c in codes if c not in _figopt['secondaryaxis']]
    seccodes = [c for c in codes if c in _figopt['secondaryaxis']]

    if _figopt['secondaryylabel']:
        pass
    else:
        if len(np.unique([meta[key]['variablename']
                          for key in seccodes])) == 1:
            _figopt['secondaryylabel'] = meta[seccodes[0]]['variablename']
            _figopt['secondaryylabel'] += ' [' + unituppercase(
                meta[seccodes[0]]['unit']) + ']'
        elif len(np.unique([meta[key]['unit'] for key in seccodes])) == 1:
            _figopt['secondaryylabel'] = ' [' + unituppercase(
                meta[seccodes[0]]['unit']) + ']'
        else:
            if plottype not in types[3] + types[4] and seccodes:
                print(
                    'Neither variable name nor units in database match for secondary codes:',
                    seccodes, '\n',
                    'Are you sure you want to plot them on the same axis?')
            pass
    _figopt['secondaryylabel'] = html.unescape(_figopt['secondaryylabel'])

    if _figopt['ylabel']:
        pass
    else:
        if len(np.unique([meta[key]['variablename']
                          for key in primcodes])) == 1:
            _figopt['ylabel'] = meta[primcodes[0]]['variablename']
            _figopt['ylabel'] += ' [' + unituppercase(
                meta[primcodes[0]]['unit']) + ']'
        elif len(np.unique([meta[key]['unit'] for key in primcodes])) == 1:
            _figopt['ylabel'] = ' [' + unituppercase(
                meta[primcodes[0]]['unit']) + ']'
        else:
            if plottype not in types[3] + types[4]:
                print(
                    'Neither variable name nor units in database match for primary codes:',
                    primcodes, '\n',
                    'Are you sure you want to plot them on the same axis?')
            pass
    _figopt['ylabel'] = html.unescape(_figopt['ylabel'])

    # localtz, localutc = datetime.datetime.now(), datetime.datetime.utcnow()
    # dttz = round((localutc - localtz).total_seconds() / 3600)
    _figopt['xlabel'] = 'Time (' + datatz + ')'
    # _figopt['xlabel'] = 'Time (' + str(datetime.datetime.now().astimezone().tzinfo) + ')'

    if len(codes) < 2 and plottype == 'xy':
        print('For an xy plot, at least two timeseries code have to be given')
        plottype = 'timeseries'

    if plottype in types[0]:
        if plottype == types[0][0]:
            # xy plot: one series becomes the x-axis
            if indexcode is None:
                if not quiet:
                    print('Autoselecting first code as new index')
                indexcode = codes[0]
            data.index = data[indexcode]
            data = data.drop(columns=indexcode)
            _figopt['type'] = 'xy'
            _figopt['xlabel'] = meta[indexcode]['variablename']
            _figopt['xlabel'] += ' [' + unituppercase(
                meta[indexcode]['unit']) + ']'
            _figopt['xlabel'] = html.unescape(_figopt['xlabel'])
            _figopt['sunlines'] = False
        else:
            _figopt['type'] = 'timeseries'

        fig = linear(data, meta, fig=fig, lineopt=_lineopt, figopt=_figopt)
        #fig[1].set_xlim(data.index[0], data.index[-1])
        if plottype == types[0][0]:
            fig[1].set_xlim(_figopt['xrange'])
        elif type(t0) == datetime.datetime and type(t1) == datetime.datetime:
            fig[1].set_xlim(t0, t1)  # data.index[0], data.index[-1])
        else:
            fig[1].set_xlim(data.index[0], data.index[-1])
        # _xr = datetime.datetime.now(data.index[0], data.index[-1]))
        fig[0].tight_layout()

    elif plottype in types[1]:
        heights = [meta[i]['measurementheight'] for i in meta.keys()]
        fig = profiles(
            data,
            fig=fig,
            heights=heights,
            lineopt=_lineopt,
            figopt=_figopt,
            label=html.unescape(meta[data.columns[0]]['unit']),
            **kwargs,
        )
        fig[0].tight_layout()

    elif plottype in types[2]:
        fig[0].subplots_adjust(
            bottom=0.12,
            left=0.08,
            right=0.90,
        )
        fig = iso(
            data,
            fig=fig,
            lineopt=_lineopt,
            figopt=_figopt,
            simple=True,
            **kwargs,
        )

    elif plottype in types[3] or plottype in types[4]:
        data.columns = [d.upper() for d in data.columns]
        # ensure winddata at 0 (which is faulty) is removed
        if plottype in types[4]:
            lats = [meta[k]['lat'] for k in meta]
            lons = [meta[k]['lon'] for k in meta]
            names = [meta[k]['locationname'] for k in meta]
            fig = stationmap(lats,
                             lons,
                             names,
                             fig=fig,
                             lineopt=_lineopt,
                             figopt=_figopt,
                             mapalpha=_figopt['mapalpha'],
                             **kwargs)
        else:
            # v = 'variablename'
            # if f not in meta[]
            def winddircheck(text):
                # True if the variable name looks like a wind direction
                text = text.lower()
                if 'wind' in text and ('dir' in text or 'direction' in text):
                    return True
                else:
                    return False

            winddircodes = [
                m for m in meta if winddircheck(meta[m]['variablename'])
            ]
            windspeedcodes = [
                m for m in meta
                if not winddircheck(meta[m]['variablename'])
                and m not in winddircodes
            ]
            if len(winddircodes) != len(windspeedcodes):
                fig = fig[0]
                print('Unequal length of wind direction and wind speed codes!')
                print(
                    'Winddirectioncodes:',
                    winddircodes,
                    '\n',
                    'Windspeedcodes:',
                    windspeedcodes,
                )
            else:
                # check for erroneous amount of 0 winddirection in data
                for wdir, wsp in zip(winddircodes, windspeedcodes):
                    chk = data[data[wdir].eq(0)].index
                    data.loc[chk, [wdir, wsp]] = np.nan

                if data.dropna(how='all').empty:
                    errormsg = 'NO VALID WINDVALUES LEFT AFTER CONTROL \n\n'
                    errormsg += '\n\nGiven parameters were\n'
                    errormsg += 'Timestamp t0: ' + data.index[0].strftime(
                        '%d %b %Y %H:%M:%S') + '\n'
                    errormsg += 'Timestamp t1: ' + data.index[-1].strftime(
                        '%d %b %Y %H:%M:%S') + '\n'
                    fig[0].text(0.5,
                                0.5,
                                errormsg,
                                ha='center',
                                va='center',
                                fontsize=16)
                    plt.axis('off')
                    if outfile:
                        fig[0].savefig(outfile)
                    if returnfig:
                        return fig
                    else:
                        plt.close(fig[0])
                        return

                lats = [meta[k]['lat'] for k in winddircodes]
                lons = [meta[k]['lon'] for k in winddircodes]
                if lats and lons:
                    fig = windmap(
                        lats,
                        lons,
                        data,
                        fig=fig,
                        lineopt=_lineopt,
                        figopt=_figopt,
                        windspeedcodes=windspeedcodes,
                        winddircodes=winddircodes,
                        cmap=defaultwindcolor(),
                        mapalpha=_figopt['mapalpha'],
                        **kwargs,
                    )
                else:
                    fig = fig[0]
                    print(
                        'Latitude and/or longitude could not be found automatically'
                    )
        # return values of windmap/stationmap are figures but not also axes
        # like in other figures as this would lead to even more complicated
        # keeping track of things
        fig = [fig, None]

    elif plottype in types[5]:
        measurementheight = [
            meta[i]['measurementheight'] for i in data.columns
        ]
        _figopt['zlabel'] = html.unescape(_figopt['ylabel'])
        data.columns = [d.upper() for d in data.columns]
        fig[0].subplots_adjust(bottom=0.15, left=0.08, right=0.92)
        fig[1].set_xlim(data.index[0], data.index[-1])
        fig = mesh(
            data,
            y=measurementheight,
            fig=fig,
            lineopt=_lineopt,
            figopt=_figopt,
            **kwargs,
        )
        fig[1].set_xlim(data.index[0], data.index[-1])
    else:
        print('Plottype not known, please use of the following:', types)

    # override user behaviour
    # if (data.index[-1] - data.index[0]) >= datetime.timedelta(days=14):
    #     _figopt['sunlines'] = False

    # valid option only for timeseries, profiles and isoplots
    if _figopt['sunlines'] and plottype in types[0][1:] + types[1] + types[
            2] + types[5]:
        # isoplot, handle sunlines differently
        if plottype in types[2]:
            _xi = [mdates.num2date(i) for i in fig[1].get_xlim()]
            #print(_xi)
            # use a (likely) higher resolution array to calculate the sunposition
            # as the data.index may be of lower frequency and thus "falsify"
            # the sunposition
            sunindex = pd.date_range(_xi[0],
                                     _xi[1] + datetime.timedelta(days=1),
                                     freq='1Min')[1:]
            sundown = sunpos(
                sunindex,
                meta[codes[0]]['lat'],
                meta[codes[0]]['lon'],
            )
            # reshape to the 2D field we have get the elements that are >= 0
            # (implicit by np.where, i.e. when the sun is up
            sundown = np.reshape(sundown,
                                 (len(sundown) // 60 // 24, 24 * 60))
            ylim = fig[1].get_ylim()
            ys = np.linspace(ylim[0], ylim[1], 24 * 60)
            fig[1].contour(
                mdates.date2num(np.unique(sunindex.date)[:-1]),
                ys,
                sundown.T,
                # the level doesnt matter except being between 0 and 1
                # to suppress the user warning that would appear
                # at either 0 or 1 directly
                levels=[0.5],
                colors=_figopt['sunlinesisocolor'],
                linewidths=_figopt['sunlineswidth'],
            )
            # calculate string position for sunset/sunrise text
            dayindex = 5
            strpos = np.nonzero(sundown[dayindex, :].ravel())
            strpos = [strpos[0][0], strpos[0][-1]]
            text = [' Sunrise', ' Sunset']
            for i, p in enumerate(strpos):
                fig[1].text(
                    sunindex[dayindex],
                    ys[p],
                    text[i],
                    rotation=-90,
                    color=_figopt['sunlinestextcolor'],
                    fontsize=10,
                    ha='left',
                    va='top',
                    bbox=dict(boxstyle='round',
                              pad=0.12,
                              fc="w",
                              ec="w",
                              alpha=0.5),
                )
        # linearplots
        else:
            _xi = [mdates.num2date(i) for i in fig[1].get_xlim()]
            # dont draw sunlines unless explicitly stated in figopt
            if (_xi[1] - _xi[0]) > datetime.timedelta(days=60):
                if figopt is not None and 'sunlines' in figopt and figopt[
                        'sunlines']:
                    pass
                else:
                    _figopt['sunlines'] = False
                    _figopt['marktropicalnight'] = False

            sunindex = pd.date_range(_xi[0], _xi[1], freq='1Min')
            sundown = sunpos(
                sunindex,
                meta[codes[0]]['lat'],
                meta[codes[0]]['lon'],
            )
            # use a (likely) higher resolution array to calculate the
            # sunposition as the data.index may be of lower frequency and
            # thus "falsify" the sunposition
            ylim = fig[1].get_ylim()
            if _figopt['sunlines']:
                fig[1].fill_between(sunindex,
                                    ylim[0],
                                    ylim[1],
                                    zorder=0,
                                    where=~sundown,
                                    color=_figopt['sunlinescolor'])

            if _figopt['zeroline']:
                if fig[1].get_ylim()[0] <= 0 and fig[1].get_ylim()[1] > 0:
                    fig[1].plot(fig[1].get_xlim(), [0, 0],
                                color=_figopt['zerolinecolor'],
                                lw=_figopt['zerolinewidth'])

            if _figopt['marktropicalnight']:
                # thin band around 20 on the y-axis, drawn where ~sundown
                if ylim[1] > 20 and ylim[0] < 20:
                    fig[1].fill_between(
                        sunindex,
                        20 - 0.005 * (ylim[1] - ylim[0]),
                        20 + 0.005 * (ylim[1] - ylim[0]),
                        # 19.9, 20.1,
                        zorder=0,
                        where=~sundown,
                        color=_figopt['tropicalnightscolor'],
                        edgecolors=_figopt['tropicalnightsedgecolor'],
                        linewidth=_figopt['tropicalnightsedgewidth'])

    # windmap / stationnetwork
    # (types[3:5] is a list of lists, so membership has to be tested against
    # the concatenated name lists)
    if plottype in types[3] + types[4]:
        if 'mapfullscreen' in kwargs:
            watermark(fig[0], ypos=0.02, fontsize=8)
        else:
            watermark(fig[0], ypos=0.0, fontsize=8)
    else:
        watermark(fig[0], ypos=0.025, fontsize=10)

    if outfile:
        fig[0].savefig(outfile)

    if returnfig:
        return fig
    else:
        plt.close(fig[0])
        return
def plot_price(cfg):
    """Plot open and close prices of one symbol over a date range.

    Reads the cleaned prices file, keeps the rows of the requested symbol
    within the date range, writes them to a tab-separated file and saves and
    shows a scatter plot of the open (green) and close (blue) prices.

    Parameters
    ----------
    cfg : mapping
        Must contain ``'plot_params'``, a whitespace-separated string
        ``"<symbol> <start-date> <end-date>"`` with dates as ``YYYY-MM-DD``,
        e.g. ``"IBM 2009-01-01 2019-01-01"``.

    Returns
    -------
    None
        Returns early, after a warning, if fewer than three parameters are
        given. Exits the process if the prices file cannot be read.
    """
    # split() without an argument ignores repeated and surrounding
    # whitespace, so stray spaces do not produce empty parameters.
    param_list = cfg['plot_params'].split()
    if len(param_list) < 3:
        logging.warning("Plot params malformed. Skipping plot.")
        return

    logging.info(f"Plotting symbol {param_list[0].strip()}")
    logging.info(f" from {param_list[1]}")
    logging.info(f" to {param_list[2]}")

    register_matplotlib_converters()

    prices_input_file = CLEANED_PRICES_FILE
    #prices_input_file = cfg['raw_data_dir'] + cfg['raw_prices_input_file']
    try:
        logging.info("Reading " + prices_input_file)
        prices_df = pd.read_table(prices_input_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
        logging.info("Prices df shape " + str(prices_df.shape))
    except Exception as e:
        logging.critical("Not parsed: " + prices_input_file + "\n" + str(e))
        sys.exit()

    # param string [symbol start-date end-date]
    # e.g. IBM 2009-01-01 2019-01-01
    symbol = param_list[0].strip()

    start_list = param_list[1].split('-')
    date_start = pd.Timestamp(int(start_list[0]), int(start_list[1]),
                              int(start_list[2]))

    end_list = param_list[2].split('-')
    date_end = pd.Timestamp(int(end_list[0]), int(end_list[1]),
                            int(end_list[2]))

    # filter on date range (both bounds inclusive)
    logging.info("Filtering on date range")
    df = prices_df[(prices_df['date'] >= date_start) &
                   (prices_df['date'] <= date_end)]
    df = df.sort_values(['date'])

    # get group for this symbol; raises KeyError if the symbol has no rows
    logging.info("Filtering on symbol")
    df = df.groupby('symbol').get_group(symbol)

    # write df to file
    span_str = (date_start.strftime("%Y-%m-%d") + "_" +
                date_end.strftime("%Y-%m-%d"))
    csv_name = STOX_DATA_DIR + symbol + "_" + span_str + ".csv"
    df.to_csv(csv_name, index=False, sep="\t", float_format='%.3f')

    # plot open/close price
    plt.figure()
    plt.suptitle(symbol, fontsize=10)
    plt.scatter(df['date'].tolist(), df['open'], color='green', s=2)
    plt.scatter(df['date'].tolist(), df['close'], color='blue', s=2)
    plt_filename = STOX_DATA_DIR + symbol + "_" + span_str + ".png"
    plt.savefig(plt_filename)
    plt.show()
# Importing the libraries from datetime import datetime import numpy as np import matplotlib.pyplot as plt import pandas as pd from pandas.plotting import register_matplotlib_converters from sklearn import preprocessing import data_loader import keras import tensorflow as tf from keras.models import Model from keras.layers import Dense, Dropout, LSTM, Input from keras import optimizers np.random.seed(10) from tensorflow import set_random_seed set_random_seed(10) register_matplotlib_converters() FIX_HISTORICAL_INTERVAL_DAYS = 60 # 1. load the data def load_data(): #lst_stocks = ['TSLA', 'MSFT', 'GOOGL', 'AMZN', 'NFLX', 'FB'] lst_stock_symbols = ['MSFT'] return data_loader.load_data(lst_stock_symbols) # 2. this would be data viewing and data cleansing # * look for missing values # * distribution of data (I need to see the standard deviation as a # measure of volatility) def describe_data(dct_data): # get the statistics of the data for stock_symbol, stock_data in dct_data.items():
def __init__(self):
    """Start with no datasets and register the pandas matplotlib converters."""
    # Filled later; starts out as an empty list.
    self.datasets = list()
    register_matplotlib_converters()
def scan_stocks():
    """ The main method. Sells stocks in your portfolio if their 50 day moving average crosses
        below the 200 day, and buys stocks in your watchlist if the opposite happens.

        ###############################################################################################
        WARNING: Comment out the sell_holdings and buy_holdings lines if you don't actually want to execute the trade.
        ###############################################################################################

        If you sell a stock, this updates tradehistory.txt with information about the position,
        how much you've earned/lost, etc.

        Flow, as implemented here:
          1. Every portfolio symbol is checked with golden_cross (direction "below"); symbols
             that crossed are reported, the user is prompted, and the symbol is added to the
             sell list. The sell_holdings call itself is currently commented out.
          2. Symbols read from 'energy_watchlist.txt' that are not in the portfolio are checked
             with golden_cross (direction "above") and collected as potential buys.
          3. If there are potential buys, the user is asked once whether to place the buy order.
          4. If the sell list is non-empty, update_trade_history is called with it.
    """
    print("----- Starting scan... -----\n")
    register_matplotlib_converters()
    # These results are not used further down (the watchlist is re-read from
    # a file), but the calls are kept because they talk to the API.
    watchlist_symbols = get_watchlist_symbols()
    watchlist_names = get_watchlist_names()
    portfolio_symbols = get_portfolio_symbols()
    holdings_data = get_modified_holdings()
    potential_buys = []
    sells = []
    print("Current Portfolio: " + str(portfolio_symbols) + "\n")
    # print("Current Watchlist Names: " + str(watchlist_names) + "\n")
    # print("Current Watchlist Tickers: " + str(watchlist_symbols) + "\n")

    print("----- Scanning portfolio for stocks to sell -----\n")
    for symbol in portfolio_symbols:
        cross = golden_cross(symbol, n1=5, n2=10, days=60, direction="below")
        if cross == -1:
            print("----- Portfolio company " + str(symbol) +
                  " has crossed and should be sold -----\n")
            # NOTE(review): the reply is not acted on -- sell_holdings is
            # commented out and the symbol is recorded as sold either way.
            # Confirm whether a "no" should skip the symbol.
            input("----- Would you like to sell your shares in " +
                  str(symbol) + "? -----\n")
            # sell_holdings(symbol, holdings_data)
            sells.append(symbol)

    profile_data = r.build_user_profile()

    print("----- Scanning watchlist for stocks to buy -----\n")
    # Import watchlist since robinhood api doesn't seem to be working...
    # watchlist_symbols = ['BAC']
    watchlist_file = 'energy_watchlist.txt'
    watchlist_data = read_watchlist(watchlist_file)
    watchlist_symbols = watchlist_data[1]

    for symbol in watchlist_symbols:
        if symbol not in portfolio_symbols:
            cross = golden_cross(symbol, n1=10, n2=20, days=6,
                                 direction="above")
            if cross == 1:
                potential_buys.append(symbol)

    if len(potential_buys) > 0:
        print("\n----- There are: " + str(len(potential_buys)) +
              " potential buys in the watchlist -----\n")
        print(potential_buys)
        answer = input("\nWould you like to make a buy order (yes or no):")
        if answer == "yes":
            buy_holdings(potential_buys, profile_data, holdings_data)
            print("\n----- Buy order has been executed -----\n")
        elif answer == "no":
            print("\n----- Buy order has been terminated -----\n")
        else:
            # Any other reply is reported once; the buy is not placed.
            print("\nPlease enter yes or no.")

    if len(sells) > 0:
        update_trade_history(sells, holdings_data, "tradehistory.txt")
    print("----- Scan over -----\n")