def populate_data_timeframe(self, tickers, start_date, end_date):
    """Load daily and intraday rows for ``tickers`` into ``self.tickers``.

    For each ticker the tables ``data_daily_<ticker>`` and
    ``data_intraday_<ticker>`` are queried, in that order, through
    ``db.get_data_interval`` using ``self.conn`` and the given date range.
    Every returned row is stored as
    ``self.tickers[ticker][row[0]][field_name] = value``, where the field
    names are matched to ``row[1:]`` purely by position.

    Existing entries in ``self.tickers`` are kept; a field that is loaded
    again is overwritten. A row carrying more values than there are field
    names raises ``IndexError``.

    NOTE(review): the field tuples below are assumed to follow the column
    order of the underlying tables -- confirm against the schema.
    """
    daily_fields = ('open', 'high', 'low', 'close', 'volume',
                    'dividend_amount', 'split_coefficient')
    intraday_fields = ('open', 'high', 'low', 'close', 'volume')
    # Daily data is loaded first, then intraday, for every ticker.
    sources = (
        ('data_daily_%s', daily_fields),
        ('data_intraday_%s', intraday_fields),
    )

    for ticker in tickers:
        if ticker not in self.tickers:
            self.tickers[ticker] = {}

        for table_pattern, fields in sources:
            rows = db.get_data_interval(self.conn, table_pattern % ticker,
                                        start_date, end_date)
            for row in rows:
                key = row[0]
                if key not in self.tickers[ticker]:
                    self.tickers[ticker][key] = {}
                for position, value in enumerate(row[1:]):
                    self.tickers[ticker][key][fields[position]] = value
def ingest(environ, asset_db_writer, minute_bar_writer, daily_bar_writer,
           adjustment_writer, calendar, start_session, end_session, cache,
           show_progress, output_dir):
    """Write minute bars, daily bars and equity metadata for a fixed ticker list.

    The parameter list looks like a Zipline data-bundle ingest callback
    (presumably registered as one -- confirm with the caller).

    Args:
        asset_db_writer: receives the equity metadata frame via
            ``write(equities=...)``.
        minute_bar_writer: receives ``[(sid, frame)]`` per ticker, where the
            frame holds the intraday rows indexed by ``date``.
        daily_bar_writer: receives ``[(sid, frame)]`` per ticker, where the
            frame holds the daily rows indexed by ``date`` with
            ``adjusted_close`` removed.
        adjustment_writer: ``write()`` is called once with no arguments.
        show_progress: forwarded to both bar writers.
        environ, calendar, start_session, end_session, cache, output_dir:
            accepted but not used by this implementation; the date window
            is hard-coded below.

    The sid of each ticker is its position in the ticker list.
    """
    metadata_dtype = [
        ('symbol', 'object'),
        ('asset_name', 'object'),
        ('start_date', 'datetime64[ns]'),
        ('end_date', 'datetime64[ns]'),
        ('first_traded', 'datetime64[ns]'),
        ('auto_close_date', 'datetime64[ns]'),
        ('exchange', 'object'),
    ]
    ticker_list = [
        'aapl', 'amzn', 'msft', 'amd', 'nvda', 'goog', 'baba', 'fitb', 'mu',
        'fb', 'sq', 'tsm', 'qcom', 'mo', 'bp', 'unh', 'cvs', 'tpr'
    ]
    conn = connect()
    metadata = pd.DataFrame(
        np.empty(len(ticker_list), dtype=metadata_dtype))
    start_date = datetime(2020, 2, 14, 9, 31, 0)
    end_date = datetime(2020, 2, 28, 16, 0, 0)

    for i, ticker in enumerate(ticker_list):
        intraday = get_data_interval(conn, 'data_intraday_{}'.format(ticker),
                                     start_date, end_date)
        # The daily table is queried with plain dates rather than datetimes.
        daily = get_data_interval(conn, 'data_daily_{}'.format(ticker),
                                  start_date.date(), end_date.date())

        # Order follows metadata_dtype: symbol, asset_name, start_date,
        # end_date, first_traded, auto_close_date, exchange.
        metadata.iloc[i] = (ticker, ticker, start_date, end_date, start_date,
                            end_date + timedelta(days=1), 'NYSE')

        df = pd.DataFrame(
            intraday,
            columns=['date', 'open', 'high', 'low', 'close', 'volume'])
        df = df.set_index('date')

        df1 = pd.DataFrame(daily, columns=[
            'date', 'open', 'high', 'low', 'close', 'adjusted_close',
            'volume', 'dividend', 'split'
        ])
        df1 = df1.drop(['adjusted_close'], axis=1).set_index('date')

        # Best effort: a failed write is reported and ingestion continues
        # with the next writer / ticker.
        try:
            # Honour the caller's show_progress instead of forcing it on.
            minute_bar_writer.write([(i, df)], show_progress=show_progress)
        except Exception as e:
            print(e)

        # NOTE(review): the daily writer is invoked once per ticker; confirm
        # that it supports repeated write() calls rather than expecting a
        # single iterable covering every sid.
        try:
            daily_bar_writer.write([(i, df1)], show_progress=show_progress)
        except Exception as e:
            print(e)

    asset_db_writer.write(equities=metadata)
    print(metadata)
    adjustment_writer.write()
def ta_test():
    """Build TA-augmented daily frames for a fixed ticker list and train a CNN.

    For each ticker the daily table is fetched with ``pandas=True``, passed
    through ``ta.add_all_ta_features`` and printed. The resulting dict
    (ticker -> frame) goes to ``process_data``; the class distribution of
    the train and test splits is printed and a CNN with input shape
    ``(30, 4)`` is trained on them.

    NOTE(review): ``num_classes`` is not assigned inside this function, so it
    must be provided at module level -- confirm it exists.
    """
    symbols = [
        'aapl', 'amzn', 'msft', 'amd', 'nvda', 'goog', 'baba', 'fitb', 'mu',
        'fb', 'sq', 'tsm', 'qcom', 'mo', 'bp', 'unh', 'cvs', 'tpr'
    ]
    data = {}
    conn = connect()
    range_start = datetime(2008, 1, 1)
    range_end = datetime(2030, 1, 1)

    for symbol in symbols:
        frame = get_data_interval(conn, 'data_daily_{}'.format(symbol),
                                  range_start, range_end, pandas=True)
        data[symbol] = ta.add_all_ta_features(frame,
                                              open='open',
                                              high='high',
                                              low='low',
                                              close='close',
                                              volume='volume')
        print(data[symbol])

    train, test, label_train, label_test = process_data(data)
    print_distribution(train, label_train)
    print_distribution(test, label_test)

    model = cnn(input_shape=(30, 4), num_classes=num_classes)
    train_cnn(model, train, label_train, test, label_test)
def csv_daily_data(conn, ticker, start_date, end_date, path):
    """Export the daily rows of ``ticker`` to ``<path>/daily/<ticker>.csv``.

    Rows are fetched from the ``data_daily_<ticker>`` table for the given
    interval, the ``adjusted_close`` column is dropped, ``date`` becomes the
    index, and the frame is printed before being written with header and
    index included.
    """
    column_names = [
        'date', 'open', 'high', 'low', 'close', 'adjusted_close', 'volume',
        'dividend', 'split'
    ]
    rows = get_data_interval(conn, 'data_daily_{}'.format(ticker),
                             start_date, end_date)

    frame = pd.DataFrame(rows, columns=column_names)
    frame = frame.drop(['adjusted_close'], axis=1)
    frame = frame.set_index('date')
    print(frame)

    target = '{}/daily/{}.csv'.format(path, ticker)
    frame.to_csv(target, header=True, index=True)
def csv_intraday_data(conn, ticker, start_date, end_date, path):
    """Export the intraday rows of ``ticker`` to ``<path>/intraday/<ticker>.csv``.

    Rows are fetched from the ``data_intraday_<ticker>`` table for the given
    interval and indexed by ``date``. Constant ``dividend`` (0) and ``split``
    (1) columns are appended, the frame is printed, and it is then written
    with header and index included.
    """
    column_names = ['date', 'open', 'high', 'low', 'close', 'volume']
    rows = get_data_interval(conn, 'data_intraday_{}'.format(ticker),
                             start_date, end_date)

    frame = pd.DataFrame(rows, columns=column_names)
    frame = frame.set_index('date')
    # Constant filler columns appended after the price/volume columns.
    frame['dividend'] = 0
    frame['split'] = 1
    print(frame)

    target = '{}/intraday/{}.csv'.format(path, ticker)
    frame.to_csv(target, header=True, index=True)
def main():
    """Fetch daily data for the configured tickers and train the LSTM model.

    Daily rows between 2008-01-01 and 2030-01-01 are loaded per ticker into a
    dict, split by ``process_lstm`` using ``window``, and fed to
    ``train_lstm`` with a model of input shape ``(window, dimensions)``.
    """
    # Only a single ticker is processed here; the 18-ticker list used by
    # other functions in this file can be substituted.
    symbols = ['aapl']
    conn = connect()
    range_start = datetime(2008, 1, 1)
    range_end = datetime(2030, 1, 1)

    window = 50
    dimensions = 1
    # The next three values are only needed by the alternative CNN pipeline
    # (process_cnn / print_distribution / train_cnn), which is currently
    # disabled in favour of the LSTM path below.
    num_classes = 4
    label_window = 2
    label_names = {0: 'losses', 1: 'gains', 2: 'both', 3: 'none'}

    data = {
        symbol: get_data_interval(conn, 'data_daily_{}'.format(symbol),
                                  range_start, range_end)
        for symbol in symbols
    }

    train, test, label_train, label_test = process_lstm(data, window=window)
    model = lstm(input_shape=(window, dimensions))
    train_lstm(model, train, label_train, test, label_test)