from alpha_vantage.timeseries import TimeSeries


def av_api_wrapper(symbol, interval, _slice=None):
    # 1-minute data comes from the intraday-extended CSV endpoint; any other
    # interval falls back to the full daily adjusted series.
    if interval == '1m':
        ts = TimeSeries(output_format='csv')
        data_slice, meta_data = ts.get_intraday_extended(
            symbol, interval='1min', slice=_slice, adjusted='false')
        return data_slice
    else:
        ts = TimeSeries()
        data, meta_data = ts.get_daily_adjusted(symbol, outputsize='full')
        return data

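# A minimal usage sketch for av_api_wrapper above (hedged: it assumes the
# ALPHAVANTAGE_API_KEY environment variable is set, because TimeSeries() is
# constructed without an explicit key; the ticker and slice are illustrative).
minute_reader = av_api_wrapper('IBM', '1m', _slice='year1month1')  # CSV reader of 1-min bars
print(next(minute_reader))                    # header row: time, open, high, low, close, volume
daily_data = av_api_wrapper('IBM', 'daily')   # any interval other than '1m' returns daily bars
print(list(daily_data)[:3])                   # first few date keys of the JSON payload
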
def write_time_series_intraday_extended(cls, symbol: str = 'IBM') -> None:
    # Download every intraday-extended slice for the symbol, skipping slices that
    # are already on disk. Utils and the cls constants (YEAR_RANGE, MONTH_RANGE,
    # CSV_PATH) are defined elsewhere in the project.
    Utils.create_dir(os.path.join('..', '..', '..', 'data'))
    time_series = TimeSeries(output_format='csv')
    for year in range(cls.YEAR_RANGE):
        for month in range(cls.MONTH_RANGE):
            path: str = cls.CSV_PATH.format(symbol, year + 1, month + 1)
            if not os.path.isfile(path):
                s: str = 'year{}month{}'.format(year + 1, month + 1)
                csv_reader, _ = time_series.get_intraday_extended(symbol, slice=s)
                with open(path, 'a', newline='') as csv_file:
                    writer = csv.writer(csv_file)
                    writer.writerows(csv_reader)
                if not Utils.is_test():
                    sleep(25)  # stay under the free-tier rate limit

def get_data(ticker, engine):
    # give a list of ticker symbols and the engine object from sqlalchemy
    ts = TimeSeries(key=credentials.VANTAGE_API, output_format='csv')
    for company in ticker:
        for i in range(1, 13):
            month = 'year1month' + str(i)
            data_csv = ts.get_intraday_extended(symbol=company, interval='1min', slice=month)
            df = pd.DataFrame(list(data_csv[0]))
            df.columns = df.iloc[0]
            df = df.drop(0)
            df['stock'] = company
            df.to_sql("stock_info", engine, if_exists='append')

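# Hedged usage sketch for get_data: the SQLite URL and ticker list below are
# illustrative assumptions, and credentials.VANTAGE_API must hold a valid key.
from sqlalchemy import create_engine

engine = create_engine('sqlite:///stock_info.db')  # hypothetical local database
get_data(['IBM', 'MSFT'], engine)                  # appends 12 monthly slices per ticker
print(pd.read_sql('SELECT COUNT(*) FROM stock_info', engine))
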
import math

import pandas as pd
from alpha_vantage.timeseries import TimeSeries


def PullHourlyData(stock):
    API_key = 'CSMN0LYTQ5UYMVUT'  # '6JLJYIR4IV3WWRWS'
    ts = TimeSeries(key=API_key, output_format='csv')

    # Pull the last nine monthly slices of hourly bars for the requested ticker
    dct = {}
    for i, slices in enumerate([
            'year1month9', 'year1month8', 'year1month7', 'year1month6',
            'year1month5', 'year1month4', 'year1month3', 'year1month2',
            'year1month1'
    ]):
        data = ts.get_intraday_extended(stock, interval='60min', slice=slices)
        dct[i] = pd.DataFrame(list(data[0]))
    final_df = pd.concat([dct[i] for i in range(9)], ignore_index=True)

    # Drop the repeated CSV header rows, name the columns, and index by time
    final_df = final_df[final_df.iloc[:, 0] != 'time']
    final_df = final_df.rename(columns={
        0: 'time', 1: 'open', 2: 'high', 3: 'low', 4: 'close', 5: 'volume'
    })
    final_df['time'] = pd.to_datetime(final_df['time'])
    final_df = final_df.set_index('time')
    final_df = final_df.sort_index()
    for col in final_df.columns:
        final_df[col] = pd.to_numeric(final_df[col])

    # Log-transform the hourly high/low series and write them out
    ts_low = final_df['low'].asfreq('H', method='ffill')
    ts_high = final_df['high'].asfreq('H', method='ffill')
    ts_low = ts_low.apply(lambda x: math.log(x))
    ts_high = ts_high.apply(lambda x: math.log(x))
    ts_low.to_csv('hourly_stock_data.csv', header=True)
    ts_high.to_csv('hourly_stock_data_high.csv', header=True)

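# Hedged usage sketch for PullHourlyData: it writes the two CSV files named in the
# function above; 'DAL' is just an example ticker.
PullHourlyData('DAL')
log_lows = pd.read_csv('hourly_stock_data.csv', index_col='time', parse_dates=True)
log_highs = pd.read_csv('hourly_stock_data_high.csv', index_col='time', parse_dates=True)
print(log_lows.tail())
print(log_highs.tail())
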
def HighLowTimestamp(df_final):
    API_key = '6JLJYIR4IV3WWRWS'
    ts = TimeSeries(key=API_key, output_format='csv')

    ### daily chart, include percent change ###
    # Pull the three most recent monthly slices of hourly 'DAL' bars
    dct = {}
    for i, slices in enumerate(['year1month3', 'year1month2', 'year1month1']):
        data = ts.get_intraday_extended('DAL', interval='60min', slice=slices)
        dct[i] = pd.DataFrame(list(data[0]))
    final_df = pd.concat([dct[0], dct[1], dct[2]], ignore_index=True)

    # Drop the repeated CSV header rows and name the columns
    final_df = final_df[final_df.iloc[:, 0] != 'time']
    final_df = final_df.rename(columns={
        0: 'time', 1: 'open', 2: 'high', 3: 'low', 4: 'close', 5: 'volume'
    })
    # Convert prices to numbers so the high/low sort below is numeric, not lexicographic
    final_df['high'] = pd.to_numeric(final_df['high'])
    final_df['low'] = pd.to_numeric(final_df['low'])
    final_df['time'] = pd.to_datetime(final_df['time'])

    # Keep regular-session bars only
    df = (final_df.set_index('time')
          .between_time('10:00:00', '16:00:00')
          .reset_index()
          .reindex(columns=final_df.columns))
    df['Date'] = pd.to_datetime(df['time']).dt.date
    df['Timestamp'] = pd.to_datetime(df['time']).dt.time

    # For each date, record the time of day at which the high and low occurred
    f_df = df
    date_lst = set(f_df['Date'])
    time_dict = {}
    for date in date_lst:
        day = f_df[f_df['Date'] == date]
        high_value = day.sort_values('high', ascending=False)['Timestamp'].iloc[0]
        time_dict[str(date)] = {'high': high_value}
        low_value = day.sort_values('low')['Timestamp'].iloc[0]
        time_dict[str(date)]['low'] = low_value
    return time_dict

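# Hedged usage sketch for HighLowTimestamp: the df_final argument is unused by the
# function above (it re-downloads the 'DAL' data itself), so None is passed here.
timestamps = HighLowTimestamp(None)
for day, extremes in sorted(timestamps.items()):
    print(day, 'high at', extremes['high'], 'low at', extremes['low'])
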
def pull_tick_slice(self, tick, freq, desired_slice, adjusted):
    """Pulls a single intraday-extended slice for a ticker."""
    logger.info(f"obtaining slice: ({tick},{freq},{desired_slice})")
    ts = TimeSeries(key=self.key, output_format='csv')
    self.tracker.wait()
    reader, meta_data = ts.get_intraday_extended(
        symbol=tick, interval=freq, slice=desired_slice
    )
    # TODO: figure out why `adjusted` isn't accepted:
    # https://github.com/RomelTorres/alpha_vantage/blob/develop/alpha_vantage/timeseries.py
    content = [l for l in reader]
    df = pd.DataFrame(content[1:], columns=content[0])
    logger.debug(f"df.columns: {df.columns}")
    df['time'] = pd.to_datetime(df['time'])
    df = df.rename({'time': 'date'}, axis=1)
    df['ticker'] = tick
    self.tracker.update(1)
    return df

def get_stock_data(symbol, interval, slice='year1month1'):
    file_path = f'data/{symbol}_{slice}_{interval}.csv'

    # If the backup doesn't exist, go get it and store it locally
    if not os.path.exists(file_path):
        # Create directory if it doesn't exist
        if not os.path.exists('data'):
            os.mkdir('data')

        # Retrieve CSV data from API and save it to local file
        ts = TimeSeries(key=ALPHA_VANTAGE_TOKEN, output_format='csv')
        with open(file_path, 'w') as new_file:
            reader, _ = ts.get_intraday_extended(symbol=symbol, interval=interval, slice=slice)
            for row in reader:
                new_file.write(','.join(row))
                new_file.write('\n')

        # Load the newly created CSV and update it with calculated indicators
        data = pd.read_csv(file_path)

        # Check if data is not empty
        if len(data.index) > 0:
            data = data.set_index(pd.DatetimeIndex(data['time'].values))
            data = data.sort_index()
            data = _calculate_obv_data(data)
            data.to_csv(file_path)
        else:
            return None

    # Load the CSV file, sort it by the time, and return the specified date's data
    data = pd.read_csv(file_path)
    if len(data.index) > 0:
        data = data.set_index(pd.DatetimeIndex(data['time'].values))
        data = data.drop(columns=['time', 'Unnamed: 0'])
        return data
    else:
        return None

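# Hedged usage sketch for get_stock_data: ALPHA_VANTAGE_TOKEN and _calculate_obv_data
# are defined elsewhere in this project; the symbol, interval, and slice are illustrative.
bars = get_stock_data('MSFT', '15min', slice='year1month1')
if bars is not None:
    print(bars[['close', 'volume']].tail())
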
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import time

api_key = '4VU02OKEYKOXMVHL'
ts = TimeSeries(key=api_key, output_format='csv')
totalData = ts.get_intraday_extended(symbol='MSFT', slice="year1month1", interval='1min')
dd = totalData

# csv --> dataframe
df = pd.DataFrame(list(dd[0]))

# setup of column and index
header_row = 0
df.columns = df.iloc[header_row]
df = df.drop(header_row)
df.set_index('time', inplace=True)

# show output
print(df)

            section, filename))
    return config


if __name__ == '__main__':
    config = read_config()
    filename = config['file_name']
    api_key = config['api_key']
    slices = config['slices'].split(",")
    print(slices)
    filenames = glob.glob(filename + "*.csv")
    ts = TimeSeries(key=api_key, output_format='csv')
    for file in filenames:
        df = pd.read_csv(file)
        for index, row in df.iterrows():
            for slice in slices:
                csv_reader, meta_data = ts.get_intraday_extended(
                    symbol=row['Code'], interval='1min', slice=slice)  # , adjusted=True)
                df_stock = pd.DataFrame(csv_reader, index=None)
                file_name = 'data\\stock_data_' + row['Code'] + '_' + slice + ".csv"
                df_stock.to_csv(file_name, sep=',', header=True)

import pandas as pd

api_key = '9YBSFK0A7B24SLJ7'

# pip install alpha_vantage
# https://github.com/RomelTorres/alpha_vantage
# https://www.alphavantage.co/documentation/
# https://alpha-vantage.readthedocs.io/en/latest/#
# https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=IBM&interval=15min&slice=year1month2&apikey=demo
from alpha_vantage.timeseries import TimeSeries
from pprint import pprint

ts = TimeSeries(key=api_key, output_format='csv')  # , output_format='pandas')
# data, meta_data = ts.get_intraday(symbol='MSFT', interval='1min', outputsize='full')
csv_reader, meta_data = ts.get_intraday_extended(
    symbol='MSFT', interval='15min', slice='year1month1')  # , adjusted=True)
# for row in csv_reader:
#     pprint(row)

class AlphaVantageData(DataSource):
    SOURCE = "ALPHAVANTAGE"
    MIN_TIMESTEP = "minute"
    TIMESTEP_MAPPING = [
        {"timestep": "minute", "represntations": ["1min"]},
        {"timestep": "day", "represntations": ["1D"]},
    ]

    def __init__(self, config, max_workers=24, **kwargs):
        # Set max_workers for multithreaded requests, capped at 24 concurrent
        # threads to stay within the API's rate limits.
        self.name = "alphavantage"
        self.max_workers = min(max_workers, 24)

        # Stores
        self._data_store_minutes = {}
        self._data_store_days = {}

        # Connection to the Alpha Vantage REST API
        self.config = config
        self.api_key = config.API_KEY
        self.api = TimeSeries(key=self.api_key, output_format="csv")

    def _get_store(self, timestep):
        if timestep == "minute":
            return self._data_store_minutes
        return self._data_store_days

    def _csv_to_list(self, csv_reader):
        columns = next(csv_reader)
        data = []
        for row in csv_reader:
            data.append(dict(zip(columns, row)))
        return data

    def _request_minutes_data(self, symbol, slice):
        csv_reader, metadata = self.api.get_intraday_extended(symbol, interval="1min", slice=slice)
        data = self._csv_to_list(csv_reader)
        return data

    def _request_daily_data(self, symbol):
        csv_reader, metadata = self.api.get_daily_adjusted(symbol, outputsize="full")
        data = self._csv_to_list(csv_reader)
        return data

    def _request_data(self, symbol, timestep):
        if timestep == "minute":
            slices = [f"year{i}month{j}" for i in range(1, 3) for j in range(1, 13)]
            with ThreadPoolExecutor(
                max_workers=self.max_workers,
                thread_name_prefix=f"{self.name}_requesting_data",
            ) as executor:
                tasks = []
                for slice in slices:
                    tasks.append(executor.submit(self._request_minutes_data, symbol, slice))
                data = []
                for task in as_completed(tasks):
                    data.extend(task.result())
        else:
            data = self._request_daily_data(symbol)
        return data

    def _append_data(self, symbol, data, timestep):
        store = self._get_store(timestep)
        df = pd.DataFrame(data)
        if "time" in df.columns:
            index_column = "time"
        else:
            index_column = "timestamp"
        df.set_index(index_column, inplace=True)
        df.sort_index(inplace=True)
        df.index = df.index.map(
            lambda d: datetime.strptime(d, "%Y-%m-%d")).tz_localize(self.DEFAULT_TIMEZONE)
        store[symbol] = df
        return df

    def _pull_source_symbol_bars(self, symbol, length, timestep=MIN_TIMESTEP, timeshift=None):
        self._parse_source_timestep(timestep, reverse=True)
        store = self._get_store(timestep)
        if symbol in store:
            data = store[symbol]
        else:
            data = self._request_data(symbol, timestep)
            if not data:
                raise NoDataFound(self.SOURCE, symbol)
            data = self._append_data(symbol, data, timestep)

        if timeshift:
            end = datetime.now() - timeshift
            end = self.to_default_timezone(end)
            data = data[data.index <= end]

        result = data.tail(length)
        return result

    def _pull_source_bars(self, symbols, length, timestep=MIN_TIMESTEP, timeshift=None):
        """Pull broker bars for a list of symbols."""
        result = {}
        self._parse_source_timestep(timestep, reverse=True)
        for symbol in symbols:
            result[symbol] = self._pull_source_symbol_bars(
                symbol, length, timestep=timestep, timeshift=timeshift)
        return result

    def _parse_source_symbol_bars(self, response, symbol):
        df = response.copy()
        if "adjusted_close" in df.columns:
            del df["adjusted_close"]
        if "dividend_amount" in df.columns:
            df.rename(columns={"dividend_amount": "dividend"}, inplace=True)
        else:
            df["dividend"] = 0
        if "split_coefficient" in df.columns:
            df.rename(columns={"split_coefficient": "stock_splits"}, inplace=True)
        else:
            df["stock_splits"] = 0
        df = df.astype({
            "open": "float64",
            "high": "float64",
            "low": "float64",
            "close": "float64",
            "volume": "int64",
            "dividend": "float64",
            "stock_splits": "float64",
        })
        df["price_change"] = df["close"].pct_change()
        df["dividend_yield"] = df["dividend"] / df["close"]
        df["return"] = df["dividend_yield"] + df["price_change"]
        bars = Bars(df, self.SOURCE, symbol, raw=response)
        return bars