def av_api_wrapper(symbol, interval, _slice=None):
    """Fetch Alpha Vantage data for *symbol*.

    For interval '1m', returns a CSV reader over one intraday-extended
    slice (``_slice``); for any other interval, returns the full
    daily-adjusted series in the client's default output format.
    """
    if interval != '1m':
        # Daily path: default (dict) output format, full history.
        client = TimeSeries()
        series, _meta = client.get_daily_adjusted(symbol, outputsize='full')
        return series

    # Intraday path: CSV output; 'adjusted' is deliberately the string
    # 'false', matching what the extended-intraday endpoint expects.
    client = TimeSeries(output_format='csv')
    rows, _meta = client.get_intraday_extended(
        symbol, interval='1min', slice=_slice, adjusted='false')
    return rows
Beispiel #2
0
 def write_time_series_intraday_extended(cls, symbol: str = 'IBM') -> None:
     """Download every intraday-extended slice for *symbol* and cache as CSVs.

     Iterates all (year, month) slices in ``cls.YEAR_RANGE`` x
     ``cls.MONTH_RANGE``, skipping files that already exist, and sleeps
     between requests to respect the API rate limit.
     """
     Utils.create_dir(os.path.join('..', '..', '..', 'data'))
     time_series = TimeSeries(output_format='csv')
     for year in range(cls.YEAR_RANGE):
         for month in range(cls.MONTH_RANGE):
             path: str = cls.CSV_PATH.format(symbol, year + 1, month + 1)
             if not os.path.isfile(path):
                 s: str = 'year{}month{}'.format(year + 1, month + 1)
                 csv_reader, _ = time_series.get_intraday_extended(symbol,
                                                                   slice=s)
                 # BUG FIX: the handle was opened with open() and never
                 # closed, leaking a file descriptor per slice; a context
                 # manager guarantees the flush/close. newline='' is the
                 # documented open mode for csv.writer targets.
                 with open(path, 'a', newline='') as csv_file:
                     writer = csv.writer(csv_file)
                     writer.writerows(csv_reader)
                 if not Utils.is_test():
                     # Free-tier Alpha Vantage allows ~5 requests/minute.
                     sleep(25)
def get_data(
    ticker, engine
):  # give a list of ticker symbols and the engine object from sqlalchemy
    """For each symbol in *ticker*, download 12 monthly slices of 1-minute
    bars and append them to the 'stock_info' table via *engine*."""
    client = TimeSeries(key=credentials.VANTAGE_API, output_format='csv')
    for symbol in ticker:
        for month_idx in range(1, 13):
            slice_name = 'year1month{}'.format(month_idx)
            response = client.get_intraday_extended(symbol=symbol,
                                                    interval='1min',
                                                    slice=slice_name)
            # The first CSV row is the header: promote it to column names
            # and drop it from the data.
            frame = pd.DataFrame(list(response[0]))
            frame.columns = frame.iloc[0]
            frame = frame.drop(0)
            frame['stock'] = symbol
            frame.to_sql("stock_info", engine, if_exists='append')
Beispiel #4
0
def PullHourlyData(stock):
    """Pull ~9 months of 60-minute bars for *stock*, log-transform the hourly
    low/high series, and write them to CSV files.

    BUG FIX: the ``stock`` parameter was previously ignored and 'DAL' was
    hardcoded in the API call; the parameter is now honored.
    """
    # NOTE(review): API key hardcoded in source — consider moving to config.
    API_key = 'CSMN0LYTQ5UYMVUT'  #'6JLJYIR4IV3WWRWS'
    ts = TimeSeries(key=API_key, output_format='csv')

    # Fetch the nine most recent monthly slices and collect them as frames.
    frames = []
    for slice_name in [
            'year1month9', 'year1month8', 'year1month7', 'year1month6',
            'year1month5', 'year1month4', 'year1month3', 'year1month2',
            'year1month1'
    ]:
        data = ts.get_intraday_extended(stock, interval='60min',
                                        slice=slice_name)
        frames.append(pd.DataFrame(list(data[0])))

    final_df = pd.concat(frames, ignore_index=True)

    # Each slice's CSV ships its own header row; drop them all, then give
    # the positional columns their real names.
    final_df = final_df[final_df.iloc[:, 0] != 'time']
    final_df = final_df.rename(columns={
        0: 'time',
        1: 'open',
        2: 'high',
        3: 'low',
        4: 'close',
        5: 'volume'
    })

    final_df['time'] = pd.to_datetime(final_df['time'])
    final_df = final_df.set_index('time')
    final_df = final_df.sort_index()

    # CSV values arrive as strings; coerce everything to numeric.
    for col in final_df.columns:
        final_df[col] = pd.to_numeric(final_df[col])

    # Resample onto a strict hourly grid (forward-filling gaps) and take
    # the natural log so downstream models work with log-prices.
    ts_low = final_df['low'].asfreq('H', method='ffill').apply(math.log)
    ts_high = final_df['high'].asfreq('H', method='ffill').apply(math.log)
    ts_low.to_csv('hourly_stock_data.csv', header=True)
    ts_high.to_csv('hourly_stock_data_high.csv', header=True)
Beispiel #5
0
def HighLowTimestamp(df_final):
    """Return, per trading day, the intraday timestamps of that day's high
    and low for 'DAL' over the last three monthly slices of 60-minute bars.

    Returns a dict: {'YYYY-MM-DD': {'high': time, 'low': time}, ...}.

    NOTE(review): the ``df_final`` parameter is accepted but never used —
    data is re-fetched from the API instead. Kept for interface
    compatibility; confirm whether the caller intended it to be consumed.
    """
    API_key = '6JLJYIR4IV3WWRWS'
    ts = TimeSeries(key=API_key, output_format='csv')

    frames = []
    for slice_name in ('year1month3', 'year1month2', 'year1month1'):
        data = ts.get_intraday_extended('DAL', interval='60min',
                                        slice=slice_name)
        frames.append(pd.DataFrame(list(data[0])))

    final_df = pd.concat(frames, ignore_index=True)
    # Each slice's CSV ships its own header row; drop them all.
    final_df = final_df[final_df.iloc[:, 0] != 'time']
    final_df = final_df.rename(columns={
        0: 'time',
        1: 'open',
        2: 'high',
        3: 'low',
        4: 'close',
        5: 'volume'
    })
    # BUG FIX: prices arrived as strings, so sort_values('high'/'low')
    # sorted lexicographically (e.g. '9.5' > '10.2'), picking the wrong
    # timestamps. Coerce to numeric before comparing.
    for col in ('open', 'high', 'low', 'close', 'volume'):
        final_df[col] = pd.to_numeric(final_df[col])

    final_df['time'] = pd.to_datetime(final_df['time'])
    # Restrict to regular trading hours (10:00-16:00).
    df = (final_df.set_index('time').between_time(
        '10:00:00',
        '16:00:00').reset_index().reindex(columns=final_df.columns))

    df['Date'] = pd.to_datetime(df['time']).dt.date
    df['Timestamp'] = pd.to_datetime(df['time']).dt.time

    time_dict = {}
    for date in set(df['Date']):
        day = df[df['Date'] == date]
        high_value = day.sort_values('high',
                                     ascending=False)['Timestamp'].iloc[0]
        low_value = day.sort_values('low')['Timestamp'].iloc[0]
        time_dict[str(date)] = {'high': high_value, 'low': low_value}

    return time_dict
Beispiel #6
0
 def pull_tick_slice(self, tick, freq, desired_slice, adjusted):
     """Pull one intraday-extended slice for *tick* and return a DataFrame.

     Rate-limits through ``self.tracker``, renames the 'time' column to
     'date', and tags every row with the ticker symbol.
     """
     logger.info(f"obtaining slice: ({tick},{freq},{desired_slice})")
     client = TimeSeries(key=self.key, output_format='csv')
     self.tracker.wait()
     # TODO figure out why 'adjusted' isn't accepted:
     # https://github.com/RomelTorres/alpha_vantage/blob/develop/alpha_vantage/timeseries.py
     rows, _meta = client.get_intraday_extended(
         symbol=tick, interval=freq, slice=desired_slice
     )
     header, *records = list(rows)
     frame = pd.DataFrame(records, columns=header)
     logger.debug(f"df.columns: {frame.columns}")
     frame['time'] = pd.to_datetime(frame['time'])
     frame = frame.rename({'time': 'date'}, axis=1)
     frame['ticker'] = tick
     self.tracker.update(1)
     return frame
def get_stock_data(symbol, interval, slice='year1month1'):
    """Return indicator-augmented intraday data for *symbol*, caching locally.

    On first call, the slice is fetched from Alpha Vantage, written to
    ``data/<symbol>_<slice>_<interval>.csv``, augmented with OBV indicators,
    and re-saved; later calls read straight from the cached file.
    Returns ``None`` when the data has no rows.
    """
    file_path = f'data/{symbol}_{slice}_{interval}.csv'

    if not os.path.exists(file_path):
        # First request for this slice: make sure the cache directory exists.
        if not os.path.exists('data'):
            os.mkdir('data')

        # Stream the API's CSV rows straight into the local cache file.
        ts = TimeSeries(key=ALPHA_VANTAGE_TOKEN, output_format='csv')
        with open(file_path, 'w') as new_file:
            reader, _ = ts.get_intraday_extended(symbol=symbol, interval=interval, slice=slice)
            for row in reader:
                new_file.write(','.join(row) + '\n')

        # Re-load the fresh CSV and enrich it with calculated indicators.
        raw = pd.read_csv(file_path)
        if len(raw.index) == 0:
            # Empty API response: nothing to augment.
            return None
        raw = raw.set_index(pd.DatetimeIndex(raw['time'].values))
        raw = raw.sort_index()
        raw = _calculate_obv_data(raw)
        raw.to_csv(file_path)

    # Serve from cache: index by timestamp and drop bookkeeping columns.
    cached = pd.read_csv(file_path)
    if len(cached.index) == 0:
        return None
    cached = cached.set_index(pd.DatetimeIndex(cached['time'].values))
    return cached.drop(columns=['time', 'Unnamed: 0'])
Beispiel #8
0
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import time

# NOTE(review): API key hardcoded in source.
api_key = '4VU02OKEYKOXMVHL'
ts = TimeSeries(key=api_key, output_format='csv')
totalData = ts.get_intraday_extended(symbol='MSFT',
                                     slice="year1month1",
                                     interval='1min')
dd = totalData

# Materialize the CSV reader into a DataFrame; the first row is the header.
df = pd.DataFrame(list(dd[0]))

# Promote the header row to column names, drop it from the data,
# and index the frame by timestamp.
header_row = 0
df.columns = df.iloc[header_row]
df = df.drop(header_row)
df.set_index('time', inplace=True)

# Show the result.
print(df)
Beispiel #9
0
            section, filename))

    return config


if __name__ == '__main__':

    # Read API key, input-file prefix and slice list from config.
    config = read_config()
    filename = config['file_name']
    api_key = config['api_key']
    slices = config['slices'].split(",")

    print(slices)

    filenames = glob.glob(filename + "*.csv")
    ts = TimeSeries(key=api_key, output_format='csv')

    for file in filenames:
        df = pd.read_csv(file)

        for index, row in df.iterrows():

            # One request per (symbol, slice); each slice is saved to its
            # own CSV file.
            for slice in slices:
                csv_reader, meta_data = ts.get_intraday_extended(
                    symbol=row['Code'], interval='1min',
                    slice=slice)  #, adjusted=True)
                df_stock = pd.DataFrame(csv_reader, index=None)
                # BUG FIX: 'data\stock_data_' embedded a literal backslash
                # in the filename on POSIX ('\s' is not an escape sequence
                # and raises a DeprecationWarning on modern Python);
                # forward slashes work on every platform.
                file_name = ('data/stock_data_' + row['Code'] + '_' +
                             slice + ".csv")
                df_stock.to_csv(file_name, sep=',', header=True)
import pandas as pd

# NOTE(review): API key hardcoded in source.
api_key = '9YBSFK0A7B24SLJ7'
#pip install alpha_vantage
#https://github.com/RomelTorres/alpha_vantage
#https://www.alphavantage.co/documentation/
#https://alpha-vantage.readthedocs.io/en/latest/#

#https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=IBM&interval=15min&slice=year1month2&apikey=demo

from alpha_vantage.timeseries import TimeSeries
from pprint import pprint

# CSV output format: the call below returns a csv.reader plus metadata.
ts = TimeSeries(key=api_key, output_format='csv')  ##,  output_format='pandas')
csv_reader, meta_data = ts.get_intraday_extended(
    symbol='MSFT', interval='15min', slice='year1month1')  #, adjusted=True)

#for row in csv_reader:
#    pprint(row)
class AlphaVantageData(DataSource):
    # Data source backed by the Alpha Vantage REST API (minute + daily bars).
    SOURCE = "ALPHAVANTAGE"
    # Finest granularity this source can serve.
    MIN_TIMESTEP = "minute"
    # NOTE(review): the key is spelled "represntations" (sic); a consumer in
    # DataSource presumably reads this exact key — confirm before renaming.
    TIMESTEP_MAPPING = [
        {
            "timestep": "minute",
            "represntations": ["1min"]
        },
        {
            "timestep": "day",
            "represntations": ["1D"]
        },
    ]

    def __init__(self, config, max_workers=24, **kwargs):
        """Set up the per-timestep caches and the Alpha Vantage REST client.

        ``config`` must expose an ``API_KEY`` attribute.
        """
        # NOTE(review): the original comment claimed a 200-request Alpaca
        # limit, but the code caps the thread pool at 24 workers.
        self.name = "alphavantage"
        self.max_workers = min(max_workers, 24)

        # Per-symbol DataFrame caches, one per supported timestep.
        self._data_store_minutes = {}
        self._data_store_days = {}

        # Connection to the Alpha Vantage REST API; CSV output is converted
        # to row dicts by _csv_to_list.
        self.config = config
        self.api_key = config.API_KEY
        self.api = TimeSeries(key=self.api_key, output_format="csv")

    def _get_store(self, timestep):
        """Return the cache dict for *timestep*: the minutes store for
        'minute', the days store for anything else."""
        return (self._data_store_minutes
                if timestep == "minute" else self._data_store_days)

    def _csv_to_list(self, csv_reader):
        """Convert a CSV reader (first row = header) into a list of dicts,
        one per data row, keyed by the header columns."""
        header = next(csv_reader)
        return [dict(zip(header, record)) for record in csv_reader]

    def _request_minutes_data(self, symbol, slice):
        """Fetch one 1-minute intraday-extended slice for *symbol* and
        return it as a list of row dicts."""
        reader, _metadata = self.api.get_intraday_extended(symbol,
                                                           interval="1min",
                                                           slice=slice)
        return self._csv_to_list(reader)

    def _request_daily_data(self, symbol):
        """Fetch the full daily-adjusted history for *symbol* and return it
        as a list of row dicts."""
        reader, _metadata = self.api.get_daily_adjusted(symbol,
                                                        outputsize="full")
        return self._csv_to_list(reader)

    def _request_data(self, symbol, timestep):
        """Download bars for *symbol*.

        Minute data is fetched as 24 monthly slices (two years) fanned out
        over a thread pool; daily data is a single request.
        """
        if timestep != "minute":
            return self._request_daily_data(symbol)

        slice_names = [
            f"year{year}month{month}"
            for year in range(1, 3)
            for month in range(1, 13)
        ]
        data = []
        with ThreadPoolExecutor(
                max_workers=self.max_workers,
                thread_name_prefix=f"{self.name}_requesting_data",
        ) as executor:
            futures = [
                executor.submit(self._request_minutes_data, symbol, name)
                for name in slice_names
            ]
            # Collect in completion order; rows are sorted downstream.
            for future in as_completed(futures):
                data.extend(future.result())

        return data

    def _append_data(self, symbol, data, timestep):
        """Build a sorted, timezone-localized DataFrame from row dicts and
        cache it in the store for *timestep*.

        Intraday rows carry a 'time' column and daily rows a 'timestamp'
        column; whichever is present becomes the index.
        """
        store = self._get_store(timestep)
        df = pd.DataFrame(data)
        if "time" in df.columns:
            index_column = "time"
        else:
            index_column = "timestamp"

        df.set_index(index_column, inplace=True)
        df.sort_index(inplace=True)
        # BUG FIX: strptime with "%Y-%m-%d" raised ValueError on intraday
        # timestamps ("YYYY-MM-DD HH:MM:SS"); pd.to_datetime parses both the
        # daily and the intraday formats.
        df.index = pd.to_datetime(df.index).tz_localize(self.DEFAULT_TIMEZONE)
        store[symbol] = df
        return df

    def _pull_source_symbol_bars(self,
                                 symbol,
                                 length,
                                 timestep=MIN_TIMESTEP,
                                 timeshift=None):
        """Return the last *length* bars for *symbol*, fetching on cache miss.

        Raises ``NoDataFound`` when the API returns nothing. When
        *timeshift* is given, bars newer than ``now - timeshift`` are
        dropped before the tail is taken.
        """
        self._parse_source_timestep(timestep, reverse=True)
        store = self._get_store(timestep)
        if symbol not in store:
            raw = self._request_data(symbol, timestep)
            if not raw:
                raise NoDataFound(self.SOURCE, symbol)
            data = self._append_data(symbol, raw, timestep)
        else:
            data = store[symbol]

        if timeshift:
            cutoff = self.to_default_timezone(datetime.now() - timeshift)
            data = data[data.index <= cutoff]

        return data.tail(length)

    def _pull_source_bars(self,
                          symbols,
                          length,
                          timestep=MIN_TIMESTEP,
                          timeshift=None):
        """Pull bars for every symbol in *symbols*; returns {symbol: bars}."""
        self._parse_source_timestep(timestep, reverse=True)
        return {
            symbol: self._pull_source_symbol_bars(symbol,
                                                  length,
                                                  timestep=timestep,
                                                  timeshift=timeshift)
            for symbol in symbols
        }

    def _parse_source_symbol_bars(self, response, symbol):
        """Normalize a raw bars DataFrame for *symbol* into a Bars object.

        Drops 'adjusted_close', standardizes the dividend/split column
        names (defaulting them to 0 when absent), coerces dtypes, and adds
        derived price_change / dividend_yield / return columns.
        """
        df = response.copy()
        if "adjusted_close" in df.columns:
            del df["adjusted_close"]

        # Rename optional source columns to the standard names, creating
        # zero-filled columns when the source did not provide them.
        for source_col, target_col in (("dividend_amount", "dividend"),
                                       ("split_coefficient",
                                        "stock_splits")):
            if source_col in df.columns:
                df.rename(columns={source_col: target_col}, inplace=True)
            else:
                df[target_col] = 0

        dtype_map = {
            "open": "float64",
            "high": "float64",
            "low": "float64",
            "close": "float64",
            "volume": "int64",
            "dividend": "float64",
            "stock_splits": "float64",
        }
        df = df.astype(dtype_map)

        # Derived per-bar metrics.
        df["price_change"] = df["close"].pct_change()
        df["dividend_yield"] = df["dividend"] / df["close"]
        df["return"] = df["dividend_yield"] + df["price_change"]
        return Bars(df, self.SOURCE, symbol, raw=response)