def get_alphavantage_data(ticker):
    """Download the full daily OHLCV history for *ticker* from Alpha Vantage.

    Results are cached to ``stock_market_data-<ticker>.csv``; if the cache
    file already exists it is loaded instead of hitting the API.

    Args:
        ticker: Stock symbol to query (e.g. ``'AAPL'``).

    Returns:
        pandas.DataFrame with ``Date, Low, High, Close, Open, Volume``
        columns, sorted by ``Date`` ascending.

    Note:
        Relies on a module-level ``api_key`` being defined elsewhere in
        the file.
    """
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s" % (ticker, api_key)
    file_to_save = 'stock_market_data-%s.csv' % ticker
    if not os.path.exists(file_to_save):
        # FIX: the original passed verify=False, which disables TLS
        # certificate validation and exposes the request to MITM attacks.
        # The default (verify=True) is correct for a public HTTPS API.
        with requests.get(url_string) as r:
            data = r.json()
        data = data['Time Series (Daily)']
        df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open', 'Volume'])
        for k, v in data.items():
            date = dt.datetime.strptime(k, '%Y-%m-%d')
            data_row = [date.date(), float(v['3. low']), float(v['2. high']),
                        float(v['4. close']), float(v['1. open']),
                        float(v['5. volume'])]
            # Prepend at index -1 then shift all labels up by one.
            df.loc[-1, :] = data_row
            df.index = df.index + 1
        df = df.sort_values('Date')
        print('Data saved to : %s' % file_to_save)
        # FIX: index=False so the reload branch does not pick up a stray
        # 'Unnamed: 0' column from the saved index.
        df.to_csv(file_to_save, index=False)
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)
    return df
def get_alpha_vantage(stock, alphavantage_key):
    """Fetch the complete daily time series for *stock* from Alpha Vantage.

    Args:
        stock: Ticker symbol to query.
        alphavantage_key: Alpha Vantage API key string.

    Returns:
        pandas.DataFrame with ``Date, Open, High, Low, Close, Volume``
        columns, sorted by ``Date`` ascending.
    """
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s" % (
        stock, alphavantage_key)
    with urllib.request.urlopen(url_string) as url:
        payload = json.loads(url.read().decode())
    # The daily bars live under this single key of the JSON response.
    daily_bars = payload['Time Series (Daily)']
    df = pd.DataFrame(
        columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])
    for day, bar in daily_bars.items():
        when = dt.datetime.strptime(day, '%Y-%m-%d')
        row = [when.date(),
               float(bar['1. open']),
               float(bar['2. high']),
               float(bar['3. low']),
               float(bar['4. close']),
               float(bar['5. volume'])]
        # Prepend-and-shift: each new row is written at label -1, then the
        # whole index is bumped by one so earlier rows keep higher labels.
        df.loc[-1, :] = row
        df.index = df.index + 1
    return df.sort_values(by=['Date'])
def saveStockDate(ticker, api_key='6BXJN99BEYM5VWU3'):
    """Download the full adjusted daily history for *ticker* and cache it.

    Saves the data to ``Stock CSV Files/stock_market_data-<ticker>.csv``
    and returns it as a DataFrame.

    Args:
        ticker: Stock symbol to query.
        api_key: Alpha Vantage API key (generalized from the previously
            hard-coded constant; default preserves old behavior).

    Returns:
        pandas.DataFrame with ``Date, Low, High, Close, Open, Volume,
        Adj Close`` columns, sorted by ``Date`` ascending.
    """
    # JSON response holds the full (up to ~20 years) adjusted daily series.
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=%s&outputsize=full&apikey=%s" % (ticker, api_key)
    # Save data to this file
    file_to_save = 'Stock CSV Files/stock_market_data-%s.csv' % ticker
    with urllib.request.urlopen(url_string) as url:
        data = json.loads(url.read().decode())
    # extract stock market data
    data = data['Time Series (Daily)']
    df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open',
                               'Volume', 'Adj Close'])
    for k, v in data.items():
        date = dt.datetime.strptime(k, '%Y-%m-%d')
        data_row = [date.date(), float(v['3. low']), float(v['2. high']),
                    float(v['4. close']), float(v['1. open']),
                    float(v['6. volume']), float(v['5. adjusted close'])]
        df.loc[-1, :] = data_row
        df.index = df.index + 1
    # FIX: sort chronologically (the original left rows in API order).
    df = df.sort_values('Date')
    # FIX: make sure the target directory exists before writing.
    os.makedirs(os.path.dirname(file_to_save), exist_ok=True)
    print('Data saved to : %s' % file_to_save)
    # FIX: the original used mode='a', header=False, which produced a
    # header-less CSV and duplicated the entire dataset on every call.
    # Overwrite with a proper header instead.
    df.to_csv(file_to_save, index=False)
    return df
def _populate_data_row(self, data_frame_row):
    """Enrich *data_frame_row* in place with additional stock fields.

    Fetches extra information via ``self._get_stock_info`` using the row's
    ``Link`` field, copies every returned key/value pair onto the row, and
    returns the (mutated) row.
    """
    extra_fields = self._get_stock_info(url_suffix=data_frame_row.Link)
    for field_name, field_value in extra_fields.items():
        data_frame_row[field_name] = field_value
    return data_frame_row
def test(data1):
    """Fetch (or load from cache) daily OHLC data for ticker *data1*.

    Downloads the full daily series from Alpha Vantage on first use and
    caches it to ``stock_market_data-<ticker>.csv``; later calls read the
    cached CSV.

    Args:
        data1: Stock ticker symbol.

    Returns:
        pandas.DataFrame with ``Date, Low, High, Close, Open`` columns,
        sorted by ``Date`` ascending.
    """
    # ====================== Loading Data from Alpha Vantage ==================
    # FIX: removed the dead `data_source` switch — it was hard-coded to
    # 'alphavantage', so the conditional could never take another path.
    api_key = 'UX64ZSMDKVH8KB48'
    ticker = data1
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s" % (
        ticker, api_key)
    # Save data to this file
    file_to_save = 'stock_market_data-%s.csv' % ticker
    if not os.path.exists(file_to_save):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
        # extract stock market data
        data = data['Time Series (Daily)']
        df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open'])
        for k, v in data.items():
            date = dt.datetime.strptime(k, '%Y-%m-%d')
            data_row = [date.date(), float(v['3. low']), float(v['2. high']),
                        float(v['4. close']), float(v['1. open'])]
            df.loc[-1, :] = data_row
            df.index = df.index + 1
        print('Data saved to : %s' % file_to_save)
        # FIX: index=False keeps the cache round-trippable — the original
        # saved the index too, so re-loading produced a stray 'Unnamed: 0'
        # column.
        df.to_csv(file_to_save, index=False)
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)
    # Sort DataFrame by date
    df = df.sort_values('Date')
    return df
def fetchData(dataSource, stockTicker):
    """Fetches the data for a specific ticker from either kaggle or alpha Vantage

    Params:
        dataSource: alphavantage or whatever for Kaggle
        stockTicker: the ticker for the stock eg: ABC
    Returns:
        dataframe of the data requested"""
    if dataSource != 'alphavantage':
        # Anything other than 'alphavantage' falls back to the local
        # Kaggle text dump for the ticker.
        frame = pd.read_csv(os.path.join('Stocks', '%s.us.txt' % stockTicker),
                            delimiter=',',
                            usecols=['Date', 'Open', 'High', 'Low', 'Close'])
        print('Loaded data from the Kaggle repository')
        return (frame)

    api_key = getKey("Alpha_Vantage_Api")
    # Build the API request URL for the full daily series.
    requestUrl = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s" % (
        stockTicker, api_key)
    # Local cache path for the fetched data.
    cachePath = 'stock_market_data-%s.csv' % stockTicker
    if os.path.exists(cachePath):
        # Already downloaded once — just load the cached CSV.
        print('File already exists. Loading data from CSV')
        return (pd.read_csv(cachePath))

    # Download the JSON payload and convert it into a DataFrame.
    with urllib.request.urlopen(requestUrl) as response:
        payload = json.loads(response.read().decode())
    # extract stock market data
    series = payload['Time Series (Daily)']
    frame = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open'])
    for tradingDay, quote in series.items():
        parsed = dt.datetime.strptime(tradingDay, '%Y-%m-%d')
        # Prepend the row at label -1, then shift every label up by one.
        frame.loc[-1, :] = [parsed.date(),
                            float(quote['3. low']),
                            float(quote['2. high']),
                            float(quote['4. close']),
                            float(quote['1. open'])]
        frame.index = frame.index + 1
    print('Data saved to : %s' % cachePath)
    frame.to_csv(cachePath)
    return (frame)
def retrieveStockData(api_key, stockTicker):
    """Download the full adjusted daily history for *stockTicker* to a CSV.

    The CSV filename embeds today's date, so the download is repeated at
    most once per day per ticker; if today's file already exists the
    download is skipped.

    Args:
        api_key: Alpha Vantage API key.
        stockTicker: Stock symbol to query.

    Returns:
        The name of the CSV file holding the stock history.
    """
    print("Stock selected is: " + stockTicker)
    # Locale date with '/' swapped for '-' so it is filename-safe.
    today = dt.datetime.now().strftime("%x").replace("/", "-")
    print("Retrieving stock history up until %s\n" % today)
    # JSON file with all the stock market data for AAL from the last 20 years
    query = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=%s&outputsize=full&apikey=%s" % (
        stockTicker, api_key)
    # Save data to this file
    file_to_save = 'stockMarketData_%s_%s.csv' % (stockTicker, today)
    if os.path.exists(file_to_save):
        # Today's snapshot already exists — alert user and reuse it.
        print('Stock history is already up to date.')
        return file_to_save

    with urllib.request.urlopen(query) as url:
        payload = json.loads(url.read().decode())
    # Extract stock market data
    daily = payload['Time Series (Daily)']
    # Set coloumn headers
    df = pd.DataFrame(columns=[
        'Date', 'Low', 'High', 'Close', 'Adjusted Close', 'Open'
    ])
    for day, quote in daily.items():
        stamp = dt.datetime.strptime(day, '%Y-%m-%d')
        # Prepend at label -1, then shift all labels up by one.
        df.loc[-1, :] = [stamp.date(),
                         float(quote['3. low']),
                         float(quote['2. high']),
                         float(quote['4. close']),
                         float(quote['5. adjusted close']),
                         float(quote['1. open'])]
        df.index = df.index + 1
    # Reverses order of the rows (oldest bar first).
    df = df[::-1]
    print('Data saved to: %s' % file_to_save)
    df.to_csv(file_to_save)
    return file_to_save
# Build the intraday query; `ticker`, `interval` and `api_key` are assumed
# to be defined earlier in the file.
url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=%s&interval=%s&apikey=%s" % (
    ticker, interval, api_key)

# Save data to this file
file_to_save = 'stock_market_data-%s.csv' % ticker

# If you haven't already saved data,
# Go ahead and grab the data from the url
# And store date, low, high, volume, close, open values to a Pandas DataFrame
if not os.path.exists(file_to_save):
    with urllib.request.urlopen(url_string) as url:
        data = json.loads(url.read().decode())
        # extract stock market data; intraday responses key the series by
        # the interval, e.g. 'Time Series (5min)'
        data = data['Time Series (%s)' % (interval)]
        df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open'])
        for k, v in data.items():
            date = dt.datetime.strptime(k, '%Y-%m-%d %H:%M:%S')
            # NOTE(review): date.date() drops the time-of-day, so multiple
            # intraday bars collapse onto the same Date value — confirm
            # this is intended before relying on the Date column.
            data_row = [
                date.date(),
                float(v['3. low']),
                float(v['2. high']),
                float(v['4. close']),
                float(v['1. open'])
            ]
            df.loc[-1, :] = data_row
            df.index = df.index + 1
        print('Data saved to : %s' % file_to_save)
        df.to_csv(file_to_save)
# If the data is already there, just load it from the CSV
else:
    print('File already exists. Loading data from CSV')
    # FIX: the original printed this message but never loaded the cached
    # file, leaving `df` undefined for any code that follows.
    df = pd.read_csv(file_to_save)
# Choose where to load data from: 'alphavantage' hits the live API,
# anything else falls through to the Kaggle branch below.
data_source = 'kaggle'

if data_source == 'alphavantage':
    # NOTE(review): placeholder string — must be replaced with a real
    # Alpha Vantage API key before this branch can work.
    api_key = '[API key]'
    ticker = "AAL"
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker,api_key)
    file_to_save = 'stock_market_data-2018%s.csv'%ticker
    # Download only if we have not cached the data yet.
    if not os.path.exists(file_to_save):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
            # extract stock market data
            data = data['Time Series (Daily)']
            df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open'])
            for k, v in data.items():
                date = dt.datetime.strptime(k, '%Y-%m-%d')
                data_row = [date.date(), float(v['3. low']), float(v['2. high']), float(v['4. close']), float(v['1. open'])]
                # Prepend at label -1, then shift every label up by one.
                df.loc[-1,:] = data_row
                df.index = df.index + 1
            print('Data saved to : %s'%file_to_save)
            df.to_csv(file_to_save)
    # if the data is already there, simply load it from the csv file
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)
else:
    # ====================== Loading Data from Kaggle ==================================
    # You will be using HP's data. Feel free to experiment with other data.
    # NOTE(review): the Kaggle-loading body continues beyond this chunk.
url = urllib.request.urlopen(url_string) # Parses data into json dictionary data = json.loads(url.read().decode()) # Selects one subdictionary. I don't know what the other keys do # This subdict is a collection of dates and their data data = data['Time Series (Daily)'] # I believe a Pandas DataFrame acts like a table # So this is creating the headers df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open']) # Loops through each key,value pair in the data dict # Keys are dates, values are subdicts for date, values in data.items(): # Reformat date into computer-readable format date = dt.datetime.strptime(date, '%Y-%m-%d') # This list becomes the row for each date in the DataFrame data_row = [ date.date(), float(values['3. low']), float(values['2. high']), float(values['4. close']), float(values['1. open']) ] # I believe this appends the row to the end of the DataFrame # Who the f**k thought of this convoluted syntax?
def load_data(ds):
    """Load daily price data for the module-level *ticker* from *ds*.

    Args:
        ds: Data-source name; ``'alphavantage'`` downloads from (or loads a
            cached copy of) the Alpha Vantage API, anything else reads the
            pre-downloaded Kaggle file for the ticker.

    Returns:
        pandas.DataFrame with Date/Low/High/Close/Open columns, sorted by
        ``'Date'`` ascending.

    Note:
        Depends on module-level ``ticker``, ``lines``, ``lineno`` and
        ``print_code`` being defined elsewhere in the file.
    """
    data_source = ds
    # setup for piping out output
    print('\t"""')
    print('Loading Data...')
    print_code(lines[lineno():lineno() + 50])
    print("Loading " + ticker + " data from " + data_source + "...")
    if data_source == 'alphavantage':
        # ---- Loading Data from Alpha Vantage ----
        # FIX: strip() removes the trailing newline most editors append —
        # the original check len(api_key) != 16 rejected valid keys because
        # '\n' pushed the length to 17. The with-block also guarantees the
        # file is closed even on the error path (the original leaked the
        # handle when exit(3) fired before f.close()).
        with open("api_key", "r") as f:
            api_key = f.read().strip()
        if len(api_key) != 16:
            print("Error: Invalid API Key")
            exit(3)
        # JSON file with all the stock market data for the ticker from the
        # last 20 years.
        url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s" % (
            ticker, api_key)
        # Setting up file to save to
        file_to_save = 'stock_market_data-%s.csv' % ticker
        # If the cache doesn't exist, download, save, and build a DataFrame.
        if not os.path.exists(file_to_save):
            with urllib.request.urlopen(url_string) as url:
                data = json.loads(url.read().decode())
                # extract stock market data
                data = data['Time Series (Daily)']
                df = pd.DataFrame(
                    columns=['Date', 'Low', 'High', 'Close', 'Open'])
                for k, v in data.items():
                    date = dt.datetime.strptime(k, '%Y-%m-%d')
                    data_row = [
                        date.date(),
                        float(v['3. low']),
                        float(v['2. high']),
                        float(v['4. close']),
                        float(v['1. open'])
                    ]
                    df.loc[-1, :] = data_row
                    df.index = df.index + 1
                print('Data saved to : %s' % file_to_save)
                # Save file for future use
                df.to_csv(file_to_save)
        # If file already created, load data from it
        else:
            print('File already exists. Loading data from CSV')
            df = pd.read_csv(file_to_save)
    else:
        # Load Data from pre-downloaded kaggle file of selected ticker
        df = pd.read_csv(os.path.join('Stocks', ticker.lower() + '.us.txt'),
                         delimiter=',',
                         usecols=['Date', 'Open', 'High', 'Low', 'Close'])
        print('Opening kaggle CSV')
    # Sort DataFrame by date
    df = df.sort_values('Date')
    return df
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sun Apr 7 17:16:43 2019 @author: gavinswofford """ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sun Apr 7 11:29:24 2019 @author: gavinswofford """ # Part 1 - Data Preprocessing from pandas_datareader import data import matplotlib.pyplot as plt import pandas as pd import datetime as dt import urllib.request, json import os import tensorflow as tf # This code has been tested with TensorFlow 1.6 from sklearn.preprocessing import MinMaxScaler import json import requests # Importing the libraries import numpy as np from keras.models import Sequential from keras.layers import Dense from keras.layers import LSTM from keras.layers import Dropout data_source = 'alphavantage'