def request_Quandl(symbol, tableName, cols, digits):
    con = connect_to_DB()
    create_table(tableName, con)
    str_ = "SELECT * FROM `" + tableName + "` ORDER BY DATE_TIME DESC LIMIT 1;"
    print str_
    data = pd.read_sql_query(str_, con=con, index_col='DATE_TIME')
    if empty(data) == 0:
        # execute query with connection
        quandl_data = q.get(symbol, returns='pandas', authtoken=token, sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)
    else:
        # resume from the day after the last stored row; timedelta handles
        # month/year boundaries (the original day + 1 arithmetic did not).
        # Assumes `from datetime import timedelta` among the module imports.
        next_day = data.index[0] + timedelta(days=1)
        last_date = next_day.strftime('%Y-%m-%d')
        print "Last date " + last_date
        quandl_data = q.get(symbol, returns='pandas', authtoken=token,
                            trim_start=last_date, sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)

def index():
    if request.method == 'POST':
        if request.form.get('stocks', None):
            symbols = request.form.get('symbols', None)
            if symbols.find('#') == -1:
                # single ticker
                try:
                    sdata = [ql.get('WIKI/' + symbols, trim_start=get_start_date(), authtoken=q_toke)]
                    names = [symbols]
                except:
                    flash("Sorry, invalid name; please try another ticker symbol.")
                    return render_template('index.html')
            else:
                # multiple tickers separated by '#'
                try:
                    sdata = [ql.get('WIKI/' + u, trim_start=get_start_date(), authtoken=q_toke)
                             for u in symbols.rsplit('#')]
                    names = symbols.rsplit('#')
                except:
                    flash("Sorry, invalid name; please try another ticker symbol.")
                    return render_template('index.html')
            reqs = ['adjc', 'adjo', 'c', 'o']
            r2ql = dict(zip(reqs, ['Adj. Close', 'Adj. Open', 'Close', 'Open']))
            checked = []
            # gather checked values
            for r in [request.form.get(n, None) for n in reqs]:
                if r:
                    checked.append(r2ql[r])
            # make bokeh plot with the data
            script, div = make_bokeh_html(sdata, names, checked)
            return render_template('stocks.html', script=script, div=div)
    return render_template('index.html')

def load(self):
    # On first run, query the Quandl website, download the data, and
    # store it in local directories as csv files.
    # If the data has already been downloaded, load the local csv instead.
    fund_code = self.fund_code
    index_code = self.index_code
    start_date = self.start_date
    fund_file_name = "../data/" + fund_code.replace("/", "_") + ".csv"
    index_file_name = "../data/" + index_code.replace("/", "_") + ".csv"
    if path.isfile(fund_file_name):
        # file exists locally; the csv has to be parsed with date as index
        fund_data = pd.io.parsers.read_csv(fund_file_name, index_col=0)
    else:
        # download from Quandl and save it locally as a csv file
        fund_data = Quandl.get(fund_code, trim_start=start_date, authtoken="XANwfFd9CmdoE3PdFzRg")
        fund_data.to_csv(fund_file_name)
    if path.isfile(index_file_name):
        index_data = pd.io.parsers.read_csv(index_file_name, index_col=0)
    else:
        index_data = Quandl.get(index_code, trim_start=start_date, authtoken="XANwfFd9CmdoE3PdFzRg")
        index_data.to_csv(index_file_name)
    # rename columns (prefixing with the Quandl code) so the two dataframes
    # don't share any common names
    index_data.columns = [index_code + '_' + c for c in index_data.columns]
    fund_data.columns = [fund_code + '_' + c for c in fund_data.columns]
    # join the two data frames by date
    self.data = fund_data.join(index_data, how='inner')
    if index_code + '_Adjusted Close' not in self.data.columns:
        # if there is no adjusted close, copy close so the name is uniform
        # across different data sets
        self.data[index_code + '_Adjusted Close'] = self.data[index_code + '_Close']

def getCommodityPrice():
    # get crude oil data
    oilData = Quandl.get("OPEC/ORB", authtoken="xxxxxxxxxxxxxxxx")
    print(oilData.tail())
    print(oilData.head())
    oilData.to_csv('quandlcrudeoilData.csv')
    # get gold data
    goldData = Quandl.get("BUNDESBANK/BBK01_WT5511", authtoken="xxxxxxxxxxxxxxxx")
    print(goldData.tail())
    print(goldData.head())
    goldData.to_csv('quandlgoldData.csv')
    # get copper data
    copperData = Quandl.get("LSE/COPA", authtoken="xxxxxxxxxxxxxxxx")
    print(copperData.tail())
    print(copperData.head())
    copperData.to_csv('quandlcopperData.csv')
    # get silver data
    silverData = Quandl.get("LBMA/SILVER", authtoken="xxxxxxxxxxxxxxxx")
    print(silverData.tail())
    print(silverData.head())
    silverData.to_csv('quandlsilverData.csv')
    # get bitcoin pricing data
    bitcoinData = Quandl.get("BAVERAGE/USD", authtoken="xxxxxxxxxxxxxxxx")
    print(bitcoinData.tail())
    print(bitcoinData.head())
    bitcoinData.to_csv('bitcoindata.csv')
    time.sleep(86400)  # sleep for a day

def extractData(self):
    # ------------------------------
    # -- extract raw data from quandl
    # ------------------------------
    print "extracting data from quandl ... \n"
    # get data
    dataGbpAud = Quandl.get("BOE/XUDLADS", trim_start=self.startDate)
    dataEurGbp = Quandl.get("BOE/XUDLSER", trim_start=self.startDate)
    print "preparing data ... \n"
    dataGbpAud.columns = ['gbpaud']
    # invert EUR/GBP to get GBP/EUR
    dataGbpEur = 1 / dataEurGbp
    dataGbpEur.columns = ['gbpeur']
    dataExchange = dataGbpEur.merge(dataGbpAud,
                                    how='inner',
                                    left_index=True,
                                    right_index=True)
    print "extracting rawdata ... \n"
    self.rawData = []  # rows of extracted data
    for index, row in dataExchange.iterrows():
        rowval = ExchangeRates(period=index, gbpeur=row['gbpeur'], gbpaud=row['gbpaud'])
        self.rawData.append(rowval)
    print "rawdata extract complete. \n"

def update_dataset_data(engine, dataset, api_key):
    """
    Updates data for each code in the data set, retrieving new entries
    since the last update.
    """
    update_code = sql.text(
        'UPDATE ' + dataset + '_CODES '
        'SET [Start Date] = :start, [End Date] = :end, [Last Updated] = :updated '
        'WHERE Code = :code')
    conn = engine.connect()
    # retrieve the current code table
    code_table = load_sql_data(engine, dataset + '_CODES',
                               date_columns=['Start Date', 'End Date', 'Last Updated'])
    # TESTING ONLY
    code_table = code_table.iloc[:10, :]
    # fetch the first code's data to create an empty template data frame
    data = Quandl.get(code_table['API Code'].iloc[0], rows=1, authtoken=api_key)
    data = data.reset_index()
    data = data.rename(columns=lambda x: x[0] + x[1:].lower())
    data['Code'] = code_table['Code'].iloc[0]
    data = data.iloc[0:0]
    # iterate over each code and append the returned data
    counter = 0
    for index, row in code_table.iterrows():
        if row['Last Updated'] == datetime(1900, 1, 1):
            # this is a new code so we need to pull all historical data
            print('Loading historical data for new company {0}...'.format(row['API Code']))
            code_data = Quandl.get(row['API Code'], authtoken=api_key)
        else:
            # incremental update from the current end date for the code
            code_data = Quandl.get(row['API Code'], trim_start=str(row['End Date']),
                                   authtoken=api_key)
        # concat new data to the total set of new records
        code_data = code_data.reset_index()
        code_data = code_data.rename(columns=lambda x: x[0] + x[1:].lower())
        code_data['Code'] = row['Code']
        data = pd.concat([data, code_data])
        # update the code table
        min_date = code_data['Date'].min().to_datetime()
        max_date = code_data['Date'].max().to_datetime()
        current_date = datetime.now()
        conn.execute(update_code, start=min_date, end=max_date,
                     updated=current_date, code=row['Code'])
        counter += 1
        if counter % 100 == 0:
            print('Sleeping for one minute to avoid hitting API call limits...')
            time.sleep(60)
    # move the code column to the beginning
    columns = data.columns.tolist()
    columns = [columns[-1]] + columns[:-1]
    data = data[columns]
    save_sql_data(engine, data, dataset, exists='append', index=False)
    print(dataset + ' code data updated successfully.')

def test_api_calls(api_key):
    """
    Test function with examples of calls using the Quandl API via the
    Python wrapper.
    """
    data = Quandl.get('WIKI/AAPL', authtoken=api_key)
    data_new = Quandl.get('WIKI/AAPL', rows=5, sort_order='desc', authtoken=api_key)
    data_range = Quandl.get('WIKI/AAPL', trim_start='2015-01-01',
                            trim_end='2015-01-01', authtoken=api_key)
    data_multiple = Quandl.get(['WIKI/AAPL', 'WIKI/MSFT'], authtoken=api_key)

def getQuandle(what):
    """ Wrapper around Quandl requests, using authtoken only if available """
    if authtoken:
        return Quandl.get(what, authtoken=authtoken)
    else:
        return Quandl.get(what)

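A minimal usage sketch for the wrapper above, assuming the module-level authtoken has already been configured; the dataset code is illustrative only.

# Hypothetical example: fetch a series through the wrapper, which only
# passes an authtoken when one is available.
gold = getQuandle("BUNDESBANK/BBK01_WT5511")
print(gold.tail())
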
def loadQuandlData():
    quandl_id = "hThEeCQhKC2oWz2hSB4x"
    it = Quandl.get(['GOOG/NASDAQ_AAPL', 'GOOG/NASDAQ_GOOGL', 'GOOG/NASDAQ_MSFT',
                     'GOOG/NASDAQ_FB', 'GOOG/NYSE_ORCL'],
                    trim_start='1998-01-01', authtoken=quandl_id)
    financial = Quandl.get(['GOOG/NYSE_DB', 'GOOG/NYSE_GS', 'GOOG/NYSE_SAN'],
                           authtoken=quandl_id)
    industrial = Quandl.get(['GOOG/NYSE_SI', 'GOOG/NYSE_GE'], authtoken=quandl_id)
    closing_it = it[['GOOG.NASDAQ_AAPL - Close', 'GOOG.NASDAQ_GOOGL - Close',
                     'GOOG.NASDAQ_MSFT - Close', 'GOOG.NASDAQ_FB - Close',
                     'GOOG.NYSE_ORCL - Close']]
    closing_financial = financial[['GOOG.NYSE_DB - Close', 'GOOG.NYSE_GS - Close',
                                   'GOOG.NYSE_SAN - Close']]
    closing_industrial = industrial[['GOOG.NYSE_SI - Close', 'GOOG.NYSE_GE - Close']]
    closing_it.to_csv('Data/Closing_it.csv', sep=';')
    closing_financial.to_csv('Data/Closing_financial.csv', sep=';')
    closing_industrial.to_csv('Data/Closing_industrial.csv', sep=';')

def fetchSpread(CONTRACT, M1, M2, ST_YEAR, END_YEAR, CONT_YEAR1, CONT_YEAR2,
                ST_DATE, END_DATE, BUCK_PRICE, STARTFROMZERO, years):
    startdate = datetime.strptime(ST_DATE, '%Y-%m-%d %H:%M:%S')
    enddate = datetime.strptime(END_DATE, '%Y-%m-%d %H:%M:%S')
    totalSpread = pd.Series()
    lastValue = 0
    for i in years:
        year = str(i)
        price = str(BUCK_PRICE)
        filename = CONTRACT + M1 + M2 + year + ST_DATE + END_DATE + price
        filename = re.sub('[/ ]', '_', filename)
        filename = re.sub('[:]', '.', filename)
        cont1 = str(CONTRACT) + str(M1) + str(i + CONT_YEAR1)
        cont2 = str(CONTRACT) + str(M2) + str(i + CONT_YEAR2)
        print('==============')
        print("contract1: " + cont1)
        print("contract2: " + cont2)
        startDate = startdate.replace(year=ST_YEAR - 2000 + i)
        endDate = enddate.replace(year=END_YEAR - 2000 + i)
        print('==============')
        print('Trim start: ', startDate.strftime('%Y-%m-%d'))
        print('Trim end: ', endDate.strftime('%Y-%m-%d'))
        print('==============')
        if not checkIfCached(filename):
            data1 = q.get(cont1, authtoken=AUTH_TOKEN, trim_start=startDate, trim_end=endDate)
            data2 = q.get(cont2, authtoken=AUTH_TOKEN, trim_start=startDate, trim_end=endDate)
            spread = (data1 - data2).Settle * BUCK_PRICE
            if spread.size == 0:
                print('!!!!!!!!!!!!*****WARNING****!!!!!!!!!!!!')
                print('No data available for contracts %s, %s. Skipping period from %s to %s.' % (
                    cont1, cont2, startDate.strftime('%Y-%m-%d'), endDate.strftime('%Y-%m-%d')))
                print('!!!!!!!!!!!!*****WARNING****!!!!!!!!!!!!')
                continue
            else:
                if math.isnan(spread[0]):
                    # back-fill a leading NaN with the next valid value
                    spread = spread.fillna(method='bfill')
                # forward-fill any remaining NaN values
                spread = spread.fillna(method='pad')
                # spread = spread.dropna()
                writeCacheToFile(filename, spread)
        else:
            print("Loading cached data from file: %s !" % filename)
            cache = readCacheFromFile(filename)
            spread = cache
        if STARTFROMZERO:
            delta = lastValue - spread[0]
            spread = spread + delta
        totalSpread = totalSpread.append(spread)
        lastValue = totalSpread[-1]
    if totalSpread.size == 0:
        sys.exit(-1)
    return totalSpread

def readLast5DaysStockPrice():
    with open('stock_features.csv', 'w') as csvfile:
        fieldnames = ['Stock',
                      'Adj_Close_1', 'High_1', 'Low_1',
                      'Adj_Close_2', 'High_2', 'Low_2', 'VolChg2_1', 'IndexChg2_1',
                      'Adj_Close_3', 'High_3', 'Low_3', 'VolChg3_2', 'IndexChg3_2',
                      'Adj_Close_4', 'High_4', 'Low_4', 'VolChg4_3', 'IndexChg4_3',
                      'Adj_Close_5', 'High_5', 'Low_5', 'VolChg5_4', 'IndexChg5_4']
        writer = csv.DictWriter(csvfile, lineterminator='\n', fieldnames=fieldnames)
        writer.writeheader()
        dow_code = 'BCB/UDJIAD1'
        dji_index = []
        j = 0
        while j < 10:
            ticker = Share(stock_dict.get(j))
            i = 0
            stock_price = []
            current_date = date.today()
            fmt = '%Y-%m-%d'
            while i < 5:
                if ticker.get_historical(current_date.strftime(fmt), current_date.strftime(fmt)) == []:
                    # no quote for this date (weekend/holiday): step back one day
                    current_date = current_date - timedelta(days=1)
                else:
                    stock_price += ticker.get_historical(current_date.strftime(fmt), current_date.strftime(fmt))
                    if j == 0:
                        # record the Dow index once, alongside the first stock
                        if (i == 0 and Quandl.get(dow_code, trim_start=current_date, trim_end=current_date,
                                                  authtoken="T246AaoCUiwSyz1C4Vfe").values.tolist() == []):
                            dji_index.append(get_latest_dji())
                        else:
                            dji_index.append(Quandl.get(dow_code, trim_start=current_date, trim_end=current_date,
                                                        authtoken="T246AaoCUiwSyz1C4Vfe").values.tolist()[0][0])
                    current_date = current_date - timedelta(days=1)
                    i = i + 1
            # signed log-2 of the day-over-day volume change
            AbVolChg2_1 = int(stock_price[1].get('Volume')) - int(stock_price[0].get('Volume'))
            VolChg2_1 = log(AbVolChg2_1, 2) if AbVolChg2_1 > 0 else -1 * log(fabs(AbVolChg2_1), 2)
            AbVolChg3_2 = int(stock_price[2].get('Volume')) - int(stock_price[1].get('Volume'))
            VolChg3_2 = log(AbVolChg3_2, 2) if AbVolChg3_2 > 0 else -1 * log(fabs(AbVolChg3_2), 2)
            AbVolChg4_3 = int(stock_price[3].get('Volume')) - int(stock_price[2].get('Volume'))
            VolChg4_3 = log(AbVolChg4_3, 2) if AbVolChg4_3 > 0 else -1 * log(fabs(AbVolChg4_3), 2)
            AbVolChg5_4 = int(stock_price[4].get('Volume')) - int(stock_price[3].get('Volume'))
            VolChg5_4 = log(AbVolChg5_4, 2) if AbVolChg5_4 > 0 else -1 * log(fabs(AbVolChg5_4), 2)
            writer.writerow({'Stock': stock_dict.get(j),
                             'Adj_Close_1': stock_price[0].get('Adj_Close'),
                             'High_1': stock_price[0].get('High'),
                             'Low_1': stock_price[0].get('Low'),
                             'Adj_Close_2': stock_price[1].get('Adj_Close'),
                             'High_2': stock_price[1].get('High'),
                             'Low_2': stock_price[1].get('Low'),
                             'VolChg2_1': VolChg2_1,
                             'IndexChg2_1': (float(dji_index[1]) - float(dji_index[0])),
                             'Adj_Close_3': stock_price[2].get('Adj_Close'),
                             'High_3': stock_price[2].get('High'),
                             'Low_3': stock_price[2].get('Low'),
                             'VolChg3_2': VolChg3_2,
                             'IndexChg3_2': (dji_index[2] - dji_index[1]),
                             'Adj_Close_4': stock_price[3].get('Adj_Close'),
                             'High_4': stock_price[3].get('High'),
                             'Low_4': stock_price[3].get('Low'),
                             'VolChg4_3': VolChg4_3,
                             'IndexChg4_3': (dji_index[3] - dji_index[2]),
                             'Adj_Close_5': stock_price[4].get('Adj_Close'),
                             'High_5': stock_price[4].get('High'),
                             'Low_5': stock_price[4].get('Low'),
                             'VolChg5_4': VolChg5_4,
                             'IndexChg5_4': (dji_index[4] - dji_index[3])})
            j = j + 1

def info(symbol):
    query_params = request.args
    trim_start = query_params.get('start_date') or '2015-12-01'
    trim_end = query_params.get('end_date') or '2015-12-31'
    datasets = Quandl.search(symbol, authtoken=keys['quandl'], verbose=False)
    code = datasets[0][u'code']
    data = Quandl.get(code, authtoken=keys['quandl'], collapse='daily',
                      trim_start=trim_start, trim_end=trim_end)
    return data.to_html()

def get_data_set(self, data_set, start, end, drop=None, collapse="daily", transform="None"):
    """
    Method for downloading one data set from Quandl
    :param data_set: the data set code
    :param start: the start date
    :param end: the end date
    :param drop: which columns to drop
    :param collapse: frequency of the data
    :param transform: any data transformations from quandl
    :return: the data set as a pandas data frame
    """
    if drop is None:
        drop = []
    if self.proxy:
        # If we are running behind the proxy, set it up
        os.environ['HTTP_PROXY'] = "http://" + self.username + ":" + self.password + "@" + self.server
    # Check if the data frame has been downloaded already in this session
    hash_val = hash(data_set + str(start) + str(end) + str(transform))
    if hash_val in self.memoized_data:
        return self.memoized_data[hash_val]
    else:
        try:
            print("\tDownloading", data_set)
            # Otherwise download the data frame from scratch
            # (use != rather than `is not`: identity checks on string
            # literals are unreliable)
            if transform != "None":
                downloaded_data_frame = Quandl.get(data_set, authtoken=self.token,
                                                   trim_start=start, trim_end=end,
                                                   collapse=collapse, transformation=transform)
            else:
                downloaded_data_frame = Quandl.get(data_set, authtoken=self.token,
                                                   trim_start=start, trim_end=end,
                                                   collapse=collapse)
            # Remove any unnecessary columns and rename the rest with the
            # data set code as prefix
            updated_column_labels = []
            for column_label in downloaded_data_frame.columns:
                if column_label in drop:
                    downloaded_data_frame = downloaded_data_frame.drop([column_label], axis=1)
                else:
                    updated_column_labels.append(data_set + "_" + column_label)
            downloaded_data_frame.columns = updated_column_labels
            self.memoized_data[hash_val] = downloaded_data_frame
            return downloaded_data_frame
        except Quandl.DatasetNotFound:
            print("Exception - DatasetNotFound", data_set)
        except Quandl.CodeFormatError:
            print("Exception - CodeFormatError", data_set)
        except Quandl.DateNotRecognized:
            print("Exception - DateNotRecognized", data_set)
        except Quandl.ErrorDownloading:
            print("Exception - ErrorDownloading", data_set)
        except Quandl.ParsingError:
            print("Exception - ParsingError", data_set)
        except:
            print("Some other error occurred")

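A hedged usage sketch for the memoized downloader above; `downloader` stands in for an instance of the owning class, and the dataset code, dates, and drop list are placeholders.

# Hypothetical call: repeated requests with the same arguments are served
# from self.memoized_data rather than re-hitting the Quandl API.
frame = downloader.get_data_set("WIKI/AAPL", "2014-01-01", "2015-01-01",
                                drop=["Volume"], collapse="weekly")
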
def main():
    actually_query = True
    query_format = "WIKI/%s"
    sp500 = read_sp500()
    for c in sp500:
        query = query_format % c.stripped_ticker()
        #print("query: %s" % query)
        #continue
        if actually_query:
            '''
            data = Quandl.get(query, collapse='daily', trim_start='2015-10-01',
                              trim_end='2015-10-31', limit=31,
                              authtoken='ZbyrpahXoQapzxxLR8Qe')
            print(data.index)
            sum = 0
            for i, r in data.iterrows():
                print(r['Volume'])
                sum += r['Volume']
            print("sum: %d\n" % sum)
            '''
            try:
                volume_data = Quandl.get(query, collapse='daily',
                                         trim_start='2015-10-01', trim_end='2015-10-31',
                                         row=1, limit=1, sort_order="desc",
                                         transformation="cumul",
                                         authtoken='ZbyrpahXoQapzxxLR8Qe')
                close_data = Quandl.get(query, collapse='monthly',
                                        trim_start='2015-09-01', trim_end='2015-10-31',
                                        row=1, limit=1, sort_order="desc",
                                        transformation="rdiff",
                                        authtoken='ZbyrpahXoQapzxxLR8Qe')
                if 'Volume' in volume_data and len(volume_data['Volume']) > 0:
                    c.stock_volume = volume_data['Volume'][0]
                else:
                    c.stock_volume = 'NaN'
                if 'Close' in close_data and len(close_data['Close']) > 0:
                    c.stock_close = close_data['Close'][0]
                else:
                    c.stock_close = '0.0'
            except Quandl.DatasetNotFound as e:
                c.stock_volume = 'DatasetNotFound'
                c.stock_close = '0.0'
            except Quandl.ErrorDownloading as e:
                c.stock_volume = 'ErrorDownloading'
                c.stock_close = '0.0'
        print("%s" % (c))

def load_historical_data(engine, dataset, api_key, mode='normal'):
    """
    Creates a new data table for the provided data set and loads historical
    data for each code into the table.
    """
    update_code = sql.text(
        'UPDATE ' + dataset + '_CODES '
        'SET [Start Date] = :start, [End Date] = :end, [Last Updated] = :updated '
        'WHERE Code = :code')
    conn = engine.connect()
    # retrieve the current code table
    code_table = load_sql_data(engine, dataset + '_CODES',
                               date_columns=['Start Date', 'End Date', 'Last Updated'])
    # TESTING ONLY
    code_table = code_table.iloc[:10, :]
    # fetch the first code's data to create an empty template data frame
    data = Quandl.get(code_table['API Code'].iloc[0], rows=1, authtoken=api_key)
    data = data.reset_index()
    data = data.rename(columns=lambda x: x[0] + x[1:].lower())
    data['Code'] = code_table['Code'].iloc[0]
    data = data.iloc[0:0]
    # iterate over each code and append the returned data
    counter = 0
    for index, row in code_table.iterrows():
        code_data = Quandl.get(row['API Code'], authtoken=api_key)
        code_data = code_data.reset_index()
        code_data = code_data.rename(columns=lambda x: x[0] + x[1:].lower())
        code_data['Code'] = row['Code']
        data = pd.concat([data, code_data])
        # update the code table
        min_date = code_data['Date'].min().to_datetime()
        max_date = code_data['Date'].max().to_datetime()
        current_date = datetime.now()
        conn.execute(update_code, start=min_date, end=max_date,
                     updated=current_date, code=row['Code'])
        counter += 1
        if counter % 100 == 0:
            print('Sleeping for one minute to avoid hitting API call limits...')
            time.sleep(60)
    # move the code column to the beginning
    columns = data.columns.tolist()
    columns = [columns[-1]] + columns[:-1]
    data = data[columns]
    save_sql_data(engine, data, dataset, exists='replace', index=False)
    print(dataset + ' historical data loaded successfully.')

def __init__(self, job_queue, trim_start, trim_end, calc_params, authtoken, fp):
    self.job_queue = job_queue
    self.trim_start = trim_start
    self.trim_end = trim_end
    self.calc_params = calc_params
    self.authtoken = authtoken
    self.index_data = ql.get("YAHOO/INDEX_SPY", column=6,
                             trim_start=self.trim_start, trim_end=self.trim_end,
                             authtoken=self.authtoken)
    self.index_data = self.index_data["Adjusted Close"]
    self.rfr_data = ql.get("USTREASURY/YIELD", column=1,
                           trim_start=self.trim_start, trim_end=self.trim_end,
                           authtoken=self.authtoken)
    # convert the 1-month Treasury yield from percent to a decimal rate
    self.rfr_data = self.rfr_data["1 Mo"] * 0.01
    self.fp = fp
    threading.Thread.__init__(self)

def get(content):
    if bool(content):
        return Quandl.get(
            content["dataCode"],
            trim_start=content["startDate"],
            trim_end=content["finishDate"],
            collapse=content["timeRange"],
            transformation="rdiff",
            returns=content["dataType"],
        )  # Warning: uses an Internet connection
    else:
        return Quandl.get(
            "YAHOO/AAPL",
            trim_start="2001-01-01",
            trim_end="2010-01-01",
            collapse="annual",
            returns="pandas"
        )

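A sketch of the request dictionary the function above expects; every value below is a placeholder, not a recommendation.

# Hypothetical content dict mapping the keys read by get() to Quandl.get
# parameters (dataCode, trim dates, collapse frequency, return type).
content = {
    "dataCode": "YAHOO/AAPL",
    "startDate": "2001-01-01",
    "finishDate": "2010-01-01",
    "timeRange": "annual",
    "dataType": "pandas",
}
frame = get(content)
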
def get_market_value(stock='all', start='', end='', size=0):
    filename = 'WIKI_tickers.csv'
    if stock == 'all':
        # list all tickers from the local WIKI tickers file
        f = open(filename, 'r')
        f.readline()  # skip the header row
        for line in f:
            stock_value = line.replace('"', '').replace('\n', '').split(',', 1)
            print "%-10s : %s" % (stock_value[0], stock_value[1])
    else:
        data = None
        if start == '' and end == '':
            data = Quandl.get(stock)
        else:
            data = Quandl.get(stock, trim_start=start, trim_end=end)
        if size == 0:
            print data
        else:
            print data.head(size).to_string()

def get_stock(self, ticker, start, end, drop=None, collapse="daily", transform="rdiff"):
    """
    :param ticker: ticker
    :param start: start-date
    :param end: end-date
    :param drop: columns in data-frame to drop
    :param collapse: frequency of data
    :param transform: rdiff = simple returns
    """
    quandl_args = "GOOG/" + ticker
    if drop is None:
        drop = ["Open", "High", "Low", "Volume"]
    hash_val = str(ticker) + "_" + str(start) + "_" + str(end)
    try:
        cached_data = pandas.read_csv("cache/" + str(hash_val))
        cached_data = cached_data.set_index("Date")
        return cached_data
    except IOError:
        try:
            # print("Downloading", data_set)
            # Otherwise download the data frame from scratch
            # (use != rather than `is not`: identity checks on string
            # literals are unreliable)
            if transform != "None":
                downloaded_data_frame = Quandl.get(quandl_args, authtoken=self.token,
                                                   trim_start=start, trim_end=end,
                                                   collapse=collapse, transformation=transform)
            else:
                downloaded_data_frame = Quandl.get(quandl_args, authtoken=self.token,
                                                   trim_start=start, trim_end=end,
                                                   collapse=collapse)
            # Remove any unnecessary columns and rename the rest with the
            # Quandl code as prefix
            updated_column_labels = []
            for column_label in downloaded_data_frame.columns:
                if column_label in drop:
                    downloaded_data_frame = downloaded_data_frame.drop([column_label], axis=1)
                else:
                    updated_column_labels.append(quandl_args + "_" + column_label)
            downloaded_data_frame.columns = updated_column_labels
            downloaded_data_frame.to_csv("cache/" + str(hash_val))
            return downloaded_data_frame
        except Quandl.DatasetNotFound:
            print("Exception - DatasetNotFound", quandl_args)
        except Quandl.CodeFormatError:
            print("Exception - CodeFormatError", quandl_args)
        except Quandl.DateNotRecognized:
            print("Exception - DateNotRecognized", quandl_args)
        except Quandl.ErrorDownloading:
            print("Exception - ErrorDownloading", quandl_args)
        except Quandl.ParsingError:
            print("Exception - ParsingError", quandl_args)

def grabCountryStats(self, indicators):
    numToPlot = self.numToPlot
    dne = dict()
    # the first entry of `indicators` is a file listing countries with no data
    f = open(indicators[0], 'r+')
    for line in f:
        dne[line.replace('\n', '')] = 0
    for country in self.gdpLowToHigh:
        if numToPlot <= 0:
            break
        if country in dne.keys():
            pass
        else:
            #print self.countryStats[country]['name'] + '\n'
            for wwdiIndicator in indicators:
                try:
                    requestCode = 'WWDI/' + country + '_' + wwdiIndicator
                    data = q.get(requestCode, authtoken=quandlToken)
                    # pick the latest date, and first value
                    value = data.tail(1).iloc[0, 0]
                    self.countryStats[country][wwdiIndicator] = value
                    numToPlot -= 1
                except q.DatasetNotFound as e:
                    print "Dataset not found for: " + self.countryStats[country]['name']
                    f.write(country + '\n')
                    break
                except IndexError as e:
                    print e
                except q.ErrorDownloading as e:
                    print e

def draw(tick):
    #import numpy as np
    mydata = Quandl.get("WIKI/" + tick)
    #markers_on = np.array(['2013-02-26','2015-01-26','2016-02-26', '2016-04-01'], dtype='datetime64')
    #df3 = pd.DataFrame(markers_on)
    #df4 = df3.set_index(0)
    #df5 = df4.join(mydata, how='left')
    #df6 = df5['Adj. Close']
    #mynewdata = mydata.join(df6, how="left", lsuffix='_OG', rsuffix='_Mark')

    # get trading start
    def tradedetails(tradetype, tradevalue, minprice, maxprice, isofficer, ceo, cfo, isdir, is10, isother, stock):
        hf = pd.read_html("http://openinsider.com/screener?fd=0&fdr=&td=0&tdr=&s=" + stock +
                          "&o=&t=" + tradetype + "&minprice=" + str(minprice) +
                          "&maxprice=" + str(maxprice) + "&v=" + str(tradevalue) +
                          "&isofficer=" + str(isofficer) + "&isceo=" + str(ceo) +
                          "&iscfo=" + str(cfo) + "&isdirector=" + str(isdir) +
                          "&istenpercent=" + str(is10) + "&isother=" + str(isother) +
                          "&sicMin=&sicMax=&sortcol=1&maxresults=1000")
        return hf[5]

    def convertdate(x):
        return x[5:7] + "/" + x[8:10] + "/" + x[0:4]

    def converttime(x):
        return x[11:]

    def convertnumber(x):
        return x.replace("+", "").replace("$", "").replace(",", "")

    def cleandataframe(df):
        df['Trade Date'] = df['Trade Date'].apply(convertdate)
        df['Filing Time'] = df['Filing Date'].apply(converttime)
        df['Filing Date'] = df['Filing Date'].apply(convertdate)
        #df['Shares Traded'] = df['Shares Traded'].apply(convertnumber)
        df['Value Traded'] = df['Value Traded'].apply(convertnumber)
        #df['Shares Owned'] = df['Shares Owned'].apply(convertnumber)
        return df

    def cleanerdataframe(df):
        df['Trade Date'] = df['Trade Date'].apply(convertdate)
        df['Filing Time'] = df['Filing Date'].apply(converttime)
        df['Filing Date'] = df['Filing Date'].apply(convertdate)
        df['Shares Traded'] = df['Shares Traded'].apply(convertnumber)
        df['Value Traded'] = df['Value Traded'].apply(convertnumber)
        #df['Shares Owned'] = df['Shares Owned'].apply(convertnumber)
        return df

    detail = tradedetails("p", 25000, "", "", 0, 1, 1, 0, 0, 0, tick)
    pd.to_datetime(detail['Trade Date'])
    detail = detail.set_index('Trade Date')
    newdetail = detail.join(mydata, how='left')
    df6 = newdetail['Adj. Close']
    mynewdata = mydata.join(df6, how="left", lsuffix='_OG', rsuffix='_Mark')
    # get trading end
    plt.plot(mynewdata['Adj. Close_OG'])
    plt.plot(mynewdata['Adj. Close_Mark'], marker='o', color='r', markersize=11)
    plt.show()

def get_futures_contract_dataset(exchange, contract, start=None, end=None, auth_token=None):
    '''
    Synopsis
    --------
    Fetches dataset for the given futures contract from Quandl.

    Parameters
    ----------
    exchange: Name of futures exchange. String.
    contract: Name of futures contract (e.g., 'SPZ2015'). String.
    start: Optional start date for dataset. Ignores all data timestamped
        before this date. Example: 2015-01-01. String.
    end: Optional end date for dataset. Ignores all data timestamped
        after this date. Example: 2016-01-01. String. Cannot be before
        the start date.
    auth_token: Quandl auth token. String.

    Returns
    -------
    Pandas DataFrame
    '''
    template = '{exchange}/{contract}'
    quandl_code = template.format(exchange=exchange, contract=contract)
    dataset = Quandl.get(
        quandl_code,
        trim_start=start,
        trim_end=end,
        authtoken=auth_token)
    # Quandl returns futures datasets as a pandas DataFrame with a
    # "Date" column as its index; set this as a proper column to
    # make aggregating data easier
    dataset = dataset.reset_index()
    return dataset

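An illustrative call with the parameters documented in the docstring above; the exchange, contract, and token values are examples only.

# Hypothetical: pull the CME SPZ2015 contract for calendar year 2015.
dataset = get_futures_contract_dataset('CME', 'SPZ2015',
                                       start='2015-01-01', end='2015-12-31',
                                       auth_token='YOUR_TOKEN')
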
def download_data_set(self, argument):
    assert isinstance(argument, Argument)
    data_frame = None
    try:
        data_set_name = argument.id
        if argument.prefix is not None:
            data_set_name = argument.prefix + data_set_name
        data_frame = Quandl.get(data_set_name, authtoken=self.api_key,
                                trim_start=argument.start, trim_end=argument.end,
                                transformation=argument.transformation,
                                collapse=argument.collapse)
        assert isinstance(data_frame, pandas.DataFrame)
        # drop the requested columns, ignoring those that are absent
        for d in argument.drop:
            try:
                data_frame = data_frame.drop(d, axis=1)
            except:
                continue
    except Quandl.DatasetNotFound:
        print("Data set not found")
    except Quandl.ErrorDownloading:
        print("Error downloading")
    except Quandl.ParsingError:
        print("Parsing error")
    except Quandl.WrongFormat:
        print("Wrong format")
    except Quandl.CallLimitExceeded:
        print("Call limit exceeded")
    except Quandl.CodeFormatError:
        print("Code format error")
    except Quandl.MissingToken:
        print("Missing token")
    if data_frame is None:
        raise Exception("Data Set Not Initialized", argument.id)
    else:
        return data_frame

def run(self):
    while True:
        qcode = "N/A"
        try:
            qcode = self.job_queue.get()
            if qcode:
                data = ql.get(qcode, column=11, trim_start=self.trim_start,
                              trim_end=self.trim_end, authtoken=self.authtoken)
                mpt = MPTStats(data["Adj. Close"], self.index_data, self.rfr_data)
                result = mpt.calculate(self.calc_params["calc_date"],
                                       freq=self.calc_params["freq"],
                                       span=self.calc_params["span"])
                ticker = qcode.split("/")[1]
                csv = self._to_csv(result)
                with self._lock:
                    self.fp.write(ticker + "," + str(self.calc_params["calc_date"]) + "," + csv + "\n")
        except Exception as e:
            _logger.exception("Error %s for qcode %s" % (str(e), qcode))
        finally:
            self.job_queue.task_done()

def get_quandl_earnings(date, use_cache=True):
    ls_spr = get_index_symbols(date, 'spr', use_cache).index.values
    # Derive surprise dates for each equity
    for i, s_equity in enumerate(ls_spr):
        s_equity_path = make_dir(date) + '/' + s_equity + '.csv'
        if os.path.isfile(s_equity_path):
            continue
        # get surprise dates from Quandl
        df_earnings = Quandl.get("ZES/" + s_equity.replace('.', '_'), authtoken=quandl_token)
        ls_earnings_dates = df_earnings.index.values
        # skip equities with no earnings history instead of looping forever
        # (the original retried the empty-index access indefinitely)
        if len(ls_earnings_dates) == 0:
            continue
        # derive close prices from Yahoo over a window padded by one week
        dt_start = ls_earnings_dates[0] - np.timedelta64(1, 'W')
        dt_end = ls_earnings_dates[-1] + np.timedelta64(1, 'W')
        df_close = web.DataReader(s_equity.replace('.', '/'), 'yahoo', dt_start, dt_end)['Adj Close']
        ls_close_dates = list(df_close.index)
        # append before/after close into df_earnings
        ls_announce_hour = []
        ls_prev_close = []
        ls_next_close = []
        for dt_date in ls_earnings_dates:
            i_announce_hour = int(str(dt_date)[11:13])
            ls_announce_hour.append(i_announce_hour)
            # shift the date until it lands on a trading day
            while True:
                try:
                    i_date = ls_close_dates.index(dt_date)
                    break
                except ValueError:
                    if i_announce_hour > 16:
                        dt_date -= np.timedelta64(1, 'D')
                    elif i_announce_hour < 9:
                        dt_date += np.timedelta64(1, 'D')
                    else:
                        raise ValueError('Earnings announce hour between 9am and 4pm')
            if i_announce_hour > 16:
                ls_prev_close.append(df_close[i_date])
                ls_next_close.append(df_close[i_date + 1])
            elif i_announce_hour < 9:
                ls_prev_close.append(df_close[i_date - 1])
                ls_next_close.append(df_close[i_date])
            else:
                raise ValueError('Earnings announce hour between 9am and 4pm')
        df_earnings['ANNOUNCE_HOUR'] = ls_announce_hour
        df_earnings['BEFORE_CLOSE'] = ls_prev_close
        df_earnings['AFTER_CLOSE'] = ls_next_close
        # save into csv
        df_earnings.to_csv(s_equity_path, date_format='%Y%m%d')

def risk_free(start, end, date_index):
    # Get the CDI daily rate
    risk_free = Quandl.get("BCB/12", trim_start=start, trim_end=end)
    risk_free.name = 'risk_free'
    risk_free.rename(columns={'Value': 'cdi'}, inplace=True)
    return risk_free.ix[date_index, :].dropna()

def put(self):
    stock_sticker = "WIKI/" + request.form['stocksticker']
    stockdata = Quandl.get(stock_sticker, returns="numpy",
                           trim_start="2015-01-01", trim_end="2015-01-31")
    return {"stockdata": list(stockdata.Close)}

def get_daily_historic_data_quandl(**kwargs):
    ticker = kwargs["ticker"]
    quandl_database_4ticker = get_quandl_database_4ticker(ticker)
    quandl_input = {"authtoken": "zwBtPkKDycmg5jmYvK_s"}
    if "date_from" in kwargs.keys():
        date_from_string = cu.convert_datestring_format(
            {"date_string": str(kwargs["date_from"]),
             "format_from": "yyyymmdd",
             "format_to": "yyyy-mm-dd"})
        quandl_input["trim_start"] = date_from_string
    if "date_to" in kwargs.keys():
        date_to_string = cu.convert_datestring_format(
            {"date_string": str(kwargs["date_to"]),
             "format_from": "yyyymmdd",
             "format_to": "yyyy-mm-dd"})
        quandl_input["trim_end"] = date_to_string
    try:
        data_out = qndl.get(quandl_database_4ticker + "/" + ticker, **quandl_input)
        success = True
    except:
        print("Error Loading " + quandl_database_4ticker + "/" + ticker + ": " + str(sys.exc_info()[0]))
        success = False
        data_out = pd.DataFrame()
    # normalize the open-interest column name across contract vintages
    new_column_names = ["Open Interest" if x == "Prev. Day Open Interest" else x
                        for x in data_out.columns]
    data_out.columns = new_column_names
    return {"success": success, "data_out": data_out}

def load_quandl_prices(dataset, start, transform='rdiff'):
    cache_file = '../data/prices-cache.csv'
    if os.path.exists(cache_file):
        print 'Loading prices from cache'
        return pd.read_csv(cache_file, index_col=[0, 1], parse_dates=True)
    else:
        prices = pd.DataFrame()
        quandl_auth = 'pdRRzNygCjs_YY5Y2MVe'
        for index, row in dataset.iterrows():
            print 'Downloading prices for', row['Ticker']
            all_price_data = Quandl.get(row['Code'], trim_start=start,
                                        transformation=transform, authtoken=quandl_auth)
            close_price_data = all_price_data[['Close']]
            close_price_data['Ticker'] = row['Ticker']
            # label each daily return as a gain, loss, or neutral move
            close_price_data['CloseClass'] = 'Neutral'
            close_price_data.loc[close_price_data['Close'] > 0, 'CloseClass'] = 'Gain'
            close_price_data.loc[close_price_data['Close'] < 0, 'CloseClass'] = 'Loss'
            prices = prices.append(close_price_data)
        prices.index = pd.MultiIndex.from_arrays([prices.index, prices['Ticker']])
        prices.drop('Ticker', axis=1, inplace=True)
        prices.to_csv(cache_file)
        return prices

def get_index(self, index='XU030', price_type='Close', price_frequency='daily'):
    px = Quandl.get("GOOG/INDEXIST_%s" % index,
                    collapse=price_frequency,
                    trim_start=self.start_date,
                    trim_end=self.end_date,
                    authtoken="Vbxts9YuGDArmJJbie4e")
    return px[price_type]

def gdp_data():
    df = Quandl.get("BCB/4385", trim_start="1975-01-01", authtoken=api_key)
    # convert to cumulative percent change from the first observation
    df["Value"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
    df = df.resample('M')
    df.rename(columns={'Value': 'GDP'}, inplace=True)
    df = df['GDP']
    return df

def short_interest(x):
    x = x.capitalize()
    symbol = 'SI/' + x + '_SI'
    sym = q.get(symbol, authtoken=auth)
    # the column name varies between datasets
    if "Days To Cover" in sym.columns:
        sint = sym["Days To Cover"]
    else:
        sint = sym["Days to Cover"]
    sint = sint[-24:]
    median = sint.median()
    recent = sint[-1]
    avg = round(100 * (recent - median) / median, 2)
    # report the size of the move in standard deviations before returning
    # (in the original this code sat after the returns and never ran)
    std = sint.std()
    cstd = str(round(abs((recent - median) / std), 2))
    print "This is a " + cstd + " standard deviation move"
    if avg > 0:
        return 'The short interest in ' + x.upper() + " rose " + str(avg) + '%'
    else:
        return 'The short interest in ' + x.upper() + " fell " + str(avg) + '%'

def getStockPrices(ticker, frequency="monthly", update=False):
    """
    Gets the closing prices for a given stock ticker at a given frequency
    :param ticker: Name of the company's ticker
    :param frequency: Frequency of returned time series. See Quandl.get()'s collapse param.
    :param update: Always updates instead of using cache if true
    :return: Pandas dataframe representing time series of ticker's closing prices,
        -1 for connection errors
    """
    name = ticker + "_" + frequency  # Name of data in cache
    prices = None
    # If there is no cached version of the pickle, or the update flag is on,
    # download the price data and cache it
    if update or not util.pickleExists(name):
        try:
            prices = Quandl.get(STOCK_DATA_SOURCE + ticker, collapse=frequency,
                                authtoken="xx_T2u2fsQ_MjyZjTb6E")
            util.savePickle(prices, name)
        # Catch various connection errors
        except:
            return -1
    # Otherwise, use the most recent cache entry
    else:
        prices = util.getMostRecentPickle(name)
    # Return closing prices
    return prices.get("Close")

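A brief usage sketch of the caching wrapper above; the ticker is illustrative, and a return value of -1 signals a connection failure per the docstring.

# Hypothetical: force a fresh monthly download instead of using the cache.
closes = getStockPrices("AAPL", frequency="monthly", update=True)
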
def quandl_download(ds_code, ds_path):
    """
    Executes a Quandl API call to download the whole dataset and saves it
    to the given path.
    """
    print("Downloading dataset: {0}".format(ds_code))
    ds_data = Quandl.get(ds_code, authtoken=auth_token)
    ds_data.to_csv(ds_path)

def getDatasets(self):
    print "In get datasets"
    datasets = Quandl.search(query=self.symbol, source="SEC",
                             prints=False, authtoken=self.key)
    self.rawData = datasets

def get_stock_data(symbol, start_date=None, end_date=None, db_code="WIKI"):
    """
    Get OHLC stock data from Quandl for a single stock
    :param symbol: string
    :param start_date: datetime
    :param end_date: datetime
    :param db_code: Quandl database code
    :return: DataFrame of stock data from start_date to end_date
    """
    if start_date is None:
        start_date = dt(year=1990, month=1, day=1)
    if end_date is None:
        end_date = dt.today()
    if start_date is not None and end_date is not None:
        assert start_date < end_date, "Start date is later than end date."
    log.info("Loading symbol: {}".format(symbol))
    quandl_code = db_code + "/" + symbol
    symbol_data = qd.get(quandl_code, returns="pandas",
                         trim_start=start_date, trim_end=end_date)
    return symbol_data

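An illustrative single-symbol pull through the function above, assuming `dt` is the same datetime class the function itself uses; the symbol and window are placeholders.

# Hypothetical window; defaults cover 1990 through today when omitted.
aapl = get_stock_data("AAPL", start_date=dt(2014, 1, 1), end_date=dt(2015, 1, 1))
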
def web_download(contract, start, end):
    df = Quandl.get(contract, trim_start=start, trim_end=end,
                    returns="pandas", authtoken=simple.get_quandl_auth())
    return df

def Pivots(symb, symbec='STK', eday=None):
    if eday is None:
        eday = datetime.now(timezone('US/Eastern'))
    bday = eday - timedelta(days=10)
    symbDict = {'INDU': '^DJI', 'COMP': '^IXIC', 'SPX': '^GSPC', 'RUT': '^RUT'}
    if symb in symbDict.keys():
        symb = symbDict[symb]
    if symb == '^DJI':
        # fetch the Dow index from Quandl; other symbols come from Yahoo below
        Data = Quandl.get("YAHOO/INDEX_DJI",
                          authtoken='977FGLXLZ1wsdKt8DgUH',
                          trim_start="%d-%02d-%02d" % (bday.year, bday.month, bday.day),
                          trim_end="%d-%02d-%02d" % (eday.year, eday.month, eday.day))[0:1]
        H = Data['High'].values[-1]
        L = Data['Low'].values[-1]
        C = Data['Close'].values[-1]
    else:
        # get data from Yahoo Finance
        Data = yf.getHistoricData(symb, bday.timetuple()[0:3],
                                  eday.timetuple()[0:3])[['high', 'low', 'close', 'adj_close']]
        H = Data['high'].values[-1]
        L = Data['low'].values[-1]
        C = Data['close'].values[-1]
    # classic floor-trader pivot points
    P = (H + L + C) / 3
    #P = (H + L + C + C) / 4
    #P = (H + L + O + O) / 4
    R1 = 2 * P - L
    S1 = 2 * P - H
    R2 = P + (H - L)
    S2 = P - (H - L)
    R3 = R1 + (H - L)
    S3 = S1 - (H - L)
    return [round(R3, 1), round(R2, 1), round(R1, 1), round(P, 1),
            round(S1, 1), round(S2, 1), round(S3, 1)]

def fetch_quandl(self, code, **kwargs):
    '''
    Quandl entry point in datafeed object
    _____________________________________
    Parameters
        code: str
            quandl data code
        kwargs: dict
            keyword args passed to quandl call
    _____________________________________
    Return:
        data: pandas.dataframe or numpy array
            returned from quandl call
    '''
    log.debug('Fetching QuanDL data (%s)' % code)
    # This way you can use your credentials even if
    # you didn't provide them to the constructor
    if 'authtoken' in kwargs:
        self.quandl_key = kwargs.pop('authtoken')
    # Harmonization: Quandl calls start_date trim_start
    if 'start_date' in kwargs:
        kwargs['trim_start'] = kwargs.pop('start_date')
    if 'end_date' in kwargs:
        kwargs['trim_end'] = kwargs.pop('end_date')
    try:
        data = Quandl.get(code, authtoken=self.quandl_key, **kwargs)
        data.index = data.index.tz_localize(pytz.utc)
    except:
        log.error('** Fetching %s from Quandl' % code)
        data = pd.DataFrame()
    return data

def download_time_series(self, vendor_ticker, pretty_ticker, start_date, source, csv_file=None):
    if source == 'Quandl':
        import Quandl
        # Quandl requires an API key for a large number of daily downloads
        # https://www.quandl.com/help/api
        spot = Quandl.get(vendor_ticker)  # e.g. Bank of England's database on Quandl
        spot = pandas.DataFrame(data=spot['Value'], index=spot.index)
        spot.columns = [pretty_ticker]
    elif source == 'Bloomberg':
        from bbg_com import HistoricalDataRequest
        req = HistoricalDataRequest([vendor_ticker], ['PX_LAST'], start=start_date)
        req.execute()
        spot = req.response_as_single()
        spot.columns = [pretty_ticker]
    elif source == 'CSV':
        dateparse = lambda x: pandas.datetime.strptime(x, '%Y-%m-%d')
        # in case you want to use a source other than Bloomberg/Quandl
        # (parse_dates=True so the index is actually parsed; the original
        # passed 0, which pandas treats as False)
        spot = pandas.read_csv(csv_file, index_col=0, parse_dates=True,
                               date_parser=dateparse)
    return spot

def load(*args, **kwargs):
    """Load data from Quandl into a dataframe, modify column names and
    check for non-numeric values."""
    # Grab the Quandl token
    # token = os.environ.get('QUANDL_TOKEN')
    # if token is None:
    token = raw_input("Enter Quandl token: ")
    ticker = raw_input("Enter Quandl ticker symbol (or hit Enter for default of YAHOO/INDEX_GSPC): ")
    if len(ticker) < 1:
        ticker = 'YAHOO/INDEX_GSPC'
    print(ticker)
    start_date = raw_input("Enter start date as YYYY-MM-DD (or hit ENTER for default of 1990-01-01): ")
    if len(start_date) < 1:
        start_date = '1990-01-01'
    print(start_date)
    # Call the Quandl module, trimming input by default from 1990 forward
    print('Pulling Quandl data...')
    df = Quandl.get(ticker, authtoken=token, trim_start=start_date)
    # Get the column labels
    # old_columns = list(df.columns.values)
    # Use the ticker symbol as our new prefix
    # ticker_tag = ticker.split('_')[-1] + '_'
    # Drop spaces and concatenate
    # new_labels = [ticker_tag + i.replace(' ', '') for i in old_columns]
    # Create a dictionary of old and new column labels
    # new_columns = dict(zip(old_columns, new_labels))
    # Rename the columns using our dictionary
    # df = df.rename(columns=new_columns)
    # Check for non-numeric values
    nulls = df[~df.applymap(np.isreal).all(1)]
    if len(nulls) > 0:
        raise ValueError('Dataframe contains non-numeric values')
    row_count = len(df)
    print('%d rows loaded into dataframe.' % row_count)
    return df

def store_ETF_data(self, tickers):
    settings = Settings()
    db = settings.get_mongo_db('Quandl', local=True)
    coll = db['ETF']
    for t in tickers:
        # find the most recent stored date for the ticker
        dates = coll.find_one({'name': t}, sort=[('Date', -1)])
        if dates is None:
            print 'max_date is none'
            mdate = None
        else:
            # stored Date is in milliseconds; convert to nanoseconds for pandas
            mdate = pd.to_datetime(dates['Date'] * 1000 * 1000).strftime(format='%Y-%m-%d')
        print 'downloading {t}'.format(t=t)
        df = Quandl.get(t, trim_start=mdate)
        df['name'] = t
        df = df.reset_index()
        if mdate is None:
            df_a = df
        else:
            df_a = df[df['Date'] > mdate]
        if len(df_a) == 0:
            print "data for {ticker} is already updated till {date}".format(ticker=t, date=mdate)
            continue
        records = json.loads(df_a.T.to_json()).values()
        #print records
        print "uploading {t}".format(t=t)
        coll.insert_many(records)

def hit_quandl(symbol, start, end):
    """Gets adjusted close data for a stock."""
    import os
    quandl_token = os.environ['QUANDL_TOKEN']
    # the '.6' suffix selects column 6 (the adjusted close) of the dataset
    price = Quandl.get("YAHOO/{}.6".format(symbol),
                       trim_start=start, trim_end=end,
                       authtoken=quandl_token)
    return price.rename(columns={'Adjusted Close': symbol})

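A sketch of combining two calls to the helper above into one frame; the symbols and dates are placeholders, and QUANDL_TOKEN must be set in the environment.

# Hypothetical: each call returns a one-column frame named after its symbol,
# so a join yields side-by-side adjusted closes.
prices = hit_quandl('AAPL', '2015-01-01', '2015-12-31').join(
    hit_quandl('MSFT', '2015-01-01', '2015-12-31'))
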
def SMA(symb, intvs, symbec='STK', eday=None):
    if eday is None:
        eday = datetime.now(timezone('US/Eastern'))
    bday = eday - timedelta(days=365)
    symbDict = {'INDU': '^DJI', 'COMP': '^IXIC', 'SPX': '^GSPC', 'RUT': '^RUT'}
    if symb in symbDict.keys():
        symb = symbDict[symb]
    if symb == '^DJI':
        Data = Quandl.get("YAHOO/INDEX_DJI",
                          authtoken='977FGLXLZ1wsdKt8DgUH',
                          trim_start="%d-%02d-%02d" % (bday.year, bday.month, bday.day),
                          trim_end="%d-%02d-%02d" % (eday.year, eday.month, eday.day))
        sums = list()
        for n in intvs:
            if len(Data.index) < n:
                # not enough data
                sums.append(-1)
            else:
                sums.append(round(Data['Adjusted Close'][-n - 1:-1].mean(), 1))
    else:
        # get data from Yahoo Finance
        Data = yf.getHistoricData(symb, bday.timetuple()[0:3], eday.timetuple()[0:3])[['adj_close']]
        print Data.tail()
        sums = list()
        for n in intvs:
            if len(Data.index) < n:
                # not enough data
                sums.append(-1)
            else:
                sums.append(round(Data['adj_close'][-n - 1:-1].mean(), 1))
    return sums

def readData(self, lookupTicker, source, start, end):
    '''Read the data - assumes start and end are datetime.date objects'''
    try:
        lookupTicker = str(lookupTicker)
        if source == 'Quandl':
            # use the Quandl reader
            start = str(start)
            end = str(end)
            data = Quandl.get(lookupTicker,
                              authtoken=self.quandlAuthToken,
                              trim_start=start,
                              trim_end=end)
        else:
            # use the pandas.io DataReader
            data = DataReader(lookupTicker, source, start, end)
            data = data.reset_index()
        logging.info("Read ticker {}".format(lookupTicker))
    except:
        logging.error("importData: Can't read ticker {}".format(lookupTicker))
        raise
    else:
        return data

def all_dbmetafrmSRC(pglmt=10000, src="YALE", authtkn=AUTHTOKEN):
    """
    all_dbmetafrmSRC(pglmt=10000, src="YALE", authtkn=AUTHTOKEN)

    all_dbmetafrmSRC, i.e. all database metadata from a source, returns all
    the "metadata" (the information describing a database) for all databases
    from a single source, specified by a string that is the Quandl code.

    INPUTS:
    int pglmt: (default=10000), page numbers to search
    str src: (default="YALE", chosen as an example), the Quandl code for the
        SOURCE or "src" to search from
    str authtkn: (default=AUTHTOKEN, declared globally at the beginning of
        the .py code), your authentication code with Quandl; you should find
        that under API-key in your Quandl account and copy and paste it into
        the code
    """
    metalst = []
    counter = 1
    while counter <= pglmt:
        resp = Quandl.search(query="*", source=src, page=counter, authtoken=authtkn)
        if len(resp) != 0:
            metalst = metalst + resp
            counter += 1
        else:
            break
    if counter > pglmt:
        print "Counter limit, pglmt, was reached; try a higher pglmt, i.e. page limit \n"
    return metalst

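A minimal usage sketch for the metadata walker above, assuming AUTHTOKEN is configured; the page limit is an arbitrary example.

# Hypothetical: collect metadata for YALE datasets, scanning at most 50 pages.
meta = all_dbmetafrmSRC(pglmt=50, src="YALE")
print len(meta)
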
def get_leanhog_contract_data(years):
    print("Loading contracts from Quandl web service....")
    print(' loading years: {0}'.format(years))
    recordList = []
    columnList = []
    columnRenameDict = {}
    for year in years:
        for month in all_months:
            s = "CME/LN{0}{1}".format(month, year)
            recordList.append(s)
            t = "CME.LN{0}{1} - Settle".format(month, year)
            tx = "LN{0}{1}".format(month, year)
            # May contracts didn't start until 2008
            if not ((month == 'K') and (year < 2008)):
                columnList.append(t)
                columnRenameDict.update({t: tx})
    df = q.get(recordList, authtoken="dXzByDoZicZy-WFvPyTf")
    # take out any of the expected columns that Quandl didn't return,
    # possibly a contract year that hasn't started
    columnList = [x for x in columnList if x in df.columns]
    df = df[columnList]
    df.rename(columns=columnRenameDict, inplace=True)
    return df

def load_prices_quandl(codes, start_date=None, end_date=None, field_selector='CLOSE'):
    if not os.path.isdir(os.path.abspath(_CACHE_LOCATION)):
        os.makedirs(os.path.abspath(_CACHE_LOCATION))
    datasets_encoded = quote_plus('@'.join(codes))
    cache_path = os.path.abspath(os.sep.join([_CACHE_LOCATION, datasets_encoded]))
    if os.path.isfile(cache_path):
        logging.info('reading datasets from local cache')
        quandl_df = pandas.read_pickle(cache_path)
    else:
        quandl_df = Quandl.get(codes, authtoken=sensitive('quandl_token')).dropna()
        quandl_df.to_pickle(cache_path)
    # restrict to the requested date window
    filtered = quandl_df
    if start_date is not None and end_date is not None:
        filtered = quandl_df.ix[start_date:end_date]
    elif start_date is not None and end_date is None:
        filtered = quandl_df.ix[start_date::]
    elif start_date is None and end_date is not None:
        filtered = quandl_df.ix[::end_date]
    # keep only the columns matching the field selector, e.g. ' - CLOSE'
    selected = filtered
    if field_selector is not None:
        pattern = ' - ' + field_selector.upper()
        selected = filtered.loc[:, filtered.columns.str.upper().str.endswith(pattern)]
    return selected

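An illustrative cached pull through the loader above; the codes and dates are placeholders.

# Hypothetical: fetch two WIKI codes, then keep only columns ending in
# ' - CLOSE' for the requested window.
closes = load_prices_quandl(['WIKI/AAPL', 'WIKI/MSFT'],
                            start_date='2015-01-01', end_date='2015-12-31',
                            field_selector='CLOSE')
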
def createChart(t, d):
    ticker = t
    trading_days = int(d)
    try:
        df = Quandl.get('WIKI/' + ticker, authtoken=q_key)[-trading_days:]
        ptitle = ticker + ' Closing Price: last ' + str(trading_days) + ' trading days'
        # adjust close prices for any split inside the window
        max_split = max(df['Split Ratio'])
        if max_split > 1:
            split_date = np.argmax(df['Split Ratio'])
            df['Multiply'] = [split_date > d for d in df.index]
            df['AdjClose'] = [df.Close[x] * (1 - (max_split - 1) / max_split * df.Multiply[x].sum())
                              for x in np.arange(len(df))]
            df['Close'] = df['AdjClose']
        output_file("stockplot.html", title='Close Plot')
        p = figure(x_axis_type='datetime')
        p.line(df.index, df['Close'], color='black')
        p.title = ptitle
        p.xaxis.axis_label = 'Date'
        p.yaxis.axis_label = 'Price'
        save(p)
        # cumulative percent gain over the window
        st = df['Close'][0]
        ed = df['Close'][-1]
        cdgr = round((ed - st) / st * 100., 2)
        script, div = components(p)
        p_dict = {'script': script, 'div': div, 'cdgr': cdgr}
    except Quandl.Quandl.DatasetNotFound:
        p_dict = {'script': None, 'div': None, 'cdgr': None}
    return p_dict

def mortgage_30y():
    df = Quandl.get('FMAC/MORTG', trim_start="1975-01-01", authtoken=api_key)
    # convert to cumulative percent change from the first observation
    df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
    df.columns = ['M30']
    # upsample to daily, then downsample to month-end means
    df = df.resample('D').mean()
    df = df.resample('M').mean()
    return df

def dl_quandl(query="USD", authtoken=None):
    count = 0
    for name, code in name_to_code(query=query, authtoken=authtoken).items():
        curve = Quandl.get(code, authtoken=authtoken)
        curve.to_csv(name + ".csv")
        count += 1
    print("Got %d curves" % count)

def fetch_data(self, qcode, start_date, end_date, column=None, transformation=None):
    if not self._is_open:
        self.open_cache()
    import Quandl as ql
    fetch = False
    data = None
    try:
        if self._db.has_key(qcode):
            cache = self._db[qcode]
            # refetch if the cached window does not cover the request
            if (cache[0] > start_date) or (cache[1] < end_date):
                fetch = True
            else:
                data = cache[2]
        else:
            fetch = True
    except Exception as e:
        logging.debug("Error happened with db")
        fetch = True
    try:
        if fetch:
            trim_start = self._format_date(start_date)
            trim_end = self._format_date(end_date)
            data = ql.get(qcode, column=column, trim_start=trim_start,
                          trim_end=trim_end, authtoken=self._authtoken,
                          transformation=transformation)
            if data is not None:
                self._db[qcode] = (start_date, end_date, data)
    except Exception as e:
        logging.exception("Exception fetching %s : %s" % (qcode, str(e)))
    #data = self._snap_to_interval(data, start_date, end_date)
    return data.ix[start_date:end_date]

def download_datasets(collection):
    """
    Downloads the datasets of a given collection. New files are fetched in
    full; existing files are appended only the new entries since the last
    update.
    """
    print("Downloading datasets for {0}".format(collection))
    collection_path = os.path.join("collections/{}".format(collection))
    collection_metadata = pandas.read_csv(os.path.join(collection_path, "metadata.csv"))
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
    # TODO: DUPLICATES
    # Iterate over datasets in the collection
    for idx, row in collection_metadata.iterrows():
        #print("Dataset {0} last updated at {1}".format(row[0], row[5]))
        ds_code = row[0]
        ds_name = ds_code.split("/")[1]
        ds_path = os.path.join(collection_path, ds_name + ".csv")
        if not os.path.exists(ds_path):
            # new dataset: download it in full on a worker thread
            executor.submit(quandl_download, ds_code, ds_path)
        else:
            ds_data = pandas.read_csv(ds_path)
            last_update = pandas.Timestamp(ds_data.iloc[-1][0])
            last_change = pandas.Timestamp(row[3])
            #print("Last update {0}: {1}, last change: {2}".format(ds_name, last_update, last_change))
            if last_change > last_update:
                ds_diff = Quandl.get(ds_code,
                                     trim_start=last_update + pandas.DateOffset(days=1),
                                     authtoken=auth_token)
                if not ds_diff.empty:
                    print("Updating dataset: {0}".format(ds_code))
                    ds_file = open(ds_path, "a")
                    # append the new rows without repeating the header
                    # (the original passed index_names=False, which to_csv
                    # does not accept)
                    ds_file.write(ds_diff.to_csv(header=False))
                    ds_file.close()