Example #1
def request_Quandl(symbol, tableName, cols, digits):        
    
    con = connect_to_DB()
    
    create_table(tableName, con)
    
    str_ = """SELECT * FROM `""" +tableName+ """` ORDER BY DATE_TIME DESC LIMIT 1;"""
    print str_
    
    data = pd.read_sql_query(str_, con=con, index_col='DATE_TIME')
    
    if empty(data) == 0:
        #execute query with connection
        quandl_data = q.get(symbol, returns='pandas', authtoken=token, sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)
    else:
        last_date = str(data.index[0].year)+"-"+str(data.index[0].month)+"-"+str(data.index[0].day+1)
        print "Last date "+last_date
        quandl_data = q.get(symbol, returns='pandas', authtoken=token, trim_start=last_date, sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)
Example #2
def index():
  if request.method=='POST':
    if request.form.get('stocks',None):
      if request.form.get('symbols',None).find('#')==-1:
        try:
          sdata=[ql.get('WIKI/'+request.form.get('symbols',None),trim_start=get_start_date(),authtoken=q_toke)]
          names=[request.form.get('symbols',None)]
        except:
          flash("Sorry invalid name, please try another ticker symbol.")
          return render_template('index.html')

      else:
        try:
          sdata=[ql.get('WIKI/'+u,trim_start=get_start_date(),authtoken=q_toke)\
                 for u in request.form.get('symbols',None).rsplit('#')[:]]
          names=request.form.get('symbols',None).rsplit('#')[:]
        except:
          flash("Sorry invalid name, please try another ticker symbol.")
          return render_template('index.html')
      reqs=['adjc','adjo','c','o']
      r2ql=dict(zip(reqs,['Adj. Close','Adj. Open','Close','Open']))
      checked=[]

      #gather checked values
      for r in [request.form.get(n,None) for n in reqs]:
        if r:
          checked.append(r2ql[r])

      #make bokeh plot with the data
      script, div = make_bokeh_html(sdata,names,checked)

      return render_template('stocks.html',script=script,div=div)
  return render_template('index.html')
Example #3
    def load(self):
        # If this is the first time the data is requested, send queries to the Quandl website and download it there.
        #    Then, store the data in local directories as csv files
        # If the data has already been downloaded, load the local csv.
        fund_code = self.fund_code
        index_code = self.index_code
        start_date = self.start_date
        fund_file_name = "../data/" + fund_code.replace("/", "_") + ".csv"
        index_file_name = "../data/" + index_code.replace("/", "_") + ".csv"

        if path.isfile(fund_file_name): # file exists at local
            fund_data = pd.io.parsers.read_csv(fund_file_name, index_col=0) # the csv file has to be parsed to have date as index
        else: # download from Quandl and save it to local as csv file
            fund_data = Quandl.get(fund_code, trim_start=start_date, authtoken="XANwfFd9CmdoE3PdFzRg")
            fund_data.to_csv(fund_file_name)
        if path.isfile(index_file_name):
            index_data = pd.io.parsers.read_csv(index_file_name, index_col=0) # the csv file has to be parsed to have date as index
        else:
            index_data = Quandl.get(index_code, trim_start=start_date, authtoken="XANwfFd9CmdoE3PdFzRg")
            index_data.to_csv(index_file_name)

        # rename columns so that the two dataframes don't share any common names
        index_data.columns = map(''.join, zip([index_code+'_']*index_data.shape[1], list(index_data.columns))) # rename the columns with index_code as prefix
        fund_data.columns = map(''.join, zip([fund_code+'_']*fund_data.shape[1], list(fund_data.columns))) # rename the columns with fund_code as prefix    
        # join the two data frames by date
        self.data = fund_data.join(index_data, how='inner')
        if index_code+'_Adjusted Close' not in self.data.columns: # if no adjusted close, copy close to make name uniform across different data
            self.data[index_code+'_Adjusted Close'] = self.data[index_code+'_Close']
Example #4
def getCommodityPrice():
    #get crude oil data
    oilData = Quandl.get("OPEC/ORB", authtoken="xxxxxxxxxxxxxxxx")
    print(oilData.tail())
    print(oilData.head())
    oilData.to_csv('quandlcrudeoilData.csv')

    #get gold data.
    goldData = Quandl.get("BUNDESBANK/BBK01_WT5511",
                          authtoken="xxxxxxxxxxxxxxxx")
    print(goldData.tail())
    print(goldData.head())
    goldData.to_csv('quandlgoldData.csv')

    #get copper data
    copperData = Quandl.get("LSE/COPA", authtoken="xxxxxxxxxxxxxxxx")
    print(copperData.tail())
    print(copperData.head())
    copperData.to_csv('quandlcopperData.csv')

    #get silver data
    silverData = Quandl.get("LBMA/SILVER", authtoken="xxxxxxxxxxxxxxxx")
    print(silverData.tail())
    print(silverData.head())
    silverData.to_csv('quandlsilverData.csv')

    #get bitcoin pricing data.
    bitcoinData = Quandl.get("BAVERAGE/USD", authtoken="xxxxxxxxxxxxxxxx")
    print(bitcoinData.tail())
    print(bitcoinData.head())
    bitcoinData.to_csv('bitcoindata.csv')

    time.sleep(86400)  #sleep for a day
Example #5
	def extractData(self):
		#------------------------------
		#-- extract rawdata from quandl
		#------------------------------

		print "extracting data from quandl ... \n"

		# get data
		dataGbpAud = Quandl.get("BOE/XUDLADS", trim_start=self.startDate)
		dataEurGbp = Quandl.get("BOE/XUDLSER", trim_start=self.startDate)

		print "preparing data ... \n"
		dataGbpAud.columns = ['gbpaud']
		dataGbpEur = 1 / dataEurGbp
		dataGbpEur.columns = ['gbpeur']
		dataExchange = dataGbpEur.merge(dataGbpAud
			, how='inner'
			, left_index=True
			, right_index=True
		)

		print "extracting rawdata ... \n"
		self.rawData = []
		# build an ExchangeRates record for each row of merged data
		for index, row in dataExchange.iterrows():
			rowval = ExchangeRates(period=index,gbpeur=row['gbpeur'],gbpaud=row['gbpaud'])
			self.rawData.append(rowval)

		print "rawdata extract complete. \n"
Example #6
def request_Quandl(symbol, tableName, cols, digits):

    con = connect_to_DB()

    create_table(tableName, con)

    str_ = """SELECT * FROM `""" + tableName + """` ORDER BY DATE_TIME DESC LIMIT 1;"""
    print str_

    data = pd.read_sql_query(str_, con=con, index_col='DATE_TIME')

    if empty(data) == 0:
        #execute query with connection
        quandl_data = q.get(symbol,
                            returns='pandas',
                            authtoken=token,
                            sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)
    else:
        last_date = str(data.index[0].year) + "-" + str(
            data.index[0].month) + "-" + str(data.index[0].day + 1)
        print "Last date " + last_date
        quandl_data = q.get(symbol,
                            returns='pandas',
                            authtoken=token,
                            trim_start=last_date,
                            sort_order='asc')
        if empty(quandl_data) == 1:
            print quandl_data.tail()
            print "Data length: %s" % len(quandl_data)
            insert_data(quandl_data, cols, digits, con)
Example #7
def update_dataset_data(engine, dataset, api_key):
    """
    Updates data for each code in the data set, retrieving new entries since the last update.
    """
    update_code = sql.text(
        'UPDATE ' + dataset + '_CODES '
        'SET [Start Date] = :start, [End Date] = :end, [Last Updated] = :updated '
        'WHERE Code = :code')
    conn = engine.connect()

    # retrieve the current code table
    code_table = load_sql_data(engine, dataset + '_CODES', date_columns=['Start Date', 'End Date', 'Last Updated'])

    # TESTING ONLY
    code_table = code_table.iloc[:10, :]

    # fetch the first code's data to create the data frame
    data = Quandl.get(code_table['API Code'].iloc[0], rows=1, authtoken=api_key)
    data = data.reset_index()
    data = data.rename(columns=lambda x: x[0] + x[1:].lower())
    data['Code'] = code_table['Code'].iloc[0]
    data = data.iloc[0:0]

    # iterate over each code and append the returned data
    counter = 0
    for index, row in code_table.iterrows():
        if row['Last Updated'] == datetime(1900, 1, 1):
            # this is a new code so we need to pull all historical data
            print('Loading historical data for new company {0}...'.format(row['API Code']))
            code_data = Quandl.get(row['API Code'], authtoken=api_key)
        else:
            # incremental update from the current end date for the code
            code_data = Quandl.get(row['API Code'], trim_start=str(row['End Date']), authtoken=api_key)

        # concat new data to the total set of new records
        code_data = code_data.reset_index()
        code_data = code_data.rename(columns=lambda x: x[0] + x[1:].lower())
        code_data['Code'] = row['Code']
        data = pd.concat([data, code_data])

        # update the code table
        min_date = code_data['Date'].min().to_datetime()
        max_date = code_data['Date'].max().to_datetime()
        current_date = datetime.now()
        conn.execute(update_code, start=min_date, end=max_date, updated=current_date, code=row['Code'])

        counter += 1
        if counter % 100 == 0:
            print('Sleeping for one minute to avoid hitting API call limits...')
            time.sleep(60)

    # move the code column to the beginning
    columns = data.columns.tolist()
    columns = [columns[-1]] + columns[:-1]
    data = data[columns]

    save_sql_data(engine, data, dataset, exists='append', index=False)

    print(dataset + ' code data updated successfully.')
Example #8
def test_api_calls(api_key):
    """
    Test function with examples of calls using the Quandl API via python wrapper.
    """
    data = Quandl.get('WIKI/AAPL', authtoken=api_key)
    data_new = Quandl.get('WIKI/AAPL', rows=5, sort_order='desc', authtoken=api_key)
    data_range = Quandl.get('WIKI/AAPL', trim_start='2015-01-01', trim_end='2015-01-01', authtoken=api_key)
    data_multiple = Quandl.get(['WIKI/AAPL', 'WIKI/MSFT'], authtoken=api_key)
Example #9
def getQuandle(what):
    """ 
    Wrapper around Quandl requests, using authtoken only if available
    """
    if authtoken:
        return Quandl.get(what, authtoken=authtoken)
    else:
        return Quandl.get(what)
Example #10
def loadQuandlData():
    quandl_id = "hThEeCQhKC2oWz2hSB4x"
    it = Quandl.get(['GOOG/NASDAQ_AAPL','GOOG/NASDAQ_GOOGL','GOOG/NASDAQ_MSFT', 'GOOG/NASDAQ_FB','GOOG/NYSE_ORCL'], trim_start='1998-01-01', authtoken=quandl_id)
    financial = Quandl.get(['GOOG/NYSE_DB','GOOG/NYSE_GS','GOOG/NYSE_SAN'], authtoken=quandl_id)
    industrial = Quandl.get(['GOOG/NYSE_SI', 'GOOG/NYSE_GE'], authtoken=quandl_id)
    closing_it = it[['GOOG.NASDAQ_AAPL - Close','GOOG.NASDAQ_GOOGL - Close','GOOG.NASDAQ_MSFT - Close','GOOG.NASDAQ_FB - Close','GOOG.NYSE_ORCL - Close']]
    closing_financial = financial[['GOOG.NYSE_DB - Close','GOOG.NYSE_GS - Close','GOOG.NYSE_SAN - Close']]
    closing_industrial = industrial[['GOOG.NYSE_SI - Close','GOOG.NYSE_GE - Close']]
    closing_it.to_csv('Data/Closing_it.csv', sep=';')
    closing_financial.to_csv('Data/Closing_financial.csv', sep=';')
    closing_industrial.to_csv('Data/Closing_industrial.csv', sep=';')
Example #11
def fetchSpread(CONTRACT, M1, M2, ST_YEAR, END_YEAR, CONT_YEAR1, CONT_YEAR2, ST_DATE, END_DATE, BUCK_PRICE,
                STARTFROMZERO, years):
    startdate = datetime.strptime(ST_DATE, '%Y-%m-%d %H:%M:%S')
    enddate = datetime.strptime(END_DATE, '%Y-%m-%d %H:%M:%S')
    totalSpread = pd.Series()
    lastValue = 0
    for i in years:
        year = str(i)
        price = str(BUCK_PRICE)
        filename = CONTRACT + M1 + M2 + year + ST_DATE + END_DATE + price
        filename = re.sub('[/ ]', '_', filename)
        filename = re.sub('[:]', '.', filename)
        cont1 = str(CONTRACT) + str(M1) + str(i + CONT_YEAR1)
        cont2 = str(CONTRACT) + str(M2) + str(i + CONT_YEAR2)
        print('==============')
        print("contract1: " + cont1)
        print("contract2: " + cont2)
        startDate = startdate.replace(year=ST_YEAR - 2000 + i)
        endDate = enddate.replace(year=END_YEAR - 2000 + i)
        print('==============')
        print('Trim start: ', startDate.strftime('%Y-%m-%d'))
        print('Trim end: ', endDate.strftime('%Y-%m-%d'))
        print('==============')
        if not checkIfCached(filename):
            data1 = q.get(cont1, authtoken=AUTH_TOKEN, trim_start=startDate, trim_end=endDate)
            data2 = q.get(cont2, authtoken=AUTH_TOKEN, trim_start=startDate, trim_end=endDate)
            spread = (data1 - data2).Settle * BUCK_PRICE
            if spread.size == 0:
                print('!!!!!!!!!!!!*****WARNING****!!!!!!!!!!!!')
                print('No data available for contracts %s, %s. Skipping period from %s to %s.' % (
                    cont1, cont2, startDate.strftime('%Y-%m-%d'), endDate.strftime('%Y-%m-%d')))
                print('!!!!!!!!!!!!*****WARNING****!!!!!!!!!!!!')
                continue
            else:
                if math.isnan(spread[0]):
                    spread = spread.fillna(method='bfill')
                #replace NaN value with a previous one
                spread = spread.fillna(method='pad')

                #remove row with NAN value
                # spread = spread.dropna()
                writeCacheToFile(filename, spread)
        else:
            print("Loading cached data from file: %s !" % filename)
            cache = readCacheFromFile(filename)
            spread = cache
        if STARTFROMZERO:
            delta = lastValue - spread[0]
            spread = spread + delta
            totalSpread = totalSpread.append(spread)
            lastValue = totalSpread[-1]
    if totalSpread.size == 0:
        sys.exit(-1)
    return totalSpread
Example #12
def readLast5DaysStockPrice():
    with open('stock_features.csv', 'w') as csvfile:
        fieldnames = ['Stock', 'Adj_Close_1','High_1', 'Low_1'
                      ,'Adj_Close_2','High_2', 'Low_2','VolChg2_1','IndexChg2_1'
                      ,'Adj_Close_3','High_3', 'Low_3','VolChg3_2','IndexChg3_2'
                      ,'Adj_Close_4','High_4', 'Low_4','VolChg4_3','IndexChg4_3'
                      ,'Adj_Close_5','High_5', 'Low_5','VolChg5_4','IndexChg5_4']
        writer = csv.DictWriter(csvfile, lineterminator='\n', fieldnames=fieldnames)
        writer.writeheader()
    
        dow_code = 'BCB/UDJIAD1'
        dji_index = []
        j = 0
        while (j < 10):
            ticker = Share(stock_dict.get(j))
            i = 0
            stock_price = []
            current_date = date.today()
            fmt = '%Y-%m-%d'
            while( i < 5):
                if (ticker.get_historical(current_date.strftime(fmt), current_date.strftime(fmt)) == []):
                    current_date = current_date - timedelta(days=1)
                else:
                    stock_price += ticker.get_historical(current_date.strftime(fmt), current_date.strftime(fmt))
                    if(j == 0):
                        if(i == 0 and Quandl.get(dow_code, trim_start=current_date, trim_end=current_date, authtoken="T246AaoCUiwSyz1C4Vfe").values.tolist() == []):
                            dji_index.append(get_latest_dji())
                        else:
                            dji_index.append(Quandl.get(dow_code, trim_start=current_date, trim_end=current_date, authtoken="T246AaoCUiwSyz1C4Vfe").values.tolist()[0][0])
                                
                    current_date = current_date - timedelta(days=1)
                    i = i + 1    
            AbVolChg2_1 = int(stock_price[1].get('Volume')) - int(stock_price[0].get('Volume'))  
            VolChg2_1 = log(AbVolChg2_1,2)  if AbVolChg2_1 > 0 else -1*log(fabs(AbVolChg2_1),2)
            AbVolChg3_2 = int(stock_price[2].get('Volume')) - int(stock_price[1].get('Volume'))  
            VolChg3_2 = log(AbVolChg3_2,2)  if AbVolChg3_2 > 0 else -1*log(fabs(AbVolChg3_2),2)                        
            AbVolChg4_3 = int(stock_price[3].get('Volume')) - int(stock_price[2].get('Volume'))  
            VolChg4_3 = log(AbVolChg4_3,2)  if AbVolChg4_3 > 0 else -1*log(fabs(AbVolChg4_3),2)                        
            AbVolChg5_4 = int(stock_price[4].get('Volume')) - int(stock_price[3].get('Volume'))  
            VolChg5_4 = log(AbVolChg5_4,2)  if AbVolChg5_4 > 0 else -1*log(fabs(AbVolChg5_4),2)        
            writer.writerow({'Stock': stock_dict.get(j)
                         ,'Adj_Close_1' : stock_price[0].get('Adj_Close'),'High_1' : stock_price[0].get('High'),'Low_1' : stock_price[0].get('Low') 
                         ,'Adj_Close_2' : stock_price[1].get('Adj_Close'),'High_2' : stock_price[1].get('High'),'Low_2' : stock_price[1].get('Low')
                         ,'VolChg2_1': VolChg2_1,'IndexChg2_1': (float(dji_index[1]) - float(dji_index[0])) 
                         ,'Adj_Close_3' : stock_price[2].get('Adj_Close'),'High_3' : stock_price[2].get('High'),'Low_3' : stock_price[2].get('Low')
                         ,'VolChg3_2': VolChg3_2,'IndexChg3_2': (dji_index[2] - dji_index[1]) 
                         ,'Adj_Close_4' : stock_price[3].get('Adj_Close'),'High_4' : stock_price[3].get('High'),'Low_4' : stock_price[3].get('Low')
                         ,'VolChg4_3': VolChg4_3,'IndexChg4_3': (dji_index[3] - dji_index[2]) 
                         ,'Adj_Close_5' : stock_price[4].get('Adj_Close'),'High_5' : stock_price[4].get('High'),'Low_5' : stock_price[4].get('Low')
                         ,'VolChg5_4': VolChg5_4,'IndexChg5_4': (dji_index[4] - dji_index[3]) 
                         }) 
    
            j = j+1
Example #13
def info(symbol):
    query_params = request.args
    trim_start = query_params.get('start_date') or '2015-12-01'
    trim_end = query_params.get('end_date') or '2015-12-31'
    datasets = Quandl.search(symbol, authtoken=keys['quandl'], verbose=False)
    code = datasets[0][u'code']
    data = Quandl.get(code,
                      authtoken=keys['quandl'],
                      collapse='daily',
                      trim_start=trim_start,
                      trim_end=trim_end)
    return data.to_html()
Example #14
 def get_data_set(self, data_set, start, end, drop=None, collapse="daily", transform="None"):
     """
     Method for downloading one data set from Quandl
     :param data_set: the data set code
     :param start: the start date
     :param end: the end date
     :param drop: which columns to drop
     :param collapse: frequency of the data
     :param transform: any data transformations from quandl
     :return: the data set as a pandas data frame
     """
     if drop is None:
         drop = []
     if self.proxy:
         # If we are running behind the proxy set it up
         os.environ['HTTP_PROXY'] = "http://" + self.username + ":" + self.password + "@" + self.server
     # Check if the dataframe has been downloaded already in this session
     hash_val = hash(data_set + str(start) + str(end) + str(transform))
     if self.memoized_data.__contains__(hash_val):
         return self.memoized_data[hash_val]
     else:
         try:
             print("\tDownloading", data_set)
             # Otherwise download the data frame from scratch
             if transform != "None":
                 downloaded_data_frame = Quandl.get(data_set, authtoken=self.token, trim_start=start,
                                                    trim_end=end, collapse=collapse, transformation=transform)
             else:
                 downloaded_data_frame = Quandl.get(data_set, authtoken=self.token, trim_start=start,
                                                    trim_end=end, collapse=collapse)
             # Remove any unnecessary columns and rename the columns
             # print downloaded_data_frame.columns
             updated_column_labels = []
             for column_label in downloaded_data_frame.columns:
                 if column_label in drop:
                     downloaded_data_frame = downloaded_data_frame.drop([column_label], axis=1)
                 else:
                     updated_column_labels.append(data_set + "_" + column_label)
             downloaded_data_frame.columns = updated_column_labels
             self.memoized_data[hash_val] = downloaded_data_frame
             return downloaded_data_frame
         except Quandl.DatasetNotFound:
             print("Exception - DataSetNotFound", data_set)
         except Quandl.CodeFormatError:
             print("Exception - CallFormatError", data_set)
         except Quandl.DateNotRecognized:
             print("Exception - DateNotRecognized", data_set)
         except Quandl.ErrorDownloading:
             print("Exception - ErrorDownloading", data_set)
         except Quandl.ParsingError:
             print("Exception - ParsingError", data_set)
         except:
             print("Some other error occurred")
Example #15
def main():
	actually_query = True
	query_format = "WIKI/%s"
	sp500 = read_sp500()
	for c in sp500:
		query = query_format % c.stripped_ticker()
		#print("query: %s" % query)
		#continue
		if actually_query:
			'''
			data = Quandl.get(query, collapse='daily', trim_start='2015-10-01', trim_end='2015-10-31', limit=31, authtoken='ZbyrpahXoQapzxxLR8Qe')
			print(data.index)
			sum = 0
			for i,r in data.iterrows():
				print(r['Volume'])
				sum += r['Volume']
			print("sum: %d\n" % sum)
			'''
			try:
				volume_data = Quandl.get(query,
					collapse='daily',
					trim_start='2015-10-01',
					trim_end='2015-10-31',
					row=1,
					limit=1,
					sort_order="desc",
					transformation="cumul",
					authtoken='ZbyrpahXoQapzxxLR8Qe')
				close_data = Quandl.get(query,
					collapse='monthly',
					trim_start='2015-09-01',
					trim_end='2015-10-31',
					row=1,
					limit=1,
					sort_order="desc",
					transformation="rdiff",
					authtoken='ZbyrpahXoQapzxxLR8Qe')
				if 'Volume' in volume_data and len(volume_data['Volume']) > 0:
					c.stock_volume = volume_data['Volume'][0]
				else:	
					c.stock_volume = 'NaN'
				if 'Close' in close_data and len(close_data['Close']) > 0:
					c.stock_close = close_data['Close'][0]
				else:
					c.stock_close = '0.0'
			except Quandl.DatasetNotFound as e:
				c.stock_volume = 'DatasetNotFound'
				c.stock_close = '0.0'
			except Quandl.ErrorDownloading as e:
				c.stock_volume = 'ErrorDownloading'
				c.stock_close = '0.0'
			print("%s" % (c))
Example #16
def load_historical_data(engine, dataset, api_key, mode='normal'):
    """
    Creates a new data table for the provided data set and loads historical data for each code into the table.
    """
    update_code = sql.text(
        'UPDATE ' + dataset + '_CODES '
        'SET [Start Date] = :start, [End Date] = :end, [Last Updated] = :updated '
        'WHERE Code = :code')
    conn = engine.connect()

    # retrieve the current code table
    code_table = load_sql_data(engine, dataset + '_CODES', date_columns=['Start Date', 'End Date', 'Last Updated'])

    # TESTING ONLY
    code_table = code_table.iloc[:10, :]

    # fetch the first code's data to create the data frame
    data = Quandl.get(code_table['API Code'].iloc[0], rows=1, authtoken=api_key)
    data = data.reset_index()
    data = data.rename(columns=lambda x: x[0] + x[1:].lower())
    data['Code'] = code_table['Code'].iloc[0]
    data = data.iloc[0:0]

    # iterate over each code and append the returned data
    counter = 0
    for index, row in code_table.iterrows():
        code_data = Quandl.get(row['API Code'], authtoken=api_key)
        code_data = code_data.reset_index()
        code_data = code_data.rename(columns=lambda x: x[0] + x[1:].lower())
        code_data['Code'] = row['Code']
        data = pd.concat([data, code_data])

        # update the code table
        min_date = code_data['Date'].min().to_datetime()
        max_date = code_data['Date'].max().to_datetime()
        current_date = datetime.now()
        conn.execute(update_code, start=min_date, end=max_date, updated=current_date, code=row['Code'])

        counter += 1
        if counter % 100 == 0:
            print('Sleeping for one minute to avoid hitting API call limits...')
            time.sleep(60)

    # move the code column to the beginning
    columns = data.columns.tolist()
    columns = [columns[-1]] + columns[:-1]
    data = data[columns]

    save_sql_data(engine, data, dataset, exists='replace', index=False)

    print(dataset + ' historical data loaded successfully.')
Example #17
 def __init__ (self, job_queue, trim_start, trim_end, calc_params, authtoken, fp):
     self.job_queue = job_queue
     self.trim_start = trim_start
     self.trim_end = trim_end
     self.calc_params = calc_params
     self.authtoken = authtoken
     self.index_data = ql.get("YAHOO/INDEX_SPY", column=6, trim_start=self.trim_start, 
           trim_end=self.trim_end, authtoken=self.authtoken)
     self.index_data = self.index_data["Adjusted Close"]
     self.rfr_data = ql.get("USTREASURY/YIELD", column=1, trim_start=self.trim_start, 
           trim_end=self.trim_end, authtoken=self.authtoken)
     self.rfr_data = self.rfr_data["1 Mo"]*0.01
     self.fp = fp
     threading.Thread.__init__ (self)
Example #18
def get(content):
    if bool(content):
        return Quandl.get(
            content["dataCode"],
            trim_start=content["startDate"],
            trim_end=content["finishDate"],
            collapse=content["timeRange"],
            transformation="rdiff",
            returns=content["dataType"],
        )  # Warning: Uses Internet Connection
    else:
        return Quandl.get(
            "YAHOO/AAPL", trim_start="2001-01-01", trim_end="2010-01-01", collapse="annual", returns="pandas"
        )
Example #19
def get_market_value(stock='all', start='', end='', size=0):
    filename = 'WIKI_tickers.csv'
    if (stock == 'all'):
        f = open(filename, 'r')
        f.readline()
        for line in f:
            stock_value = line.replace('"', '').replace('\n', '').split(',', 1)
            print "%-10s : %s" % (stock_value[0], stock_value[1])
    else:
        data = None
        if (start == '' and end == ''): data = Quandl.get(stock)
        else: data = Quandl.get(stock, trim_start=start, trim_end=end)
        if (size == 0): print data
        else: print data.head(size).to_string()
Example #21
def test_api_calls(api_key):
    """
    Test function with examples of calls using the Quandl API via python wrapper.
    """
    data = Quandl.get('WIKI/AAPL', authtoken=api_key)
    data_new = Quandl.get('WIKI/AAPL',
                          rows=5,
                          sort_order='desc',
                          authtoken=api_key)
    data_range = Quandl.get('WIKI/AAPL',
                            trim_start='2015-01-01',
                            trim_end='2015-01-01',
                            authtoken=api_key)
    data_multiple = Quandl.get(['WIKI/AAPL', 'WIKI/MSFT'], authtoken=api_key)
Example #22
def get(content):
    if bool(content):
        return Quandl.get(
            content['dataCode'],
            trim_start=content['startDate'],
            trim_end=content['finishDate'],
            collapse=content['timeRange'],
            transformation='rdiff',
            returns=content['dataType'])  # Warning: Uses Internet Connection
    else:
        return Quandl.get('YAHOO/AAPL',
                          trim_start='2001-01-01',
                          trim_end='2010-01-01',
                          collapse='annual',
                          returns='pandas')
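For reference, a sketch of the content dictionary that get() above expects; the key names come directly from the code, while the concrete values are illustrative assumptions:

# Illustrative content dict; keys match get() above, values are assumptions.
content = {
    'dataCode': 'YAHOO/AAPL',    # Quandl dataset code
    'startDate': '2001-01-01',   # becomes trim_start
    'finishDate': '2010-01-01',  # becomes trim_end
    'timeRange': 'annual',       # becomes collapse
    'dataType': 'pandas',        # becomes returns
}
frame = get(content)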
Example #23
 def get_stock(self, ticker, start, end, drop=None, collapse="daily", transform="rdiff"):
     """
     :param ticker: ticker
     :param start: start-date
     :param end: end-date
     :param drop: columns in data-frame to drop
     :param collapse: frequency of data
     :param transform: rdiff = simple returns
     """
     quandl_args = "GOOG/" + ticker
     if drop is None:
         drop = ["Open", "High", "Low", "Volume"]
     hash_val = str(ticker) + "_" + str(start) + "_" + str(end)
     try:
         cached_data = pandas.read_csv("cache/" + str(hash_val))
         cached_data = cached_data.set_index("Date")
         return cached_data
     except IOError:
         try:
             # print("Downloading", data_set)
             # Otherwise download the data frame from scratch
             if transform != "None":
                 downloaded_data_frame = Quandl.get(quandl_args, authtoken=self.token, trim_start=start,
                                                    trim_end=end, collapse=collapse, transformation=transform)
             else:
                 downloaded_data_frame = Quandl.get(quandl_args, authtoken=self.token, trim_start=start,
                                                    trim_end=end, collapse=collapse)
             # Remove any unnecessary columns and rename the columns
             # print downloaded_data_frame.columns
             updated_column_labels = []
             for column_label in downloaded_data_frame.columns:
                 if column_label in drop:
                     downloaded_data_frame = downloaded_data_frame.drop([column_label], axis=1)
                 else:
                     updated_column_labels.append(quandl_args + "_" + column_label)
             downloaded_data_frame.columns = updated_column_labels
             downloaded_data_frame.to_csv("cache/" + str(hash_val))
             return downloaded_data_frame
         except Quandl.DatasetNotFound:
             print("Exception - DataSetNotFound", quandl_args)
         except Quandl.CodeFormatError:
             print("Exception - CallFormatError", quandl_args)
         except Quandl.DateNotRecognized:
             print("Exception - DateNotRecognized", quandl_args)
         except Quandl.ErrorDownloading:
             print("Exception - ErrorDownloading", quandl_args)
         except Quandl.ParsingError:
             print("Exception - ParsingError", quandl_args)
Example #24
    def grabCountryStats(self, indicators):
        numToPlot = self.numToPlot

        dne = dict()
        f = open(indicators[0], 'r+')
        for line in f:
            dne[line.replace('\n', '')] = 0

        for country in self.gdpLowToHigh:
            if numToPlot <= 0:
                break

            if country in dne.keys():
                pass
            else:
                #print countryStats[country]['name'] + '\n'
                for wwdiIndicator in indicators:
                    try:
                        requestCode = 'WWDI/' + country + '_' + wwdiIndicator
                        data = q.get(requestCode, authtoken=quandlToken)
                        # pick the latest date, and first value
                        value = data.tail(1).iloc[0, 0]
                        countryStats[country][wwdiIndicator] = value

                        numToPlot -= 1

                    except q.DatasetNotFound as e:
                        print "Dataset not found for: " + self.countryStats[
                            country]['name']
                        f.write(country + '\n')
                        break
                    except IndexError as e:
                        print e
                    except q.ErrorDownloading as e:
                        print e
Example #25
def draw(tick):
    #import numpy as np

    mydata = Quandl.get("WIKI/" + tick)


    #markers_on = np.array(['2013-02-26','2015-01-26','2016-02-26', '2016-04-01'], dtype='datetime64')
    #df3 = pd.DataFrame(markers_on)
    #df4 = df3.set_index(0)
    #df5 = df4.join(mydata,how='left')
    #df6 = df5['Adj. Close']
    #mynewdata = mydata.join(df6,how="left",lsuffix='_OG',rsuffix='_Mark')


    #get trading start

    def tradedetails(tradetype,tradevalue,minprice,maxprice,isofficer,ceo,cfo,isdir,is10,isother,stock):
        hf = pd.read_html("http://openinsider.com/screener?fd=0&fdr=&td=0&tdr=&s="+ stock + "&o=&t="+ tradetype + "&minprice=" + str(minprice) + "&maxprice=" + str(maxprice) + "&v="+ str(tradevalue) +"&isofficer=" + str(isofficer) + "&isceo=" + str(ceo) + "&iscfo=" + str(cfo) + "&isdirector=" + str(isdir) + "&istenpercent=" + str(is10) + "&isother=" + str(isother) + "&sicMin=&sicMax=&sortcol=1&maxresults=1000")
        return hf[5]

    def convertdate(x):
        return x[5:7] + "/" + x[8:10] + "/" + x[0:4]

    def converttime(x):
        return x[11:]

    def convertnumber(x):
        return x.replace("+","").replace("$","").replace(",","")

    def cleandataframe(df):
        df['Trade Date'] = df['Trade Date'].apply(convertdate)
        df['Filing Time'] = df['Filing Date'].apply(converttime)
        df['Filing Date'] = df['Filing Date'].apply(convertdate)
        #df['Shares Traded'] = df['Shares Traded'].apply(convertnumber)
        df['Value Traded'] = df['Value Traded'].apply(convertnumber)
        #df['Shares Owned'] = df['Shares Owned'].apply(convertnumber)
        return df

    def cleanerdataframe(df):
        df['Trade Date'] = df['Trade Date'].apply(convertdate)
        df['Filing Time'] = df['Filing Date'].apply(converttime)
        df['Filing Date'] = df['Filing Date'].apply(convertdate)
        df['Shares Traded'] = df['Shares Traded'].apply(convertnumber)
        df['Value Traded'] = df['Value Traded'].apply(convertnumber)
        #df['Shares Owned'] = df['Shares Owned'].apply(convertnumber)
        return df


    detail = tradedetails("p",25000,"","",0,1,1,0,0,0,tick)
    pd.to_datetime(detail['Trade Date'])
    detail = detail.set_index('Trade Date')
    newdetail = detail.join(mydata,how='left')
    df6 = newdetail['Adj. Close']
    mynewdata = mydata.join(df6,how="left",lsuffix='_OG',rsuffix='_Mark')

    #get trading end

    plt.plot(mynewdata['Adj. Close_OG'])
    plt.plot(mynewdata['Adj. Close_Mark'],marker='o',color='r', markersize=11)
    plt.show()
Example #26
def get_futures_contract_dataset(exchange, contract, start=None, end=None, 
      auth_token=None):
    '''
    Synopsis
    --------
    Fetches dataset for the given futures contract from Quandl.

    Parameters
    ----------
    exchange: Name of futures exchange. String.
    contract: Name of futures contract (e.g., 'SPZ2015'). String.
    start: Optional start date for dataset. Ignores all data timestamped
           before this date. Example: 2015-01-01. String.
    end: Optional end date for dataset. Ignores all data timestamped
         after this date. Example: 2016-01-01. String. Cannot be before 
         the start date.
    auth_token: Quandl auth token. String.

    Returns
    -------
    Pandas DataFrame
    '''
    template = '{exchange}/{contract}'
    quandl_code = template.format(exchange=exchange, contract=contract)
    dataset = Quandl.get(
                quandl_code, 
                trim_start=start,
                trim_end=end,
                authtoken=auth_token)
    # Quandl returns futures datasets as a pandas DataFrame with a
    # "Date" column as its index; sets this as a proper column to
    # make aggregating data easier
    dataset = dataset.reset_index()
    return dataset
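A hedged usage sketch for get_futures_contract_dataset; the contract code reuses the docstring's own 'SPZ2015' example, and the CME exchange code is an assumption borrowed from other examples on this page:

# Usage sketch; exchange/contract values are illustrative.
df = get_futures_contract_dataset('CME', 'SPZ2015',
                                  start='2015-01-01', end='2016-01-01',
                                  auth_token='your_quandl_api_key')
print(df.head())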
Example #27
 def download_data_set(self, argument):
     assert isinstance(argument, Argument)
     data_frame = None
     try:
         data_set_name = argument.id
         if argument.prefix is not None:
             data_set_name = argument.prefix + data_set_name
         data_frame = Quandl.get(data_set_name, authtoken=self.api_key,
                                 trim_start=argument.start, trim_end=argument.end,
                                 transformation=argument.transformation, collapse=argument.collapse)
         assert isinstance(data_frame, pandas.DataFrame)
         for d in argument.drop:
             try:
                 data_frame = data_frame.drop(d, axis=1)
             except:
                 continue
     except Quandl.DatasetNotFound:
         print("Data set not found")
     except Quandl.ErrorDownloading:
         print("Error downloading")
     except Quandl.ParsingError:
         print("Parsing error")
     except Quandl.WrongFormat:
         print("Wrong format")
     except Quandl.CallLimitExceeded:
         print("Call limit exceeded")
     except Quandl.CodeFormatError:
         print("Code format error")
     except Quandl.MissingToken:
         print("Missing token")
     if data_frame is None:
         raise Exception("Data Set Not Initialized", argument.id)
     else:
         return data_frame
Example #28
 def run(self):
     while True:
         qcode = "N/A"
         try:
             qcode = self.job_queue.get()
             if qcode:
                 data = ql.get(qcode,
                               column=11,
                               trim_start=self.trim_start,
                               trim_end=self.trim_end,
                               authtoken=self.authtoken)
                 mpt = MPTStats(data["Adj. Close"], self.index_data,
                                self.rfr_data)
                 result = mpt.calculate(self.calc_params["calc_date"],\
                     freq=self.calc_params["freq"], span=self.calc_params["span"])
                 ticker = qcode.split("/")[1]
                 csv = self._to_csv(result)
                 with self._lock:
                     self.fp.write(ticker + "," +
                                   str(self.calc_params["calc_date"]) +
                                   "," + csv + "\n")
         except Exception as e:
             _logger.exception("Error %s for qcode %s" % (str(e), qcode))
         finally:
             self.job_queue.task_done()
Example #29
def get_quandl_earnings(date, use_cache=True):
    ls_spr = get_index_symbols(date, 'spr', use_cache).index.values
    
    # Derive surprise dates for each equity
    for i, s_equity in enumerate(ls_spr):
        s_equity_path = make_dir(date) + '/' + s_equity + '.csv'
        if os.path.isfile(s_equity_path):
            continue
        
        
        df_earnings = Quandl.get("ZES/"+s_equity.replace('.','_'), authtoken=quandl_token) # get surprise date from Quandl
        ls_earnings_dates = df_earnings.index.values
        
        # derive close price from Yahoo
        while True:
            try:
                dt_start = ls_earnings_dates[0] - np.timedelta64(1, 'W')
                break
            except IndexError:
                pass
        dt_end   = ls_earnings_dates[-1] + np.timedelta64(1, 'W')
        df_close = web.DataReader(s_equity.replace('.','/'), 'yahoo', dt_start, dt_end)['Adj Close']
        ls_close_dates = list(df_close.index)
        
        # append before/after close into df_earnings
        ls_annouce_hour = []
        ls_prev_close = []
        ls_next_close  = []
        
        for dt_date in ls_earnings_dates:
            i_annouce_hour = int(str(dt_date)[11:13])
            ls_annouce_hour.append(i_annouce_hour)
            
            while True:
                try:
                    i_date = ls_close_dates.index(dt_date)
                    break
                except ValueError:
                    if i_annouce_hour > 16:
                        dt_date -= np.timedelta64(1,'D')
                    elif i_annouce_hour < 9:
                        dt_date += np.timedelta64(1,'D')
                    else:
                        raise ValueError('Earning Annouce Hour between 9am and 4pm')
        
            if i_annouce_hour > 16:
                ls_prev_close.append(df_close[i_date])
                ls_next_close.append(df_close[i_date+1])
            elif i_annouce_hour < 9:
                ls_prev_close.append(df_close[i_date-1])
                ls_next_close.append(df_close[i_date])
            else:
                raise ValueError('Earning Annouce Hour between 9am and 4pm')

        df_earnings['ANNOUCE_HOUR'] = ls_annouce_hour
        df_earnings['BEFORE_CLOSE'] = ls_prev_close
        df_earnings['AFTER_CLOSE']  = ls_next_close
        
        # save into csv
        df_earnings.to_csv(s_equity_path, date_format='%Y%m%d')
Example #30
def risk_free(start, end, date_index):
    # Get CDI Daily Rate
    risk_free = Quandl.get("BCB/12", trim_start=start, trim_end=end)
    risk_free.name = 'risk_free'
    risk_free.rename(columns={'Value': 'cdi'}, inplace=True)

    return risk_free.ix[date_index, :].dropna()
Example #31
 def put(self):
     stock_sticker = "WIKI/" + request.form['stocksticker']
     stockdata = Quandl.get(stock_sticker,
                            returns="numpy",
                            trim_start="2015-01-01",
                            trim_end="2015-01-31")
     return {"stockdata": list(stockdata.Close)}
Example #32
def get_daily_historic_data_quandl(**kwargs):

    ticker = kwargs["ticker"]
    quandl_database_4ticker = get_quandl_database_4ticker(ticker)

    quandl_input = {"authtoken": "zwBtPkKDycmg5jmYvK_s"}

    if "date_from" in kwargs.keys():
        date_from_string = cu.convert_datestring_format(
            {"date_string": str(kwargs["date_from"]), "format_from": "yyyymmdd", "format_to": "yyyy-mm-dd"}
        )
        quandl_input["trim_start"] = date_from_string

    if "date_to" in kwargs.keys():
        date_to_string = cu.convert_datestring_format(
            {"date_string": str(kwargs["date_to"]), "format_from": "yyyymmdd", "format_to": "yyyy-mm-dd"}
        )
        quandl_input["trim_end"] = date_to_string

    try:
        data_out = qndl.get(quandl_database_4ticker + "/" + ticker, **quandl_input)
        success = True
    except:
        print("Error Loading " + quandl_database_4ticker + "/" + ticker + ": " + str(sys.exc_info()[0]))
        success = False
        data_out = pd.DataFrame()

    new_column_names = ["Open Interest" if x == "Prev. Day Open Interest" else x for x in data_out.columns]
    data_out.columns = new_column_names

    return {"success": success, "data_out": data_out}
Example #33
def load_quandl_prices(dataset, start, transform='rdiff'):
    cache_file = '../data/prices-cache.csv'
    if os.path.exists(cache_file):
        print 'Loading prices from cache'
        return pd.read_csv(cache_file, index_col=[0, 1], parse_dates=True)
    else:
        prices = pd.DataFrame()
        quandl_auth = 'pdRRzNygCjs_YY5Y2MVe'
        for index, row in dataset.iterrows():
            print 'Downloading prices for', row['Ticker']
            all_price_data = Quandl.get(row['Code'],
                                        trim_start=start,
                                        transformation=transform,
                                        authtoken=quandl_auth)
            close_price_data = all_price_data[['Close']]
            close_price_data['Ticker'] = row['Ticker']

            close_price_data['CloseClass'] = 'Neutral'
            close_price_data.loc[close_price_data['Close'] > 0,
                                 'CloseClass'] = 'Gain'
            close_price_data.loc[close_price_data['Close'] < 0,
                                 'CloseClass'] = 'Loss'

            prices = prices.append(close_price_data)
        prices.index = pd.MultiIndex.from_arrays(
            [prices.index, prices['Ticker']])
        prices.drop('Ticker', axis=1, inplace=True)
        prices.to_csv(cache_file)
        return prices
Example #34
 def get_index(self, index='XU030', price_type='Close',
               price_frequency='daily'):
     px = Quandl.get("GOOG/INDEXIST_%s" % index,
                     collapse=price_frequency, trim_start=self.start_date,
                     trim_end=self.end_date,
                     authtoken="Vbxts9YuGDArmJJbie4e")
     return px[price_type]
Example #35
def gdp_data():
    df = Quandl.get("BCB/4385", trim_start="1975-01-01", authtoken=api_key)
    df["Value"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
    df = df.resample('M')
    df.rename(columns={'Value': 'GDP'}, inplace=True)
    df = df['GDP']
    return df
Example #36
def short_interest(x):
	x = x.capitalize()
	symbol = 'SI/'+ x + '_SI'
	sym = q.get(symbol, authtoken = auth)
	if "Days To Cover" in sym.columns:
		dtc = "Days To Cover"
		sint = sym[dtc]
	else:
		dtc = "Days to Cover"
		sint = sym[dtc]
	sint = sint[-24:]
	median = sint.median()
	recent = sint[-1]
	avg = 100*(recent-median)/median
	avg = round(avg,2)
	# report the size of the move before returning, so this code is reachable
	std = sint.std()
	cstd = (recent - median)/std
	cstd = abs(cstd)
	cstd = round(cstd,2)
	cstd = str(cstd)
	print "This is a " + cstd + " standard deviation move"
	if avg > 0:
		avg = str(avg)
		return 'The short interest in '+ x.upper() + " rose " + avg +'%'
	else:
		avg = str(avg)
		return 'The short interest in '+ x.upper() + " fell " + avg + '%'
Example #37
def getStockPrices(ticker, frequency="monthly", update=False):
    """
    Gets the closing prices for a given stock ticker at a given frequency
    :param ticker: Name of the company's ticker
    :param frequency: Frequency of returned time series. See Quandl.get()'s collapse param.
    :param update: Always updates instead of using cache if true
    :return: Pandas dataframe representing time series of ticker's closing prices, -1 for
        connection errors
    """
    name = ticker + "_" + frequency  # Name of data in cache
    prices = None
    # If there is no cached version of the pickle, or update flag is on, download price data and cache it
    if update or not util.pickleExists(name):
        try:
            prices = Quandl.get(STOCK_DATA_SOURCE + ticker,
                                collapse=frequency,
                                authtoken="xx_T2u2fsQ_MjyZjTb6E")
            util.savePickle(prices, name)
        # Catch various connection errors
        except:
            return -1
    # Otherwise, use most recent cache entry
    else:
        prices = util.getMostRecentPickle(name)

    # Return closing prices
    return prices.get("Close")
Example #38
def quandl_download(ds_code, ds_path):
    """
    Executes Quandl API call for downloading whole dataset and saves it to given path.
    """
    print("Downloading dataset: {0}".format(ds_code))
    ds_data = Quandl.get(ds_code, authtoken=auth_token)
    ds_data.to_csv(ds_path)
Example #39
 def getDatasets(self):
     print "In get datasets"
     datasets = Quandl.search(query=self.symbol,
                              source="SEC",
                              prints=False,
                              authtoken=self.key)
     self.rawData = datasets
Example #40
def get_stock_data(symbol, start_date=None, end_date=None, db_code="WIKI"):
    """
    Get OHLC stock data from Quandl for a single stock
    :param symbol: string
    :param start_date: datetime
    :param end_date: datetime
    :param db_code: Quandl database code
    :return: DataFrame of stock data from start_date to end_date
    """

    if start_date is None:
        start_date = dt(year=1990, month=1, day=1)

    if end_date is None:
        end_date = dt.today()

    if start_date is not None and end_date is not None:
        assert start_date < end_date, "Start date is later than end date."

    log.info("Loading symbol: {}".format(symbol))

    quandl_code = db_code + "/" + symbol
    symbol_data = qd.get(quandl_code,
                         returns="pandas",
                         trim_start=start_date,
                         trim_end=end_date)
    return symbol_data
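A usage sketch for get_stock_data above; 'dt' is the datetime class the function itself uses for its defaults:

# Usage sketch; date values are illustrative.
from datetime import datetime as dt

aapl = get_stock_data("AAPL",
                      start_date=dt(2015, 1, 1),
                      end_date=dt(2015, 12, 31),
                      db_code="WIKI")
print(aapl.head())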
Example #41
def web_download(contract, start, end):
    df = Quandl.get(contract,
                    trim_start=start,
                    trim_end=end,
                    returns="pandas",
                    authtoken=simple.get_quandl_auth())
    return df
Example #42
def Pivots(symb,symbec='STK',eday=None):
	if eday== None:
		eday = datetime.now(timezone('US/Eastern'))
	bday=eday-timedelta(days=10)
	symbDict={'INDU':'^DJI','COMP':'^IXIC','SPX':'^GSPC','RUT':'^RUT'}
	if symb in symbDict.keys():
		symb=symbDict[symb]

	if symb=='^DJI':
		Data = Quandl.get("YAHOO/INDEX_DJI", authtoken='977FGLXLZ1wsdKt8DgUH',
							trim_start="%d-%02d-%02d" %(bday.year,bday.month,bday.day), 
							trim_end="%d-%02d-%02d" %(eday.year,eday.month,eday.day))[0:1]
		H=Data['High'].values[-1]
		L=Data['Low'].values[-1]
		C=Data['Close'].values[-1]

	else:
		Data = yf.getHistoricData(symb,bday.timetuple()[0:3],eday.timetuple()[0:3])[['high','low','close','adj_close']] # get data from yahoo finance
		H=Data['high'].values[-1]
		L=Data['low'].values[-1]
		C=Data['close'].values[-1]

	P = (H + L + C) / 3
	#P = (H + L + C + C) / 4
	#P = (H + L + O + O) / 4
	R1 = 2*P-L
	S1 = 2*P-H
	R2 = P+(H-L)
	S2 = P-(H-L)
	R3 = R1+(H-L)
	S3 = S1-(H-L)
	return [round(R3,1),round(R2,1),round(R1,1),round(P,1),round(S1,1),round(S2,1),round(S3,1)]
Example #43
    def fetch_quandl(self, code, **kwargs):
        '''
        Quandl entry point in datafeed object
        _____________________________________
        Parameters
            code: str
                quandl data code 
            kwargs: dict
                keyword args passed to quandl call
        _____________________________________
        Return:
            data: pandas.dataframe or numpy array
                 returned from quandl call
        '''
        log.debug('Fetching QuanDL data (%s)' % code)
        # This way you can use your credentials even if 
        # you didn't provide them to the constructor 
        if 'authtoken' in kwargs:
            self.quandl_key = kwargs.pop('authtoken')

        # Harmonization: Quandl names start_date trim_start and end_date trim_end
        if 'start_date' in kwargs:
            kwargs['trim_start'] = kwargs.pop('start_date')
        if 'end_date' in kwargs:
            kwargs['trim_end'] = kwargs.pop('end_date')

        try:
            data = Quandl.get(code, authtoken=self.quandl_key, **kwargs)
            data.index = data.index.tz_localize(pytz.utc)
        except:
            log.error('** Fetching %s from Quandl' % code)
            data = pd.DataFrame()
        return data
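A usage sketch showing the start_date/end_date harmonization described above; 'feed' is an assumed instance of whatever datafeed class defines fetch_quandl:

# Usage sketch; start_date/end_date are renamed to trim_start/trim_end internally.
data = feed.fetch_quandl('WIKI/AAPL',
                         start_date='2014-01-01',
                         end_date='2014-12-31')
print(data.head())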
Example #44
    def download_time_series(self,
                             vendor_ticker,
                             pretty_ticker,
                             start_date,
                             source,
                             csv_file=None):

        if source == 'Quandl':
            import Quandl
            # Quandl requires API key for large number of daily downloads
            # https://www.quandl.com/help/api
            spot = Quandl.get(
                vendor_ticker)  # Bank of England's database on Quandl
            spot = pandas.DataFrame(data=spot['Value'], index=spot.index)
            spot.columns = [pretty_ticker]

        elif source == 'Bloomberg':
            from bbg_com import HistoricalDataRequest
            req = HistoricalDataRequest([vendor_ticker], ['PX_LAST'],
                                        start=start_date)
            req.execute()

            spot = req.response_as_single()
            spot.columns = [pretty_ticker]
        elif source == 'CSV':
            dateparse = lambda x: pandas.datetime.strptime(x, '%Y-%m-%d')

            # in case you want to use a source other than Bloomberg/Quandl
            spot = pandas.read_csv(csv_file,
                                   index_col=0,
                                   parse_dates=0,
                                   date_parser=dateparse)

        return spot
Example #45
def load(*args, **kwargs):
    """Load data from Quandl into a dataframe, modify column names and
    check for non-numeric values."""
    # Grab the Quandl token
    # token = os.environ.get('QUANDL_TOKEN')
    # if token is None:
    token = raw_input("Enter Quandl token: ")
    ticker = raw_input("Enter Quandl ticker symbol (or hit Enter for default of YAHOO/INDEX_GSPC): ")
    if len(ticker) < 1:
        ticker = 'YAHOO/INDEX_GSPC'
    print(ticker)
    start_date = raw_input("Enter start date as YYYY-MM-DD (or hit ENTER for default of 1990-01-01): ")
    if len(start_date) < 1:
        start_date = '1990-01-01'
    print(start_date)
    # Call Quandl module, trim input by default from 1990 forward
    print('Pulling Quandl data...')
    df = Quandl.get(ticker, authtoken=token, trim_start=start_date)
    # Get the column labels
    # old_columns = list(df.columns.values)
    # Use the ticker symbol as our new prefix
    # ticker_tag = ticker.split('_')[-1] + '_'
    # Drop spaces and concatenate
    # new_labels = [ticker_tag + i.replace(' ', '') for i in old_columns]
    # Create a dictionary of old and new column labels
    # new_columns = dict(zip(old_columns, new_labels))
    # Rename the columns using our dictionary
    # df = df.rename(columns=new_columns)
    nulls = df[~df.applymap(np.isreal).all(1)]
    # Check for non-numeric values
    if len(nulls) > 0:
        raise ValueError('Dataframe contains non-numeric values')
    row_count = len(df)
    print('%d rows loaded into dataframe.' % row_count)
    return df
Example #46
    def store_ETF_data(self, tickers):
        settings = Settings()
        db = settings.get_mongo_db('Quandl', local=True)
        coll = db['ETF']

        for t in tickers:
            # find the ticker
            dates = coll.find_one({'name':t}, sort=[('Date', -1)])
            if (dates is None):
                print 'max_date is none'
                mdate = None
            else :
                mdate = pd.to_datetime(dates['Date']*1000*1000).strftime(format='%Y-%m-%d')


            print 'downloading {t}'.format(t=t)
            df = Quandl.get(t, trim_start=mdate)
            df['name'] = t
            df = df.reset_index()

            if mdate is None :
                df_a = df
            else :
                df_a = df[df['Date'] > mdate]

            if len(df_a) == 0:
                print "data for {ticker} is already updated till {date}".format(ticker=t, date=mdate)
                continue
            records = json.loads(df_a.T.to_json()).values()
            #print records
            print "uploading {t}".format(t=t)
            coll.insert_many(records)
Example #47
def hit_quandl(symbol, start, end):
    """Gets adjusted close data for a stock."""
    import os
    quandl_token = os.environ['QUANDL_TOKEN']
    price = Quandl.get("YAHOO/{}.6".format(symbol), trim_start=start,
                       trim_end=end, authtoken=quandl_token)
    return price.rename(columns={'Adjusted Close': symbol})
Example #48
def SMA(symb,intvs,symbec='STK',eday=None):
	if eday==None:
		eday = datetime.now(timezone('US/Eastern'))
	bday=eday-timedelta(days=365)
	symbDict={'INDU':'^DJI','COMP':'^IXIC','SPX':'^GSPC','RUT':'^RUT'}
	if symb in symbDict.keys():
		symb=symbDict[symb]
	if symb=='^DJI':
		Data = Quandl.get("YAHOO/INDEX_DJI", authtoken='977FGLXLZ1wsdKt8DgUH',
							trim_start="%d-%02d-%02d" %(bday.year,bday.month,bday.day), 
							trim_end="%d-%02d-%02d" %(eday.year,eday.month,eday.day))

		sums=list()
		for n in intvs:
			if len(Data.index)<n: # not enough data
				sums.append(-1)
			else:
				sums.append(round(Data['Adjusted Close'][-n-1:-1].mean(),1))

	else:
		Data = yf.getHistoricData(symb,bday.timetuple()[0:3],eday.timetuple()[0:3])[['adj_close']] # get data from yahoo finance
		print Data.tail()
		sums=list()
		for n in intvs:
			if len(Data.index)<n: # not enough data
				sums.append(-1)
			else:
				sums.append(round(Data['adj_close'][-n-1:-1].mean(),1))

	return sums
Example #49
 def readData(self, lookupTicker, source, start, end):
     
     '''Read the data - assumes start and end are datetime.date objects'''
     
     try:  
         lookupTicker = str(lookupTicker)
         if source == 'Quandl':
             #use Quandl reader
             start = str(start)
             end = str(end)
             data = Quandl.get(lookupTicker,
                               authtoken = self.quandlAuthToken,
                               trim_start = start, 
                               trim_end= end)
         else:
             #use pandas.io DataReader
             data = DataReader(lookupTicker, source , start, end)
             
         data = data.reset_index()
         logging.info("Read ticker {}".format(lookupTicker))
     except:
         logging.error("importData: Can't read ticker {}".format(lookupTicker))
         raise
     else:
         return data
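
A minimal sketch, assuming readData belongs to an importer class (named Importer here purely for illustration) that defines quandlAuthToken:

import datetime
imp = Importer()  # hypothetical owning class
gdp = imp.readData('FRED/GDP', 'Quandl', datetime.date(2010, 1, 1), datetime.date(2015, 1, 1))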
Example #50
0
def all_dbmetafrmSRC(pglmt=10000, src="YALE", authtkn=AUTHTOKEN):
    """
    all_dbmetafrmSRC = all_dbmetafrmSRC(pglmt=10000, src="YALE", authtkn=AUTHTOKEN)
    all_dbmetafrmSRC, i.e. all database metadata from a source, returns all the "metadata" (the information describing a database) for all databases from a single source, specified by a string that is the Quandl code.

    INPUTS:
    int pglmt: (default=10000) the maximum number of pages to search
    str src: (default="YALE", chosen as an example) the Quandl code for the SOURCE, or "src", to search from
    str authtkn: (default=AUTHTOKEN, declared globally at the beginning of the .py code) your Quandl authentication token; you can find it under API key in your Quandl account and paste it into the code
    """
    metalst = []
    counter = 1
    while counter <= pglmt:
        resp = Quandl.search(query="*",
                             source=src,
                             page=counter,
                             authtoken=authtkn)
        if len(resp) != 0:
            metalst = metalst + resp
            counter += 1
        else:
            break
    if counter > pglmt:
        print "Counter limit, pglmt, was reached; try a higher pglmt, i.e. page limit \n"

    return metalst
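
An illustrative call, assuming AUTHTOKEN holds a valid Quandl API key; the small pglmt just keeps the example quick:

yale_meta = all_dbmetafrmSRC(pglmt=5, src="YALE")
print "Found %d entries" % len(yale_meta)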
Example #51
0
def get_leanhog_contract_data(years):
    print("Loading contracts from Quandl web service....")
    print('  loading years: {0}'.format(years))
    recordList = []
    columnList = []
    columnRenameDict = {}
    for year in years:
        for month in all_months:
            s = "CME/LN{0}{1}".format(month, year)
            recordList.append(s)
            t = "CME.LN{0}{1} - Settle".format(month, year)
            tx = "LN{0}{1}".format(month, year)
            # May contracts didn't start until 2008
            if not ((month == 'K') and (year < 2008)):
                columnList.append(t)
                columnRenameDict.update({t: tx})

    df = q.get(recordList, authtoken="dXzByDoZicZy-WFvPyTf")

    # drop any expected columns that Quandl didn't return,
    # possibly a contract year that hasn't started trading yet
    columnList = [x for x in columnList if x in df.columns]

    df = df[columnList]
    df.rename(columns=columnRenameDict, inplace=True)

    return df
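
A usage sketch, assuming all_months is defined elsewhere in the module as the list of CME month codes for lean hog contracts; the year range is illustrative:

df = get_leanhog_contract_data(range(2010, 2016))
print df.tail()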
Example #52
0
def load_prices_quandl(codes, start_date=None, end_date=None, field_selector='CLOSE'):
    if not os.path.isdir(os.path.abspath(_CACHE_LOCATION)):
        os.makedirs(os.path.abspath(_CACHE_LOCATION))

    datasets_encoded = quote_plus('@'.join(codes))
    cache_path = os.path.abspath(os.sep.join([_CACHE_LOCATION, datasets_encoded]))
    if os.path.isfile(cache_path):
        logging.info('reading datasets from local cache')
        quandl_df = pandas.read_pickle(cache_path)

    else:
        quandl_df = Quandl.get(codes, authtoken=sensitive('quandl_token')).dropna()
        quandl_df.to_pickle(cache_path)

    filtered = quandl_df
    if start_date is not None and end_date is not None:
        filtered = quandl_df.ix[start_date:end_date]

    elif start_date is not None and end_date is None:
        filtered = quandl_df.ix[start_date:]

    elif start_date is None and end_date is not None:
        filtered = quandl_df.ix[:end_date]

    selected = filtered
    if field_selector is not None:
        pattern = ' - ' + field_selector.upper()
        selected = filtered.loc[:, filtered.columns.str.upper().str.endswith(pattern)]

    return selected
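
A hedged usage sketch, assuming _CACHE_LOCATION and the sensitive() credential helper are defined elsewhere in the module; the codes and dates are illustrative:

closes = load_prices_quandl(['WIKI/AAPL', 'WIKI/MSFT'],
                            start_date='2014-01-01', end_date='2014-12-31')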
Example #53
0
def createChart(t,d):
	ticker = t
	trading_days = int(d)
	try:
		df = Quandl.get('WIKI/' + ticker, authtoken=q_key)[-trading_days:]
		ptitle = ticker + ' Closing Price: last ' + str(trading_days) + ' trading days'
		max_split = max(df['Split Ratio'])
		if max_split > 1:
			# date of the largest split in the window
			split_date = df['Split Ratio'].idxmax()
			# flag the rows that precede the split
			df['Multiply'] = [split_date > d for d in df.index]
			# divide pre-split closes by the split ratio (1 - (s-1)/s == 1/s)
			df['AdjClose'] = [df.Close[x]*(1 - (max_split - 1)/max_split*df.Multiply[x].sum()) for x in np.arange(len(df))]
			df['Close'] = df['AdjClose']
		output_file("stockplot.html", title='Close Plot')
		p = figure(x_axis_type = 'datetime')
		p.line(df.index, df['Close'], color='black')
		p.title = ptitle
		p.xaxis.axis_label = 'Date'
		p.yaxis.axis_label = 'Price'
		save(p)
		st = df['Close'][0]
		ed = df['Close'][-1]
		cdgr = round((ed-st)/st*100.,2)
		script, div = components(p)
		p_dict = {'script':script, 'div':div, 'cdgr':cdgr}
	except Quandl.Quandl.DatasetNotFound:
		p_dict = {'script':None, 'div':None, 'cdgr':None}
	return p_dict
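
An illustrative call, assuming q_key holds a valid Quandl token; the ticker and window are arbitrary:

p_dict = createChart('AAPL', 90)
print p_dict['cdgr']  # None if the dataset was not found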
Example #54
0
def mortgage_30y():
    df = Quandl.get('FMAC/MORTG', trim_start="1975-01-01", authtoken=api_key)
    # rebase to the cumulative percent change since the first observation
    df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
    df.columns = ['M30']
    # upsample to daily, then take month-end means
    df = df.resample('D').mean()
    df = df.resample('M').mean()
    return df
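
A usage sketch, assuming api_key is defined at module level as in the surrounding examples:

m30 = mortgage_30y()
print m30.head()  # monthly percent change in the 30-year rate since 1975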
Example #55
0
def dl_quandl(query="USD", authtoken=None):
    count = 0
    for name, code in name_to_code(query=query, authtoken=authtoken).items():
        curve = Quandl.get(code, authtoken=authtoken)
        curve.to_csv(name + ".csv")
        count += 1
    print ("Got %d curves" % count)
Example #56
0
 def fetch_data(self, qcode, start_date, end_date, column=None, 
                transformation=None):
     if not self._is_open:
         self.open_cache()
     import Quandl as ql
     fetch = False
     data = None
     try:
         if qcode in self._db:
             cache = self._db[qcode]
             if (cache[0]>start_date) or (cache[1]<end_date):
                 fetch = True
             else:
                 data = cache[2]
         else:
             fetch = True
     except Exception as e:
         logging.debug("Error happened with db")
         fetch = True
     try:
         if fetch:
             trim_start = self._format_date( start_date )
             trim_end = self._format_date(end_date)
             data = ql.get(qcode, column=column, trim_start=trim_start, 
                                   trim_end=trim_end, authtoken=self._authtoken,
                                   transformation=transformation)
             if data is not None:
                 self._db[qcode] = (start_date, end_date, data)
     except Exception as e:
         logging.exception("Exception fetching %s : %s"%(qcode, str(e)))
         
     #data = self._snap_to_interval(data,start_date, end_date)
     if data is None:
         return None
     return data.ix[start_date:end_date]
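
A minimal sketch, assuming the method belongs to a cache class (called QuandlCache here only for illustration) that provides _db, _is_open, _format_date, and _authtoken:

import datetime
cache = QuandlCache()  # hypothetical owning class
gdp = cache.fetch_data('FRED/GDP', datetime.date(2010, 1, 1), datetime.date(2015, 1, 1))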
Example #57
0
def download_datasets(collection):
    """
    Downloads datasets of a given collection. New files are fetched in full,
    existing files are appended only the new entries since last update.
    """
    print("Downloading datasets for {0}".format(collection))
    collection_path = os.path.join("collections/{}".format(collection))
    collection_metadata = pandas.read_csv(os.path.join(collection_path, "metadata.csv"))
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
    # TODO: DUPLICATES
    # Iterate over datasets in collection
    for idx, row in collection_metadata.iterrows():
        #print("Dataset {0} last updated at {1}".format(row[0], row[5]))
        ds_code = row[0]
        ds_name = ds_code.split("/")[1]
        ds_path = os.path.join(collection_path, ds_name+".csv")
        
        if not os.path.exists(ds_path):
            executor.submit(quandl_download, ds_code, ds_path)
        else:
            ds_data = pandas.read_csv(ds_path)
            last_update = pandas.Timestamp(ds_data.iloc[-1][0])
            last_change = pandas.Timestamp(row[3])
            #print("Last update {0}: {1}, last change: {2}".format(ds_name, last_update, last_change))
            if last_change > last_update:
                ds_diff = Quandl.get(ds_code, trim_start=last_update+pandas.DateOffset(days=1), authtoken=auth_token)
                if not ds_diff.empty:
                    print("Updating dataset: {0}".format(ds_code))
                    ds_file = open(ds_path, "a")
                    ds_file.write(ds_diff.to_csv(header=False, index_names=False))
                    ds_file.close()
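
An illustrative call, assuming a collections/WIKI/metadata.csv listing exists and that quandl_download and auth_token are defined elsewhere in the module:

download_datasets("WIKI")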