Example #1
def load_eur():
    """ Return cash rate for EUR and DEM prior to the introduction of EUR """
    bank_rate = quandl.get(CashFile.GER_BANKRATE.value,
                           api_key=AdagioConfig.quandl_token)

    ww2_data = pd.DataFrame([4.0, 3.5, 5.0],
                            index=[datetime(1936, 6, 30),
                                   datetime(1940, 4, 9),
                                   datetime(1948, 6, 28)])
    ww2_month = pd.date_range('1936-06-01', '1948-06-01', freq='M')
    ww2_month = pd.DataFrame(index=ww2_month)
    ww2_data = pd.concat((ww2_data, ww2_month), axis=1).fillna(method="pad")

    parser = lambda d: date_shift(datetime.strptime(d, "%Y-%m"),
                                  "+BMonthEnd")
    filename = join(DATA_DIRECTORY, 'cash_rate', 'eur', 'BBK01.SU0112.csv')
    discount_rate = pd.read_csv(filename,
                                skiprows=[1, 2, 3, 4], index_col=0,
                                usecols=[0, 1], engine="python", skipfooter=95,
                                parse_dates=True, date_parser=parser)
    ib_rate = DataReader(CashFile.EUR_3M_IB_RATE.value, "fred", START_DATE)
    libor = quandl.get(CashFile.EUR_3M_EURIBOR.value,
                       api_key=AdagioConfig.quandl_token)

    data = (pd.concat((bank_rate[:"1936-06"].fillna(method="pad"),
                       ww2_data,
                       discount_rate[:"1959"].fillna(method="pad"),
                       to_monthend(ib_rate['1960':"1998"].fillna(method="pad")),
                       libor['1999':].fillna(method="pad")),
                      axis=1)
            .sum(axis=1).rename("cash_rate_eur"))
    return data
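# to_monthend() is used above but never defined in the snippet (it presumably
# lives in the same module as date_shift and CashFile). A minimal sketch,
# assuming it merely snaps each index entry to the business month-end, the
# same convention the parser lambda applies:
import pandas as pd
from pandas.tseries.offsets import BMonthEnd

def to_monthend(df):
    out = df.copy()
    out.index = out.index + BMonthEnd(0)  # roll each date to business month-end
    return out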
Example #2
def sample():
    df1 = quandl.get("FMAC/HPI_AL", authtoken=api_key)
    df2 = quandl.get("FMAC/HPI_AK", authtoken=api_key)

    df1.columns = ['HPI_AL']
    df2.columns = ['HPI_AK']

    print(df1.head())
    print(df2.head())

    joined = df1.join(df2)
    print(joined.head())
def mortgage_30yr():
	df = quandl.get('FMAC/MORTG', trim_start="1975-01-01")
	df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100
	df = df.resample('M').mean()
	df.rename(columns={'Value': 'M30'}, inplace=True)
	df = df['M30']
	return df 
def gdp_data():
    df = quandl.get("BCB/4385", trim_start="1975-01-01")
    df["Value"] = (df["Value"]-df["Value"][0]) / df["Value"][0] * 100.0
    df=df.resample('M').mean()
    df.rename(columns={'Value':'GDP'}, inplace=True)
    df = df['GDP'] # DataFrame to Series
    return df
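# Both functions above rebase the raw series with (value - first) / first * 100,
# i.e. cumulative percent change since the series start: a first value of 50
# and a later value of 75 maps to (75 - 50) / 50 * 100 = 50.0.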
def load_stock_datasets():
    data_file = "stocks.xlsx"
    if os.path.isfile(data_file):
        stocks = pd.read_excel(data_file)
    else:
        quandl.ApiConfig.api_key = 'a5JKbmNDb4k98huTPMcY'
        google = quandl.get('WIKI/GOOGL')
        google["Company"] = "Google"
        facebook = quandl.get('WIKI/FB')
        facebook["Company"] = "Facebook"
        apple = quandl.get('WIKI/AAPL')
        apple["Company"] = "Apple"
        stocks = pd.concat([apple, facebook, google])
        stocks = stocks.reset_index()
        stocks.to_excel(data_file, index=False)
    return stocks
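# Usage sketch for the cache-or-download pattern above (writing Excel assumes
# an engine such as openpyxl is installed): the first call hits Quandl and
# writes stocks.xlsx, later calls read the local file instead.
stocks = load_stock_datasets()
print(stocks.groupby('Company').size())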
Example #6
def scrapeDailyNews():
	url = "http://finance.yahoo.com/q/hp?s=AAPL+Historical+Prices"

	content = urllib2.urlopen(url).read()
	soup = BeautifulSoup(content, "lxml")
	tbl = soup.find("table", {"class": "yfnc_datamodoutline1"}).findNext('table').find_all('tr')[1].find_all('td')
	
	newsDate = datetime.now()

	mydata = quandl.get("AOS/AAPL")

	row = mydata.iloc[-1:]
	avgSent = float(row['Article Sentiment'])
	impactScore = float(row['Impact Score'])

	news = News(
		date = newsDate,
		avgSent = avgSent,
		impactScore = impactScore)

	try:
		news.save()
		print("Saved news object ({})".format(news.objectId))
	except:
		print("News data has already been saved.")
Example #7
def load_from_quandl(self):
    """ Return data downloaded from Quandl """
    logger.debug('Downloading data from Quandl')
    data = quandl.get(self[keys.quandl_ticker],
                      api_key=AdagioConfig.quandl_token)
    self.check_if_expired(data)
    return data
def grab_initial_state_data():
    states = state_list()
    main_df = pd.DataFrame()

    for abbv in states:
        query = "FMAC/HPI_"+str(abbv)
        print(query)
        
        df = quandl.get(query, authtoken=api_key)
        print(df.head())
        df.columns = [abbv]  ### This is the fix ###

        ## doing some manipulation on the DataFrame
        #df = df.pct_change()
        df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] *100
        
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df)
            #eammain_df = main_df.join(df, lsuffix=abbv)  ### Could also do this ###

    print(main_df.head())
    
    pickle_out = open('fiddy_states3.pickle','wb')  ##write bytes
    pickle.dump(main_df, pickle_out)
    pickle_out.close()        
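# state_list() is not defined in this snippet. Example #40 below builds the
# same "FMAC/HPI_" queries from a Wikipedia table, so a plausible sketch is:
import pandas as pd

def state_list():
    # the first column of the first table holds the state abbreviations;
    # [1:] skips the header row
    fiddy_states = pd.read_html(
        'https://simple.wikipedia.org/wiki/List_of_U.S._states')
    return fiddy_states[0][0][1:]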
Example #9
def load_quandl_newsentiment(dataset, start, end):
    cache_file = 'NS1/ns1-cache.csv'
    quandl_auth = 'T2GAyK64nwsePiJWMq8y'
    #ns1 = pd.DataFrame()
    i=1
    for index, row in dataset.iterrows():
        #ns2[i] = ns1
        ns1 = []
        ns1 = pd.DataFrame()
        stock_cache_file = row['NSCode']+'-cache.csv'
        if not(os.path.exists(stock_cache_file)):
            print(row['NSCode'])
            print ('Downloading news for', row['NSCode'])
            allnews_data = quandl.get(row['NSCode'], authtoken=quandl_auth)
            ns1 = ns1.append(allnews_data)
            ns1.to_csv(stock_cache_file)
        if os.path.exists(stock_cache_file):
            with open(stock_cache_file, 'r') as csvfile:
                csvFileReader = csv.reader(csvfile)
                next(csvFileReader)
                print ('Loading news from cache ', row['NSCode'])
                for rows in csvFileReader:
                    date=int(time_to_num(rows[0]))
                    if date > start and date < end:
                #        print(start,date,end)
                        datens.append(date)
                        sentiment.append(rows[1])
                #print (datens, ' ',sentiment,"\n")
        #ns1.to_csv(cache_file)
    return
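# Note: time_to_num(), datens and sentiment are used above but never defined
# in the snippet; presumably they live at module level. A sketch of
# time_to_num, assuming it encodes a 'YYYY-MM-DD' string as the integer
# YYYYMMDD so dates can be compared numerically:
datens, sentiment = [], []

def time_to_num(date_str):
    return int(date_str.replace('-', ''))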
def getEsFuturesStockPrice(symbol, start, end):
    """
    Adjusted close price from Quandl.
    """
    import quandl
    mydata = quandl.get(symbol, start_date=start, end_date=end, authtoken="zYuLi6xBbvDYgsQJApiA")
    return mydata["Close"]
def HPI_Benchmark():
	df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
	df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
	
	pickle_out = open('us_pct.pickle', 'wb')
	pickle.dump(df, pickle_out)
	pickle_out.close()
def sp500_data():
    df = quandl.get("YAHOO/INDEX_GSPC", trim_start="1975-01-01", authtoken=api_key)
    df["Adjusted Close"] = (df["Adjusted Close"]-df["Adjusted Close"][0]) / df["Adjusted Close"][0] * 100.0
    df=df.resample('M').mean()
    df.rename(columns={'Adjusted Close':'sp500'}, inplace=True)
    df = df['sp500']
    return df
def gdp_data():
    df = quandl.get("BCB/4385", trim_start="1975-01-01", authtoken=api_key)
    df["Value"] = (df["Value"]-df["Value"][0]) / df["Value"][0] * 100.0
    df=df.resample('M').mean()
    df.rename(columns={'Value':'GDP'}, inplace=True)
    df = df['GDP']
    return df
Example #14
def get_data():
	df = quandl.get('WIKI/GOOGL')
	df=df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume',]]
	df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
	df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
	df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
	return df
def grab_initial_state_data():
    states = state_list()

    main_df = pd.DataFrame()

    for abbv in states:
        query = "FMAC/HPI_" + str(abbv)
        df = quandl.get(query, authtoken=api_key)
        df = df.rename(columns={"Value": abbv})
        #        df = df.pct_change()
        df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0

        #        print df.head()

        #        print(query)
        if main_df.empty:
            main_df = df
        else:
            #            main_df = pd.merge(main_df, df, right_index=True, left_index=True)
            #            main_df = main_df.join(df, lsuffix='_left', rsuffix='_right')
            main_df = main_df.join(df)

    pickle_out = open("fiddy_states3.pickle", "wb")
    pickle.dump(main_df, pickle_out)
    pickle_out.close()
    print(main_df.head())
Example #16
def HPI_Benchmark():
    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
    df.columns = ['United States']
    df["United States"] = (df["United States"]-df["United States"][0]) / df["United States"][0] * 100.0
    pickle_out = open('HPI_bench.pickle','wb')
    pickle.dump(df, pickle_out)
    pickle_out.close() 
Example #17
def api_request(data_set):
    import quandl as q

    api_token = config.get("quandl", "api_key")
    data = q.get(data_set, authtoken=api_token).to_json(date_format="iso")
    # print(data)
    return data
def hpi_benchmark():
    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
    df.columns = ['usa']
    df['usa'] = (df['usa'] - df['usa'][0]) / df['usa'][0] * 100
    print('usa data is ready to analyze')
    print(df.head())
    return df
def get_quandl_df(label, newlabel, percent_change=False, trim_start=None):
    df = quandl.get(label, authtoken=API_KEY, trim_start=trim_start)
    df.rename(columns={'Value': newlabel}, inplace=True)
    if percent_change:
        # percent change is (new - old) / old
        df[newlabel] = (df[newlabel] - df[newlabel][0]) / df[newlabel][0] * 100.0
    return df
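# Example use of the helper above, assuming API_KEY is defined: rebase the
# national house-price index to percent change since the series start.
us_hpi = get_quandl_df('FMAC/HPI_USA', 'US_HPI', percent_change=True,
                       trim_start='1975-01-01')
print(us_hpi.head())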
Example #20
def store_quandl(code):
    data = quandl.get(code)

    new_entry_c = 0

    print('Storing quandl data for: %s...' % (code), " ", end="")
    # index is the date
    for index, row in data.iterrows():
        date  = index
        value = row['Value']

        exists = db.session.query(db.base.classes.quandl).filter(db.base.classes.quandl.date == date).filter(db.base.classes.quandl.code == code).first()
        
        if exists:
            continue

        new_data = db.base.classes.quandl(p_time=datetime.now(),
                                          date=date,
                                          code=code,
                                          value=value)

        db.session.add(new_data)
        db.session.commit()
        new_entry_c += 1

    print('added: %s/%s entries' % (new_entry_c, len(data)))
Example #21
def build_graph(ticker):
    # make a graph of closing prices from previous month

    # Create some data for our plot.
    data = quandl.get('WIKI/' + ticker)
    
    # graph last month's data
    enddate = date.today() - timedelta(1)
    startdate = enddate - relativedelta(months=1)
    wdata = data[startdate:enddate]

    x = wdata.index  # datetime formatted
    y = wdata['Close']  # closing prices

    # Create a line plot from our data.
    plot = figure(title='Data from Quandl WIKI set',
                  x_axis_label='date',
                  x_axis_type='datetime',
                  y_axis_label='price')

    plot.line(x, y, color='navy', alpha=0.5)

    script, div = components(plot)

    return script, div
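# A sketch of how the returned script/div pair might be embedded in a page
# with Flask (the route and the 'chart.html' template are hypothetical; the
# template would render {{ plot_script|safe }} and {{ plot_div|safe }}):
from flask import Flask, render_template

app = Flask(__name__)

@app.route('/chart/<ticker>')
def chart(ticker):
    script, div = build_graph(ticker)
    return render_template('chart.html', plot_script=script, plot_div=div)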
Example #22
def get_files(file_list, dir_prefix, target_directory_name):
    '''
    INPUT: list of file names, target directory name
    OUTPUT: csv files of passed names in target directory
    '''
    path = os.getcwd()+'/{}/{}/'.format(target_directory_name, dir_prefix)
    ql.ApiConfig.api_key = quandl_api_key

    if not os.path.exists(path):
        os.makedirs(path)

    for file_name in file_list:
        fixed_file_name = file_name[1].lower().replace(' - ', '_').replace(' ', '_')
        file_path = '{}{}.csv'.format(path, fixed_file_name)
        if not os.path.isfile(file_path):
            ql.get(file_name[0]).to_csv(file_path)
Example #23
def scrapeDailyOptions():
	url = "http://finance.yahoo.com/q/hp?s=AAPL+Historical+Prices"

	content = urllib2.urlopen(url).read()
	soup = BeautifulSoup(content, "lxml")
	tbl = soup.find("table", {"class": "yfnc_datamodoutline1"}).findNext('table').find_all('tr')[1].find_all('td')
	
	optionDate = datetime.strptime(tbl[0].string, "%b %d, %Y")

	mydata = quandl.get("VOL/AAPL")

	row = mydata.iloc[-1:]
	ivmean10 = float(row['IvMean10'])
	ivmean20 = float(row['IvMean20'])
	ivmean30 = float(row['IvMean30'])
	ivmean60 = float(row['IvMean60'])


	option = Option(
		date = optionDate,
		ivMean10 = ivmean10,
		ivMean20 = ivmean20,
		ivMean30 = ivmean30,
		ivMean60 = ivmean60)

	try:
		option.save()
		print("Save option object ({})".format(option.objectId))
	except:
		print("Option data has already been saved.")
Example #24
def get_unemployment(api_key):
	df = quandl.get("ECPI/JOB_G", trim_start="1975-01-01", authtoken=api_key)
	df.columns = ['unemployment']
	df['unemployment'] = (df['unemployment']-df['unemployment'][0])/df['unemployment'][0]*100.0
	df = df.resample('D').mean()
	df = df.resample('M').mean()
	return df
Example #25
def second_stock():	
	n = app_stock.vars['name']
	ss = "WIKI/" + n + ".4"
	mydata = quandl.get(ss, encoding='latin1', parse_dates=['Date'], dayfirst=True, index_col='Date', trim_start="2016-05-05", trim_end="2016-06-05", returns = "numpy", authtoken="ZemsPswo-xM16GFxuKP2")
	mydata = pd.DataFrame(mydata)
	#mydata['Date'] = mydata['Date'].astype('datetime64[ns]')
	x = mydata['Date']
	y = mydata['Close']
	p = figure(title="Stock close price", x_axis_label='Date', y_axis_label='close price', plot_height = 300, plot_width = 550)
	p.line(x, y, legend="Price in USD", line_width=3, color = "#2222aa")
	
	
	# Configure resources to include BokehJS inline in the document.
    # For more details see:
    #   http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh-embed
	js_resources = INLINE.render_js()
	css_resources = INLINE.render_css()

    # For more details see:
    #   http://bokeh.pydata.org/en/latest/docs/user_guide/embedding.html#components
	script, div = components(p, INLINE)
    
	html = flask.render_template(
		'stockgraph.html',
		ticker = app_stock.vars['name'],
		plot_script=script,
		plot_div=div,
		js_resources=js_resources,
		css_resources=css_resources,
	)
	return encode_utf8(html)
Example #26
def grab_initial_country_data():
    countries = country_list()

    main_df = pd.DataFrame()

    for abbv in countries:
        try:
            query = "ECONOMIST/BIGMAC_"+str(abbv)
            df = quandl.get(query, authtoken=api_key)

            c_name = [str(abbv)+"_"]
            k = []
            for i in range(0, len(df.columns)):
                k.append(str(c_name[0])+str(df.columns[i]))

            df.columns = k 
            print(query)

            if main_df.empty:
                main_df = df
            else:
                main_df = pd.concat([main_df,df], axis=1)

        except Exception:
            print(query, ' failed')

    pickle_out = open('BIGMAC_countries.pickle', 'wb')
    pickle.dump(main_df, pickle_out)
    pickle_out.close()
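# country_list() is never defined in the snippet. A minimal sketch, assuming
# it just returns ISO-style codes accepted by ECONOMIST/BIGMAC_<code>
# (this subset is hypothetical):
def country_list():
    return ['USA', 'GBR', 'JPN', 'CAN', 'AUS', 'CHN']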
def load_tables():
    rate_table = {}
    with open('currencies.csv', 'r') as currencies:
        reader = csv.reader(currencies)
        for row in reader:
            rate_table[row[0]] = quandl.get('CURRFX/{}USD'.format(row[0]))
            print("Saved {} rate table as pickle...".format(row[0]))
    return rate_table
def download_n225(start_date):
    data = quandl.get(
        'YAHOO/INDEX_N225',
        start_date=start_date,
        returns='numpy',
    )

    return [new_n225(d) for d in data]
def download_from_tse(code, start_date):
    data = quandl.get(
        'TSE/{0}'.format(code),
        start_date=start_date,
        returns='numpy',
    )

    return [new_stock(code, d) for d in data]
Example #30
def us_unemployment():
    df = quandl.get("ECPI/JOB_G", trim_start="1975-01-01", authtoken=api_key)
    df["Unemployment Rate"] = (df["Unemployment Rate"]-df["Unemployment Rate"][0]) / df["Unemployment Rate"][0] * 100.0
    df=df.resample('1D').mean()
    df=df.resample('M').mean()
    pickle_out = open('unemployment.pickle','wb')
    pickle.dump(df, pickle_out)
    pickle_out.close()        
Example #31
"""BIL.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/138moqxt9unvShu9E6D6CItu9BCNK_60G
"""

pip install quandl

import quandl
import numpy as np

quandl.ApiConfig.api_key = 'FZCdiqtsRgoffcyx2XqB'

df = quandl.get("XNSE/BIL")
print(df.head())

df.info()

x = df.drop("Adjustment Factor",axis=1)
y = x.drop("Adjustment Type",axis=1)
print(y)

y = y[['Close']]
print(y.head())

forecast = 10
y['Prediction'] = y[['Close']].shift(-forecast)

print(y.tail())
Example #32
    def calculateNonLinearRegression(self):

        df = quandl.get('WIKI/GOOGL')
        df = df[['Adj. Open','Adj. High','Adj. Low','Adj. Close','Adj. Volume']]
        df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
        df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0

        df = df[['Adj. Close','HL_PCT','PCT_change','Adj. Volume']]

        forecast_col = 'Adj. Close'
        df.fillna(-99999, inplace = True)

        forecast_out = int(math.ceil(0.01 * len(df)))

        df['label'] = df[forecast_col].shift(-forecast_out)

        x = np.array(df.drop(['label'], 1))
        x = preprocessing.scale(x)
        x = x[:-forecast_out]
        x_lately = x[-forecast_out:]

        df.dropna(inplace = True)
        y = np.array(df['label'])

        x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size = 0.2)

        # clf = LinearRegression(n_jobs = -1)
        # clf.fit(x_train, y_train)
        # with open('linearregression.pickle', 'wb') as f:
        #     pickle.dump(clf, f)

        pickle_in = open('STIPModels/RegressionAnalysis/linearregression.pickle', 'rb')
        clf = pickle.load(pickle_in)

        accuracy = clf.score(x_test, y_test)

        forecast_set = clf.predict(x_lately)

        # print('Forecast Set :\n',forecast_set,'\n\n','Accuracy :',accuracy,'\n','Forecast Out :',forecast_out)

        df['Regression'] = np.nan

        last_date = df.iloc[-1].name
        last_unix = last_date.timestamp()
        one_day = 86400
        next_unix = last_unix + one_day

        for i in forecast_set :

            next_date = datetime.datetime.fromtimestamp(next_unix)
            next_unix += one_day
            df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]

        df['Adj. Close'].plot()
        df['Regression'].plot()
        plt.legend(loc = 4)
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.show(block = False)

        return accuracy, forecast_out, forecast_set
Example #33
import quandl
import numpy as np



from sklearn.linear_model import LinearRegression

#support vector machine
from sklearn.svm import SVR

# used to split the data into training and test sets
from sklearn.model_selection import train_test_split

# retrieving stock data
dataframe = quandl.get("WIKI/GOOGL")
#result of data from Google
print(dataframe.head())

# we will use the adjusted close column

# working with only the adjusted close price
dataframe = dataframe[["Adj. Close"]]  # Adj. Close is the only input variable
print(dataframe.head())

# how many days into the future to predict
forecast_out = 30

#we need a target column which is shifted n units up

dataframe['Prediction'] = dataframe[['Adj. Close']].shift(-forecast_out) 
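# What the shift above does, on a toy frame: each row's Prediction is the
# Adj. Close forecast_out rows later, and the final forecast_out rows get NaN.
import pandas as pd
toy = pd.DataFrame({'Adj. Close': [10.0, 11.0, 12.0, 13.0]})
toy['Prediction'] = toy[['Adj. Close']].shift(-2)
print(toy)
#    Adj. Close  Prediction
# 0        10.0        12.0
# 1        11.0        13.0
# 2        12.0         NaN
# 3        13.0         NaN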
def api_call(ticker, start, end):
    prices_df = quandl.get('WIKI/'+ticker, authtoken=api_key, start_date=start, end_date=end)
    prices_df.index = pd.to_datetime(prices_df.index, infer_datetime_format=True)
    prices_df['Ticker'] = ticker
    prices_df.to_csv('trading_data.csv', mode='a', header=False)
# read and write data.hdf5 using pandas.HDFStore
store = pd.HDFStore('data.hdf5')

# key in your quandl authtoken
# quandl.ApiConfig.api_key = <your_token>

# start & end dates for downloading data
start = "2007-07-01"
end = "2017-06-30"

# fx historical data (daily - 24/7)
temp = [] # temp list to store pandas dataframe from quandl
# USD as based currency
currencies = ['CAD', 'AUD', 'HKD', 'EUR', 'GBP', 'JPY', 'MXN', 'CHF']
for currency in currencies:
    temp.append(quandl.get('CUR/'+currency, start_date=start, end_date=end))
temp_df = pd.concat(temp, axis=1)
temp_df.columns = currencies
store['fx'] = temp_df

# commodities historical data (daily - trading days)
temp = []
commodities = [#Barley, Corn, Rice, Soybeans, Wheat
               "ODA/PBARL_USD", "TFGRAIN/CORN.1", "ODA/PRICENPQ_USD", "TFGRAIN/SOYBEANS.1", "ODA/PWHEAMT_USD",
               #Sugar, Coffee Robusta, Cotton, Tea, Milk
               "CHRIS/ICE_SB1.1", "ODA/PCOFFROB_USD", "CHRIS/ICE_CT1.1", "ODA/PTEA_USD", "COM/MILK",
               #Bananas, Oranges, Peanuts
               "ODA/PBANSOP_USD", "ODA/PORANG_USD", "ODA/PGNUTS_USD",
               #Olive Oil, Palm Oil, Sunflower Oil, Rapeseed Oil 
               "ODA/POLVOIL_USD", "ODA/PPOIL_USD", "ODA/PSUNO_USD", "ODA/PROIL_USD",
               #Rubber, Soft Logs, Hard Logs, Hard Sawnwood, Soft Sawnwood
               ]
import pandas as pd
import trading_calendars
from six import iteritems

items = {
    'CHRIS/CME_US1': 'US1.csv',
    'CHRIS/CME_US2': 'US2.csv',
    'CHRIS/CME_TY1': 'TY1.csv',
    'CHRIS/CME_FV1': 'FV1.csv',
    'CHRIS/CME_TU1': 'TU1.csv'
}

quandl.ApiConfig.api_key = "TDCy_PPtqVsQCjxBJpsb"

for k, v in iteritems(items):
    mydata = quandl.get(k)
    mydata.rename(columns={'Settle': 'Close'}, inplace=True)
    mydata.columns = [str(c).lower() for c in mydata.columns]
    mydata = mydata[['open', 'high', 'low', 'close', 'volume']]
    mydata = mydata['1991':]

    cal = trading_calendars.get_calendar('NYSE')
    dates = cal.sessions_in_range('1991', '2017').tz_localize(None)
    dates_df = pd.DataFrame(index=dates)
    dates_df = dates_df.join(mydata)

    dates_df.to_csv('./futures/daily/' + v)

# Change benchmark.py to this to kill it
import pandas as pd
Example #37
"""
  Name     : c9_42_ff3factorDaily.py
  Book     : Hands-on Data Science with Anaconda
  Publisher: Packt Publishing Ltd. 
  Author   : Yuxing Yan and James Yan
  Date     : 4/16/2018
  email    : [email protected]
             [email protected]
"""

import scipy as sp
import pandas as pd
import quandl as qd
import statsmodels.api as sm
#quandl.ApiConfig.api_key = 'YOUR_API_KEY'
a = qd.get("WIKI/IBM")
p = a['Adj. Close']
n = len(p)
ret = []
#
for i in range(n - 1):
    ret.append(p[i + 1] / p[i] - 1)
#
c = pd.DataFrame(ret, a.index[1:n], columns=['RET'])
ff = pd.read_pickle('c:/temp/ffDaily.pkl')
final = pd.merge(c, ff, left_index=True, right_index=True)
y = final['RET']
x = final[['MKT_RF', 'SMB', 'HML']]
#x=final[['MKT_RF']]
x = sm.add_constant(x)
results = sm.OLS(y, x).fit()
Example #38
"""
The Food and Agriculture Organization generates wide-ranging data on many agricultural 
products from most countries and areas on a world-wide basis. 
For more info, please visit http://faostat.fao.org/

FREQUENCY: Year
LAST UPDATED: 3 days ago, on 24 Jul 2020 

REF: https://www.quandl.com/data/UFAO/LV_BEE_GBR

"""

# My API key.
qd_api.ApiConfig.api_key = '####################'

# Execute payload request.
api_data = qd_api.get('UFAO/LV_BEE_GBR', collapse="yearly")

# Remove index.
api_data.reset_index(inplace=True)

# Rename API Columns.
api_data.rename(columns={'Date': 'Year', 'Stocks - No': 'Stock'}, inplace=True)

# Reduce each full date down to its year.
lst_year = []
for lab, row in api_data.iterrows():
    str_year = str(row["Year"]).split('-')[0]
    lst_year.append(str_year)

# Update the year series with the updated year date format.
api_data["Year"] = lst_year
Example #39
import quandl
import math
import numpy as np
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from matplotlib import style
import datetime

style.use('ggplot')  # Style of graph plot is ggplot
"""
Get the data from quandl for specific company and pass to data_frame. Company selected here is 
eg. General Electric (GNE), the stock ticker is EURONEXT/GNE
"""
data_frame = quandl.get("EURONEXT/GNE")

# prints the first 5 rows with their headings (as received from Quandl)
print("\n", data_frame.head())
print("\nTotal number of data available in this set is :: ", len(data_frame))

# Select the column heads (features) to keep from the table
data_frame = data_frame[[
    "Open",
    "High",
    "Low",
    "Last",
    "Volume",
    "Turnover",
]]
Example #40
import quandl
import pandas as pd

api_key = open('apikey.txt').read()

df = quandl.get('FMAC/HPI_MEDOR', authtoken=api_key)
# print(df.head())

fiddy_states = pd.read_html(
    'https://simple.wikipedia.org/wiki/List_of_U.S._states')

# this is a list
# print(fiddy_states)

# this is a dataframe
# print(fiddy_states[0])

# this is a column
# print(fiddy_states[0][0])

for abbv in fiddy_states[0][0][1:]:
    print('FMAC/HPI_' + str(abbv))
Example #41
import quandl, math
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection, svm
from sklearn.linear_model import LinearRegression

quandl.ApiConfig.api_key = "7HCx_y8DkJyhCMtUcvX5"

df = quandl.get('WIKI/GOOGL')

df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
df['PCT_change'] = (df['Adj. Close'] -
                    df['Adj. Open']) / df['Adj. Open'] * 100.0

df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]

print(df.head())
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
import quandl
from assets import config
from assets import tickers
sns.set()
tf.compat.v1.random.set_random_seed(1234)

ticker = tickers.ticker_INTC
df = quandl.get(ticker, api_key=config.key)
#print(df.head())

minmax = MinMaxScaler().fit(df.iloc[:, 4:5].astype('float32')) # Close index
df_log = minmax.transform(df.iloc[:, 4:5].astype('float32')) # Close index
df_log = pd.DataFrame(df_log)
df_log.head()


test_size = config.horizon
simulation_size = config.simSize

df_train = df_log.iloc[:-test_size]
df_test = df_log.iloc[-test_size:]
print(df.shape, df_train.shape, df_test.shape)
Example #43
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
df = quandl.get("WIKI/AMZN")
print(df)
df = df[['Adj. Close']]
print(df)
forecast_out = 30
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
print(df)
X = np.array(df.drop(['Prediction'],1))
X = X[:-forecast_out]
print(X)
y = np.array(df['Prediction'])
y = y[:-forecast_out]
print(y)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence) 
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)
Example #44
import quandl
from sklearn import preprocessing
#df = quandl.get('WIKI/GOOGL')
df = quandl.get('WIKI/AAPL')
import math
import numpy as np
# Define the forecast column: it holds the label we want to predict
forecast_col = 'Adj. Close'
# Number of days to forecast, set here to 1% of the dataset length
forecast_out = int(math.ceil(0.01 * len(df)))
# Only the following columns of df are used
df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
# Build two new columns
# HL_PCT: percent spread between the day's high and the close
df['HL_PCT'] = (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
# PCT_change: percent change from the open to the close
df['PCT_change'] = (df['Adj. Close'] -
                    df['Adj. Open']) / df['Adj. Open'] * 100.0
# These are the feature columns actually fed to the model
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
# scikit-learn does not handle missing data, so fill NaNs with an
# unlikely sentinel value, here -99999
df.fillna(-99999, inplace=True)
# 'label' holds the prediction target: the Adj. Close column shifted
# up by 1% of the rows
df['label'] = df[forecast_col].shift(-forecast_out)
# Finally build the model input X, plus X_lately for the actual forecast
X = np.array(df.drop(['label'], 1))
X = preprocessing.scale(X)
# The last 1% of rows were left without a label by the shift above, so
# they become the inputs used when predicting
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
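# A sketch of the step that usually follows (it mirrors Example #32 above):
# drop the rows whose label is NaN, build y, fit a regressor, and forecast
# the last 1% of rows with X_lately.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

df.dropna(inplace=True)
y = np.array(df['label'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = LinearRegression()
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
print(clf.predict(X_lately))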
def scrape():
    # RANDOM TIMER TO MAKE ANY LOOPING CALLS TO A URL APPEAR MORE "HUMAN"
    rLow = int(g['LOOP_RNDM_SLEEP_LOW'])
    rHigh = int(g['LOOP_RNDM_SLEEP_HIGH'])
    rndm_sleep = random.randint(rLow, rHigh)
    # CALCULATE RETENTION DATE FROM RETENTION DAYS VARIABLE IN VARS TABLE
    retention_date = datetime.date.today() + datetime.timedelta(
        -int(g['DATA_RETENTION_DAYS']))
    retention_date_id = retention_date.strftime('%Y%m%d')
    # =============================================================================
    # DELETE FROM LOCAL DB WHERE A RERUN WOULD REPRODUCE "DUPLICATE" DATA
    # =============================================================================
    dbmgr = pyDB(g['DB'])
    q = r"""DELETE FROM {0} WHERE (captr_dte_id = {1} or captr_dte_id <= {2})""".format(
        g['TBL_NME'],  #[0]
        g['MSMT_DTE_ID'],  #[1]
        retention_date_id,  #[2]
    )
    dbmgr.query(q)
    # ==========================================================================================================================================================
    # SCRAPE PART - START
    # - this should be the primary section of code that changes
    # - only other sections that "may" change are DELETE and UPDATE DB statements
    # ==========================================================================================================================================================
    # PASS 1 - COMMODITY DATA =====================================================================
    quandl.ApiConfig.api_key = g['QUANDL_API_KEY']

    code_list = g['CMDTY_CDES'].split(',')

    for item in code_list:
        try:
            dat = quandl.get(
                item, authtoken=g['QUANDL_API_KEY'],
                rows=10)  #trim_start = "2016-01-01", trim_end = "2018-09-16")
            #print(dat)
            for index, row in dat.iterrows():
                #print( index, row[0], row[1], row[2], row[3], row[4])
                # =============================================================================
                # WRITE RESULTS OF SOUP ANALYSIS/SCRAPE TO LOCAL DB
                # =============================================================================
                dbmgr = pyDB(g['DB'])
                q = r"""INSERT INTO {0} (MSMT_DTE_ID, CMDTY_CDE, TRADE_DT, INDEX_VAL, HIGH_VAL, LOW_VAL, TTL_MRKT_VAL, DIV_MRKT_VAL, CAPTR_DTE_ID, STARTED_AT, FINISHED_AT) VALUES ({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', {8}, {9}, '{10}', '{11}')""".format(
                    g['TBL_NME'],  #[0]
                    index.strftime('%Y%m%d'),  #[1]  time.strftime('%Y%m%d')
                    item,  #[2]
                    index.strftime('%Y-%m-%d'),  #[3]
                    row[0],  #[4]
                    row[1],  #[5]
                    row[2],  #[6]
                    row[3],  #[7]
                    row[4],  #[8]
                    g['MSMT_DTE_ID'],  #[9]
                    g['STARTED_AT'],  #[10]
                    ''  #[11]
                )
                dbmgr.query(q)
        except:
            # capture a finish time to be entered into the db
            finished_at = time.strftime("%Y-%m-%d %H:%M:%S")
            # =============================================================================
            # WRITE RESULTS OF ERROR TO LOCAL DB
            # =============================================================================
            e = sys.exc_info()
            dbmgr = pyDB(g['DB'])
            dbmgr.write_log(finished_at, 'QUANDL API ERROR: ' + str(e), **g)

    # ==========================================================================================================================================================
    # SCRAPE PART - END
    # - this should be the primary section of code that changes
    # - only other sections that "may" change are DELETE and UPDATE db statements
    # ==========================================================================================================================================================
    # =============================================================================
    # UPDATE LOCAL DB WITH A FINISH TIME
    # =============================================================================
    finished_at = time.strftime(
        "%Y-%m-%d %H:%M:%S")  # capture a finish time to be entered into the db
    dbmgr = pyDB(g['DB'])
    q = r"""UPDATE {0} SET finished_at = '{1}' WHERE captr_dte_id = {2}""".format(
        g['TBL_NME'],  #[0]
        finished_at,  #[1]
        g['MSMT_DTE_ID']  #[2]
    )
    dbmgr.query(q)
Example #46
def graph():
    p = request.form['company']
    df = quandl.get("WIKI/MSFT")
    #print(data.head())
    print(df.columns.values)
    #creating dataframe
    data = df.sort_index(ascending=True, axis=0)
    data['Date'] = df.index
    new_data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Value'])
    for i in range(0, len(data)):
        new_data['Date'][i] = data['Date'][i]
        new_data['Value'][i] = data['Close'][i]

    #setting index
    new_data.index = new_data.Date
    new_data.drop('Date', axis=1, inplace=True)
    print(len(new_data))
    #creating train and test sets
    dataset = new_data.values
    length = int(len(data) * 0.80)

    train = dataset[0:length, :]
    valid = dataset[length:, :]

    #converting dataset into x_train and y_train
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)

    x_train, y_train = [], []
    for i in range(60, len(train)):
        x_train.append(scaled_data[i - 60:i, 0])
        y_train.append(scaled_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # create and fit the LSTM network
    # model = Sequential()
    # model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
    # model.add(LSTM(units=50))
    # model.add(Dense(1))

    # model.compile(loss='mean_squared_error', optimizer='adam')
    # model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)
    #model = load_model('StonksModel.h5')
    #predicting 246 values, using past 60 from the train data
    loaded_model = tf.keras.models.load_model('StonksModel.h5')
    inputs = new_data[len(new_data) - len(valid) - 60:].values
    inputs = inputs.reshape(-1, 1)
    inputs = scaler.transform(inputs)
    #print(len(inputs))

    X_test = []
    for i in range(60, inputs.shape[0]):
        X_test.append(inputs[i - 60:i, 0])
    X_test = np.array(X_test)
    print(len(X_test))

    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    result = loaded_model.predict(X_test)
    closing_price = scaler.inverse_transform(result)
    print(len(closing_price))
    # closing_price = model.predict(X_test)
    train = new_data[:-len(X_test)]
    valid = new_data[-len(X_test):]
    valid['Predictions'] = closing_price
    print(valid.tail())
    print(train.tail())
    plt.plot(train['Value'])
    plt.plot(valid[['Value', 'Predictions']])
    #os.path.abspath('.')
    #os.path.join('/home/k_pritam/project/myApp', '/static/images')
    plt.savefig('static/new_plot.png')
    return render_template('graph.html', name='GDP', url='new_plot.png')
# coding: utf-8
import quandl

sunspots = quandl.get("SIDC/SUNSPOTS_A")
print "Describe", sunspots.describe()
print "Non NaN observations", sunspots.count()
print "MAD", sunspots.mad()
print "Median", sunspots.median()
print "Min", sunspots.min()
print "Max", sunspots.max()
print "Mode", sunspots.mode()
print "Standard Deviation", sunspots.std()
print "Variance", sunspots.var()
print "Skewness", sunspots.skew()
print "Kurtosis", sunspots.kurt()
Example #48
import pandas as pd
import quandl

pd.set_option("display.width", None)
df = quandl.get("WIKI/GOOGL")
df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
df["High-Low Percentage"] = (df["Adj. High"] -
                             df["Adj. Close"]) / df["Adj. Close"] * 100.0
df["Percentage Change"] = (df["Adj. Close"] -
                           df["Adj. Open"]) / df["Adj. Open"] * 100.0
df = df[[
    "Adj. Close", "High-Low Percentage", "Percentage Change", "Adj. Volume"
]]
print(df.head())
def initialize(context):
    context.i = 0
    context.assets = list(map(lambda x: symbol(x), high_cap_company.Symbol.values))
    print(context.assets, len(context.assets))
    context.model_fee = 1e-3
    context.previous_predict_reward = 0
    context.previous_action = 0
    context.set_commission(commission.PerShare(cost=0.005, min_trade_cost=1.0))
    context.set_slippage(slippage.VolumeShareSlippage())
    context.bootstrap_sequence_length = 300
    context.max_sequence_length = 500
    context.tb_log_dir = './log/%s' % back_test_name
    
    context.target_profit_multiplier = 1.1
    bundle = bundles.load('quandl')
    start_date_str = str(context.get_datetime().date())
    initial_history_start_date = bundle.equity_daily_bar_reader.sessions[bundle.equity_daily_bar_reader.sessions < start_date_str][(-context.bootstrap_sequence_length - 1)]
    initial_history_end_date = bundle.equity_daily_bar_reader.sessions[bundle.equity_daily_bar_reader.sessions > start_date_str][0]
    filterd_assets_index = (np.isnan(np.sum(bundle.equity_daily_bar_reader.load_raw_arrays(columns=['close'], start_date=initial_history_start_date, end_date=initial_history_end_date, assets=context.assets), axis=1)).flatten() == False)
    context.assets = list(np.array(context.assets)[filterd_assets_index])
    print(context.assets, len(context.assets))
    remain_symbols = list(map(lambda x: x.symbol, context.assets))
    if not os.path.exists('history_data'):
        print('Start to download good history data')
        history_data = {}
        for s in remain_symbols:
            print('downloading', s)
            stock = quandl.get_table('WIKI/PRICES', date={'gte': str(initial_history_start_date)}, ticker=s)
            stock.index = stock.date
            history_data[s] = stock
        history_data = pd.Panel(history_data)
        history_data = history_data.transpose(2, 1, 0)
        history_data.to_pickle('history_data')
        context.history_data = history_data
        print('Done')
    else:
        print('history data exist')
        history_data = pd.read_pickle('history_data')
        context.history_data = history_data
    if not os.path.exists('trading_content'):
        sys.exit(1)
    else:
        news_vec = pd.read_csv('trading_content')
        news_vec.index = news_vec.date
        news_vec = news_vec.drop('date', axis=1)
        context.news_vec = news_vec
    if not os.path.exists('index'):
        print('downloading index data')
        spy = quandl.get("CHRIS/CME_SP1", authtoken="CTq2aKvtCkPPgR4L_NFs")
        gc = quandl.get("CHRIS/CME_GC1", authtoken="CTq2aKvtCkPPgR4L_NFs")
        si = quandl.get("CHRIS/CME_SI1", authtoken="CTq2aKvtCkPPgR4L_NFs")
        vix = pd.read_csv('http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vixcurrent.csv')
        vix.columns = vix.iloc[0]
        vix = vix[1:]
        vix.index = pd.DatetimeIndex(vix.Date)
        vix = vix.drop('Date', axis=1)
        vix = vix.astype(np.float64)
        vix.columns = ['Open', 'High', 'Low', 'Last']
        index_data = pd.Panel({'vix': vix, 'gc': gc, 'si': si, 'spy': spy})
        index_data = index_data.transpose(2, 1, 0)
        index_data.to_pickle('index')
        context.index_data = index_data['Last', str(initial_history_start_date):]
    else:
        print('index data exist')
        index_data = pd.read_pickle('index')
        context.index_data = index_data['Last', str(initial_history_start_date):]
    
    context.model = DRL_Portfolio(feature_number=len(context.assets) * 8 + 100 + context.index_data.columns.shape[0] * 7, asset_number=len(context.assets) + 1, object_function='sortino')
    context.model.init_model()
    context.tensorboard = TensorBoard(log_dir=context.tb_log_dir,session=context.model.get_session())
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
from pytrends.request import TrendReq

# Configure verbosity
tf.logging.set_verbosity(tf.logging.ERROR)

# Configure quandl API Key
# & Authenticate
apiKey = "-rLjiPduuzgzKp99MMHb"
ql.ApiConfig.api_key = apiKey

# Capture historical BTC price data
# Data showing the USD market price from Mt.gox
price = ql.get("BCHAIN/MKPRU", collapse="weekly")
price = price.rename(columns={"Value": "Price"})

price.plot(title='BTC Market Price', figsize=(18, 10))

# Bitcoin Difficulty
# Difficulty is a measure of how difficult it is to find a hash below a given target.
diff = ql.get("BCHAIN/DIFF", collapse="weekly")
diff = diff.rename(columns={"Value": "Network Difficulty"})

diff.plot(title='BTC Difficulty', figsize=(18, 10))

# Bitcoin Average Block Size
# The Average block size in MB
avbls = ql.get("BCHAIN/AVBLS", collapse="weekly")
avbls = avbls.rename(columns={"Value": "Average Block Size"})
import quandl
import pandas as pd
import numpy as np

mydata = quandl.get("YAHOO/INDEX_DJI",
                    start_date="2005-12-01",
                    end_date="2005-12-05")
authtoken = 'XXX'


def get_data_quandl(symbol, start_date, end_date):
    data = quandl.get(symbol,
                      start_date=start_date,
                      end_date=end_date,
                      authtoken=authtoken)
    return data


def generate_features(df):
    """Generate features for a stock/index based on historical price and performaance
    Args:
    df (dataframe with columns "Open", "Close", "High", "Low', "Volume", "Adjusted Close")
    Returns:
    dataframe, data set with new features
    """
    df_new = pd.DataFrame()
    #6 original features
    df_new['open'] = df['Open']
    df_new['open_1'] = df['Open'].shift(1)
    #shift index by 1, in order to take the value of the previous day.
    df_new['close_1'] = df['Close'].shift(1)
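    # The snippet is cut off here. A minimal, purely illustrative completion
    # consistent with the docstring (the original likely derives more features):
    df_new['high_1'] = df['High'].shift(1)
    df_new['low_1'] = df['Low'].shift(1)
    df_new['volume_1'] = df['Volume'].shift(1)
    df_new = df_new.dropna(axis=0)
    return df_new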
Example #52
# https://www.bilibili.com/video/BV1Rp4y1Y7Rn/?spm_id_from=autoNext
from datetime import date
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler

# 4.1 Load the data

start = date(2000, 10, 12)
end = date.today()
google_stock = pd.DataFrame(
    quandl.get("WIKI/GOOGL", start_date=start, end_date=end))

print(google_stock.shape)
print(google_stock.tail())
print(google_stock.head())
print(google_stock.columns.values)

# 4.2 Plot the historical closing-price trend
google_stock['Close'].plot(figsize=(16, 8))
# plt.figure(figsize=(16, 8))
# plt.plot(google_stock['Close'])
plt.show()

# 4.3 Build the training and validation sets
# Length of the look-back window (number of time steps)
time_stamp = 50
Example #53
def simple_list(request):
    quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY')
    stock = 'SXX'
    req = quandl.get('XLON/{}'.format(stock))
    table = StockTable(req)
    return render(request, "simple_table.html", {"table": table})
Example #54
def openfiles():
    #cornmkt = pd.read_csv('corn/corn_JUL14.txt', header = 0)
    cornmkt = quandl.get("SCF/CME_C1_FW",
                         authtoken="63gdVnc_-LzW9XyB1Ajk",
                         start_date="2007-05-11",
                         end_date="2018-04-08")
    #print(cornmkt.head())
    cmd = np.array(cornmkt.index.values)
    corn_mkt_dates = []  #np.chararray(cmd.shape)

    #print(corn_mkt_dates)
    for i in range(len(cmd)):
        date = (pd.to_datetime(str(cmd[i]))).strftime('%Y/%m/%d')
        #print(str(date))
        #date = date[0:4]+'/'+date[5:7]+'/'+date[8:10]
        #corn_mkt_dates[i] = str(date)
        corn_mkt_dates.append(str(date))
    corn_mkt_dates = np.array(corn_mkt_dates)
    cornmkt.index = corn_mkt_dates
    #print(corn_mkt_dates[0])
    #cornmkt = cornmkt.drop('Date', axis=1)
    #print(cornmkt.head())
    pure_mkt_corn = np.array(cornmkt)

    #soymkt = pd.read_csv('soybean/soybean_JUL14.txt', header = 0)
    soymkt = quandl.get("SCF/CME_S1_FW",
                        authtoken="63gdVnc_-LzW9XyB1Ajk",
                        start_date="2007-05-11",
                        end_date="2018-04-08")

    smd = np.array(soymkt.index.values)
    soy_mkt_dates = []  #np.chararray(smd.shape)

    #print(corn_mkt_dates)
    for i in range(len(smd)):
        date = (pd.to_datetime(str(smd[i]))).strftime('%Y/%m/%d')
        #print(str(date))
        #date = date[0:4]+'/'+date[5:7]+'/'+date[8:10]
        #corn_mkt_dates[i] = str(date)
        soy_mkt_dates.append(str(date))
        #date = str(smd[i])
        #date = date[0:4]+'/'+date[5:7]+'/'+date[8:10]
        #soy_mkt_dates[i] = date
    soy_mkt_dates = np.array(soy_mkt_dates)
    soymkt.index = soy_mkt_dates

    #print(soymkt.head())
    pure_mkt_soy = np.array(soymkt)

    corndfs = []
    soydfs = []

    names = [
        '2007to2008', '2008to2009', '2009to2010', '2010to2011', '2011to2012',
        '2012to2013', '2013to2014', '2014to2015'
    ]  #, '2015to2016', '2016to2017', '2017to2018']

    for name in names:
        corndfs.append(
            pd.read_csv('corn/USDAProj_Corn_' + name + '.csv', header=0))
        soydfs.append(
            pd.read_csv('soybean/USDAProj_Soybean_' + name + '.csv', header=0))

    fullcorn = pd.concat(corndfs, ignore_index=True, join='inner')
    fullcorn = fullcorn.truncate(after=161)
    fullcorn_dates = fullcorn['Date']
    fullcorn.index = fullcorn['Date']
    fullcorn = fullcorn.drop('Date', axis=1)

    #fullcorn = fullcorn.truncate(before='2010/07/09')

    #print(fullcorn.head())

    fullsoy = pd.concat(soydfs, ignore_index=True, join='inner')
    fullsoy = fullsoy.truncate(after=161)
    fullsoy_dates = fullsoy['Date']

    fullsoy.index = fullsoy['Date']
    #fullsoy = fullsoy.truncate(before='2010/12/10')
    #fullsoy = fullsoy.truncate(after='2014/07/10')
    fullsoy = fullsoy.drop('Date', axis=1)
    #print(fullsoy.head(10))

    # print((cornmkt.tail()))
    # print((fullcorn.tail()))
    # print((soymkt.tail()))
    # print((fullsoy.tail()))

    # print((cornmkt.head()))
    # print((fullcorn.head()))
    # print((soymkt.head()))
    # print((fullsoy.head()))

    # print((cornmkt['Open']['2010/07/06']))

    #fullcorn = np.array(fullcorn)
    #cornmkt = np.array(cornmkt)
    #fullsoy = np.array(fullsoy)
    #soymkt = np.array(soymkt)
    return (cornmkt, soymkt, fullcorn, fullsoy, corn_mkt_dates, soy_mkt_dates,
            np.array(fullcorn_dates), np.array(fullsoy_dates))
Example #55
import quandl
import pandas as pd

#myData = quandl.get("FRED/GDP")
#print(myData.head())
#print(myData.tail())

# Working with CSV
print("****  CSV  ****")
myData_01 = quandl.get("FRED/GDP")
myData_01.to_csv("recurso/Data_02_teste.csv")  # Saving a CSV

myData = pd.read_csv("recurso/Data_02.csv")  # Importing a CSV
print(myData.info())  # Getting info about the CSV
print(myData)  # Reading the CSV
print(myData.head())

# Working with Excel
print("****  EXCEL  ****")
#myData_02 = pd.read_csv("recurso/Data_02.csv") # Importing a CSV
#myData_02.to_excel("recurso/Data_03_teste.xlsx") # Saving an Excel file

myData_02 = pd.read_excel("recurso/Data_03.xlsx")  # Importing an Excel file
print(myData_02.head())  # Reading the Excel file
Example #56
#regression

import pandas as pd
import quandl
import math

df = quandl.get("WIKI/Googl")
df=df[["Adj.Open","Adj.High", "Adj.Close","Adj.Volume"]]

df["HL_%"] = (df["Adj.High"]-df["Adj.Close"]/df["Adj.Close"])* 100.0
df["% change"] = (df["Adj.Close"]-df["Adj.Open"]/df["Adj.Open"])* 100.0

df = df[["Adj.Close","HL_% ", "% change", "Adj.Volume"]]
print(df)

forecast_col = "Adj.Close"
df.fillna(-99999,inplace =True)

forecast_out = int(math.ceil(0.1 * len(df)))
df["label"] = df[forecast_col].shift (-forecast_out)

df.dropna(inplace = True)
print(df.tail())
import pandas as pd
import quandl

quandl.ApiConfig.api_key = "gAMrs6H3k6MaxsQMNFpL"
theData = quandl.get('WIKI/GOOGL')
theData = theData[[
    'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume'
]]
theData['raiseStock'] = (theData['Adj. High'] -
                         theData['Adj. Open']) / theData['Adj. Open'] * 100

theData = theData[['Adj. Open', 'Adj. High', 'raiseStock']]

print(theData.tail(20))
Example #58
from plotly.offline import plot, iplot
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

import numpy as np

import pandas as pd

import quandl

my_data2 = quandl.get("WIKI/GOOGL", authtoken="g621vWh9j9bPtzWZYTDa")
my_data3 = quandl.get("BCHARTS/ABUCOINSUSD", authtoken="g621vWh9j9bPtzWZYTDa")

#figure4
header = dict(values=['Google', 'Bitcoin'],
              align=['left', 'center'],
              font=dict(color='white', size=12),
              fill=dict(color='#119DFF'))

cells = dict(values=[
    round(my_data2.Open.pct_change().head()[1:], 3),
    round(my_data3.Open.pct_change().head()[1:], 3)
],
             align=['left', 'center'],
             fill=dict(color=["yellow", "white"]))

trace_f4 = go.Table(header=header, cells=cells)

data = [trace_f4]
layout = dict(width=500, height=300)
Example #59
# Imports
#Importing dependencies
import pandas as pd
import numpy as np
import talib as ta
import math
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (
    20, 10)  #change this if you want to reduce the plot images
import locale
from locale import atof
locale.setlocale(locale.LC_NUMERIC, '')

#Quandl dependency with API key
import quandl
quandl.ApiConfig.api_key = "f7_JWui3ztp2Yxh_xddT"

scriptcode = "ASHOKLEY"
df = quandl.get("NSE/" + scriptcode)
data = pd.DataFrame(df,
                    columns=[
                        'Date', 'Open', 'High', 'Low', 'Last', 'Close',
                        'Total Trade Quantity', 'Turnover (Lacs)'
                    ])
data['Date'] = data['Date'].apply(pd.to_datetime)
data = data.set_index("Date")
data = data.dropna(axis=0)

from MainProcess import DataManager
dm = DataManager()
datacopy = dm.Load(data, 5000, "Close", 0.9)
#Loading data
#Importing the data via quandl
#After creating the object, the argument inside the parentheses is the stock under study

import quandl

mydata = quandl.get('FRED/GDP')
print(mydata.tail())