def cleaned_maturity_data(self, expiry):
    """Return option quotes for one expiry with illiquid deep-OTM strikes removed."""
    filteredData = self.optionData
    data = filteredData.loc[(slice(None), expiry, slice(None)), :]

    # Walk OTM puts from the strike nearest the underlying outwards; once more
    # than two quotes at the 0.01 price floor have been seen, drop the rest.
    otmPut = data.loc[slice(None, self.underlying_price), ["Last"]]
    times = 0
    for index in range(len(otmPut) - 1, -1, -1):
        row = otmPut.iloc[index]
        if row.Last == 0.01:
            times += 1
        if times > 2:
            data.drop(row.name, inplace=True)

    # Same cleaning for OTM calls, again moving away from the money.
    otmCall = data.loc[slice(self.underlying_price, None), ["Last"]]
    times = 0
    for index in range(len(otmCall)):
        row = otmCall.iloc[index]
        if row.Last == 0.01:
            times += 1
        if times > 2:
            data.drop(row.name, inplace=True)

    # Discard anything that never traded.
    return data.loc[data["Last"] > 0]
def subsample_data(filename_data, filename_symbology, dir_pickle, start_date,
                   end_date, query_attribute, query_criteria, include_avg):
    # Try to read first from a pre-created pickle.
    query_criteria_filename = '-'.join(query_criteria[:3])
    pickle_name = (dir_pickle + 'pickle_sentiment_' + start_date + '_' + end_date
                   + '_' + query_attribute + '_' + query_criteria_filename
                   + '_' + str(include_avg) + '.p')
    try:
        data = pd.read_pickle(pickle_name)
        print("Loaded from pre-created pickle")
    except Exception:
        print("Subsampling data from csv")
        # read csv
        data = pd.read_csv(filename_data)
        # merge with symbology csv for additional info
        data_symbology = pd.read_csv(filename_symbology)
        # convert headers to uppercase for ease of use
        data_symbology.columns = [x.upper() for x in data_symbology.columns]
        data = pd.merge(data, data_symbology, on='SYMBOL', how='left')
        # perform filter query based on parameters
        data = data[data[query_attribute].isin(query_criteria)]
        # convert timestamps to datetime objects, truncated to the date
        data['DATE'] = pd.to_datetime(data['TIMESTAMP_UTC'],
                                      format='%Y-%m-%dT%H:%M:%SZ').dt.normalize()
        # query between start and end date
        data = data[(data['DATE'] >= start_date) & (data['DATE'] <= end_date)]
        # remove AVG columns unless requested
        if not include_avg:
            avg_cols = [col for col in data.columns if 'AVG' in col]
            data.drop(avg_cols, inplace=True, axis=1)
        # save as pickle
        data.to_pickle(pickle_name)
    # return dataframe
    return data
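# A hypothetical invocation of subsample_data; every file name, date and
# filter value below is a placeholder, not taken from the source.
df = subsample_data(filename_data='sentiment.csv',
                    filename_symbology='symbology.csv',
                    dir_pickle='./pickles/',
                    start_date='2017-01-01', end_date='2017-06-30',
                    query_attribute='SYMBOL',
                    query_criteria=['AAPL', 'MSFT', 'GOOG'],
                    include_avg=False)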
def get_returns(df):
    """Add log-return columns: close-to-close, open-to-close and close-to-open."""
    # Work on a deep copy so the caller's frame is left untouched.
    data = df.copy()
    data['lastClose'] = data['close'].shift(1)
    data['ret_cc'] = np.log(data['close'] / data['lastClose'])  # close-to-close return
    data['ret_oc'] = np.log(data['close'] / data['open'])       # open-to-close return
    data['ret_co'] = np.log(data['open'] / data['lastClose'])   # close-to-open (overnight) return
    # lastClose was only a helper column; drop it before returning.
    data.drop('lastClose', axis=1, inplace=True)
    return data
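# Minimal usage sketch for get_returns; the toy prices are illustrative only.
import numpy as np
import pandas as pd

prices = pd.DataFrame({'open': [100.0, 101.5, 102.0],
                       'close': [101.0, 102.5, 101.0]})
returns = get_returns(prices)
print(returns[['ret_cc', 'ret_oc', 'ret_co']])
# The first row's ret_cc and ret_co are NaN: there is no previous close.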
def stock_pic(data, time_interval, start_date, end_date, gradient_period):
    x, y = [], []
    print(data)
    # If no row matches a timestamp exactly, nudge that date forward one
    # minute at a time and remember how many steps were taken.
    trial_x, trial_y = 0, 0
    while len(x) == 0 or len(y) == 0:
        x = data[data['date'] == str(end_date)].index.values
        y = data[data['date'] == str(start_date)].index.values
        if len(x) == 0:
            end_date += datetime.timedelta(minutes=1)
            trial_x += 1
        if len(y) == 0:
            start_date += datetime.timedelta(minutes=1)
            trial_y += 1
    print(x, y)
    # Step back to where the original timestamps would have been
    # (assumes one row per minute).
    x, y = x - trial_x, y - trial_y
    print(x, y)

    # Mid price and its exponentially weighted moving average.
    data['6. AVG'] = (data['1. open'] + data['4. close']) / 2
    data['EWMA-AVG'] = data['6. AVG'].ewm(span=3).mean()

    # The sign of the smoothed slope over gradient_period is the label.
    gradient = (data['EWMA-AVG'][int(x)]
                - data['EWMA-AVG'][int(x) - gradient_period]) / gradient_period
    grad_label = torch.ones(1) if gradient > 0 else torch.zeros(1)

    # Keep the start-to-end window and plot open, close and EWMA as the image.
    data = data[int(y):int(x)]
    data = data.drop(['5. volume'], axis=1)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    ax.plot(data['1. open'], c='g', alpha=1)
    ax.plot(data['4. close'], c='r', alpha=1)
    ax.plot(data['EWMA-AVG'], c='b', alpha=1)
    plt.axis('off')

    image = __ImageToTensor(fig)
    if torch.cuda.is_available():
        return image.cuda(), grad_label.cuda()
    return image, grad_label
def stock_pic(data, time_interval, start_date, end_date, gradient_period):
    # Simpler variant of stock_pic: the empty-match guard below is only a
    # crude nudge (adding 1 to an empty index array is a no-op), whereas the
    # version above searches forward minute by minute.
    x = data[data['date'] == str(end_date)].index.values
    y = data[data['date'] == str(start_date)].index.values
    if not x:
        x = x + 1
    elif not y:
        y = y + 1

    data['6. AVG'] = (data['1. open'] + data['4. close']) / 2
    data['EWMA-AVG'] = data['6. AVG'].ewm(span=3).mean()
    gradient = (data['EWMA-AVG'][int(x)]
                - data['EWMA-AVG'][int(x) - gradient_period]) / gradient_period
    grad_label = torch.ones(1) if gradient > 0 else torch.zeros(1)

    # Slice from the start-date row to the end-date row.
    data = data[int(y):int(x)]
    data = data.drop(['5. volume'], axis=1)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    ax.plot(data['1. open'], c='g', alpha=1)
    ax.plot(data['4. close'], c='r', alpha=1)
    ax.plot(data['EWMA-AVG'], c='b', alpha=1)
    plt.axis('off')
    plt.show()

    image = __ImageToTensor(fig)
    if torch.cuda.is_available():
        return image.cuda(), grad_label.cuda()
    return image, grad_label
def normalise_stock_data(data):
    # Build an adjusted/normalised copy of raw Yahoo-style OHLCV data.
    data_adj = data
    #data_adj['Date'] = data.index.values+1
    for i in range(0, data.index.shape[0]):
        data_adj.loc[data.index[i], 'Ordinal/1e6'] = \
            data.index[i].to_pydatetime().toordinal() / 1e6
        data_adj.loc[data.index[i], 'Weekday'] = \
            data.index[i].to_pydatetime().weekday()
    # Keep only the date-derived columns; the six raw OHLCV columns are
    # rebuilt in adjusted form below.
    data_adj = data.drop(data.columns[[0, 1, 2, 3, 4, 5]], axis=1)

    # Adjustment factor, and prices rescaled so the first adjusted close is 1.
    data_adj['Adj'] = data['Adj Close'] / data['Close']
    data_adj['Adj Volume'] = data['Volume']
    #data_adj['Adj Volume'] -= np.min(data_adj['Adj Volume'])
    data_adj['Adj Volume'] /= np.max(data_adj['Adj Volume'])
    first_adj_close = data['Adj Close'].iloc[0]
    data_adj['Adj Close'] = data['Adj Close'] / first_adj_close
    data_adj['Adj Open'] = data['Open'] * data_adj['Adj'] / first_adj_close
    data_adj['Adj High'] = data['High'] * data_adj['Adj'] / first_adj_close
    data_adj['Adj Low'] = data['Low'] * data_adj['Adj'] / first_adj_close

    # Day-on-day relatives: the first row is the reference 1, the rest are
    # ratios to the previous adjusted close, then shifted so "no change" is 0.
    for col in ['Volume', 'Close', 'Open', 'High', 'Low']:
        data_adj.loc[data.index[0], 'Normalised ' + col] = 1
        data_adj.loc[data.index[1:], 'Normalised ' + col] = \
            data_adj['Adj ' + col][1:] / data_adj['Adj Close'][:-1].values
        data_adj.loc[data.index, 'Normalised ' + col] -= 1

    # The adjustment factor was only an intermediate; drop it.
    data_adj = data_adj.drop(['Adj'], axis=1)
    return data_adj
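# Quick self-contained check of normalise_stock_data on a toy Yahoo-style
# frame (column names and values are illustrative).
import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=4, freq='B')
raw = pd.DataFrame({'Open': [10.0, 10.5, 10.2, 10.8],
                    'High': [10.6, 10.9, 10.7, 11.0],
                    'Low': [9.9, 10.3, 10.0, 10.5],
                    'Close': [10.4, 10.6, 10.5, 10.9],
                    'Adj Close': [10.2, 10.4, 10.3, 10.7],
                    'Volume': [1000, 1200, 900, 1100]}, index=idx)
print(normalise_stock_data(raw)[['Normalised Close', 'Normalised Volume']])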
def download_ohlc(sector_tickers, start, end):
    sector_ohlc = {}
    for sector, tickers in sector_tickers.items():
        print('Downloading data from Yahoo for %s sector' % sector)
        data = pdr.get_data_yahoo(tickers, start, end)
        # Back-adjust open/high/low by the same factor as the adjusted close.
        for item in ['Open', 'High', 'Low']:
            data[item] = data[item] * data['Adj Close'] / data['Close']
        data.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
                             'Adj Close': 'close', 'Volume': 'volume'},
                    inplace=True)
        data.drop(['Close'], axis=1, inplace=True)
        sector_ohlc[sector] = data
    print('Finished downloading data')
    return sector_ohlc
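# Hedged call sketch for download_ohlc: the sector map and date range are
# illustrative, and pandas-datareader's Yahoo backend may need a workaround
# on current versions.
import datetime
import pandas_datareader.data as pdr

sector_tickers = {'tech': ['AAPL', 'MSFT'], 'energy': ['XOM', 'CVX']}
start = datetime.datetime(2016, 1, 1)
end = datetime.datetime(2016, 12, 31)
sector_ohlc = download_ohlc(sector_tickers, start, end)
print(sector_ohlc['tech'].head())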
def __solver_fitted_iv_function(self, data, t, s):
    print("Solver fitted implied volatility function...")

    # Squared relative error between the market IV and the SABR model IV.
    def calculate_var(a, v, p, F, k, iv):
        result = self.calculate_sabr(a, v, p, F, k, t)
        var = ((iv - result) / iv) ** 2
        return var

    # Objective: total squared relative error over all strikes.
    def F(x):
        params = data
        a, v, p = x[0], x[1], x[2]
        params.loc[:, "Var"] = params.apply(
            lambda row: calculate_var(a, v, p, s, row.name, row.Iv), axis=1)
        return params.loc[:, "Var"].sum()

    # alpha > 0, 0 < nu <= sqrt(1/t), -1 < rho < 1
    bnds = ((0.000001, None), (0.000001, math.sqrt(1 / t)),
            (-0.999999, 0.999999))
    x = scipy.optimize.minimize(F, [0.5, 0.5, 0.5], bounds=bnds)

    # Poor fit: drop the extreme IV quotes and retry recursively.
    if x.fun > 2.0:
        data.drop(data[data["Iv"] == min(data.Iv)].index, inplace=True)
        data.drop(data[data["Iv"] == max(data.Iv)].index, inplace=True)
        return self.__solver_fitted_iv_function(data, t, s)

    fitted_params = x.x
    print("Function solved. Function value: " + str(x.fun))
    # print(x)
    return fitted_params
import datetime
from pandas_datareader import data as web
from sklearn import linear_model

# List of stocks to analyse.
stocks = ['BCBA:BMA']
start = datetime.date(2017, 1, 1)
end = datetime.date(2017, 10, 6)

# Read OHLC data from Google Finance (this source has since been
# discontinued in pandas-datareader; swap in another provider if it fails).
data = web.DataReader(stocks[0], 'google', start, end)

# Linear regression for the close price.
# Split train and test sets: the last 50 rows are held out for testing.
X = data.drop(['Close'], axis=1)
y = data['Close']
X_train = X[:-50]
X_test = X[-50:]
X_test.dropna(inplace=True)
y_train = y[:-50]
y_test = y[-50:]

reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
#----------------------------------------------------------------------------------------------------------
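# A possible follow-up to score the held-out window (assumes the fitted reg
# and the train/test split above).
y_test = y_test.loc[X_test.index]  # realign after X_test.dropna()
print("Test R^2:", reg.score(X_test, y_test))
print("Coefficients:", reg.coef_)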
def getTrendingNews(q1):
    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your another key incase of exception')

    end = date.today()
    start = date.today() - relativedelta(days=+10)
    s1 = str(start).split(" ")[0]
    e1 = str(end).split(" ")[0]

    def fetch(client):
        news = client.get_everything(q=q1, from_param=s1, to=e1, language='en',
                                     sort_by='popularity', page_size=100, page=1)
        print(news, "1")
        # Arrange the news articles in a DataFrame.
        news_data = []
        for i in news['articles']:
            news_data.append([i["publishedAt"], i["title"], i["description"],
                              i["content"], i["url"], i["urlToImage"]])
        data = pd.DataFrame(np.array(news_data),
                            columns=['Date', 'Title', 'Description',
                                     'Content', 'URL', 'Image'])
        # Split each Date field into separate date and time columns.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])
        data['Date'] = pd.to_datetime(data.Date)
        # Newest articles first.
        return data.sort_values(by=['Date'], ascending=False)

    try:
        return fetch(newsapi)
    except Exception as e:
        print(e)
        try:
            # Retry once with the backup key.
            return fetch(NewsApiClient(api_key='Your another Key'))
        except Exception:
            return []
def getWordCloud(q1):
    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your another Key incase of exception')

    end = date.today()
    start = date.today() - relativedelta(days=+10)
    s1 = str(start).split(" ")[0]
    e1 = str(end).split(" ")[0]

    def build_cloud(client):
        news = client.get_everything(q=q1, from_param=s1, to=e1, language='en',
                                     sort_by='popularity', page_size=100, page=1)
        # Arrange the news articles in a DataFrame.
        news_data = []
        for i in news['articles']:
            news_data.append([i["publishedAt"], i["title"], i["description"],
                              i["content"], i["url"], i["urlToImage"]])
        data = pd.DataFrame(np.array(news_data),
                            columns=['Date', 'Title', 'Description',
                                     'Content', 'URL', 'Image'])
        # Split each Date field into separate date and time columns.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])
        data['Date'] = pd.to_datetime(data.Date)

        # Concatenate all descriptions and over-weight the query term itself.
        dff = data[['Date', 'Description']].groupby('Date').sum()
        text = " ".join(list(dff['Description'])) + q1 * 15
        wave_mask = np.array(Image.open("assets/cloud.jpg"))
        wordcloud = WordCloud(mask=wave_mask, width=512, height=512,
                              colormap="Greens").generate(text)

        # Save under a random file name so the front end never serves a stale image.
        in4 = ''.join(str(random.randint(0, 100000000000000)) for _ in range(3))
        shutil.rmtree('assets/images')
        os.mkdir("assets/images")
        wordcloud.to_file("assets/images/" + in4 + ".jpg")
        print("saved...")
        with open("assets/images/" + in4 + ".jpg", "rb") as imageFile:
            img1 = base64.b64encode(imageFile.read()).decode("utf-8")
        return [img1]

    try:
        return build_cloud(newsapi)
    except Exception as e:
        print(e)
        try:
            # Retry once with the backup key.
            return build_cloud(NewsApiClient(api_key='Your another key'))
        except Exception:
            return []
def getNews(df, s1, e1, q1):
    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your another key in case of exception')

    def score(client, with_image):
        news = client.get_everything(q=q1, from_param=s1, to=e1, language='en',
                                     sort_by='popularity', page_size=100, page=1)
        # Arrange the news articles in a DataFrame (the retry variant also
        # keeps the article image URL).
        news_data = []
        for i in news['articles']:
            row = [i["publishedAt"], i["title"], i["description"],
                   i["content"], i["url"]]
            if with_image:
                row.append(i["urlToImage"])
            news_data.append(row)
        columns = ['Date', 'Title', 'Description', 'Content', 'URL']
        if with_image:
            columns.append('Image')
        data = pd.DataFrame(np.array(news_data), columns=columns)

        # Split each Date field into separate date and time columns.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])
        data['Date'] = pd.to_datetime(data.Date)
        data = data.sort_values(by='Date')

        # One concatenated headline string per calendar day, aligned to df's index.
        dff = data[['Date', 'Title']].groupby('Date').sum()
        lTemp = {'Date': [], 'Title': []}
        for i in range(len(df)):
            lTemp['Date'].append(df.index[i])
            lTemp['Title'].append(" ")
        for i in range(len(lTemp['Date'])):
            if lTemp['Date'][i] in list(dff.index):
                lTemp['Title'][i] = dff['Title'][list(dff.index).index(lTemp['Date'][i])]
        lTemp = pd.DataFrame(lTemp)
        print(lTemp.head())

        # Vectorise the headlines and pad or trim to the width the classifier expects.
        freq_Vec = createVector(lTemp)
        if freq_Vec.shape[1] > 657:
            freq_Vec = freq_Vec.tocsr()[:, 0:658]
        else:
            q = np.array(freq_Vec.todense())
            b = np.zeros((freq_Vec.shape[0], 657 - freq_Vec.shape[1]))
            p = np.concatenate((q, b), axis=1)
            freq_Vec = csr_matrix(p)
        X_test = freq_Vec.toarray()
        X_test -= np.mean(X_test)

        # Scale the predicted sentiment by a volume-derived factor.
        maxScore = max(list(df['Volume']))
        minScore = min(list(df['Volume']))
        finScore = (maxScore + minScore) // 2
        ypred = predict_news(X_test)
        pol_score = []
        for i in range(len(ypred)):
            pol_score.append((ypred[i, 1] - ypred[i, 0]) * finScore)
        df['Polarity'] = pol_score
        return df

    try:
        return score(newsapi, with_image=False)
    except Exception as e:
        print(e)
        try:
            # Retry once with the backup key.
            return score(NewsApiClient(api_key='Your another Key'), with_image=True)
        except Exception:
            df['Polarity'] = [0 for i in range(len(df))]
            return df
df.shape   # inspect the frame from the previous cell
df.head()

import pandas as pd
from pandas_datareader import data as pdr

# Set the start and end date.
start_date = '1990-01-01'
end_date = '2019-04-27'
# Set the ticker.
ticker = 'AMZN'
# Get the data (aliasing the import keeps the module from being shadowed).
data = pdr.get_data_yahoo(ticker, start_date, end_date)
data.head()
data.shape

df = data.drop(['Volume'], axis=1)

import matplotlib.pyplot as plt
data['Adj Close'].plot()
df.plot()
plt.savefig('pandas_datareader_demo1.png')

import yfinance as yf
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
data.shape
data.head(10)

import yfinance as yf
stock_code = "GOLD.AX"
def removeCheapEquity(data, threshold=5):
    # Drop every ticker whose latest adjusted close is below the threshold.
    cheap = data['adj_close'].iloc[-1] < threshold
    cheapTickers = data.columns.levels[1][np.where(cheap)[0]]
    return data.drop(cheapTickers, level=1, axis=1)
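# Small self-contained check of removeCheapEquity on a toy attribute-by-ticker
# MultiIndex frame; note the positional lookup assumes column order matches
# the level order, as it does here.
import numpy as np
import pandas as pd

cols = pd.MultiIndex.from_product([['adj_close'], ['AAA', 'BBB', 'CCC']])
data = pd.DataFrame([[10.0, 3.0, 50.0],
                     [11.0, 2.5, 52.0]], columns=cols)
print(removeCheapEquity(data).columns.get_level_values(1).unique())
# -> only 'AAA' and 'CCC' survive; 'BBB' closed below the $5 threshold.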
import numpy as np
from pandas import DataFrame, Series

frame = DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'],
                  columns=['Ohio', 'Texas', 'California'])
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)
# frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill', columns=states)
frame.reindex(index=['a', 'b', 'c', 'd'], columns=states)
# frame.ix[['a', 'b', 'c', 'd'], states]  # .ix was removed; the reindex call above is the modern equivalent

obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
obj.drop(['d', 'c'])

data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
data.drop(['Colorado', 'Ohio'])
data.drop('two', axis=1)
data.drop(['two', 'four'], axis=1)

obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj['b']
obj[1]
obj[2:4]
obj[['b', 'a', 'd']]
obj[[1, 3]]
obj[obj < 2]
obj['b':'c']
obj['b':'c'] = 5
obj

data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])