def extract_quote(ticker, starttime, endtime=None):
    """Fetch a historical quote for `ticker` and add a 'Percent' change column.

    Args:
        ticker: stock symbol (case-insensitive; upper-cased internally).
        starttime: start date as 'YYYYMMDD'.
        endtime: end date as 'YYYYMMDD'; defaults to today (UTC), computed
            at call time.

    Returns:
        A pandas DataFrame with float 'Open'/'Close' columns and a
        'Percent' column of day-over-day close changes, or the raw string
        returned by yqd on failure.
    """
    ticker = ticker.upper()
    if endtime is None:
        # BUG FIX: the original default `endtime=time.strftime(...)` was
        # evaluated once at import time, freezing "today" forever.
        endtime = time.strftime("%Y%m%d", time.gmtime())
    # Debug switch: set to 1 to read a previously cached CSV instead of
    # hitting the network.
    loaddata = 0
    if loaddata == 1:
        requested = pd.read_csv(f'{lpath}{ticker}.csv')
    else:
        requested = yqd.load_yahoo_quote(ticker, starttime, endtime,
                                         info='quote',
                                         format_output='dataframe')
    if not isinstance(requested, str):
        requested = requested.replace('null', 'NaN')  # drop null values
        # Convert price columns to floats so the percentage math works.
        requested['Open'] = requested['Open'].astype('float64')
        requested['Close'] = requested['Close'].astype('float64')
        # requested.to_csv(f'{lpath}{ticker}.csv')  # cache extracted data to CSV
        requested['Percent'] = requested['Close']
        # Index of the most recent row containing a NaN; everything up to
        # and including it is discarded so percentages use clean data only.
        nanindex = min(requested.isna()[::-1].idxmax().values)
        if nanindex < len(requested) - 1:
            requested = requested[requested.index > nanindex].reset_index(
                drop=True)
        for x in range(1, len(requested)):
            # BUG FIX: use .loc instead of chained indexing
            # (requested['Percent'][x] = ...), which triggers
            # SettingWithCopyWarning and may silently not write through.
            requested.loc[x, 'Percent'] = truncate(
                (requested['Close'][x] - requested['Close'][x - 1])
                / requested['Close'][x - 1])
    return requested
def write_csv(ticker):
    """Download quotes for `ticker` (2015-01-02..2016-01-04) into ./data/<ticker>.csv."""
    # BUG FIX: 'with' guarantees the handle is closed even if an unexpected
    # exception escapes; the original f.close() was skipped in that case.
    with open('./data/' + ticker + '.csv', 'w+') as f:
        try:
            print(yqd.load_yahoo_quote(ticker, '20150102', '20160104'), file=f)
        except error.HTTPError:
            print('<'+ticker+'> not found or HTTP cant resolve the token')
def get_quote(self, ticker, start_date, end_date, interval='1d'):
    """Download historical quotes for `ticker` and write them to <output>/<ticker>.csv.

    Args:
        ticker: stock symbol.
        start_date / end_date: date range passed straight to yqd.
        interval: bar interval, default '1d'.

    Returns:
        The raw quote lines returned by yqd.

    Raises:
        urllib.error.HTTPError: re-raised after refreshing the Yahoo cookie crumb.
        Exception: any other failure is logged and re-raised.
    """
    try:
        self.logger.info(
            'trying get quote for ticker {} ...'.format(ticker))
        quotes = yqd.load_yahoo_quote(ticker=ticker,
                                      begindate=start_date,
                                      enddate=end_date,
                                      interval=interval)
        out_path = os.path.join(self.output, ticker + '.csv')
        with open(out_path, 'w') as fh:
            for line in quotes:
                if line.strip() != '':  # skip blank trailing lines
                    self.logger.debug(line)
                    fh.write(line + '\n')
        self.logger.debug('result file output file %s' % out_path)
        return quotes
    except urllib.error.HTTPError:
        self.logger.error('get_quote HTTP ERROR')
        self.logger.info('refreshing yahoo cookie crumb')
        yqd._get_cookie_crumb()
        # BUG FIX: bare `raise` re-raises the caught instance.  The original
        # `raise urllib.error.HTTPError` tried to construct a new HTTPError
        # with no arguments, which itself raises TypeError and masks the
        # real HTTP failure.
        raise
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed into the "quote not found" path.
        self.logger.error('ticker %s quote not found from yahoo' % ticker,
                          exc_info=True)
        raise
def ticker_history(self, start, end, ticker, info='quote', start_dic={}):
    """
    Gets and returns the historic prices for a given ticker for between
    the time period provided.  Inclusive.

    `info` may be 'quote', 'dividend' or 'split'.  Each returned row is a
    dict seeded with a copy of `start_dic` and filled with the normalized
    (lower-cased, underscored) column headers mapped to converted values.
    Returns [] when the Yahoo request fails.
    """
    fmt = '%Y%m%d'
    try:
        data = yqd.load_yahoo_quote(ticker, start.strftime(fmt),
                                    end.strftime(fmt), info=info)
    except (HTTPError, URLError, gaierror):
        LOGGER.info("Yahoo request failed. Blocked?")
        return []
    # First line holds the column headers; normalize them once.
    titles = tuple(t.replace(' ', '_').lower() for t in data[0].split(','))
    history = []
    # Last element of `data` is an empty trailer line, hence the [1:-1].
    for row in data[1:-1]:
        record = dict(start_dic)
        for title, element in zip(titles, row.split(',')):
            record[title] = self.convert_yahoo_element(element)
        history.append(record)
    return history
def simulacion(nombre, desde, hasta, i_anios, i_tasa, i_precio, num_tray, tipo_derecho):
    """Monte Carlo option pricing from Yahoo closing prices.

    Args:
        nombre: ticker symbol.
        desde / hasta: start/end dates for the quote download.
        i_anios: horizon in years.
        i_tasa: risk-free rate.
        i_precio: strike price.
        num_tray: number of trajectories to simulate.
        tipo_derecho: 'Compra' (call) or 'Venta' (put).

    Returns:
        [discounted expected payoff, mean of simulated endpoints,
         endpoints, closing prices, dates, trajectory indices, mean line].
    """
    # Download the raw quote lines for the requested period.
    datos = yqd.load_yahoo_quote(nombre, desde, hasta)
    # Collect every closing price together with its date.
    valores_cierre = []
    dias_i = []
    for i in datos:
        if (i):
            lista = i.split(',')
            if (lista[4] != 'Close'):  # skip the header row
                valores_cierre.append(float(lista[4]))
                a = str((lista[0]))
                dias_i.append(a)
    # Daily log-returns ln(close_j / close_{j-1}).
    # BUG FIX: the original computed np.log(c_j) / np.log(c_{j-1}) — a
    # quotient of logs — although its own comment described ln(j/j-1).
    valores_log = []
    for j in range(1, len(valores_cierre)):
        valores_log.append(np.log(valores_cierre[j] / valores_cierre[j - 1]))
    # Simulation parameters.
    trayectorias = num_tray
    d = int(i_anios * 360)        # number of days
    t = i_anios / 1000            # time step in years
    r = i_tasa * d / 360          # risk-free rate over the horizon
    s = valores_cierre            # price series used by the simulation
    RC = valores_log              # log-return series used by the simulation
    precio = i_precio             # strike price
    sigma = np.std(valores_log)   # volatility estimate
    # Collected results.
    valores_trayectorias = []
    valores_max = []
    # Simulate the requested number of trajectories.
    # BUG FIX: range(1, trayectorias) produced only num_tray - 1 paths.
    for i in range(trayectorias):
        # One trajectory; keep only its final simulated price.
        sn_1 = trayectoria(s[len(s) - 1], 0, d, r, sigma, t)
        valores_trayectorias.append(sn_1)
        # Payoff according to the chosen option type.
        if tipo_derecho == 'Compra':
            valores_max.append(np.maximum(sn_1 - precio, 0))  # call: max(S-K, 0)
        if tipo_derecho == 'Venta':
            valores_max.append(np.maximum(precio - sn_1, 0))  # put: max(K-S, 0)
        # Re-initialize values for subsequent trajectories.
        sigma = np.std(valores_log)
        RC = valores_log
    esperanza = np.mean(valores_max)
    # Discount the expected payoff back to the present.
    funcion = np.exp(-i_tasa * i_anios) * esperanza
    media_trayectorias = np.mean(valores_trayectorias)
    lista_num_tray = list(range(0, len(valores_trayectorias)))
    lista_prom_tray = [media_trayectorias] * len(valores_trayectorias)
    return [funcion, media_trayectorias, valores_trayectorias, valores_cierre,
            dias_i, lista_num_tray, lista_prom_tray]
def write_csv(ticker):
    """Load ticker to csv from 2015 to 2016."""
    # BUG FIX: 'with' guarantees the handle is closed even if an unexpected
    # exception escapes; the original f.close() was skipped in that case.
    with open('./data/' + ticker + '.csv', 'w+') as f:
        try:
            print(yqd.load_yahoo_quote(ticker, '20150102', '20160104'), file=f)
            print('Write Succeed')
        except error.HTTPError:
            print('<' + ticker + '> not found or HTTP cant resolve the token')
def simulacion(nombre, desde, hasta, i_anios, i_tasa, i_precio, num_tray):
    """Monte Carlo call-option pricing from Yahoo closing prices.

    Args:
        nombre: ticker symbol.
        desde / hasta: start/end dates for the quote download.
        i_anios: horizon in years.
        i_tasa: risk-free rate.
        i_precio: strike price.
        num_tray: number of trajectories to simulate.

    Returns:
        [discounted expected payoff, mean of simulated endpoints,
         endpoints, closing prices, dates, trajectory indices, mean line].
    """
    # Download the raw quote lines for the requested period.
    datos = yqd.load_yahoo_quote(nombre, desde, hasta)
    # Collect every closing price together with its date.
    valores_cierre = []
    dias_i = []
    for i in datos:
        if (i):
            lista = i.split(',')
            if (lista[4] != 'Close'):  # skip the header row
                valores_cierre.append(float(lista[4]))
                a = str((lista[0]))
                dias_i.append(a)
    # Daily log-returns ln(close_j / close_{j-1}).
    # BUG FIX: the original computed np.log(c_j) / np.log(c_{j-1}) — a
    # quotient of logs — although its own comment described ln(j/j-1).
    valores_log = []
    for j in range(1, len(valores_cierre)):
        valores_log.append(np.log(valores_cierre[j] / valores_cierre[j - 1]))
    # Simulation parameters.
    trayectorias = num_tray
    d = int(i_anios * 360)        # number of days
    t = i_anios / 1000            # time step in years
    r = i_tasa * d / 360          # risk-free rate over the horizon
    s = valores_cierre            # price series used by the simulation
    RC = valores_log              # log-return series used by the simulation
    precio = i_precio             # strike price
    sigma = np.std(valores_log)   # volatility estimate
    # Collected results.
    valores_trayectorias = []
    valores_max = []
    # Simulate the requested number of trajectories.
    # BUG FIX: range(1, trayectorias) produced only num_tray - 1 paths.
    for i in range(trayectorias):
        # One trajectory; keep only its final simulated price.
        sn_1 = trayectoria(s[len(s) - 1], 0, d, r, sigma, t)
        valores_trayectorias.append(sn_1)
        # Call payoff: max(S - K, 0).
        valores_max.append(np.maximum(sn_1 - precio, 0))
        # Re-initialize values for subsequent trajectories.
        sigma = np.std(valores_log)
        RC = valores_log
    esperanza = np.mean(valores_max)
    # Discount the expected payoff back to the present.
    funcion = np.exp(-i_tasa * i_anios) * esperanza
    media_trayectorias = np.mean(valores_trayectorias)
    lista_num_tray = list(range(0, len(valores_trayectorias)))
    lista_prom_tray = [media_trayectorias] * len(valores_trayectorias)
    return [
        funcion, media_trayectorias, valores_trayectorias, valores_cierre,
        dias_i, lista_num_tray, lista_prom_tray
    ]
def load_data(tickers, start, end):
    """Load adjusted closing prices for `tickers` between `start` and `end`.

    Returns:
        dict mapping each date string to a list of adjusted closes, one
        slot per ticker (in input order); -1 marks a missing value.
    """
    x = {}
    for idx, t in enumerate(tickers):
        try:
            prices = yqd.load_yahoo_quote(t, start, end)
            for row in prices[1:]:  # skip the CSV header line
                if len(row) > 0:
                    s = row.split(",")
                    date = s[0]
                    adj_close = float(s[5])  # column 5 is 'Adj Close'
                    if date not in x:
                        # First sighting of this date: pre-fill all slots
                        # with -1 so absent tickers stay marked missing.
                        x[date] = [-1] * len(tickers)
                    x[date][idx] = adj_close
        except Exception:
            # BUG FIX: narrowed from BaseException, which also swallowed
            # KeyboardInterrupt and SystemExit.
            print("Can't find %s!" % t)
    return x
def test(symbols, start_date, end_date, output):
    """Download quotes for every ticker listed in `symbols` into ./stock/<ticker>.txt.

    `symbols` is a path to a tab-separated file whose first column is the
    ticker.  NOTE: start_date, end_date and output are accepted for
    interface compatibility, but the date range and output path are
    currently hard-coded (the original shadowed `output` the same way).
    """
    # BUG FIX: both file handles are now closed deterministically via 'with';
    # the original leaked the symbols handle and the per-ticker handle on error.
    with open(symbols) as sym_file:
        for line in sym_file:
            ticker = line.split('\t')[0]
            print('===', ticker, '===')
            list1 = yqd.load_yahoo_quote(ticker, '20170515', '20190217')
            if list1 is None:
                continue
            out_path = './stock/' + ticker + '.txt'
            with open(out_path, 'w') as file_out:
                if len(list1) > 0:
                    # One long line: ticker followed by every quote line,
                    # '; '-separated (same format as the original loop).
                    file_out.write(ticker + '; ' + '; '.join(list1) + '\n')
def makeData(ticker, start_date, end_date):
    """Count, per weekday, how often that day held the week's maximum 'Low' price.

    Downloads the quote history for `ticker`, scans it week by week, and
    returns a 5-element list (index 0 = Monday .. index 4 = Friday) of how
    many weeks each weekday carried the maximum value.
    """

    def fresh_week():
        # One slot per trading weekday: Monday (0) through Friday (4).
        return [0] * 5

    # Dump the raw quote lines into a scratch CSV file.
    with open('data.csv', 'w') as handle:
        csv.writer(handle, delimiter='\n').writerow(
            yqd.load_yahoo_quote(ticker, start_date, end_date))

    # Reload the scratch file as a pandas DataFrame.
    frame = pd.read_csv('data.csv')

    win_counts = fresh_week()   # per-weekday count of "was the weekly maximum"
    week_prices = fresh_week()  # prices observed so far in the current week

    for idx, record in frame.iterrows():
        # Parse the row's date and find its weekday: 0 = Monday .. 4 = Friday.
        when = datetime.datetime.strptime(record['Date'], '%Y-%m-%d')
        weekday = when.date().weekday()
        # 'Low' may be substituted with 'High', 'Open' or 'Close'.
        week_prices[weekday] = frame.at[idx, 'Low']
        # Friday closes out the week: record which day held the maximum.
        if weekday == 4:
            best = max(week_prices)
            win_counts[week_prices.index(best)] += 1
            week_prices = fresh_week()

    return win_counts
def getSymbolList(self):
    # NOTE(review): Python 2 code (print statement, urllib2, csv in 'wb' mode).
    # Fetches the NASDAQ company list and writes (symbol, date, open) rows
    # for every symbol's last ~180 days of history to csvOutput/eggs.csv.
    response = urllib2.urlopen(self.nasdaqCompanyList)
    cr = csv.reader(response)
    # Build a 180-day window ending today, formatted YYYYMMDD for yqd.
    currentDate = datetime.datetime.now()
    earlierDate = currentDate - datetime.timedelta(days=180)
    currentDateStr = currentDate.strftime("%Y%m%d")
    earlierDateStr = earlierDate.strftime("%Y%m%d")
    # Earlier experiment (kept for reference): rank symbols by 180-day
    # percentage change instead of dumping daily rows.
    # SymbolDict = {}
    # for row in cr:
    #     if row[0] != "Symbol":
    #         print row[0]
    #         try:
    #             historicalData = yqd.load_yahoo_quote(row[0], earlierDateStr, currentDateStr)
    #             SymbolDict[row[0]] = float(historicalData[-2].split(",")[1]) * 100 / float(historicalData[1].split(",")[1])
    #         except:
    #             continue
    # sorted_symbol = sorted(SymbolDict.items(), key=operator.itemgetter(1))
    # print sorted_symbol
    with open('csvOutput/eggs.csv', 'wb') as csvfile:
        csvWriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        for row in cr:
            if row[0] != "Symbol":  # skip the company-list header row
                print row[0]
                try:
                    historicalData = yqd.load_yahoo_quote(row[0], earlierDateStr, currentDateStr)
                    for day in historicalData[1:]:  # skip the quote header line
                        if day == "":
                            continue
                        dayList = day.split(",")
                        # Columns: symbol, date, open price.
                        result = [row[0], dayList[0], dayList[1]]
                        csvWriter.writerow(result)
                except:
                    # NOTE(review): bare except silently skips any symbol
                    # whose download or parse fails.
                    continue
def __init__(self, symbol: str, start_date: str, end_date: str):
    """Download daily Yahoo quote data for `symbol` between `start_date`
    and `end_date` (both 'YYYYMMDD') and store it as a pandas DataFrame.

    On any OSError during the download the error is printed and the
    instance is left partially initialized.
    """
    try:
        BaseData.__init__(self, symbol)
        # self.__symbol = symbol
        self.__start_date = datetime.strptime(start_date, '%Y%m%d')
        self.__end_date = datetime.strptime(end_date, '%Y%m%d')
        self.__data = None
        # download yahoo data
        # returns a list where the first element is the header
        # example: ['Date,Open,High,Low,Close,Adj Close,Volume', '2017-05-15,55.080002,55.490002,55.080002,55.400002,53.346981,10686100', '']
        yah = yqd.load_yahoo_quote(symbol, start_date, end_date)
        # retrieve Date,Open,High,Low,Close,Adj Close,Volume as list
        header = yah[0].split(',')
        # loop through remaining elements and add to dictionary
        table = []
        for i in yah[1:]:
            # make sure we have a complete quote (the list ends with an
            # empty string, which splits to a single-element list)
            quote = i.split(',')
            if len(quote) > 1:
                d = dict()
                d[header[0]] = quote[0]  # Date
                d[header[1]] = float(quote[1])  # Open
                d[header[2]] = float(quote[2])  # High
                d[header[3]] = float(quote[3])  # Low
                d[header[4]] = float(quote[4])  # close
                d[header[5]] = float(quote[5])  # Adj Close
                d[header[6]] = int(quote[6])  # Volume
                table.append(d)
        # Create DataFrame
        self.__data = pd.DataFrame(table)
        self.__size = len(self.__data)
    except OSError as err:
        print("OS error for symbol {}: {}".format(symbol, err))
def load_quote(ticker):
    """Print quote, dividend and split data for `ticker` over 2017-05-15..17."""
    print('===', ticker, '===')
    print(yqd.load_yahoo_quote(ticker, '20170515', '20170517'))
    # BUG FIX: the info argument was misspelled 'divident'; yqd expects
    # 'dividend' (matching the sibling load_quote in this codebase).
    print(yqd.load_yahoo_quote(ticker, '20170515', '20170517', 'dividend'))
    print(yqd.load_yahoo_quote(ticker, '20170515', '20170517', 'split'))
def load_quote(ticker):
    """Print quote, dividend and split data for `ticker` over December 2018."""
    print('===', ticker, '===')
    # Plain quote first, then each extra info kind over the same window.
    print(yqd.load_yahoo_quote(ticker, '20181201', '20181231'))
    for extra in ('dividend', 'split'):
        print(yqd.load_yahoo_quote(ticker, '20181201', '20181231', extra))
def main():
    """Command-line entry point: download Yahoo quotes for one ticker into a CSV datafile.

    Supports an "increment mode": when the datafile already exists and no
    --begindate is given, only the days after the file's last recorded date
    are downloaded and appended.  If a dividend or split occurred in that
    window, the whole history is redownloaded (Yahoo rescales past prices).
    """
    # parse arguments
    parser = argparse.ArgumentParser(description='Yahoo Quote Downloader v' + __version__)
    parser.add_argument("-t", "--ticker", dest="ticker", required=True,
                        help="The ticker")
    parser.add_argument("-b", "--begindate", dest="begindate",
                        help="The beginning date (YYYY-MM-DD)")
    parser.add_argument("-e", "--enddate", dest="enddate",
                        help="The end date (YYYY-MM-DD)")
    parser.add_argument("-f", "--datafile", dest="datafile", required=True,
                        help="The destination data file")
    parser.add_argument("-m", "--max-retry", dest="maxretries", default=5,
                        type=int, help="The maximum number of retries")
    parser.add_argument("-v", "--verbose", dest="verbose", default=1,
                        type=int, help="Verbose level")
    parser.add_argument("--version", action="version", version=__version__)
    args = parser.parse_args()

    if args.verbose > 0:
        print("Downloading {} ...".format(args.ticker))

    # Increment mode (only download the necessary data after what is already
    # in the datafile).  It is used only when
    #  - a last date can be extracted from the datafile, AND
    #  - a beginning date is not specified on the command line.
    increment_mode = False
    data_in_file = []
    firstday_str = None  # first data date in the file; used on full redownload
    today = datetime.datetime.today()
    today_str = today.strftime('%Y-%m-%d')

    # Determine the starting date if it is not provided.
    # If it can be extracted from the last line of the data file, use the
    # next day; otherwise fall back to the standard start 1970-01-01.
    if args.begindate is None:
        if os.path.exists(args.datafile):
            with open(args.datafile) as df:
                # Read all the lines currently in the datafile.
                for cnt, line in enumerate(df):
                    if len(line) >= 10:  # at least long enough to hold a date
                        data_in_file.append(line)
            try:
                # First data date (in case we need to redownload everything).
                firstline = data_in_file[1]
                firstday_str = firstline.split(',')[0].strip()
                firstday = datetime.datetime.strptime(firstday_str, '%Y-%m-%d')
                # Last data date (for increment mode).
                lastline = data_in_file[-1]
                lastday_str = lastline.split(',')[0].strip()
                lastday = datetime.datetime.strptime(lastday_str, '%Y-%m-%d')
                if lastday_str >= today_str:
                    if args.verbose > 0:
                        print('{}: datafile ({}) is update to today {}'.format(
                            args.ticker, lastday_str, today_str))
                        print('Nothing to download')
                    return
                nextday = lastday + datetime.timedelta(days=1)
                nextday_str = nextday.strftime('%Y-%m-%d')
                if args.verbose > 5:
                    print('Last Date:', lastday_str, ', Next Day:', nextday_str,
                          ', First Day:', firstday_str)
                args.begindate = nextday_str
                # All good, so switch on increment mode.
                increment_mode = True
            except Exception:
                # Narrowed from a bare except; on any parse failure fall back
                # to a full download.
                if args.verbose > 0:
                    print('!!! {}: failed to extract last date from date file'
                          .format(args.ticker))
                data_in_file = []

    if args.begindate is None:
        # Two cases land here:
        #  1. the datafile does not exist yet, or
        #  2. the datafile exists but the last date could not be extracted.
        args.begindate = '1970-01-01'

    # Default the end date to today when not provided.
    if args.enddate is None:
        args.enddate = today_str

    # Print the parameters.
    if args.verbose > 1:
        print(" Ticker:", args.ticker)
        print(" Beginning:", args.begindate)
        print(" Ending:", args.enddate)
        print(" File:", args.datafile)

    success = False
    for itry in range(args.maxretries):
        try:
            # In increment mode, download split and dividend events first.
            # If any exist, Yahoo rescales the entire price sequence, so we
            # must redownload from the very beginning.
            if increment_mode:
                div_data = yqd.load_yahoo_quote(
                    args.ticker,
                    args.begindate.replace('-', ""),
                    args.enddate.replace('-', ''),
                    info='dividend')
                has_div_event = (len(div_data) > 2 and len(div_data[-1]) > 10)
                split_data = yqd.load_yahoo_quote(
                    args.ticker,
                    args.begindate.replace('-', ""),
                    args.enddate.replace('-', ''),
                    info='split')
                has_split_event = (len(split_data) > 2 and len(split_data[-1]) > 10)
                if has_div_event or has_split_event:
                    # BUG FIX: the ticker placeholder was never filled in.
                    print('!!! {}: Has a recent event (dividend or split)'
                          .format(args.ticker))
                    # BUG FIX: must assign the string form; assigning the
                    # datetime object made the .replace('-', '') calls below
                    # raise TypeError.
                    args.begindate = firstday_str
                    increment_mode = False
            # Finally download the quote data itself.
            data = yqd.load_yahoo_quote(args.ticker,
                                        args.begindate.replace('-', ""),
                                        args.enddate.replace('-', ''))
            success = True
            break
        except Exception:
            if args.verbose > 2:
                print("Try {}/{} failed".format(itry + 1, args.maxretries))
                # traceback.print_exc()
            # Download failed.  Retry in 2 seconds until maxretries is reached.
            # Setting _crumb to None forces yqd to obtain a new set of cookies,
            # which works around the intermittent "401 Unauthorized" issue.
            yqd._crumb = None
            time.sleep(2)

    if success:
        if args.verbose > 0:
            print("Data download successful")
        # print("Dump", data)
        vdata = validater.validate(args.ticker, data,
                                   begindate=args.begindate,
                                   verbose=args.verbose)
        with open(args.datafile, "w") as f:
            if increment_mode:
                # Write back the original data already in the file.
                for line in data_in_file:
                    f.write(line)
                # Remove the field headline for the newly downloaded data.
                del vdata[0]
            for line in vdata:
                # Skip lines that are empty.
                if len(line) == 0:
                    continue
                f.write(line)
                f.write('\n')
    else:
        print("!!! {}: Download unsuccessful!".format(args.ticker))
def parseData(self):
    """Download, clean and reshape Yahoo quote data for self.symbol.

    Fetches the quote over [self.startTime, self.endTime], converts it to
    a pandas DataFrame, drops incomplete and monthly rows, derives a
    per-day 'adjustment' factor, and stores the rows as MongoDB-ready
    dicts in self.data (also returned).
    """
    # Convert dates to YYYYMMDD format
    date_start = self.startTime.replace('-', '')
    date_end = self.endTime.replace('-', '')
    # Extract data
    data_raw = yqd.load_yahoo_quote(self.symbol, date_start, date_end,
                                    info='quote')
    # Remove empty rows
    data_raw = [x for x in data_raw if len(x) > 0]
    # Split data by commas
    data_split = [x.split(',') for x in data_raw]
    data_item_length = [len(x) for x in data_split]
    # Every row (including the header) must have the same column count.
    if any(x != data_item_length[0] for x in data_item_length):
        raise ValueError("Inconsistent length of items")
    # Convert into dictionary: header name -> column of values
    data_dict = {}
    for i in range(data_item_length[0]):
        data_dict[data_split[0][i]] = [x[i] for x in data_split[1:]
                                       ]  # first row are headers
    # Convert to pandas data frame
    data_pd = pd.DataFrame.from_dict(data_dict)
    # Convert data types: dates to datetime, everything else to numeric
    data_pd['Date'] = pd.to_datetime(data_pd['Date'], format='%Y-%m-%d')
    # NOTE(review): .get_values() is deprecated in newer pandas
    # (replaced by .to_numpy()); left as-is to match the pinned version.
    non_date = data_pd.columns[
        data_pd.columns != 'Date'].get_values().tolist()
    data_pd[non_date] = data_pd[non_date].apply(pd.to_numeric,
                                                errors='coerce')
    # Remove null dates (rows whose Close failed numeric conversion)
    data_pd = data_pd[data_pd['Close'].notna()]
    # Remove monthly entries
    # Cut-off number of days between consecutive trading days before
    # classifying as a monthly entry; should be large enough to take into
    # account holidays and trading halts but less than one month
    diff_cutoff = 15
    data_pd.sort_values('Date', inplace=True)
    # Gap from each row to the NEXT trading day; monthly rows show a gap
    # of roughly a month and get filtered out below.
    diff_date = data_pd['Date'].shift(-1) - data_pd['Date']
    data_pd = data_pd[diff_date.map(
        lambda x: x < pd.to_timedelta(diff_cutoff, unit='days'))]
    # Back out adjustments over time - set to NaN if date has no adjustments
    adj_ratio = data_pd['Adj Close'] / data_pd['Adj Close'].shift()
    close_ratio = data_pd['Close'] / data_pd['Close'].shift()
    round_cutoff = 4  # number of decimal places to consider to identify adjustments
    data_pd['Adjustments'] = adj_ratio / close_ratio
    # A ratio of ~1 (to round_cutoff places) means no adjustment that day.
    data_pd['Adjustments'] = data_pd['Adjustments'].map(
        lambda x: np.nan if round(x, round_cutoff) == 1 else x)
    # Prepare data frame: rename to the storage schema's column names
    data_pd.rename(columns={
        'Date': 'Timestamp',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
        'Adjustments': 'adjustment'
    }, inplace=True)
    data_pd.drop(['Adj Close'], axis=1, inplace=True)
    # Add symbol code
    data_pd['symbol'] = self.symbol
    # Convert to dictionary format for MongoDB
    self.data = []
    for index, row in data_pd.iterrows():
        document = row.to_dict()  # convert rows to dictionary
        # Omit the adjustment field entirely on days with no adjustment.
        if np.isnan(document['adjustment']):
            del document['adjustment']
        self.data.append(document)
    return self.data
def load_quote(ticker):
    """Print the quote history for `ticker` from 2015-01-02 to 2016-01-04."""
    print('===', ticker, '===')
    quote = yqd.load_yahoo_quote(ticker, '20150102', '20160104')
    print(quote)