Esempio n. 1
0
def extract_quote(ticker,
                  starttime,
                  endtime=None):
    """Download historical quotes for *ticker* and add a daily percent-change column.

    Parameters
    ----------
    ticker : str
        Ticker symbol; upper-cased before the request.
    starttime : str
        Start date, ``YYYYMMDD``.
    endtime : str, optional
        End date, ``YYYYMMDD``.  Defaults to today's UTC date computed at
        *call* time.  (The original default ``time.strftime(...)`` in the
        signature was evaluated once at import time, silently freezing
        "today" for the life of the process.)

    Returns
    -------
    pandas.DataFrame or str
        The quote DataFrame with a ``Percent`` column, or the raw string
        returned by yqd when the download fails.
    """
    if endtime is None:
        # Evaluate "today" per call, not once at import time.
        endtime = time.strftime("%Y%m%d", time.gmtime())
    ticker = ticker.upper()
    requested = yqd.load_yahoo_quote(ticker,
                                     starttime,
                                     endtime,
                                     info='quote',
                                     format_output='dataframe')
    if not isinstance(requested, str):
        requested = requested.replace('null', 'NaN')  # drop null values
        # Convert price columns to floats so percentages can be calculated.
        requested['Open'] = requested['Open'].astype('float64')
        requested['Close'] = requested['Close'].astype('float64')
        requested['Percent'] = requested['Close']
        # Index of the last row that still contains a NaN; everything up to
        # and including it is discarded so percent changes use clean rows.
        nanindex = min(requested.isna()[::-1].idxmax().values)
        if nanindex < len(requested) - 1:
            requested = requested[requested.index > nanindex].reset_index(
                drop=True)
        # Day-over-day fractional change of the closing price.  Use .loc
        # instead of chained indexing (requested['Percent'][x] = ...) so the
        # write is guaranteed to hit the DataFrame, not a temporary copy.
        for x in range(1, len(requested)):
            requested.loc[x, 'Percent'] = truncate(
                (requested['Close'][x] - requested['Close'][x - 1]) /
                requested['Close'][x - 1])
    return requested
Esempio n. 2
0
def write_csv(ticker):
    """Download quotes for *ticker* (2015-01-02 .. 2016-01-04) into ./data/<ticker>.csv.

    Prints a warning instead of raising when the ticker cannot be resolved.
    """
    # 'with' guarantees the handle is closed even when an unexpected
    # exception escapes; the original explicit f.close() was skipped in
    # that case and leaked the file object.
    with open('./data/'+ticker+'.csv', 'w+') as f:
        try:
            print(yqd.load_yahoo_quote(ticker, '20150102', '20160104'), file=f)
        except error.HTTPError:
            print('<'+ticker+'> not found or HTTP cant resolve the token')
Esempio n. 3
0
 def get_quote(self, ticker, start_date, end_date, interval='1d'):
     """Download quotes for *ticker* and write them to <self.output>/<ticker>.csv.

     Non-empty lines of the yqd response are written one per line; the raw
     quote list is returned.  On an HTTPError the yahoo cookie/crumb is
     refreshed and the original error is re-raised; any other exception is
     logged (with traceback) and re-raised.
     """
     try:
         self.logger.info(
             'trying get quote for ticker {} ...'.format(ticker))
         quotes = yqd.load_yahoo_quote(ticker=ticker,
                                       begindate=start_date,
                                       enddate=end_date,
                                       interval=interval)
         with open(os.path.join(self.output, ticker + '.csv'), 'w') as fh:
             for line in quotes:
                 if line.strip() != '':
                     self.logger.debug(line)
                     fh.write(line + '\n')
         self.logger.debug('result file output file %s' %
                           os.path.join(self.output, ticker + '.csv'))
         return quotes
     except urllib.error.HTTPError:
         self.logger.error('get_quote HTTP ERROR')
         self.logger.info('refreshing yahoo cookie crumb')
         yqd._get_cookie_crumb()
         # BUGFIX: re-raise the caught exception.  The original
         # ``raise urllib.error.HTTPError`` instantiated the class with no
         # arguments, which itself fails with TypeError because HTTPError
         # requires (url, code, msg, hdrs, fp).
         raise
     except:
         self.logger.error('ticker %s quote not found from yahoo' % ticker,
                           exc_info=True)
         raise
Esempio n. 4
0
    def ticker_history(self, start, end, ticker, info='quote', start_dic=None):
        """
        Gets and returns the historic prices for a given ticker for between
        the time period provided. Inclusive.

        Parameters
        ----------
        start, end : datetime-like
            Period boundaries; formatted as ``YYYYMMDD`` for the request.
        ticker : str
            Symbol to query.
        info : str
            One of 'quote', 'dividend', 'split'.
        start_dic : dict, optional
            Base key/value pairs copied into every returned row.  (Replaces
            the mutable default argument ``{}``; behaviour is unchanged.)

        Returns
        -------
        list of dict
            One dict per data row; empty list when the request fails.
        """
        if start_dic is None:
            start_dic = {}

        start_str = start.strftime('%Y%m%d')
        end_str = end.strftime('%Y%m%d')

        # info = 'quote', 'dividend', 'split'
        try:
            data = yqd.load_yahoo_quote(ticker, start_str, end_str, info=info)
        except (HTTPError, URLError, gaierror):
            LOGGER.info("Yahoo request failed. Blocked?")
            return []

        # First line is the CSV header; normalise to snake_case keys.
        titles = tuple(t.replace(' ', '_').lower() for t in data[0].split(','))

        history = []
        # data[-1] is skipped -- presumably a trailing empty line from yqd;
        # TODO confirm against the library's output format.
        for row in data[1:-1]:
            history_row = dict(start_dic)
            for element, title in zip(row.split(','), titles):
                history_row[title] = self.convert_yahoo_element(element)
            history.append(history_row)
        return history
Esempio n. 5
0
def simulacion(nombre,desde,hasta,i_anios,i_tasa,i_precio, num_tray,tipo_derecho):
        """Monte Carlo option-value simulation driven by Yahoo quote history.

        nombre       -- ticker symbol to download
        desde, hasta -- start/end dates for the download
        i_anios      -- horizon in years
        i_tasa       -- interest rate
        i_precio     -- estimated (strike) price
        num_tray     -- number of trajectories (NOTE(review): the loop
                        ``range(1, num_tray)`` runs num_tray - 1 times)
        tipo_derecho -- option right: 'Compra' (call) or 'Venta' (put)

        Returns [discounted expected payoff, mean of trajectory endpoints,
        trajectory endpoints, closing values, dates, trajectory indices,
        mean repeated per trajectory].
        """
        # Fetch the quote data for the ticker, between desde and hasta.
        datos = yqd.load_yahoo_quote(nombre, desde, hasta)
        # List holding every closing value.
        valores_cierre = []
        dias_i = []
        for i in datos:
                if (i):
                        lista = i.split(',')
                        if(lista[4]!='Close'):  # skip the CSV header row
                                valores_cierre.append(float(lista[4]))
                                a = str((lista[0]))
                                dias_i.append(a)
        # List holding the "log return" values.
        # NOTE(review): this computes ln(close[j]) / ln(close[j-1]); the
        # usual log return is ln(close[j] / close[j-1]) -- confirm intent.
        valores_log = []
        for j in range(1,len(valores_cierre)):
                calculo = (np.log(valores_cierre[j]))/(np.log(valores_cierre[j-1]))
                valores_log.append(calculo)

        # Values used by the simulation.
        trayectorias = num_tray
        d = int(i_anios*360) # number of days
        t = i_anios/1000 # time in years
        r = i_tasa *d/360 # risk-free rate
        s = valores_cierre # closing values used by the simulations
        RC = valores_log # log values used by the simulations
        precio = i_precio # estimated (strike) price
        sigma = np.std(valores_log)
        # Results.
        valores_trayectorias = []
        valores_max = []



        # Simulate every requested trajectory.

        for i in range(1,trayectorias):
                # Simulate one trajectory starting from the last closing value.
                sn_1 = trayectoria(s[len(s)-1],0,d,r,sigma,t)
                valores_trayectorias.append(sn_1) # store the endpoint of the simulated trajectory

                # Payoff according to the chosen option right.
                if tipo_derecho == 'Compra':
                        valores_max.append(np.maximum(sn_1-precio,0)) # call payoff: max(endpoint - strike, 0)
                if tipo_derecho == 'Venta':
                        valores_max.append(np.maximum(precio-sn_1,0)) # put payoff: max(strike - endpoint, 0)
                # Re-initialise values for future trajectories.
                sigma = np.std(valores_log)
                RC = valores_log

        esperanza = np.mean(valores_max)
        funcion = np.exp(-i_tasa*i_anios)*esperanza
        media_trayectorias = np.mean(valores_trayectorias)

        lista_num_tray = list(range(0,len(valores_trayectorias)))
        lista_prom_tray = [media_trayectorias] * len(valores_trayectorias)

        #[funcion, media_trayectorias, valores_trayectorias,valores_cierre,dias_i,lista_num_tray,lista_prom_tray,desde,hasta,i_anios,i_tasa,i_precio,nombre]
        return [funcion, media_trayectorias, valores_trayectorias,valores_cierre,dias_i,lista_num_tray,lista_prom_tray]
Esempio n. 6
0
def write_csv(ticker):
    """Load ticker to csv from 2015 to 2016."""
    # 'with' closes the file even when an unexpected exception escapes;
    # the original explicit f.close() was skipped in that case.
    with open('./data/' + ticker + '.csv', 'w+') as f:
        try:
            print(yqd.load_yahoo_quote(ticker, '20150102', '20160104'), file=f)
            print('Write Succeed')
        except error.HTTPError:
            print('<' + ticker + '> not found or HTTP cant resolve the token')
Esempio n. 7
0
def simulacion(nombre, desde, hasta, i_anios, i_tasa, i_precio, num_tray):
    """Monte Carlo call-option value simulation driven by Yahoo quote history.

    nombre       -- ticker symbol to download
    desde, hasta -- start/end dates for the download
    i_anios      -- horizon in years
    i_tasa       -- interest rate
    i_precio     -- estimated (strike) price
    num_tray     -- number of trajectories (NOTE(review): the loop
                    ``range(1, num_tray)`` runs num_tray - 1 times)

    Returns [discounted expected payoff, mean of trajectory endpoints,
    trajectory endpoints, closing values, dates, trajectory indices,
    mean repeated per trajectory].
    """
    # Fetch the quote data for the ticker, between desde and hasta.
    datos = yqd.load_yahoo_quote(nombre, desde, hasta)
    # List holding every closing value.
    valores_cierre = []
    dias_i = []
    for i in datos:
        if (i):
            lista = i.split(',')
            if (lista[4] != 'Close'):  # skip the CSV header row
                valores_cierre.append(float(lista[4]))
                a = str((lista[0]))
                dias_i.append(a)
    # List holding the "log return" values.
    # NOTE(review): this computes ln(close[j]) / ln(close[j-1]); the usual
    # log return is ln(close[j] / close[j-1]) -- confirm intent.
    valores_log = []
    for j in range(1, len(valores_cierre)):
        calculo = (np.log(valores_cierre[j])) / (np.log(valores_cierre[j - 1]))
        valores_log.append(calculo)

    # Values used by the simulation.
    trayectorias = num_tray
    d = int(i_anios * 360)  # number of days
    t = i_anios / 1000  # time in years
    r = i_tasa * d / 360  # risk-free rate
    s = valores_cierre  # closing values used by the simulations
    RC = valores_log  # log values used by the simulations
    precio = i_precio  # estimated (strike) price
    sigma = np.std(valores_log)
    # Results.
    valores_trayectorias = []
    valores_max = []

    # Simulate every requested trajectory.

    for i in range(1, trayectorias):
        # Simulate one trajectory starting from the last closing value.
        sn_1 = trayectoria(s[len(s) - 1], 0, d, r, sigma, t)
        valores_trayectorias.append(
            sn_1)  # store the endpoint of the simulated trajectory
        valores_max.append(
            np.maximum(sn_1 - precio, 0)
        )  # call payoff: max(0, trajectory endpoint - strike price)
        # Re-initialise values for future trajectories.
        sigma = np.std(valores_log)
        RC = valores_log

    esperanza = np.mean(valores_max)
    funcion = np.exp(-i_tasa * i_anios) * esperanza
    media_trayectorias = np.mean(valores_trayectorias)

    lista_num_tray = list(range(0, len(valores_trayectorias)))
    lista_prom_tray = [media_trayectorias] * len(valores_trayectorias)

    return [
        funcion, media_trayectorias, valores_trayectorias, valores_cierre,
        dias_i, lista_num_tray, lista_prom_tray
    ]
Esempio n. 8
0
def load_data(tickers, start, end):
    """Download adjusted closes for *tickers* between *start* and *end*.

    Returns
    -------
    dict
        Maps date string -> list of adjusted closes aligned with *tickers*;
        -1 marks a missing quote for that ticker on that date.
    """
    x = {}
    for idx, t in enumerate(tickers):
        try:
            prices = yqd.load_yahoo_quote(t, start, end)
            # prices[0] is the CSV header line, hence [1:].
            for row in prices[1:]:
                if row:
                    s = row.split(",")
                    date = s[0]
                    adj_close = float(s[5])
                    if date not in x:
                        # Initialise the row with -1 sentinels for all tickers.
                        x[date] = [-1] * len(tickers)
                    x[date][idx] = adj_close
        except Exception:
            # Narrowed from BaseException so KeyboardInterrupt/SystemExit
            # are no longer swallowed by the per-ticker handler.
            print("Can't find %s!" % t)
    return x
Esempio n. 9
0
def test(symbols, start_date, end_date, output):
    """Download quotes for every ticker listed in the *symbols* file.

    Parameters
    ----------
    symbols : str
        Path to a tab-separated file whose first column is the ticker.
    start_date, end_date : str
        Date range (``YYYYMMDD``) forwarded to yqd.  (The original
        hard-coded '20170515'..'20190217' and silently ignored these
        parameters; they are now honoured.)
    output : str
        Currently unused: each ticker is written to './stock/<ticker>.txt'.
        NOTE(review): the original rebinds this parameter per ticker;
        consider honouring it as an output directory.

    Each ticker's rows are joined with '; ' and written as a single line.
    """
    with open(symbols) as symbol_file:
        for line in symbol_file:
            ticker = line.split('\t')[0]
            print('===', ticker, '===')
            list1 = yqd.load_yahoo_quote(ticker, start_date, end_date)
            if list1 is None:
                continue
            # The file is created even when there is no data, matching the
            # original behaviour.
            out_path = './stock/' + ticker + '.txt'
            with open(out_path, 'w') as file_out:
                if len(list1) > 0:
                    str_out = ticker
                    for item in list1:
                        str_out = str_out + '; ' + item
                    file_out.write(str_out + '\n')
Esempio n. 10
0
 def makeData(ticker, start_date, end_date):
     """Count, per weekday, how often that weekday held the week's maximum price.

     Downloads quotes for *ticker* between the given dates, scans the 'Low'
     column week by week, and returns a 5-element list where index 0 is
     Monday and index 4 is Friday; each entry counts the weeks in which
     that weekday had the highest 'Low' value.
     """
     # Dump the downloaded quote lines into a scratch csv for pandas.
     with open('data.csv', 'w') as scratch:
         csv.writer(scratch, delimiter='\n').writerow(
             yqd.load_yahoo_quote(ticker, start_date, end_date))
     frame = pd.read_csv('data.csv')

     win_counts = [0] * 5    # per-weekday count of "had the week's maximum"
     week_prices = [0] * 5   # prices observed in the current week
     for idx, record in frame.iterrows():
         # Parse the row's date and map it to 0 (Monday) .. 4 (Friday).
         when = datetime.datetime.strptime(record['Date'], '%Y-%m-%d')
         weekday = when.date().weekday()
         # 'Low' could be substituted with 'High', 'Open' or 'Close'.
         week_prices[weekday] = frame.at[idx, 'Low']
         if weekday == 4:
             # Friday ends the week: credit the weekday with the highest
             # price and start a fresh week.
             best_day = week_prices.index(max(week_prices))
             win_counts[best_day] += 1
             week_prices = [0] * 5
     # How many times each weekday carried the week's maximum price.
     return win_counts
Esempio n. 11
0
    def getSymbolList(self):
        """Download the NASDAQ company list and dump 180 days of history per symbol.

        Python 2 code (``print`` statement, ``urllib2``, binary csv mode).
        For every symbol in ``self.nasdaqCompanyList`` the last 180 days of
        quotes are fetched and rows of (symbol, date, open-ish value) are
        written to csvOutput/eggs.csv.  Symbols whose download fails are
        silently skipped.
        """
        # Fetch the remote company list as a CSV stream.
        response = urllib2.urlopen(self.nasdaqCompanyList)
        cr = csv.reader(response)

        # 180-day window ending today, formatted as YYYYMMDD for yqd.
        currentDate = datetime.datetime.now()
        earlierDate = currentDate - datetime.timedelta(days=180)
        currentDateStr = currentDate.strftime("%Y%m%d")
        earlierDateStr = earlierDate.strftime("%Y%m%d")

        # SymbolDict = {}
        # for row in cr: 
        #     if row[0] != "Symbol":
        #         print row[0]
                
        #         try:
        #             historicalData = yqd.load_yahoo_quote(row[0], earlierDateStr, currentDateStr)
        #             SymbolDict[row[0]] = float(historicalData[-2].split(",")[1]) * 100 / float(historicalData[1].split(",")[1])
        #         except:
        #             continue

        # sorted_symbol = sorted(SymbolDict.items(), key=operator.itemgetter(1))
        # print sorted_symbol

        with open('csvOutput/eggs.csv', 'wb') as csvfile:
            csvWriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)

            for row in cr: 
                if row[0] != "Symbol":  # skip the header row of the company list
                    print row[0]
                    try:
                        # historicalData[0] is the CSV header, hence [1:].
                        historicalData = yqd.load_yahoo_quote(row[0], earlierDateStr, currentDateStr)
                        for day in historicalData[1:]:
                            if day == "":
                                continue
                            # dayList[0] is the date; dayList[1] is presumably
                            # the 'Open' column -- TODO confirm against yqd.
                            dayList = day.split(",")
                            result = [row[0], dayList[0], dayList[1]]
                            csvWriter.writerow(result)
                    except:
                        # NOTE(review): bare except silently skips any failing
                        # symbol (including interrupts) -- consider narrowing.
                        continue
Esempio n. 12
0
    def __init__(self, symbol: str, start_date: str, end_date: str):
        """Download and parse daily quotes for *symbol* between two YYYYMMDD dates.

        Builds ``self.__data`` (a DataFrame of Date/Open/High/Low/Close/
        Adj Close/Volume rows) and ``self.__size``; prints a message when
        an OSError occurs during download/parsing.
        """
        try:
            BaseData.__init__(self, symbol)
            # self.__symbol = symbol
            self.__start_date = datetime.strptime(start_date, '%Y%m%d')
            self.__end_date = datetime.strptime(end_date, '%Y%m%d')
            self.__data = None

            # yqd returns a list of CSV lines whose first element is the
            # header, e.g.:
            # ['Date,Open,High,Low,Close,Adj Close,Volume',
            #  '2017-05-15,55.08,55.49,55.08,55.40,53.35,10686100', '']
            rows = yqd.load_yahoo_quote(symbol, start_date, end_date)

            # Column names: Date, Open, High, Low, Close, Adj Close, Volume.
            header = rows[0].split(',')

            # Turn each complete quote line into a dict keyed by the header.
            records = []
            for line in rows[1:]:
                fields = line.split(',')
                if len(fields) <= 1:
                    continue  # incomplete quote (e.g. the trailing empty line)
                records.append({
                    header[0]: fields[0],          # Date
                    header[1]: float(fields[1]),   # Open
                    header[2]: float(fields[2]),   # High
                    header[3]: float(fields[3]),   # Low
                    header[4]: float(fields[4]),   # close
                    header[5]: float(fields[5]),   # Adj Close
                    header[6]: int(fields[6]),     # Volume
                })

            # Create the DataFrame and remember its length.
            self.__data = pd.DataFrame(records)
            self.__size = len(self.__data)
        except OSError as err:
            print("OS error for symbol {}: {}".format(symbol, err))
Esempio n. 13
0
def load_quote(ticker):
	"""Print quote, dividend and split history for *ticker* (2017-05-15..17)."""
	print('===', ticker, '===')
	print(yqd.load_yahoo_quote(ticker, '20170515', '20170517'))
	# BUGFIX: the info argument is 'dividend', not 'divident' -- the valid
	# values are 'quote', 'dividend' and 'split' (see the sibling example
	# in this file that passes 'dividend').
	print(yqd.load_yahoo_quote(ticker, '20170515', '20170517', 'dividend'))
	print(yqd.load_yahoo_quote(ticker, '20170515', '20170517', 'split'))
Esempio n. 14
0
def load_quote(ticker):
    """Print December-2018 quote, dividend and split history for *ticker*."""
    begin, end = '20181201', '20181231'
    print('===', ticker, '===')
    print(yqd.load_yahoo_quote(ticker, begin, end))
    print(yqd.load_yahoo_quote(ticker, begin, end, 'dividend'))
    print(yqd.load_yahoo_quote(ticker, begin, end, 'split'))
Esempio n. 15
0
def main():
    """Command-line entry point: download Yahoo quote data into a CSV datafile.

    Supports an "increment mode": when no begin date is given and the
    datafile already contains dated rows, only the days after the last
    recorded date are downloaded and appended.  If a dividend or split
    occurred inside the increment window, the whole history is
    re-downloaded from the first recorded day, because such events rescale
    every earlier price.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description='Yahoo Quote Downloader v' +
                                     __version__)
    parser.add_argument("-t",
                        "--ticker",
                        dest="ticker",
                        required=True,
                        help="The ticker")
    parser.add_argument("-b",
                        "--begindate",
                        dest="begindate",
                        help="The beginning date (YYYY-MM-DD)")
    parser.add_argument("-e",
                        "--enddate",
                        dest="enddate",
                        help="The end date (YYYY-MM-DD)")
    parser.add_argument("-f",
                        "--datafile",
                        dest="datafile",
                        required=True,
                        help="The destination data file")
    parser.add_argument("-m",
                        "--max-retry",
                        dest="maxretries",
                        default=5,
                        type=int,
                        help="The maximum number of retries")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        default=1,
                        type=int,
                        help="Verbose level")
    parser.add_argument("--version", action="version", version=__version__)
    args = parser.parse_args()

    if args.verbose > 0:
        print("Downloading {} ...".format(args.ticker))

    # Increment mode (only download the necessary data after what is already in the datafile)
    # Increment mode is only used when
    #       - A last date can be extracted from the datafile AND
    #       - A beginning date is not specified in the commandline
    increment_mode = False
    data_in_file = []
    firstday_str = None  # first data date present in the datafile, if any
    today = datetime.datetime.today()
    today_str = today.strftime('%Y-%m-%d')

    # Determine the starting date if it is not provided.
    # If it can be extracted from the last line of the data file, use the
    # next day; otherwise fall back to the standard start date 1970-01-01.
    if args.begindate is None:
        # Try to extract the last day in the data file
        if os.path.exists(args.datafile):
            with open(args.datafile) as df:
                # Keep every line that at least starts with a date (>= 10 chars)
                for cnt, line in enumerate(df):
                    if len(line) >= 10:
                        data_in_file.append(line)
                try:
                    # First data date (data_in_file[0] is the header line);
                    # needed in case a full re-download is forced below.
                    firstline = data_in_file[1]
                    firstday_str = firstline.split(',')[0].strip()
                    # Parsed only to validate the YYYY-MM-DD format.
                    datetime.datetime.strptime(firstday_str, '%Y-%m-%d')
                    # Last data date (for increment mode)
                    lastline = data_in_file[-1]
                    lastday_str = lastline.split(',')[0].strip()
                    lastday = datetime.datetime.strptime(
                        lastday_str, '%Y-%m-%d')
                    if lastday_str >= today_str:
                        if args.verbose > 0:
                            print('{}: datafile ({}) is update to today {}'.
                                  format(args.ticker, lastday_str, today_str))
                            print('Nothing to download')
                        return
                    nextday = lastday + datetime.timedelta(days=1)
                    nextday_str = nextday.strftime('%Y-%m-%d')
                    if args.verbose > 5:
                        print('Last Date:', lastday_str, ', Next Day:',
                              nextday_str, ', First Day:', firstday_str)
                    args.begindate = nextday_str
                    # All good, and set the increment mode
                    increment_mode = True
                except:
                    # Best effort: any parse failure falls back to a full
                    # download below.
                    if args.verbose > 0:
                        print(
                            '!!! {}: failed to extract last date from date file'
                            .format(args.ticker))
                    data_in_file = []
    if args.begindate is None:
        # Two cases we are here:
        #   1. The datafile does not exist yet, or
        #   2. The datefile exists, but we failed to extract the last date
        args.begindate = '1970-01-01'

    # Determine the end date if it is not provided
    # It will be default to today's date
    if args.enddate is None:
        args.enddate = today_str

    # Print the parameters
    if args.verbose > 1:
        print("     Ticker:", args.ticker)
        print("  Beginning:", args.begindate)
        print("     Ending:", args.enddate)
        print("       File:", args.datafile)

    success = False
    for itry in range(args.maxretries):
        try:
            # Do a download of split and dividend first in increment mode.
            # If such events exist, the scale will be adjusted for the entire
            # sequence, and we must redownload from the very beginning.
            if increment_mode:
                div_data = yqd.load_yahoo_quote(args.ticker,
                                                args.begindate.replace(
                                                    '-', ""),
                                                args.enddate.replace('-', ''),
                                                info='dividend')
                has_div_event = (len(div_data) > 2 and len(div_data[-1]) > 10)
                split_data = yqd.load_yahoo_quote(
                    args.ticker,
                    args.begindate.replace('-', ""),
                    args.enddate.replace('-', ''),
                    info='split')
                has_split_event = (len(split_data) > 2
                                   and len(split_data[-1]) > 10)
                if has_div_event or has_split_event:
                    # BUGFIX: the original never called .format(), printing
                    # the literal '{}' placeholder.
                    print('!!! {}: Has a recent event (dividend or split)'.
                          format(args.ticker))
                    # BUGFIX: use the string form of the first day.  The
                    # original assigned the datetime object, which later
                    # crashed on args.begindate.replace('-', '').
                    args.begindate = firstday_str
                    increment_mode = False

            # Finally download the data
            data = yqd.load_yahoo_quote(args.ticker,
                                        args.begindate.replace('-', ""),
                                        args.enddate.replace('-', ''))
            success = True
            break
        except:
            if args.verbose > 2:
                print("Try {}/{} failed".format(itry + 1, args.maxretries))
            # traceback.print_exc()

            # Download failed. Will retry in 2 seconds, until maxretries is reached,
            # Setting _crumb to None will force yqd to obtain a new set of cookies.
            # This solves the intermittent "401 Unauthorized" issue.
            yqd._crumb = None
            time.sleep(2)

    if success:
        if args.verbose > 0:
            print("Data download successful")
        #print("Dump", data)
        vdata = validater.validate(args.ticker,
                                   data,
                                   begindate=args.begindate,
                                   verbose=args.verbose)
        with open(args.datafile, "w") as f:
            if increment_mode:
                # Write back the original data in file
                for line in data_in_file:
                    f.write(line)
                # Remove the field headline for the newly downloaded data
                del vdata[0]
            for line in vdata:
                # Skip lines that are empty
                if len(line) == 0:
                    continue
                f.write(line)
                f.write('\n')
    else:
        print("!!! {}: Download unsuccessful!".format(args.ticker))
Esempio n. 16
0
    def parseData(self):
        """Download, clean and normalise daily quotes for ``self.symbol``.

        Fetches quote lines via yqd between ``self.startTime`` and
        ``self.endTime`` (YYYY-MM-DD strings), parses them into a pandas
        DataFrame, drops null/monthly rows, derives an 'adjustment' factor
        from the Adj Close/Close ratios, renames columns for MongoDB, and
        stores the per-row dictionaries in ``self.data`` (also returned).

        Raises
        ------
        ValueError
            If the downloaded rows do not all have the same number of fields.
        """
        # Convert dates to YYYYMMDD format
        date_start = self.startTime.replace('-', '')
        date_end = self.endTime.replace('-', '')

        # Extract data
        data_raw = yqd.load_yahoo_quote(self.symbol,
                                        date_start,
                                        date_end,
                                        info='quote')

        # Remove empty rows
        data_raw = [x for x in data_raw if len(x) > 0]

        # Split data by commas
        data_split = [x.split(',') for x in data_raw]
        data_item_length = [len(x) for x in data_split]
        if any(x != data_item_length[0] for x in data_item_length):
            raise ValueError("Inconsistent length of items")

        # Convert into dictionary: column name -> list of values
        data_dict = {}
        for i in range(data_item_length[0]):
            data_dict[data_split[0][i]] = [x[i] for x in data_split[1:]
                                           ]  # first row are headers

        # Convert to pandas data frame
        data_pd = pd.DataFrame.from_dict(data_dict)

        # Convert data types: 'Date' to datetime, everything else to numeric
        # (unparseable values become NaN via errors='coerce').
        data_pd['Date'] = pd.to_datetime(data_pd['Date'], format='%Y-%m-%d')
        # NOTE(review): Index.get_values() is deprecated in newer pandas
        # (.to_numpy() replaces it) -- confirm the pinned pandas version.
        non_date = data_pd.columns[
            data_pd.columns != 'Date'].get_values().tolist()
        data_pd[non_date] = data_pd[non_date].apply(pd.to_numeric,
                                                    errors='coerce')

        # Remove null dates
        data_pd = data_pd[data_pd['Close'].notna()]

        # Remove monthly entries
        # Cut-off number of days between consecutive trading days before classifying as a monthly entry
        # should be large enough to take into account holidays and trading halts but less than one month
        diff_cutoff = 15
        data_pd.sort_values('Date', inplace=True)
        diff_date = data_pd['Date'].shift(-1) - data_pd['Date']
        data_pd = data_pd[diff_date.map(
            lambda x: x < pd.to_timedelta(diff_cutoff, unit='days'))]

        # Back out adjustments over time - set to NaN if date has no adjustments
        adj_ratio = data_pd['Adj Close'] / data_pd['Adj Close'].shift()
        close_ratio = data_pd['Close'] / data_pd['Close'].shift()
        round_cutoff = 4  # number of decimal places to consider to identify adjustments
        data_pd['Adjustments'] = adj_ratio / close_ratio
        data_pd['Adjustments'] = data_pd['Adjustments'].map(
            lambda x: np.nan if round(x, round_cutoff) == 1 else x)

        # Prepare data frame: rename to the MongoDB document field names
        data_pd.rename(columns={
            'Date': 'Timestamp',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume',
            'Adjustments': 'adjustment'
        },
                       inplace=True)
        data_pd.drop(['Adj Close'], axis=1, inplace=True)

        # Add symbol code
        data_pd['symbol'] = self.symbol

        # Convert to dictionary format for MongoDB; rows without a real
        # adjustment omit the 'adjustment' key entirely.
        self.data = []
        for index, row in data_pd.iterrows():
            document = row.to_dict()  # convert rows to dictionary
            if np.isnan(document['adjustment']): del document['adjustment']
            self.data.append(document)
        return self.data
Esempio n. 17
0
def load_quote(ticker):
    """Print historical quotes for *ticker* from 2015-01-02 through 2016-01-04."""
    print('===', ticker, '===')
    quotes = yqd.load_yahoo_quote(ticker, '20150102', '20160104')
    print(quotes)