def processCsv(db, csvfile):
    with cd(getDataDir(S.DATA_DIR)):
        print os.getcwd()
        if len(csvfile) > 0:
            dbReplaceCounter(db, csvfile)
            if S.DBG_ALL:
                # Keys are string column indices, matching the import below
                pprint.pprint(db.klseeod.find_one({'0': '3A', '1': '2018-05-08'}))
        else:
            db.klseeod.drop()
            csvfiles = glob.glob("*.csv")
            for csvf in csvfiles:
                dbNewImport(db, csvf)
            if S.DBG_ALL:
                pprint.pprint(db.klseeod.find_one({'0': '3A', '1': '2018-01-08'}))
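# Note: cd() above is defined elsewhere in this repo; a minimal sketch of such
# a directory-switching context manager (an assumption, not the repo's actual
# implementation) could be:
import contextlib
import os

@contextlib.contextmanager
def cd(newdir):
    # Switch the working directory for the duration of the block,
    # restoring the previous directory on exit (even on exceptions)
    prevdir = os.getcwd()
    os.chdir(os.path.expanduser(newdir))
    try:
        yield
    finally:
        os.chdir(prevdir)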
def writeLatestPrice(lastTradingDate=getToday('%Y-%m-%d'),
                     writeEOD=False, resume=False, recentDate=""):

    def checkMPV():
        if resume:
            mpvfile = getDataDir(S.DATA_DIR) + S.MVP_DIR + shortname + '.csv'
            if not os.path.isfile(mpvfile):
                return
            lines = tail(mpvfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped MPV downloaded ->", shortname
                return
        # Debug toggle: only the first branch is live; the else branch is
        # kept for reference but currently disabled
        if 1 == 1:
            if any("klsemvp" in s for s in pypath):
                updateMPV(shortname, stockCode, eod)
        else:
            if any("klsemvp" in s for s in pypath):
                if updateMPV(shortname, stockCode, eod):
                    load_mvp_args(True)
                    if mvpSynopsis(shortname, stockCode, dojson=1):
                        if 1 == 0:  # 2018-12-21 skip to speed up daily download
                            load_mvp_args(False)
                            # 2018-12-21 limit to 300 due to AKNIGHT exceeding
                            # the Locator.MAXTICKS error
                            mvpChart(shortname, stockCode, 300)

    stocksListing = loadfromi3(S.DATA_DIR + "i3/" + lastTradingDate + ".json",
                               recent=recentDate)
    eodlist = []
    pypath = os.environ['PYTHONPATH'].split(os.pathsep)
    if any("klsemvp" in s for s in pypath):
        from analytics.mvp import updateMPV, load_mvp_args
        from analytics.mvpchart import mvpChart, mvpSynopsis
    print ' Writing latest price from i3 ...'
    for key in sorted(stocksListing.iterkeys()):
        eod, shortname, stockCode = unpackStockData(key, lastTradingDate,
                                                    stocksListing[key])
        outfile = getDataDir(S.DATA_DIR) + shortname + '.' + stockCode + '.csv'
        if resume:
            lines = tail(outfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped downloaded ->", shortname
                checkMPV()
                continue
        eodlist.append(eod)
        if writeEOD:
            try:
                with open(outfile, "ab") as fh:
                    fh.write(eod + '\n')
            except Exception:
                print " ERR:WriteLatestPrice:", key, ':', shortname, ':', stockCode
                raise
        else:
            print eod
        checkMPV()
    return eodlist
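# Note: tail() above returns the last line of a CSV for the resume check; it
# is defined elsewhere in the repo. A minimal sketch under that assumption:
def tail(fname):
    # Return the last non-empty line of a file, or '' if the file is empty
    last = ''
    with open(fname, 'rb') as f:
        for line in f:
            if line.strip():
                last = line.strip()
    return last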
def compileLinks(i3, sb, counters):
    # Function header reconstructed from the call site in __main__ below
    i3chartlinks = []
    sbchartlinks = []
    SB_URL = 'https://my.stockbit.com/#/symbol/KLSE-'
    stocklist = formStocklist(counters, getI3Dir() + 'klse.txt')
    for key in stocklist.iterkeys():
        if i3:
            i3chartlinks.append(S.I3_KLSE_URL + '/servlets/stk/chart/' +
                                stocklist[key] + '.jsp')
        if sb:
            sbchartlinks.append(SB_URL + key + '/chartbit')
    return i3chartlinks, sbchartlinks


if __name__ == '__main__':
    args = docopt(__doc__)
    loadCfg(getDataDir(S.DATA_DIR))
    counters = getCounters(args['COUNTER'], args['--portfolio'],
                           args['--watchlist'])
    if args['--debug']:
        S.DBG_ALL = True
    if S.DBG_ALL:
        print getTime(), counters
    if len(counters) > 0:
        i3chartlinks, sbchartlinks = compileLinks(args['--i3'], args['--sb'],
                                                  counters)
        if S.DBG_ALL:
            print getTime(), i3chartlinks
            print getTime(), sbchartlinks
    else:
        LOGIN_URL = S.I3_KLSE_URL + LOGIN_URL
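# The docopt(__doc__) call above parses a module docstring. A hypothetical
# usage string consistent with the options referenced here (the script name
# charts.py is an assumption):
"""Compile i3investor and Stockbit chart links.

Usage:
  charts.py [options] [COUNTER ...]

Options:
  --i3           Generate i3investor chart links.
  --sb           Generate Stockbit chart links.
  --portfolio    Load counters from the portfolio.
  --watchlist    Load counters from the watchlist.
  --debug        Enable debug output.
"""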
klse = "../i3investor/klse.txt" stocks = '' if len(stocks) > 0: # download only selected counters stocklist = formStocklist(stocks, klse) else: # Full download using klse.txt stocklist = loadKlseCounters(klse) cookie, crumb = getYahooCookie('https://uk.finance.yahoo.com/quote/AAPL/') for shortname in sorted(stocklist.iterkeys()): stock_code = stocklist[shortname] if len(stock_code) > 0: # OUTPUT_FILE = '../../data/yahoo/' + shortname + "." + stock_code + ".csv" OUTPUT_FILE = getDataDir( S.DATA_DIR) + 'yahoo/' + shortname + "." + stock_code + ".csv" if S.RESUME_FILE: lastdt = getLastDate(OUTPUT_FILE) if len(lastdt) == 0: # File is likely to be empty, hence scrape from beginning lastdt = S.ABS_START print shortname, stock_code print "Scraping", shortname, stock_code, lastdt, '$' q = YahooQuote(cookie, crumb, shortname, stock_code + ".KL", lastdt, "2018-02-01") if len(q.getCsvErr()) > 0: st_code, st_reason = q.getCsvErr().split(":") rtn_code = int(st_code) print rtn_code, st_reason else: print q
def to_df(self):
    """
    Returns a pandas DataFrame built from the parsed HTML of a
    Commodity object's response.
    """
    try:
        df = pd.read_html(self.response)
        df = df[0]  # ignore the footer table
        if S.DBG_ALL:
            df.to_csv(getDataDir(S.DATA_DIR) + self.name + ".inf")
        price = df['Price'][0]
        if math.isnan(price):  # no result found
            return None
        df["Date"] = pd.to_datetime(df["Date"])
        df.insert(0, "Commodity", np.nan)
        df["Commodity"] = self.name
        df.insert(6, "Close", np.nan)
        df["Close"] = df["Price"]
        df.insert(7, "Volume", np.nan)
        if self.name.startswith('USD'):
            df['Volume'] = 0
        elif self.name.startswith('FTFBM'):
            df['Volume'] = df["Vol."]
        else:
            mp = {'K': ' * 10**3', 'M': ' * 10**6'}
            df['Vol.'] = df['Vol.'].replace('-', '0.1K')
            # Replace all zero volumes with 100 shares
            df['Vol.'] = df['Vol.'].replace(0, '0.1K')
            # Convert the K suffix to 10**3 and M to 10**6.
            # Important: supports at most 5 months of EOD per conversion.
            try:
                df["Volume"] = pd.eval(df["Vol."].replace(
                    mp.keys(), mp.values(),
                    regex=True).str.replace(r'[^\d\.\*]+', ''))
            except Exception:
                df['Volume'] = df["Vol."]
        df.drop('Price', axis=1, inplace=True)
        df.drop('Change %', axis=1, inplace=True)
        if 'Vol.' in df.columns:  # FOREX has no "Vol." column
            df.drop('Vol.', axis=1, inplace=True)
        df.sort_values(by='Date', inplace=True)
    except ValueError as ve:
        df = 'ValueError'
        self.csverr = (self.name + ": ValueError (No data for date range)" +
                       ' (' + str(ve) + ')')
        if S.DBG_ALL:
            with open(getDataDir(S.DATA_DIR) + "value.err", 'ab') as f:
                f.write('\n=============================\n')
                f.write(self.name + "\n")
                f.write(self.response)
    except Exception as e:
        # This happens when more than about 3 months of records are
        # processed; try reducing the period
        if S.DBG_ALL:
            with open(getDataDir(S.DATA_DIR) + "value.err", 'ab') as f:
                f.write('\n=============================\n')
                f.write(self.name + "\n")
                f.write(self.response)
        self.csverr = (self.name + ":" + self.start + "," + self.end +
                       ":" + str(e))
        df = 'Exception'
    return df
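# The K/M suffix handling above hinges on string replacement plus pd.eval;
# a standalone illustration of that conversion (a sketch, not repo code):
import pandas as pd

vol = pd.Series(['1.5K', '2M', '0.1K'])
mp = {'K': ' * 10**3', 'M': ' * 10**6'}
expanded = vol.replace(mp, regex=True)  # '1.5 * 10**3', '2 * 10**6', '0.1 * 10**3'
print expanded.map(pd.eval)  # 1500.0, 2000000.0, 100.0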
def scrapeKlseRelated(klsemap, WRITE_CSV=True, dbg=False):
    idmap = loadIdMap(klsemap)
    # e.g. 'USDMYR.2168,FTFBM100.0200,FTFBMKLCI.0201,FTFBMMES.0202,
    #       FTFBMSCAP.0203,FTFBM70.0204,FTFBMEMAS.0205'
    counterlist = S.KLSE_RELATED.split(',')
    eodlist = []
    for i in counterlist:
        counter = i.split('.')
        shortname = counter[0]
        stock_code = counter[1]
        rtn_code = 0
        OUTPUT_FILE = getDataDir(
            S.DATA_DIR) + shortname + "." + stock_code + ".csv"
        TMP_FILE = OUTPUT_FILE + 'tmp'
        if S.RESUME_FILE:
            lastdt = getLastDate(OUTPUT_FILE)
            if len(lastdt) == 0:
                # File is likely to be empty, hence scrape from the beginning
                lastdt = S.ABS_START
        else:
            lastdt = S.ABS_START
        enddt = getToday('%Y-%m-%d')
        if lastdt == enddt:
            print "Skipped downloaded:", counter
            continue
        print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
            shortname, stock_code, lastdt, enddt)
        while True:
            startdt = lastdt
            if getDaysBtwnDates(lastdt, enddt) > 22 * 3:
                # Do 3 months (~66 trading days) at a time
                stopdt = getDayOffset(startdt, 22 * 3)
                lastdt = getNextBusinessDay(stopdt)
            else:
                stopdt = enddt
            eod = InvestingQuote(idmap, shortname, startdt, stopdt)
            if dbg:
                for item in eod:
                    print item
            if len(eod.getCsvErr()) > 0:
                print eod.getCsvErr()
            elif isinstance(eod.response, unicode):
                dfEod = eod.to_df()
                if isinstance(dfEod, pd.DataFrame):
                    if dbg:
                        print dfEod[:5]
                    if WRITE_CSV:
                        dfEod.index.name = 'index'
                        dfEod.to_csv(TMP_FILE, index=False, header=False)
                    dates = pd.to_datetime(dfEod["Date"], format='%Y%m%d')
                    dfEod["Date"] = dates.dt.strftime('%Y-%m-%d').tolist()
                    elist = dfEod.values.tolist()[0]
                    eodlist.append(','.join(map(str, unpackEOD(*elist))))
                else:
                    print "ERR:" + dfEod + ": " + shortname + "," + lastdt
                    rtn_code = -2
            if WRITE_CSV:
                appendCsv(rtn_code, OUTPUT_FILE)
            if stopdt == enddt:
                break
    return eodlist
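# Note: the date helpers above live elsewhere in the repo; minimal sketches
# under the assumption that dates are '%Y-%m-%d' strings:
from datetime import datetime, timedelta

def getDaysBtwnDates(d1, d2):
    # Number of calendar days from d1 to d2
    fmt = '%Y-%m-%d'
    return (datetime.strptime(d2, fmt) - datetime.strptime(d1, fmt)).days

def getDayOffset(d, offset):
    # Date string offset by the given number of calendar days
    fmt = '%Y-%m-%d'
    return (datetime.strptime(d, fmt) + timedelta(days=offset)).strftime(fmt)

def getNextBusinessDay(d):
    # Next weekday after d (does not account for exchange holidays)
    fmt = '%Y-%m-%d'
    nxt = datetime.strptime(d, fmt) + timedelta(days=1)
    while nxt.weekday() > 4:  # 5 = Saturday, 6 = Sunday
        nxt += timedelta(days=1)
    return nxt.strftime(fmt)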
else:
    # Full download using klse.txt
    # Fix: the flag is renamed; the original assignment shadowed the
    # writeStocksListing() function it was meant to gate
    doWriteListing = False
    if doWriteListing:
        writeStocksListing()
    stocklist = loadKlseCounters(klse)
    for shortname in sorted(stocklist.iterkeys()):
        if shortname in S.EXCLUDE_LIST:
            print "Exclude: ", shortname
            continue
        stock_code = stocklist[shortname]
        if len(stock_code) > 0:
            rtn_code = 0
            OUTPUT_FILE = getDataDir(
                S.DATA_DIR
            ) + 'investingcom/' + shortname + "." + stock_code + ".csv"
            TMP_FILE = OUTPUT_FILE + 'tmp'
            if S.RESUME_FILE:
                lastdt = getLastDate(OUTPUT_FILE)
                if len(lastdt) == 0:
                    # File is likely to be empty, hence scrape from the beginning
                    lastdt = S.ABS_START
            else:
                lastdt = S.ABS_START
            enddt = getToday('%Y-%m-%d')
            print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
                shortname, stock_code, lastdt, enddt)
            failcount = 0
            while True:
                if failcount == 0:
stk_list = {}  # always used as a mapping of name -> code below
if len(lastFinDate) > 0:
    stk_list = scrape_latest_fin(connect_stk_fin(''), lastFinDate)
else:
    klse = S.KLSE_LIST
    if len(stocks) > 0:
        # Download only selected counters
        stk_list = formStocklist(stocks, klse)
    else:
        stk_list = loadKlseCounters(klse)
for stk_name in stk_list:
    stk_code = stk_list[stk_name]
    print 'Downloading financials for', stk_name, stk_code
    if len(stk_code) == 4:
        stkfin = scrape_stk_fin(connect_stk_fin(stk_code), lastFinDate)
        if stkfin is not None:
            with open(getDataDir(S.DATA_DIR) + stk_name + '.' +
                      stk_code + ".fin", "w") as fh:
                for key in sorted(stkfin.iterkeys()):
                    fin = ','.join(map(str, unpack_fin(key, *(stkfin[key]))))
                    print fin
                    fh.write(fin + '\n')
    else:
        print 'Skipped:', stk_name, stk_code
def startMongoD():
    # Function header reconstructed from the initKlseDB() call below
    global mongod
    with cd(S.DATA_DIR):
        mongod = subprocess.Popen(
            ['mongod', '--dbpath', os.path.expanduser(S.DATA_DIR)])


def initKlseDB():
    startMongoD()
    global mongo_client
    mongo_client = MongoClient()
    db = mongo_client.klsedb
    return db


def closeKlseDB():
    if mongo_client is not None:
        print 'Terminating Mongo Client ...'
        mongo_client.close()
    if mongod is not None:
        print 'Terminating MongoDB daemon ...'
        mongod.terminate()


if __name__ == '__main__':
    loadCfg(S.DATA_DIR)
    with cd(getDataDir(S.DATA_DIR)):
        exportCounters()
        exportQuotes()
    closeKlseDB()
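# Illustrative round trip through the helpers above (a sketch, not repo code):
# db = initKlseDB()          # starts mongod and opens a client on klsedb
# print db.klseeod.count()   # e.g. inspect the imported EOD collection
# closeKlseDB()              # closes the client and stops the daemon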