def mt4update(lastTradingDate=getToday('%Y-%m-%d')):
    def latesteod(eodlist):
        if len(eodlist) > 0:
            with open(S.DATA_DIR + eodfile, 'wb') as eodf:
                for eod in eodlist:
                    eodf.write(str(eod) + '\n')
        elif os.path.isfile(eodfile):
            print "Processing from last EOD file ... "
            return
        else:
            print "ERR: Missing EOD file!"
            return

    if len(S.MT4_DIR) == 0 or not mt4date:
        return
    eodfile = 'latest.eod'
    latesteod(mt4eod(lastTradingDate))
    csvfiles = getCsvFiles(S.DATA_DIR + 'latest.eod')
    with cd(S.DATA_DIR):
        quotes = S.MT4_DIR + "quotes.csv"
        print os.getcwd()
        print "Writing to MT4 ... " + quotes
        with open(quotes, 'w') as qcsv:
            input_lines = fileinput.input(csvfiles)
            qcsv.writelines(input_lines)
        print "Writing to MT4 ... Done"
    with cd(S.MT4_DIR):
        cmd = "mt4.sh " + S.MT4_DIR
        os.system(cmd)
    print "Post-update Processing ... Done"

def __init__(self, idmap, sname, last_date, end_date=getToday("%Y-%m-%d")):
    if last_date == end_date:
        self.csverr = sname + ": Skipped downloaded (" + last_date + ")"
        return None
    last_date = du.getNextBusinessDay(last_date)
    if last_date > end_date:
        self.csverr = sname + ": Invalid dates (" + last_date + "," + end_date + ")"
        return None
    # Do not download today's EOD if market is still open
    if end_date == getToday("%Y-%m-%d"):
        now = datetime.datetime.now()
        if sname.startswith('USD'):
            if now.hour > 22:  # US starts after 10pm Malaysia time
                end_date = du.getYesterday("%Y-%m-%d")
        elif now.hour < 18:  # only download today's EOD if it is after 6pm local time
            end_date = du.getYesterday("%Y-%m-%d")
    if last_date > end_date:
        self.csverr = sname + ": Skipped downloaded (" + last_date + ")"
        return None
    '''
    last_date = datetime.datetime.strptime(last_date, "%Y-%m-%d").strftime('%m/%d/%Y')
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d").strftime('%m/%d/%Y')
    '''
    last_date = du.change2IcomDateFmt(last_date)
    end_date = du.change2IcomDateFmt(end_date)
    super(InvestingQuote, self).__init__(sname, last_date, end_date, idmap)
    self.response = self.scrape()
    # s0 = Quote(sname, last_date, end_date, idmap)
    if isinstance(self.response, unicode):
        s1 = self.to_df()
        if isinstance(s1, pd.DataFrame):
            s1.index.name = 'index'
            self.s1 = s1
            self.csverr = ''
            # s1.to_csv(OUTPUT_FILE, index=False, header=False)
            print self.name + ":", last_date
        elif s1 is None:
            self.csverr = self.name + ':Skipped no result'
        else:
            # Use csverr from to_df()
            return
    else:
        self.csverr = sname + ":" + self.response

def crawl_listing(trading_date=getToday("%d-%b-%Y"), formatted_output=True):
    latest_listings = scrape_listing(connect_url(I3_LISTING_URL), trading_date, formatted_output)
    if formatted_output and len(latest_listings) > 0:
        new_list = []
        for key in latest_listings:
            new_list.append(latest_listings[key])
        format_table_listing("Additional Listing", new_list)
        return new_list
    return latest_listings

def crawl_price_target(trading_date=getToday("%d-%b-%Y"), formatted_output=True):
    price_targets = scrape_target(connect_url(I3_TARGET_URL), trading_date, formatted_output)
    if formatted_output and len(price_targets) > 0:
        new_list = []
        for key in price_targets:
            new_list.append(price_targets[key])
        format_table_target("Price Target", new_list)
        return new_list
    return price_targets

def backupKLse(srcdir, tgtdir, prefix):
    if len(tgtdir) == 0 or not tgtdir.endswith('\\'):
        print "Skipped backing up data", tgtdir
        return
    with cd(srcdir):
        subprocess.call('pwd')
        bkfl = tgtdir + prefix + 'klse' + getToday() + '.tgz'
        print "Backing up", bkfl
        with tarfile.open(bkfl, "w:gz") as tar:
            for csvfile in glob.glob("*.csv"):
                tar.add(csvfile)

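# Hedged usage sketch (not part of the original source): backupKLse skips the
# backup unless the target directory ends with a Windows-style backslash, so a
# call would look roughly like the hypothetical example below.
# backupKLse(S.DATA_DIR, 'D:\\backup\\', 'daily_')
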
def backupjson(datadir):
    jsondir = os.path.join(datadir, "json", '')
    tgtdir = os.path.join(S.DATA_DIR, "json", '')
    jfiles = "*." + getToday("%Y-%m-%d") + ".json"
    with cd(jsondir):
        subprocess.call('pwd')
        print jfiles
        for jfile in glob.glob(jfiles):
            jname = jfile.split(".")
            bkfl = tgtdir + jname[0] + ".tgz"
            print "backing up", bkfl
            # tarfile does not support appending to gzip-compressed archives
            # ("a:gz" raises an error), so the archive is (re)written instead
            with tarfile.open(bkfl, "w:gz") as tar:
                tar.add(jfile)

def checkI3LastTradingDay(lastdt, i3onSat=""):
    dt, popen, pclose, vol = scrapeRecentEOD(connectRecentPrices("1295"), "PBBANK", 1295, lastdt, True)
    popen2, pclose2, vol2 = scrapeLatestPrice(connectStocksListing("P"), "1295")
    if S.DBG_ALL:
        print dt, popen, pclose, vol, popen2, pclose2, vol2
    if dt == lastdt or i3onSat:
        dates = [dt]
        if popen == popen2 and pclose == pclose2 and vol == vol2:
            # Post processing mode on the following day
            if i3onSat:
                dates = [lastdt]
                # dates.append(getYesterday('%Y-%m-%d'))
                dates.append(i3onSat)
        else:
            if i3onSat:
                dates = [lastdt]
                dates.append(i3onSat)
            now = datetime.now()
            # Use i3 latest price
            if now.hour >= 17:  # only use today's latest price if it is after 5pm local time
                dates.append(getToday('%Y-%m-%d'))
        return dates
    elif dt > lastdt and dt == getToday('%Y-%m-%d'):
        # sometimes i3 updates latest price on the same day
        dates = [lastdt, dt]
        return dates
    else:
        if lastdt > dt:
            # post processing mode on the same day
            return [lastdt]
        # lastdt < dt, need to update multiple dates, even for yesterday's EOD alone
        return ['1', getNextBusinessDay(lastdt), '3']

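# Illustrative note (an assumption drawn from reading the code above, not an
# authoritative spec): checkI3LastTradingDay returns the list of dates the
# caller should (re)download. A one-element list equal to lastdt means only
# post-processing is needed; [lastdt, today] means i3's latest price can be
# appended; the ['1', nextBusinessDay, '3'] form flags a multi-day gap that
# needs a full range download.
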
def crawl_entitlement(trading_date=getToday("%d-%b-%Y"), formatted_output=False):
    url = I3_DIVIDEND_URL
    latest_dividends = scrape_entitlement(connect_url(url), url, trading_date, formatted_output)
    if formatted_output and len(latest_dividends) > 0:
        format_table_entitlement("Latest Dividends", latest_dividends)
    url = I3_ENTITLEMENT_OTHERS_URL
    latest_others = scrape_entitlement(connect_url(url), url, trading_date, formatted_output)
    if formatted_output and len(latest_others) > 0:
        format_table_entitlement("Latest Bonus, Share Split & Consolidation", latest_others)
    return latest_dividends, latest_others

def crawl_latest(trading_date=getToday("%d-%b-%Y"), formatted_output=False):
    url = I3_INSIDER_DIRECTOR_URL
    latest_dir = scrape_latest(connect_url(url), url, trading_date, formatted_output)
    if formatted_output and len(latest_dir) > 0:
        new_list = []
        for key in latest_dir:
            new_list.append(latest_dir[key])
        format_table_insiders("Latest Directors Transactions", new_list)
        # return new_list
    url = I3_INSIDER_SHAREHOLDER_URL
    latest_shd = scrape_latest(connect_url(url), url, trading_date, formatted_output)
    if formatted_output and len(latest_shd) > 0:
        format_table_insiders("Latest Substantial Shareholders Transactions", latest_shd)
    url = I3_INSIDER_COMPANY_URL
    latest_company = scrape_latest(connect_url(url), url, trading_date, formatted_output)
    if formatted_output and len(latest_company) > 0:
        format_table_insiders("Latest Company Transactions", latest_company)
    return latest_dir, latest_shd, latest_company

def scrapeKlse(procmode, force_update, resume, i3onSat):
    '''
    Determine whether the latest price found on the i3 stocks page can be used.
    Conditions:
        1. latest EOD record in the csv file is not today
        2. latest EOD record in the csv file is 1 trading day behind investing.com's latest EOD
    '''
    lastdt = getLastDate(S.DATA_DIR + 'YTL.4677.csv')
    if force_update or resume:
        dates = []
        dates.append(lastdt)
        dates.append(getToday('%Y-%m-%d'))
    else:
        if 1 == 1:
            # use i3 instead of investing.com due to delayed updating of EOD since 4Q 2018
            dates = checkI3LastTradingDay(lastdt, i3onSat)
        else:
            dates = checkInvComLastTradingDay(lastdt)
    if dates is None or (len(dates) == 1 and dates[0] == lastdt):
        if procmode:
            print "Post updating mode ON"
            print lastdt, dates
            return
        print "Already latest. Post-updating now."
        postUpdateProcessing()
    else:
        if procmode:
            print "Post updating mode OFF"
        if len(dates) == 2 and dates[1] > lastdt and dates[0] == lastdt:
            useI3latest = True
        else:
            useI3latest = False
        S.DBG_ALL = True
        if useI3latest:
            print "Scraping from i3 latest ..."
            preUpdateProcessing()
            list1 = writeLatestPrice(dates[1], True, resume)
        else:
            print "Scraping from i3 recent ..."
            # i3 only keeps 1 month of EOD, while investing.com cannot do more than 5 months.
            # The investing.com code has been enhanced to break downloads into 3-month chunks.
            if 1 == 0:
                list1 = scrapeI3(loadKlseCounters(klse))
            else:
                list1 = writeLatestPrice(dates[1], True, resume, dates[1])
        list2 = scrapeKlseRelated('scrapers/investingcom/klse.idmap')
        if len(list2):
            pypath = os.environ['PYTHONPATH'].split(os.pathsep)
            if any("klsemvp" in s for s in pypath):
                from analytics.mvp import mpvUpdateKlseRelated
                mpvUpdateKlseRelated()
        if S.USEMONGO:
            # do not perform upsert ops due to speed
            eodlist = list2 + list1
            dbUpdateLatest(eodlist)

def process(yaml_file, trading_date=getToday('%d-%b-%Y')):
    print("Trading date: " + trading_date)
    latest_dir, latest_shd, latest_com = crawl_latest(trading_date)
    latest_div, latest_bonus = crawl_entitlement(trading_date)
    latest_listing = crawl_listing(trading_date)
    latest_target = crawl_price_target(trading_date)
    latest_ar = crawl_latest_ar(trading_date)
    latest_qr = crawl_latest_qr(trading_date)
    deco_dir = format_decorator(format_director)
    deco_shd = format_decorator(format_shareholder)
    deco_com = format_decorator(format_company)
    stream = open(yaml_file, 'r')
    docs = yaml.load_all(stream, Loader=yaml.FullLoader)
    for doc in docs:
        for name, items in doc.items():
            # print (name + " : " + str(items))
            addr = items["email"]
            print (name + ": " + ", ".join(addr))
            if skip_name is not None and name in skip_name:
                print ("\tSkipped")
                break
            for tracking_list in items.iterkeys():
                if tracking_list == "email":
                    continue
                print ("\t" + tracking_list)
                dir_list, shd_list, com_list, qr_list, ar_list = [], [], [], [], []
                div_list, bns_list, listing_list, target_list = [], [], [], []
                dir_title = "Latest Directors Transactions"
                shd_title = "Latest Substantial Shareholders Transactions"
                com_title = "Latest Company Transactions"
                qr_title = "Quarterly Results"
                ar_title = "Annual Reports"
                div_title = "Latest Dividend"
                bns_title = "Latest Bonus, Share Split & Consolidation"
                listing_title = "Latest Listing"
                target_title = "Price Target"
                for stock in items[tracking_list]:
                    stock = stock.upper()
                    '''
                    # res = match_selection(stock, latest_dir, dir_title)
                    # if len(res) > 0:
                    #     for item in res:
                    #         dir_list.append(item)
                    # shd = match_selection(stock, latest_shd, shd_title)
                    # if len(shd) > 0:
                    #     for item in shd:
                    #         shd_list.append(item)
                    # com = match_selection(stock, latest_com, com_title)
                    # if len(com) > 0:
                    #     for item in com:
                    #         com_list.append(item)
                    if stock in latest_dir:
                        dr = latest_dir[stock]
                        for item in dr:
                            dir_list.append(format_director(True, *item))
                    if stock in latest_shd:
                        shd = latest_shd[stock]
                        for item in shd:
                            shd_list.append(format_shareholder(True, *item))
                    if stock in latest_com:
                        com = latest_com[stock]
                        for item in com:
                            com_list.append(format_company(True, *item))
                    '''
                    deco_dir(stock, latest_dir, dir_list)
                    deco_shd(stock, latest_shd, shd_list)
                    deco_com(stock, latest_com, com_list)
                    if stock in latest_qr:
                        qr = latest_qr[stock]
                        qr_list.append(format_latest_qr(stock, *qr))
                    if latest_ar is not None and stock in latest_ar:
                        ar = latest_ar[stock]
                        ar_list.append(format_latest_ar(stock, *ar))
                    if stock in latest_div:
                        div = latest_div[stock]
                        div_list.append(format_div(True, False, *div))
                    if stock in latest_bonus:
                        bns = latest_bonus[stock]
                        bns_list.append(format_dividend(True, True, *bns))
                    if stock in latest_listing:
                        listing = latest_listing[stock]
                        listing_list.append(format_listing(True, *listing))
                    if stock in latest_target:
                        target = latest_target[stock]
                        target_list.append(format_target(True, *target))
                format_table_insiders(dir_title, dir_list)
                format_table_insiders(shd_title, shd_list)
                format_table_insiders(com_title, com_list)
                format_qr_table(qr_title, qr_list)
                format_ar_table(ar_title, ar_list)
                format_table_entitlement(div_title, div_list)
                format_table_entitlement(bns_title, bns_list)
                format_table_listing(listing_title, listing_list)
                format_table_target(target_title, target_list)
                list_result = \
                    div_list + bns_list + qr_list + ar_list + dir_list + \
                    shd_list + com_list + listing_list + target_list
                if len(list_result) > 0:
                    list_result.insert(0, T.t01)
                    subject = "INSIDER UPDATE on {} for portfolio: {}".format(
                        getToday("%d-%b-%Y"),
                        tracking_list.upper())
                    retry = 0
                    while True:
                        try:
                            yagmail.SMTP(S.MAIL_SENDER, S.MAIL_PASSWORD).send(
                                addr, subject, list_result)
                        except Exception, e:
                            retry += 1
                            if retry > 10:
                                print ("Unable to send mail! Exit")
                                print e
                                break
                            print ("SMTP data error...retrying {} time(s)".format(retry))
                            time.sleep(120)
                        else:
                            # exit the retry loop once the mail is sent
                            break

def getMt4StartDate():
    mt4Start = getDayOffset(getToday('%Y-%m-%d'), S.MT4_DAYS * -1)
    # Fixed to 1st Jan of year
    mt4Start = mt4Start[:4] + "-01-01"
    return mt4Start

def writeLatestPrice(lastTradingDate=getToday('%Y-%m-%d'), writeEOD=False, resume=False, recentDate=""):
    def checkMPV():
        if resume:
            mpvfile = getDataDir(S.DATA_DIR) + S.MVP_DIR + shortname + '.csv'
            if not os.path.isfile(mpvfile):
                return
            lines = tail(mpvfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped MPV downloaded ->", shortname
                return
        if 1 == 1:
            if any("klsemvp" in s for s in pypath):
                updateMPV(shortname, stockCode, eod)
        else:
            if any("klsemvp" in s for s in pypath):
                if updateMPV(shortname, stockCode, eod):
                    load_mvp_args(True)
                    if mvpSynopsis(shortname, stockCode, dojson=1):
                        if 1 == 0:
                            # 2018-12-21 skip to speed up daily download
                            load_mvp_args(False)
                            # 2018-12-21 limit to 300 due to AKNIGHT exceeds Locator.MAXTICKS error
                            mvpChart(shortname, stockCode, 300)

    stocksListing = loadfromi3(S.DATA_DIR + "i3/" + lastTradingDate + ".json", recent=recentDate)
    eodlist = []
    pypath = os.environ['PYTHONPATH'].split(os.pathsep)
    if any("klsemvp" in s for s in pypath):
        from analytics.mvp import updateMPV, load_mvp_args
        from analytics.mvpchart import mvpChart, mvpSynopsis
    print ' Writing latest price from i3 ...'
    for key in sorted(stocksListing.iterkeys()):
        eod, shortname, stockCode = unpackStockData(key, lastTradingDate, stocksListing[key])
        outfile = getDataDir(S.DATA_DIR) + shortname + '.' + stockCode + '.csv'
        if resume:
            lines = tail(outfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped downloaded ->", shortname
                checkMPV()
                continue
        eodlist.append(eod)
        if writeEOD:
            try:
                with open(outfile, "ab") as fh:
                    fh.write(eod + '\n')
            except Exception:
                print " ERR:WriteLatestPrice:", key, ':', shortname, ':', stockCode
                raise
        else:
            print eod
        checkMPV()
    return eodlist

def crawl_latest_ar(trading_date=getToday("%d-%b-%Y")):
    return scrape_latest_ar(connect_url(I3_LATEST_AR_URL), trading_date)

def __init__(self, cookie, crumb, sname, symbol, last_date, end_date=date.today().isoformat()):
    super(YahooQuote, self).__init__(last_date)
    self.sname = sname.upper()
    self.symbol = symbol.upper()
    if last_date == getToday("%Y-%m-%d"):
        # Will get 400 Bad Request
        if S.DBG_YAHOO:
            print "DBG:Skipped downloaded", last_date
        return None
    start_date = getNextDay(last_date)
    if S.DBG_ALL or S.DBG_YAHOO:
        print "DBG:YahooQuote:1:", symbol, self.symbol, last_date, start_date
    # Do not download today's EOD if market is still open
    if end_date == du.getToday("%Y-%m-%d"):
        now = datetime.now()
        if now.hour < 18:  # only download today's EOD if it is after 6pm local time
            end_date = du.getYesterday("%Y-%m-%d")
    # self.url = self.formUrl_old(symbol, last_date, end_date)
    self.url = self.formUrl(crumb, symbol, start_date, end_date)
    if S.DBG_ALL or S.DBG_YAHOO:
        print "DBG:YahooQuote:2:", self.url
    # csv = urllib.urlopen(self.url).readlines()
    # if not csv[0].startswith("Date,Open,"):
    resUrl = requests.get(self.url, cookies={'B': cookie})
    if resUrl.status_code != 200:
        self.csverr = str(resUrl.status_code) + ":" + resUrl.reason
        print "ERR:", symbol, ":", self.url
        print "ERR:" + self.csverr
    else:
        self.csverr = ''
        '''
        csv = resUrl.text
        csv.reverse()
        for bar in xrange(0, len(csv) - 1):
            ds, open_, high, low, close, volume, adjc = csv[bar].rstrip().split(',')
        '''
        iterator = resUrl.iter_lines()
        skipline = next(iterator)
        if S.DBG_ALL:
            print "SKIP:YahooQuote:", skipline
        for csv in iterator:
            if S.DBG_ALL:
                print "DBG", csv
            if "null" in csv:
                if S.DBG_YAHOO:
                    print "SKIP:", csv
                continue
            ds, open_, high, low, close, adjc, volume = (
                csv.rstrip().split(','))
            if S.DBG_YAHOO:
                print "DBG:", ds, open_, high, low, close, adjc, volume
                # print "DBG:", type(ds), type(open_), type(high), type(low),
                # type(close), type(adjc), type(volume)
            # Start of data validation
            if float(volume) <= 0:
                if S.DBG_YAHOO:
                    print 'DBG:Skipped 0 volume as a result of non-trading day:', ds
                continue
            if ds < start_date:
                if S.DBG_YAHOO:
                    print "DBG:Skipped older date:", ds
                continue
            if ds > getToday("%Y-%m-%d"):
                if S.DBG_YAHOO:
                    print "DBG:Skipped future dividends:", ds
                continue
            if ds == self.lastdate:
                if S.DBG_YAHOO:
                    print "INF:Skipped duplicated date:", sname, ds
                    print '\tcsv:', csv
                    print '\tlst:', self.lastcsv
                continue
            self.lastdate = ds
            self.lastcsv = csv
            '''
            if not isnumberlist([high, low, close, adjc]):
                if S.DBG_YAHOO:
                    print "SKIP:", ds, open_, high, low, close, adjc
                continue
            '''
            open_, high, low, close, adjc = [
                float(x) for x in [open_, high, low, close, adjc]
            ]
            if open_ > high or close > high:
                print "ERR:Invalid High:H<O,C, Patched.", sname, csv
                if high < open_:
                    high = open_
                if high < close:
                    high = close
            if open_ < low or close < low:
                print "ERR:Invalid Low:L>O,C, Patched.", sname, csv
                if low > open_:
                    low = open_
                if low > close:
                    low = close
            if low * 10000 < 1.0 or high * 10000 < 1.0:
                if open_ * 10000 < 1.0 and close * 10000 < 1.0:
                    print "ERR:0 values detected - SKIPPED", sname, csv
                    continue
                else:
                    print "INF:0 value detected - PATCHED", sname, csv
                    if low * 10000 < 1.0:
                        if open_ < close:
                            low = open_
                        else:
                            low = close
                    else:
                        if high * 10000 < 1.0:
                            if open_ > close:
                                high = open_
                            else:
                                high = close
            if not S.PRICE_WITHOUT_SPLIT:
                if close != adjc:
                    factor = adjc / close
                    open_, high, low, close = [
                        x * factor for x in [open_, high, low, close]
                    ]
            dt = datetime.strptime(ds, '%Y-%m-%d')
            self.append(dt, open_, high, low, close, volume)
    if S.DBG_ALL:
        print "YahooQuote:End"

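# Worked example of the back-adjustment applied above (illustrative numbers
# only, assuming S.PRICE_WITHOUT_SPLIT is False): if Yahoo returns Close=2.00
# and Adj Close=1.90, then factor = 1.90 / 2.00 = 0.95 and Open, High, Low and
# Close are each multiplied by 0.95, so the stored history follows the
# adjusted series.
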
def mt4eod(lastTradingDate):
    stocksListing = loadfromi3(S.DATA_DIR + "i3/" + lastTradingDate + ".json")
    eodlist = []
    for key in sorted(stocksListing.iterkeys()):
        eod, _, _ = unpackStockData(key, lastTradingDate, stocksListing[key])
        eodlist.append(eod)
    return eodlist


if __name__ == '__main__':
    S.DBG_ALL = False
    list1 = writeLatestPrice("2020-12-31", True, True, "2020-12-31")
    '''
    writeStocksListing()
    writeLatestPrice(getDataDir(S.DATA_DIR) + 'i3/', False)
    from timeit import timeit
    print(timeit('i3ScrapeLatest()', number=2, setup="from __main__ import i3ScrapeLatest"))
    '''
    # slisting = i3ScrapeLatest("0", concurrency=True)
    slisting = loadfromi3(S.DATA_DIR + "i3/" + getToday() + ".json", "0")
    print slisting

def crawl_latest_qr(trading_date=getToday("%d-%b-%Y")):
    latestQR = scrape_latest_qr(connect_url(I3_LATEST_QR_URL), trading_date)
    return latestQR

def generateMPV(counter, stkcode, today=getToday('%Y-%m-%d')):
    def preloadInitials():
        firstline = next(reader)
        _, _, _, _, _, pclose, volume = unpackEOD(*firstline)
        pclose = float(pclose) * 0.05
        volume = float(volume) * 0.05
        avol, tvol, aprice, tprice, pdiff, vdiff = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        eodlist = FifoDict()
        for i in range(S.MVP_DAYS):
            # Starts with 15 days of dummy records as base
            # names=['Name', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
            #        'Total Vol', 'Total Price', 'DayB4 Motion', 'MOTION', 'PRICE', 'VOLUME']
            aprice = pclose + (pclose * (i - 7) / 100)
            tprice = ((aprice + pclose) / 2) * S.MVP_DAYS
            avol = volume + (volume * (i - 7) / 100)
            tvol = ((avol + volume) / 2) * S.MVP_DAYS
            pdiff = (aprice - pclose) / 100
            vdiff = avol - volume
            pclose, volume = aprice, avol
            eodlist.append([
                '', '1900-01-{:02d}'.format(i), 0, 0, 0, aprice, avol, tvol,
                tprice, 0, 0, pdiff, vdiff
            ])
        lasteod = [
            '', '1900-01-14', 0, 0, 0, aprice, avol, tvol, tprice,
            0, 0, pdiff, vdiff
        ]
        return eodlist, lasteod

    def unpackEOD(counter, dt, price_open, price_high, price_low, price_close, volume):
        if volume == "-":
            volume = 0
        return counter, dt, price_open, price_high, price_low, price_close, volume

    if DBG_ALL:
        print counter, stkcode
    totalVol = 0.0
    totalPrice = 0.0
    mvpDaysUp = 0
    line, eodpop = "", []
    try:
        fh = open(S.DATA_DIR + S.MVP_DIR + counter + '.csv', "wb")
        if stkcode == 0:
            inputfl = S.DATA_DIR + counter + '.csv'
        else:
            inputfl = S.DATA_DIR + counter + '.' + stkcode + '.csv'
        row_count = wc_line_count(inputfl)
        if row_count < S.MVP_DAYS * 2:
            print "Skipped rows: ", counter, row_count
            return
        with open(inputfl, "rb") as fl:
            try:
                reader = csv.reader(fl, delimiter=',')
                eodlist, lasteod = preloadInitials()
                if DBG_ALL:
                    for _ in range(S.MVP_DAYS):
                        print eodlist.pop()
                for i, line in enumerate(reader):
                    stock, dt, popen, phigh, plow, pclose, volume = unpackEOD(*line)
                    if DBG_ALL:
                        print '{}: {},{},{},{},{},{},{}'.format(
                            i, stock, dt, popen, phigh, plow, pclose, volume)
                    if pclose >= popen and pclose >= lasteod[5]:
                        dayUp = 1
                    else:
                        dayUp = 0
                    eodpop = eodlist.pop()
                    mvpDaysUp = mvpDaysUp + dayUp - int(eodpop[-4])
                    totalPrice = totalPrice + float(pclose) - float(eodpop[5])
                    totalVol = totalVol + float(volume) - float(eodpop[6])
                    aveVol = float(eodpop[7]) / S.MVP_DAYS
                    avePrice = float(eodpop[8]) / S.MVP_DAYS
                    volDiff = (float(volume) - aveVol) / aveVol
                    if avePrice == 0.0:
                        priceDiff = 0.0
                    else:
                        priceDiff = (float(pclose) - avePrice) / avePrice
                        # priceDiff *= 20  # easier to view as value is below 1
                    if DBG_ALL and dt.startswith('2018-07'):
                        print '\t', dt, aveVol, avePrice, volDiff, priceDiff
                    neweod = '{},{},{:.4f},{:.4f},{:.4f},{:.4f},{},{},{:.2f},{},{},{:.2f},{:.2f}\n'.format(
                        stock, dt, float(popen), float(phigh), float(plow),
                        float(pclose), volume, totalVol, totalPrice, dayUp,
                        mvpDaysUp, priceDiff, volDiff)
                    if DBG_ALL:
                        print neweod
                    if i < 100:
                        if priceDiff > 0.03:
                            # skip price diff impacts after dummy records
                            print "Skipped: %d, %0.3f" % (i, priceDiff)
                    else:
                        fh.write(neweod)
                        days = getBusDaysBtwnDates(dt, today)
                        if DBG_ALL:
                            print "Days=%d,%s,%s,%d,%0.3f" % (i, dt, today, days, priceDiff)
                        if getBusDaysBtwnDates(dt, today) < S.MVP_DAYS:
                            updateMpvSignals(stock, dt, mvpDaysUp, volDiff, priceDiff, avePrice)
                    eodlist.append(neweod.split(','))
                    lasteod = line
            except Exception:
                print line
                print eodpop
                traceback.print_exc()
    except Exception:
        traceback.print_exc()
        print inputfl
    finally:
        fh.close()

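# Minimal sketch of the FIFO behaviour generateMPV relies on. The real
# FifoDict is defined elsewhere in this repo; FifoQueueSketch below is a
# hypothetical stand-in that only illustrates the append/pop semantics used
# above, where pop() retires the oldest entry in the S.MVP_DAYS window.
from collections import deque


class FifoQueueSketch(object):
    def __init__(self):
        self._items = deque()

    def append(self, item):
        # newest record goes to the back of the queue
        self._items.append(item)

    def pop(self):
        # oldest record comes out first (FIFO)
        return self._items.popleft()
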
if __name__ == '__main__':
    args = docopt(__doc__)
    cfg = loadCfg(S.DATA_DIR)
    global skip_name
    skip_name = args['--skip']
    insider_date = args['--date']
    if args['--test']:
        html_output = True
        if args['--test'] == "listing":
            result = crawl_listing(insider_date, html_output)
        if args['--test'] == "target":
            result = crawl_price_target(insider_date, html_output)
        if html_output:
            result.insert(0, T.t01)
            yagmail.SMTP(S.MAIL_SENDER, S.MAIL_PASSWORD). \
                send("*****@*****.**", "INSIDER UPDATE: " + getToday("%d-%b-%Y"), result)
        else:
            for i in result:
                print i
    else:
        yaml_file = "scrapers/i3investor/insider/insider.yaml"
        if args['--yaml']:
            yaml_file = args['--yaml']
        if insider_date is not None:
            process(yaml_file, insider_date)
        else:
            process(yaml_file)
    print ('\n...end processing')

def scrapeKlseRelated(klsemap, WRITE_CSV=True, dbg=False):
    idmap = loadIdMap(klsemap)
    # counters = 'USDMYR.2168,FTFBM100.0200,FTFBMKLCI.0201,FTFBMMES.0202,FTFBMSCAP.0203,FTFBM70.0204,FTFBMEMAS.0205'
    counterlist = S.KLSE_RELATED.split(',')
    eodlist = []
    for i in counterlist:
        counter = i.split('.')
        shortname = counter[0]
        stock_code = counter[1]
        rtn_code = 0
        OUTPUT_FILE = getDataDir(S.DATA_DIR) + shortname + "." + stock_code + ".csv"
        TMP_FILE = OUTPUT_FILE + 'tmp'
        if S.RESUME_FILE:
            lastdt = getLastDate(OUTPUT_FILE)
            if len(lastdt) == 0:
                # File is likely to be empty, hence scrape from beginning
                lastdt = S.ABS_START
        else:
            lastdt = S.ABS_START
        enddt = getToday('%Y-%m-%d')
        if lastdt == enddt:
            print "Skipped downloaded:", counter
            continue
        print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
            shortname, stock_code, lastdt, enddt)
        while True:
            startdt = lastdt
            if getDaysBtwnDates(lastdt, enddt) > 22 * 3:
                # do 3 months at a time
                stopdt = getDayOffset(startdt, 22 * 3)
                lastdt = getNextBusinessDay(stopdt)
            else:
                stopdt = enddt
            eod = InvestingQuote(idmap, shortname, startdt, stopdt)
            if dbg:
                for item in eod:
                    print item
            if len(eod.getCsvErr()) > 0:
                print eod.getCsvErr()
            elif isinstance(eod.response, unicode):
                dfEod = eod.to_df()
                if isinstance(dfEod, pd.DataFrame):
                    if dbg:
                        print dfEod[:5]
                    if WRITE_CSV:
                        dfEod.index.name = 'index'
                        dfEod.to_csv(TMP_FILE, index=False, header=False)
                    dates = pd.to_datetime(dfEod["Date"], format='%Y%m%d')
                    dfEod["Date"] = dates.dt.strftime('%Y-%m-%d').tolist()
                    elist = dfEod.values.tolist()[0]
                    eodlist.append(','.join(map(str, unpackEOD(*elist))))
                else:
                    print "ERR:" + dfEod + ": " + shortname + "," + lastdt
                    rtn_code = -2
            if WRITE_CSV:
                appendCsv(rtn_code, OUTPUT_FILE)
            if stopdt == enddt:
                break
    return eodlist

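# Hedged sketch of the chunked download loop above (the dates are
# illustrative; the helpers are the same date utilities scrapeKlseRelated
# already uses). With lastdt='2018-01-02' and enddt='2018-09-28':
#   chunk 1: startdt=2018-01-02, stopdt=getDayOffset(startdt, 66)  # 22 * 3
#   chunk 2: startdt=getNextBusinessDay(previous stopdt), ...
#   final chunk: stopdt == enddt, after which the while loop breaks
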
            continue
        stock_code = stocklist[shortname]
        if len(stock_code) > 0:
            rtn_code = 0
            OUTPUT_FILE = getDataDir(
                S.DATA_DIR) + 'investingcom/' + shortname + "." + stock_code + ".csv"
            TMP_FILE = OUTPUT_FILE + 'tmp'
            if S.RESUME_FILE:
                lastdt = getLastDate(OUTPUT_FILE)
                if len(lastdt) == 0:
                    # File is likely to be empty, hence scrape from beginning
                    lastdt = S.ABS_START
            else:
                lastdt = S.ABS_START
            enddt = getToday('%Y-%m-%d')
            print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
                shortname, stock_code, lastdt, enddt)
            failcount = 0
            while True:
                if failcount == 0:
                    startdt = lastdt
                    if getDaysBtwnDates(lastdt, enddt) > 22 * 3:
                        # do 3 months at a time
                        stopdt = getDayOffset(startdt, 22 * 3)
                        lastdt = getNextBusinessDay(stopdt)
                    else:
                        stopdt = enddt
                print "\tstart=%s, stop=%s" % (startdt, stopdt)
                eod = InvestingQuote(idmap, shortname, startdt, stopdt)
                if DBG_ALL: