def __init__(self, stock_num):
    """Set up storage, HTTP session and the cut-off time for one stock.

    Args:
        stock_num: Identifier of the stock this instance works on.
    """
    self.mongo = MongodbAPI()
    self.stock_num = stock_num
    self.htmlreq = HtmlRequests()
    self.req = self.htmlreq.get_session(SESSIONURL)

    # Cut-off is 13:30:10 on the day the object is created.
    today = datetime.now()
    self.stop_date = datetime(
        year=today.year,
        month=today.month,
        day=today.day,
        hour=13,
        minute=30,
        second=10,
    )
def __init__(self, stock_num: str, start_year: int, start_month: int):
    """Prepare storage, an HTTP session and the starting month.

    Args:
        stock_num: Identifier of the stock this instance works on.
        start_year: Year of the first month to process.
        start_month: Month number of the first month to process.
    """
    self.mongo = MongodbAPI()
    self.stock_num = stock_num
    self.htmlreq = HtmlRequests()

    session = self.htmlreq.get_session(SESSIONURL)
    self.req = session
    session.keep_alive = False

    self.start_year, self.start_month = start_year, start_month
    # Snapshot of "now" taken once at construction time.
    self.now_date = datetime.now()
    self.retry = 0
def parser(self, stock_num, url) -> list:
    """Scrape the tick table at *url* into a list of Mongo-ready documents.

    Each row's HH:MM:SS cell is combined with today's date to form the
    record timestamp. Rows whose bid or ask cell is '--' are skipped, as are
    rows whose '_id' already exists in "Transaction_details".

    Args:
        stock_num: Stock identifier. Stored as given under 'stock' and
            converted with ``str()`` for the '_id' prefix.
        url: Address handed to ``HtmlRequests.get_html``.

    Returns:
        A list of dicts, empty when ``get_html`` returns None.

    Raises:
        ValueError: A numeric cell cannot be converted, or the change cell
            has two or more space-separated parts but starts with a marker
            other than "▼" or "▲".
    """

    def _signed_change(cell_text):
        # "<marker> <amount>"; a single token means no marker is present.
        parts = cell_text.split(" ")
        if len(parts) < 2:
            return 0.0
        if parts[0] == "▼":
            return float("-" + parts[1])
        if parts[0] == "▲":
            return float(parts[1])
        # Previously this fell through to float("") and failed with an
        # opaque message; the exception type is unchanged.
        raise ValueError("unrecognised change marker: %r" % (cell_text,))

    daily = []
    tree = HtmlRequests().get_html(url)
    if tree is None:
        return daily

    now = datetime.now()
    # Build the id prefix once so the existence check and the stored '_id'
    # can never disagree (and a non-str stock_num no longer raises TypeError).
    id_prefix = str(stock_num) + "@"

    for row in tree.xpath('//div[@id="TickHeight"]/table/tr'):
        # Named tick_time so the module name `time` is not shadowed.
        tick_time = row.xpath('td[1]/text()')[0]
        buying = row.xpath('td[2]/text()')[0]
        selling = row.xpath('td[3]/text()')[0]
        if buying == '--' or selling == '--':
            continue
        transaction = row.xpath('td[4]/text()')[0]
        change_text = row.xpath('td[5]/text()')[0]
        stock_volume = row.xpath('td[6]/text()')[0]

        clock = tick_time.split(':')
        date = datetime(now.year, now.month, now.day,
                        int(clock[0]), int(clock[1]), int(clock[2]))
        doc_id = id_prefix + date.isoformat()
        if self.mongo.CheckExists("Transaction_details", doc_id):
            continue

        daily.append({
            '_id': doc_id,
            'ts': int(date.timestamp()),
            'stock': stock_num,
            'date': date,
            'buying': float(buying),
            'selling': float(selling),
            'transaction': float(transaction),
            'ups_and_downs': _signed_change(change_text),
            'stock_volume': int(stock_volume),
        })
    return daily
def __init__(self, date: datetime):
    """Create the Mongo and HTTP helpers and remember the target date.

    Args:
        date: The day this instance will work on.
    """
    self.mongo, self.htmlreq = MongodbAPI(), HtmlRequests()
    self.__date = date
class Institutional_investors():
    """Download one day's TSELEGALPERSON report and store it in Mongo.

    Documents go to the "stock_information" collection, one per data row,
    keyed by "<stock_num>@<YYYY/MM/DD>".
    """

    # Each layout is an ordered tuple of (document key, row index). Order is
    # kept identical to the original dict literals so stored documents keep
    # the same field order.
    _LAYOUT_12_FIELDS = (
        ('foreign_investment_dealer_buy', 2),
        ('foreign_investment_dealer_sell', 3),
        ('foreign_investment_dealer_net_buy_sell', 4),
        ('institutional_investors_net_buy_sell', 5),
        ('investment_trust_buy', 6),
        ('investment_trust_sell', 7),
        ('investment_trust_net_buy_sell', 8),
        ('dealer_buy(Self-purchase)', 9),
        ('dealer_sell(Self-purchase)', 10),
        ('dealer_net_buy_sell', 11),
    )
    _LAYOUT_UNDER_18_FIELDS = (
        ('foreign_investment_buy', 2),
        ('foreign_investment_sell', 3),
        ('foreign_investment_net_buy_sell', 4),
        ('foreign_investment_dealer_buy', 5),
        ('foreign_investment_dealer_sell', 6),
        ('foreign_investment_dealer_net_buy_sell', 7),
        ('investment_trust_buy', 8),
        ('investment_trust_sell', 9),
        ('investment_trust_net_buy_sell', 10),
        ('dealer_net_buy_sell', 11),
        ('dealer_buy', 12),
        ('dealer_sell', 13),
        ('institutional_investors_net_buy_sell', 14),
    )
    _LAYOUT_WIDE = (
        ('foreign_investment_buy', 2),
        ('foreign_investment_sell', 3),
        ('foreign_investment_net_buy_sell', 4),
        ('foreign_investment_dealer_buy', 5),
        ('foreign_investment_dealer_sell', 6),
        ('foreign_investment_dealer_net_buy_sell', 7),
        ('investment_trust_buy', 8),
        ('investment_trust_sell', 9),
        ('investment_trust_net_buy_sell', 10),
        ('dealer_net_buy_sell', 11),
        ('dealer_buy(Self-purchase)', 12),
        ('dealer_sell(Self-purchase)', 13),
        ('dealer_net_buy_sell(Self-purchase)', 14),
        ('dealer_buy(Hedging)', 15),
        ('dealer_sell(Hedging)', 16),
        ('dealer_net_buy_sell(Hedging)', 17),
        ('institutional_investors_net_buy_sell', 18),
    )

    def __init__(self, date: datetime):
        """Create the Mongo/HTTP helpers and remember the target date."""
        self.mongo = MongodbAPI()
        self.htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Build the report URL for the configured date and crawl it."""
        date = self.__date.strftime("%Y%m%d")
        source_url = TSELEGALPERSON.format(date=date)
        self.__crawl(source_url, self.__date.strftime("%Y/%m/%d"))

    def __crawl(self, url, date):
        """Fetch *url*, parse it and insert the rows into Mongo.

        Args:
            url: Fully formatted report address.
            date: The same day as "YYYY/MM/DD", used in ids and log lines.
        """
        json_data = self.htmlreq.get_json(requests, source_url=url)
        # A falsy response is treated like a non-"OK" one instead of
        # raising AttributeError on `.get`.
        if not json_data or json_data.get('stat') != "OK":
            logging.debug("This day not Opening :%s" % (date))
            return

        data = self.__parser(json_data, date)
        if not data:
            # Nothing to store; do not hand an empty batch to the database.
            logging.debug(
                "No Institutional investors rows parsed , url: %s", url)
            return

        inserted = self.mongo.Insert_Many_Data_To("stock_information", data)
        if inserted:
            logging.info(
                "Insert Institutional investors to mongo , date: %s", date)
        else:
            logging.warning(
                "Fail to Insert Institutional investors to mongo , url: %s",
                url)

    @classmethod
    def _layout_for(cls, field_count):
        """Return the (key, index) layout for a response with *field_count* fields.

        Returns None when the count matches no known layout.
        """
        if field_count == 12:
            return cls._LAYOUT_12_FIELDS
        if field_count < 18:
            return cls._LAYOUT_UNDER_18_FIELDS
        # The wide layout reads index 18, i.e. it needs 19 columns. The
        # original only dispatched on 18, so a 19-field response was
        # silently dropped; 18 is kept for backward compatibility.
        if field_count in (18, 19):
            return cls._LAYOUT_WIDE
        return None

    def __parser(self, j, date) -> list:
        """Convert the report JSON into a list of documents.

        Args:
            j: Decoded response; reads j['data'] (rows of strings) and
                j['fields'] (only its length is used, to pick a layout).
            date: Day as "YYYY/MM/DD".

        Returns:
            One dict per row, or an empty list when there are no rows or
            the field count matches no known layout.
        """
        rows = j['data']
        if not rows:
            return []

        field_count = len(j['fields'])
        layout = self._layout_for(field_count)
        if layout is None:
            logging.warning(
                "Unknown Institutional investors layout (%s fields) , date: %s",
                field_count, date)
            return []

        # Parsed once; datetime objects are immutable so sharing is safe.
        parsed_date = datetime.strptime(date, "%Y/%m/%d")

        data = []
        for row in rows:
            cells = [cell.replace(',', '') for cell in row]
            stock_num = str(cells[0])
            doc = {
                '_id': stock_num + "@" + date,
                'date': parsed_date,
                'stock_num': stock_num,
            }
            for key, index in layout:
                doc[key] = float(cells[index])
            data.append(doc)
        return data
class Daily_stock_info(object):
    """Download the DAILYSTOCKINFO report for one date and store it in Mongo.

    Documents go to the 'stock_daily_info' collection, keyed by
    "<stock>@<YYYY/MM/DD>".
    """

    def __init__(self, date):
        """Create the Mongo/HTTP helpers and remember the target date."""
        self.__mongo = MongodbAPI()
        self.__htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Crawl the configured date and insert whatever was parsed."""
        date = self.__date.strftime("%Y%m%d")
        source_url = DAILYSTOCKINFO.format(date=date)
        data = self.__crawl(source_url, self.__date.strftime("%Y/%m/%d"))
        if data is None:
            return

        inserted = self.__mongo.Insert_Many_Data_To('stock_daily_info', data)
        if inserted:
            logging.info("Insert stock daily info to mongo , date: %s", date)
        else:
            logging.warning(
                "Fail to Insert stock daily info to mongo , url: %s",
                source_url)

    def __crawl(self, url, date):
        """Fetch *url* and return parsed rows, or None when nothing usable came back.

        Args:
            url: Fully formatted report address.
            date: The same day as "YYYY/MM/DD".

        Returns:
            A list of documents, or None when the response is empty, its
            'stat' is not 'OK', or it has neither 'data5' nor 'data4'.
        """
        # The original wrapped this in `for i in range(10)` but returned on
        # the first pass, so exactly one request was ever made and the
        # loop's `else` error log was unreachable. One request is kept; the
        # error is now actually logged when the response is empty.
        j = self.__htmlreq.get_json(requests, url)
        if not j:
            logging.error("Fail to parser daily stock info , url: %s", url)
            return None
        if j.get('stat') != 'OK':
            return None

        # 'data5' takes precedence over 'data4' when both are present.
        for table_key in ('data5', 'data4'):
            if table_key in j:
                table = j[table_key]
                break
        else:
            logging.warning(
                "The daily info not have data5 or data4 url: %s", url)
            return None

        # Keep rows whose first cell is 4 characters long and whose last
        # cell is not the literal '0.00'.
        rows = [x for x in table if len(x[0]) == 4 and x[-1] != '0.00']
        return self.__parser(date, rows)

    def __parser(self, date, rows: list) -> list:
        """Turn raw report rows into documents.

        Args:
            date: Day as "YYYY/MM/DD".
            rows: Sequences of strings; indices 0, 3, 5-10 and the last
                element are read.

        Returns:
            One dict per input row.
        """
        # Parsed once instead of twice per row.
        parsed_date = datetime.strptime(date, "%Y/%m/%d")
        ts = int(datetime.timestamp(parsed_date))

        return [
            {
                '_id': row[0] + "@" + date,
                'stock': row[0],
                'date': parsed_date,
                'ts': ts,
                'transaction': float(row[3].replace(',', '')),
                'open': self.__get_float(row[5]),
                'high': self.__get_float(row[6]),
                'low': self.__get_float(row[7]),
                'close': self.__get_float(row[8]),
                'change': self.__get_sign_float(row[9], row[10]),
                'price_earning': float(row[-1].replace(',', '')),
            }
            for row in rows
        ]

    def __get_sign_float(self, sign, num) -> float:
        """Apply the direction found in *sign* to the magnitude *num*.

        Returns 0.0 when *sign* contains neither "-" nor "+"; *num* is not
        parsed in that case. Thousands separators in *num* are stripped,
        matching ``__get_float``.
        """
        if "-" in sign:
            return float("-" + num.replace(',', ''))
        if "+" in sign:
            return float(num.replace(',', ''))
        return 0.0

    def __get_float(self, num):
        """Parse a price cell.

        Returns:
            0.0 for 'X0.00', None for '--', otherwise the float value with
            thousands separators removed.
        """
        cleaned = num.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        if num == '--':
            return None
        return float(cleaned)
class TWSE_realtime():
    """Poll TWSEREALTIMEURL for one stock and store each new quote in Mongo.

    Quotes go to the "Realtime_data" collection, keyed by
    "<stock_num>@<YYYY-MM-DD HH:MM:SS>".
    """

    def __init__(self, stock_num):
        """Set up storage, the HTTP session and today's 13:30:10 cut-off."""
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        now = datetime.now()
        self.stop_date = datetime(now.year, now.month, now.day, 13, 30, 10)

    def start(self):
        """Begin polling."""
        self.crawl()

    def crawl(self):
        """Fetch one quote, store it if new, and re-arm the 5-second timer.

        The timer is scheduled before the request so the polling cadence
        does not depend on how long the request takes. No new timer is
        scheduled once the current time reaches ``stop_date``.
        """
        if datetime.now() < self.stop_date:
            threading.Timer(5.0, self.crawl).start()

        now_time = int(time.time()) * 1000
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time=now_time)
        json_data = self.htmlreq.get_json(self.req, source_url)

        data = self.parser(json_data)
        if data is None:
            return

        exists = self.mongo.CheckExists('Realtime_data', data.get('_id', None))
        # Only a definite "not present" answer triggers an insert.
        if exists is not False:
            return

        # Up to five insert attempts; stop at the first success.
        for _ in range(5):
            if self.mongo.Insert_Data_To("Realtime_data", data):
                logging.info("Insert realtime data to mongo, id:%s" %
                             (data.get("_id")))
                return
            logging.error(
                "Fail to insert realtime data to mongo, id:%s" % (data.get("_id")))

    @staticmethod
    def _to_float(value):
        """Return ``float(value)``, or None when it is missing or not numeric.

        Mirrors the None-for-missing convention of the daily parsers in
        this file instead of raising from float(None).
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _split_best(packed):
        """Split an underscore-separated list; empty list for falsy input."""
        if not packed:
            return []
        return packed.strip('_').split('_')

    def parser(self, j: json):
        """Convert a realtime response into one Mongo document.

        Args:
            j: Decoded response; only the first element of j['msgArray'] is
                used. Its 'tlong' value is read as epoch milliseconds.

        Returns:
            A dict ready for insertion, or None when *j* is falsy or has no
            entries under 'msgArray'. Numeric fields that are absent or not
            parseable are stored as None.
        """
        entries = j.get('msgArray') if j else None
        if not entries:
            return None
        data = entries[0]

        # The original named this local `time`, which shadowed the `time`
        # module and made the `time.mktime` call below raise AttributeError
        # on every non-empty response.
        stamp = datetime.fromtimestamp(
            int(data['tlong']) / 1000).strftime('%Y-%m-%d %H:%M:%S')
        # Round-trip through the string to drop sub-second precision.
        date = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')

        to_float = self._to_float
        split_best = self._split_best
        return {
            "_id": str(self.stock_num) + "@" + stamp,
            "code": self.stock_num,
            'ts': int(time.mktime(date.timetuple())),
            "time": date,
            "latest_trade_price": to_float(data.get('z', None)),
            "trade_volume": to_float(data.get('tv', None)),
            "accumulate_trade_volume": to_float(data.get('v', None)),
            "best_bid_price": [to_float(x) for x in split_best(data.get('b', None))],
            "best_bid_volume": [to_float(x) for x in split_best(data.get('g', None))],
            "best_ask_price": [to_float(x) for x in split_best(data.get('a', None))],
            "best_ask_volume": [to_float(x) for x in split_best(data.get('f', None))],
            "open": to_float(data.get('o', None)),
            "high": to_float(data.get('h', None)),
            "low": to_float(data.get('l', None)),
        }
class TWSE_daily():
    """Back-fill monthly daily-price data for one stock into Mongo.

    Starting at (start_year, start_month), each month is fetched, parsed
    and inserted into the "Daily_data" collection until the month after
    the one in which the object was created.
    """

    _MAX_FETCH_RETRIES = 5
    _MAX_INSERT_ATTEMPTS = 5

    def __init__(self, stock_num: str, start_year: int, start_month: int):
        """Prepare storage, an HTTP session and the starting month."""
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        self.req.keep_alive = False
        self.start_year = start_year
        self.start_month = start_month
        self.now_date = datetime.now()
        self.retry = 0

    def start(self):
        """Crawl from the configured start month onwards."""
        self.crawl(self.start_year, self.start_month)

    def crawl(self, year, month):
        """Crawl (year, month) and every following month up to ``now_date``.

        The given month is always crawled, even if it lies in the future.
        The original recursed once per month and, after a retry, let the
        outer frame carry on as well, re-crawling every later month; the
        walk is now a plain loop.
        """
        while True:
            self._crawl_month(year, month)
            following = self._get_next_date(year, month)
            year, month = following['year'], following['month']
            if self._is_after_now(year, month):
                logging.info("Done crawl Daily data , %s@%s/%s" %
                             (self.stock_num, year, month))
                return

    def _is_after_now(self, year, month) -> bool:
        """Tell whether (year, month) is later than the month of ``now_date``.

        Compared as a (year, month) pair. The original test
        ``year >= now.year and month > now.month`` could never become true
        after rolling over from a December "now", so the crawl never ended.
        """
        return (year, month) > (self.now_date.year, self.now_date.month)

    def _crawl_month(self, year, month):
        """Fetch, parse and store a single month."""
        logging.debug("%s/%s" % (year, month))
        # NOTE(review): this formats TWSEREALTIMEURL with a "YYYYMM01" value
        # for `time`; confirm that is the intended endpoint for history.
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time="%d%02d01" % (year, month))

        # One initial request plus up to _MAX_FETCH_RETRIES retries while
        # the response is empty.
        self.retry = 0
        json_data = self.htmlreq.get_json(self.req, source_url)
        while not json_data and self.retry < self._MAX_FETCH_RETRIES:
            self.retry += 1
            json_data = self.htmlreq.get_json(self.req, source_url)
        self.retry = 0

        if not json_data:
            # Logged only on real failure; the original also logged this
            # after every successful fetch.
            logging.error("Can't get old daily stock %s@%s-%s ,url : %s " %
                          (self.stock_num, year, month, source_url))
            return

        data = self.parser(json_data.get('data', None))
        if not data:
            return

        for _ in range(self._MAX_INSERT_ATTEMPTS):
            if self.mongo.Insert_Many_Data_To("Daily_data", data):
                logging.info("Insert Daily data %s@%s-%s" %
                             (self.stock_num, year, month))
                break
            logging.error("Fail, Insert Daily data to Mongo,id: %s@%s-%s" %
                          (self.stock_num, year, month))

    def _convert_date(self, date):
        """Convert '106/05/01' to '2017/05/01' (adds 1911 to the year part)."""
        parts = date.split('/')
        return '/'.join([str(int(parts[0]) + 1911)] + parts[1:])

    def parser(self, j: json) -> list:
        """Turn the rows of one month into documents not yet in Mongo.

        Args:
            j: Iterable of rows (sequences of strings), or None. Indices
                0-8 of each row are read.

        Returns:
            A list of dicts. Rows already present in "Daily_data" are
            skipped, and rows whose numeric cells cannot be converted are
            logged and skipped.
        """
        data = []
        if not j:
            return data

        for item in j:
            date = datetime.strptime(self._convert_date(item[0]), '%Y/%m/%d')
            _id = self.stock_num + "@" + date.strftime("%Y/%m/%d")
            if self.mongo.CheckExists("Daily_data", _id):
                logging.debug("Insert Daily data ,id :%s exists" % (_id))
                continue
            try:
                data.append({
                    '_id': _id,
                    'stock': self.stock_num,
                    'date': date,
                    'ts': int(time.mktime(date.timetuple())),
                    'capacity': int(item[1].replace(',', '')),
                    'turnover': int(item[2].replace(',', '')),
                    'open': self._get_float(item[3]),
                    'high': self._get_float(item[4]),
                    'low': self._get_float(item[5]),
                    'close': self._get_float(item[6]),
                    'change': self._get_float(item[7]),
                    'transaction': int(item[8].replace(',', '')),
                })
            except (ValueError, TypeError, IndexError, AttributeError) as e:
                # Malformed row: record it and carry on with the rest.
                logging.error("daily data fail :%s %s" % (item, e))
                continue
        return data

    def _get_next_date(self, year, month) -> dict:
        """Return the month after (year, month) as {'year': ..., 'month': ...}."""
        if month < 12:
            return {'year': year, 'month': month + 1}
        return {'year': year + 1, 'month': 1}

    def _get_float(self, number: str):
        """Parse a price cell.

        Returns:
            0.0 for 'X0.00', None for '--', otherwise the float value with
            thousands separators removed.
        """
        cleaned = number.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        if number == '--':
            return None
        return float(cleaned)