class Money_link():
    """Scrape intraday tick rows for one stock from the MONEYLINKURL page.

    Rows already present in the ``Transaction_details`` collection (as
    reported by ``MongodbAPI.CheckExists``) are left out of the result.
    """

    def __init__(self):
        self.mongo = MongodbAPI()

    def start(self, stock_num: str) -> list:
        """Build the source URL for *stock_num* and return its parsed ticks."""
        source_url = MONEYLINKURL % (stock_num)
        return self.parser(stock_num, source_url)

    @staticmethod
    def _signed_change(raw: str) -> str:
        """Turn a change cell such as ``"▼ 1.5"`` into a signed number string.

        A cell without a marker/amount pair, or with a marker other than
        "▲"/"▼", is reported as ``"0.0"``.  Previously an unknown marker
        produced an empty string, which made the later ``float()`` call
        raise ValueError.
        """
        parts = raw.split(" ")
        if len(parts) < 2:
            return "0.0"
        marker, amount = parts[0], parts[1]
        if marker == "▼":
            return "-" + amount
        if marker == "▲":
            return amount
        return "0.0"

    def parser(self, stock_num, url) -> list:
        """Fetch *url* and convert each tick row into a document dict.

        Args:
            stock_num: Stock identifier; used for the ``stock`` field and as
                the prefix of each document ``_id``.
            url: Page to download through ``HtmlRequests.get_html``.

        Returns:
            A list of dicts (possibly empty).  An empty list is returned
            when the page could not be fetched (``get_html`` gave ``None``).
        """
        daily = []
        tree = HtmlRequests().get_html(url)
        if tree is None:
            return daily

        now = datetime.now()
        stock_key = str(stock_num)
        for row in tree.xpath('//div[@id="TickHeight"]/table/tr'):
            # Collect the text nodes of the six data columns up front so a
            # row lacking any of them (e.g. a header row) is skipped rather
            # than raising IndexError.
            cells = [row.xpath('td[%d]/text()' % col) for col in range(1, 7)]
            if not all(cells):
                continue
            (tick_time, buying, selling,
             transaction, change, stock_volume) = (cell[0] for cell in cells)

            # '--' marks a row without a usable bid/ask price.
            if buying == '--' or selling == '--':
                continue

            ups_and_downs = self._signed_change(change)

            # The page only carries a time of day; the date is taken from
            # the local clock at parse time.
            hour, minute, second = (
                int(part) for part in tick_time.split(':')[:3])
            date = datetime(now.year, now.month, now.day,
                            hour, minute, second)

            # One id for both the existence check and the document, so the
            # two can never disagree (and a non-str stock_num cannot fail
            # on concatenation).
            doc_id = stock_key + "@" + date.isoformat()
            if self.mongo.CheckExists("Transaction_details", doc_id):
                continue

            daily.append({
                '_id': doc_id,
                'ts': int(date.timestamp()),
                'stock': stock_num,
                'date': date,
                'buying': float(buying),
                'selling': float(selling),
                'transaction': float(transaction),
                'ups_and_downs': float(ups_and_downs),
                'stock_volume': int(stock_volume),
            })
        return daily
class TWSE_realtime():
    """Poll the realtime quote endpoint for one stock and store snapshots.

    ``crawl`` reschedules itself every five seconds on a ``threading.Timer``
    until the local clock passes ``stop_date`` (13:30:10 on the day the
    instance was created).
    """

    def __init__(self, stock_num):
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        now = datetime.now()
        self.stop_date = datetime(now.year, now.month, now.day, 13, 30, 10)

    def start(self):
        """Run the first crawl; later ones are scheduled by ``crawl``."""
        self.crawl()

    def crawl(self):
        """Fetch one snapshot and insert it unless it is already stored."""
        # Schedule the next poll first so a failure below does not stop
        # the polling chain.
        if datetime.now() < self.stop_date:
            threading.Timer(5.0, self.crawl).start()

        now_time = int(time.time()) * 1000
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time=now_time)
        json_data = self.htmlreq.get_json(self.req, source_url)
        data = self.parser(json_data)
        if data is None:
            return

        exists = self.mongo.CheckExists('Realtime_data', data.get('_id', None))
        # Kept as an equality test on purpose: CheckExists' return type is
        # not visible here, and only an explicit False triggered an insert.
        if exists == False:  # noqa: E712
            # Up to five insert attempts; stop at the first success.
            for _ in range(5):
                if self.mongo.Insert_Data_To("Realtime_data", data):
                    logging.info("Insert realtime data to mongo, id:%s"
                                 % (data.get("_id")))
                    return
                logging.error(
                    "Fail to insert realtime data to mongo, id:%s"
                    % (data.get("_id")))

    @staticmethod
    def _to_float(value):
        """Return ``float(value)``, or ``None`` when it is absent/non-numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _split_best(raw):
        """Split an underscore-separated list such as ``"450.0_449.5_"``.

        Returns an empty list for a missing or empty value so callers can
        always iterate over the result.
        """
        if not raw:
            return []
        return raw.strip('_').split('_')

    def parser(self, j: dict):
        """Convert the endpoint's JSON reply into a document dict.

        Args:
            j: Decoded JSON reply; its ``msgArray`` list holds the quotes and
                only the first entry is used.

        Returns:
            ``None`` when the reply is empty or has no ``msgArray`` entries,
            otherwise a dict ready for insertion.  Numeric fields that are
            missing or not parseable are stored as ``None``; missing
            best-bid/ask lists are stored as empty lists.
        """
        if not j or not j.get('msgArray'):
            return None
        data = j['msgArray'][0]

        # 'tlong' is divided by 1000 before use, i.e. treated as epoch
        # milliseconds.  Sub-second precision is dropped.
        date = datetime.fromtimestamp(
            int(data['tlong']) / 1000).replace(microsecond=0)
        # Named time_str (not ``time``): a local called ``time`` shadowed the
        # ``time`` module and made ``time.mktime`` fail on every call.
        time_str = date.strftime('%Y-%m-%d %H:%M:%S')

        to_float = self._to_float
        split_best = self._split_best

        def _float_list(key):
            return [to_float(x) for x in split_best(data.get(key, None))]

        return {
            "_id": str(self.stock_num) + "@" + time_str,
            "code": self.stock_num,
            'ts': int(time.mktime(date.timetuple())),
            "time": date,
            "latest_trade_price": to_float(data.get('z', None)),
            "trade_volume": to_float(data.get('tv', None)),
            "accumulate_trade_volume": to_float(data.get('v', None)),
            "best_bid_price": _float_list('b'),
            "best_bid_volume": _float_list('g'),
            "best_ask_price": _float_list('a'),
            "best_ask_volume": _float_list('f'),
            "open": to_float(data.get('o', None)),
            "high": to_float(data.get('h', None)),
            "low": to_float(data.get('l', None)),
        }
class TWSE_daily():
    """Back-fill daily price rows for one stock, one calendar month at a time.

    Crawling starts at ``start_year``/``start_month`` and advances month by
    month until the next month would lie after the month in which the
    instance was created (``now_date``).
    """

    def __init__(self, stock_num: str, start_year: int, start_month: int):
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        self.req.keep_alive = False
        self.start_year = start_year
        self.start_month = start_month
        self.now_date = datetime.now()
        self.retry = 0

    def start(self):
        """Crawl from the configured start month onwards."""
        self.crawl(self.start_year, self.start_month)

    def crawl(self, year, month):
        """Crawl *year*/*month* and every following month up to ``now_date``.

        This is a loop rather than one recursive call per month, so long
        date ranges do not grow the call stack.
        """
        while True:
            self._crawl_month(year, month)

            date = self._get_next_date(year, month)
            if self._is_after_now(date['year'], date['month']):
                logging.info("Done crawl Daily data , %s@%s/%s" %
                             (self.stock_num, date['year'], date['month']))
                return
            # Start to crawl new year, month
            year, month = date['year'], date['month']

    def _is_after_now(self, year, month) -> bool:
        """Return True when *year*/*month* is later than ``now_date``'s month.

        Compared as a (year, month) pair; checking year and month separately
        wrongly treated e.g. January of next year as "not yet finished".
        """
        return (year, month) > (self.now_date.year, self.now_date.month)

    def _crawl_month(self, year, month):
        """Fetch, parse and store the rows of a single month."""
        logging.debug("%s/%s" % (year, month))
        # NOTE(review): this formats TWSEREALTIMEURL with a YYYYMM01 value;
        # confirm this is the intended endpoint for daily history.
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time="%d%02d01" % (year, month))

        json_data = self.htmlreq.get_json(self.req, source_url)
        # Re-request the same month up to five more times on an empty reply.
        while not json_data and self.retry < 5:
            self.retry += 1
            json_data = self.htmlreq.get_json(self.req, source_url)
        self.retry = 0

        if not json_data:
            # Only reported when every attempt came back empty.
            logging.error("Can't get old daily stock %s@%s-%s ,url : %s " %
                          (self.stock_num, year, month, source_url))
            return

        data = self.parser(json_data.get('data', None))
        if not data:
            return

        # Up to five insert attempts; stop at the first success.
        for _ in range(5):
            if self.mongo.Insert_Many_Data_To("Daily_data", data):
                logging.info("Insert Daily data %s@%s-%s" %
                             (self.stock_num, year, month))
                break
            logging.error("Fail, Insert Daily data to Mongo,id: %s@%s-%s" %
                          (self.stock_num, year, month))

    def _convert_date(self, date):
        """Convert '106/05/01' to '2017/05/01'"""
        parts = date.split('/')
        return '/'.join([str(int(parts[0]) + 1911)] + parts[1:])

    def parser(self, j: list) -> list:
        """Convert the reply's ``data`` rows into document dicts.

        Args:
            j: Sequence of rows, or ``None``.  Each row is indexed 0..8:
                date, capacity, turnover, open, high, low, close, change,
                transaction.

        Returns:
            A list of dicts for rows not already stored in ``Daily_data``.
            Rows whose fields fail to convert are logged and skipped.
        """
        data = []
        if j is None:
            return data
        for item in j:
            date = datetime.strptime(
                self._convert_date(item[0]), '%Y/%m/%d')
            _id = self.stock_num + "@" + date.strftime("%Y/%m/%d")
            if self.mongo.CheckExists("Daily_data", _id):
                logging.debug("Insert Daily data ,id :%s exists" % (_id))
                continue
            try:
                data.append({
                    '_id': _id,
                    'stock': self.stock_num,
                    'date': date,
                    'ts': int(time.mktime(date.timetuple())),
                    'capacity': int(item[1].replace(',', '')),
                    'turnover': int(item[2].replace(',', '')),
                    'open': self._get_float(item[3]),
                    'high': self._get_float(item[4]),
                    'low': self._get_float(item[5]),
                    'close': self._get_float(item[6]),
                    'change': self._get_float(item[7]),
                    'transaction': int(item[8].replace(',', '')),
                })
            except Exception as err:
                logging.error("daily data fail :%s %s" % (item, err))
                continue
        return data

    def _get_next_date(self, year, month) -> dict:
        """Return the month following *year*/*month* as a year/month dict."""
        if month < 12:
            month += 1
        else:
            year += 1
            month = 1
        return {
            'year': year,
            'month': month
        }

    def _get_float(self, number: str):
        """Parse a price cell: 'X0.00' -> 0.0, '--' -> None, else a float.

        Thousands separators (',') are removed before conversion.
        """
        cleaned = number.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        if number == '--':
            return None
        return float(cleaned)