class _ReqTask:
    """Background task that fetches one date's NR data, retrying until it succeeds."""

    _date: int
    _is_neb: bool
    _logger: TaskLog = autowired(TaskLog)
    did_load_data = None  # callback invoked with the date once data has arrived
    data: list

    def __init__(self, date: int, is_neb: bool, did_load_data: any):
        self._date = date
        self._is_neb = is_neb
        self.did_load_data = did_load_data
        self.data = None

    def start(self):
        """Run the request on a daemon worker thread."""
        worker = Thread(target=self._begin)
        worker.daemon = True
        worker.start()

    def _begin(self):
        # Format the day timestamp as YYYYMMDD (UTC+0) for the remote API.
        formatted = DateTime(self._date, timezone_hours=0).to_str('%Y%m%d')
        success, result = _Request(formatted, self._is_neb).get_result()
        if success and len(result) > 0:
            self.data = result
            self.did_load_data(self._date)
        else:
            # Back off briefly, then retry on a fresh worker thread.
            time.sleep(10)
            self.start()
class _Request:
    """Pages through the remote NR endpoint for one date, collecting NrItem rows."""

    _id: str
    _date: str
    _is_neb: bool
    _data: list
    _success: bool
    _logger = autowired(TaskLog)

    def __init__(self, date: str, is_neb: bool):
        self._id = None          # server-side cursor id, None until the first page returns one
        self._date = date
        self._is_neb = is_neb
        self._data = []
        self._success = True

    def get_result(self) -> (bool, list):
        """Run the paged request; return (success, items). Items is None on exception."""
        try:
            self._req()
        except Exception as e:
            self._logger.log_err(e)
            self._success = False
            return self._success, None
        return self._success, self._data

    def _req(self):
        db = 'nebulas' if self._is_neb else 'eth'
        while True:
            if self._id is None:
                # First page: query by date.
                url = 'http://111.203.228.11:9973/nr?db=' + db + '&batch_size=1000&date=' + self._date
                self._logger.log('url: ' + url)
            else:
                # Subsequent pages: follow the server-side cursor.
                url = 'http://111.203.228.11:9973/cursor?db=' + db + '&id=' + self._id
            resp = _req.get(url)
            if 200 <= resp.status_code < 300:
                payload: dict = resp.json()
                for raw in payload['result']:
                    self._data.append(NrItem(raw))
                if 'id' in payload:
                    self._id = payload['id']
                if not payload['has_more']:
                    break
            elif resp.status_code == 503:
                # Server busy: record the URL and retry shortly.
                print('503')
                TaskLog.log_rewrite('503.log', url)
                time.sleep(0.1)
            else:
                self._logger.log_err(resp)
                self._success = False
                break
class EthNrDataSynchronizer(NrDataSynchronizer):
    """NR synchronizer specialized for the Ethereum chain."""

    _nr_state = autowired(NRState)

    def get_current_sync_date(self) -> int:
        return self._nr_state.eth_current_sync_date

    def get_last_sync_date(self) -> int:
        return self._nr_state.eth_last_sync_date

    def set_last_sync_date(self, date: int):
        self._nr_state.eth_last_sync_date = date

    def is_neb(self) -> bool:
        # Ethereum, not Nebulas.
        return False

    def db_context(self):
        # (by-date table, total table, by-address table)
        return 'eth_nr_by_date', 'eth_nr_total', 'eth_nr_by_addr'
class NebMarketDataSynchronizer(MarketDataSynchronizer):
    """Market-value synchronizer for the Nebulas (NAS) token."""

    _nr_state: NRState = autowired(NRState)

    def get_last_sync_date(self) -> int:
        return self._nr_state.neb_market_last_sync_date

    def set_last_sync_date(self, date: int):
        self._nr_state.neb_market_last_sync_date = date

    def url_with_date(self, date: str) -> str:
        """Build the coinmarketcap historical-data URL from `date` up to today (UTC+0)."""
        end = DateTime(timezone_hours=0).date.to_str('%Y%m%d')
        return 'https://coinmarketcap.com/zh/currencies/nebulas-token/historical-data/?start=%s&end=%s' % (date, end)

    def save_market_data(self, data: list):
        """Insert one row per parsed market-data item into neb_market_value.

        NOTE(review): values are spliced into the SQL via %-formatting; they come
        from our own parser, but parameterized queries would still be safer.
        """
        # (Fixed: removed a dead `sql = ''` initializer that was never read —
        # the statement is rebuilt and executed per item.)
        for item in data:
            sql = 'INSERT INTO neb_market_value (`date`, `opening`, `closing`, `highest`, `lowest`, `amount`, `total_circulation`, `total`) ' \
                  'VALUES (%s, %s, %s, %s, %s, \'%s\', \'%s\', \'%s\');' % \
                  (str(item['date']), str(item['opening']), str(item['closing']), str(item['highest']),
                   str(item['lowest']), str(item['amount']), str(item['total_circulation']), str(item['total']))
            execute(sql)

    def currency_count(self) -> int:
        # Fixed NAS supply used to derive the market cap.
        return 100000000
class NrDataSynchronizer(object):
    """NR data synchronization base class.

    Pulls daily NR data through a ReqQueue, persists it (date blob, per-address
    stats, daily total), then schedules itself to run again after the next
    day's data becomes available. Chain-specific subclasses supply the dates,
    table names and chain flag via the abstract methods at the bottom.
    """

    _logger: TaskLog = autowired(TaskLog)
    _lock: RLock
    _running = False
    _timer: Timer = None
    _req_queue: ReqQueue
    _SEVEN_DAY_SECONDS = 7 * 24 * 60 * 60

    def __init__(self):
        self._lock = RLock()
        self._req_queue = ReqQueue(self.is_neb(), self.get_current_sync_date())

    def start(self):
        """Begin syncing; called at server startup. Safe to call repeatedly."""
        with self._lock:
            if self._running:
                return
            worker = Thread(target=self._begin_sync)
            worker.daemon = True
            worker.start()
            self._req_queue.start()

    # private ----------------------------------------------------------------------------------------------------------

    def _begin_sync(self):
        with self._lock:
            if self._running:
                return
            self._running = True
        try:
            if self._check_can_sync():
                if not self._sync():
                    time.sleep(10)
                # Restart to process the next day (or to fall through to the timer).
                self._running = False
                self.start()
            else:
                # Caught up: wait until tomorrow's data is available.
                self._running = False
                self._start_timer()
                self._logger.log('%s start timer.' % self.__class__.__name__)
        except Exception as e:
            self._logger.log_err(e)
            time.sleep(10)
            self._running = False
            self.start()

    def _sync(self) -> bool:
        """Sync one day's NR data; return True on success or duplicate, False to retry."""
        try:
            dt = self.get_current_sync_date()
            date_table, _, _ = self.db_context()
            if self._exists(date_table, dt):
                # Day already stored — just advance the sync cursor.
                self.set_last_sync_date(dt)
                self._logger.log_err('duplicate date: %s' % DateTime(dt, timezone_hours=0).to_str('%Y-%m-%d'))
                return True
            nr_data = self._req_queue.get(dt)
            if nr_data is None:
                return False
            with DBLock:
                try:
                    # Rank, persist, then advance the cursor — all in one transaction.
                    self._sort_nr_data(nr_data)
                    self._save_nr_data(nr_data)
                    self.set_last_sync_date(dt)
                    self._logger.log(
                        "%s nr_data sync success. date: %s" % (
                            self.__class__.__name__,
                            DateTime(self.get_last_sync_date(), timezone_hours=0).to_str("%Y%m%d")
                        )
                    )
                    commit()
                    self._req_queue.remove(dt)
                except Exception as e:
                    rollback()
                    raise e
            return True
        except Exception as e:
            self._logger.log_err(e)
            return False

    # Check whether there is still at least one past day left to synchronize.
    def _check_can_sync(self) -> bool:
        return self.get_current_sync_date() < DateTime(timezone_hours=0).date.timestamp

    @staticmethod
    def _sort_key(nr: dict) -> float:
        # NOTE(review): despite the dict annotation, `nr` is accessed as an object
        # with a `.score` attribute (NrItem) — confirm and fix the annotation upstream.
        return float(nr.score)

    @staticmethod
    def _sort_nr_data(nr_data: list):
        """Sort items by score (descending) and assign 1-based ranks in place."""
        nr_data.sort(key=NrDataSynchronizer._sort_key, reverse=True)
        for rank, entry in enumerate(nr_data, start=1):
            entry.order = rank

    def _start_timer(self):
        if self._timer is not None:
            self._timer.cancel()
        # Today's data becomes available tomorrow; fire 30 minutes past midnight (UTC+0).
        delay = DateTime(timezone_hours=0).add_days(1).date.timestamp - DateTime().timestamp + 60 * 30
        self._timer = Timer(delay, self.start)
        self._timer.daemon = True
        self._timer.start()

    # tools ------------------------------------------------------------------------------------------------------------

    @staticmethod
    def total_nr(nr_data: list) -> str:
        """Sum of all item scores, formatted with two decimals."""
        return format(sum(float(nr.score) for nr in nr_data), ".2f")

    @staticmethod
    def add_nr_to_pb_data(pb_data, nr: NrItem):
        """Append `nr` as a new item on a protobuf Data message."""
        NrDataSynchronizer.init_pb_item(pb_data.items.add(), nr)

    @staticmethod
    def new_pb_item(nr: NrItem) -> any:
        """Create a standalone protobuf Item populated from `nr`."""
        item = Item()
        NrDataSynchronizer.init_pb_item(item, nr)
        return item

    @staticmethod
    def init_pb_item(item, nr: NrItem):
        """Copy every NrItem field onto the protobuf item."""
        item.address = nr.address
        item.in_outs = nr.in_outs
        item.out_val = nr.out_val
        item.in_val = nr.in_val
        item.degrees = nr.degrees
        item.out_degree = nr.out_degree
        item.in_degree = nr.in_degree
        item.weight = nr.weight
        item.median = nr.median
        item.score = nr.score
        item.date = nr.date
        item.order = nr.order

    @staticmethod
    def serialize_nr_data(nr_data: list) -> bytes:
        """Serialize all items into one zlib-compressed protobuf blob."""
        data = Data()
        for nr in nr_data:
            NrDataSynchronizer.add_nr_to_pb_data(data, nr)
        return zlib.compress(data.SerializeToString())

    def get_valid_dates(self, dates: str):
        """Filter a comma-separated list of timestamps to those within the last seven days."""
        if not dates:
            return []
        now = DateTime().timestamp
        return [d for d in dates.split(',') if now - int(d) <= self._SEVEN_DAY_SECONDS]

    # db ---------------------------------------------------------------------------------------------------------------

    def _save_nr_data(self, nr_data: list):
        """Persist one day's data into the date, address and total tables."""
        date_table, total_table, address_table = self.db_context()
        self._save_date_data(date_table, self.get_current_sync_date(), nr_data)
        self._update_address_table(address_table, nr_data)
        self._save_total_nr(total_table, nr_data)

    def _save_date_data(self, db_table: str, date: int, nr_data: list):
        """Store the serialized day blob in 1 MiB hex chunks: INSERT, then UPDATE-concat."""
        count = len(nr_data)
        blob = self.serialize_nr_data(nr_data)
        chunk_size = 1024 * 1024
        offset = 0
        is_first = True
        while offset < len(blob):
            chunk = blob[offset: offset + chunk_size]
            offset += len(chunk)
            hex_str = chunk.hex()
            if is_first:
                is_first = False
                sql = 'INSERT INTO %s (`date`, `data`, `count`) VALUES (\'%s\', X\'%s\', \'%s\');' % \
                      (db_table, str(date), hex_str, str(count))
            else:
                sql = 'UPDATE %s SET `data` = concat(`data`, X\'%s\') WHERE `date`=\'%s\';' \
                      % (db_table, hex_str, str(date))
            execute(sql)

    def _update_address_table(self, db_table, nr_data: list):
        """Upsert per-address statistics, flushing the SQL in batches of 1000 statements."""
        addresses = self._addresses_from_db(db_table)
        batch = ''
        pending = 0
        for nr in nr_data:
            batch += self._add_to_address(db_table, addresses, nr)
            pending += 1
            if pending >= 1000:
                execute(batch)
                batch = ''
                pending = 0
        if pending > 0:
            execute(batch)

    def _save_total_nr(self, total_table, nr_data):
        sql = 'INSERT INTO %s (`date`, `nr_value`) VALUES (%s, \'%s\')' % \
              (total_table, str(self.get_current_sync_date()), self.total_nr(nr_data))
        execute(sql)

    def _add_to_address(self, db_table, addresses: dict, nr: NrItem) -> str:
        """Build the UPDATE (known address) or INSERT (new address) statement for `nr`."""
        addr = nr.address
        item = self.new_pb_item(nr)
        now = DateTime().timestamp
        cur = self.get_current_sync_date()
        if addr in addresses:
            existing = addresses[addr]
            count = existing.count + 1
            total = 0 if existing.total_nr is None else float(existing.total_nr)
            total += float(item.score)
            # '|' separator lets concatenated records be split apart again later.
            i_hex = (b'|' + item.SerializeToString()).hex()
            dates = self.get_valid_dates(existing.dates)
            if float(item.score) >= 0.1 and now - cur <= self._SEVEN_DAY_SECONDS:
                dates.append(str(cur))
            return 'UPDATE %s SET last_above_0_dates=\'%s\', last_above_0_num=\'%s\', `count`=\'%s\', total_nr=\'%s\', `data`=concat(`data`, X\'%s\') WHERE `address`=\'%s\';' % \
                   (db_table, ','.join(dates), str(len(dates)), str(count), str(total), i_hex, addr)
        else:
            total = float(item.score)
            i_hex = item.SerializeToString().hex()
            dates = []
            if float(item.score) >= 0.1 and now - cur <= self._SEVEN_DAY_SECONDS:
                dates.append(str(cur))
            return 'INSERT INTO %s (`address`, `last_above_0_dates`, `last_above_0_num`, `count`, total_nr, `data`) VALUES (\'%s\', \'%s\', \'%s\', \'%s\', \'%s\', X\'%s\');' % \
                   (db_table, addr, ','.join(dates), str(len(dates)), str(1), str(total), i_hex)

    @staticmethod
    def _addresses_from_db(db_table: str) -> dict:
        """Load existing address rows into {address: AddressItem}."""
        rows = execute_and_fetchall(
            'SELECT address, last_above_0_dates, last_above_0_num, `count`, total_nr FROM %s;' % db_table)
        return {r['address']: AddressItem(r['last_above_0_dates'], r['count'], r['total_nr'])
                for r in rows}

    @staticmethod
    def _exists(db_table: str, date: int) -> bool:
        """True if a row for `date` already exists in `db_table`."""
        with DBLock:
            sql = 'SELECT COUNT(*) as c FROM %s WHERE `date` = %s;' % (db_table, str(date))
            return execute_and_fetchone(sql)['c'] > 0

    # override ---------------------------------------------------------------------------------------------------------

    @abstractmethod
    def is_neb(self) -> bool:
        pass

    @abstractmethod
    def get_last_sync_date(self) -> int:
        pass

    @abstractmethod
    def set_last_sync_date(self, date: int):
        pass

    @abstractmethod
    def get_current_sync_date(self) -> int:
        pass

    @abstractmethod
    def db_context(self):
        pass
class MarketDataSynchronizer(object):
    """Market-value data synchronization base class.

    Scrapes a coinmarketcap historical-data HTML table and persists it;
    subclasses supply the URL, the persistence, and the currency supply.
    """

    _logger: TaskLog = autowired(TaskLog)
    _lock = None
    _running = False
    _timer: Timer = None

    def __init__(self):
        self._lock = RLock()

    def start(self):
        """Begin syncing; called at server startup. Safe to call repeatedly."""
        with self._lock:
            if self._running:
                return
            worker = Thread(target=self._begin_sync)
            worker.daemon = True
            worker.start()

    # private ----------------------------------------------------------------------------------------------------------

    def _begin_sync(self):
        with self._lock:
            if self._running:
                return
            self._running = True
        try:
            if self._check_can_sync():
                if not self._sync():
                    time.sleep(10)
                # Restart to re-check; once caught up the timer path runs.
                self._running = False
                self.start()
            else:
                self._running = False
                self._start_timer()
                self._logger.log('%s start timer.' % self.__class__.__name__)
        except Exception as e:
            self._logger.log_err(e)
            time.sleep(10)
            self._running = False
            self.start()

    def _sync(self) -> bool:
        """Fetch and store all market data since the last sync; True on success."""
        try:
            str_date = DateTime(self.get_last_sync_date(), timezone_hours=0).to_str("%Y%m%d")
            market_data = self._get_market_data(str_date)
            if market_data is None:
                return False
            with DBLock:
                try:
                    self.save_market_data(market_data)
                    # Advance the cursor to today (UTC+0).
                    self.set_last_sync_date(DateTime(timezone_hours=0).date.timestamp)
                    self._logger.log(
                        "%s market data sync success. date: %s" % (
                            self.__class__.__name__,
                            DateTime(self.get_last_sync_date(), timezone_hours=0).to_str("%Y%m%d")))
                    commit()
                except Exception as e:
                    rollback()
                    raise e
            return True
        except Exception as e:
            self._logger.log_err(e)
            return False

    # Check whether there is still data to synchronize (last sync before today, UTC+0).
    def _check_can_sync(self) -> bool:
        return self.get_last_sync_date() < DateTime(timezone_hours=0).date.timestamp

    # Download and parse the market-data page covering `date` onwards.
    def _get_market_data(self, date) -> list:
        req = request.Request(self.url_with_date(date), headers={'User-Agent': 'Mozilla/5.0'})
        return self._pass_market_data(request.urlopen(req).read())

    def _start_timer(self):
        if self._timer is not None:
            self._timer.cancel()
        # Today's data becomes available tomorrow; fire 30 minutes past midnight (UTC+0).
        delay = DateTime(timezone_hours=0).add_days(1).date.timestamp - DateTime().timestamp + 60 * 30
        self._timer = Timer(delay, self.start)
        self._timer.daemon = True
        self._timer.start()

    def _pass_market_data(self, content: str) -> list:
        """Parse the historical-data HTML into a list of row dicts; None if unparsable."""
        if content is None:
            return None
        table = self._get_table(content)
        if table is None:
            return None
        keys = [
            'date', 'opening', 'highest', 'lowest', 'closing', 'amount',
            'total_circulation', 'total'
        ]
        items = []
        for tr in table.find('tbody').findAll('tr'):
            item = {}
            for i, td in enumerate(tr.findAll('td')):
                key = keys[i]
                text: str = td.text
                if key == 'date':
                    item[key] = DateTime.from_str(
                        text, '%Y年%m月%d日', timezone_hours=0).date.timestamp
                elif key in ('amount', 'total_circulation'):
                    item[key] = text.replace(',', '')
                else:
                    item[key] = float(text)
            # Market cap is recomputed from the closing price and the fixed supply.
            item['total'] = format(self.currency_count() * item['closing'], '.0f')
            items.append(item)
        return items

    @staticmethod
    def _get_table(content: str) -> object:
        """Return the table whose header contains the (Chinese) word for 'date'."""
        soup = BeautifulSoup(content, 'html.parser')
        for table in soup.findAll("table"):
            for th in table.findAll('th'):
                if th.text == '日期':
                    return table
        return None

    # override ---------------------------------------------------------------------------------------------------------

    @abstractmethod
    def url_with_date(self, date: str) -> str:
        pass

    @abstractmethod
    def get_last_sync_date(self) -> int:
        pass

    @abstractmethod
    def set_last_sync_date(self, date: int):
        pass

    @abstractmethod
    def save_market_data(self, data: list):
        pass

    @abstractmethod
    def currency_count(self) -> int:
        pass
def _re_init():
    """Rebuild the database connection after it has been lost.

    Re-initializes the autowired DataBase and repoints the module-level
    handle in common.db at the fresh connection.
    """
    autowired(DataBase).init_db()
    common.db.DB = autowired(DataBase).connection
return self.db.connect def init_db(self): self.db = MySQL() self._app = Flask(__name__) self._app.config['MYSQL_HOST'] = '127.0.0.1' self._app.config['MYSQL_PORT'] = 3307 self._app.config['MYSQL_USER'] = '******' self._app.config['MYSQL_PASSWORD'] = '******' self._app.config['MYSQL_DB'] = 'nr_db' self._app.config['MYSQL_CURSORCLASS'] = 'DictCursor' self.db.init_app(self._app) DBLock = RLock() DB = autowired(DataBase).connection def execute(sql): try: with DB.cursor() as cursor: cursor.execute(sql, None) except Exception as e: if 'Lost connection to MySQL' in str( e) or 'MySQL server has gone away' in str(e): _re_init() raise e def execute_and_fetchone(sql): try:
class ReqQueue:
    """Pre-fetch queue keeping up to _REQ_MAX_COUNT daily NR requests in flight."""

    _is_neb = None
    _begin_date: int
    _last_date: int        # day after the last enqueued date; 0 until first start()
    _waiting_date: int     # date a get() caller is currently blocked on; 0 if none
    _dates: list           # pending dates not yet handed to a task
    _tasks: dict           # date -> in-flight _ReqTask
    _wn: WaitNotify
    _lock: RLock
    _logger = autowired(TaskLog)

    def __init__(self, is_neb: any, begin_date: int):
        self._is_neb = is_neb
        self._begin_date = begin_date
        self._last_date = 0
        self._waiting_date = 0
        self._dates = []
        self._tasks = {}
        self._wn = WaitNotify()
        self._lock = RLock()

    def start(self):
        """Enqueue every day from the resume point up to (excluding) today, then fetch."""
        with self._lock:
            today = DateTime(timezone_hours=0).date.timestamp
            day = self._begin_date if self._last_date == 0 else self._last_date
            while day < today:
                self._dates.append(day)
                day = DateTime(day, timezone_hours=0).date.add_days(1).timestamp
            self._last_date = day
            self._check_and_req()

    def get(self, date):
        """Return the loaded data for `date`, blocking until its task completes."""
        with self._lock:
            data = self._get_data(date)
            if data is None:
                # Register interest before releasing the lock so the
                # loaded-callback knows whom to wake.
                self._waiting_date = date
                self._wn.reset()
        if data is None:
            self._wn.wait()
            data = self._get_data(date)
        self._waiting_date = 0
        return data

    def remove(self, date):
        """Drop the finished task for `date` and top the queue back up."""
        with self._lock:
            self._tasks.pop(date)
            self._check_and_req()

    def _get_data(self, date):
        with self._lock:
            task = self._tasks.get(date)
            if task is not None and task.data is not None:
                return task.data
            return None

    def _did_data_loaded(self, date: int):
        # Callback from _ReqTask: wake a get() that is waiting on this date.
        with self._lock:
            if date == self._waiting_date:
                self._wn.notify()

    def _check_and_req(self):
        # Launch tasks for the oldest pending dates until the in-flight cap is hit.
        with self._lock:
            in_flight = len(self._tasks)
            if in_flight >= _REQ_MAX_COUNT:
                return
            if not self._dates:
                return
            launch = min(_REQ_MAX_COUNT - in_flight, len(self._dates))
            for day in self._dates[:launch]:
                task = _ReqTask(day, self._is_neb, self._did_data_loaded)
                self._tasks[day] = task
                task.start()
            del self._dates[:launch]
import signal

from common.autowired import autowired
from common.task_log import TaskLog

logger = autowired(TaskLog)


def on_signal_int(signum=None, frame=None):
    """SIGINT handler: record the interrupt in the error log.

    Python invokes signal handlers with (signum, frame); the previous
    zero-argument signature raised TypeError on every delivered signal.
    Defaults keep any existing direct calls working.
    """
    logger.log_err('signal int')


def on_signal_term(signum=None, frame=None):
    """SIGTERM handler: record the termination request in the error log."""
    logger.log_err('signal term')


def start_signal_log():
    """Install the SIGINT/SIGTERM logging handlers (call once at startup)."""
    signal.signal(signal.SIGINT, on_signal_int)
    signal.signal(signal.SIGTERM, on_signal_term)