Example #1
0
class _ReqTask:
    """
    Background download of the NR data for a single date.

    The task runs `_Request` on a daemon thread. When the request succeeds
    with a non-empty result the data is stored on `data` and the
    `did_load_data` callback is invoked with the task's date; otherwise the
    task waits 10 seconds and starts itself again.
    """
    _date: int
    _is_neb: bool
    _logger: TaskLog = autowired(TaskLog)

    did_load_data = None
    data: list

    def __init__(self, date: int, is_neb: bool, did_load_data: any):
        """
        :param date: timestamp of the day to download
        :param is_neb: forwarded unchanged to `_Request`
        :param did_load_data: callable invoked with `date` once data is loaded
        """
        self.data = None
        self.did_load_data = did_load_data
        self._is_neb = is_neb
        self._date = date

    def start(self):
        """Run `_begin` on a new daemon thread."""
        worker = Thread(target=self._begin)
        worker.daemon = True
        worker.start()

    def _begin(self):
        """Perform one download attempt; reschedule itself on failure."""
        day = DateTime(self._date, timezone_hours=0).to_str('%Y%m%d')
        succeeded, items = _Request(day, self._is_neb).get_result()
        if not succeeded or len(items) <= 0:
            # Failed or empty result: back off, then retry on a fresh thread.
            time.sleep(10)
            self.start()
            return
        self.data = items
        self.did_load_data(self._date)
Example #2
0
class _Request:
    """
    Paged HTTP download of all NR items for one date.

    The first page is requested by date; later pages are requested through
    the cursor id returned by the server, until the response reports
    `has_more` as false.
    """
    _id: str
    _date: str
    _is_neb: bool
    _data: list
    _success: bool
    _logger = autowired(TaskLog)

    def __init__(self, date: str, is_neb: bool):
        """
        :param date: date string placed in the `date` query parameter
        :param is_neb: True selects the 'nebulas' db, False selects 'eth'
        """
        self._date = date
        self._is_neb = is_neb
        self._id = None
        self._success = True
        self._data = list()

    def get_result(self) -> (bool, list):
        """
        Run the request.

        :return: `(success, data)`; when an exception is raised it is logged
                 and `(False, None)` is returned
        """
        try:
            self._req()
        except Exception as err:
            self._logger.log_err(err)
            self._success = False
            return self._success, None
        return self._success, self._data

    def _req(self):
        """Fetch every page, appending an `NrItem` per result entry."""
        db = 'nebulas' if self._is_neb else 'eth'
        while True:
            if self._id is not None:
                url = 'http://111.203.228.11:9973/cursor?db=' + db + '&id=' + self._id
            else:
                url = 'http://111.203.228.11:9973/nr?db=' + db + '&batch_size=1000&date=' + self._date
                self._logger.log('url: ' + url)
            resp = _req.get(url)
            status = resp.status_code
            if status == 503:
                # Service unavailable: record the url and retry the same page.
                print('503')
                TaskLog.log_rewrite('503.log', url)
                time.sleep(0.1)
                continue
            if not 200 <= status < 300:
                # Any other non-2xx status aborts the download.
                self._logger.log_err(resp)
                self._success = False
                break
            payload: dict = resp.json()
            for raw in payload['result']:
                self._data.append(NrItem(raw))
            if 'id' in payload:
                self._id = payload['id']
            if not payload['has_more']:
                break
class EthNrDataSynchronizer(NrDataSynchronizer):
    """
    `NrDataSynchronizer` bound to the `eth_*` fields of `NRState` and the
    `eth_nr_*` tables.
    """
    _nr_state = autowired(NRState)

    def get_current_sync_date(self) -> int:
        """Return `eth_current_sync_date` from the shared state."""
        state = self._nr_state
        return state.eth_current_sync_date

    def get_last_sync_date(self) -> int:
        """Return `eth_last_sync_date` from the shared state."""
        state = self._nr_state
        return state.eth_last_sync_date

    def set_last_sync_date(self, date: int):
        """Store `date` as `eth_last_sync_date` on the shared state."""
        state = self._nr_state
        state.eth_last_sync_date = date

    def is_neb(self) -> bool:
        """This synchronizer is not the nebulas one."""
        return False

    def db_context(self):
        """Return the table names as `(by_date, total, by_address)`."""
        by_date_table = 'eth_nr_by_date'
        total_table = 'eth_nr_total'
        by_address_table = 'eth_nr_by_addr'
        return by_date_table, total_table, by_address_table
Example #4
0
class NebMarketDataSynchronizer(MarketDataSynchronizer):
    """
    `MarketDataSynchronizer` for the nebulas token: reads/writes the
    `neb_market_last_sync_date` state field and stores rows in
    `neb_market_value`.
    """
    _nr_state: NRState = autowired(NRState)

    def get_last_sync_date(self) -> int:
        """Return `neb_market_last_sync_date` from the shared state."""
        state = self._nr_state
        return state.neb_market_last_sync_date

    def set_last_sync_date(self, date: int):
        """Store `date` as `neb_market_last_sync_date` on the shared state."""
        state = self._nr_state
        state.neb_market_last_sync_date = date

    def url_with_date(self, date: str) -> str:
        """
        Build the historical-data url from `date` up to the current date.

        :param date: start date string, inserted verbatim as `start`
        """
        today = DateTime(timezone_hours=0).date.to_str('%Y%m%d')
        template = 'https://coinmarketcap.com/zh/currencies/nebulas-token/historical-data/?start=%s&end=%s'
        return template % (date, today)

    def save_market_data(self, data: list):
        """
        Insert one `neb_market_value` row per item.

        :param data: list of dicts holding the keys named in `columns`
        """
        # Value order must follow the column order of the INSERT statement.
        columns = ('date', 'opening', 'closing', 'highest', 'lowest',
                   'amount', 'total_circulation', 'total')
        template = 'INSERT INTO neb_market_value (`date`, `opening`, `closing`, `highest`, `lowest`, `amount`, `total_circulation`, `total`) ' \
                   'VALUES (%s, %s, %s, %s, %s, \'%s\', \'%s\', \'%s\');'
        for row in data:
            values = tuple(str(row[column]) for column in columns)
            execute(template % values)

    def currency_count(self) -> int:
        """Return the fixed currency count used to compute the `total` value."""
        return 100000000
class NrDataSynchronizer(object):
    """
    NR data synchronization.

    Base class that syncs NR data one date at a time: it takes the data for
    the current sync date from a `ReqQueue`, ranks it by score, writes it to
    the per-date, per-address and total tables, and advances the last sync
    date. Subclasses provide the state accessors and table names through the
    methods in the "override" section.
    """
    _logger: TaskLog = autowired(TaskLog)
    # Guards start() and _begin_sync(); created per instance in __init__.
    _lock: RLock
    # True while a _begin_sync() pass is in progress.
    _running = False
    # Pending timer that calls start() again, if one was scheduled.
    _timer: Timer = None
    # Source of the downloaded per-date data.
    _req_queue: ReqQueue

    _SEVEN_DAY_SECONDS = 7 * 24 * 60 * 60

    def __init__(self):
        """Create the lock and a request queue starting at the current sync date."""
        self._lock = RLock()
        self._req_queue = ReqQueue(self.is_neb(), self.get_current_sync_date())

    def start(self):
        """
        Called as soon as the server starts.

        Does nothing while a sync pass is running; otherwise launches
        `_begin_sync` on a daemon thread and starts the request queue.
        :return: None
        """
        with self._lock:
            if self._running:
                return
            t = Thread(target=self._begin_sync)
            t.daemon = True
            t.start()
            self._req_queue.start()

    # private ----------------------------------------------------------------------------------------------------------

    def _begin_sync(self):
        """
        Run one sync pass and schedule the next one.

        If syncing is possible, one date is synced (with a 10 second pause
        after a failed attempt) and `start()` is called again. Otherwise a
        timer is armed to call `start()` later. The instance lock is held for
        the whole pass, including the sleeps.
        """
        with self._lock:
            if self._running:
                return
            self._running = True
            try:
                if self._check_can_sync():
                    if not self._sync():
                        time.sleep(10)
                    self._running = False
                    self.start()
                else:
                    self._running = False
                    self._start_timer()
                    self._logger.log('%s start timer.' % self.__class__.__name__)
            except Exception as e:
                # Unexpected failure: log, back off, then restart the cycle.
                self._logger.log_err(e)
                time.sleep(10)
                self._running = False
                self.start()

    def _sync(self) -> bool:
        """
        Sync the data of the current sync date.

        :return: True when the date was saved or already existed in the date
                 table; False when no data was available or an error occurred
                 (errors are logged, not raised)
        """
        try:
            # str_date = DateTime(self.get_current_sync_date(), timezone_hours=0).to_str("%Y%m%d")
            dt = self.get_current_sync_date()
            date_table, _, _ = self.db_context()
            if self._exists(date_table, dt):
                # The date is already stored: just advance the sync date.
                self.set_last_sync_date(dt)
                self._logger.log_err('duplicate date: %s' % DateTime(dt, timezone_hours=0).to_str('%Y-%m-%d'))
                return True
            nr_data = self._req_queue.get(dt)
            # nr_data = self._get_daily_all_nr(str_date)
            if nr_data is not None:
                with DBLock:
                    try:
                        # save to models
                        self._sort_nr_data(nr_data)
                        self._save_nr_data(nr_data)
                        # update sync date
                        self.set_last_sync_date(dt)
                        self._logger.log(
                            "%s nr_data sync success. date: %s" % (
                                self.__class__.__name__,
                                DateTime(self.get_last_sync_date(), timezone_hours=0).to_str("%Y%m%d")
                            )
                        )
                        commit()
                        # Drop the finished task from the request queue.
                        self._req_queue.remove(dt)
                    except Exception as e:
                        rollback()
                        raise e
                return True
            else:
                return False
        except Exception as e:
            self._logger.log_err(e)
            return False

    # Check whether syncing can continue
    def _check_can_sync(self) -> bool:
        """Return True while the current sync date is earlier than the `DateTime(timezone_hours=0).date` timestamp."""
        return self.get_current_sync_date() < DateTime(timezone_hours=0).date.timestamp

    @staticmethod
    def _sort_key(nr: NrItem) -> float:
        """Sort key: the item's score as a float."""
        return float(nr.score)

    @staticmethod
    def _sort_nr_data(nr_data: list):
        """Sort `nr_data` in place by descending score and set each item's 1-based `order`."""
        nr_data.sort(key=NrDataSynchronizer._sort_key, reverse=True)
        for i in range(len(nr_data)):
            nr_data[i].order = i + 1

    def _start_timer(self):
        """Cancel any pending timer and arm a new daemon timer that calls `start()`."""
        if self._timer is not None:
            self._timer.cancel()
        # Today's data can only be synced after 00:30 tomorrow (UTC+0)
        t = DateTime(timezone_hours=0).add_days(1).date.timestamp - DateTime().timestamp + 60 * 30
        self._timer = Timer(t, self.start)
        self._timer.daemon = True
        self._timer.start()

    # tools ------------------------------------------------------------------------------------------------------------

    @staticmethod
    def total_nr(nr_data: list) -> str:
        """Return the sum of all item scores formatted with two decimals."""
        t: float = 0.0
        for nr in nr_data:
            t += float(nr.score)
        return format(t, ".2f")

    @staticmethod
    def add_nr_to_pb_data(pb_data, nr: NrItem):
        """Append a new entry to `pb_data.items` and fill it from `nr`."""
        item = pb_data.items.add()
        NrDataSynchronizer.init_pb_item(item, nr)

    @staticmethod
    def new_pb_item(nr: NrItem) -> any:
        """Create a standalone `Item` filled from `nr`."""
        item = Item()
        NrDataSynchronizer.init_pb_item(item, nr)
        return item

    @staticmethod
    def init_pb_item(item, nr: NrItem):
        """Copy every field of `nr` onto `item`."""
        item.address = nr.address
        item.in_outs = nr.in_outs
        item.out_val = nr.out_val
        item.in_val = nr.in_val
        item.degrees = nr.degrees
        item.out_degree = nr.out_degree
        item.in_degree = nr.in_degree
        item.weight = nr.weight
        item.median = nr.median
        item.score = nr.score
        item.date = nr.date
        item.order = nr.order

    @staticmethod
    def serialize_nr_data(nr_data: list) -> bytes:
        """Pack all items into one `Data` message and return it zlib-compressed."""
        data = Data()
        for nr in nr_data:
            NrDataSynchronizer.add_nr_to_pb_data(data, nr)
        return zlib.compress(data.SerializeToString())

    def get_valid_dates(self, dates: str):
        """
        Filter a comma-separated list of timestamps.

        :param dates: comma-separated integer timestamps, or None / empty
        :return: list of the timestamp strings that are at most seven days
                 older than the current `DateTime().timestamp`
        """
        r = []
        if dates is None or len(dates) == 0:
            return r
        now = DateTime().timestamp
        dts = dates.split(',')
        for d in dts:
            if now - int(d) <= self._SEVEN_DAY_SECONDS:
                r.append(d)
        return r

    # db ---------------------------------------------------------------------------------------------------------------

    def _save_nr_data(self, nr_data: list):
        """Write `nr_data` to the date table, the address table and the total table."""
        date_table, total_table, address_table = self.db_context()
        self._save_date_data(date_table, self.get_current_sync_date(), nr_data)
        self._update_address_table(address_table, nr_data)
        self._save_total_nr(total_table, nr_data)

    def _save_date_data(self, db_table: str, date: int, nr_data: list):
        """
        Store the serialized data of one date in `db_table`.

        The blob is written in chunks of at most 1 MiB: the first chunk is
        inserted as a new row, every further chunk is appended to that row.
        NOTE(review): chunking presumably keeps each statement below a
        server-side size limit -- confirm.
        """
        count = len(nr_data)
        s = self.serialize_nr_data(nr_data)
        loc = 0
        first = True
        while True:
            c = 1024 * 1024 * 1
            if c > len(s) - loc:
                c = len(s) - loc
            if c <= 0:
                break
            t = s[loc: loc + c]
            loc = loc + c
            # Hex-encode the chunk for use in an X'..' literal.
            h = ''.join(['%02x' % b for b in t])
            if first:
                sql = 'INSERT INTO %s (`date`, `data`, `count`) VALUES (\'%s\', X\'%s\', \'%s\');' % \
                      (db_table, str(date), h, str(count))
                first = False
            else:
                sql = 'UPDATE %s SET `data` = concat(`data`, X\'%s\') WHERE `date`=\'%s\';' \
                      % (db_table, h, str(date))
            execute(sql)

    def _update_address_table(self, db_table, nr_data: list):
        """Insert or update one address row per item, executing the statements in batches of 1000."""
        addresses = self._addresses_from_db(db_table)
        n = 0
        sql = ''
        for nr in nr_data:
            sql += self._add_to_address(db_table, addresses, nr)
            n += 1
            if n >= 1000:
                execute(sql)
                n = 0
                sql = ''
        if n > 0:
            # Flush the final, partially filled batch.
            execute(sql)

    def _save_total_nr(self, total_table, nr_data):
        """Insert the summed score of the current sync date into `total_table`."""
        sql = 'INSERT INTO %s (`date`, `nr_value`) VALUES (%s, \'%s\')' % \
              (total_table, str(self.get_current_sync_date()), self.total_nr(nr_data))
        execute(sql)

    def _add_to_address(self, db_table, addresses: dict, nr: NrItem) -> str:
        """
        Build the SQL statement that records `nr` for its address.

        :param db_table: address table name
        :param addresses: existing rows keyed by address (see `_addresses_from_db`)
        :param nr: item to record
        :return: an UPDATE statement when the address is already known,
                 otherwise an INSERT statement
        """
        a = nr.address
        item = self.new_pb_item(nr)
        now = DateTime().timestamp
        c = self.get_current_sync_date()
        if a in addresses.keys():
            count = addresses[a].count + 1
            total = addresses[a].total_nr
            if total is None:
                total = 0
            else:
                total = float(total)
            total += float(item.score)

            # Appended entries are separated from the existing data by '|'.
            i_bytes = b'|' + item.SerializeToString()
            i_hex = ''.join(['%02x' % b for b in i_bytes])
            dates = self.get_valid_dates(addresses[a].dates)
            # The date is listed only for a score of at least 0.1 within the last seven days.
            if float(item.score) >= 0.1 and now - c <= self._SEVEN_DAY_SECONDS:
                dates.append(str(c))
            n = len(dates)
            str_dts = ','.join(dates)
            return 'UPDATE %s SET last_above_0_dates=\'%s\', last_above_0_num=\'%s\', `count`=\'%s\', total_nr=\'%s\', `data`=concat(`data`, X\'%s\') WHERE `address`=\'%s\';' % \
                   (db_table, str_dts, str(n), str(count), str(total), i_hex, a)
        else:
            count = 1
            total = float(item.score)
            i_bytes = item.SerializeToString()
            i_hex = ''.join(['%02x' % b for b in i_bytes])
            dates = []
            if float(item.score) >= 0.1 and now - c <= self._SEVEN_DAY_SECONDS:
                dates.append(str(c))
            n = len(dates)
            str_dts = ','.join(dates)
            return 'INSERT INTO %s (`address`, `last_above_0_dates`, `last_above_0_num`, `count`, total_nr, `data`) VALUES (\'%s\', \'%s\', \'%s\', \'%s\', \'%s\', X\'%s\');' % \
                   (db_table, a, str_dts, str(n), str(count), str(total), i_hex)

    @staticmethod
    def _addresses_from_db(db_table: str) -> dict:
        """Load all rows of `db_table` into a dict of `AddressItem` keyed by address."""
        rows = execute_and_fetchall('SELECT address, last_above_0_dates, last_above_0_num, `count`, total_nr FROM %s;' %
                                    db_table)
        result = {}
        for r in rows:
            result[r['address']] = AddressItem(r['last_above_0_dates'], r['count'], r['total_nr'])
        return result

    @staticmethod
    def _exists(db_table: str, date: int) -> bool:
        """Return True when `db_table` already holds a row for `date` (queried under `DBLock`)."""
        with DBLock:
            sql = 'SELECT COUNT(*) as c FROM %s WHERE `date` = %s;' % (db_table, str(date))
            row = execute_and_fetchone(sql)
            return row['c'] > 0

    # override ---------------------------------------------------------------------------------------------------------

    @abstractmethod
    def is_neb(self) -> bool:
        """Value passed to `ReqQueue` as its `is_neb` argument."""
        pass

    @abstractmethod
    def get_last_sync_date(self) -> int:
        """Return the timestamp of the last synced date."""
        pass

    @abstractmethod
    def set_last_sync_date(self, date: int):
        """Store `date` as the last synced date."""
        pass

    @abstractmethod
    def get_current_sync_date(self) -> int:
        """Return the timestamp of the date to sync next."""
        pass

    @abstractmethod
    def db_context(self):
        """Return the table names as `(date_table, total_table, address_table)`."""
        pass
class MarketDataSynchronizer(object):
    """
    Market data synchronization.

    Base class that downloads a historical market-data page, parses its
    table into row dicts, hands them to `save_market_data` and advances the
    last sync date. Subclasses provide the url, the sync-date storage, the
    persistence and the currency count through the "override" methods.
    """
    _logger: TaskLog = autowired(TaskLog)
    # Guards start() and _begin_sync(); created per instance in __init__.
    _lock = None
    # True while a _begin_sync() pass is in progress.
    _running = False
    # Pending timer that calls start() again, if one was scheduled.
    _timer: Timer = None

    def __init__(self):
        self._lock = RLock()

    def start(self):
        """
        Called as soon as the server starts.

        Does nothing while a sync pass is running; otherwise launches
        `_begin_sync` on a daemon thread.
        :return: None
        """
        with self._lock:
            if self._running:
                return
            t = Thread(target=self._begin_sync)
            t.daemon = True
            t.start()

    # private ----------------------------------------------------------------------------------------------------------

    def _begin_sync(self):
        """
        Run one sync pass and schedule the next one.

        If syncing is possible, one sync is attempted (with a 10 second pause
        after a failure) and `start()` is called again. Otherwise a timer is
        armed to call `start()` later. The instance lock is held for the
        whole pass, including the sleeps.
        """
        with self._lock:
            if self._running:
                return
            self._running = True
            try:
                if self._check_can_sync():
                    if not self._sync():
                        time.sleep(10)
                    self._running = False
                    self.start()
                else:
                    self._running = False
                    self._start_timer()
                    s = '%s start timer.' % self.__class__.__name__
                    self._logger.log(s)
            except Exception as e:
                # Unexpected failure: log, back off, then restart the cycle.
                self._logger.log_err(e)
                time.sleep(10)
                self._running = False
                self.start()

    def _sync(self) -> bool:
        """
        Download and store the market data since the last sync date.

        :return: True when data was saved and committed; False when nothing
                 could be parsed or an error occurred (errors are logged,
                 not raised)
        """
        try:
            str_date = DateTime(self.get_last_sync_date(),
                                timezone_hours=0).to_str("%Y%m%d")
            market_data = self._get_market_data(str_date)
            if market_data is None:
                return False
            with DBLock:
                try:
                    # save to models
                    self.save_market_data(market_data)
                    # update sync date
                    self.set_last_sync_date(
                        DateTime(timezone_hours=0).date.timestamp)
                    self._logger.log(
                        "%s market data sync success. date: %s" %
                        (self.__class__.__name__,
                         DateTime(self.get_last_sync_date(),
                                  timezone_hours=0).to_str("%Y%m%d")))
                    commit()
                except Exception:
                    rollback()
                    # Bare raise keeps the original traceback intact.
                    raise
            return True
        except Exception as e:
            self._logger.log_err(e)
            return False

    # Check whether syncing can continue
    def _check_can_sync(self) -> bool:
        """Return True while the last sync date is earlier than the `DateTime(timezone_hours=0).date` timestamp."""
        return self.get_last_sync_date() < DateTime(
            timezone_hours=0).date.timestamp

    # Fetch all market data starting at the given date
    def _get_market_data(self, date) -> list:
        """
        Download the page for `date` and parse it.

        :param date: date string passed to `url_with_date`
        :return: list of row dicts, or None when no table was found
        """
        url = self.url_with_date(date)
        req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response deterministically instead of leaking it.
        with request.urlopen(req) as resp:
            content = resp.read()
        return self._pass_market_data(content)

    def _start_timer(self):
        """Cancel any pending timer and arm a new daemon timer that calls `start()`."""
        if self._timer is not None:
            self._timer.cancel()
        # Today's data can only be synced after 00:30 tomorrow (UTC+0)
        t = DateTime(timezone_hours=0).add_days(
            1).date.timestamp - DateTime().timestamp + 60 * 30
        self._timer = Timer(t, self.start)
        self._timer.daemon = True
        self._timer.start()

    def _pass_market_data(self, content: str) -> list:
        """
        Parse the downloaded page into a list of row dicts.

        Cells are mapped to keys by column position. 'date' becomes a
        timestamp, 'amount' and 'total_circulation' stay strings with the
        commas removed, every other cell is converted to float. 'total' is
        then set to `currency_count() * closing`, formatted without decimals.

        :param content: page markup as returned by the download
        :return: list of row dicts, or None when `content` is None or the
                 table cannot be found
        """
        if content is None:
            return None
        tb = self._get_table(content)
        if tb is None:
            return None
        keys = [
            'date', 'opening', 'highest', 'lowest', 'closing',
            'amount', 'total_circulation', 'total'
        ]
        items = []
        for tr in tb.find('tbody').findAll('tr'):
            item = {}
            for i, td in enumerate(tr.findAll('td')):
                key = keys[i]
                text: str = td.text
                if key == 'date':
                    item[key] = DateTime.from_str(
                        text, '%Y年%m月%d日',
                        timezone_hours=0).date.timestamp
                elif key == 'amount' or key == 'total_circulation':
                    item[key] = text.replace(',', '')
                else:
                    item[key] = float(text)
            item['total'] = format(
                self.currency_count() * item['closing'], '.0f')
            items.append(item)
        return items

    @staticmethod
    def _get_table(content: str) -> object:
        """Return the first table that has a header cell with the text '日期', or None."""
        soup = BeautifulSoup(content, 'html.parser')
        for t in soup.findAll("table"):
            for th in t.findAll('th'):
                if th.text == '日期':
                    return t
        return None

    # override ---------------------------------------------------------------------------------------------------------

    @abstractmethod
    def url_with_date(self, date: str) -> str:
        """Return the url of the page holding the data starting at `date`."""
        pass

    @abstractmethod
    def get_last_sync_date(self) -> int:
        """Return the timestamp of the last synced date."""
        pass

    @abstractmethod
    def set_last_sync_date(self, date: int):
        """Store `date` as the last synced date."""
        pass

    @abstractmethod
    def save_market_data(self, data: list):
        """Persist the row dicts produced by `_pass_market_data`."""
        pass

    @abstractmethod
    def currency_count(self) -> int:
        """Return the currency count multiplied with the closing price to get 'total'."""
        pass
Example #7
0
def _re_init():
    """Re-initialise the autowired `DataBase` and publish its connection as `common.db.DB`."""
    autowired(DataBase).init_db()
    fresh_connection = autowired(DataBase).connection
    common.db.DB = fresh_connection
Example #8
0
            return self.db.connect

    def init_db(self):
        """Create the `MySQL` extension and a `Flask` app, apply the MySQL settings and bind the two."""
        self.db = MySQL()
        self._app = Flask(__name__)
        mysql_settings = (
            ('MYSQL_HOST', '127.0.0.1'),
            ('MYSQL_PORT', 3307),
            ('MYSQL_USER', '******'),
            ('MYSQL_PASSWORD', '******'),
            ('MYSQL_DB', 'nr_db'),
            ('MYSQL_CURSORCLASS', 'DictCursor'),
        )
        for option, value in mysql_settings:
            self._app.config[option] = value
        self.db.init_app(self._app)


# Module-wide re-entrant lock; callers hold it (`with DBLock:`) around database work.
DBLock = RLock()
# Connection obtained from the autowired DataBase; execute() opens its cursors on it.
DB = autowired(DataBase).connection


def execute(sql):
    """
    Execute `sql` on a cursor of the module-level connection.

    Any exception is re-raised to the caller. When its message indicates a
    lost MySQL connection, the database is re-initialised first.

    :param sql: complete SQL text; it is executed without parameters
    """
    try:
        with DB.cursor() as cur:
            cur.execute(sql, None)
    except Exception as err:
        message = str(err)
        connection_lost = any(
            marker in message
            for marker in ('Lost connection to MySQL',
                           'MySQL server has gone away')
        )
        if connection_lost:
            _re_init()
        raise err


def execute_and_fetchone(sql):
    try:
Example #9
0
class ReqQueue:
    """
    Queue of per-date download tasks with a bounded number of active tasks.

    `start()` enqueues every date from the begin date up to (not including)
    the current date; at most `_REQ_MAX_COUNT` `_ReqTask` objects exist at a
    time. `get()` returns a date's data, waiting for the task when it is not
    loaded yet; `remove()` drops a finished task and starts further ones.
    """
    _is_neb = None
    _begin_date: int
    _last_date: int
    _waiting_date: int
    _dates: list
    _tasks: dict
    _wn: WaitNotify
    _lock: RLock
    _logger = autowired(TaskLog)

    def __init__(self, is_neb: any, begin_date: int):
        """
        :param is_neb: forwarded unchanged to every `_ReqTask`
        :param begin_date: timestamp of the first date to request
        """
        self._lock = RLock()
        self._wn = WaitNotify()
        self._tasks = {}
        self._dates = []
        self._waiting_date = 0
        self._last_date = 0
        self._begin_date = begin_date
        self._is_neb = is_neb

    def start(self):
        """Enqueue all dates not enqueued so far and start tasks for them."""
        with self._lock:
            today = DateTime(timezone_hours=0).date.timestamp
            # Resume after the last enqueued date, or begin at the begin date.
            cursor = self._last_date if self._last_date != 0 else self._begin_date
            while cursor < today:
                self._dates.append(cursor)
                cursor = DateTime(cursor, timezone_hours=0).date.add_days(1).timestamp
                self._last_date = cursor
            self._check_and_req()

    def get(self, date):
        """
        Return the data loaded for `date`.

        When the data is not available yet, the date is registered as the
        awaited one and the call waits on the `WaitNotify` object before
        reading the data again.
        """
        with self._lock:
            data = self._get_data(date)
            must_wait = data is None
            if must_wait:
                self._waiting_date = date
                self._wn.reset()
        if must_wait:
            # Wait outside the lock so the task callback can acquire it.
            self._wn.wait()
            data = self._get_data(date)
        self._waiting_date = 0
        return data

    def remove(self, date):
        """Drop the task of `date` (KeyError when unknown) and start queued tasks."""
        with self._lock:
            del self._tasks[date]
            self._check_and_req()

    def _get_data(self, date):
        """Return the data of the task for `date`, or None when missing or not loaded."""
        with self._lock:
            if date not in self._tasks:
                return None
            return self._tasks[date].data

    def _did_data_loaded(self, date: int):
        """Task callback: notify the waiter when `date` is the awaited date."""
        with self._lock:
            if self._waiting_date != date:
                return
            self._wn.notify()

    def _check_and_req(self):
        """Start tasks for the oldest queued dates while free task slots remain."""
        with self._lock:
            free_slots = _REQ_MAX_COUNT - len(self._tasks)
            if free_slots <= 0:
                return
            if len(self._dates) == 0:
                return
            batch = self._dates[:free_slots]
            for day in batch:
                task = _ReqTask(day, self._is_neb, self._did_data_loaded)
                self._tasks[day] = task
                task.start()
            # The started dates are exactly the leading entries of the queue.
            del self._dates[:len(batch)]
Example #10
0
import signal

from common.autowired import autowired
from common.task_log import TaskLog

# TaskLog instance obtained through autowired; the signal handlers in this module log through it.
logger = autowired(TaskLog)


def on_signal_int(signum=None, frame=None):
    """
    Log that SIGINT was received.

    `signal.signal` invokes a handler with two positional arguments, the
    signal number and the current stack frame. Both are accepted and
    ignored; the defaults keep direct calls without arguments working.

    :param signum: signal number supplied by the interpreter (unused)
    :param frame: current stack frame supplied by the interpreter (unused)
    """
    logger.log_err('signal int')


def on_signal_term(signum=None, frame=None):
    """
    Log that SIGTERM was received.

    `signal.signal` invokes a handler with two positional arguments, the
    signal number and the current stack frame. Both are accepted and
    ignored; the defaults keep direct calls without arguments working.

    :param signum: signal number supplied by the interpreter (unused)
    :param frame: current stack frame supplied by the interpreter (unused)
    """
    logger.log_err('signal term')


def start_signal_log():
    """Install the logging handlers for SIGINT and SIGTERM."""
    handlers = (
        (signal.SIGINT, on_signal_int),
        (signal.SIGTERM, on_signal_term),
    )
    for signum, handler in handlers:
        signal.signal(signum, handler)