def check_new_dump(self):
    """Decide whether a new dump has to be downloaded.

    The newest of ``lastDumpDate`` / ``lastDumpDateUrgently`` taken from
    ``self.update_dump`` (each floor-divided by 1000) is compared with the
    newest of the two values stored in the ``Dump`` table. A stored
    ``lastResult`` equal to ``'Error'`` also counts as "new dump", even when
    the dates match.

    Side effects:
        Sets the ``lastAction`` row to ``'getLastDumpDate'`` and the
        ``lastResult`` row to ``'NewDump'`` or ``'lastDump'``.

    Returns:
        bool: True when a new dump is available, False otherwise.
    """
    logger.info('Check if dump.xml has updates since last sync.')

    stamp_format = '%Y-%m-%d %H:%M:%S'

    # Remote values are floor-divided by 1000 before use
    # (presumably milliseconds -> seconds; confirm against the service docs).
    remote_regular = self.update_dump.lastDumpDate // 1000
    remote_urgent = self.update_dump.lastDumpDateUrgently // 1000

    # Read every stored timestamp exactly once so that the values written to
    # the log are guaranteed to be the ones used in the comparison.
    stored_regular = int(Dump.get(Dump.param == 'lastDumpDate').value)
    stored_urgent = int(Dump.get(Dump.param == 'lastDumpDateUrgently').value)

    last_date_dump = max(remote_regular, remote_urgent)
    current_date_dump = max(stored_regular, stored_urgent)

    logger.info('Current date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                datetime.fromtimestamp(stored_regular).strftime(stamp_format),
                datetime.fromtimestamp(stored_urgent).strftime(stamp_format))
    logger.info('Last date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                datetime.fromtimestamp(int(remote_regular)).strftime(stamp_format),
                datetime.fromtimestamp(int(remote_urgent)).strftime(stamp_format))

    # ``lastResult`` is only queried when the dates are equal (short-circuit).
    has_new_dump = bool(
        last_date_dump != current_date_dump
        or Dump.get(Dump.param == 'lastResult').value == 'Error'
    )

    if has_new_dump:
        logger.info('New dump is available.')
    else:
        logger.info('Dump date without changes.')

    # The action marker is the same for both outcomes; only the result differs.
    Dump.update(value='getLastDumpDate').where(Dump.param == 'lastAction').execute()
    Dump.update(value='NewDump' if has_new_dump else 'lastDump') \
        .where(Dump.param == 'lastResult').execute()
    return has_new_dump
def check_service_upd(self):
    """Detect version changes of the remote service and persist them.

    For each of ``webServiceVersion``, ``dumpFormatVersion`` and
    ``docVersion`` the value in ``self.update_dump`` is compared with the
    value stored in the ``Dump`` table. When they differ, a warning is
    logged, a human-readable notice is appended to the result and the stored
    value is overwritten with the new one.

    Returns:
        str: Concatenated notices for every changed version, or ``''`` when
        nothing changed.
    """
    # (Dump.param / update_dump attribute, label used in log and notice,
    #  exact "Current ..." prefix). The prefixes are kept byte-for-byte,
    # including the missing space after "Current webservice:".
    tracked = (
        ('webServiceVersion', 'webservice', 'Current webservice:'),
        ('dumpFormatVersion', 'dumpFormatVersion', 'Current dumpFormatVersion: '),
        ('docVersion', 'docVersion', 'Current docVersion: '),
    )

    # One read per row: the logged, compared and reported value are the same.
    stored = {param: Dump.get(Dump.param == param).value for param, _, _ in tracked}

    logger.info('Current versions: webservice: %s, dump: %s, doc: %s',
                stored['webServiceVersion'],
                stored['dumpFormatVersion'],
                stored['docVersion'])

    notices = []
    for param, label, current_prefix in tracked:
        reported = getattr(self.update_dump, param)
        if reported != stored[param]:
            logger.warning('New ' + label + ': %s', reported)
            notices.append(current_prefix + stored[param] +
                           '\nNew ' + label + ': ' + reported + '\n\n')
            Dump.update(value=reported).where(Dump.param == param).execute()

    return ''.join(notices)
def check_service_upd(self):
    """Compare remote service versions with the stored ones and sync them.

    Checks ``webServiceVersion``, ``dumpFormatVersion`` and ``docVersion``
    of ``self.update_dump`` against the matching rows of the ``Dump`` table.
    Every mismatch is logged as a warning, described in the returned text
    and written back to the table.

    Returns:
        str: Text describing each changed version (old and new value), or an
        empty string when all versions match.
    """
    def stored_value(param):
        """Return the value currently stored for *param* in the Dump table."""
        return Dump.get(Dump.param == param).value

    def sync(param, known, label, current_prefix):
        """Return the notice for one version field; store it when changed."""
        fresh = getattr(self.update_dump, param)
        if fresh != known:
            logger.warning('New ' + label + ': %s', fresh)
            notice = current_prefix + known + '\nNew ' + label + ': ' + fresh + '\n\n'
            Dump.update(value=fresh).where(Dump.param == param).execute()
            return notice
        return ''

    # Each row is read once, so the comparison and the notice cannot disagree.
    web_known = stored_value('webServiceVersion')
    dump_known = stored_value('dumpFormatVersion')
    doc_known = stored_value('docVersion')

    logger.info('Current versions: webservice: %s, dump: %s, doc: %s',
                web_known, dump_known, doc_known)

    # Message prefixes are reproduced exactly as before (note: no space after
    # "Current webservice:").
    msg = sync('webServiceVersion', web_known, 'webservice', 'Current webservice:')
    msg += sync('dumpFormatVersion', dump_known, 'dumpFormatVersion',
                'Current dumpFormatVersion: ')
    msg += sync('docVersion', doc_known, 'docVersion', 'Current docVersion: ')
    return msg
def check_new_dump(self):
    """Decide whether a new dump has to be downloaded.

    Which timestamp is compared depends on two flags read from ``self.cfg``:

    * only ``lastDumpDateUrgently()`` truthy: compare the urgent timestamp;
    * only ``lastDumpDate()`` truthy: compare the regular timestamp;
    * otherwise (both or neither): compare the newest of the two.

    The remote values come from ``self.update_dump`` (floor-divided by 1000),
    the known values from the ``Dump`` table. A stored ``lastResult`` equal
    to ``'Error'`` also counts as "new dump", even when the dates match.

    Side effects:
        Sets the ``lastAction`` row to ``'getLastDumpDate'`` and the
        ``lastResult`` row to ``'NewDump'`` or ``'lastDump'``.

    Returns:
        bool: True when a new dump is available, False otherwise.
    """
    logger.info('Check if dump.xml has updates since last sync.')

    stamp_format = '%Y-%m-%d %H:%M:%S'

    # Evaluate each configuration flag once instead of once per condition.
    track_urgent = self.cfg.lastDumpDateUrgently()
    track_regular = self.cfg.lastDumpDate()

    # Remote values are floor-divided by 1000 before use
    # (presumably milliseconds -> seconds; confirm against the service docs).
    remote_regular = self.update_dump.lastDumpDate // 1000
    remote_urgent = self.update_dump.lastDumpDateUrgently // 1000

    # Both stored timestamps are always needed for the log lines below, so
    # read them once and reuse them for the comparison.
    stored_regular = int(Dump.get(Dump.param == 'lastDumpDate').value)
    stored_urgent = int(Dump.get(Dump.param == 'lastDumpDateUrgently').value)

    if track_urgent and not track_regular:
        last_date_dump, current_date_dump = remote_urgent, stored_urgent
    elif track_regular and not track_urgent:
        last_date_dump, current_date_dump = remote_regular, stored_regular
    else:
        last_date_dump = max(remote_regular, remote_urgent)
        current_date_dump = max(stored_regular, stored_urgent)

    logger.info('Current date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                datetime.fromtimestamp(stored_regular).strftime(stamp_format),
                datetime.fromtimestamp(stored_urgent).strftime(stamp_format))
    logger.info('Last date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                datetime.fromtimestamp(int(remote_regular)).strftime(stamp_format),
                datetime.fromtimestamp(int(remote_urgent)).strftime(stamp_format))

    # ``lastResult`` is only queried when the dates are equal (short-circuit).
    has_new_dump = bool(
        last_date_dump != current_date_dump
        or Dump.get(Dump.param == 'lastResult').value == 'Error'
    )

    if has_new_dump:
        logger.info('New dump is available.')
    else:
        logger.info('Dump date without changes.')

    # The action marker is the same for both outcomes; only the result differs.
    Dump.update(value='getLastDumpDate').where(
        Dump.param == 'lastAction').execute()
    Dump.update(value='NewDump' if has_new_dump else 'lastDump').where(
        Dump.param == 'lastResult').execute()
    return has_new_dump
def statistics_show(self, diff=0, stdout=False):
    """Build a text report of added/deleted entries and current totals.

    The report starts with the stored ``lastDumpDate`` formatted as
    ``YYYY-MM-DD HH:MM:SS``, then lists added URLs, IPs and domains, then the
    deleted ones (rows come from ``self._url_diff_sql``,
    ``self._ip_diff_sql`` and ``self._domain_diff_sql``), and ends with the
    distinct counts of URLs, IPs, domains and items.

    Args:
        diff: Passed through to the ``_*_diff_sql`` helpers and used to slice
            ``self.idx_list[:diff]``; rows added in that slice are excluded
            from the totals, rows purged in it are still counted.
        stdout: When true, print the report instead of returning it.

    Returns:
        The report string, or ``False`` when ``stdout`` is true (the report
        is printed in that case).
    """
    def render_ip(row):
        """Return the IP of *row*, with ``/mask`` appended for masks below 32."""
        if row.mask < 32:
            return row.ip + '/' + str(row.mask)
        return row.ip

    date_time = datetime.fromtimestamp(
        int(Dump.get(Dump.param == 'lastDumpDate').value)
    ).strftime('%Y-%m-%d %H:%M:%S')

    chunks = ['vigruzki.rkn.gov.ru update: ' + date_time + '\n']

    # (section header, row source, third argument passed to it, row renderer)
    sections = (
        ('\nURLs added: \n\n', self._url_diff_sql, 1, lambda row: row.url),
        ('\nIPs added: \n\n', self._ip_diff_sql, 1, render_ip),
        ('\nDOMAINs added: \n\n', self._domain_diff_sql, 1, lambda row: row.domain),
        ('\nURLs deleted: \n\n', self._url_diff_sql, 0, lambda row: row.url),
        ('\nIPs deleted: \n\n', self._ip_diff_sql, 0, render_ip),
        ('\nDOMAINs deleted: \n\n', self._domain_diff_sql, 0, lambda row: row.domain),
    )
    for header, fetch_rows, flag, render in sections:
        chunks.append(header)
        # Every entry is newline-terminated. Previously subnets (mask < 32)
        # were written without '\n' and ran into the following entry.
        chunks.extend(render(row) + '\n' for row in fetch_rows(diff, 'ignore', flag))

    rb_list = self.idx_list[:diff]

    def distinct_count(model, column):
        """Count distinct *column* values of rows not added in ``rb_list``
        and either never purged or purged in ``rb_list``."""
        return model.select(fn.Count(fn.Distinct(column))) \
            .where(~(model.add << rb_list) &
                   ((model.purge >> None) | (model.purge << rb_list))).scalar()

    domain_count = distinct_count(Domain, Domain.domain)
    url_count = distinct_count(URL, URL.url)
    ip_count = distinct_count(IP, IP.ip)
    id_count = distinct_count(Item, Item.content_id)

    chunks.append('\nURLs count: ' + str(url_count) + '\n')
    chunks.append('IPs count: ' + str(ip_count) + '\n')
    chunks.append('DOMAINs count: ' + str(domain_count) + '\n')
    chunks.append('Item count: ' + str(id_count) + '\n')

    message = ''.join(chunks)

    if stdout:
        print(message)
        return False
    return message
from collections import defaultdict

from db import Dump

# Print overall statistics of the dump table: totals, then every game with
# the genres reported for it by each site.

print('Total:', Dump.select().count())

genres = Dump.get_all_genres()
print(f'Genres ({len(genres)}): {genres}')

games = Dump.get_all_games()
print(f'Games ({len(games)}): {games}')

sites = Dump.get_all_sites()
print(f'Sites ({len(sites)}): {sites}')

print()

# Width of the longest site name, used to align the per-site lines.
# ``default=0`` keeps the script from raising ValueError on an empty table.
max_width = max(
    (len(x.site) for x in Dump.select(Dump.site).distinct()),
    default=0,
)
fmt_str = ' {:<%d} : {}' % max_width

# Group rows by game name.
# NOTE(review): assumes ``db.Dump.get()`` is a project-level helper that
# returns an iterable of rows — confirm against db.py.
game_by_dump = defaultdict(list)
for x in Dump.get():
    game_by_dump[x.name].append(x)

for game, dumps in game_by_dump.items():
    print(game)
    for dump in dumps:
        print(fmt_str.format(dump.site, dump.genres))

    print()