def processApp(self, appCode, hiveClient):
    """Pack (compact) the Hive partition of every event of one app for self.date.

    The events processed are either the single ``self.event`` (when set) or
    every event listed in the app's config. For each event the method looks up
    the table and partition metadata, runs ``PACK_TABLE_QUERY`` through
    ``hiveClient`` when the partition is not yet marked compact (or when
    ``self.skipCheckInDB`` is set), and on success stores the compact flag.

    :param appCode: application code; used for the config, app and DB lookups.
    :param hiveClient: object exposing ``execute(query)``.
    :return: None. Returns early (aborting the remaining events) when the
        table metadata cannot be obtained.
    """
    appConfig = self.getAppConfig(appCode)
    eventCodes = (
        [self.event]
        if self.event
        else [configuredEvent.code for configuredEvent in appConfig.getEvents()]
    )

    dbSession = self.getDBSession()
    metaService = HiveMetaService(dbSession)
    total = len(eventCodes)

    for position, eventCode in enumerate(eventCodes, 1):
        # Leading blank line separates the log output of consecutive events.
        print("\n" + 'start pack key {}.{} for date {} ({}/{})'.format(
            appCode, eventCode, self.date, position, total))

        app = self.getApp(appCode)
        if not app:
            print('cant find app {} in database'.format(appCode))
            continue

        # Fetch the table metadata.
        table = metaService.getOrCreateHiveTable(app.appId, eventCode)
        if not table:
            print('Cannot create new hiveTable. Terminate')
            return

        # Does the partition physically exist?
        if not self.HDFSClient.isPartitionExist(appCode, eventCode, self.date):
            print('folder for partition not exist. Next event')
            continue

        # Fetch the table partition metadata.
        partition = metaService.getOrCreateHiveTablePartition(table.hiveTableId, self.date)
        if not partition:
            # The message says "Terminate", but only this event is skipped.
            print('Cannot create new hiveTablePartition. Terminate')
            continue

        # Already compacted and the DB check is not being bypassed.
        if partition.isCompact and not self.skipCheckInDB:
            print('table {}.{} already packed'.format(appCode, eventCode))
            continue

        print('Start pack table {}.{}'.format(appCode, eventCode))
        try:
            startedAt = datetime.now()
            dateParts = {'year': self.year, 'month': self.month, 'day': self.day}
            query = PACK_TABLE_QUERY.format(
                eventCode, '%(year)d-%(month)02d-%(day)02d' % dateParts)
            hiveClient.execute('USE {}'.format(self.getDBName(appCode)))
            print(query)
            hiveClient.execute(query)
            finishedAt = datetime.now()
            print('Pack complete. Query time: {}'.format(finishedAt - startedAt))
            time.sleep(10)
        except Exception as ex:
            print('Pack end with exception {}'.format(ex.message))
        else:
            # Only a successful pack is recorded in the metadata.
            partition.isCompact = 1
            print('Set compact label to in partition meta')
            dbSession.add(partition)
            dbSession.commit()
class InitHiveMetaDataScript(BaseAnalyticsScript):
    """Create Hive table/partition metadata for every app and event.

    For each app code the script walks the app's configured events, obtains
    (or creates) the table metadata, obtains (or creates) metadata for every
    partition date reported by the WebHDFS client, and stores the earliest
    partition date as the table's ``startFrom``.
    """

    def run(self):
        """Process every app code returned by ``getAppCodes()``."""
        print('run InitHiveMetaDataScript')
        appCodes = self.getAppCodes()
        # Shared by processApp / processHiveTable through the instance.
        self.hiveMetaService = HiveMetaService(self.getDBSession())
        for appCode in appCodes:
            appConfig = self.getAppConfig(appCode)
            self.processApp(appConfig)

    def processApp(self, appConfig):
        """Process every event of the app described by ``appConfig``.

        :param appConfig: config object exposing ``getAppCode()`` and
            ``getEvents()``.
        """
        appCode = appConfig.getAppCode()
        app = self.getApp(appCode)
        if not app:
            print('Cant find app with code {}. Terminate.'.format(appCode))
            # NOTE(review): presumably terminate() stops the script; if it ever
            # returns, the loop below fails on ``app.appId`` - confirm.
            self.terminate()

        print('Process app {}'.format(appCode))
        for appEvent in appConfig.getEvents():
            hiveTable = self.hiveMetaService.getOrCreateHiveTable(app.appId, appEvent.code)
            if not hiveTable:
                print('Cant get or create HiveTable for {}, {}'.format(appCode, appEvent.code))
                continue
            self.processHiveTable(hiveTable, appCode, appEvent.code)

    def processHiveTable(self, hiveTable, appCode, eventCode):
        """Register all partitions of one table and set its start date.

        :param hiveTable: table metadata object; ``hiveTableId`` is read and
            ``startFrom`` is written.
        :param appCode: application code passed to the WebHDFS client.
        :param eventCode: event code passed to the WebHDFS client.
        """
        print('processHiveTable for {}, {}'.format(appCode, eventCode))
        dbSession = self.getDBSession()
        analyticsWebHDFS = self.getWebHDFSClient()

        try:
            partitionsDates = analyticsWebHDFS.getPartitions(appCode, eventCode)
        except WebHDFSException as e:
            print('Exception on getPartitions: {}'.format(e.message))
            return

        for partitionDate in partitionsDates:
            hivePartition = self.hiveMetaService.getOrCreateHiveTablePartition(
                hiveTable.hiveTableId, partitionDate)
            if not hivePartition:
                # Report the date we asked for: hivePartition is falsy here,
                # so reading an attribute from it would raise instead of
                # logging the failure.
                print('Cant get or create partition for {} date {}'.format(
                    eventCode, partitionDate))

        if partitionsDates:
            # The earliest partition marks where the table's data begins.
            minDate = min(partitionsDates)
            hiveTable.startFrom = minDate
            dbSession.add(hiveTable)
            dbSession.commit()
            print('Set start from {} {} {}'.format(appCode, eventCode, minDate))
def run(self):
    """Entry point: process the config of every known app code.

    Stores a ``HiveMetaService`` bound to the DB session on
    ``self.hiveMetaService`` before any app is processed.
    """
    print('run InitHiveMetaDataScript')
    codes = self.getAppCodes()
    session = self.getDBSession()
    self.hiveMetaService = HiveMetaService(session)
    for code in codes:
        self.processApp(self.getAppConfig(code))
def get(self, *args, **kwargs):
    """Render the date-selector calendar for an event of an app.

    Request arguments read: ``index``, ``eventName``, ``app``, ``start``,
    ``end`` (all required) and ``startWith`` (optional, ``YYYY-MM-DD``).

    The calendar covers two months beginning at ``startWith``. When
    ``startWith`` is not supplied it defaults to now, or to the first day of
    the month preceding ``end`` when the selection is shorter than 60 days
    and ``end`` lies within the last 30 days.

    Renders ``dashboard/taskForm/dateSelector.jinja2``; returns None.
    """
    # Function-scope import keeps this block self-contained.
    import calendar

    index = self.get_argument('index')
    showDelta = timedelta(days=60)
    now = datetime.now()

    # The earliest calendar start is the date data collection began for the key.
    eventCode = self.get_argument('eventName')
    app = self.checkAppAccess(self.get_argument('app'))
    hiveMetaService = HiveMetaService(self.getDBSession())
    minDate = hiveMetaService.getMinDateForEvent(app.appId, eventCode)

    # The currently selected range.
    start = toDate(self.get_argument('start'))
    end = toDate(self.get_argument('end'))

    # Calendar start.
    startWith = self.get_argument('startWith', None)
    if startWith:
        year, month, day = startWith.split('-')
        startWith = datetime(int(year), int(month), int(day))
    else:
        startWith = now
        if end - start < showDelta and end + timedelta(days=30) > now:
            # Stepping back end.day + 1 days always lands in the previous
            # month; snap to its first day at midnight.
            startWith = (end - timedelta(days=end.day + 1)).replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)

    # If the minimum date is unavailable, fall back to the calendar start.
    if not minDate:
        minDate = startWith.date()

    # Calendar end: two months after the calendar start. The day is clamped
    # to the length of the target month, because replacing only the month
    # raises ValueError when the day does not exist there (e.g. Jan 31 -> Feb).
    monthIndex = startWith.month - 1 + 2
    endYear = startWith.year + monthIndex // 12
    endMonth = monthIndex % 12 + 1
    lastDay = calendar.monthrange(endYear, endMonth)[1]
    endWith = startWith.replace(
        year=endYear, month=endMonth, day=min(startWith.day, lastDay))

    # Collect the dates to display: [startWith, endWith).
    dates = []
    cursor = startWith
    while cursor < endWith:
        dates.append(cursor.date())
        cursor = cursor + timedelta(days=1)

    # Previous month; nothing to scroll back to when the calendar already
    # starts before the first date that has data.
    prevMonth = addMonths(startWith, -1)
    if startWith.date() < minDate:
        prevMonth = False

    # Next month; disabled when the month after it would lie past now.
    nextMonth = addMonths(startWith, 1)
    if addMonths(nextMonth, 1) > now:
        nextMonth = False

    self.render('dashboard/taskForm/dateSelector.jinja2', {
        'start': start,
        'end': end,
        'dates': dates,
        'maxEnd': now.date(),
        'maxStart': minDate,
        'prevMonth': prevMonth,
        'nextMonth': nextMonth,
        'index': index,
        'isOneDay': start.date() == (end - timedelta(days=1)).date(),
    })