def processApp(self, appCode, hiveClient):
    """Run PACK_TABLE_QUERY for each event of one app for the script's date.

    The event list is ``[self.event]`` when ``self.event`` is set, otherwise
    the codes of all events returned by the app config. For every event the
    method resolves the HiveTable and partition metadata records, checks via
    ``self.HDFSClient`` that the partition folder exists, executes the pack
    query through ``hiveClient`` and, on success, sets ``isCompact = 1`` on
    the partition record and commits it.

    :param appCode: code of the app whose events are processed.
    :param hiveClient: client object exposing ``execute(query)``.
    :return: None. Returns early when a HiveTable record cannot be obtained.
    """
    appConfig = self.getAppConfig(appCode)
    if self.event:
        eventCodes = [self.event]
    else:
        eventCodes = [appEvent.code for appEvent in appConfig.getEvents()]

    dbSession = self.getDBSession()
    hiveMetaService = HiveMetaService(dbSession)

    total = len(eventCodes)
    for position, eventCode in enumerate(eventCodes, 1):
        # Single-argument print keeps the Python 2 output unchanged:
        # a blank line followed by the progress message.
        print("\n" + 'start pack key {}.{} for date {} ({}/{})'.format(
            appCode, eventCode, self.date, position, total))

        app = self.getApp(appCode)
        if not app:
            print('cant find app {} in database'.format(appCode))
            continue

        # Fetch (or create) the table metadata record.
        hiveTable = hiveMetaService.getOrCreateHiveTable(app.appId, eventCode)
        if not hiveTable:
            print('Cannot create new hiveTable. Terminate')
            return

        # Does the partition physically exist?
        if not self.HDFSClient.isPartitionExist(appCode, eventCode, self.date):
            print('folder for partition not exist. Next event')
            continue

        # Fetch (or create) the partition metadata record of the table.
        partition = hiveMetaService.getOrCreateHiveTablePartition(
            hiveTable.hiveTableId, self.date)
        if not partition:
            print('Cannot create new hiveTablePartition. Terminate')
            continue

        # Already marked compact and the DB check is not being skipped.
        if partition.isCompact and not self.skipCheckInDB:
            print('table {}.{} already packed'.format(appCode, eventCode))
            continue

        print('Start pack table {}.{}'.format(appCode, eventCode))
        try:
            start = datetime.now()
            partitionDay = '%(year)d-%(month)02d-%(day)02d' % {
                'year': self.year,
                'month': self.month,
                'day': self.day,
            }
            query = PACK_TABLE_QUERY.format(eventCode, partitionDay)
            hiveClient.execute('USE {}'.format(self.getDBName(appCode)))
            print(query)
            hiveClient.execute(query)
            end = datetime.now()
            print('Pack complete. Query time: {}'.format(end - start))
            time.sleep(10)
        except Exception as ex:
            # Best effort: report the failure and move on to the next event.
            print('Pack end with exception {}'.format(ex.message))
        else:
            # Only a successful pack is recorded in the partition metadata.
            partition.isCompact = 1
            print('Set compact label to in partition meta')
            dbSession.add(partition)
            dbSession.commit()
class InitHiveMetaDataScript(BaseAnalyticsScript):
    """Create Hive table/partition metadata records for every app event.

    For each app code returned by ``getAppCodes()`` the script gets or
    creates a HiveTable record per event, gets or creates a partition record
    for every partition date reported by the WebHDFS client, and stores the
    smallest of those dates in ``hiveTable.startFrom``.
    """

    def run(self):
        """Process every app code returned by ``getAppCodes()``."""
        print('run InitHiveMetaDataScript')
        appCodes = self.getAppCodes()
        # Shared by processApp/processHiveTable for all metadata lookups.
        self.hiveMetaService = HiveMetaService(self.getDBSession())
        for appCode in appCodes:
            self.processApp(self.getAppConfig(appCode))

    def processApp(self, appConfig):
        """Get or create a HiveTable for each event of the app and process it.

        :param appConfig: app config exposing ``getAppCode()`` and
            ``getEvents()``.
        """
        appCode = appConfig.getAppCode()
        app = self.getApp(appCode)
        if not app:
            print('Cant find app with code {}. Terminate.'.format(appCode))
            self.terminate()
            # terminate() is defined outside this class; return explicitly so
            # a missing app is never dereferenced below should it hand
            # control back to us.
            return

        print('Process app {}'.format(appCode))
        for appEvent in appConfig.getEvents():
            hiveTable = self.hiveMetaService.getOrCreateHiveTable(
                app.appId, appEvent.code)
            if not hiveTable:
                print('Cant get or create HiveTable for {}, {}'.format(
                    appCode, appEvent.code))
                continue
            self.processHiveTable(hiveTable, appCode, appEvent.code)

    def processHiveTable(self, hiveTable, appCode, eventCode):
        """Register the partitions of one table and record its earliest date.

        :param hiveTable: table metadata record; ``hiveTableId`` is read and
            ``startFrom`` is written.
        :param appCode: app code passed to the WebHDFS client.
        :param eventCode: event code passed to the WebHDFS client.
        """
        print('processHiveTable for {}, {}'.format(appCode, eventCode))
        dbSession = self.getDBSession()
        analyticsWebHDFS = self.getWebHDFSClient()
        try:
            partitionsDates = analyticsWebHDFS.getPartitions(appCode, eventCode)
        except WebHDFSException as e:
            print('Exception on getPartitions: {}'.format(e.message))
            return

        for partitionDate in partitionsDates:
            hivePartition = self.hiveMetaService.getOrCreateHiveTablePartition(
                hiveTable.hiveTableId, partitionDate)
            if not hivePartition:
                # Report the date that was requested: hivePartition is falsy
                # here, so reading hivePartition.partitionDate would raise
                # AttributeError instead of logging the failure.
                print('Cant get or create partition for {} date {}'.format(
                    eventCode, partitionDate))

        # min() fails on an empty sequence, so only update when there is
        # at least one partition.
        if partitionsDates:
            minDate = min(partitionsDates)
            hiveTable.startFrom = minDate
            dbSession.add(hiveTable)
            dbSession.commit()
            print('Set start from {} {} {}'.format(appCode, eventCode, minDate))