예제 #1
0
파일: packer.py 프로젝트: x100up/analystics
    def processApp(self, appCode, hiveClient):
        """Pack the Hive partition for ``self.date`` of each selected event of one app.

        The events are either the single ``self.event`` (when set) or every
        event listed in the app config. For each event the method looks up the
        app, the Hive table metadata and the partition metadata, checks that
        the partition folder exists via ``self.HDFSClient``, and runs
        ``PACK_TABLE_QUERY`` through ``hiveClient`` unless the partition is
        already flagged compact (the flag is ignored when
        ``self.skipCheckInDB`` is truthy). After a successful query the
        partition is marked compact and committed.

        :param appCode: code of the application to process
        :param hiveClient: client object exposing ``execute(query)``
        :return: None; returns early if the Hive table metadata cannot be
            obtained
        """
        appConfig = self.getAppConfig(appCode)
        eventCodes = [self.event] if self.event else [appEvent.code for appEvent in appConfig.getEvents()]
        dbSession = self.getDBSession()
        hiveMetaService = HiveMetaService(dbSession)
        total = len(eventCodes)

        for position, eventCode in enumerate(eventCodes, 1):
            print("\n" + 'start pack key {}.{} for date {} ({}/{})'.format(appCode, eventCode, self.date, position, total))

            app = self.getApp(appCode)
            if not app:
                print('cant find app {} in database'.format(appCode))
                continue

            # fetch (or create) the table metadata; without it nothing can be done
            hiveTable = hiveMetaService.getOrCreateHiveTable(app.appId, eventCode)
            if not hiveTable:
                print('Cannot create new hiveTable. Terminate')
                return

            # does the partition physically exist?
            partitionOnDisk = self.HDFSClient.isPartitionExist(appCode, eventCode, self.date)
            if not partitionOnDisk:
                print('folder for partition not exist. Next event')
                continue

            # fetch (or create) the partition metadata of the table
            hiveTablePartition = hiveMetaService.getOrCreateHiveTablePartition(hiveTable.hiveTableId, self.date)
            if not hiveTablePartition:
                print('Cannot create new hiveTablePartition. Terminate')
                continue

            # already compacted and the DB check is not being skipped: nothing to do
            if hiveTablePartition.isCompact and not self.skipCheckInDB:
                print('table {}.{} already packed'.format(appCode, eventCode))
                continue

            print('Start pack table {}.{}'.format(appCode, eventCode))
            try:
                startedAt = datetime.now()
                partitionDay = '%(year)d-%(month)02d-%(day)02d' % {'year': self.year, 'month': self.month, 'day': self.day}
                query = PACK_TABLE_QUERY.format(eventCode, partitionDay)
                hiveClient.execute('USE {}'.format(self.getDBName(appCode)))
                print(query)
                hiveClient.execute(query)
                elapsed = datetime.now() - startedAt
                print('Pack complete. Query time: {}'.format(elapsed))
                time.sleep(10)
            except Exception as ex:
                print('Pack end with exception {}'.format(ex.message))
            else:
                # the query succeeded: persist the compact flag on the partition
                hiveTablePartition.isCompact = 1
                print('Set compact label to in partition meta')

                dbSession.add(hiveTablePartition)
                dbSession.commit()
예제 #2
0
class InitHiveMetaDataScript(BaseAnalyticsScript):
    """Script that registers Hive tables and their partitions in the metadata DB.

    For every app code it walks the events of the app config, gets or creates
    the Hive table record, gets or creates a partition record for every
    partition date reported by the WebHDFS client, and stores the earliest
    partition date as the table's ``startFrom``.
    """

    def run(self):
        """Entry point: create the metadata service and process every app code."""
        print('run InitHiveMetaDataScript')
        appCodes = self.getAppCodes()
        self.hiveMetaService = HiveMetaService(self.getDBSession())

        for appCode in appCodes:
            appConfig = self.getAppConfig(appCode)
            self.processApp(appConfig)

    def processApp(self, appConfig):
        """Get or create the Hive table of every event of one app and process it.

        :param appConfig: app config object exposing ``getAppCode()`` and
            ``getEvents()``
        """
        appCode = appConfig.getAppCode()
        app = self.getApp(appCode)
        if not app:
            print('Cant find app with code {}. Terminate.'.format(appCode))
            self.terminate()
            # terminate() presumably stops the script; return explicitly so
            # that a non-exiting implementation cannot fall through to
            # app.appId on a missing app.
            return
        print('Process app {}'.format(appCode))

        for appEvent in appConfig.getEvents():
            hiveTable = self.hiveMetaService.getOrCreateHiveTable(app.appId, appEvent.code)
            if not hiveTable:
                print('Cant get or create HiveTable for {}, {}'.format(appCode, appEvent.code))
                continue
            self.processHiveTable(hiveTable, appCode, appEvent.code)

    def processHiveTable(self, hiveTable, appCode, eventCode):
        """Register all partitions of one table and record its earliest date.

        :param hiveTable: Hive table record (``hiveTableId`` is read,
            ``startFrom`` is written)
        :param appCode: application code used to list the partitions
        :param eventCode: event code used to list the partitions
        """
        print('processHiveTable for {}, {}'.format(appCode, eventCode))
        dbSession = self.getDBSession()
        analyticsWebHDFS = self.getWebHDFSClient()
        try:
            partitionsDates = analyticsWebHDFS.getPartitions(appCode, eventCode)
        except WebHDFSException as e:
            print('Exception on getPartitions: {}'.format(e.message))
            return

        for partitionDate in partitionsDates:
            hivePartition = self.hiveMetaService.getOrCreateHiveTablePartition(hiveTable.hiveTableId, partitionDate)
            if not hivePartition:
                # hivePartition is falsy here, so report the date we asked
                # for instead of dereferencing the missing record.
                print('Cant get or create partition for {} date {}'.format(eventCode, partitionDate))

        if partitionsDates:
            minDate = min(partitionsDates)
            hiveTable.startFrom = minDate
            dbSession.add(hiveTable)
            dbSession.commit()
            print('Set start from {} {} {}'.format(appCode, eventCode, minDate))
예제 #3
0
    def run(self):
        """Entry point: build the Hive metadata service, then process each app.

        Stores the service on ``self.hiveMetaService`` and calls
        ``self.processApp`` with the config of every code returned by
        ``self.getAppCodes()``.
        """
        print('run InitHiveMetaDataScript')
        codes = self.getAppCodes()
        self.hiveMetaService = HiveMetaService(self.getDBSession())

        for code in codes:
            self.processApp(self.getAppConfig(code))
예제 #4
0
    def get(self, *args, **kwargs):
        """Render the date-selector calendar of the task form.

        Request arguments read: ``index``, ``eventName``, ``app``, ``start``,
        ``end`` and the optional ``startWith`` (``YYYY-MM-DD``), which is the
        first day shown by the calendar. The calendar spans two months
        starting at ``startWith``; the template also receives the selected
        range, the allowed bounds and the previous/next month navigation
        targets (``False`` when navigation in that direction is disabled).
        """
        # Local import: only needed for the month-length clamp below.
        import calendar

        index = self.get_argument('index')

        showDelta = timedelta(days=60)

        now = datetime.now()

        # the earliest allowed calendar start is the date the event's data begins
        eventCode = self.get_argument('eventName')
        app = self.checkAppAccess(self.get_argument('app'))

        hiveMetaService = HiveMetaService(self.getDBSession())
        minDate = hiveMetaService.getMinDateForEvent(app.appId, eventCode)

        # the currently selected range
        start = toDate(self.get_argument('start'))
        end = toDate(self.get_argument('end'))

        # calendar start
        startWith = self.get_argument('startWith', None)
        if startWith:
            year, month, day = startWith.split('-')
            startWith = datetime(int(year), int(month), int(day))
        else:
            startWith = now
            # a short selection that ends within the last 30 days: start the
            # calendar on the first day of the month preceding ``end``
            if end - start < showDelta and end + timedelta(days=30) > now:
                startWith = (end - timedelta(days=end.day + 1)).replace(
                    day=1, hour=0, minute=0, second=0, microsecond=0)

        # if the minimal date is missing, fall back to the calendar start
        if not minDate:
            minDate = startWith.date()

        # calendar end: two months after the start. The day is clamped to the
        # length of the target month, because replace(month=...) alone raises
        # ValueError for days that do not exist there (e.g. Jan 31 -> Feb 31).
        monthIndex = startWith.month - 1 + 2
        endYear = startWith.year + monthIndex // 12
        endMonth = monthIndex % 12 + 1
        endDay = min(startWith.day, calendar.monthrange(endYear, endMonth)[1])
        endWith = startWith.replace(year=endYear, month=endMonth, day=endDay)

        # collect the list of dates to display
        dates = []
        _startWith = startWith
        while _startWith < endWith:
            dates.append(_startWith.date())
            _startWith = _startWith + timedelta(days=1)

        # previous month
        prevMonth = addMonths(startWith, -1)

        # if the calendar already starts before the first date with data,
        # there is nothing to scroll back to
        if startWith.date() < minDate:
            prevMonth = False

        # next month
        nextMonth = addMonths(startWith, 1)
        if addMonths(nextMonth, 1) > now:
            nextMonth = False

        self.render('dashboard/taskForm/dateSelector.jinja2',
            {
                'start': start,
                'end': end,
                'dates': dates,
                'maxEnd': now.date(),
                'maxStart': minDate,
                'prevMonth': prevMonth,
                'nextMonth': nextMonth,
                'index': index,
                'isOneDay': start.date() == (end - timedelta(days=1)).date()
            })