Example #1
0
 def __init__(self, conf=None):
     '''
     Create the query helpers used by this object.

     conf: mapping that provides 'host' and 'port'; the module-level
     HiveConf is used when conf is omitted or empty.
     '''
     conf = conf or HiveConf
     # Helpers constructed without connection arguments.
     self.down = NewDownloadQuery()
     self.service = NewServiceQuery()
     # Helpers that all receive the configured host and port.
     host, port = conf['host'], conf['port']
     self.org_down = DownloadQuery(host, port)
     self.org_service = ServiceQuery(host, port)
     self.factory = FactoryQuery(host, port)
     self.wap = WapQuery(host, port)
     self.is_factory_sum_stat_done = False
 def __init__(self, conf=None):
     '''
     Create the service query helper and the IP-to-province finder.

     conf: mapping that provides 'host' and 'port'; the module-level
     HiveConf is used when conf is omitted or empty.
     '''
     settings = conf if conf else HiveConf
     self.org_service = ServiceQuery(settings['host'], settings['port'])
     self.ip_finder = IPProvince()
class Hive2db(object):
    '''
    Query Hive and import the result into MySQL (raw user-task rows).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains '2db'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or '2db' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Import one day's user-task rows from Hive into TmpUserTask.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: number of rows saved

        Each row is a tab-separated line with at least seven fields, read
        in this order: time, username, action_type, task_id, task_name,
        ds, amount.  A row that cannot be parsed or saved is printed and
        logged, then skipped.
        '''
        if mode != 'day':
            return
        print(time)
        start, _end = self.normlize_time(mode, time)
        rows = self.org_service.get_all_user_task_stat(start)
        import_num = 0
        for row in rows:
            # Bound before the try so the error handler can always report
            # something, even when the failure happens before the split.
            fields = None
            try:
                fields = row.strip().split('\t')
                stat = TmpUserTask.new()
                stat.time = fields[0]
                stat.username = fields[1]
                stat.action_type = fields[2]
                stat.task_id = fields[3]
                stat.task_name = fields[4]
                stat.ds = fields[5]
                stat.amount = fields[6]
                stat.save()
                import_num += 1
            except Exception as e:
                print(fields if fields is not None else row)
                logging.error('%s\n', str(e), exc_info=True)
        return import_num
class Hive2db(object):
    '''
    Query Hive for IPs and write their provinces to ip_province.txt.
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])
        self.ip_finder = IPProvince()

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains '2db'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or '2db' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Resolve the province of each new IP of one day and dump the result.

        mode: only 'day' is handled; any other mode returns None and
              leaves ip_province.txt untouched
        time: a time inside the day to process
        return: number of "ip<TAB>province" lines written (0 on failure)

        The output file ip_province.txt is created in the current working
        directory and overwritten on every run.  Any error while querying,
        resolving or writing is logged and results in a return value of 0.
        '''
        if mode != 'day':
            # Return before touching the output file so that unsupported
            # modes do not truncate a previously written result.
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        import_num = 0
        # The with-block guarantees the handle is closed on every path.
        with open('ip_province.txt', 'w') as output_file:
            try:
                ips = self.org_service.get_new_ips(start, end)
                print("finding %s ips..." % str(len(ips)))
                lines = []
                for ip in ips:
                    ip = str(ip.strip())
                    province = self.ip_finder.find_ip(ip)
                    lines.append(ip + '\t' + province + '\n')
                output_file.write(''.join(lines).encode('utf-8'))
                # Only count the lines once they have actually been written.
                import_num = len(lines)
            except Exception as e:
                logging.error('%s\n', str(e), exc_info=True)
        return import_num
class Hive2db(object):
    '''
    Query Hive and import the result into MySQL (per-task user stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains '2db'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or '2db' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Import one day's user-task stats from Hive into UserTask.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: number of rows saved

        Each row is a tab-separated line; fields 1-4 are stored as
        task_type, task_id, task_name and uv (field 0 is ignored, the
        record time is the day start formatted as YYYY-MM-DD).  A row that
        cannot be parsed or saved is printed and logged, then skipped.
        '''
        if mode != 'day':
            return
        print(time)
        start, _end = self.normlize_time(mode, time)
        rows = self.org_service.get_user_task_stat(start)
        day = start.strftime('%Y-%m-%d')
        import_num = 0
        for row in rows:
            # Bound before the try so the error handler can always report
            # something, even when the failure happens before the split.
            fields = None
            try:
                fields = row.strip().split('\t')
                stat = UserTask.new()
                stat.time = day
                stat.task_type = fields[1]
                stat.task_id = fields[2]
                stat.task_name = fields[3]
                stat.uv = fields[4]
                stat.save()
                import_num += 1
            except Exception as e:
                print(fields if fields is not None else row)
                logging.error('%s\n', str(e), exc_info=True)
        return import_num
 def __init__(self, conf=None):
     '''
     Create the service query helper.

     conf: mapping that provides 'host' and 'port'; the module-level
     HiveConf is used when conf is omitted or empty.
     '''
     settings = conf if conf else HiveConf
     self.org_service = ServiceQuery(settings['host'], settings['port'])
class EValue2db(object):
    '''
    Query Hive and import the result into MySQL (weekly retention).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (accepted on any weekday; the normalised week
              start is printed) or 'month' (only when now is the first
              day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'retention'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode in ('day', 'week')
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if mode == 'week':
            print(start)
        if not stats or 'retention' in stats:
            self.import_new_user_run_week_retention_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_new_user_run_week_retention_data(self, mode, time):
        '''
        Store one RetentionStat record for the week that contains time.

        mode: only 'week' is handled; any other mode returns None
        time: a time inside the week; its week number ("%W") is stored
        return: 1 when the record was saved, otherwise 0

        Two Hive queries are issued: one over the week before the given
        one and one over both weeks; both also receive yesterday's
        timestamp.  The first element of each result is stored as
        new_user_run and retention.  When either result is empty nothing
        is saved and an error is logged.
        '''
        if mode != 'week':
            return
        lstart, lend = self.normlize_time(mode, time)
        one_week = datetime.timedelta(days=7)
        llstart = lstart - one_week
        llend = lend - one_week
        yest = datetime.datetime.now() - datetime.timedelta(days=1)
        import_num = 0
        stat1 = self.org_service.get_last_last_week_new_user_run_stat(
            llstart, llend, yest)
        print(stat1)
        stat2 = self.org_service.get_last_week_retention_stat(
            yest, llstart, llend, lstart, lend)
        print(stat2)
        if not (stat1 and stat2):
            # Without both results there is nothing meaningful to store;
            # report it explicitly instead of failing on unbound names.
            logging.error('retention stats missing for week starting %s',
                          lstart)
            return import_num
        new_user_run = stat1[0]
        retention = stat2[0]
        try:
            stat = RetentionStat.new()
            stat.start_time = lstart
            stat.week_num = time.strftime("%W")
            stat.new_user_run = new_user_run
            stat.retention = retention
            print(stat)
            stat.save()
            import_num += 1
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
class Hive2db(object):
    '''
    Query Hive and import the result into MySQL (fujin daily stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'fujin'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or 'fujin' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Store one FuJin record for the day that contains time.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: 1 when the record was saved, otherwise 0

        The first row of the pv/uv query supplies pv and uv, the first
        row of the fee query supplies user_pay and fee (converted to
        float); both rows are tab-separated.  Any failure is logged
        together with whatever was fetched so far.
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        import_num = 0
        # Pre-bind both results: the error handler prints them, and must
        # not fail itself when a query raised before they were assigned.
        fujin1 = fujin2 = None
        try:
            fujin1 = self.org_service.get_fujin_pvuv_stat(
                start, end)[0].split('\t')
            print(fujin1)
            fujin2 = self.org_service.get_fujin_fee_stat(
                start, end)[0].split('\t')
            print(fujin2)
            stat = FuJin.new()
            stat.time = start.strftime('%Y-%m-%d')
            stat.pv = fujin1[0]
            stat.uv = fujin1[1]
            stat.user_pay = fujin2[0]
            stat.fee = float(fujin2[1])
            stat.save()
            import_num += 1
        except Exception as e:
            print('%s %s' % (fujin1, fujin2))
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
Example #9
0
class EValue2db(object):
    '''
    Query Hive and import the result into MySQL (wechat daily stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'wechat'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or 'wechat' in stats:
            self.import_wechat_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_wechat_data(self, mode, time):
        '''
        Import one day's wechat stats from Hive into WechatStat.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: number of rows saved

        Each row is a tab-separated line whose first three fields are
        stored as bookid, pv and uv; the record date is the day start
        formatted as YYYY-MM-DD.  A row that cannot be parsed or saved is
        logged and skipped, the remaining rows are still imported.
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        import_num = 0
        wechat = self.org_service.get_wechat_stat(start, end)
        day = start.strftime('%Y-%m-%d')
        for line in wechat:
            # Handle errors per row so that one malformed line does not
            # discard every row that follows it.
            try:
                data = line.strip().split('\t')
                stat = WechatStat.new()
                stat.datetime = day
                stat.bookid = data[0]
                stat.pv = data[1]
                stat.uv = data[2]
                stat.save()
                import_num += 1
            except Exception as e:
                logging.error('%s\n', str(e), exc_info=True)
        return import_num
Example #10
0
class Hive2db(object):
    '''
    Query Hive and import the result into MySQL (bookworm daily stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains '2db'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or '2db' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Store one BookWorm record for the day that contains time.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: 1 when the record was saved, otherwise 0

        Nine Hive queries are issued and the first row of each is used.
        Six of the rows are tab-separated pairs; the three pay-count rows
        are stored unmodified.  An empty query result raises IndexError to
        the caller; failures while parsing or saving are logged.
        '''
        if mode != 'day':
            return
        print(time)
        start, _end = self.normlize_time(mode, time)
        service = self.org_service
        pvuv = service.get_bookworm_pvuv_stat(start)[0]
        right_pvuv = service.get_bookworm_right_pvuv_stat(start)[0]
        wrong_pvuv = service.get_bookworm_wrong_pvuv_stat(start)[0]
        pay_pvuv = service.get_bookworm_pay_pvuv_stat(start)[0]
        recharge_pvuv = service.get_bookworm_recharge_pvuv_stat(start)[0]
        amount_stat = service.get_bookworm_amount_stat(start)[0]
        count1 = service.get_bookworm_pay_count_1(start)[0]
        count2 = service.get_bookworm_pay_count_2(start)[0]
        count3 = service.get_bookworm_pay_count_3(start)[0]
        import_num = 0
        try:
            pvuv_tmp = pvuv.strip().split('\t')
            # Each row is parsed into the variable of the same name; the
            # right/wrong rows must not be crossed over.
            right_pvuv_tmp = right_pvuv.strip().split('\t')
            wrong_pvuv_tmp = wrong_pvuv.strip().split('\t')
            pay_pvuv_tmp = pay_pvuv.strip().split('\t')
            recharge_pvuv_tmp = recharge_pvuv.strip().split('\t')
            amount_pvuv_tmp = amount_stat.strip().split('\t')
            stat = BookWorm.new()
            stat.time = start.strftime('%Y-%m-%d')
            stat.pv = int(pvuv_tmp[0])
            stat.uv = int(pvuv_tmp[1])
            stat.right_user = right_pvuv_tmp[0]
            stat.right_num = right_pvuv_tmp[1]
            stat.wrong_user = wrong_pvuv_tmp[0]
            stat.wrong_num = wrong_pvuv_tmp[1]
            # NOTE(review): the pay row is read in the opposite column
            # order from the other pairs - confirm against the query.
            stat.pay_user = pay_pvuv_tmp[1]
            stat.pay_times = pay_pvuv_tmp[0]
            stat.recharge_page_pv = recharge_pvuv_tmp[0]
            stat.recharge_page_uv = recharge_pvuv_tmp[1]
            stat.recharge_user = amount_pvuv_tmp[0]
            stat.amount = amount_pvuv_tmp[1]
            stat.pay_num1 = count1
            stat.pay_num2 = count2
            stat.pay_num3 = count3
            stat.save()
            import_num += 1
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
class EValue2db(object):
    '''
    Query Hive and import the result into MySQL (ARPU fee stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'arpu'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or 'arpu' in stats:
            self.import_one_week_fee_and_new_user_run(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_one_week_fee_and_new_user_run(self, mode, time):
        '''
        Store the 7-, 30- and 90-day ARPU records for the given day.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: 1 when all three records were saved, otherwise 0

        The three records are dated 6, 29 and 89 days before the day
        start.  The first failure is logged and stops the remaining saves.
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        service = self.org_service
        import_num = 0
        one_week_fee = service.get_arpu_one_week_fee(start, end)
        new_user_visit = service.get_arpu_one_day_new_user_run(start, end)
        thirty_days_fee = service.get_arpu_30_days_fee(start, end)
        ninety_days_fee = service.get_arpu_90_days_fee(start, end)
        try:
            weekly = Arpu7DaysArpuStat().new()
            weekly.time = start - datetime.timedelta(days=6)
            weekly.one_week_fee = one_week_fee
            weekly.new_user_visit = new_user_visit
            weekly.save()

            monthly = Arpu30DaysArpuFeeStat.new()
            monthly.time = start - datetime.timedelta(days=29)
            monthly.thirty_days_fee = thirty_days_fee
            monthly.save()

            quarterly = Arpu90DaysArpuFeeStat.new()
            quarterly.time = start - datetime.timedelta(days=89)
            quarterly.ninety_days_fee = ninety_days_fee
            quarterly.save()
            import_num += 1
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
Example #12
0
class EValue2db(object):
    '''
    Query Hive and import the result into MySQL (ARPU fee stats).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'arpu'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or 'arpu' in stats:
            self.import_one_week_fee_and_new_user_run(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_one_week_fee_and_new_user_run(self, mode, time):
        '''
        Store the 7-, 30- and 90-day ARPU records for the given day.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: 1 when all three records were saved, otherwise 0

        The three records are dated 6, 29 and 89 days before the day
        start.  The first failure is logged and stops the remaining saves.
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        service = self.org_service
        import_num = 0
        one_week_fee = service.get_arpu_one_week_fee(start, end)
        new_user_visit = service.get_arpu_one_day_new_user_run(start, end)
        thirty_days_fee = service.get_arpu_30_days_fee(start, end)
        ninety_days_fee = service.get_arpu_90_days_fee(start, end)
        try:
            weekly = Arpu7DaysArpuStat().new()
            weekly.time = start - datetime.timedelta(days=6)
            weekly.one_week_fee = one_week_fee
            weekly.new_user_visit = new_user_visit
            weekly.save()

            monthly = Arpu30DaysArpuFeeStat.new()
            monthly.time = start - datetime.timedelta(days=29)
            monthly.thirty_days_fee = thirty_days_fee
            monthly.save()

            quarterly = Arpu90DaysArpuFeeStat.new()
            quarterly.time = start - datetime.timedelta(days=89)
            quarterly.ninety_days_fee = ninety_days_fee
            quarterly.save()
            import_num += 1
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
Example #13
0
class Hive2db(object):
    '''
    Query Hive and import the result into MySQL (userbandv6 rows).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (only when now is a Monday) or 'month' (only
              when now is the first day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'userbandv6'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode == 'day'
            or (mode == 'week' and now.weekday() == 0)
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if not stats or 'userbandv6' in stats:
            self.import_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_data(self, mode, time):
        '''
        Import one day's userbandv6 rows from Hive into UserBandV6.

        mode: only 'day' is handled; any other mode returns None
        time: a time inside the day to import
        return: number of rows saved

        Each row is a tab-separated line with at least ten fields, read in
        this order: datetime (surrounding square brackets removed),
        username, type, uid, plan_id, inner_version, partner, productname,
        amount, ds.  An inner_version found in INNER_VERSION is replaced
        by its mapped value.  A row that cannot be parsed or saved is
        printed and logged, then skipped.
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        import_num = 0
        userbandv6 = self.org_service.get_userbandv6_stat(start, end)
        for line in userbandv6:
            # Bound before the try so the error handler can always report
            # something, even when the failure happens before the split.
            data = None
            try:
                data = line.strip().split('\t')
                stat = UserBandV6.new()
                stat.datetime = data[0].strip("[").strip("]")
                stat.username = data[1]
                stat.type = data[2]
                stat.uid = data[3]
                stat.plan_id = data[4]
                stat.inner_version = data[5]
                if stat.inner_version in INNER_VERSION:
                    stat.inner_version = INNER_VERSION[stat.inner_version]
                stat.partner = data[6]
                stat.productname = data[7]
                stat.amount = data[8]
                stat.ds = data[9]
                stat.save()
                import_num += 1
            except Exception as e:
                print(data if data is not None else line)
                logging.error('%s\n', str(e), exc_info=True)
        return import_num
Example #14
0
class EValue2db(object):
    '''
    Query Hive and import the result into MySQL (weekly retention).
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping that provides 'host' and 'port'; the module-level
        HiveConf is used when conf is omitted or empty.
        '''
        conf = conf or HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        Return the (start, end) bounds of the period that contains start.

        mode: one of 'hour', 'day', 'week', 'month'
        start: a time inside the period; it is normalised with
               time_start (presumably to the beginning of the period)
        return: (period start, period end); the end is one hour/day/week
                after the start, or the first day of the following month
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        begin = time_start(start, mode)
        if mode == 'month':
            # December rolls over into January of the next year.
            if begin.month == 12:
                end = datetime.datetime(begin.year + 1, 1, 1)
            else:
                end = datetime.datetime(begin.year, begin.month + 1, 1)
        elif mode == 'week':
            end = begin + datetime.timedelta(days=7)
        elif mode == 'day':
            end = begin + datetime.timedelta(days=1)
        elif mode == 'hour':
            end = begin + datetime.timedelta(hours=1)
        return begin, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Run the import for the period of the given mode that contains now.

        mode: 'day', 'week' (accepted on any weekday; the normalised week
              start is printed) or 'month' (only when now is the first
              day of a month)
        now: reference time; defaults to 24 hours before the current time
        stats: optional collection of stat names; the import runs when it
               is empty or contains 'retention'
        partner_ids: accepted but not used
        return: True when the mode was supported, otherwise None
        '''
        began = datetime.datetime.now()
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        supported = (
            mode in ('day', 'week')
            or (mode == 'month' and now.day == 1)
        )
        if not supported:
            print('%s %s not supported' % (mode, now))
            return
        start = time_start(now, mode)
        if mode == 'week':
            print(start)
        if not stats or 'retention' in stats:
            self.import_new_user_run_week_retention_data(mode, start)
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - began))
        return True

    def import_new_user_run_week_retention_data(self, mode, time):
        '''
        Store one RetentionStat record for the week that contains time.

        mode: only 'week' is handled; any other mode returns None
        time: a time inside the week; its week number ("%W") is stored
        return: 1 when the record was saved, otherwise 0

        Two Hive queries are issued: one over the week before the given
        one and one over both weeks; both also receive yesterday's
        timestamp.  The first element of each result is stored as
        new_user_run and retention.  When either result is empty nothing
        is saved and an error is logged.
        '''
        if mode != 'week':
            return
        lstart, lend = self.normlize_time(mode, time)
        one_week = datetime.timedelta(days=7)
        llstart = lstart - one_week
        llend = lend - one_week
        yest = datetime.datetime.now() - datetime.timedelta(days=1)
        import_num = 0
        stat1 = self.org_service.get_last_last_week_new_user_run_stat(
            llstart, llend, yest)
        print(stat1)
        stat2 = self.org_service.get_last_week_retention_stat(
            yest, llstart, llend, lstart, lend)
        print(stat2)
        if not (stat1 and stat2):
            # Without both results there is nothing meaningful to store;
            # report it explicitly instead of failing on unbound names.
            logging.error('retention stats missing for week starting %s',
                          lstart)
            return import_num
        new_user_run = stat1[0]
        retention = stat2[0]
        try:
            stat = RetentionStat.new()
            stat.start_time = lstart
            stat.week_num = time.strftime("%W")
            stat.new_user_run = new_user_run
            stat.retention = retention
            print(stat)
            stat.save()
            import_num += 1
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
        return import_num
class Hive2db(object):
    '''
    query from hive and import the result to mysql
    '''
    # Layout of a recharge query row: the tab-separated field at index i
    # is stored in the stat attribute named at index i.
    _RECHARGE_COLUMNS = ('uv', 'pv', 'origin', 'partner_id', 'innerver',
                         'amount', 'recharging_type')

    def __init__(self, conf=None):
        '''
        conf: mapping with 'host' and 'port' entries; the module-level
              HiveConf is used when conf is omitted or empty.
        '''
        if not conf:
            conf = HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def handle_big_data(self, data):
        '''
        Convert a number, possibly written in scientific notation
        (e.g. '1.5E10' or '1e+20'), to a float.

        data: number or numeric string; surrounding whitespace is ignored.
        return: the value as a float, or None when data is None or blank.
        raises: ValueError when the text is not a valid number.
        '''
        if data is None:
            return None
        text = str(data).strip()
        if not text:
            return None
        # float() understands plain decimals and both 'E' and 'e'
        # exponents, so no manual splitting on the exponent marker is needed.
        return float(text)

    def normlize_time(self, mode, start):
        '''
        mode: hour,day,week,month
        start: time inside the wanted period; it is first passed through
               time_start(start, mode)
        return: (start, end) tuple, end being one period after start
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        start = time_start(start, mode)
        if mode == 'month':
            # Months have no fixed length: the end is the first day of
            # the following month, rolling over the year after December.
            if start.month == 12:
                end = datetime.datetime(start.year + 1, 1, 1)
            else:
                end = datetime.datetime(start.year, start.month + 1, 1)
        else:
            period = {
                'hour': datetime.timedelta(hours=1),
                'day': datetime.timedelta(days=1),
                'week': datetime.timedelta(days=7),
            }
            end = start + period[mode]
        return start, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Entry point: work out the period start and run the imports.

        mode: 'day' always runs; 'week' runs only when now is a Monday and
              'month' only when now is the first day of a month.
        now: reference time, defaults to 24 hours before the current time.
        stats: optional collection of import names; the recharge import
               runs when it is empty/omitted or contains '2db'.
        partner_ids: accepted but not used by this method.
        return: True after a run, None when the mode/time pair is rejected.
        '''
        n0 = datetime.datetime.now()
        # normalize time
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        if mode == 'day':
            start = time_start(now, 'day')
        elif mode == 'week' and now.weekday() == 0:
            start = time_start(now, 'week')
        elif mode == 'month' and now.day == 1:
            start = time_start(now, 'month')
        else:
            print('%s %s not supported' % (mode, now))
            return
        # import data
        if not stats or '2db' in stats:
            self.import_data(mode, start)
        # Single formatted argument: prints the same text under the
        # Python 2 print statement and the print() function.
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - n0))
        return True

    def _import_recharge_rows(self, rows, stat_cls, day):
        '''
        Save one stat_cls record per tab-separated row.

        rows: iterable of text lines laid out as _RECHARGE_COLUMNS
        stat_cls: stat class providing new(); the record must provide save()
        day: value stored in each record's time attribute
        return: number of rows saved; failing rows are logged and skipped
        '''
        saved = 0
        for row in rows:
            fields = None
            try:
                fields = row.strip().split('\t')
                stat = stat_cls.new()
                stat.time = day
                for index, column in enumerate(self._RECHARGE_COLUMNS):
                    setattr(stat, column, fields[index])
                stat.save()
                saved += 1
            except Exception as e:
                # Show the offending row: the parsed fields when splitting
                # succeeded, otherwise the raw value.
                print(fields if fields is not None else row)
                logging.error('%s\n', str(e), exc_info=True)
        return saved

    def import_data(self, mode, time):
        '''
        Import one day's recharge order and recharge finish statistics.

        mode: only 'day' is handled; any other mode returns None at once.
        time: time inside the day to import
        return: number of rows saved, or None when mode != 'day'
        '''
        if mode != 'day':
            return
        print(time)
        start = self.normlize_time(mode, time)[0]
        day = start.strftime('%Y-%m-%d')

        # NOTE: an import of PaymentBalanceStat rows from
        # org_service.get_balance_stat(start), parsed with
        # handle_big_data(), used to run here and is currently disabled.

        recharge_amount_order = self.org_service.get_recharge_amount_order(start)
        print(recharge_amount_order)
        import_num = self._import_recharge_rows(
            recharge_amount_order, PaymentRechargingAmountOrderStat, day)

        recharge_amount_finish = self.org_service.get_recharge_amount_finish(start)
        import_num += self._import_recharge_rows(
            recharge_amount_finish, PaymentRechargingAmountFinishStat, day)
        return import_num
Example #16
0
 def __init__(self, conf=None):
     '''
     conf: mapping with 'host' and 'port' entries; HiveConf is used
           when conf is omitted or empty.
     '''
     settings = conf or HiveConf
     self.org_service = ServiceQuery(settings['host'], settings['port'])
class EValue2db(object):
    '''
    query from hive and import the result to mysql
    '''
    def __init__(self, conf=None):
        '''
        conf: mapping with 'host' and 'port' entries; the module-level
              HiveConf is used when conf is omitted or empty.
        '''
        if not conf:
            conf = HiveConf
        self.org_service = ServiceQuery(conf['host'], conf['port'])

    def normlize_time(self, mode, start):
        '''
        mode: hour,day,week,month
        start: time inside the wanted period; it is first passed through
               time_start(start, mode)
        return: (start, end) tuple, end being one period after start
        '''
        assert mode in ('hour', 'day', 'week', 'month')
        start = time_start(start, mode)
        if mode == 'month':
            # Months have no fixed length: the end is the first day of
            # the following month, rolling over the year after December.
            if start.month == 12:
                end = datetime.datetime(start.year + 1, 1, 1)
            else:
                end = datetime.datetime(start.year, start.month + 1, 1)
        else:
            period = {
                'hour': datetime.timedelta(hours=1),
                'day': datetime.timedelta(days=1),
                'week': datetime.timedelta(days=7),
            }
            end = start + period[mode]
        return start, end

    def start_importing(self, mode, now=None, stats=None, partner_ids=None):
        '''
        Entry point: work out the period start and run the imports.

        mode: 'day' always runs; 'week' runs only when now is a Monday and
              'month' only when now is the first day of a month.
        now: reference time, defaults to 24 hours before the current time.
        stats: optional collection of import names; the wechat import
               runs when it is empty/omitted or contains 'wechat'.
        partner_ids: accepted but not used by this method.
        return: True after a run, None when the mode/time pair is rejected.
        '''
        n0 = datetime.datetime.now()
        # normalize time
        if not now:
            now = datetime.datetime.now() - datetime.timedelta(days=1)
        if mode == 'day':
            start = time_start(now, 'day')
        elif mode == 'week' and now.weekday() == 0:
            start = time_start(now, 'week')
        elif mode == 'month' and now.day == 1:
            start = time_start(now, 'month')
        else:
            print('%s %s not supported' % (mode, now))
            return
        # import data
        if not stats or 'wechat' in stats:
            self.import_wechat_data(mode, start)
        # Single formatted argument: prints the same text under the
        # Python 2 print statement and the print() function.
        print('%s %s time: %s' % (mode, start, datetime.datetime.now() - n0))
        return True

    def _save_wechat_row(self, day, line):
        '''
        Store one tab-separated "bookid, pv, uv" line as a WechatStat row.

        return: True when the row was saved, False when it failed (the
                failure is logged).
        '''
        try:
            data = line.strip().split('\t')
            stat = WechatStat.new()
            stat.datetime = day
            stat.bookid = data[0]
            stat.pv = data[1]
            stat.uv = data[2]
            stat.save()
        except Exception as e:
            logging.error('%s\n', str(e), exc_info=True)
            return False
        return True

    def import_wechat_data(self, mode, time):
        '''
        Import one day's wechat statistics.

        mode: only 'day' is handled; any other mode returns None at once.
        time: time inside the day to import
        return: number of rows saved, or None when mode != 'day'
        '''
        if mode != 'day':
            return
        print(time)
        start, end = self.normlize_time(mode, time)
        import_num = 0
        wechat = self.org_service.get_wechat_stat(start, end)
        try:
            day = start.strftime('%Y-%m-%d')
            for line in wechat:
                # Rows are handled one by one so that a single malformed
                # row does not abort the import of the remaining rows.
                if self._save_wechat_row(day, line):
                    import_num += 1
        except Exception as e:
            # Failure while iterating the query result itself.
            logging.error('%s\n', str(e), exc_info=True)
        return import_num