コード例 #1
0
ファイル: track_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print '====started at %s====' % datetime.now()

        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None

        processing_day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
        print '====processing %s====' % processing_day.strftime("%Y-%m-%d")

        """
        group by time per hour
        """
        print '====processing by hour===='
        for i in range(24):
            # get time range
            s = datetime.now().replace(hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
            e = s + timedelta(seconds=3600)
            dateline = s
            t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour')
            # foreach project
            for p in Project.objects.filter():
                # group by track action
                g = Group(Track, GAction)
                g.static_attr = {'project': p, 'timeline': t[0]}
                g.values = ['action', ]
                g.dynamic_attr = {'action_id': 'action'}
                g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')}
                g.fargs = {'dateline__range': [s, e], 'project': p}
                g.easy_group()

        """
        group by time per day
        """
        # all day data
        s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)
        dateline = s
        t = Timeline.objects.get_or_create(dateline=dateline, datetype='day')

        # foreach projects
        print '====processing by day===='
        for p in Project.objects.filter():
            # group by track action

            g = Group(GAction, GAction)
            g.static_attr = {'project': p,  'timeline': t[0]}
            g.values = ['action', ]
            g.dynamic_attr = {'action_id': 'action'}
            g.annotate = {'count': Sum('count'), 'timelength': Avg('timelength')}
            g.fargs = {'timeline__dateline__range': [s, e], 'project': p}
            g.easy_group()

            # # group by track value
            # g = Group(TrackValue, GValue)
            # g.static_attr = {'project': p, 'dateline': dateline, 'datetype': 'day'}
            # g.values = ['valuetype_id', 'value']
            # g.annotate = {'count': Count('value'), 'timelength': Avg('track__timelength')}
            # g.fargs = {'track__dateline__range': [s, e], 'track__project': p}
            # g.easy_group()

            # group by track action and time and sessionreferrersite
            g = Group(Track, GReferrerSiteAndAction)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.values = ['action', 'session__referrer_site__id']
            g.dynamic_attr = {'action_id': 'action', 'referrer_site_id': 'session__referrer_site__id'}
            g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')}
            g.fargs = {'dateline__range': [s, e], 'project': p}
            g.easy_group()

            # group by track action and time and sessionreferrerkeyword
            g = Group(Track, GReferrerKeywordAndAction)
            g.static_attr = {'project': p,  'timeline': t[0]}
            g.values = ['action', 'session__referrer_keyword__id']
            g.dynamic_attr = {'action_id': 'action', 'referrer_keyword_id': 'session__referrer_keyword__id'}
            g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')}
            g.fargs = {'dateline__range': [s, e], 'project': p}
            g.easy_group()

        print '====moving data===='
        import MySQLdb as mdb
        con = mdb.connect('localhost', 'root', '', 'datapanel')
        cur = con.cursor()
        sql = "SELECT id FROM track_track WHERE dateline<='%s' ORDER BY dateline desc LIMIT 1" % processing_day
        cur.execute(sql)
        try:
            last_track_id = cur.fetchone()[0]
        except:
            last_track_id = 0
        sql = "REPLACE INTO %s(id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline) SELECT id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline FROM %s f WHERE f.id <= %d" % ('track_trackarch', 'track_track', last_track_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE id <= %d" % ('track_track', last_track_id)
        cur.execute(sql)
        sql = "REPLACE INTO %s(id, track_id, valuetype_id, value) SELECT id, track_id, valuetype_id, value FROM %s f WHERE f.track_id <= %d" % ('track_trackvaluearch', 'track_trackvalue', last_track_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE track_id <= %d" % ('track_trackvalue', last_track_id)
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'track_track'
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'track_trackvalue'
        cur.execute(sql)
        sql = "flush tables"
        cur.execute(sql)
        cache.clear()
        print '====finished at %s====' % datetime.now()
コード例 #2
0
ファイル: session_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print '====started at %s====' % datetime.now()

        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None

        processing_day = datetime.now().replace(
            hour=0, minute=0, second=0,
            microsecond=0) - timedelta(days=days_before)
        print '====processing %s====' % processing_day.strftime("%Y-%m-%d")
        """
        group by time per hour
        """
        print '====processing by hour===='
        for i in range(24):

            # get time range
            s = datetime.now().replace(
                hour=i, minute=0, second=0,
                microsecond=0) - timedelta(days=days_before)
            e = s + timedelta(seconds=3600)
            dateline = s
            t = Timeline.objects.get_or_create(dateline=dateline,
                                               datetype='hour')

            # foreach project
            for p in Project.objects.filter():
                # group by time
                g = Group(Session, GTime)
                g.static_attr = {'project': p, 'timeline': t[0]}
                g.annotate = {
                    'count': Count('id'),
                    'track_count': Avg('track_count'),
                    'timelength': Avg('timelength')
                }
                g.fargs = {
                    'start_time__range': [s, e],
                    'project': p,
                    'track_count__gte': 1
                }
                g.easy_group()
        """
        group by time per day
        """
        # all day data
        s = datetime.now().replace(hour=0, minute=0, second=0,
                                   microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)
        dateline = s
        t = Timeline.objects.get_or_create(dateline=dateline, datetype='day')

        # foreach projects
        print '====processing by day===='
        for p in Project.objects.filter():
            # group by time
            g = Group(GTime, GTime)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.annotate = {
                'count': Sum('count'),
                'track_count': Avg('track_count'),
                'timelength': Avg('timelength')
            }
            g.fargs = {'timeline__dateline__range': [s, e], 'project': p}
            g.easy_group()

            # group by user_referrer_site and time
            g = Group(Session, GReferrerSite)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.fargs = {
                'start_time__range': [s, e],
                'project': p,
                'track_count__gte': 1
            }
            g.values = [
                'referrer_site',
            ]
            g.dynamic_attr = {'referrer_site_id': 'referrer_site'}
            g.annotate = {
                'count': Count('referrer_site'),
                'track_count': Avg('track_count'),
                'timelength': Avg('timelength')
            }
            g.easy_group()

            # group by user_referrer_keyword and time
            g = Group(Session, GReferrerKeyword)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.fargs = {
                'start_time__range': [s, e],
                'project': p,
                'track_count__gte': 1
            }
            g.values = [
                'referrer_keyword',
            ]
            g.dynamic_attr = {'referrer_keyword_id': 'referrer_keyword'}
            g.annotate = {
                'count': Count('referrer_keyword'),
                'track_count': Avg('track_count'),
                'timelength': Avg('timelength')
            }
            g.easy_group()

        print '====moving data===='
        import MySQLdb as mdb
        con = mdb.connect('localhost', 'root', '', 'datapanel')
        cur = con.cursor()
        sql = "SELECT id FROM session_session WHERE start_time<='%s' ORDER BY start_time desc LIMIT 1" % processing_day
        cur.execute(sql)
        try:
            last_id = cur.fetchone()[0]
        except:
            last_id = 0
        sql = "REPLACE INTO %s(id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress) SELECT id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress FROM %s f WHERE f.id <= %d" % (
            'session_sessionarch', 'session_session', last_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE id <= %d" % ('session_session', last_id)
        cur.execute(sql)
        sql = "REPLACE INTO %s(id, session_id, valuetype_id, value) SELECT id, session_id, valuetype_id, value FROM %s f WHERE f.session_id <= %d" % (
            'session_sessionvaluearch', 'session_sessionvalue', last_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE session_id <= %d" % (
            'session_sessionvalue', last_id)
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'session_session'
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'session_sessionvalue'
        cur.execute(sql)
        sql = "flush tables"
        cur.execute(sql)
        cache.clear()
        print '====finished at %s====' % datetime.now()
コード例 #3
0
ファイル: track_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print '====started at %s====' % datetime.now()

        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None

        processing_day = datetime.now().replace(
            hour=0, minute=0, second=0,
            microsecond=0) - timedelta(days=days_before)
        print '====processing %s====' % processing_day.strftime("%Y-%m-%d")
        """
        group by time per hour
        """
        print '====processing by hour===='
        for i in range(24):
            # get time range
            s = datetime.now().replace(
                hour=i, minute=0, second=0,
                microsecond=0) - timedelta(days=days_before)
            e = s + timedelta(seconds=3600)
            dateline = s
            t = Timeline.objects.get_or_create(dateline=dateline,
                                               datetype='hour')
            # foreach project
            for p in Project.objects.filter():
                # group by track action
                g = Group(Track, GAction)
                g.static_attr = {'project': p, 'timeline': t[0]}
                g.values = [
                    'action',
                ]
                g.dynamic_attr = {'action_id': 'action'}
                g.annotate = {
                    'count': Count('action'),
                    'timelength': Avg('timelength')
                }
                g.fargs = {'dateline__range': [s, e], 'project': p}
                g.easy_group()
        """
        group by time per day
        """
        # all day data
        s = datetime.now().replace(hour=0, minute=0, second=0,
                                   microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)
        dateline = s
        t = Timeline.objects.get_or_create(dateline=dateline, datetype='day')

        # foreach projects
        print '====processing by day===='
        for p in Project.objects.filter():
            # group by track action

            g = Group(GAction, GAction)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.values = [
                'action',
            ]
            g.dynamic_attr = {'action_id': 'action'}
            g.annotate = {
                'count': Sum('count'),
                'timelength': Avg('timelength')
            }
            g.fargs = {'timeline__dateline__range': [s, e], 'project': p}
            g.easy_group()

            # # group by track value
            # g = Group(TrackValue, GValue)
            # g.static_attr = {'project': p, 'dateline': dateline, 'datetype': 'day'}
            # g.values = ['valuetype_id', 'value']
            # g.annotate = {'count': Count('value'), 'timelength': Avg('track__timelength')}
            # g.fargs = {'track__dateline__range': [s, e], 'track__project': p}
            # g.easy_group()

            # group by track action and time and sessionreferrersite
            g = Group(Track, GReferrerSiteAndAction)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.values = ['action', 'session__referrer_site__id']
            g.dynamic_attr = {
                'action_id': 'action',
                'referrer_site_id': 'session__referrer_site__id'
            }
            g.annotate = {
                'count': Count('action'),
                'timelength': Avg('timelength')
            }
            g.fargs = {'dateline__range': [s, e], 'project': p}
            g.easy_group()

            # group by track action and time and sessionreferrerkeyword
            g = Group(Track, GReferrerKeywordAndAction)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.values = ['action', 'session__referrer_keyword__id']
            g.dynamic_attr = {
                'action_id': 'action',
                'referrer_keyword_id': 'session__referrer_keyword__id'
            }
            g.annotate = {
                'count': Count('action'),
                'timelength': Avg('timelength')
            }
            g.fargs = {'dateline__range': [s, e], 'project': p}
            g.easy_group()

        print '====moving data===='
        import MySQLdb as mdb
        con = mdb.connect('localhost', 'root', '', 'datapanel')
        cur = con.cursor()
        sql = "SELECT id FROM track_track WHERE dateline<='%s' ORDER BY dateline desc LIMIT 1" % processing_day
        cur.execute(sql)
        try:
            last_track_id = cur.fetchone()[0]
        except:
            last_track_id = 0
        sql = "REPLACE INTO %s(id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline) SELECT id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline FROM %s f WHERE f.id <= %d" % (
            'track_trackarch', 'track_track', last_track_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE id <= %d" % ('track_track', last_track_id)
        cur.execute(sql)
        sql = "REPLACE INTO %s(id, track_id, valuetype_id, value) SELECT id, track_id, valuetype_id, value FROM %s f WHERE f.track_id <= %d" % (
            'track_trackvaluearch', 'track_trackvalue', last_track_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE track_id <= %d" % ('track_trackvalue',
                                                       last_track_id)
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'track_track'
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'track_trackvalue'
        cur.execute(sql)
        sql = "flush tables"
        cur.execute(sql)
        cache.clear()
        print '====finished at %s====' % datetime.now()
コード例 #4
0
ファイル: ecshop_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print label, '====start====', datetime.now()
        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None
        """
        group by time per day
        """
        s = datetime.now().replace(hour=0, minute=0, second=0,
                                   microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)

        for p in Project.objects.filter():
            timeline = Timeline.objects.get_or_create(datetype='day',
                                                      dateline=s)
            Report1.objects.generate(project=p,
                                     timeline=timeline[0],
                                     save=True)

            def get_goods(datarow):
                goods = Goods.objects.get_or_create(project=p,
                                                    goods_id=datarow['value'])
                return goods[0]

            # group by track value
            g = Group(TrackValue, Report2)
            g.static_attr = {'project': p, 'timeline': timeline[0]}
            g.values = ['value']
            g.annotate = {'viewcount': Count('value')}
            g.fargs = {
                'track__dateline__range': [s, e],
                'track__project': p,
                'valuetype__name': 'goods_goods_id'
            }
            g.function_attr = {'goods': get_goods}
            g.easy_group()

            def get_goods2(datarow):
                goods = Goods.objects.get_or_create(
                    project=p, goods_id=datarow['goods_id'])
                return goods[0]

            g = Group(OrderGoods, Report2)
            g.static_attr = {'project': p, 'timeline': timeline[0]}
            g.values = ['goods_id']
            g.exclude_attr = ['goods_id']
            g.annotate = {'sellcount': Sum('goods_number')}
            g.fargs = {'order__dateline__range': [s, e], 'project': p}
            g.function_attr = {'goods': get_goods2}
            g.easy_group(update=True)

        cache.clear()
        print label, '====finished====', datetime.now()
コード例 #5
0
ファイル: ecshop_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print label, '====start====', datetime.now()
        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None

        """
        group by time per day
        """
        s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)

        for p in Project.objects.filter():
            timeline = Timeline.objects.get_or_create(datetype='day', dateline=s)
            Report1.objects.generate(project=p, timeline=timeline[0], save=True)

            def get_goods(datarow):
                goods = Goods.objects.get_or_create(project=p, goods_id=datarow['value'])
                return goods[0]

            # group by track value
            g = Group(TrackValue, Report2)
            g.static_attr = {'project': p, 'timeline': timeline[0]}
            g.values = ['value']
            g.annotate = {'viewcount': Count('value')}
            g.fargs = {'track__dateline__range': [s, e], 'track__project': p, 'valuetype__name': 'goods_goods_id'}
            g.function_attr = {'goods': get_goods}
            g.easy_group()

            def get_goods2(datarow):
                goods = Goods.objects.get_or_create(project=p, goods_id=datarow['goods_id'])
                return goods[0]

            g = Group(OrderGoods, Report2)
            g.static_attr = {'project': p, 'timeline': timeline[0]}
            g.values = ['goods_id']
            g.exclude_attr = ['goods_id']
            g.annotate = {'sellcount': Sum('goods_number')}
            g.fargs = {'order__dateline__range': [s, e], 'project': p}
            g.function_attr = {'goods': get_goods2}
            g.easy_group(update=True)

        cache.clear()
        print label, '====finished====', datetime.now()
コード例 #6
0
ファイル: session_cmd.py プロジェクト: zhouxin262/datapanel
    def handle_label(self, label, **options):
        print '====started at %s====' % datetime.now()

        days_before = 0
        try:
            days_before = int(label)
        except:
            print 'wrong label, should be int'
            return None

        processing_day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
        print '====processing %s====' % processing_day.strftime("%Y-%m-%d")

        """
        group by time per hour
        """
        print '====processing by hour===='
        for i in range(24):

            # get time range
            s = datetime.now().replace(hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
            e = s + timedelta(seconds=3600)
            dateline = s
            t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour')

            # foreach project
            for p in Project.objects.filter():
                # group by time
                g = Group(Session, GTime)
                g.static_attr = {'project': p, 'timeline': t[0]}
                g.annotate = {'count': Count('id'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')}
                g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1}
                g.easy_group()

        """
        group by time per day
        """
        # all day data
        s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before)
        e = s + timedelta(days=1)
        dateline = s
        t = Timeline.objects.get_or_create(dateline=dateline, datetype='day')

        # foreach projects
        print '====processing by day===='
        for p in Project.objects.filter():
            # group by time
            g = Group(GTime, GTime)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.annotate = {'count': Sum('count'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')}
            g.fargs = {'timeline__dateline__range': [s, e], 'project': p}
            g.easy_group()

            # group by user_referrer_site and time
            g = Group(Session, GReferrerSite)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1}
            g.values = ['referrer_site', ]
            g.dynamic_attr = {'referrer_site_id': 'referrer_site'}
            g.annotate = {'count': Count('referrer_site'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')}
            g.easy_group()

            # group by user_referrer_keyword and time
            g = Group(Session, GReferrerKeyword)
            g.static_attr = {'project': p, 'timeline': t[0]}
            g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1}
            g.values = ['referrer_keyword', ]
            g.dynamic_attr = {'referrer_keyword_id': 'referrer_keyword'}
            g.annotate = {'count': Count('referrer_keyword'), 'track_count': Avg('track_count'), 'timelength':
                          Avg('timelength')}
            g.easy_group()

        print '====moving data===='
        import MySQLdb as mdb
        con = mdb.connect('localhost', 'root', '', 'datapanel')
        cur = con.cursor()
        sql = "SELECT id FROM session_session WHERE start_time<='%s' ORDER BY start_time desc LIMIT 1" % processing_day
        cur.execute(sql)
        try:
            last_id = cur.fetchone()[0]
        except:
            last_id = 0
        sql = "REPLACE INTO %s(id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress) SELECT id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress FROM %s f WHERE f.id <= %d" % ('session_sessionarch', 'session_session', last_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE id <= %d" % ('session_session', last_id)
        cur.execute(sql)
        sql = "REPLACE INTO %s(id, session_id, valuetype_id, value) SELECT id, session_id, valuetype_id, value FROM %s f WHERE f.session_id <= %d" % (
            'session_sessionvaluearch', 'session_sessionvalue', last_id)
        cur.execute(sql)
        sql = "DELETE FROM %s WHERE session_id <= %d" % ('session_sessionvalue', last_id)
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'session_session'
        cur.execute(sql)
        sql = "OPTIMIZE TABLE %s" % 'session_sessionvalue'
        cur.execute(sql)
        sql = "flush tables"
        cur.execute(sql)
        cache.clear()
        print '====finished at %s====' % datetime.now()