def handle_label(self, label, **options): print label, '====start====', datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None """ group by time per day """ s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) for p in Project.objects.filter(): timeline = Timeline.objects.get_or_create(datetype='day', dateline=s) Report1.objects.generate(project=p, timeline=timeline[0], save=True) def get_goods(datarow): goods = Goods.objects.get_or_create(project=p, goods_id=datarow['value']) return goods[0] # group by track value g = Group(TrackValue, Report2) g.static_attr = {'project': p, 'timeline': timeline[0]} g.values = ['value'] g.annotate = {'viewcount': Count('value')} g.fargs = { 'track__dateline__range': [s, e], 'track__project': p, 'valuetype__name': 'goods_goods_id' } g.function_attr = {'goods': get_goods} g.easy_group() def get_goods2(datarow): goods = Goods.objects.get_or_create( project=p, goods_id=datarow['goods_id']) return goods[0] g = Group(OrderGoods, Report2) g.static_attr = {'project': p, 'timeline': timeline[0]} g.values = ['goods_id'] g.exclude_attr = ['goods_id'] g.annotate = {'sellcount': Sum('goods_number')} g.fargs = {'order__dateline__range': [s, e], 'project': p} g.function_attr = {'goods': get_goods2} g.easy_group(update=True) cache.clear() print label, '====finished====', datetime.now()
def handle_label(self, label, **options): print label, '====start====', datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None """ group by time per day """ s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) for p in Project.objects.filter(): timeline = Timeline.objects.get_or_create(datetype='day', dateline=s) Report1.objects.generate(project=p, timeline=timeline[0], save=True) def get_goods(datarow): goods = Goods.objects.get_or_create(project=p, goods_id=datarow['value']) return goods[0] # group by track value g = Group(TrackValue, Report2) g.static_attr = {'project': p, 'timeline': timeline[0]} g.values = ['value'] g.annotate = {'viewcount': Count('value')} g.fargs = {'track__dateline__range': [s, e], 'track__project': p, 'valuetype__name': 'goods_goods_id'} g.function_attr = {'goods': get_goods} g.easy_group() def get_goods2(datarow): goods = Goods.objects.get_or_create(project=p, goods_id=datarow['goods_id']) return goods[0] g = Group(OrderGoods, Report2) g.static_attr = {'project': p, 'timeline': timeline[0]} g.values = ['goods_id'] g.exclude_attr = ['goods_id'] g.annotate = {'sellcount': Sum('goods_number')} g.fargs = {'order__dateline__range': [s, e], 'project': p} g.function_attr = {'goods': get_goods2} g.easy_group(update=True) cache.clear() print label, '====finished====', datetime.now()
def handle_label(self, label, **options): print '====started at %s====' % datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None processing_day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) print '====processing %s====' % processing_day.strftime("%Y-%m-%d") """ group by time per hour """ print '====processing by hour====' for i in range(24): # get time range s = datetime.now().replace(hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(seconds=3600) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour') # foreach project for p in Project.objects.filter(): # group by track action g = Group(Track, GAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', ] g.dynamic_attr = {'action_id': 'action'} g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')} g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() """ group by time per day """ # all day data s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='day') # foreach projects print '====processing by day====' for p in Project.objects.filter(): # group by track action g = Group(GAction, GAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', ] g.dynamic_attr = {'action_id': 'action'} g.annotate = {'count': Sum('count'), 'timelength': Avg('timelength')} g.fargs = {'timeline__dateline__range': [s, e], 'project': p} g.easy_group() # # group by track value # g = Group(TrackValue, GValue) # g.static_attr = {'project': p, 'dateline': dateline, 'datetype': 'day'} # g.values = ['valuetype_id', 'value'] # g.annotate = {'count': Count('value'), 'timelength': Avg('track__timelength')} # g.fargs = {'track__dateline__range': [s, e], 'track__project': p} # g.easy_group() # group by track action and time and sessionreferrersite g = Group(Track, GReferrerSiteAndAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', 'session__referrer_site__id'] g.dynamic_attr = {'action_id': 'action', 'referrer_site_id': 'session__referrer_site__id'} g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')} g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() # group by track action and time and sessionreferrerkeyword g = Group(Track, GReferrerKeywordAndAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', 'session__referrer_keyword__id'] g.dynamic_attr = {'action_id': 'action', 'referrer_keyword_id': 'session__referrer_keyword__id'} g.annotate = {'count': Count('action'), 'timelength': Avg('timelength')} g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() print '====moving data====' import MySQLdb as mdb con = mdb.connect('localhost', 'root', '', 'datapanel') cur = con.cursor() sql = "SELECT id FROM track_track WHERE dateline<='%s' ORDER BY dateline desc LIMIT 1" % processing_day cur.execute(sql) try: last_track_id = cur.fetchone()[0] except: last_track_id = 0 sql = "REPLACE INTO %s(id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline) SELECT id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline FROM %s f WHERE f.id <= %d" % ('track_trackarch', 'track_track', last_track_id) cur.execute(sql) sql = "DELETE FROM %s WHERE id <= %d" % ('track_track', last_track_id) cur.execute(sql) sql = "REPLACE INTO %s(id, track_id, valuetype_id, value) SELECT id, track_id, valuetype_id, value FROM %s f WHERE f.track_id <= %d" % ('track_trackvaluearch', 'track_trackvalue', last_track_id) cur.execute(sql) sql = "DELETE FROM %s WHERE track_id <= %d" % ('track_trackvalue', last_track_id) cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'track_track' cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'track_trackvalue' cur.execute(sql) sql = "flush tables" cur.execute(sql) cache.clear() print '====finished at %s====' % datetime.now()
def handle_label(self, label, **options): print '====started at %s====' % datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None processing_day = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) print '====processing %s====' % processing_day.strftime("%Y-%m-%d") """ group by time per hour """ print '====processing by hour====' for i in range(24): # get time range s = datetime.now().replace( hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(seconds=3600) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour') # foreach project for p in Project.objects.filter(): # group by time g = Group(Session, GTime) g.static_attr = {'project': p, 'timeline': t[0]} g.annotate = { 'count': Count('id'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength') } g.fargs = { 'start_time__range': [s, e], 'project': p, 'track_count__gte': 1 } g.easy_group() """ group by time per day """ # all day data s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='day') # foreach projects print '====processing by day====' for p in Project.objects.filter(): # group by time g = Group(GTime, GTime) g.static_attr = {'project': p, 'timeline': t[0]} g.annotate = { 'count': Sum('count'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength') } g.fargs = {'timeline__dateline__range': [s, e], 'project': p} g.easy_group() # group by user_referrer_site and time g = Group(Session, GReferrerSite) g.static_attr = {'project': p, 'timeline': t[0]} g.fargs = { 'start_time__range': [s, e], 'project': p, 'track_count__gte': 1 } g.values = [ 'referrer_site', ] g.dynamic_attr = {'referrer_site_id': 'referrer_site'} g.annotate = { 'count': Count('referrer_site'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength') } g.easy_group() # group by user_referrer_keyword and time g = Group(Session, GReferrerKeyword) g.static_attr = {'project': p, 'timeline': t[0]} g.fargs = { 'start_time__range': [s, e], 'project': p, 'track_count__gte': 1 } g.values = [ 'referrer_keyword', ] g.dynamic_attr = {'referrer_keyword_id': 'referrer_keyword'} g.annotate = { 'count': Count('referrer_keyword'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength') } g.easy_group() print '====moving data====' import MySQLdb as mdb con = mdb.connect('localhost', 'root', '', 'datapanel') cur = con.cursor() sql = "SELECT id FROM session_session WHERE start_time<='%s' ORDER BY start_time desc LIMIT 1" % processing_day cur.execute(sql) try: last_id = cur.fetchone()[0] except: last_id = 0 sql = "REPLACE INTO %s(id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress) SELECT id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress FROM %s f WHERE f.id <= %d" % ( 'session_sessionarch', 'session_session', last_id) cur.execute(sql) sql = "DELETE FROM %s WHERE id <= %d" % ('session_session', last_id) cur.execute(sql) sql = "REPLACE INTO %s(id, session_id, valuetype_id, value) SELECT id, session_id, valuetype_id, value FROM %s f WHERE f.session_id <= %d" % ( 'session_sessionvaluearch', 'session_sessionvalue', last_id) cur.execute(sql) sql = "DELETE FROM %s WHERE session_id <= %d" % ( 'session_sessionvalue', last_id) cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'session_session' cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'session_sessionvalue' cur.execute(sql) sql = "flush tables" cur.execute(sql) cache.clear() print '====finished at %s====' % datetime.now()
def handle_label(self, label, **options): print '====started at %s====' % datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None processing_day = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) print '====processing %s====' % processing_day.strftime("%Y-%m-%d") """ group by time per hour """ print '====processing by hour====' for i in range(24): # get time range s = datetime.now().replace( hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(seconds=3600) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour') # foreach project for p in Project.objects.filter(): # group by track action g = Group(Track, GAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = [ 'action', ] g.dynamic_attr = {'action_id': 'action'} g.annotate = { 'count': Count('action'), 'timelength': Avg('timelength') } g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() """ group by time per day """ # all day data s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='day') # foreach projects print '====processing by day====' for p in Project.objects.filter(): # group by track action g = Group(GAction, GAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = [ 'action', ] g.dynamic_attr = {'action_id': 'action'} g.annotate = { 'count': Sum('count'), 'timelength': Avg('timelength') } g.fargs = {'timeline__dateline__range': [s, e], 'project': p} g.easy_group() # # group by track value # g = Group(TrackValue, GValue) # g.static_attr = {'project': p, 'dateline': dateline, 'datetype': 'day'} # g.values = ['valuetype_id', 'value'] # g.annotate = {'count': Count('value'), 'timelength': Avg('track__timelength')} # g.fargs = {'track__dateline__range': [s, e], 'track__project': p} # g.easy_group() # group by track action and time and sessionreferrersite g = Group(Track, GReferrerSiteAndAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', 'session__referrer_site__id'] g.dynamic_attr = { 'action_id': 'action', 'referrer_site_id': 'session__referrer_site__id' } g.annotate = { 'count': Count('action'), 'timelength': Avg('timelength') } g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() # group by track action and time and sessionreferrerkeyword g = Group(Track, GReferrerKeywordAndAction) g.static_attr = {'project': p, 'timeline': t[0]} g.values = ['action', 'session__referrer_keyword__id'] g.dynamic_attr = { 'action_id': 'action', 'referrer_keyword_id': 'session__referrer_keyword__id' } g.annotate = { 'count': Count('action'), 'timelength': Avg('timelength') } g.fargs = {'dateline__range': [s, e], 'project': p} g.easy_group() print '====moving data====' import MySQLdb as mdb con = mdb.connect('localhost', 'root', '', 'datapanel') cur = con.cursor() sql = "SELECT id FROM track_track WHERE dateline<='%s' ORDER BY dateline desc LIMIT 1" % processing_day cur.execute(sql) try: last_track_id = cur.fetchone()[0] except: last_track_id = 0 sql = "REPLACE INTO %s(id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline) SELECT id, project_id, session_id, action_id, url, from_track_id, referrer_site_id, referrer_keyword_id, step, timelength, dateline FROM %s f WHERE f.id <= %d" % ( 'track_trackarch', 'track_track', last_track_id) cur.execute(sql) sql = "DELETE FROM %s WHERE id <= %d" % ('track_track', last_track_id) cur.execute(sql) sql = "REPLACE INTO %s(id, track_id, valuetype_id, value) SELECT id, track_id, valuetype_id, value FROM %s f WHERE f.track_id <= %d" % ( 'track_trackvaluearch', 'track_trackvalue', last_track_id) cur.execute(sql) sql = "DELETE FROM %s WHERE track_id <= %d" % ('track_trackvalue', last_track_id) cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'track_track' cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'track_trackvalue' cur.execute(sql) sql = "flush tables" cur.execute(sql) cache.clear() print '====finished at %s====' % datetime.now()
def handle_label(self, label, **options): print '====started at %s====' % datetime.now() days_before = 0 try: days_before = int(label) except: print 'wrong label, should be int' return None processing_day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) print '====processing %s====' % processing_day.strftime("%Y-%m-%d") """ group by time per hour """ print '====processing by hour====' for i in range(24): # get time range s = datetime.now().replace(hour=i, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(seconds=3600) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='hour') # foreach project for p in Project.objects.filter(): # group by time g = Group(Session, GTime) g.static_attr = {'project': p, 'timeline': t[0]} g.annotate = {'count': Count('id'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')} g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1} g.easy_group() """ group by time per day """ # all day data s = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days_before) e = s + timedelta(days=1) dateline = s t = Timeline.objects.get_or_create(dateline=dateline, datetype='day') # foreach projects print '====processing by day====' for p in Project.objects.filter(): # group by time g = Group(GTime, GTime) g.static_attr = {'project': p, 'timeline': t[0]} g.annotate = {'count': Sum('count'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')} g.fargs = {'timeline__dateline__range': [s, e], 'project': p} g.easy_group() # group by user_referrer_site and time g = Group(Session, GReferrerSite) g.static_attr = {'project': p, 'timeline': t[0]} g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1} g.values = ['referrer_site', ] g.dynamic_attr = {'referrer_site_id': 'referrer_site'} g.annotate = {'count': Count('referrer_site'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')} g.easy_group() # group by user_referrer_keyword and time g = Group(Session, GReferrerKeyword) g.static_attr = {'project': p, 'timeline': t[0]} g.fargs = {'start_time__range': [s, e], 'project': p, 'track_count__gte': 1} g.values = ['referrer_keyword', ] g.dynamic_attr = {'referrer_keyword_id': 'referrer_keyword'} g.annotate = {'count': Count('referrer_keyword'), 'track_count': Avg('track_count'), 'timelength': Avg('timelength')} g.easy_group() print '====moving data====' import MySQLdb as mdb con = mdb.connect('localhost', 'root', '', 'datapanel') cur = con.cursor() sql = "SELECT id FROM session_session WHERE start_time<='%s' ORDER BY start_time desc LIMIT 1" % processing_day cur.execute(sql) try: last_id = cur.fetchone()[0] except: last_id = 0 sql = "REPLACE INTO %s(id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress) SELECT id, project_id, session_key, permanent_session_key, start_time, end_time, user_language, user_timezone, agent_id, os_id, device_id, referrer_site_id, referrer_keyword_id, track_count, timelength, ipaddress FROM %s f WHERE f.id <= %d" % ('session_sessionarch', 'session_session', last_id) cur.execute(sql) sql = "DELETE FROM %s WHERE id <= %d" % ('session_session', last_id) cur.execute(sql) sql = "REPLACE INTO %s(id, session_id, valuetype_id, value) SELECT id, session_id, valuetype_id, value FROM %s f WHERE f.session_id <= %d" % ( 'session_sessionvaluearch', 'session_sessionvalue', last_id) cur.execute(sql) sql = "DELETE FROM %s WHERE session_id <= %d" % ('session_sessionvalue', last_id) cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'session_session' cur.execute(sql) sql = "OPTIMIZE TABLE %s" % 'session_sessionvalue' cur.execute(sql) sql = "flush tables" cur.execute(sql) cache.clear() print '====finished at %s====' % datetime.now()