def bookmark_total_job(now=None): try: conn = MySQLdb.connect(**c.db_config) cursor = conn.cursor() # 今天 if not now: now = datetime.datetime.now() now_str = now.strftime('%Y-%m-%d %H:%M:%S') remove_guide = " url not regexp '^http://kan.sohu.com/help/guide-' " sum = 0 for i in range(64): sql = "select user_id, count(*) from bookmark_bookmark_%s where %s and \ (gmt_create is null or gmt_create <= '%s') group by user_id" % (i, remove_guide, now_str) cursor.execute (sql) results = cursor.fetchall() for d in results: user_id = int(d[0]) count = int(d[1]) # 去掉测试用户添加的文章 if not _is_test(user_id): sum += count now = datetime.datetime.now() data = SomeTotal(name='bookmark', time=now, count=sum) data.save() return sum except Exception, e: c.logger.error(e) return str(e)
def fiction_total_job(now=None): try: conn = MySQLdb.connect(**c.db_config) cursor = conn.cursor() # 今天 if not now: now = datetime.datetime.now() now_str = now.strftime('%Y-%m-%d %H:%M:%S') sum = 0 sql = "select user_id, count(*) from user_fiction_r where \ (gmt_create is null or gmt_create <= '%s') group by user_id" % (now_str) cursor.execute (sql) results = cursor.fetchall() for d in results: user_id = int(d[0]) count = int(d[1]) # 去掉测试用户添加的文章 if not _is_test(user_id): sum += count data = SomeTotal(name='fiction', time=now, count=sum + 4) data.save() return sum except Exception, e: c.logger.error(e) return str(e)
def share_channels(start_time): '''为[收藏渠道统计]聚合数据 ''' start_time = start_time.replace(hour=0, minute=0, second=0) step = datetime.timedelta(days=1) end_time = start_time + step try: conn = MySQLdb.connect(**c.db_self_config) cursor = conn.cursor() # 去掉测试用户的id exclude_test_user_id = ' and o.user_id !=' exclude_test_user_id += ' and o.user_id !='.join(map(lambda x:str(x), c.test_id)) sql = """select o.user_id, oo.object_key from stats_oper o left join stats_operobject oo on oo.oper_id = o.id where o.oper_type_id in (1,35) %s and oo.gmt_create >= '%s' and oo.gmt_create < '%s' and oo.object_key like '%%\"from\":%%' """ % (exclude_test_user_id, start_time, end_time) cursor.execute(sql) results = cursor.fetchall() user_id = None object_key = None m = {'time':datetime.datetime.strftime(start_time, "%Y-%m-%d"), jiathis:{'count':0, 'object_key':[]}, bshare:{'count':0, 'object_key':[]}, webapp:{'count':0, 'object_key':[]}, sohu_blog:{'count':0, 'object_key':[]}, sohu_news:{'count':0, 'object_key':[]}, baidu:{'count':0, 'object_key':[]}, other:{'count':0, 'object_key':[]}} for d in results: user_id = int(d[0]) object_key = anyjson.loads(d[1]) object_key['user_id'] = user_id # 去掉测试用户 if not _is_test(user_id): if jiathis == object_key['from']: m[jiathis]['object_key'].append(object_key) elif bshare == object_key['from']: m[bshare]['object_key'].append(object_key) elif webapp == object_key['from']: m[webapp]['object_key'].append(object_key) elif sohu_blog == object_key['from']: m[sohu_blog]['object_key'].append(object_key) elif sohu_news == object_key['from']: m[sohu_news]['object_key'].append(object_key) elif baidu == object_key['from']: m[baidu]['object_key'].append(object_key) else: m[other]['object_key'].append(object_key) for k in m: if k != 'time': m[k]['count'] = len(m[k]['object_key']) data = Aggregation(type='share_channels', time=start_time.date(), content=anyjson.dumps(m)) data.save() except Exception, e: c.logger.error(e) raise e
def bookmark_website(start_time): '''为[收藏文章的域名统计_PV]聚合数据''' start_time = start_time.replace(hour=0, minute=0, second=0) step = datetime.timedelta(days=1) end_time = start_time + step try: conn = MySQLdb.connect(**c.db_config) cursor = conn.cursor() having_fix, and_fix = _get_fix(start_time, end_time) mm = {} # mm = {'www.douban.com':100, 'www.dapenti.com':80} urls = {} # urls = {'www.douban.com':['http://www.douban.com/1','http://www.douban.com/2'], 'www.dapenti.com':[...]} ret = [] # 由于bookmark表以前没有gmt_create, 所以凡是查询bookmark表都要替换成create_time and_fix = and_fix.replace('gmt_create', 'create_time') for i in range(64): cursor.execute("select user_id, url from bookmark_bookmark_%s where 1=1 %s " % (i, and_fix)) results = cursor.fetchall() for d in results: user_id = int(d[0]) url = str(d[1]) domain = urlparse.urlparse(url)[1] if not _is_test(user_id): if domain in mm.keys(): mm[domain] += 1 else: mm[domain] = 1 urls[domain] = [] if len(urls[domain]) <= 50: urls[domain].append(url) for k in mm: # 需要去除domain='kan.sohu.com'的数据 if k == 'kan.sohu.com': continue ret.append({'domain':k, 'count':mm[k], 'urls':urls[k]}) ret.sort(key=lambda x:x['count'], reverse=True) # 只存储每日排名前100的domain #print ret[:100] data = Aggregation(type='bookmark_website', time=start_time.date(), content=anyjson.dumps(ret[:100])) data.save() # pp = [] # ssum = 0 # for r in ret: # if r['domain'].find('.sohu.com') != -1: # pp.append({'domain':r['domain'], 'count':r['count']}) # ssum += r['count'] # print pp # print ssum return ret except Exception, e: c.logger.error(e) print e return None
def add_channels(start_time): '''为[收藏渠道统计]聚合数据 ''' start_time = start_time.replace(hour=0, minute=0, second=0) step = datetime.timedelta(days=1) end_time = start_time + step try: conn = MySQLdb.connect(**c.db_self_config) cursor = conn.cursor() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%% \"from\":%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) share_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from2\": \"chrome\"%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) chrome_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from2\": \"sogou\"%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) sogou_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"iPhone\"%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) iPhone_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"iPad\"%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) iPad_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"android\"%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) android_results = cursor.fetchall() sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key not like '%%\"from%%' and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time) cursor.execute(sql) other_results = cursor.fetchall() other_test = 0 for d in other_results: user_id = int(d[0]) try: object_key = anyjson.loads(d[1]) except: #当title过长没有被完整写入数据库时,anyjson.loads()会报错 #by cescgao object_key = anyjson.loads(d[1][:d[1].find(', "title"')]+'}') object_key['title'] = 'None' # 去掉测试用户 if _is_test(user_id): other_test += 1 m = {'time':datetime.datetime.strftime(start_time, "%Y-%m-%d"), share:{'count': len(share_results)}, chrome:{'count': len(chrome_results)}, sogou:{'count': len(sogou_results)}, iPhone:{'count': len(iPhone_results)}, iPad:{'count': len(iPad_results)}, android:{'count': len(android_results)}, other:{'count': len(other_results) - other_test}} print m data = Aggregation(type='add_channels', time=start_time.date(), content=anyjson.dumps(m)) data.save() except Exception, e: c.logger.error(e) raise e