Example 1
import datetime

import MySQLdb

# c (config + logger), SomeTotal, and _is_test come from the surrounding module.

def bookmark_total_job(now=None):
    try:
        conn = MySQLdb.connect(**c.db_config)
        cursor = conn.cursor()

        # Default the cutoff to the current time.
        if not now:
            now = datetime.datetime.now()
        now_str = now.strftime('%Y-%m-%d %H:%M:%S')

        # Skip the help-guide bookmarks seeded for every new user.
        remove_guide = " url not regexp '^http://kan.sohu.com/help/guide-' "
        total = 0
        # Bookmarks are sharded across 64 tables: bookmark_bookmark_0..63.
        for i in range(64):
            sql = "select user_id, count(*) from bookmark_bookmark_%s where %s and \
            (gmt_create is null or gmt_create <= '%s') group by user_id" % (i, remove_guide, now_str)
            cursor.execute(sql)
            results = cursor.fetchall()
            for d in results:
                user_id = int(d[0])
                count = int(d[1])
                # Exclude articles added by test users.
                if not _is_test(user_id):
                    total += count
        now = datetime.datetime.now()
        data = SomeTotal(name='bookmark', time=now, count=total)
        data.save()
        return total
    except Exception as e:
        c.logger.error(e)
        return str(e)
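Because now caps rows by gmt_create, the job can be replayed for an earlier cutoff. A minimal usage sketch; the midnight backfill below is illustrative, not part of the original module:

import datetime

# Hypothetical backfill: recompute the total as of last midnight.
cutoff = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
total = bookmark_total_job(now=cutoff)
print('bookmarks as of %s: %s' % (cutoff, total))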
Example 2
def fiction_total_job(now=None):
    try:
        conn = MySQLdb.connect(**c.db_config)
        cursor = conn.cursor()

        # Default the cutoff to the current time.
        if not now:
            now = datetime.datetime.now()
        now_str = now.strftime('%Y-%m-%d %H:%M:%S')

        total = 0
        sql = "select user_id, count(*) from user_fiction_r where \
            (gmt_create is null or gmt_create <= '%s') group by user_id" % now_str
        cursor.execute(sql)
        results = cursor.fetchall()
        for d in results:
            user_id = int(d[0])
            count = int(d[1])
            # Exclude articles added by test users.
            if not _is_test(user_id):
                total += count
        # Note: the stored count carries a fixed offset of 4; the return value does not.
        data = SomeTotal(name='fiction', time=now, count=total + 4)
        data.save()
        return total
    except Exception as e:
        c.logger.error(e)
        return str(e)
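Both total jobs depend on a _is_test helper that the excerpts do not show. A minimal sketch, assuming c.test_id is the same list of test-account ids referenced in Example 3:

def _is_test(user_id):
    # Test accounts are excluded from every aggregate count.
    return user_id in c.test_id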
Example 3
import datetime

import MySQLdb
import anyjson

def share_channels(start_time):
    '''Aggregate data for the [bookmark channel statistics] report.'''
    
    start_time = start_time.replace(hour=0, minute=0, second=0)
    step = datetime.timedelta(days=1)
    end_time = start_time + step
    
    try:
        conn = MySQLdb.connect(**c.db_self_config)
        cursor = conn.cursor()

        # Exclude test users' ids inside the query itself.
        exclude_test_user_id = ' and o.user_id !='
        exclude_test_user_id += ' and o.user_id !='.join(map(str, c.test_id))

        sql = """select o.user_id, oo.object_key from stats_oper o  
        left join stats_operobject oo on oo.oper_id = o.id 
        where o.oper_type_id in (1,35) %s and oo.gmt_create >= '%s' and oo.gmt_create < '%s'  
        and oo.object_key like '%%\"from\":%%' """ % (exclude_test_user_id, start_time, end_time)
        cursor.execute(sql)
        results = cursor.fetchall()

        
        # jiathis, bshare, webapp, sohu_blog, sohu_news, baidu and other are
        # module-level string constants naming each share channel.
        m = {'time': datetime.datetime.strftime(start_time, "%Y-%m-%d"),
             jiathis: {'count': 0, 'object_key': []},
             bshare: {'count': 0, 'object_key': []},
             webapp: {'count': 0, 'object_key': []},
             sohu_blog: {'count': 0, 'object_key': []},
             sohu_news: {'count': 0, 'object_key': []},
             baidu: {'count': 0, 'object_key': []},
             other: {'count': 0, 'object_key': []}}

        for d in results:
            user_id = int(d[0])
            object_key = anyjson.loads(d[1])
            object_key['user_id'] = user_id
            
            # Exclude test users.
            if not _is_test(user_id):
                if jiathis == object_key['from']:
                    m[jiathis]['object_key'].append(object_key)
                elif bshare == object_key['from']:
                    m[bshare]['object_key'].append(object_key)
                elif webapp == object_key['from']:
                    m[webapp]['object_key'].append(object_key)
                elif sohu_blog == object_key['from']:
                    m[sohu_blog]['object_key'].append(object_key)
                elif sohu_news == object_key['from']:
                    m[sohu_news]['object_key'].append(object_key)
                elif baidu == object_key['from']:
                    m[baidu]['object_key'].append(object_key)
                else:
                    m[other]['object_key'].append(object_key)
        
        for k in m:
            if k != 'time':
                m[k]['count'] = len(m[k]['object_key'])
                
        data = Aggregation(type='share_channels', time=start_time.date(), content=anyjson.dumps(m))
        data.save()
    except Exception as e:
        c.logger.error(e)
        raise
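Interpolating values into SQL with % string formatting is workable while every input is trusted, but DB-API parameter binding avoids quoting bugs and injection. A sketch of the main query above using MySQLdb placeholders; the test-user exclusion clause is omitted for brevity:

sql = ("select o.user_id, oo.object_key from stats_oper o "
       "left join stats_operobject oo on oo.oper_id = o.id "
       "where o.oper_type_id in (1, 35) "
       "and oo.gmt_create >= %s and oo.gmt_create < %s "
       "and oo.object_key like %s")
cursor.execute(sql, (start_time, end_time, '%"from":%'))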
Example 4
import datetime
import urlparse

import MySQLdb
import anyjson

def bookmark_website(start_time):
    '''Aggregate data for the [bookmarked-article domain statistics, PV] report.'''
    
    start_time = start_time.replace(hour=0, minute=0, second=0)
    step = datetime.timedelta(days=1)
    end_time = start_time + step
    
    try:
        conn = MySQLdb.connect(**c.db_config)
        cursor = conn.cursor()
        
        having_fix, and_fix = _get_fix(start_time, end_time)
        mm = {}    # e.g. {'www.douban.com': 100, 'www.dapenti.com': 80}
        urls = {}  # e.g. {'www.douban.com': ['http://www.douban.com/1', ...], 'www.dapenti.com': [...]}

        ret = []
        
        # The bookmark tables historically lacked gmt_create, so queries
        # against them must filter on create_time instead.
        and_fix = and_fix.replace('gmt_create', 'create_time')
        
        for i in range(64):
            cursor.execute("select user_id, url from bookmark_bookmark_%s where 1=1 %s " % (i, and_fix))
            results = cursor.fetchall()
            for d in results:
                user_id = int(d[0])
                url = str(d[1])
                domain = urlparse.urlparse(url)[1]
                if not _is_test(user_id):
                    if domain in mm:
                        mm[domain] += 1
                    else:
                        mm[domain] = 1
                        urls[domain] = []
                    # Keep only a small sample of URLs per domain.
                    if len(urls[domain]) <= 50:
                        urls[domain].append(url)
        
        for k in mm:
            # Drop the site's own domain from the ranking.
            if k == 'kan.sohu.com':
                continue
            ret.append({'domain': k, 'count': mm[k], 'urls': urls[k]})

        ret.sort(key=lambda x: x['count'], reverse=True)
        # Store only the top 100 domains for the day.
        data = Aggregation(type='bookmark_website', time=start_time.date(), content=anyjson.dumps(ret[:100]))
        data.save()
        
        return ret
    except Exception as e:
        c.logger.error(e)
        return None
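The manual mm/urls bookkeeping can be compressed with collections.Counter. A sketch, assuming a flat list of already-filtered bookmark URLs:

import urlparse
from collections import Counter

def top_domains(urls, limit=100):
    # Count bookmarks per domain, mirroring the mm dict above.
    counts = Counter(urlparse.urlparse(u)[1] for u in urls)
    counts.pop('kan.sohu.com', None)  # drop the site's own domain
    return counts.most_common(limit)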
Example 5
import datetime

import MySQLdb
import anyjson

def add_channels(start_time):
    '''Aggregate data for the [bookmark channel statistics] report.'''
    
    start_time = start_time.replace(hour=0, minute=0, second=0)
    step = datetime.timedelta(days=1)
    end_time = start_time + step
    
    try:
        conn = MySQLdb.connect(**c.db_self_config)
        cursor = conn.cursor()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%% \"from\":%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        share_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from2\": \"chrome\"%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        chrome_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from2\": \"sogou\"%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        sogou_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"iPhone\"%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        iPhone_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"iPad\"%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        iPad_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key like '%%\"from3\": \"android\"%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        android_results = cursor.fetchall()

        sql = '''select oo.user_id, oo.object_key from stats_operobject oo, stats_oper o 
        where oo.oper_id=o.id and o.oper_type_id=1 and oo.object_key not like '%%\"from%%' 
        and oo.gmt_create > '%s' and oo.gmt_create < '%s' ''' % (start_time, end_time)
        cursor.execute(sql)
        other_results = cursor.fetchall()

        # Count how many of the 'other' rows belong to test users.
        other_test = 0
        for d in other_results:
            user_id = int(d[0])
            try:
                object_key = anyjson.loads(d[1])
            except ValueError:
                # When an overlong title was truncated on write, the JSON is
                # malformed and anyjson.loads() fails; drop the title field
                # and parse the rest.  (by cescgao)
                object_key = anyjson.loads(d[1][:d[1].find(', "title"')] + '}')
                object_key['title'] = 'None'
            # Exclude test users.
            if _is_test(user_id):
                other_test += 1

        
        # share, chrome, sogou, iPhone, iPad, android and other are
        # module-level string constants naming each add channel.
        m = {'time': datetime.datetime.strftime(start_time, "%Y-%m-%d"),
             share: {'count': len(share_results)},
             chrome: {'count': len(chrome_results)},
             sogou: {'count': len(sogou_results)},
             iPhone: {'count': len(iPhone_results)},
             iPad: {'count': len(iPad_results)},
             android: {'count': len(android_results)},
             other: {'count': len(other_results) - other_test}}
        data = Aggregation(type='add_channels', time=start_time.date(), content=anyjson.dumps(m))
        data.save()
    except Exception as e:
        c.logger.error(e)
        raise
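The six near-identical queries differ only in their like pattern. A sketch that drives them from a table and lets MySQL do the counting; the channel names mirror the module-level constants the original assumes:

patterns = {
    'share': '% "from":%',
    'chrome': '%"from2": "chrome"%',
    'sogou': '%"from2": "sogou"%',
    'iPhone': '%"from3": "iPhone"%',
    'iPad': '%"from3": "iPad"%',
    'android': '%"from3": "android"%',
}
base_sql = ("select count(*) from stats_operobject oo, stats_oper o "
            "where oo.oper_id = o.id and o.oper_type_id = 1 "
            "and oo.object_key like %s "
            "and oo.gmt_create > %s and oo.gmt_create < %s")
counts = {}
for channel, pattern in patterns.items():
    cursor.execute(base_sql, (pattern, start_time, end_time))
    counts[channel] = int(cursor.fetchone()[0])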