Esempio n. 1
0
def export_data():
    """Write per-user monthly activity rows to the 'stat_HARDCODE-1024' CSV.

    For every value 1..12 handed to ``get_target_users_by_month`` and for
    every calendar month from March to October 2012, one row is written per
    user containing: both month numbers, YES/NO flags derived from
    ``is_readactivity`` / ``is_writeactivity``, the number of entries returned
    by ``get_friends_and_counts`` and the total number of ``user_post``
    records of type 1, 2, 3, 5 or 7 those entries created within the month.
    """
    dau_log = {}
    populate_DAU(datetime.datetime(2012, 2, 20), datetime.datetime(2012, 11, 1), dau_log)

    writer = CommonCsvWriter('stat_HARDCODE-1024')
    writer.write_header([u'注册月份', u'追踪月份', u'has_read', u'has_write', u'friends count', 'feed'])

    for signup_month in range(1, 13):
        cohort = get_target_users_by_month(signup_month)
        for tracked_month in range(3, 11):
            # Half-open window [first day of month, first day of next month).
            window_start = datetime.datetime(2012, tracked_month, 1)
            window_end = datetime.datetime(2012, tracked_month + 1, 1)
            start_ts = stat_util.convert_datetime_to_timestamp(window_start)
            end_ts = stat_util.convert_datetime_to_timestamp(window_end)
            # NOTE(review): the extra 8 hours presumably shifts a UTC+8 day
            # boundary to UTC -- confirm.
            friends_cutoff = window_end - datetime.timedelta(days=1, hours=8)

            for user in cohort:
                uid = user.id
                if is_readactivity(uid, window_start, window_end, dau_log):
                    read_flag = 'YES'
                else:
                    read_flag = 'NO'
                if is_writeactivity(uid, window_start, window_end):
                    write_flag = 'YES'
                else:
                    write_flag = 'NO'

                friends = get_friends_and_counts(uid, friends_cutoff)
                # Posts made by the user's friends during the window.
                feed_total = sum(
                    crab.user_post[friend].find(
                        R.type.in_([1, 2, 3, 5, 7])
                        & (R.created_on >= start_ts)
                        & (R.created_on < end_ts)
                    ).count()
                    for friend in friends
                )
                writer.write_onerow(
                    [signup_month, tracked_month, read_flag, write_flag, len(friends), feed_total]
                )
    writer.end_write()
Esempio n. 2
0
def export_csv(aid_data):
    """Flatten ``aid_data`` into './output/stat_HARDCORE-1005'.

    ``aid_data`` maps an aid to a sequence of dicts. Each dict carries a
    ``'date_str'`` entry plus one entry per guid whose value is indexed at
    positions 0, 1 and 2 (written under the view/click/share columns).
    Aids and guids are emitted in sorted order; all rows are collected first
    and written in a single ``write_body`` call.
    """
    writer = CommonCsvWriter(filename='./output/stat_HARDCORE-1005')
    writer.write_header(['date','aid','guid','guid_name','chechin No','view','click','share'])

    rows = []
    for aid in sorted(aid_data.keys()):
        for record in aid_data.get(aid):
            ordered_keys = sorted(record.keys())
            date = record['date_str']
            for guid in ordered_keys:
                # 'date_str' is metadata of the record, not a guid.
                if guid == 'date_str':
                    continue
                checkin_total = get_post_count_by_guid(guid)
                location_name = get_location(guid)
                counters = record.get(guid)
                rows.append([
                    date, aid, guid, location_name, checkin_total,
                    counters[0], counters[1], counters[2],
                ])

    writer.write_body(rows)
    writer.end_write()
Esempio n. 3
0
def export_csv(uids):
    """Export the located check-ins of ``uids`` to './output/stat_HARDCORE-983'.

    For each user, every ``user_post`` record with ``location_id > 0`` created
    in the window [2012-09-30 minus 8h, 2012-10-30 plus 16h) produces one row:
    user name, uid, total points, check-in datetime, location name, number of
    matching check-ins of the user, friend count, city name, post body, photo
    flag, photo link and the user's Sina Weibo flag / screen name / URL.

    :param uids: iterable of user ids to export.
    """
    csw = CommonCsvWriter(filename='./output/stat_HARDCORE-983')
    # NOTE(review): this header has 7 columns but every row below carries 14
    # values; it looks copied from another report -- confirm the intended
    # column titles before relying on the header.
    csw.write_header([u'aid','guid','guid_name','chechin No','view','click','share'])

    # NOTE(review): the -8h / +16h offsets presumably map UTC+8 calendar days
    # onto UTC timestamps -- confirm.
    start_date = datetime.datetime(2012, 9, 30) - datetime.timedelta(hours=8)
    end_date = datetime.datetime(2012, 10, 30) + datetime.timedelta(hours=16)
    start = stat_util.convert_datetime_to_timestamp(start_date)
    end = stat_util.convert_datetime_to_timestamp(end_date)

    def located_posts(uid):
        # A fresh query per call: one is iterated, another one is counted.
        return crab.user_post[uid].find(
            (R.created_on >= start) & (R.created_on < end) & (R.location_id > 0)
        )

    for uid in uids:
        sina = utils.get_weibo_info(uid)
        friends_count = get_f_count(uid)
        user = user_get(uid)
        user_name = user['name']
        user_point = user['points_total']

        # Counted lazily so users without any matching post cost no extra query.
        check_in_count = None
        for p in located_posts(uid):
            if check_in_count is None:
                check_in_count = located_posts(uid).count()
            checkin_date = datetime.datetime.fromtimestamp(p['created_on'])
            post = db_slave.post.find_one({'_id': p['post_id']})
            loc = location_get(post['l'], 'basic')
            loc_name = loc['name']

            # Only index into the city record when one was actually found;
            # the fallback label must never be subscripted.
            city = city_get(p['city'])
            city_name = city['name'] if city else 'N/A'

            body = post['b']
            photo = db.photo.find_one({'p': p['post_id']})
            has_photo = 'YES' if photo else 'NO'
            photo_link = get_photo_url(photo) + '?size=500&style=1&quality=high' if photo else 'N/A'
            sina_weibo = 'YES' if sina else 'N/A'
            sina_name = sina['screen_name'] if sina else 'N/A'
            sina_url = 'http://weibo.com/u/%d' % sina['id'] if sina else 'N/A'
            csw.write_onerow([
                user_name, uid, user_point, checkin_date, loc_name,
                check_in_count, friends_count, city_name, body, has_photo,
                photo_link, sina_weibo, sina_name, sina_url,
            ])

    csw.end_write()
Esempio n. 4
0
def export_csv(guids):
    """Write colon-separated guid strings to './output/stat_HARDCORE-1023_new'.

    Each element of ``guids`` is split on ':' and the resulting pieces become
    one CSV row under the header guid / city / poi.
    """
    writer = CommonCsvWriter(filename='./output/stat_HARDCORE-1023_new')
    writer.write_header([u'guid','city','poi'])
    for packed in guids:
        fields = packed.split(':')
        writer.write_onerow(list(fields))
    writer.end_write()
Esempio n. 5
0
def stat_guid_lists(target_guids):
    """Dump ``target_guids`` to './output/stat_nail_beauty' and echo them.

    Every element is passed unchanged to ``write_onerow`` and printed.
    The returned list is always empty: nothing is ever appended to it.
    """
    collected = []
    writer = CommonCsvWriter(filename='./output/stat_nail_beauty')
    writer.write_header(['guid','checkin_user_no'])

    for entry in target_guids:
        writer.write_onerow(entry)
        print(entry)

    writer.end_write()
    return collected
Esempio n. 6
0
def retrive_guids(before=None):
    """Collect locations whose name or tips mention nail-related keywords.

    Pages through ``Location`` rows (50000 per page). A location is recorded
    when its name contains one of the keywords, and again for every type-2
    ``location_post`` entry whose post body contains one. Every hit is written
    to './output/stat_nail_beauty' and echoed to stdout.

    :param before: accepted for backward compatibility only. The value is not
        used by the scan (the query that consumed it is disabled).
    :return: set of ``(guid, name)`` tuples for all matching locations.
    """
    # 'nail beauty' from the earlier keyword list is implied by 'nail'.
    keywords = ('指甲', '美甲', 'nail')

    def mentions_keyword(text):
        return any(word in text for word in keywords)

    guids = set()
    offset = 0
    limit = 50000
    # NOTE(review): hard-coded scan budget, presumably the size of the
    # locations table when this was written -- confirm.
    max_rows = 237442
    retrieve_count = 0

    csw = CommonCsvWriter(filename='./output/stat_nail_beauty')
    csw.write_header([u'found from','guid','poi_name','tip_id',u'tip_content'])
    while retrieve_count < max_rows:
        rows_in_page = 0
        for guid, name in session.query(Location.guid, Location.name).offset(offset).limit(limit):
            rows_in_page += 1
            # Guard against locations stored without a name.
            if name and mentions_keyword(name):
                guids.add((guid, name))
                csw.write_onerow(('poi name', guid, name))
                print('hited by poi name %s %s' % (guid, name))

            for tip in crab.location_post[guid_to_int(guid)].find(R.type==2):
                pid = tip['post_id']
                post = db_slave.post.find_one({'_id': pid})
                if post and 'b' in post:
                    # Strip line breaks so the tip fits on one CSV line.
                    body = post['b'].replace('\r', '').replace('\n', '')
                    if mentions_keyword(body):
                        guids.add((guid, name))
                        csw.write_onerow(('tip', guid, name, pid, body))
                        print('hited by tip %s %s' % (guid, name))
            retrieve_count += 1
        print(len(guids))
        if not rows_in_page:
            # The table is exhausted before the budget was reached; without
            # this the loop would spin forever on empty pages.
            break
        offset += limit
    csw.end_write()
    return guids
Esempio n. 7
0
def get_target_guids():
    """Find busy locations of selected categories in five cities.

    Scans ``locations_categories_2`` for category ids '0403', '0401' and
    '0103', keeps locations whose city is one of the listed cities and that
    have at least 100 ``location_post`` records, writes them to
    './output/stat_HARDCORE-1023_new' and returns the sorted
    ``"guid:city:name"`` strings.
    """
    wanted_cities = [u'北京',u'上海',u'广州',u'深圳',u'成都']
    writer = CommonCsvWriter(filename='./output/stat_HARDCORE-1023_new')
    writer.write_header([u'guid','city','poi','checkin times'])

    found = set()
    candidates = db_slave.locations_categories_2.find({'cat.id':{'$in':['0403','0401','0103']}})
    for record in candidates:
        guid = record['_id']
        location = location_get(guid)
        if not (location and 'city' in location and location['city'] in wanted_cities):
            continue
        visits = crab.location_post[guid_to_int(guid)].find().count()
        if visits < 100:
            continue
        found.add(':'.join([str(guid), location['city'], location['name']]))
        writer.write_onerow((guid, location['city'], location['name'], visits))

    writer.end_write()
    return sorted(found)
Esempio n. 8
0
def export_csv(guids):
    """Export the posts of each guid to './output/stat_HM'.

    For every post returned by ``get_posts_by_guid(guid, detail=True)`` a row
    is built from the post's 'u', 'l' and 'c' fields, its body 'b' with line
    breaks removed, a Yes/No photo flag and the story URL of the post. All
    rows are written at once at the end.
    """
    writer = CommonCsvWriter(filename='./output/stat_HM')
    # NOTE(review): the header titles do not correspond to the six values
    # written per row -- confirm the intended column names.
    writer.write_header(['date','aid','guid','guid_name','chechin No','view','click','share'])

    rows = []
    for guid in guids:
        for post in get_posts_by_guid(guid, detail=True):
            row = [
                post['u'],
                post['l'],
                post['c'],
                post['b'].replace('\r','').replace('\n',''),
            ]
            if db_slave.photo.find_one({'p':post['_id']}):
                photo_flag = 'Yes'
            else:
                photo_flag = 'No'
            story_url = 'http://jiepang.com/user/story?pid=%s' % str(post['_id'])
            row.extend([photo_flag, story_url])
            rows.append(row)

    writer.write_body(rows)
    writer.end_write()
Esempio n. 9
0
def stat_guid_lists(target_guids):
    """Report guids whose grouped-by-user check-in count is below five.

    For each guid the ``location_post`` records are grouped by ``user_id`` and
    counted. Guids with a count under 5 are written to
    './output/stat_checkin_lte5' and returned as ``(guid, count)`` tuples.
    Every processed guid is printed as a progress indicator.
    """
    writer = CommonCsvWriter(filename='./output/stat_checkin_lte5')
    writer.write_header(['guid','checkin_user_no'])

    sparse = []
    for guid in target_guids:
        visitors = crab.location_post[guid_to_int(guid)].find().group(R.user_id).count()
        if visitors < 5:
            pair = (guid, visitors)
            sparse.append(pair)
            writer.write_onerow(pair)
        print(guid)

    writer.end_write()
    return sparse
Esempio n. 10
0
def export_csv(guids):
    """Export check-in statistics per location to './output/stat_HARDCORE-989'.

    For every guid that ``location_get`` can resolve, one row is written with
    the location name, guid, number of posts of type 1/3/7/10, that number
    grouped by user, the number of such posts flagged ``has_photo`` and the
    location's city. Guids without a location record are skipped.

    :param guids: iterable of location guids.
    """
    csw = CommonCsvWriter(filename='./output/stat_HARDCORE-989')
    # NOTE(review): the header titles do not correspond to the six values
    # written per row -- confirm the intended column names.
    csw.write_header([u'aid','guid','guid_name','chechin No','view','click','share'])

    for guid in guids:
        location = location_get(guid)
        if not location:
            # Without a location there is no row to build; writing anyway
            # would either fail or repeat the previous guid's row.
            continue

        int_guid = guid_to_int(guid)
        checkin_count = crab.location_post[int_guid].find(R.type.in_([1,3,7,10])).count()
        user_count = crab.location_post[int_guid].find(R.type.in_([1,3,7,10])).group(R.user_id).count()
        photo_count = crab.location_post[int_guid].find(R.type.in_([1,3,7,10]) & R.has_photo).count()

        row = [location['name'], guid, checkin_count, user_count, photo_count, location['city']]
        print(row)
        csw.write_onerow(row)

    csw.end_write()
Esempio n. 11
0
def export_csv(guids):
    """Write two check-in reports for ``guids`` (HARDCORE-990-1 and -990-2).

    Report 1 ('./output/stat_HARDCORE-990-1') has one row per guid that
    ``location_get`` resolves: guid, location name, all-time counts
    (posts of type 1/3/7/10, the same grouped by user, the same restricted to
    ``has_photo``), a blank separator, and the same three counts restricted to
    the window [2012-06-14 minus 8h, 2012-11-14 plus 16h). Guids without a
    location record are skipped.

    Report 2 ('./output/stat_HARDCORE-990-2') has one row per day from
    2012-06-14 to 2012-11-14 inclusive with the three counts summed over all
    guids for that day's [day minus 8h, day plus 16h) window.

    :param guids: iterable of location guids; it is iterated once for report 1
        and once per day for report 2.
    """

    def build_condition(window=None, photo_only=False):
        # A fresh expression is built on every call so no query object is
        # shared between separate find() calls.
        expr = R.type.in_([1, 3, 7, 10])
        if photo_only:
            expr = expr & (R.has_photo)
        if window is not None:
            lower, upper = window
            expr = expr & (R.created_on >= lower) & (R.created_on < upper)
        return expr

    def count_stats(guid, window=None):
        # Returns (check-ins, check-ins grouped by user, photo check-ins).
        int_guid = guid_to_int(guid)
        checkins = crab.location_post[int_guid].find(build_condition(window)).count()
        users = (
            crab.location_post[int_guid]
            .find(build_condition(window))
            .group(R.user_id)
            .count()
        )
        photos = (
            crab.location_post[int_guid]
            .find(build_condition(window, photo_only=True))
            .count()
        )
        return checkins, users, photos

    # ---- Report 1: per-location totals ----------------------------------
    csw = CommonCsvWriter(filename="./output/stat_HARDCORE-990-1")
    # NOTE(review): the header titles do not correspond to the values written
    # per row -- confirm the intended column names.
    csw.write_header([u"aid", "guid", "guid_name", "chechin No", "view", "click", "share"])

    # NOTE(review): the -8h / +16h offsets presumably map UTC+8 calendar days
    # onto UTC timestamps -- confirm.
    start_time = datetime.datetime(2012, 6, 14) - datetime.timedelta(hours=8)
    end_time = datetime.datetime(2012, 11, 14) + datetime.timedelta(hours=16)
    whole_period = (
        stat_util.convert_datetime_to_timestamp(start_time),
        stat_util.convert_datetime_to_timestamp(end_time),
    )
    for guid in guids:
        location = location_get(guid)
        if not location:
            # No location record: there is no row to build, and writing anyway
            # would either fail or repeat the previous guid's row.
            continue
        total_checkins, total_users, total_photos = count_stats(guid)
        checkin_count, user_count, photo_count = count_stats(guid, whole_period)
        row = [
            guid,
            location["name"],
            total_checkins,
            total_users,
            total_photos,
            " ",
            checkin_count,
            user_count,
            photo_count,
        ]
        print(row)
        csw.write_onerow(row)
    csw.end_write()

    # ---- Report 2: per-day totals over all locations --------------------
    csw = CommonCsvWriter(filename="./output/stat_HARDCORE-990-2")
    csw.write_header([u"aid", "guid", "guid_name", "chechin No", "view", "click", "share"])

    start_date = datetime.datetime(2012, 6, 14)
    end_date = datetime.datetime(2012, 11, 14)
    for i in range(0, (end_date - start_date).days + 1):
        _day = start_date + datetime.timedelta(days=i)
        day_window = (
            stat_util.convert_datetime_to_timestamp(_day - datetime.timedelta(hours=8)),
            stat_util.convert_datetime_to_timestamp(_day + datetime.timedelta(hours=16)),
        )
        checkin_count = 0
        user_count = 0
        photo_count = 0
        for guid in guids:
            day_checkins, day_users, day_photos = count_stats(guid, day_window)
            checkin_count += day_checkins
            user_count += day_users
            photo_count += day_photos
        row = [_day, checkin_count, user_count, photo_count]
        print(row)
        csw.write_onerow(row)
    csw.end_write()
Esempio n. 12
0

def get_target_guids():
    """Return the guid list read from ``INPUT_FILE_PATH``.

    Delegates to ``stat_util.get_vertical_list_from_csv`` with the argument 2
    (presumably a column selector -- confirm against ``stat_util``).
    """
    guid_column = stat_util.get_vertical_list_from_csv(INPUT_FILE_PATH, 2)
    return guid_column


def stat_user_lists(target_guids):
    """Return the ids of users who posted at any target guid recently.

    "Recently" means ``created_on`` later than 60 days before the current UTC
    time. Results of each location query are grouped by ``user_id`` and merged
    into one set.
    """
    cutoff = stat_util.convert_datetime_to_timestamp(
        datetime.datetime.utcnow() - datetime.timedelta(days=60)
    )
    seen = set()
    for guid in target_guids:
        recent = crab.location_post[guid_to_int(guid)].find(R.created_on > cutoff).group(R.user_id)
        seen.update(record["user_id"] for record in recent)
    return seen


if __name__ == "__main__":
    # Script entry point: resolve the target guids, collect the users seen
    # there and dump one user id per row into the "subway_userlist" CSV.
    target_guids = get_target_guids()
    user_ids = stat_user_lists(target_guids)

    csw = CommonCsvWriter("subway_userlist")
    csw.write_header([u"user_id"])
    csv_body = [[uid] for uid in user_ids]
    csw.write_body(csv_body)
    csw.end_write()
Esempio n. 13
0
def export_csv(type = 'api'):
    """Join server-side logs with client logs and export two CSV files.

    Reads the profiling, client, length and image-server logs from the fixed
    paths under './input/looper_logs/', matches profiling and image logs
    against the client log and writes:

    * 'compare_result_apiserver_with_client.csv' -- one row per profiling
      entry of every matched request (only when ``type == 'api'``);
    * 'compare_result_apiserver_with_client_img.csv' -- one row per image
      entry of every matched request that has a request id.

    :param type: export mode; API rows are produced only for ``'api'``. The
        name shadows the builtin but is kept so keyword callers keep working.
    :return: always an empty list; collecting result records is disabled.
    """
    test_pro = './input/looper_logs/api_server/profiling.log'
    test_client = './input/looper_logs/client/all_client.log'
    test_length = './input/looper_logs/api_length/length.log'
    test_img = './input/looper_logs/img_server/profiling.log'

    def wifi_label(raw):
        # The flag may arrive as text, where '0' would be truthy; convert to
        # int when possible and fall back to plain truthiness otherwise.
        try:
            flag = int(raw)
        except (TypeError, ValueError):
            flag = raw
        return u'yes' if flag else 'no'

    pdatas = ProfilingLog.read_rows(test_pro)
    cdatas = ClientLog.read_rows(test_client)
    result = ProfilingLog.compare_to_client_log(pdatas, cdatas[1])
    lengthlog = LengthLog.read_rows(test_length)
    LengthLog.append_length_to_data(lengthlog, result)

    imglog = ImgLog.read_rows(test_img)
    img_result = ImgLog.compare_to_client_log(imglog, cdatas[1])

    # Nothing is appended to the result any more; callers get an empty list.
    result_data = []

    # NOTE(review): both headers are the placeholder ['aa'] -- confirm the
    # intended column titles.
    csw = CommonCsvWriter('compare_result_apiserver_with_client.csv')
    csw.write_header(['aa'])

    csw_img = CommonCsvWriter('compare_result_apiserver_with_client_img.csv')
    csw_img.write_header(['aa'])

    # API server rows.
    if type == 'api':
        for key in result:
            log = result.get(key)
            client_log = log['client_log']
            req_id = client_log[ClientLog.REQ_ID]
            req_type = client_log[ClientLog.TYPE]
            is_wifi = wifi_label(client_log[ClientLog.IS_WIFI])
            client_spend_time = client_log[ClientLog.RES_TIME]
            for profiling_log in log['profiling_log']:
                csw.write_onerow([
                    req_id,
                    req_type,
                    is_wifi,
                    client_spend_time,
                    profiling_log[ProfilingLog.REQ_API],
                    profiling_log[ProfilingLog.REQ_TIME],
                    profiling_log[ProfilingLog.REQ_IP],
                    profiling_log[ProfilingLog.SPD_TIME],
                    profiling_log[ProfilingLog.RED_UID],
                    profiling_log[ProfilingLog.REQ_VER],
                    log['length_log'][LengthLog.LENGTH],
                ])

    # Image server rows.
    for key in img_result:
        imglogs = img_result.get(key)
        if not imglogs:
            continue
        client_log = imglogs['client_log']
        req_id = client_log[ClientLog.REQ_ID]
        if not req_id:
            continue
        req_type = client_log[ClientLog.TYPE]
        is_wifi = wifi_label(client_log[ClientLog.IS_WIFI])
        client_spend_time = client_log[ClientLog.RES_TIME]
        for img_log in imglogs['img_log']:
            img_type = img_log[ImgLog.TYPE]
            img_spend_time = img_log[ImgLog.SPD_TIME]
            img_length = img_log[ImgLog.LENGTH]
            csw_img.write_onerow([
                req_id,
                req_type,
                is_wifi,
                client_spend_time,
                img_spend_time,
                img_type,
                img_length,
            ])

    csw.end_write()
    csw_img.end_write()
    return result_data