コード例 #1
0
ファイル: get_result.py プロジェクト: huxiaoqian/project
def readPropagateSpatial(keyid):
    """Return province color-map data for topic ``keyid``.

    Reads per-city counts from the PropagateSpatial table.  When the
    topic has no stored rows, every province (except 海外/其他) is
    seeded with a zero count, using the option list of a static HTML
    ``<select>`` snippet as the province table.
    """
    propagateSpatials = db.session.query(PropagateSpatial).filter(PropagateSpatial.topic_id==keyid).all()

    if propagateSpatials:
        # Stored data: one row per city.
        city_count = dict((p.city, p.count) for p in propagateSpatials)
    else:
        # No stored data: fall back to an all-zero province map.
        city_count = {}
        html = '''<select name="province" id="province" defvalue="11"><option value="34">安徽</option><option value="11">北京</option><option value="50">重庆</option><option value="35">福建</option><option value="62">甘肃</option>
                <option value="44">广东</option><option value="45">广西</option><option value="52">贵州</option><option value="46">海南</option><option value="13">河北</option>
                <option value="23">黑龙江</option><option value="41">河南</option><option value="42">湖北</option><option value="43">湖南</option><option value="15">内蒙古</option><option value="32">江苏</option>
                <option value="36">江西</option><option value="22">吉林</option><option value="21">辽宁</option><option value="64">宁夏</option><option value="63">青海</option><option value="14">山西</option><option value="37">山东</option>
                <option value="31">上海</option><option value="51">四川</option><option value="12">天津</option><option value="54">西藏</option><option value="65">新疆</option><option value="53">云南</option><option value="33">浙江</option>
                <option value="61">陕西</option><option value="71">台湾</option><option value="81">香港</option><option value="82">澳门</option><option value="400">海外</option><option value="100">其他</option></select>'''
        province_soup = BeautifulSoup(html)
        for province in province_soup.findAll('option'):
            pp = province.string
            # Skip the "overseas" and "other" pseudo-provinces.
            if pp == u'海外' or pp == u'其他':
                continue
            city_count[pp] = 0
    map_data = province_color_map(city_count)
    return map_data
コード例 #2
0
ファイル: read_quota_news.py プロジェクト: huxiaoqian/case
def readNews(stylenum, topic, end_ts , during):
    """Convert per-province news counts read from the database into map data.

    Returns (max_count, map_data, first_item).
    """
    # PcountNews aggregates the total for each province/city from the db.
    first_item, city_count = PcountNews(end_ts, during, stylenum, topic)
    counts = city_count.values()
    max_count = max(counts) if counts else 0
    map_data = province_color_map(city_count)
    return max_count, map_data, first_item
コード例 #3
0
def readPropagateSpatial(stylenum, topic, end_ts, during):
    """Turn database spatial counts into province map data.

    Returns (max_count, map_data, first_item).
    """
    # Pcount computes the total for each province/city from the db.
    first_item, city_count = Pcount(end_ts, during, stylenum, topic)
    if city_count:
        max_count = max(city_count.values())
    else:
        max_count = 0
    return max_count, province_color_map(city_count), first_item
コード例 #4
0
ファイル: views.py プロジェクト: huxiaoqian/project
def profile_group_location(fieldEnName):
    """Return the province color map for one domain group as JSON.

    Looks up the domain id for ``fieldEnName`` and renders the province
    distribution of its members for the fixed snapshot date 20130901.
    """
    domainid = DOMAIN_LIST.index(fieldEnName)
    datestr = '20130901'

    _verified_count, _unverified_count, province_dict = getDomainBasic(domainid, datestr)
    # The counters are converted (validated) as in the original code,
    # but only the province distribution feeds the map below.
    verified_count = int(_verified_count)
    unverified_count = int(_unverified_count)

    results = province_color_map(province_dict)
    return json.dumps(results)
コード例 #5
0
def readAcum(stylenum, topic, start_ts, end_ts, during):
    """Accumulate per-province counts over successive time points.

    For every time point between ``start_ts`` and ``end_ts`` (step
    ``during``) the counts returned by Pcount are added onto the most
    recent earlier totals, producing cumulative spatial data.

    Returns:
        (global_max_count, spatial_dict, global_first_city) where
        spatial_dict maps str(point_ts) -> map data for that point.
    """
    pointnum = (end_ts - start_ts) // during  # number of time points
    spatial_dict = {}
    spatial_info_list = []  # cumulative city counts, one dict per point
    global_max_count = 0
    global_first_timestamp = end_ts
    global_first_city = ""

    for i in range(pointnum + 1):
        end_ts = start_ts + during * i
        # Pcount computes per-province/city totals from the db.
        first_item, city_count = Pcount(end_ts, during, stylenum, topic)

        # Add the most recent earlier cumulative total for each city.
        for city in city_count:
            for j in range(i, 0, -1):
                previous_data = spatial_info_list[j - 1]
                if city in previous_data:
                    city_count[city] += previous_data[city]
                    break

        # Carry forward cities seen previously but absent at this point.
        if i > 0:
            previous_data = spatial_info_list[i - 1]
            for city in previous_data:
                if city not in city_count:
                    city_count[city] = previous_data[city]

        max_count = max(city_count.values()) if city_count else 0
        if global_max_count < max_count:
            global_max_count = max_count

        spatial_info_list.append(city_count)
        # spatial_dict = {end_ts: map_data}
        spatial_dict[str(end_ts)] = province_color_map(city_count)

        # Track the earliest reported item; first_item may lack either key.
        try:
            if first_item['timestamp'] <= global_first_timestamp:
                global_first_timestamp = first_item['timestamp']
                global_first_city = geo2city(first_item['geo'])
        except KeyError:
            pass
    return global_max_count, spatial_dict, global_first_city
コード例 #6
0
ファイル: read_quota_news.py プロジェクト: huxiaoqian/case
def readAcumNews(stylenum, topic, start_ts, end_ts, during):
    """Accumulate per-province news counts over successive time points.

    Same accumulation scheme as readAcum, but sourced from PcountNews.

    Returns:
        (global_max_count, spatial_dict, global_first_city) where
        spatial_dict maps str(point_ts) -> map data for that point.
    """
    pointnum = (end_ts - start_ts) // during  # number of time points
    spatial_dict = {}
    spatial_info_list = []  # cumulative city counts, one dict per point
    global_max_count = 0
    global_first_timestamp = end_ts
    global_first_city = ""

    for i in range(pointnum + 1):
        end_ts = start_ts + during * i
        # PcountNews computes per-province/city totals from the db.
        first_item, city_count = PcountNews(end_ts, during, stylenum, topic)

        # Add the most recent earlier cumulative total for each city.
        for city in city_count:
            for j in range(i, 0, -1):
                previous_data = spatial_info_list[j - 1]
                if city in previous_data:
                    city_count[city] += previous_data[city]
                    break

        # Carry forward cities seen previously but absent at this point.
        if i > 0:
            previous_data = spatial_info_list[i - 1]
            for city in previous_data:
                if city not in city_count:
                    city_count[city] = previous_data[city]

        max_count = max(city_count.values()) if city_count else 0
        if global_max_count < max_count:
            global_max_count = max_count

        spatial_info_list.append(city_count)
        # spatial_dict = {end_ts: map_data}
        spatial_dict[str(end_ts)] = province_color_map(city_count)

        # Track the earliest timestamp; first_item may lack the key.
        # NOTE(review): the city is deliberately left '' here (original
        # behaviour) even when a newer earliest timestamp is found.
        try:
            if first_item['timestamp'] <= global_first_timestamp:
                global_first_timestamp = first_item['timestamp']
                global_first_city = ''
        except KeyError:
            pass
    return global_max_count, spatial_dict, global_first_city
コード例 #7
0
ファイル: calculatetopic.py プロジェクト: huxiaoqian/project
def calculate_topic(kw):
    """Build a topic summary for keyword ``kw``.

    Searches a fixed Xapian window (2012-01-01 .. 2012-01-10) for weibo
    posts matching ``kw`` and aggregates per-day counts, participants,
    opinion leaders, geographic distribution and several topic indices.
    Returns a dict with all computed statistics.
    """
    # Initialization
    topic_info = {}

    topic_index = {}
    date_list = []
    perday_count_list = []
    topic_rel_blog = []
    topic_url = []
    topic_participents = []
    topic_leader = []
    topic_date = []  # NOTE(review): assigned here but never used below
    blogs_sum = 0
    comments_sum = 0
    topic_ori_blog = []
    
    # Seed every province (except 海外/其他) with a zero count, using the
    # option list of a static HTML <select> snippet as the province table.
    city_count={}
    html = '''<select name="province" id="province" defvalue="11"><option value="34">安徽</option><option value="11">北京</option><option value="50">重庆</option><option value="35">福建</option><option value="62">甘肃</option>
                <option value="44">广东</option><option value="45">广西</option><option value="52">贵州</option><option value="46">海南</option><option value="13">河北</option>
                <option value="23">黑龙江</option><option value="41">河南</option><option value="42">湖北</option><option value="43">湖南</option><option value="15">内蒙古</option><option value="32">江苏</option>
                <option value="36">江西</option><option value="22">吉林</option><option value="21">辽宁</option><option value="64">宁夏</option><option value="63">青海</option><option value="14">山西</option><option value="37">山东</option>
                <option value="31">上海</option><option value="51">四川</option><option value="12">天津</option><option value="54">西藏</option><option value="65">新疆</option><option value="53">云南</option><option value="33">浙江</option>
                <option value="61">陕西</option><option value="71">台湾</option><option value="81">香港</option><option value="82">澳门</option><option value="400">海外</option><option value="100">其他</option></select>'''
    province_soup = BeautifulSoup(html)
    for province in province_soup.findAll('option'):
        pp = province.string
        if pp == u'海外' or pp == u'其他':
            continue
        city_count[pp] = 0
    
    # Fixed search window, as epoch seconds (UTC).
    gt = calendar.timegm(datetime(2012, 1, 1).timetuple())
    lt = calendar.timegm(datetime(2012, 1, 10).timetuple())

    s = XapianSearch(path='/opt/xapian_weibo/data/', name='master_timeline_weibo', schema_version=2)
    count, get_results = s.search(query={'text': [u'%s'%kw], 'timestamp': {'$gt': gt, '$lt': lt}}, sort_by=['timestamp'], fields=['text', 'timestamp','reposts_count','comments_count','user', 'terms', '_id','retweeted_status','bmiddle_pic','geo','source','attitudes_count'])
    
    for r in get_results():
        # Collect posting dates and the per-day weibo counts
	temp_date = date.fromtimestamp(r['timestamp'])
        if len(date_list) == 0:
            date_list.append(temp_date)
            perday_count_list.append(1)
        else:
            if temp_date < date_list[-1]:
                if temp_date in date_list:
                    temp_index = date_list.index(temp_date)
                    perday_count_list[temp_index] += 1
                else:
                    # Insert an out-of-order date at its sorted position.
                    i = 0
                    while i < len(date_list):
                        if temp_date < date_list[0]:
                            date_list.insert(0,temp_date)
                            perday_count_list.insert(0,1)
                            break
                        else:
                            if temp_date > date_list[i] and temp_date < date_list[i+1]:
                                date_list.insert(i+1,temp_date)
                                perday_count_list.insert(i+1,1)
                                break
                            else:
                                i += 1
            if temp_date == date_list[-1]:
                perday_count_list[-1] += 1
            if temp_date > date_list[-1]:
                # Pad any missing days with zeros, then count this post.
                timedelta = date(2000,1,2)-date(2000,1,1)
                while date_list[-1] != temp_date:
                    temp_date1 = date_list[-1] + timedelta
                    date_list.append(temp_date1)
                    perday_count_list.append(0)
                perday_count_list[-1] = 1
                        

	if r['user']:
            uid = int(r['user'])
            user = get_user(uid)
            if user != None:
                if user not in topic_participents:
                    topic_participents.append(user)
                # Original (non-repost) weibo.
                if r['retweeted_status'] == None:
                    temp_ori = {}
                    temp_ori['status'] = r
                    temp_ori['user'] = user
                    topic_ori_blog.append(temp_ori)
                # Reposts + comments > 1500 marks the author as a leader.
                if r['reposts_count'] != None and r['comments_count'] != None:
                    rc = r['reposts_count'] + r['comments_count']
                    if rc > 1500:
                        topic_leader.append(user)
                # Highly reposted weibos become "related blogs".
                if r['reposts_count'] > 1000:
                    temp = {}
                    temp['status'] = r
                    temp['status']['created_at'] = datetime.fromtimestamp(r['timestamp'])
                    temp['status']['text'] = r['text'].decode("utf-8")
                    temp['status']['source'] = re.match('<.*?>(.*)<.*?>', r['source']).group(1).decode("utf-8")
                    temp['user'] = user
                    topic_rel_blog.append(temp)
                if r['bmiddle_pic']:
                    topic_url.append(r['bmiddle_pic'])
                # Prefer the weibo's own geo; fall back to the user profile.
                if r['geo'] != None and r['geo'].has_key('province_name'):
                    p = r['geo']['province_name'].split('省')[0]
                    if p == u'海外' or p == u'其他':
                        pass
                    else:
                        city_count[p] += 1
                elif user['location']:
                    p = user['location'].split(' ')[0]
                    if p == u'海外' or p == u'其他':
                        pass
                    else:
                        city_count[p] += 1
                else:
                    pass
        else:
            pass
            
        comments_sum = comments_sum + r['comments_count']
        blogs_sum += 1

    # Persistence / "suddenness" indices from the per-day series.
    timedelta = len(date_list)
    avg = blogs_sum/float(timedelta)
    i = 0
    persistent_index = 0
    temp_sudden = 0
    while i < int(timedelta):
	if perday_count_list[i] > avg:
	    persistent_index += 1
	    temp_sudden = perday_count_list[i]-avg+temp_sudden
	    i += 1
	else:
	    i += 1
    sudden_index = '%10.2f'%(temp_sudden/float(blogs_sum))
    coverage_index = '%10.2f'%((blogs_sum + comments_sum)/(24*float(timedelta)))
    
    # Media index: how many of the top original posters are known media.
    media_index = 0
    top_medias = []
    medias = db.session.query(Media)
    for media in medias:
	media_name = media.mediaName
	top_medias.append(media_name)
	
    media_list = []
    for r in topic_ori_blog:
	tmedia = []
        tmedia.append(r['user']['name'])
	x = r['status']['comments_count']+r['status']['reposts_count']
	tmedia.append(x)
	media_list.append(tmedia)
	# NOTE(review): sorted() returns a new list that is discarded here,
	# so media_list keeps insertion order; media_list.sort(...) was
	# probably intended.
	sorted(media_list, key=lambda tmedia: tmedia[1],reverse = True)
	if len(media_list) >= 20:
	    m = 0
	    while m < 20:
		if media_list[m][0] in top_medias:
		    media_index += 1
		    m += 1
		else:
		    m += 1
	else:
	    m = 0
	    while m < len(media_list):
		if media_list[m][0] in top_medias:
		    media_index += 1
		    m += 1
		else:
		    m += 1

    leader_index = len(topic_leader)

    # Tally participants by professional field.
    work_list = []
    work_count = []
    fields = db.session.query(Field)
    for field in fields:
	field_name = field.fieldName
	work_list.append(field_name)
	work_count.append(0)
    for r in topic_participents:
	k = 0
	while k < len(work_list):
	    if r['userField'] == work_list[k]:
		work_count[k] += 1
		break
	    else:
		k += 1
    
    topic_index['persistent_index'] = persistent_index
    topic_index['sudden_index'] = sudden_index
    topic_index['coverage_index'] = coverage_index
    topic_index['media_index'] = media_index
    topic_index['leader_index'] = leader_index

    map_data = province_color_map(city_count)

    topic_info['topic_poster'] = topic_participents[0]['name']
    topic_info['topic_post_date'] = date_list[0]
    topic_info['topic_leader_count'] = len(topic_leader)
    topic_info['topic_participents'] = len(topic_participents)
    topic_info['blogs_sum'] = blogs_sum
    topic_info['topic_ori_blog_count'] = len(topic_ori_blog)
    topic_info['topic_url'] = topic_url
    topic_info['perday_count_list'] = perday_count_list
    topic_info['date_list'] = date_list
    topic_info['topic_rel_blog'] = topic_rel_blog
    topic_info['geo'] = map_data
    topic_info['topic_leader'] = topic_leader
    topic_info['topic_working_list'] = work_list
    topic_info['topic_working_count'] = work_count
    topic_info['topic_index'] = topic_index
    topic_info['gt'] = gt
    topic_info['lt'] = lt            
    return topic_info
コード例 #8
0
def calculate_single(_id, test):
    """Compute spread statistics for a single weibo ``_id``.

    ``test`` selects the test search index when it is not the string
    'None'.  Gathers reposts of the weibo, the per-day repost series,
    reposter geography and several indices, returned in one dict.
    """
    # Initialization
    blog_info = {}
    # Seed every province (except 海外/其他) with a zero count, using the
    # option list of a static HTML <select> snippet as the province table.
    city_count = {}
    html = '''<select name="province" id="province" defvalue="11"><option value="34">安徽</option><option value="11">北京</option><option value="50">重庆</option><option value="35">福建</option><option value="62">甘肃</option>
                <option value="44">广东</option><option value="45">广西</option><option value="52">贵州</option><option value="46">海南</option><option value="13">河北</option>
                <option value="23">黑龙江</option><option value="41">河南</option><option value="42">湖北</option><option value="43">湖南</option><option value="15">内蒙古</option><option value="32">江苏</option>
                <option value="36">江西</option><option value="22">吉林</option><option value="21">辽宁</option><option value="64">宁夏</option><option value="63">青海</option><option value="14">山西</option><option value="37">山东</option>
                <option value="31">上海</option><option value="51">四川</option><option value="12">天津</option><option value="54">西藏</option><option value="65">新疆</option><option value="53">云南</option><option value="33">浙江</option>
                <option value="61">陕西</option><option value="71">台湾</option><option value="81">香港</option><option value="82">澳门</option><option value="400">海外</option><option value="100">其他</option></select>'''
    province_soup = BeautifulSoup(html)
    for province in province_soup.findAll('option'):
        pp = province.string
        if pp == u'海外' or pp == u'其他':
            continue
        city_count[pp] = 0
        
    # Search window: 2011-01-01 .. today's midnight (local time).
    begin_ts1 = time.mktime(datetime(2011, 1, 1).timetuple())
    now = date.today()
    now_year = int(now.year)
    now_month = int(now.month)
    now_day = int(now.day)
    end_ts1 = time.mktime(datetime(now_year, now_month, now_day).timetuple())
    
    # Fetch the original weibo
    status_ori = get_ori_status(_id, test)

    # Fetch the related (repost) weibos
    if test == 'None':
        count, get_results = xapian_search_weibo.search(query={'retweeted_status': _id, 'timestamp': {'$gt': begin_ts1, '$lt': end_ts1} }, sort_by=['timestamp'])
    else:
        count, get_results = xapian_search_weibo_test.search(query={'retweeted_status': _id, 'timestamp': {'$gt': begin_ts1, '$lt': end_ts1} }, sort_by=['timestamp'])
    print count
    reposter = []
    
    # Day series starts with the original post itself.
    date_list = []
    date_list.append(date.fromtimestamp(status_ori['timestamp']))
    perday_repost_count = []
    perday_repost_count.append(1)
    
    # NOTE(review): octal literals (01, 02) are Python 2 only syntax;
    # this one-day delta is never used below.
    per = date(2000,01,02)-date(2000,01,01)
    reposts_sum = 0
    comments_sum = 0
    key_reposter = []

    for r in get_results():

        if r['user']:
            user = get_user(r['user'], test)
            # Attribute the repost to the first token of the user location.
            if user['location'] != None:
                p = user['location']
                tp = p.split(' ')
                ppp = tp[0]
                if ppp == u'海外' or ppp == u'其他':
                    pass
                else:
                    city_count[ppp] += 1
            if user not in reposter:
                reposter.append(user)
            # Heavy reposters (>1000 reposts) count as key spreaders.
            if r['reposts_count'] > 1000:
                key_reposter.append(user)
        else:
             pass

        # Update the per-day repost series for this repost's date.
        tempdate = date.fromtimestamp(r['timestamp'])
        if tempdate < date_list[-1]:
            if tempdate in date_list:
                temp_index = date_list.index(tempdate)
                perday_repost_count[temp_index] += 1
            else:
                # Insert an out-of-order date at its sorted position.
                i = 0
                while i < len(date_list):
                    if tempdate > date_list[i] and tempdate < date_list[i+1]:
                        date_list.insert(i+1,tempdate)
                        perday_repost_count.insert(i+1,1)
                        break
                    else:
                        i += 1
        if tempdate == date_list[-1]:
            perday_repost_count[-1] += 1
        if tempdate > date_list[-1]:
            # Pad any missing days with zeros, then count this repost.
            timedelta = date(2000,1,2)-date(2000,1,1)
            while date_list[-1] != tempdate:
                tempdate1 = date_list[-1] + timedelta
                date_list.append(tempdate1)
                perday_repost_count.append(0)
            perday_repost_count[-1] = 1
        reposts_sum += r['reposts_count']
        # NOTE(review): bare except silently ignores any failure here,
        # not just a missing 'comments_count' key.
        try:
            comments_sum += r['comments_count']
        except:
            pass

    # Persistence / "suddenness" indices from the per-day series.
    totalRepost = reposts_sum + 1
    avg = (float(totalRepost))/len(date_list)
    persistent_index = 0
    sudden_count = 0
    j = 0
    while j < len(date_list):
       if perday_repost_count[j] > avg:
          persistent_index += 1
          sudden_count = perday_repost_count[j]-avg+sudden_count
          j += 1
       else:
          j += 1
    sudden_index = '%10.2f'%((float(sudden_count))/totalRepost)
    coverage_index = '%10.2f'%((totalRepost+comments_sum)/float(24*len(date_list)))

    # Media index: reposters that appear in the Media table query.
    media_index = 0
    medias = db.session.query(Media)
    for key in reposter:
       if key in medias:
          media_index += 1
       else:
          pass

    print city_count
    map_data = province_color_map(city_count)
    print map_data
    leader_index = len(key_reposter)
    
    blog_info['status'] = status_ori
    blog_info['user'] = status_ori['user']
    blog_info['repost_users'] = reposter
    blog_info['datelist'] = date_list
    blog_info['perday_count'] = perday_repost_count
    blog_info['persistent_index'] = persistent_index
    blog_info['sudden_index'] = sudden_index
    blog_info['coverage_index'] = coverage_index
    blog_info['media_index'] = media_index
    blog_info['leader_index'] = leader_index
    blog_info['geo'] = map_data
    blog_info['key_reposter'] = key_reposter
    
    return blog_info