Ejemplo n.º 1
0
def updateProfiles(weiboids):
    """Set ``is_education = 1`` in the ``profile`` table for each weiboid.

    One UPDATE statement is built, printed and executed per id.

    Args:
        weiboids: iterable of weibo user ids; each is passed through ``str``.
    """
    db = GetConnect()
    template = "update profile set is_education = 1 where weiboid = '%s'"
    for weibo_id in weiboids:
        statement = template % str(weibo_id)
        print(statement)
        db.executeDB(statement)
Ejemplo n.º 2
0
def updateSchoolers(weiboids, schoolname=None):
    """Set ``is_wb_ori_no_pic = 1`` in table *schoolname* for each weiboid.

    One UPDATE statement is built, printed and executed per id.

    Args:
        weiboids: iterable of weibo user ids; each is passed through ``str``.
        schoolname: name of the table to update (interpolated into the SQL).
    """
    db = GetConnect()
    template = "update %s set is_wb_ori_no_pic = 1 where weiboid = '%s'"
    for weibo_id in weiboids:
        statement = template % (schoolname, str(weibo_id))
        print(statement)
        db.executeDB(statement)
def getWeiboIds(schooltable):
    """Return the weiboids in *schooltable* whose ``is_profile`` is -1.

    Args:
        schooltable: name of the table to query (interpolated into the SQL).

    Returns:
        list: first column of every returned row; empty if nothing matched.
    """
    db = GetConnect()
    query = 'select weiboid from %s where is_profile = -1' % schooltable
    rows = db.getData(query)
    if not rows:
        return []
    return [row[0] for row in rows]
Ejemplo n.º 4
0
def getWeiboIds():
    """Return the weiboids in ``profile`` whose ``is_education`` is -1.

    Returns:
        list: first column of every returned row; empty if nothing matched.
    """
    db = GetConnect()
    rows = db.getData('select weiboid from profile where is_education = -1')
    if not rows:
        return []
    return [row[0] for row in rows]
def getWeiboIds():
    """Collect the weiboid of each ``profile`` row with ``is_education = -1``.

    Returns:
        list: the ids in query order, or an empty list when the query
        returns nothing.
    """
    connection = GetConnect()
    query = 'select weiboid from profile where is_education = -1'
    found = connection.getData(query)
    return [record[0] for record in found] if found else []
def get_school_weibo(schoolname):
    """Pickle the segmented weibo rows of one school.

    Reads ``segments`` and ``is_meaningful`` from the
    ``<schoolname>_wordsegment`` table, prints the row count and a preview of
    at most the first 100 rows, then pickles the complete result into the
    ``dict weibo data`` directory as ``<schoolname>_seg_weibo.pkl``.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    myconnect = GetConnect()
    school_weibo_table = schoolname + '_wordsegment'
    school_weibo = 'select segments, is_meaningful from %s' % school_weibo_table
    weibo_content_results = myconnect.getData(school_weibo)
    print(len(weibo_content_results))
    # Slice rather than index 0..99 so a table with fewer than 100 rows does
    # not raise IndexError during the preview.
    for row in weibo_content_results[:100]:
        print("%s %s" % (row[0], row[1]))

    # Mode 'w' is kept from the original so the file format is unchanged;
    # the with-block guarantees the handle is flushed and closed.
    with open('dict weibo data\\%s_seg_weibo.pkl' % (schoolname), 'w') as out:
        pickle.dump(weibo_content_results, out)
def getSchooWeiboMeaning(schoolname):
    """Count the rows of ``<schoolname>_wordsegment`` per ``is_meaningful``.

    Args:
        schoolname: table-name prefix; ``_wordsegment`` is appended.

    Returns:
        list: empty when the query returns nothing; otherwise two ints, the
        count from the second result row followed by the count from the
        first result row.

    Raises:
        IndexError: if the query returns exactly one group.
    """
    table = schoolname + '_wordsegment'
    db = GetConnect()
    query = 'select count(*) as meaningcount,is_meaningful from %s group by is_meaningful;' % table
    groups = db.getData(query)
    if not groups:
        return []
    return [int(groups[1][0]), int(groups[0][0])]
Ejemplo n.º 8
0
def getWeiboIds(schoolname=None):
    """Return up to 20 randomly chosen weiboids from table *schoolname*.

    Args:
        schoolname: name of the table to sample (interpolated into the SQL).

    Returns:
        list: first column of every returned row; empty if nothing came back.
    """
    query = "select weiboid from %s order by rand() limit 20" % schoolname
    db = GetConnect()
    rows = db.getData(query)
    if not rows:
        return []
    return [row[0] for row in rows]
def getWeiboIds(schoolname=None):
    """Sample at most 20 weiboids at random from table *schoolname*.

    Args:
        schoolname: name of the table to sample (interpolated into the SQL).

    Returns:
        list: the sampled ids, or an empty list when the query returns
        nothing.
    """
    query = "select weiboid from %s order by rand() limit 20" % schoolname
    connection = GetConnect()
    sampled = connection.getData(query)
    return [record[0] for record in sampled] if sampled else []
def _strip_stopwords(weibo, stopwords):
    """Return the items of *weibo* that are neither stopwords nor a space."""
    return [word for word in weibo if word not in stopwords and word != ' ']


def getMarkedWeibo():
    """Group the labelled weibo by mark, filter stopwords and pickle each group.

    Rows are read from ``markedweibo`` (``content``, ``mark``). According to
    the original notes the marks mean: -1 unknown, 1 positive, 2 negative,
    3 activity (forwarded as part of a campaign, usually an advert with a
    link), 4 objective (no emotion expressed). Rows with any other mark are
    ignored.

    Each group is filtered against the entries loaded from
    ``sentiment_stopword.txt`` and written to ``seg_<group>_result.pkl`` in
    the ``maked weibo`` directory.

    The original computed the filtered lists but pickled the unfiltered
    ones; the filtered lists are now what gets written.
    """
    # (mark value, file-name stem) in the order the files are written.
    categories = ((1, 'pos'), (2, 'neg'), (3, 'act'), (4, 'obj'))

    # Read txt file containing sentiment stopwords.
    sentiment_stopwords = get_txt_data('sentiment_stopword.txt', 'lines')

    get_mark_weibo_sql = "select content, mark from markedweibo;"
    myconnect = GetConnect()
    results = myconnect.getData(get_mark_weibo_sql)

    by_mark = dict((mark, []) for mark, _ in categories)
    for weibo in results:
        bucket = by_mark.get(weibo[1])
        if bucket is not None:
            bucket.append(weibo[0])

    # Filter every group before writing anything, so a failure while
    # filtering leaves the existing files untouched.
    filtered = [
        (stem, [_strip_stopwords(weibo, sentiment_stopwords)
                for weibo in by_mark[mark]])
        for mark, stem in categories
    ]

    # Mode 'w' is kept from the original; the with-block closes each handle.
    for stem, segments in filtered:
        with open("maked weibo\\seg_%s_result.pkl" % stem, 'w') as out:
            pickle.dump(segments, out)
Ejemplo n.º 11
0
def conveyToSchoolTable(schoolname, schooltable):
    """Add the weibo users of one school from ``education`` to *schooltable*.

    Selects every ``education`` row whose ``school`` equals *schoolname*,
    wraps column 1 of each row in ``School_Info`` and inserts it through
    ``School_Db(schooltable)``.

    Args:
        schoolname: value matched against ``education.school``.
        schooltable: destination table name.

    Returns:
        The result of ``getCount`` on ``select * from <schooltable>`` when
        rows were found; ``None`` otherwise.
    """
    lookup = "select * from education where school = '%s'" % schoolname
    db = GetConnect()
    matches = db.getData(lookup)
    target = School_Db(schooltable)
    if not matches:
        return None
    for match in matches:
        target.insertIntoDB(School_Info(match[1]))
    return db.getCount("select * from %s" % schooltable)
Ejemplo n.º 12
0
def conveyToSchoolTable(schoolname):
    """Add the weibo users of one school from ``education`` to ``dlut``.

    Selects every ``education`` row whose ``school`` equals *schoolname*,
    wraps column 1 of each row in ``Dlut`` and inserts it through
    ``Dlut_Db``.

    Args:
        schoolname: value matched against ``education.school``.

    Returns:
        The result of ``getCount`` on ``select * from dlut`` when rows were
        found; ``None`` otherwise.
    """
    lookup = "select * from education where school = '%s'" % schoolname
    db = GetConnect()
    matches = db.getData(lookup)
    target = Dlut_Db()
    if not matches:
        return None
    for match in matches:
        target.insertIntoDB(Dlut(match[1]))
    return db.getCount("select * from dlut")
def main(sourcetable, destable):
    """Segment every row of *sourcetable* and store the result in *destable*.

    Rows are pulled one at a time from the first connection's cursor after
    ``getCount``; columns 0, 1 and 3 are used as wmd5, weiboid and content.
    The content is run through ``getSegments`` and
    ``getKeywordsAndIs_Meaningful`` and the outcome is written with
    ``replace into`` over a second connection. A failed write is reported
    and the loop continues.

    Args:
        sourcetable: name of the table to read.
        destable: name of the table to write.
    """
    myconnect = GetConnect()
    sql = "select * from %s" % sourcetable
    numrows = myconnect.getCount(sql)
    # Separate connection for writes so the read cursor is left undisturbed.
    myconnect2 = GetConnect()
    for _ in xrange(numrows):
        row = myconnect.cursor.fetchone()
        if row is None:
            # Cursor ran out before numrows rows were read; stop cleanly
            # instead of failing on row[0].
            break
        wmd5, weiboid, wcontent = row[0], row[1], row[3]
        segments = str(getSegments(wcontent))
        keywords, is_meaningful = getKeywordsAndIs_Meaningful(wcontent)
        # NOTE(review): values are interpolated straight into the SQL, so a
        # quote inside segments/keywords breaks the statement. Fixing that
        # needs parameter support in executeDB, which is not visible here.
        insert_sql = "replace into %s(wmd5, weiboid, segments, keywords, is_meaningful) values('%s', '%s', '%s', '%s', %s)" % (destable, wmd5, weiboid, segments, keywords, is_meaningful)
        try:
            myconnect2.executeDB(insert_sql)
        except Exception as e:
            # Not every exception carries (number, message) in args; fall
            # back to the plain text so the handler itself cannot raise.
            try:
                message = "Error %d: %s" % (e.args[0], e.args[1])
            except (IndexError, TypeError):
                message = "Error: %s" % (e,)
            print(message)
def conveyToSchoolTable(schoolname, schooltable):
    """Copy one school's weibo users from ``education`` into *schooltable*.

    Every ``education`` row whose ``school`` equals *schoolname* has its
    column 1 wrapped in ``School_Info`` and inserted via
    ``School_Db(schooltable)``.

    Args:
        schoolname: value matched against ``education.school``.
        schooltable: destination table name.

    Returns:
        The result of ``getCount`` on ``select * from <schooltable>`` when
        rows were found; ``None`` otherwise.
    """
    select_sql = "select * from education where school = '%s'" % schoolname
    connection = GetConnect()
    education_rows = connection.getData(select_sql)
    school_db = School_Db(schooltable)
    if not education_rows:
        return None
    for education_row in education_rows:
        school_db.insertIntoDB(School_Info(education_row[1]))
    return connection.getCount("select * from %s" % schooltable)
def get_school_weibo_and_save(schoolname):
    """Pickle the weibo content of one school.

    Reads the ``content`` column from ``wb_ori_no_pic`` when *schoolname* is
    ``'dlut'``, otherwise from ``<schoolname>_wb_ori_no_pic``. Prints the
    first 100 entries (UTF-8 encoded) as a preview and pickles the full list
    into the ``machine learning data`` directory as
    ``<schoolname>_weibo.pkl``.

    Args:
        schoolname: school identifier used for the table and file names.
    """
    if schoolname == 'dlut':
        schooltable = 'wb_ori_no_pic'
    else:
        schooltable = schoolname + '_wb_ori_no_pic'
    get_weibo_sql = "select content from %s;" % schooltable
    myconnect = GetConnect()
    results = myconnect.getData(get_weibo_sql)
    school_weibo = [row[0] for row in results]
    # Preview only; the slice replaces the manual counter.
    for content in school_weibo[:100]:
        print(content.encode('utf-8'))
    # Mode 'w' is kept from the original; the with-block closes the handle,
    # which the original never did.
    with open("machine learning data\\%s_weibo.pkl" % schoolname, 'w') as out:
        pickle.dump(school_weibo, out)
def getWordFrequency(schoolname):
    """Count keyword occurrences for one school and pickle the counts.

    Reads ``keywords`` from the ``<schoolname>_wordsegment`` rows with
    ``is_meaningful = 1``, splits each value on whitespace and tallies every
    word. Prints the row count, the total number of words and the number of
    distinct words, then pickles the dict into the ``weibo word contrast``
    directory as ``<schoolname>_worddict.pkl``.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (schoolname+'_wordsegment')
    myconnect = GetConnect()
    results = myconnect.getData(get_keyword_sql)
    worddict = {}  # word -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            # dict.get replaces the deprecated has_key() check.
            worddict[w] = worddict.get(w, 0) + 1
    # Total word occurrences (original note: about 270k for dlut).
    print("%s index" % index)
    # Distinct words stored in the dict (original note: about 65k for dlut).
    print(len(worddict))
    # Mode 'w' is kept from the original; the with-block closes the handle.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname, 'w') as out:
        pickle.dump(worddict, out)
def getWordFrequency(schoolname):
    """Tally the keywords of one school and pickle the resulting dict.

    Reads ``keywords`` from the ``<schoolname>_wordsegment`` rows with
    ``is_meaningful = 1``, splits each value on whitespace and counts every
    word. Prints the row count, the total number of words and the number of
    distinct words, then pickles the dict into the ``weibo word contrast``
    directory as ``<schoolname>_worddict.pkl``.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (
        schoolname + '_wordsegment')
    myconnect = GetConnect()
    results = myconnect.getData(get_keyword_sql)
    worddict = {}  # word -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            # dict.get replaces the deprecated has_key() check.
            worddict[w] = worddict.get(w, 0) + 1
    # Total word occurrences (original note: about 270k for dlut).
    print("%s index" % index)
    # Distinct words stored in the dict (original note: about 65k for dlut).
    print(len(worddict))
    # Mode 'w' is kept from the original; the with-block closes the handle.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname, 'w') as out:
        pickle.dump(worddict, out)
def get_one_weibo_data(schoolname, weiboid=None):
    """Fetch the weibo rows of one user of a school.

    When *weiboid* is ``None`` a random id with ``is_wb_ori_no_pic = 1`` is
    drawn from table *schoolname*; if that query returns nothing, a message
    is printed and the fixed id ``'2591961830'`` is used. The weibo are then
    read from ``wb_ori_no_pic`` for ``'dlut'`` or from
    ``<schoolname>_wb_ori_no_pic`` for any other school.

    Args:
        schoolname: school table name / table-name prefix.
        weiboid: id of the user to look up, or ``None`` to pick one.

    Returns:
        The rows (content, upvotes, forwards, reviews, weiboid) as returned
        by ``getData``, or ``None`` when the result is empty.
    """
    db = GetConnect()
    if weiboid is None:
        pick_sql = "select weiboid from %s where is_wb_ori_no_pic = 1 order by rand() limit 1;" % schoolname
        picked = db.getData(pick_sql)
        if picked:
            weiboid = picked[0][0]
        else:
            print("get weiboid wrong")
            weiboid = '2591961830'

    table = 'wb_ori_no_pic' if schoolname == 'dlut' else schoolname + '_wb_ori_no_pic'
    content_sql = "select content, upvotes, forwards, reviews, weiboid from %s where weiboid = %s" % (table, weiboid)
    rows = db.getData(content_sql)
    return rows if rows else None
def main(sourcetable, destable):
    """Segment every row of *sourcetable* and store the result in *destable*.

    Rows are pulled one at a time from the first connection's cursor after
    ``getCount``; columns 0, 1 and 3 are used as wmd5, weiboid and content.
    The content is run through ``getSegments`` and
    ``getKeywordsAndIs_Meaningful`` and the outcome is written with
    ``replace into`` over a second connection. A failed write is reported
    and the loop continues.

    Args:
        sourcetable: name of the table to read.
        destable: name of the table to write.
    """
    myconnect = GetConnect()
    sql = "select * from %s" % sourcetable
    numrows = myconnect.getCount(sql)
    # Separate connection for writes so the read cursor is left undisturbed.
    myconnect2 = GetConnect()
    for _ in xrange(numrows):
        row = myconnect.cursor.fetchone()
        if row is None:
            # Cursor ran out before numrows rows were read; stop cleanly
            # instead of failing on row[0].
            break
        wmd5, weiboid, wcontent = row[0], row[1], row[3]
        segments = str(getSegments(wcontent))
        keywords, is_meaningful = getKeywordsAndIs_Meaningful(wcontent)
        # NOTE(review): values are interpolated straight into the SQL, so a
        # quote inside segments/keywords breaks the statement. Fixing that
        # needs parameter support in executeDB, which is not visible here.
        insert_sql = "replace into %s(wmd5, weiboid, segments, keywords, is_meaningful) values('%s', '%s', '%s', '%s', %s)" % (
            destable, wmd5, weiboid, segments, keywords, is_meaningful)
        try:
            myconnect2.executeDB(insert_sql)
        except Exception as e:
            # Not every exception carries (number, message) in args; fall
            # back to the plain text so the handler itself cannot raise.
            try:
                message = "Error %d: %s" % (e.args[0], e.args[1])
            except (IndexError, TypeError):
                message = "Error: %s" % (e,)
            print(message)
def updateSchoolers(weiboids, schoolname=None):
    """Mark each weiboid in table *schoolname* with ``is_wb_ori_no_pic = 1``.

    Every UPDATE statement is printed before it is executed.

    Args:
        weiboids: iterable of weibo user ids; each is passed through ``str``.
        schoolname: name of the table to update (interpolated into the SQL).
    """
    connection = GetConnect()
    update_sql = "update %s set is_wb_ori_no_pic = 1 where weiboid = '%s'"
    for user_id in weiboids:
        statement = update_sql % (schoolname, str(user_id))
        print(statement)
        connection.executeDB(statement)
def updateProfiles(weiboids):
    """Mark each weiboid in the ``profile`` table with ``is_education = 1``.

    Every UPDATE statement is printed before it is executed.

    Args:
        weiboids: iterable of weibo user ids; each is passed through ``str``.
    """
    connection = GetConnect()
    update_sql = "update profile set is_education = 1 where weiboid = '%s'"
    for user_id in weiboids:
        statement = update_sql % str(user_id)
        print(statement)
        connection.executeDB(statement)
def updateSchools(weiboids, schooltable):
    """Set ``is_profile = 1`` in *schooltable* for each given weiboid.

    One UPDATE statement is executed per id; nothing is printed.

    Args:
        weiboids: iterable of weibo user ids; each is passed through ``str``.
        schooltable: name of the table to update (interpolated into the SQL).
    """
    db = GetConnect()
    template = "update %s set is_profile = 1 where weiboid = '%s'"
    for weibo_id in weiboids:
        db.executeDB(template % (schooltable, str(weibo_id)))
# -*- coding:utf-8 -*-
import sys
import pygame
from pygame.locals import *
from pgu import gui
from getconnect import GetConnect
import my_text_processing as tp
# reload(sys) has to run before sys.setdefaultencoding below: Python 2's
# site module removes that function at startup and reloading sys restores it.
reload(sys)
from intermediate import Intermediate
sys.setdefaultencoding('utf-8')
##########################################################
# Create the intermediate-layer object. Per the original note, this layer
# connects the graphical interface to the code that implements it.
weibo_interm = Intermediate()
# Create the database access layer shared by the functions below.
myconnect = GetConnect()


##########################################################
def get_random_weiboid(schoolname):
    """Return the id of one random weibo user of a school.

    Draws one weiboid with ``is_wb_ori_no_pic = 1`` from table *schoolname*
    using the module-level ``myconnect``. If the query returns nothing, a
    message is printed and the fixed id ``'2591961830'`` is returned.

    Args:
        schoolname: name of the table to sample (interpolated into the SQL).
    """
    query = "select weiboid from %s where is_wb_ori_no_pic = 1 order by rand() limit 1;" % schoolname
    rows = myconnect.getData(query)
    if not rows:
        print("get weiboid wrong")
        return '2591961830'
    return rows[0][0]


##########################################################