def updateProfiles(weiboids):
    """Set ``is_education = 1`` in ``profile`` for every given weibo id.

    One UPDATE statement is built, echoed to stdout and executed per id, all
    on a single ``GetConnect`` instance.

    weiboids -- iterable of ids; each is passed through ``str()`` before
                being interpolated into the statement.
    """
    connection = GetConnect()
    template = "update profile set is_education = 1 where weiboid = '%s'"
    for weiboid in weiboids:
        statement = template % str(weiboid)
        print(statement)
        connection.executeDB(statement)
def updateSchoolers(weiboids, schoolname=None):
    """Set ``is_wb_ori_no_pic = 1`` in table ``schoolname`` for each weibo id.

    Every statement is printed before it is executed.

    weiboids   -- iterable of ids; each is converted with ``str()``.
    schoolname -- table name, interpolated verbatim into the SQL. With the
                  default ``None`` the statement targets a table literally
                  named ``None``.
    """
    connection = GetConnect()
    template = "update %s set is_wb_ori_no_pic = 1 where weiboid = '%s'"
    for weiboid in weiboids:
        statement = template % (schoolname, str(weiboid))
        print(statement)
        connection.executeDB(statement)
def getWeiboIds(schooltable):
    """Return the ``weiboid`` of every row in ``schooltable`` with ``is_profile = -1``.

    schooltable -- table name, interpolated verbatim into the query.

    Returns a list holding the first column of each result row, or an empty
    list when the query yields nothing.
    """
    connection = GetConnect()
    query = 'select weiboid from %s where is_profile = -1' % schooltable
    rows = connection.getData(query)
    if not rows:
        return []
    return [row[0] for row in rows]
def getWeiboIds():
    """Return the ``weiboid`` of every ``profile`` row with ``is_education = -1``.

    Returns a list holding the first column of each result row, or an empty
    list when the query yields nothing.
    """
    connection = GetConnect()
    rows = connection.getData(
        'select weiboid from profile where is_education = -1')
    if not rows:
        return []
    return [row[0] for row in rows]
def get_school_weibo(schoolname):
    """Dump the ``<schoolname>_wordsegment`` rows to a pickle file.

    Selects ``segments`` and ``is_meaningful`` from the table, prints the row
    count and a preview of at most the first 100 rows, then pickles the
    complete result to ``dict weibo data\\<schoolname>_seg_weibo.pkl``.

    schoolname -- table-name prefix, interpolated verbatim into the query and
                  into the output file name.
    """
    myconnect = GetConnect()
    school_weibo_table = schoolname + '_wordsegment'
    school_weibo = 'select segments, is_meaningful from %s' % school_weibo_table
    weibo_content_results = myconnect.getData(school_weibo)
    print(len(weibo_content_results))
    # Slice instead of indexing 0..99 so that tables with fewer than 100 rows
    # no longer raise IndexError during the preview.
    for row in weibo_content_results[:100]:
        print('%s %s' % (row[0], row[1]))
    # The context manager guarantees the file is flushed and closed.
    with open('dict weibo data\\%s_seg_weibo.pkl' % (schoolname), 'w') as out:
        pickle.dump(weibo_content_results, out)
def getSchooWeiboMeaning(schoolname):
    """Return per-``is_meaningful`` row counts of ``<schoolname>_wordsegment``.

    Runs a ``group by is_meaningful`` count query and returns
    ``[count of second result row, count of first result row]`` as ints, or
    an empty list when the query yields nothing.

    NOTE(review): the query has no ORDER BY and exactly two groups are
    assumed; with a single group the index access raises IndexError.
    """
    table = schoolname + '_wordsegment'
    get_school_weibo_meaning_num = 'select count(*) as meaningcount,is_meaningful from %s group by is_meaningful;' % table
    rows = GetConnect().getData(get_school_weibo_meaning_num)
    if not rows:
        return []
    # Second row first, then first row -- same order as the original appends.
    return [int(rows[1][0]), int(rows[0][0])]
def getWeiboIds(schoolname=None):
    """Return up to 20 randomly chosen ``weiboid`` values from table ``schoolname``.

    The query applies no filter; it only orders by ``rand()`` and limits the
    result to 20 rows.

    schoolname -- table name, interpolated verbatim into the query. With the
                  default ``None`` the query targets a table literally named
                  ``None``.

    Returns a list holding the first column of each result row, or an empty
    list when the query yields nothing.
    """
    query = "select weiboid from %s order by rand() limit 20" % schoolname
    rows = GetConnect().getData(query)
    if not rows:
        return []
    return [row[0] for row in rows]
def getMarkedWeibo():
    """Split the labelled rows of ``markedweibo`` by mark and pickle each group.

    Rows are read as ``(content, mark)``. According to the original
    comments the marks mean: -1 unknown, 1 positive, 2 negative, 3 activity
    repost (contains a link, usually an advertisement), 4 objective (no
    sentiment expressed). Marks other than 1-4 are ignored.

    Four pickle files are written under ``maked weibo\\``; nothing is
    returned.
    """
    pos_weibo = []  # mark 1: positive sentiment
    neg_weibo = []  # mark 2: negative sentiment
    act_weibo = []  # mark 3: activity reposts / advertisements
    obj_weibo = []  # mark 4: objective, no sentiment
    by_mark = {1: pos_weibo, 2: neg_weibo, 3: act_weibo, 4: obj_weibo}

    # Stop words used by the sentiment filter below.
    sentiment_stopwords = get_txt_data('sentiment_stopword.txt', 'lines')

    get_mark_weibo_sql = "select content, mark from markedweibo;"
    myconnect = GetConnect()
    results = myconnect.getData(get_mark_weibo_sql)
    for weibo in results:
        bucket = by_mark.get(weibo[1])
        if bucket is not None:
            bucket.append(weibo[0])

    def strip_stopwords(texts):
        # Drop stop words and single spaces from every item of every text.
        return [[word for word in text
                 if word not in sentiment_stopwords and word != ' ']
                for text in texts]

    # NOTE(review): the stop-word-filtered lists are computed here but, as in
    # the original code, the *unfiltered* contents are what get pickled below
    # (under seg_* file names). The output is left unchanged because the
    # consumers of these pickles are not visible from this function --
    # confirm which data set is actually intended.
    seg_results = [strip_stopwords(group)
                   for group in (pos_weibo, neg_weibo, act_weibo, obj_weibo)]

    outputs = (
        ("maked weibo\\seg_pos_result.pkl", pos_weibo),
        ("maked weibo\\seg_neg_result.pkl", neg_weibo),
        ("maked weibo\\seg_act_result.pkl", act_weibo),
        ("maked weibo\\seg_obj_result.pkl", obj_weibo),
    )
    for path, data in outputs:
        # Context manager so every file is flushed and closed promptly.
        with open(path, 'w') as out:
            pickle.dump(data, out)
def conveyToSchoolTable(schoolname, schooltable):
    """Copy the ``education`` rows of one school into ``schooltable``.

    Every ``education`` row whose ``school`` equals ``schoolname`` is wrapped
    in ``School_Info`` (built from the row's second column) and inserted
    through ``School_Db(schooltable)``.

    Returns whatever ``getCount`` reports for ``select * from <schooltable>``
    after the inserts.
    """
    connection = GetConnect()
    rows = connection.getData(
        "select * from education where school = '%s'" % schoolname)
    target = School_Db(schooltable)
    for row in rows or ():
        target.insertIntoDB(School_Info(row[1]))
    return connection.getCount("select * from %s" % schooltable)
def conveyToSchoolTable(schoolname):
    """Copy the ``education`` rows of one school into the ``dlut`` table.

    Every ``education`` row whose ``school`` equals ``schoolname`` is wrapped
    in ``Dlut`` (built from the row's second column) and inserted through
    ``Dlut_Db``. The original docstring names Dalian University of Technology
    as the intended school.

    Returns whatever ``getCount`` reports for ``select * from dlut`` after
    the inserts.
    """
    connection = GetConnect()
    rows = connection.getData(
        "select * from education where school = '%s'" % schoolname)
    target = Dlut_Db()
    for row in rows or ():
        target.insertIntoDB(Dlut(row[1]))
    return connection.getCount("select * from dlut")
def main(sourcetable, destable):
    """Segment every row of ``sourcetable`` and store the result in ``destable``.

    For each source row, columns 0, 1 and 3 are taken as ``wmd5``,
    ``weiboid`` and the text content. The content is passed to
    ``getSegments`` and ``getKeywordsAndIs_Meaningful`` and the results are
    written with a ``replace into`` statement over a second connection.
    Failures of a single write are reported on stdout and do not stop the
    loop.

    sourcetable -- table to read from (interpolated verbatim into the SQL).
    destable    -- table to write to (interpolated verbatim into the SQL).
    """
    reader = GetConnect()
    # Presumably getCount executes the query on reader.cursor, which is then
    # drained row by row below -- TODO confirm against GetConnect.
    numrows = reader.getCount("select * from %s" % sourcetable)
    writer = GetConnect()
    insert_template = (
        "replace into %s(wmd5, weiboid, segments, keywords, is_meaningful) "
        "values('%s', '%s', '%s', '%s', %s)")
    for _ in xrange(numrows):
        row = reader.cursor.fetchone()
        if row is None:
            # Cursor exhausted before the reported count was reached.
            break
        wmd5, weiboid, wcontent = row[0], row[1], row[3]
        segments = str(getSegments(wcontent))
        keywords, is_meaningful = getKeywordsAndIs_Meaningful(wcontent)
        # NOTE(review): values are spliced into the SQL text unescaped, so a
        # quote inside segments/keywords breaks the statement. Switch to
        # parameter binding if executeDB supports it.
        insert_sql = insert_template % (
            destable, wmd5, weiboid, segments, keywords, is_meaningful)
        try:
            writer.executeDB(insert_sql)
        except Exception as e:
            # Not every exception carries (errno, message); fall back to its
            # repr so that reporting an error can never raise by itself.
            try:
                message = "Error %d: %s" % (e.args[0], e.args[1])
            except (IndexError, TypeError):
                message = "Error: %r" % (e,)
            print(message)
def get_school_weibo_and_save(schoolname):
    """Pickle the ``content`` column of a school's weibo table.

    The table is ``wb_ori_no_pic`` for ``'dlut'`` and
    ``<schoolname>_wb_ori_no_pic`` otherwise. The first 100 contents are
    echoed to stdout (UTF-8 encoded) as a preview; all contents are written
    to ``machine learning data\\<schoolname>_weibo.pkl``.
    """
    if schoolname == 'dlut':
        schooltable = 'wb_ori_no_pic'
    else:
        schooltable = schoolname + '_wb_ori_no_pic'
    get_weibo_sql = "select content from %s;" % schooltable
    myconnect = GetConnect()
    results = myconnect.getData(get_weibo_sql)
    school_weibo = []
    for index, row in enumerate(results):
        school_weibo.append(row[0])
        if index < 100:
            print(row[0].encode('utf-8'))
    # Context manager so the pickle file is flushed and closed.
    with open("machine learning data\\%s_weibo.pkl" % schoolname, 'w') as out:
        pickle.dump(school_weibo, out)
def getWordFrequency(schoolname):
    """Count keyword occurrences for a school and pickle the counts.

    Reads ``keywords`` from ``<schoolname>_wordsegment`` where
    ``is_meaningful = 1``, splits every value on whitespace and counts each
    word. Prints the number of rows, the total number of word occurrences
    and the number of distinct words, then writes the ``{word: count}`` dict
    to ``weibo word contrast\\<schoolname>_worddict.pkl``.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (schoolname+'_wordsegment')
    myconnect = GetConnect()
    results = myconnect.getData(get_keyword_sql)
    worddict = {}  # word -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            worddict[w] = worddict.get(w, 0) + 1
    # Total number of word occurrences (original note: about 270k for dlut).
    print('%d index' % index)
    # Number of distinct words (original note: about 65k for dlut).
    print(len(worddict))
    # Context manager so the pickle file is flushed and closed.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname, 'w') as out:
        pickle.dump(worddict, out)
def getWordFrequency(schoolname):
    """Build and pickle a word-frequency dict from a school's keywords.

    Selects ``keywords`` from ``<schoolname>_wordsegment`` (rows with
    ``is_meaningful = 1``), whitespace-splits each value and tallies the
    words. The row count, the total word occurrences and the distinct-word
    count are printed, and the ``{word: count}`` dict is saved to
    ``weibo word contrast\\<schoolname>_worddict.pkl``.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (
        schoolname + '_wordsegment')
    myconnect = GetConnect()
    results = myconnect.getData(get_keyword_sql)
    worddict = {}  # word -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            worddict[w] = worddict.get(w, 0) + 1
    # Total number of word occurrences (original note: about 270k for dlut).
    print('%d index' % index)
    # Number of distinct words (original note: about 65k for dlut).
    print(len(worddict))
    # Context manager so the pickle file is flushed and closed.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname, 'w') as out:
        pickle.dump(worddict, out)
def get_one_weibo_data(schoolname, weiboid=None):
    """Fetch the weibo rows of one user of a school.

    When ``weiboid`` is None, one id is picked at random from table
    ``schoolname`` among rows with ``is_wb_ori_no_pic = 1``; if that lookup
    yields nothing, a message is printed and the hard-coded id
    ``'2591961830'`` is used instead.

    The weibo table is ``wb_ori_no_pic`` for ``'dlut'`` and
    ``<schoolname>_wb_ori_no_pic`` otherwise.

    Returns the result rows ``(content, upvotes, forwards, reviews,
    weiboid)``, or None when the query yields nothing.
    """
    connection = GetConnect()
    if weiboid is None:
        id_query = "select weiboid from %s where is_wb_ori_no_pic = 1 order by rand() limit 1;" % schoolname
        id_rows = connection.getData(id_query)
        if not id_rows:
            print("get weiboid wrong")
            weiboid = '2591961830'
        else:
            weiboid = id_rows[0][0]

    table = 'wb_ori_no_pic' if schoolname == 'dlut' else schoolname + '_wb_ori_no_pic'
    content_query = "select content, upvotes, forwards, reviews, weiboid from %s where weiboid = %s" % (table, weiboid)
    rows = connection.getData(content_query)
    return rows if rows else None
def main(sourcetable, destable):
    """Segment every row of ``sourcetable`` and store the result in ``destable``.

    For each source row, columns 0, 1 and 3 are taken as ``wmd5``,
    ``weiboid`` and the text content. The content is passed to
    ``getSegments`` and ``getKeywordsAndIs_Meaningful`` and the results are
    written with a ``replace into`` statement over a second connection.
    Failures of a single write are reported on stdout and do not stop the
    loop.

    sourcetable -- table to read from (interpolated verbatim into the SQL).
    destable    -- table to write to (interpolated verbatim into the SQL).
    """
    reader = GetConnect()
    # Presumably getCount executes the query on reader.cursor, which is then
    # drained row by row below -- TODO confirm against GetConnect.
    numrows = reader.getCount("select * from %s" % sourcetable)
    writer = GetConnect()
    insert_template = (
        "replace into %s(wmd5, weiboid, segments, keywords, is_meaningful) "
        "values('%s', '%s', '%s', '%s', %s)")
    for _ in xrange(numrows):
        row = reader.cursor.fetchone()
        if row is None:
            # Cursor exhausted before the reported count was reached.
            break
        wmd5, weiboid, wcontent = row[0], row[1], row[3]
        segments = str(getSegments(wcontent))
        keywords, is_meaningful = getKeywordsAndIs_Meaningful(wcontent)
        # NOTE(review): values are spliced into the SQL text unescaped, so a
        # quote inside segments/keywords breaks the statement. Switch to
        # parameter binding if executeDB supports it.
        insert_sql = insert_template % (
            destable, wmd5, weiboid, segments, keywords, is_meaningful)
        try:
            writer.executeDB(insert_sql)
        except Exception as e:
            # Not every exception carries (errno, message); fall back to its
            # repr so that reporting an error can never raise by itself.
            try:
                message = "Error %d: %s" % (e.args[0], e.args[1])
            except (IndexError, TypeError):
                message = "Error: %r" % (e,)
            print(message)
def updateSchoolers(weiboids, schoolname=None):
    """Mark the given weibo ids in table ``schoolname`` with ``is_wb_ori_no_pic = 1``.

    One UPDATE per id is printed and then executed on a shared connection.

    weiboids   -- iterable of ids; each is converted with ``str()``.
    schoolname -- table name, interpolated verbatim into the SQL. With the
                  default ``None`` the statement targets a table literally
                  named ``None``.
    """
    db = GetConnect()
    statements = (
        "update %s set is_wb_ori_no_pic = 1 where weiboid = '%s'" % (schoolname, str(wid))
        for wid in weiboids
    )
    # The generator is lazy, so each statement is still built, printed and
    # executed one id at a time.
    for statement in statements:
        print(statement)
        db.executeDB(statement)
def updateProfiles(weiboids):
    """Mark the given weibo ids in ``profile`` with ``is_education = 1``.

    One UPDATE per id is printed and then executed on a shared connection.

    weiboids -- iterable of ids; each is converted with ``str()``.
    """
    db = GetConnect()
    statements = (
        "update profile set is_education = 1 where weiboid = '%s'" % str(wid)
        for wid in weiboids
    )
    # The generator is lazy, so each statement is still built, printed and
    # executed one id at a time.
    for statement in statements:
        print(statement)
        db.executeDB(statement)
def updateSchools(weiboids, schooltable):
    """Set ``is_profile = 1`` in ``schooltable`` for every given weibo id.

    weiboids    -- iterable of ids; each is converted with ``str()``.
    schooltable -- table name, interpolated verbatim into the SQL.
    """
    connection = GetConnect()
    template = "update %s set is_profile = 1 where weiboid = '%s'"
    for weiboid in weiboids:
        connection.executeDB(template % (schooltable, str(weiboid)))
# -*- coding:utf-8 -*- import sys import pygame from pygame.locals import * from pgu import gui from getconnect import GetConnect import my_text_processing as tp reload(sys) from intermediate import Intermediate sys.setdefaultencoding('utf-8') ########################################################## # 初始化中间层对象,中间层是用来连接该图形化界面与实现该图形化 weibo_interm = Intermediate() # 初始化数据库访问层 myconnect = GetConnect() ########################################################## def get_random_weiboid(schoolname): '随机返回一个学校的微博用户id' get_weibo_id_sql = "select weiboid from %s where is_wb_ori_no_pic = 1 order by rand() limit 1;" % schoolname results = myconnect.getData(get_weibo_id_sql) if results: weiboid = results[0][0] else: print "get weiboid wrong" weiboid = '2591961830' return weiboid ##########################################################