def test_print_frame_rate_DoesNotRaiseErrorWithFloatValues(self):
    """Check the text that mp.print_frame_rate prints for a MockCapture.

    Standard output is redirected into an in-memory buffer for the duration
    of the call and is always put back afterwards, so a failure inside
    print_frame_rate cannot leave the rest of the test run writing into
    the buffer.
    """
    # mock 'print' function
    capturedOutput = StringIO.StringIO()
    original_stdout = sys.stdout
    sys.stdout = capturedOutput
    try:
        mp.print_frame_rate(MockCapture())
    finally:
        # Restore the real stream even when print_frame_rate raises;
        # otherwise every later test (and the runner's own report) would
        # keep printing into capturedOutput.
        sys.stdout = original_stdout
    result = capturedOutput.getvalue().strip()
    self.assertTrue(
        "frame rate per second = 5.00\nnumber of frames = 7.00" in result)
def main():
    """Query `start.find` for `source` and print whatever it returns."""
    ## Assign source with reviews of a hotel received from user input
    choice = 1
    # A choice of 0 selects flag 0; any other choice selects flag 1.
    flag = 0 if choice == 0 else 1
    print(start.find(source, flag))
def corpus_to_list():
    """Export tokenised documents per channel and run the topic-model step.

    For every entry in `channel` the function:
      1. selects (ti, content) rows from the `q_test` MySQL table,
      2. segments each unique-titled document with `pseg.cut`, keeping words
         that are not stopwords, are longer than one character and whose
         part-of-speech flag is listed in `valid`,
      3. writes `reuters.titles` and `reuters.tokens` under
         `root_directory_lda` and `list_of_lists.txt` under `root_directory`,
      4. executes `docToMatrix.py` and `formal_matrix_title.py` and finally
         calls `excuteldamodel`.

    Takes no arguments and returns nothing; all results are side effects
    (console output, files, whatever the executed scripts do).

    NOTE(review): `execfile` is called without an explicit namespace, so the
    executed scripts can presumably see this function's variables. Locals
    that look unused here (`tdm`, `delset`, `a`, `tdms`, `word_box`,
    `all_docs_to_lists`, `count_invalid`) may be consumed there -- confirm
    before renaming or removing anything.
    """
    # stopwords
    dicFile = open('stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    # A single space is treated as a stopword as well.
    stopwordList.append(' ')
    for stopword in stopwords:
        # Python 2: the file yields byte strings, decoded here to unicode.
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()
    # Part-of-speech flags that are kept (compared against `i.flag` below).
    valid = ['ns', 'vn', 'n', 'nz', 'eng', 'nr', 'nrt']
    # channel = ["CCTV5"]
    # channel = ["CCTV","CCTV-阿语","CCTV1","CCTV10","CCTV11","CCTV12","CCTV13","CCTV14","CCTV15","CCTV2","CCTV3",\
    # "CCTV4","CCTV5","CCTV6","CCTV7","CCTV9","QINGXUANZE","WEIBO","WEIXIN","zgdsb","ZHONGGUODIANSHIBAO",\
    # "中国电视报","央视","央视专区","央视科技","测试"]
    # channel = ["CCTV2","WEIBO", "WEIXIN"]
    channel = ["WEIBO"]
    sqlConn = MySQLdb.connect(host='192.168.168.105', user='******', passwd='', db='cctv', charset='utf8')
    sqlcursor = sqlConn.cursor()
    tdm = textmining.TermDocumentMatrix()
    delset = string.punctuation
    a = list()
    for one_type in channel:
        one_type_text = list()
        print one_type
        word_box = list()
        # Maps each kept word to the part-of-speech flag it was seen with.
        word_nominal = dict()
        # 1. Moderate amount of data: run everything in one pass
        # Rows whose content contains a URL are dropped
        # sqlcursor.execute('''SELECT ti,content from q_test where channel = "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' %(one_type))
        #
        # 2. Too much data: run 3000 rows at a time
        base_number_id_index = 3000
        # NOTE(review): the query text is built with % string formatting
        # rather than driver-side parameters; safe only while `channel`
        # stays a hard-coded list.
        sqlcursor.execute(
            '''SELECT id from q_test where channel = "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (one_type))
        # The fetched rows are turned into their printed form and the
        # brackets, parentheses and "L" characters are stripped so that only
        # comma-separated id literals remain.
        id = str(list(sqlcursor.fetchall())).replace("(", "")
        id = id.replace("[", "")
        id = id.replace("L", "")
        id = id.replace("]", "")
        # NOTE(review): each id literal is passed through eval(); this
        # evaluates text derived from database content.
        id = sorted(map(eval, id.replace(",)", "").split(",")))
        # Batch boundaries: the ids found at positions 3000, 6000 and 9000 of
        # the sorted list. Indexing raises IndexError when fewer ids match.
        base_number_id_end = id[base_number_id_index]
        base_number_id_end_2 = id[base_number_id_index * 2]
        base_number_id_end_3 = id[base_number_id_index * 3]
        # First run
        # sqlcursor.execute(
        # '''SELECT ti,content from q_test where channel = "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (
        # one_type, base_number_id_end))
        # Second run
        # sqlcursor.execute(
        # '''SELECT ti,content from q_test where channel = "%s" and id > "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (
        # one_type, base_number_id_end,base_number_id_end_2))
        # Third run
        # Both bounds are strict (id > ... and id < ...), so the rows whose
        # id equals a boundary value are not selected by this query.
        sqlcursor.execute(
            '''SELECT ti,content from q_test where channel = "%s" and id > "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (one_type, base_number_id_end_2, base_number_id_end_3))
        # ***********end*******************************
        traindata = list(sqlcursor.fetchall())
        # Counts documents with a title not seen before in this channel.
        ind = 0
        print "doc number:", len(traindata)
        all_docs_to_lists = list()
        allDoc_coma_join_lists = list()
        title_box = list()
        tdms = list()
        for PubTitle, Storyline in traindata:
            word_box_single = list()
            # Rows repeating an already-seen title are skipped entirely.
            if PubTitle not in title_box:
                ind += 1
                title_box.append(PubTitle)
                # The text to segment is the content followed by the title.
                full_text = str(Storyline) + str(PubTitle)
                one_type_text.append(full_text)
                # full_text = full_text.replace("\n", "")
                # full_text = full_text.translate(None, delset)
                # full_text = jieba.cut(full_text, cut_all=False)
                full_text = pseg.cut(full_text)
                for i in full_text:
                    if i.word not in stopwordList:
                        # if len(i) == 0:
                        # print 'kkk',i
                        if len(i.word) > 1:
                            # if (i != "test") and (i != "title") and (i != "content") and (i != "description") and (i != "time") and (len(i) != 8):
                            # if chara in valid:
                            if i.flag in valid:
                                word_box.append(i.word)
                                word_nominal[i.word] = i.flag
                                word_box_single.append(i.word)
                # One comma-joined line of kept words per unique document.
                word_box_str = ','.join(word_box_single)
                all_docs_to_lists.append(word_box_single)
                allDoc_coma_join_lists.append(word_box_str)
        print '总共多少篇', ind, len(title_box)
        # Prepare reuters.titles below
        title_file = open(root_directory_lda + '/reuters.titles', 'w+')
        mark3 = 0
        # docs_total = ind
        if len(title_box) == 0:
            # print("{} (top topic: {})".format(doc_topic[i].argmax(), titles[i]))
            print("{} (category has NO news.)".format(one_type))
            # NOTE(review): title_file was opened above and is not closed on
            # this path.
            continue
        for one_title in title_box:
            mark3 += 1
            # Every title except the last one is followed by a newline.
            if mark3 != len(title_box):
                title_file.write("%s\n" % str(one_title).encode('utf-8'))
            else:
                title_file.write("%s" % str(one_title).encode('utf-8'))
        title_file.close()
        print 'reuters.titles is ready-----------------------------------------------------------'
        # Prepare the tokens below
        # Keep only the more meaningful words; verbs, adjectives and the like are dropped
        ffile = open(root_directory_lda + '/reuters.tokens', 'w+')
        docs_key_words = MainProgram.key_word_priority(one_type_text)
        count = 0
        # Total number of keywords across all documents.
        sum = 0
        count_invalid = 0
        for i in range(0, len(docs_key_words)):
            sum += len(docs_key_words[i])
        # Words already written, so each token appears once in the file.
        word_already = list()
        for i in range(0, len(docs_key_words)):
            for word in docs_key_words[i]:
                count += 1
                nominal = word_nominal.get(word)
                # Words with no recorded flag (None) or a flag outside
                # `valid` are not written.
                if nominal not in valid:
                    # count += 1
                    # print word, nominal
                    continue
                # Only the last element of the list is written without a newline, so it is enough to locate the last element
                # NOTE(review): this branch does not consult word_already, so
                # the final keyword is written even if it appeared before.
                if count == sum:
                    ffile.write("%s" % word.encode('utf-8'))
                else:
                    if word not in word_already:
                        ffile.write("%s\n" % word.encode('utf-8'))
                        word_already.append(word)
        ffile.close()
        print 'reuters.tokens is ready-----------------------------------------------------------'
        list_of_lists_file = open(root_directory + '/list_of_lists.txt', 'w+')
        mark = 0
        print 'len(allDoc_coma_join_lists)', len(allDoc_coma_join_lists)
        for one in allDoc_coma_join_lists:
            mark += 1
            # `ind` equals the number of entries appended above, so this
            # leaves only the last line without a trailing newline.
            if mark != ind:
                list_of_lists_file.write("%s\n" % one.encode('utf-8'))
            else:
                list_of_lists_file.write("%s" % one.encode('utf-8'))
        list_of_lists_file.close()
        print "mark", mark
        print "list_of_lists.txt is ready"
        # Run the follow-up scripts, then the model step for this channel.
        execfile('docToMatrix.py')
        execfile('formal_matrix_title.py')
        excuteldamodel(
            mysqlhostIP='192.168.168.105',
            how_many_topics=30,
            how_many_iteration=100,
            how_many_topic_words=30,
            catcat=one_type,
            dbname='cctv',
        )
def test_one(self):
    """A Player built with ('Jose', 100) reports that name and balance."""
    jose = MainProgram.Player('Jose', 100)
    self.assertEqual(jose.name, 'Jose')
    self.assertEqual(jose.balance, 100)
# Launcher: create the MainProgram object and call its MainWindow() method.
import MainProgram

# 'Distance' is handed straight to the MainProgram constructor; what it
# selects is defined in MainProgram, not here.
mp = MainProgram.MainProgram('Distance')
mp.MainWindow()
def test_get_name_from_path_returnsPath(self):
    """get_name_from_path("boo/foo") must yield "foo"."""
    self.assertEqual(mp.get_name_from_path("boo/foo"), "foo")
def test_calculate_video_duration_setsStopFrameNo(self):
    """After calculate_video_duration runs, mp.stop_frame_no must be 7."""
    capture = MockCapture()
    mp.calculate_video_duration(capture)
    self.assertEqual(mp.stop_frame_no, 7)
def test_calculate_frames_ReturnsRightValue(self):
    """calculate_frames(MockCapture(), 5) must return 25."""
    self.assertEqual(mp.calculate_frames(MockCapture(), 5), 25)