예제 #1
0
 def test_print_frame_rate_DoesNotRaiseErrorWithFloatValues(self):
     # Capture everything mp.print_frame_rate writes to stdout so the
     # printed text can be inspected.
     capturedOutput = StringIO.StringIO()
     original_stdout = sys.stdout
     sys.stdout = capturedOutput
     try:
         mp.print_frame_rate(MockCapture())
     finally:
         # Always put the real stdout back; otherwise every later test (and
         # the test runner's own report) would keep writing into the buffer.
         sys.stdout = original_stdout
     result = capturedOutput.getvalue().strip()
     self.assertIn(
         "frame rate per second = 5.00\nnumber of frames = 7.00", result)
def main():
    # Run start.find on `source` and print whatever it returns.
    #
    # `source` (the hotel reviews received from user input) and `start` are
    # not assigned inside this function; they must already exist at module
    # level when main() is called.
    choice = 1
    # The second argument mirrors `choice`: 0 when choice is 0, otherwise 1.
    mode = 0 if choice == 0 else 1
    result = start.find(source, mode)

    print(result)
예제 #3
0
def corpus_to_list():
    # Build the LDA input files for every channel listed in `channel`, then run
    # the topic model on them.
    #
    # Per channel, the function:
    #   1. selects the ids of the rows in table `q_test` that match the
    #      channel/content filters, then fetches (ti, content) for the rows
    #      whose id lies in a fixed window of that id list,
    #   2. keeps the first document per title, segments it with `pseg.cut` and
    #      keeps words that are not stopwords, are longer than one character
    #      and whose part-of-speech flag is listed in `valid`,
    #   3. writes reuters.titles, reuters.tokens and list_of_lists.txt,
    #   4. runs docToMatrix.py and formal_matrix_title.py via execfile and
    #      calls `excuteldamodel`.
    #
    # Takes no arguments and contains no return statement. It relies on names
    # that are not defined in this function: MySQLdb, textmining, string, pseg,
    # MainProgram, root_directory, root_directory_lda and excuteldamodel.
    # The print statements and execfile calls make this Python 2 code.

    # Load the stopword list: one entry per line, decoded from UTF-8.
    dicFile = open('stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    # A single space is always treated as a stopword.
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()
    # Part-of-speech flags a segmented word must carry to be kept.
    valid = ['ns', 'vn', 'n', 'nz', 'eng', 'nr', 'nrt']
    # Alternative channel selections, kept for reference:
    # channel = ["CCTV5"]
    # channel = ["CCTV","CCTV-阿语","CCTV1","CCTV10","CCTV11","CCTV12","CCTV13","CCTV14","CCTV15","CCTV2","CCTV3",\
    #            "CCTV4","CCTV5","CCTV6","CCTV7","CCTV9","QINGXUANZE","WEIBO","WEIXIN","zgdsb","ZHONGGUODIANSHIBAO",\
    #            "中国电视报","央视","央视专区","央视科技","测试"]
    # channel = ["CCTV2","WEIBO", "WEIXIN"]
    channel = ["WEIBO"]
    # NOTE(review): host and credentials are hard-coded, and neither the
    # connection nor the cursor is closed anywhere in this function.
    sqlConn = MySQLdb.connect(host='192.168.168.105',
                              user='******',
                              passwd='',
                              db='cctv',
                              charset='utf8')
    sqlcursor = sqlConn.cursor()
    # `tdm`, `delset` and `a` are not referenced again in this function; the
    # scripts run through execfile below may read them -- TODO confirm.
    tdm = textmining.TermDocumentMatrix()
    delset = string.punctuation
    a = list()
    for one_type in channel:
        # Raw text (content + title) of every kept document of this channel.
        one_type_text = list()
        print one_type
        # `word_box` collects every kept word of the channel; it is only
        # appended to in this function.
        word_box = list()
        # Maps each kept word to the part-of-speech flag it was last seen with.
        word_nominal = dict()
        # 1. Moderate data volume: process everything in one run.
        # Content containing URLs is dropped.
        # sqlcursor.execute('''SELECT ti,content from q_test where channel =  "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' %(one_type))
        # 2. Data volume too large: process 3000 rows per run.
        base_number_id_index = 3000
        # NOTE(review): the SQL text is built with % string formatting rather
        # than query parameters; safe only while `channel` stays hard-coded.
        sqlcursor.execute(
            '''SELECT id from q_test where channel =  "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';'''
            % (one_type))
        # Flatten the fetched id rows by editing their string representation
        # (dropping "(", "[", "L", "]" and ",)") and eval-ing each
        # comma-separated piece, then sort the result.
        # NOTE(review): this uses eval on text derived from the database and
        # rebinds the builtin name `id` for the rest of the function.
        id = str(list(sqlcursor.fetchall())).replace("(", "")
        id = id.replace("[", "")
        id = id.replace("L", "")
        id = id.replace("]", "")
        id = sorted(map(eval, id.replace(",)", "").split(",")))
        # Boundary ids at sorted positions 3000, 6000 and 9000.
        # NOTE(review): raises IndexError when fewer than 9001 ids match.
        base_number_id_end = id[base_number_id_index]
        base_number_id_end_2 = id[base_number_id_index * 2]
        base_number_id_end_3 = id[base_number_id_index * 3]
        # First run
        # sqlcursor.execute(
        #     '''SELECT ti,content from q_test where channel =  "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (
        #     one_type, base_number_id_end))
        # Second run
        # sqlcursor.execute(
        #     '''SELECT ti,content from q_test where channel =  "%s" and id > "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';''' % (
        #         one_type, base_number_id_end,base_number_id_end_2))
        # Third run: rows whose id lies strictly between the 2nd and 3rd
        # boundary ids.
        sqlcursor.execute(
            '''SELECT ti,content from q_test where channel =  "%s" and id > "%s" and id < "%s" and length(content) >0 and length(ti) > 0 and content NOT REGEXP 'http://|测试|test' and ti NOT REGEXP '测试|test';'''
            % (one_type, base_number_id_end_2, base_number_id_end_3))
        # ***********end*******************************
        traindata = list(sqlcursor.fetchall())
        # Number of documents kept, i.e. rows whose title was not seen before.
        ind = 0
        print "doc number:", len(traindata)
        # Per kept document: its list of kept words, and the same words joined
        # with commas.
        all_docs_to_lists = list()
        allDoc_coma_join_lists = list()
        # Titles already seen; used to skip rows that repeat a title.
        title_box = list()
        # `tdms` is not referenced again in this function.
        tdms = list()
        for PubTitle, Storyline in traindata:
            word_box_single = list()
            if PubTitle not in title_box:
                ind += 1
                title_box.append(PubTitle)
                # Content first, then title, concatenated without a separator.
                full_text = str(Storyline) + str(PubTitle)
                one_type_text.append(full_text)
                # full_text = full_text.replace("\n", "")
                # full_text = full_text.translate(None, delset)
                # full_text = jieba.cut(full_text, cut_all=False)
                # Each item produced by pseg.cut is read through .word/.flag.
                full_text = pseg.cut(full_text)
                for i in full_text:
                    if i.word not in stopwordList:
                        # if len(i) == 0:
                        #     print 'kkk',i
                        if len(i.word) > 1:
                            # if (i != "test") and (i != "title") and (i != "content") and (i != "description") and (i != "time") and (len(i) != 8):
                            # if chara in valid:
                            if i.flag in valid:
                                word_box.append(i.word)
                                word_nominal[i.word] = i.flag
                                word_box_single.append(i.word)
                word_box_str = ','.join(word_box_single)
                all_docs_to_lists.append(word_box_single)
                allDoc_coma_join_lists.append(word_box_str)

        print '总共多少篇', ind, len(title_box)
        # Prepare reuters.titles below.
        # NOTE(review): the file is opened (and truncated by 'w+') before the
        # emptiness check; the `continue` branch below skips title_file.close().
        title_file = open(root_directory_lda + '/reuters.titles', 'w+')
        mark3 = 0
        # docs_total = ind

        if len(title_box) == 0:
            # print("{} (top topic: {})".format(doc_topic[i].argmax(), titles[i]))
            print("{} (category has NO news.)".format(one_type))
            continue
        # One title per line; the last title is written without a trailing
        # newline.
        for one_title in title_box:
            mark3 += 1
            if mark3 != len(title_box):
                title_file.write("%s\n" % str(one_title).encode('utf-8'))
            else:
                title_file.write("%s" % str(one_title).encode('utf-8'))
        title_file.close()
        print 'reuters.titles is ready-----------------------------------------------------------'

        # Prepare the tokens below.
        # Only the more meaningful words are kept; verbs, adjectives and the
        # like have been removed.
        ffile = open(root_directory_lda + '/reuters.tokens', 'w+')
        # The result is indexed per position and each entry is iterated as a
        # collection of words -- presumably one keyword list per document;
        # TODO confirm against MainProgram.key_word_priority.
        docs_key_words = MainProgram.key_word_priority(one_type_text)
        # `count` is the running number of words visited; `sum` is the total
        # number of words over all entries (it rebinds the builtin name `sum`
        # inside this function). `count_invalid` is not used afterwards.
        count = 0
        sum = 0
        count_invalid = 0
        for i in range(0, len(docs_key_words)):
            sum += len(docs_key_words[i])
        # Words already written to the tokens file, to avoid duplicates.
        word_already = list()
        for i in range(0, len(docs_key_words)):
            for word in docs_key_words[i]:
                count += 1
                nominal = word_nominal.get(word)
                # Skip words whose recorded flag is missing or not in `valid`.
                if nominal not in valid:
                    # count += 1
                    # print word, nominal
                    continue
                # Only the last element gets no trailing newline; it is found
                # by comparing the running count with the total.
                # NOTE(review): the very last word is written without the
                # duplicate check, and only if it passes the filter above.
                if count == sum:
                    ffile.write("%s" % word.encode('utf-8'))
                else:
                    if word not in word_already:
                        ffile.write("%s\n" % word.encode('utf-8'))
                        word_already.append(word)
        ffile.close()
        print 'reuters.tokens is ready-----------------------------------------------------------'

        # Write the comma-joined word list of every kept document, one per
        # line; the entry whose position equals `ind` (the number of kept
        # documents) is written without a trailing newline.
        list_of_lists_file = open(root_directory + '/list_of_lists.txt', 'w+')
        mark = 0
        print 'len(allDoc_coma_join_lists)', len(allDoc_coma_join_lists)
        for one in allDoc_coma_join_lists:
            mark += 1
            if mark != ind:
                list_of_lists_file.write("%s\n" % one.encode('utf-8'))
            else:
                list_of_lists_file.write("%s" % one.encode('utf-8'))
        list_of_lists_file.close()
        print "mark", mark
        print "list_of_lists.txt is ready"

        # Run the two helper scripts, then the LDA model for this channel.
        # NOTE(review): presumably these scripts consume the files written
        # above -- confirm in docToMatrix.py / formal_matrix_title.py.
        execfile('docToMatrix.py')
        execfile('formal_matrix_title.py')
        excuteldamodel(
            mysqlhostIP='192.168.168.105',
            how_many_topics=30,
            how_many_iteration=100,
            how_many_topic_words=30,
            catcat=one_type,
            dbname='cctv',
        )
예제 #4
0
 def test_one(self):
     # Player('Jose', 100) should report name 'Jose' and balance 100.
     jose = MainProgram.Player('Jose', 100)
     # Read both attributes up front, before any assertion runs.
     observed_name, observed_balance = jose.name, jose.balance
     self.assertEqual(observed_name, 'Jose')
     self.assertEqual(observed_balance, 100)
예제 #5
0
import MainProgram

# Create the MainProgram instance with the argument 'Distance' and invoke its
# MainWindow method. Both statements run at import time of this script.
mp = MainProgram.MainProgram('Distance')
mp.MainWindow()
예제 #6
0
 def test_get_name_from_path_returnsPath(self):
     # For the path "boo/foo", get_name_from_path must return "foo".
     self.assertEqual(mp.get_name_from_path("boo/foo"), "foo")
예제 #7
0
 def test_calculate_video_duration_setsStopFrameNo(self):
     # After processing a MockCapture, mp.stop_frame_no must equal 7.
     expected_stop_frame = 7
     mp.calculate_video_duration(MockCapture())
     self.assertEqual(mp.stop_frame_no, expected_stop_frame)
예제 #8
0
 def test_calculate_frames_ReturnsRightValue(self):
     # calculate_frames(MockCapture(), 5) is expected to give 25.
     expected = 25
     actual = mp.calculate_frames(MockCapture(), 5)
     self.assertEqual(actual, expected)