Beispiel #1
0
def sevenGram(list):
    """Count the 7-grams of ``list`` and write them to ``7gramindexed.txt``.

    Every entry of ``list`` is read at index 0 (a string that is joined with
    single spaces) and index 2 (a tag copied onto the n-gram; presumably a
    sentence or document id -- TODO confirm against the caller).  A window of
    seven consecutive entries is kept only when its first and last entries
    have equal values at index 2.

    Each kept window becomes ``[joined_text, 1, tag]``.  The records are
    passed to ``CountFreq.count`` and the result is written out with
    ``CountFreq.write``.  Inputs shorter than seven entries yield no records.
    """
    size = 7
    list_7 = []
    # ``range`` instead of Python 2's ``xrange`` keeps the function runnable
    # on both Python 2 and Python 3.
    for i in range(len(list) - size + 1):
        if list[i][2] == list[i + size - 1][2]:
            item0 = ' '.join([list[i + k][0] for k in range(size)])
            list_7.append([item0, 1, list[i][2]])
    dic = CountFreq.count(list_7)
    # The file used to be left open; ``with`` flushes and closes it even if
    # the write raises.
    with open('7gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #2
0
def threeGram(list):
    """Count the 3-grams of ``list`` and write them to ``3gramindexed.txt``.

    Every entry of ``list`` is read at index 0 (a string that is joined with
    single spaces) and index 2 (a tag copied onto the n-gram; presumably a
    sentence or document id -- TODO confirm against the caller).  A window of
    three consecutive entries is kept only when its first and last entries
    have equal values at index 2.

    Each kept window becomes ``[joined_text, 1, tag]``.  The records are
    passed to ``CountFreq.count`` and the result is written out with
    ``CountFreq.write``.  Inputs shorter than three entries yield no records.
    """
    size = 3
    list_3 = []
    # ``range`` instead of Python 2's ``xrange`` keeps the function runnable
    # on both Python 2 and Python 3.
    for i in range(len(list) - size + 1):
        if list[i][2] == list[i + size - 1][2]:
            item0 = ' '.join([list[i + k][0] for k in range(size)])
            list_3.append([item0, 1, list[i][2]])
    dic = CountFreq.count(list_3)
    # The file used to be left open; ``with`` flushes and closes it even if
    # the write raises.
    with open('3gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #3
0
def fiveGram(list):
    """Count the 5-grams of ``list`` and write them to ``5gramindexed.txt``.

    Every entry of ``list`` is read at index 0 (a string that is joined with
    single spaces) and index 2 (a tag copied onto the n-gram; presumably a
    sentence or document id -- TODO confirm against the caller).  A window of
    five consecutive entries is kept only when its first and last entries
    have equal values at index 2.

    Each kept window becomes ``[joined_text, 1, tag]``.  The records are
    passed to ``CountFreq.count`` and the result is written out with
    ``CountFreq.write``.  Inputs shorter than five entries yield no records.
    """
    size = 5
    list_5 = []
    # ``range`` instead of Python 2's ``xrange`` keeps the function runnable
    # on both Python 2 and Python 3.
    for i in range(len(list) - size + 1):
        if list[i][2] == list[i + size - 1][2]:
            item0 = ' '.join([list[i + k][0] for k in range(size)])
            list_5.append([item0, 1, list[i][2]])
    dic = CountFreq.count(list_5)
    # The file used to be left open; ``with`` flushes and closes it even if
    # the write raises.
    with open('5gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #4
0
def threeGram(list):
    """Write the counted 3-grams of ``list`` to ``3gramindexed.txt``.

    ``list`` is indexed positionally: ``list[i][0]`` must be a string (the
    pieces are joined with one space) and ``list[i][2]`` is a tag that is
    carried over to the n-gram (presumably a sentence/document id -- TODO
    confirm with the caller).  Three consecutive entries form an n-gram only
    if the first and the third entry have the same value at index 2.

    The resulting ``[text, 1, tag]`` records go through ``CountFreq.count``
    and the returned object is written by ``CountFreq.write``.  Fewer than
    three entries produce an empty record list.
    """
    width = 3
    list_3 = []
    # ``range`` works on Python 2 and 3 alike, unlike ``xrange``.
    for start in range(len(list) - width + 1):
        last = start + width - 1
        if list[start][2] == list[last][2]:
            text = ' '.join([list[pos][0] for pos in range(start, last + 1)])
            list_3.append([text, 1, list[start][2]])
    dic = CountFreq.count(list_3)
    # Close the output file deterministically; it was previously left open.
    with open('3gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #5
0
def fiveGram(list):
    """Write the counted 5-grams of ``list`` to ``5gramindexed.txt``.

    ``list`` is indexed positionally: ``list[i][0]`` must be a string (the
    pieces are joined with one space) and ``list[i][2]`` is a tag that is
    carried over to the n-gram (presumably a sentence/document id -- TODO
    confirm with the caller).  Five consecutive entries form an n-gram only
    if the first and the fifth entry have the same value at index 2.

    The resulting ``[text, 1, tag]`` records go through ``CountFreq.count``
    and the returned object is written by ``CountFreq.write``.  Fewer than
    five entries produce an empty record list.
    """
    width = 5
    list_5 = []
    # ``range`` works on Python 2 and 3 alike, unlike ``xrange``.
    for start in range(len(list) - width + 1):
        last = start + width - 1
        if list[start][2] == list[last][2]:
            text = ' '.join([list[pos][0] for pos in range(start, last + 1)])
            list_5.append([text, 1, list[start][2]])
    dic = CountFreq.count(list_5)
    # Close the output file deterministically; it was previously left open.
    with open('5gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #6
0
def eightGram(list):
    """Write the counted 8-grams of ``list`` to ``8gramindexed.txt``.

    ``list`` is indexed positionally: ``list[i][0]`` must be a string (the
    pieces are joined with one space) and ``list[i][2]`` is a tag that is
    carried over to the n-gram (presumably a sentence/document id -- TODO
    confirm with the caller).  Eight consecutive entries form an n-gram only
    if the first and the eighth entry have the same value at index 2.

    The resulting ``[text, 1, tag]`` records go through ``CountFreq.count``
    and the returned object is written by ``CountFreq.write``.  Fewer than
    eight entries produce an empty record list.
    """
    width = 8
    list_8 = []
    # ``range`` works on Python 2 and 3 alike, unlike ``xrange``.
    for start in range(len(list) - width + 1):
        last = start + width - 1
        if list[start][2] == list[last][2]:
            text = ' '.join([list[pos][0] for pos in range(start, last + 1)])
            list_8.append([text, 1, list[start][2]])
    dic = CountFreq.count(list_8)
    # Close the output file deterministically; it was previously left open.
    with open('8gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #7
0
def twoGram(list):
    """Write the 2-grams of ``list`` raw and counted.

    Every entry of ``list`` is read at index 0 (a string that is joined with
    a single space) and index 2 (a tag copied onto the n-gram; presumably a
    sentence or document id -- TODO confirm against the caller).  Two
    adjacent entries form an n-gram only when their values at index 2 are
    equal.  Each n-gram is stored as ``[joined_text, 1, tag]``.

    Two files are produced:

    * ``2gram.txt`` -- one line per record: text, the literal ``1`` and the
      tag, separated by tabs (so the tag has to be a string).
    * ``2gramindexed.txt`` -- the result of ``CountFreq.count`` on the
      records, written by ``CountFreq.write``.
    """
    list_2 = []
    # ``range`` instead of Python 2's ``xrange`` keeps the function runnable
    # on both Python 2 and Python 3.
    for i in range(len(list) - 1):
        if list[i][2] == list[i + 1][2]:
            item0 = list[i][0] + ' ' + list[i + 1][0]
            list_2.append([item0, 1, list[i][2]])
    # Counted before the raw dump, as in the original statement order.
    dic = CountFreq.count(list_2)  # per the original note: the dict of words
    # ``with`` closes the file even when a write fails part-way.
    with open('2gram.txt', 'w') as ff:
        for record in list_2:
            ff.write(record[0] + '\t' + '1' + '\t' + record[2])
            ff.write('\n')
    # This handle used to be left open; close it deterministically.
    with open('2gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #8
0
def twoGram(list):
    """Dump the 2-grams of ``list`` to ``2gram.txt`` and their counts to
    ``2gramindexed.txt``.

    ``list`` is indexed positionally: ``list[i][0]`` must be a string and
    ``list[i][2]`` is a tag carried over to the n-gram (presumably a
    sentence/document id -- TODO confirm with the caller).  Neighbouring
    entries are paired only if both have the same value at index 2; the pair
    is stored as ``[first + ' ' + second, 1, tag]``.

    ``2gram.txt`` receives one tab-separated line per pair (text, the
    literal ``1``, tag -- the tag therefore has to be a string).
    ``2gramindexed.txt`` receives whatever ``CountFreq.count`` returns for
    the pairs, serialised by ``CountFreq.write``.
    """
    list_2 = []
    # ``range`` works on Python 2 and 3 alike, unlike ``xrange``.
    for pos in range(len(list) - 1):
        left, right = list[pos], list[pos + 1]
        if left[2] == right[2]:
            list_2.append([left[0] + ' ' + right[0], 1, left[2]])
    # Count first, then dump the raw pairs -- same order as before.
    dic = CountFreq.count(list_2)  # per the original note: the dict of words
    # Context managers release both handles even if a write raises; the
    # second file was previously never closed at all.
    with open('2gram.txt', 'w') as ff:
        for pair in list_2:
            ff.write(pair[0] + '\t' + '1' + '\t' + pair[2])
            ff.write('\n')
    with open('2gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #9
0
def oneGram(list):
    """Count the entries of ``list`` and write them to ``1gramindexed.txt``.

    ``list`` is passed unchanged to ``CountFreq.count``; the object it
    returns (per the original note, a dict of words) is written to the
    output file by ``CountFreq.write``.
    """
    dic = CountFreq.count(list)
    # The file used to be left open; ``with`` flushes and closes it even if
    # the write raises.
    with open('1gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f
Beispiel #10
0
def oneGram(list):
    """Write the unigram counts of ``list`` to ``1gramindexed.txt``.

    The input is handed as-is to ``CountFreq.count``.  Its return value
    (described by the original note as a dict of words) is then serialised
    into the output file through ``CountFreq.write``.
    """
    dic = CountFreq.count(list)
    # Close the output file deterministically; it was previously left open.
    with open('1gramindexed.txt', 'w') as f:
        CountFreq.write(dic, f)  # write the counted frequencies to f