예제 #1
0
파일: cyttron.py 프로젝트: RedSunCMX/thesis
def descMatchAll(lijst):
    """Run all 24 description-matching variants and archive each log file.

    Calls ``keywords.extractKeywords(lijst, 20)``, then reads the
    semicolon-delimited ``cyttron-keywords.csv`` file in ``db`` and collects
    columns 0-3 into ``freqlist``, ``nounlist``, ``bilist`` and ``trilist``,
    plus a ``'. '``-joined copy of every full row in ``comboList``.

    The six inputs (``lijst`` and the five derived lists) are each passed to
    ``listDescMatch`` and ``listDescWordNetMatch``; ``stemList`` is then
    applied to all six and both matchers are run again.  After every run
    the ``descMatch.csv`` file in ``log`` is renamed to a variant-specific
    name and an ``n/24`` progress line is printed.

    Args:
        lijst: the input handed to the keyword extractor, to ``stemList``
            and to every matcher (presumably a list of text strings --
            confirm against callers).

    Raises:
        IndexError: if a row of the keyword CSV has fewer than four fields.
        OSError: if a log file cannot be renamed.
    """
    import keywords
    keywords.extractKeywords(lijst, 20)
    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []

    # Context manager closes the keyword file; the original leaked the handle.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # (log-name tag, input) pairs in the order the runs are numbered.
    variants = (
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    )

    def runPhases(phases, done):
        """Run each matcher over every variant; return the updated run count."""
        for matcher, prefix in phases:
            for tag, items in variants:
                matcher(items)
                # Backslashes are escaped explicitly: the resulting paths are
                # identical to the old 'log\descMatch...' literals, which only
                # worked because '\d' is not a recognised escape sequence.
                os.rename('log\\descMatch.csv',
                          'log\\descMatch-%s%s.csv' % (prefix, tag))
                done += 1
                # Single-argument parenthesised print behaves the same as the
                # print statement under Python 2.
                print("%d/24" % done)
        return done

    done = runPhases(((listDescMatch, ''),
                      (listDescWordNetMatch, 'wordNet-')), 0)

    # NOTE(review): the five names below are bound as locals and never used
    # again in this function.  If the matchers are meant to pick up the
    # stemmed model, these presumably need to be module globals -- confirm.
    dictionary = corpora.Dictionary.load('vsm\\stem\\stem-dictionary.dict')
    print(dictionary)
    corpus = corpora.MmCorpus('vsm\\stem\\stem-corpus.mm')
    print(corpus)
    tfidf = models.TfidfModel.load('vsm\\stem\\model.tfidf')
    print(tfidf)

    index = similarities.Similarity.load('vsm\\stem\\stem.index')

    # pickle.load must only ever be pointed at trusted, locally built files.
    with open('vsm\\stem\\tfidfDesc.list', 'r') as tfidfFile:
        tfidfDesc = pickle.load(tfidfFile)
    print("TF-IDF Descriptions: %d \n" % len(tfidfDesc))

    # NOTE(review): the return value of stemList is discarded, so this only
    # has an effect if stemList modifies its argument in place -- confirm.
    for _tag, items in variants:
        stemList(items)

    runPhases(((listDescMatch, 'stem-'),
               (listDescWordNetMatch, 'stem-wordNet-')), done)
예제 #2
0
파일: cyttron.py 프로젝트: RedSunCMX/thesis
def descMatchAll(lijst):
    """Produce the 24 description-match logs for ``lijst`` and its keyword lists.

    Steps, in order:

    1. ``keywords.extractKeywords(lijst, 20)`` is called.
    2. The semicolon-delimited ``cyttron-keywords.csv`` file in ``db`` is
       read; fields 0-3 of each row go to ``freqlist``, ``nounlist``,
       ``bilist`` and ``trilist``, and the whole row joined with ``'. '``
       goes to ``comboList``.
    3. ``listDescMatch`` and then ``listDescWordNetMatch`` are run over
       ``lijst`` and the five lists (runs 1-12).
    4. The stemmed dictionary, corpus, TF-IDF model, index and description
       list are loaded from ``vsm\\stem`` and ``stemList`` is called on all
       six inputs.
    5. Both matchers are run over the six inputs again (runs 13-24).

    After each run ``descMatch.csv`` in ``log`` is renamed to a name that
    encodes the variant, and ``n/24`` is printed.

    Args:
        lijst: input passed to the keyword extractor, ``stemList`` and the
            matchers (presumably a list of strings -- verify with callers).

    Raises:
        IndexError: if a keyword CSV row has fewer than four fields.
        OSError: if renaming a log file fails.
    """
    import keywords
    keywords.extractKeywords(lijst, 20)
    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []

    # The original never closed this file; "with" releases it after reading.
    with open('db\\cyttron-keywords.csv', 'rb') as csvHandle:
        for row in csv.reader(csvHandle, delimiter=';'):
            freqlist.append(row[0])
            nounlist.append(row[1])
            bilist.append(row[2])
            trilist.append(row[3])
            comboList.append('. '.join(row))

    # Tag used in the archived log name, paired with the list it is run on.
    inputs = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]

    def runJobs(phases, firstStep):
        """Run every (matcher, input) pair, numbering runs from firstStep."""
        jobs = [(matcher, prefix + tag, items)
                for matcher, prefix in phases
                for tag, items in inputs]
        for step, (matcher, suffix, items) in enumerate(jobs, firstStep):
            matcher(items)
            # Explicit '\\' yields the same path as the old 'log\desc...'
            # literals without depending on '\d' being an unknown escape.
            os.rename('log\\descMatch.csv',
                      'log\\descMatch-%s.csv' % suffix)
            # One-argument print(...) is equivalent to the Python 2 statement.
            print("%d/24" % step)

    runJobs([(listDescMatch, ''), (listDescWordNetMatch, 'wordNet-')], 1)

    # NOTE(review): dictionary, corpus, tfidf, index and tfidfDesc are local
    # names that nothing below reads; presumably they were meant to replace
    # module-level objects used by the matchers -- confirm before relying on
    # the "stem" runs using the stemmed model.
    dictionary = corpora.Dictionary.load('vsm\\stem\\stem-dictionary.dict')
    print(dictionary)
    corpus = corpora.MmCorpus('vsm\\stem\\stem-corpus.mm')
    print(corpus)
    tfidf = models.TfidfModel.load('vsm\\stem\\model.tfidf')
    print(tfidf)

    index = similarities.Similarity.load('vsm\\stem\\stem.index')

    # Unpickling is only safe for files this project generated itself.
    with open('vsm\\stem\\tfidfDesc.list', 'r') as tfidfFile:
        tfidfDesc = pickle.load(tfidfFile)
    print("TF-IDF Descriptions: %d \n" % len(tfidfDesc))

    # NOTE(review): stemList's result is ignored, so it must mutate its
    # argument in place for runs 13-24 to differ from 1-12 -- confirm.
    for _tag, items in inputs:
        stemList(items)

    runJobs([(listDescMatch, 'stem-'),
             (listDescWordNetMatch, 'stem-wordNet-')], 13)
예제 #3
0
파일: cyttron.py 프로젝트: RedSunCMX/thesis
def wordMatchAll(lijst):
    """Run all 24 word-matching variants and archive each log file.

    Calls ``keywords.extractKeywords(lijst, cyttronAll, 20)``, then reads
    the semicolon-delimited ``cyttron-keywords.csv`` file in ``db`` and
    collects columns 0-3 into ``freqlist``, ``nounlist``, ``bilist`` and
    ``trilist``, plus a ``'. '``-joined copy of every full row in
    ``comboList``.

    The six inputs (``lijst`` and the five derived lists) are each passed to
    ``listWordMatch`` and ``listWordNetMatch``.  ``stemOnto(label)`` and
    ``stemList`` (on all six inputs) are then called, after which
    ``listWordMatch`` and ``listStemWordNetMatch`` are run over the six
    inputs.  After every run the ``wordMatch.csv`` file in ``log`` is
    renamed to a variant-specific name and an ``n/24`` line is printed.

    Args:
        lijst: the input handed to the keyword extractor, to ``stemList``
            and to every matcher (presumably a list of text strings --
            confirm against callers).

    Raises:
        IndexError: if a row of the keyword CSV has fewer than four fields.
        OSError: if a log file cannot be renamed.
    """
    import keywords
    global cyttronAll
    keywords.extractKeywords(lijst, cyttronAll, 20)
    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []

    # Context manager closes the keyword file; the original leaked the handle.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # Debug peek at the second combined row.  Guarded so that a keyword file
    # with fewer than two rows no longer aborts the whole run with IndexError.
    if len(comboList) > 1:
        print(comboList[1])

    # (log-name tag, input) pairs in the order the runs are numbered.
    variants = (
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    )

    def runPhases(phases, done):
        """Run each matcher over every variant; return the updated run count."""
        for matcher, prefix in phases:
            for tag, items in variants:
                matcher(items)
                # Backslashes are escaped explicitly: the resulting paths are
                # identical to the old 'log\wordMatch...' literals, which only
                # worked because '\w' is not a recognised escape sequence.
                os.rename('log\\wordMatch.csv',
                          'log\\wordMatch-%s%s.csv' % (prefix, tag))
                done += 1
                # Single-argument parenthesised print behaves the same as the
                # print statement under Python 2.
                print("%d/24" % done)
        return done

    done = runPhases(((listWordMatch, ''),
                      (listWordNetMatch, 'wordNet-')), 0)

    # Stem
    # "label" is not defined in this function; it is resolved from the
    # enclosing module scope.
    stemOnto(label)
    # NOTE(review): the return value of stemList is discarded, so this only
    # has an effect if stemList modifies its argument in place -- confirm.
    for _tag, items in variants:
        stemList(items)

    runPhases(((listWordMatch, 'stem-'),
               (listStemWordNetMatch, 'stem-wordNet-')), done)
예제 #4
0
파일: cyttron.py 프로젝트: RedSunCMX/thesis
def wordMatchAll(lijst):
    """Produce the 24 word-match logs for ``lijst`` and its keyword lists.

    Steps, in order:

    1. ``keywords.extractKeywords(lijst, cyttronAll, 20)`` is called.
    2. The semicolon-delimited ``cyttron-keywords.csv`` file in ``db`` is
       read; fields 0-3 of each row go to ``freqlist``, ``nounlist``,
       ``bilist`` and ``trilist``, and the whole row joined with ``'. '``
       goes to ``comboList``.
    3. ``listWordMatch`` and then ``listWordNetMatch`` are run over
       ``lijst`` and the five lists (runs 1-12).
    4. ``stemOnto(label)`` is called and ``stemList`` is applied to all six
       inputs.
    5. ``listWordMatch`` and then ``listStemWordNetMatch`` are run over the
       six inputs (runs 13-24).

    After each run ``wordMatch.csv`` in ``log`` is renamed to a name that
    encodes the variant, and ``n/24`` is printed.

    Args:
        lijst: input passed to the keyword extractor, ``stemList`` and the
            matchers (presumably a list of strings -- verify with callers).

    Raises:
        IndexError: if a keyword CSV row has fewer than four fields.
        OSError: if renaming a log file fails.
    """
    import keywords
    global cyttronAll
    keywords.extractKeywords(lijst, cyttronAll, 20)
    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []

    # The original never closed this file; "with" releases it after reading.
    with open('db\\cyttron-keywords.csv', 'rb') as csvHandle:
        for row in csv.reader(csvHandle, delimiter=';'):
            freqlist.append(row[0])
            nounlist.append(row[1])
            bilist.append(row[2])
            trilist.append(row[3])
            comboList.append('. '.join(row))

    # Diagnostic output of the second combined row; skipped when the CSV has
    # fewer than two rows instead of raising IndexError before any matching.
    if len(comboList) > 1:
        print(comboList[1])

    # Tag used in the archived log name, paired with the list it is run on.
    inputs = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]

    def runJobs(phases, firstStep):
        """Run every (matcher, input) pair, numbering runs from firstStep."""
        jobs = [(matcher, prefix + tag, items)
                for matcher, prefix in phases
                for tag, items in inputs]
        for step, (matcher, suffix, items) in enumerate(jobs, firstStep):
            matcher(items)
            # Explicit '\\' yields the same path as the old 'log\word...'
            # literals without depending on '\w' being an unknown escape.
            os.rename('log\\wordMatch.csv',
                      'log\\wordMatch-%s.csv' % suffix)
            # One-argument print(...) is equivalent to the Python 2 statement.
            print("%d/24" % step)

    runJobs([(listWordMatch, ''), (listWordNetMatch, 'wordNet-')], 1)

    # Stem
    # "label" comes from module scope; it is not assigned in this function.
    stemOnto(label)
    # NOTE(review): stemList's result is ignored, so it must mutate its
    # argument in place for runs 13-24 to differ from 1-12 -- confirm.
    for _tag, items in inputs:
        stemList(items)

    runJobs([(listWordMatch, 'stem-'),
             (listStemWordNetMatch, 'stem-wordNet-')], 13)