def descMatchAll(lijst):
    """Run the 24 description-matching passes for ``lijst`` and its keywords.

    Keywords are extracted for ``lijst``, then the keyword CSV in ``db`` is
    read: columns 0-3 fill the frequent-word, noun, bigram and trigram lists,
    and each whole row joined with '. ' forms the combo list.  ``lijst`` and
    those five lists are each fed to listDescMatch and listDescWordNetMatch
    (passes 1-12).  After the stemmed VSM files are loaded and stemList has
    been applied to all six lists, the same twelve passes run again (13-24).
    After every pass the descMatch.csv file in ``log`` is renamed to a
    per-pass file name and progress is printed as "<n>/24".

    Args:
        lijst: the input list (presumably texts -- confirm against callers);
            passed unchanged to extractKeywords, stemList and the matchers.

    Raises:
        IOError / OSError: propagated from opening the input files or from
            os.rename (for example when the expected log file is missing).
        IndexError: if a keyword CSV row has fewer than four columns.
    """
    import keywords

    # NOTE(review): wordMatchAll calls extractKeywords(lijst, cyttronAll, 20);
    # confirm this two-argument form is still a valid signature.
    keywords.extractKeywords(lijst, 20)

    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []
    # Presumably written by extractKeywords above -- verify.  The ``with``
    # block closes the handle, which the original inline open() never did.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # (file-name label, list) pairs, in the order the passes must run.
    sources = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]
    # (file-name tag, matcher) pairs; the plain matcher runs before WordNet.
    matchers = [
        ('', listDescMatch),
        ('wordNet-', listDescWordNetMatch),
    ]

    def runPasses(stemTag, firstStep):
        # One pass per matcher/list pair: match, move the log aside, report.
        step = firstStep
        for matcherTag, matcher in matchers:
            for sourceTag, items in sources:
                matcher(items)
                # Each matcher is expected to leave its output in
                # log\descMatch.csv; rename it so the next pass starts clean.
                os.rename(
                    'log\\descMatch.csv',
                    'log\\descMatch-' + stemTag + matcherTag + sourceTag
                    + '.csv')
                print("%d/24" % step)
                step += 1

    runPasses('', 1)

    # NOTE(review): everything loaded below is bound to local names only
    # (there is no ``global`` statement), so the matchers cannot see these
    # objects through this function -- confirm whether module-level state
    # was meant to be replaced for the stemmed passes.
    dictionary = corpora.Dictionary.load('vsm\\stem\\stem-dictionary.dict')
    print(dictionary)
    corpus = corpora.MmCorpus('vsm\\stem\\stem-corpus.mm')
    print(corpus)
    tfidf = models.TfidfModel.load('vsm\\stem\\model.tfidf')
    print(tfidf)
    index = similarities.Similarity.load('vsm\\stem\\stem.index')
    # pickle must only be used on trusted, locally generated files.
    with open('vsm\\stem\\tfidfDesc.list', 'r') as tfidfFile:
        tfidfDesc = pickle.load(tfidfFile)
    print("TF-IDF Descriptions: %d \n" % len(tfidfDesc))

    # The return value is ignored, so stemList presumably stems in place.
    for _, items in sources:
        stemList(items)

    runPasses('stem-', 13)
def descMatchAll(lijst):
    """Run the 24 description-matching passes for ``lijst`` and its keywords.

    Keywords are extracted for ``lijst``, then the keyword CSV in ``db`` is
    read: columns 0-3 fill the frequent-word, noun, bigram and trigram lists,
    and each whole row joined with '. ' forms the combo list.  ``lijst`` and
    those five lists are each fed to listDescMatch and listDescWordNetMatch
    (passes 1-12).  After the stemmed VSM files are loaded and stemList has
    been applied to all six lists, the same twelve passes run again (13-24).
    After every pass the descMatch.csv file in ``log`` is renamed to a
    per-pass file name and progress is printed as "<n>/24".

    Args:
        lijst: the input list (presumably texts -- confirm against callers);
            passed unchanged to extractKeywords, stemList and the matchers.

    Raises:
        IOError / OSError: propagated from opening the input files or from
            os.rename (for example when the expected log file is missing).
        IndexError: if a keyword CSV row has fewer than four columns.
    """
    import keywords

    # NOTE(review): wordMatchAll calls extractKeywords(lijst, cyttronAll, 20);
    # confirm this two-argument form is still a valid signature.
    keywords.extractKeywords(lijst, 20)

    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []
    # Presumably written by extractKeywords above -- verify.  The ``with``
    # block closes the handle, which the original inline open() never did.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # (file-name label, list) pairs, in the order the passes must run.
    sources = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]
    # (file-name tag, matcher) pairs; the plain matcher runs before WordNet.
    matchers = [
        ('', listDescMatch),
        ('wordNet-', listDescWordNetMatch),
    ]

    def runPasses(stemTag, firstStep):
        # One pass per matcher/list pair: match, move the log aside, report.
        step = firstStep
        for matcherTag, matcher in matchers:
            for sourceTag, items in sources:
                matcher(items)
                # Each matcher is expected to leave its output in
                # log\descMatch.csv; rename it so the next pass starts clean.
                os.rename(
                    'log\\descMatch.csv',
                    'log\\descMatch-' + stemTag + matcherTag + sourceTag
                    + '.csv')
                print("%d/24" % step)
                step += 1

    runPasses('', 1)

    # NOTE(review): everything loaded below is bound to local names only
    # (there is no ``global`` statement), so the matchers cannot see these
    # objects through this function -- confirm whether module-level state
    # was meant to be replaced for the stemmed passes.
    dictionary = corpora.Dictionary.load('vsm\\stem\\stem-dictionary.dict')
    print(dictionary)
    corpus = corpora.MmCorpus('vsm\\stem\\stem-corpus.mm')
    print(corpus)
    tfidf = models.TfidfModel.load('vsm\\stem\\model.tfidf')
    print(tfidf)
    index = similarities.Similarity.load('vsm\\stem\\stem.index')
    # pickle must only be used on trusted, locally generated files.
    with open('vsm\\stem\\tfidfDesc.list', 'r') as tfidfFile:
        tfidfDesc = pickle.load(tfidfFile)
    print("TF-IDF Descriptions: %d \n" % len(tfidfDesc))

    # The return value is ignored, so stemList presumably stems in place.
    for _, items in sources:
        stemList(items)

    runPasses('stem-', 13)
def wordMatchAll(lijst):
    """Run the 24 word-matching passes for ``lijst`` and its keyword lists.

    Keywords are extracted for ``lijst`` (with the module-level
    ``cyttronAll``), then the keyword CSV in ``db`` is read: columns 0-3 fill
    the frequent-word, noun, bigram and trigram lists, and each whole row
    joined with '. ' forms the combo list.  ``lijst`` and those five lists
    are each fed to listWordMatch and listWordNetMatch (passes 1-12).  Then
    stemOnto(label) and stemList on all six lists are applied, and the lists
    go through listWordMatch and listStemWordNetMatch (passes 13-24).  After
    every pass the wordMatch.csv file in ``log`` is renamed to a per-pass
    file name and progress is printed as "<n>/24".

    Args:
        lijst: the input list (presumably texts -- confirm against callers);
            passed unchanged to extractKeywords, stemList and the matchers.

    Raises:
        IOError / OSError: propagated from opening the keyword CSV or from
            os.rename (for example when the expected log file is missing).
        IndexError: if a keyword CSV row has fewer than four columns.
    """
    import keywords
    global cyttronAll

    keywords.extractKeywords(lijst, cyttronAll, 20)

    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []
    # Presumably written by extractKeywords above -- verify.  The ``with``
    # block closes the handle, which the original inline open() never did.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # Debug output of the second combo entry; guarded so a keyword file with
    # fewer than two rows no longer aborts the whole run with IndexError.
    if len(comboList) > 1:
        print(comboList[1])

    # (file-name label, list) pairs, in the order the passes must run.
    sources = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]

    def runPasses(stemTag, matchers, firstStep):
        # One pass per matcher/list pair: match, move the log aside, report.
        step = firstStep
        for matcherTag, matcher in matchers:
            for sourceTag, items in sources:
                matcher(items)
                # Each matcher is expected to leave its output in
                # log\wordMatch.csv; rename it so the next pass starts clean.
                os.rename(
                    'log\\wordMatch.csv',
                    'log\\wordMatch-' + stemTag + matcherTag + sourceTag
                    + '.csv')
                print("%d/24" % step)
                step += 1

    runPasses(
        '',
        [('', listWordMatch), ('wordNet-', listWordNetMatch)],
        1)

    # Stem
    # ``label`` is a module-level name defined outside this function.
    stemOnto(label)
    # The return value is ignored, so stemList presumably stems in place.
    for _, items in sources:
        stemList(items)

    # The stemmed WordNet passes use listStemWordNetMatch, not
    # listWordNetMatch.
    runPasses(
        'stem-',
        [('', listWordMatch), ('wordNet-', listStemWordNetMatch)],
        13)
def wordMatchAll(lijst):
    """Run the 24 word-matching passes for ``lijst`` and its keyword lists.

    Keywords are extracted for ``lijst`` (with the module-level
    ``cyttronAll``), then the keyword CSV in ``db`` is read: columns 0-3 fill
    the frequent-word, noun, bigram and trigram lists, and each whole row
    joined with '. ' forms the combo list.  ``lijst`` and those five lists
    are each fed to listWordMatch and listWordNetMatch (passes 1-12).  Then
    stemOnto(label) and stemList on all six lists are applied, and the lists
    go through listWordMatch and listStemWordNetMatch (passes 13-24).  After
    every pass the wordMatch.csv file in ``log`` is renamed to a per-pass
    file name and progress is printed as "<n>/24".

    Args:
        lijst: the input list (presumably texts -- confirm against callers);
            passed unchanged to extractKeywords, stemList and the matchers.

    Raises:
        IOError / OSError: propagated from opening the keyword CSV or from
            os.rename (for example when the expected log file is missing).
        IndexError: if a keyword CSV row has fewer than four columns.
    """
    import keywords
    global cyttronAll

    keywords.extractKeywords(lijst, cyttronAll, 20)

    freqlist = []
    nounlist = []
    bilist = []
    trilist = []
    comboList = []
    # Presumably written by extractKeywords above -- verify.  The ``with``
    # block closes the handle, which the original inline open() never did.
    with open('db\\cyttron-keywords.csv', 'rb') as keywordFile:
        for line in csv.reader(keywordFile, delimiter=';'):
            freqlist.append(line[0])
            nounlist.append(line[1])
            bilist.append(line[2])
            trilist.append(line[3])
            comboList.append('. '.join(line))

    # Debug output of the second combo entry; guarded so a keyword file with
    # fewer than two rows no longer aborts the whole run with IndexError.
    if len(comboList) > 1:
        print(comboList[1])

    # (file-name label, list) pairs, in the order the passes must run.
    sources = [
        ('literal', lijst),
        ('freqWords', freqlist),
        ('nounWords', nounlist),
        ('bigrams', bilist),
        ('trigrams', trilist),
        ('combo', comboList),
    ]

    def runPasses(stemTag, matchers, firstStep):
        # One pass per matcher/list pair: match, move the log aside, report.
        step = firstStep
        for matcherTag, matcher in matchers:
            for sourceTag, items in sources:
                matcher(items)
                # Each matcher is expected to leave its output in
                # log\wordMatch.csv; rename it so the next pass starts clean.
                os.rename(
                    'log\\wordMatch.csv',
                    'log\\wordMatch-' + stemTag + matcherTag + sourceTag
                    + '.csv')
                print("%d/24" % step)
                step += 1

    runPasses(
        '',
        [('', listWordMatch), ('wordNet-', listWordNetMatch)],
        1)

    # Stem
    # ``label`` is a module-level name defined outside this function.
    stemOnto(label)
    # The return value is ignored, so stemList presumably stems in place.
    for _, items in sources:
        stemList(items)

    # The stemmed WordNet passes use listStemWordNetMatch, not
    # listWordNetMatch.
    runPasses(
        'stem-',
        [('', listWordMatch), ('wordNet-', listStemWordNetMatch)],
        13)