def hebrewParshiot():
    """Print the TFIDF.parshaIDF result for PARSHA_NAME and its top 20 entries.

    The corpus comes from Parshiot.createSplitParshiot() called with no
    arguments. The result object is printed whole, followed by the output
    of its ``most_common(20)``. Nothing is returned.
    """
    split_corpus = Parshiot.createSplitParshiot()
    # NOTE(review): "TD-IDF" is presumably a typo for "TF-IDF"; the message
    # is left untouched so the printed output does not change.
    print("Calculating regular hebrew TD-IDF for ", PARSHA_NAME)
    scores = TFIDF.parshaIDF(PARSHA_NAME, split_corpus)
    print(scores)
    print("20 most common:")
    top_twenty = scores.most_common(20)
    print(top_twenty)
def hebrewParshiotWithFreq():
    """Print the TFIDF.parshaFreqIDF result for PARSHA_NAME and its top 20.

    Two inputs are handed to TFIDF.parshaFreqIDF alongside PARSHA_NAME: the
    output of Parshiot.createSplitParshiot() and the output of
    Parshiot.processParshiotByFrequency(). The result object is printed
    whole, followed by the output of its ``most_common(20)``. Nothing is
    returned.
    """
    split_corpus = Parshiot.createSplitParshiot()
    frequency_corpus = Parshiot.processParshiotByFrequency()
    # NOTE(review): "TD-IDF" is presumably a typo for "TF-IDF"; the message
    # is left untouched so the printed output does not change.
    print(
        "Calculating hebrew TD-IDF for ",
        PARSHA_NAME,
        " based on minimum letter frequency",
    )
    scores = TFIDF.parshaFreqIDF(PARSHA_NAME, split_corpus, frequency_corpus)
    print(scores)
    print("20 most common:")
    top_twenty = scores.most_common(20)
    print(top_twenty)
def hebrewParshiotTop():
    """Print the top half of the TFIDF.parshaIDF entries for PARSHA_NAME.

    Prints, in order: the full result object, its length, half of that
    length (rounded down), the ``most_common`` entries for that count, and
    finally just the first element of each of those entries. Nothing is
    returned.
    """
    parshiot = Parshiot.createSplitParshiot()
    # NOTE(review): "TD-IDF" is presumably a typo for "TF-IDF"; the message
    # is left untouched so the printed output does not change.
    print("Calculating regular hebrew TD-IDF for ", PARSHA_NAME)
    results = TFIDF.parshaIDF(PARSHA_NAME, parshiot)
    print(results)
    print(len(results))

    # This is a count of entries (half the total), not a percentage.
    # Floor division keeps the arithmetic in integers.
    half_count = len(results) // 2
    print("top 50% of results: ", half_count)

    # Compute the ranked list once and reuse it for both printouts.
    top_half = results.most_common(half_count)
    print(top_half)
    print([entry[0] for entry in top_half])
def hebrewFreqParshiot():
    """Write the top 20 TFIDF.parshaIDF entries of every parsha to a CSV file.

    The corpus comes from Parshiot.processParshiotByFrequency(). The output
    file is ``subDir + 'strippedHebTFIDFparshaResults.csv'``, overwritten on
    each run, with the columns ``parsha`` and ``most relevant words``.
    Nothing is returned.
    """
    parshiot = Parshiot.processParshiotByFrequency()
    fieldnames = ['parsha', 'most relevant words']
    # newline='' is required by the csv module for files handed to a writer;
    # without it, platforms that translate "\n" emit "\r\r\n" row endings.
    with open(subDir + 'strippedHebTFIDFparshaResults.csv', mode='w',
              encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for parsha in parshiot.keys():
            top_entries = TFIDF.parshaIDF(parsha, parshiot).most_common(20)
            writer.writerow({
                'parsha': parsha,
                # The list is passed as-is, so the csv writer stores its
                # str() form in the cell.
                'most relevant words': [entry[0] for entry in top_entries],
            })
def TFIDFOutput(lang='heb'):
    """Write the top 20 TFIDF.parshaIDF entries of every parsha to a CSV file.

    Args:
        lang: Passed to Parshiot.createSplitParshiot() and also used as the
            prefix of the output file name. Defaults to ``'heb'``.

    The output file is ``subDir + lang + 'TFIDFParshaResults.csv'``,
    overwritten on each run, with the columns ``parsha`` and
    ``most relevant words``. Nothing is returned.
    """
    parshiot = Parshiot.createSplitParshiot(lang)
    fieldnames = ['parsha', 'most relevant words']
    # newline='' is required by the csv module for files handed to a writer;
    # without it, platforms that translate "\n" emit "\r\r\n" row endings.
    with open(subDir + lang + 'TFIDFParshaResults' + '.csv', mode='w',
              encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for parsha in parshiot.keys():
            top_entries = TFIDF.parshaIDF(parsha, parshiot).most_common(20)
            writer.writerow({
                'parsha': parsha,
                # The list is passed as-is, so the csv writer stores its
                # str() form in the cell.
                'most relevant words': [entry[0] for entry in top_entries],
            })
def englishChapter(textName='JPS-Devarim.txt', chapterNum=1):
    """Print the TFIDF.chapterIDF result for one chapter of a text.

    Args:
        textName: Text identifier forwarded to TFIDF.chapterIDF. Defaults to
            ``'JPS-Devarim.txt'``, the value that used to be hard-coded.
        chapterNum: Chapter number forwarded to TFIDF.chapterIDF. Defaults
            to ``1``, the value that used to be hard-coded.

    Calling with no arguments behaves exactly as before. Nothing is returned.
    """
    print("Calculating chapter TF-IDF for ", textName, "in chapter ", chapterNum)
    # chapterIDF takes the chapter number first, then the text name.
    print(TFIDF.chapterIDF(chapterNum, textName))