def update_figure(selectedFile):
    """Build the bar-chart figure of bigram frequencies for one analysis file.

    Reads ``analysis/<selectedFile>``, parses it as JSON and takes the
    ``"bigrams"`` entry, falling back to the ``"digrams"`` entry when the
    first lookup/evaluation raises ``TypeError`` or ``KeyError``. Bars are
    ordered by descending value.

    Args:
        selectedFile: Name of a file inside the ``analysis`` directory.

    Returns:
        A dict with a ``"data"`` list holding a single ``go.Bar`` trace and a
        ``"layout"`` entry holding a ``go.Layout`` with both axis titles set.
    """
    parsed = json.loads(utils.readAllText("analysis/" + selectedFile))

    # NOTE(review): eval() executes whatever text the analysis file holds.
    # If these files can come from an untrusted source this is a code
    # execution risk; consider ast.literal_eval or using the parsed JSON
    # value directly once the stored format is confirmed.
    try:
        counts = eval(str(parsed["bigrams"]))
    except (TypeError, KeyError):
        # Some analysis files store the same data under the "digrams" key.
        counts = eval(str(parsed["digrams"]))

    # Highest value first; sorted() is stable, so ties keep their file order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    labels = [pair[0] for pair in ranked]
    values = [pair[1] for pair in ranked]

    bars = go.Bar(
        x=labels,
        y=values,
        name="Bigrams frequency",
        text="The exact value of occurrences of selected bigram",
    )
    layout = go.Layout(
        xaxis={'title': 'Bigram'},
        yaxis={'title': 'Proportions of occurrences'},
    )
    return {"data": [bars], 'layout': layout}
def update_figure(selectedFile):
    """Build the bar-chart figure of trigram frequencies for one analysis file.

    Reads ``analysis/<selectedFile>``, parses it as JSON and takes the
    ``"trigrams"`` entry. Bars are ordered alphabetically by trigram.

    Args:
        selectedFile: Name of a file inside the ``analysis`` directory.

    Returns:
        A dict with a ``"data"`` list holding a single ``go.Bar`` trace and a
        ``"layout"`` entry holding a ``go.Layout`` with both axis titles set.
    """
    parsed = json.loads(utils.readAllText("analysis/" + selectedFile))

    # NOTE(review): eval() executes whatever text the analysis file holds.
    # If these files can come from an untrusted source this is a code
    # execution risk; consider ast.literal_eval or using the parsed JSON
    # value directly once the stored format is confirmed.
    counts = eval(str(parsed["trigrams"]))

    # Order the bars by trigram text (ascending).
    ordered = sorted(counts.items(), key=lambda pair: pair[0])
    labels = [pair[0] for pair in ordered]
    values = [pair[1] for pair in ordered]

    bars = go.Bar(
        x=labels,
        y=values,
        name="Trigram frequency",
        text="The exact value of occurrences of selected trigram",
    )
    layout = go.Layout(
        xaxis={'title': 'Trigram'},
        yaxis={'title': 'Proportions of occurrences'},
    )
    return {"data": [bars], 'layout': layout}
def detectLang(filenameToAnalyse):
    """Find the analysis file most similar to ``filenameToAnalyse``.

    Every other file in the ``analysis`` directory is compared against the
    given file with ``lettersFactor``; a smaller value means a closer match.
    Only candidates scoring strictly below the initial threshold of 10 are
    accepted, and among those the lowest score wins (the first one seen
    wins on ties).

    Args:
        filenameToAnalyse: Name of the file inside ``analysis`` to identify.

    Returns:
        ``"Best match: "`` followed by ``utils.mapFileToLanguage`` of the
        winning file name, or ``"No match found."`` when no candidate scores
        below the threshold.
    """
    result = "No match found."
    candidates = [
        name for name in os.listdir("analysis") if name != filenameToAnalyse
    ]
    if not candidates:
        # Nothing to compare against; do not touch the target file at all.
        return result

    # The file under analysis is the same for every comparison, so read it
    # once instead of once per candidate.
    target = utils.readAllText("analysis/" + filenameToAnalyse)

    score = 10  # upper bound a candidate must beat to count as a match
    for fileName in candidates:
        # similarity -> smaller is better
        similarity = lettersFactor(
            utils.readAllText("analysis/" + fileName), target)
        if similarity < score:
            score = similarity
            result = "Best match: " + utils.mapFileToLanguage(fileName)
    return result
import time import datetime import os import json # custom modules import utils import analyzer try: # parsing files filesList = os.listdir("text samples") for fileName in filesList: # reading files and clearing data fileContent = utils.readAllText("text samples/" + fileName) clearedData = analyzer.clearData(fileContent) if not os.path.isdir("cleared data"): os.mkdir("cleared data") utils.writeAllText("cleared data/" + fileName, clearedData) # counting frequency lettersDict = utils.listToDict(analyzer.countLettersFreq(clearedData)) if not os.path.isdir("letter frequency"): os.mkdir("letter frequency") utils.writeAllText("letter frequency/" + fileName, json.dumps(lettersDict)) bigramsDict = utils.listToDict(analyzer.countBigramsFreq(clearedData)) if not os.path.isdir("bigrams"): os.mkdir("bigrams")