def moushakeel_V1(self, text, smooth_const):
    """Vocalise text by scoring every candidate sentence and keeping the best.

    The input is split with ``tool.sents``; each piece is stripped of
    diacritics, alif-normalised and wrapped in ``#`` / ``$`` boundary
    markers. Every word is looked up in the ``dictionary`` table, the
    candidate sentences are enumerated with ``self.getPossibilities`` and
    the one with the highest ``self.sentProbaility`` score is kept.

    Args:
        text: Raw input text.
        smooth_const: Smoothing constant forwarded to ``self.sentProbaility``.

    Returns:
        A dict with the keys ``'token'`` (words processed, boundary markers
        excluded, summed over all sentences), ``'type'`` (distinct words per
        sentence, markers excluded, summed over all sentences), ``'result'``
        (list with one vocalised string per sentence) and ``'time'``
        (elapsed seconds rounded to 2 decimals).
    """
    t1 = time.time()
    tool = MyToolKit()
    hdb = DBHandler('data/model.db')
    result = {}
    # NOTE(review): the connection opened here is never closed in this
    # function; DBHandler's closing API is not visible from this block.
    hdb.connect()
    result_teshkeel = []
    result['token'] = 0
    result['type'] = 0

    sentences = tool.sents(text.strip(), ["\n", "\r", ".", ":", ",", ';'],
                           subsent=['"', "'", '-'])
    for sent in sentences:
        marked = "# " + tool.normalizeArabicAlif(tool.DeleteDiacritic(sent)) + " $"
        list_words = tool.words(marked)

        # Retrieve the candidate forms for each word; a word that is not in
        # the dictionary is kept unchanged.
        # NOTE(review): the condition is built by string concatenation from
        # user text; a word containing '"' would break or alter the query.
        # Use a parameterised lookup if DBHandler offers one.
        candidates = {}
        for word in list_words:
            res = hdb.SelectOne('dictionary', 'type="' + word + '"',
                                attribute='vocabularies')
            candidates[word] = word if res is None else res[0]
        # The boundary markers always map to themselves.
        candidates['#'] = '#'
        candidates['$'] = '$'

        possibilities = self.getPossibilities(list_words, candidates)

        # Keep the highest-scoring candidate. If no candidate scores above
        # 0, best_sent stays ''.
        max_p = 0
        best_sent = ''
        for possib in possibilities:
            p = self.sentProbaility(possib, smooth_const)
            if p > max_p:
                # Fixed: the original assigned `p = max_p`, so the running
                # maximum never moved and the last non-zero candidate won.
                max_p = p
                best_sent = possib

        # Drop the leading '#' and trailing '$' tokens.
        best_words = tool.words(best_sent)
        result_teshkeel.append(' '.join(best_words[1:-1]))

        # Accumulate over sentences (the original overwrote the counters, so
        # only the last sentence was reported); -2 discounts the markers.
        result['token'] += len(list_words) - 2
        result['type'] += len(candidates) - 2

    result['result'] = result_teshkeel
    result['time'] = round(time.time() - t1, 2)
    return result
def moushakeel_V2(self, text, smooth_const):
    """Vocalise text with a word-level Viterbi pass, then a letter-level pass.

    The input is split with ``tool.sents``; each piece is stripped of
    diacritics, alif-normalised, stripped of the tatweel character and
    wrapped in ``#`` / ``$`` boundary markers. Every word is looked up in
    the ``dictionary`` table, the candidates are arranged in a matrix and
    decoded with ``self.Viterbi``. The decoded sentences are finally passed
    through ``self.LettersVocaliser``.

    Args:
        text: Raw input text.
        smooth_const: Smoothing constant forwarded to ``self.Viterbi`` and
            ``self.LettersVocaliser``.

    Returns:
        A dict with the keys ``'token'`` (words processed, boundary markers
        excluded, summed over all sentences), ``'type'`` (distinct words per
        sentence, markers excluded, summed over all sentences), ``'result'``
        (the value returned by ``self.LettersVocaliser``) and ``'time'``
        (elapsed seconds rounded to 2 decimals).
    """
    t1 = time.time()
    tool = MyToolKit()
    hdb = DBHandler('data/model.db')
    result = {}
    # NOTE(review): the connection opened here is never closed in this
    # function; DBHandler's closing API is not visible from this block.
    hdb.connect()
    result_teshkeel = []
    result['token'] = 0
    result['type'] = 0

    # ---------------------- Vocalisation by word ----------------------
    sentences = tool.sents(text.strip(), ["\n", "\r", ".", ":", ",", ';'],
                           subsent=['"', "'", '-'])
    for sent in sentences:
        # TODO: revisit this replace, it slightly alters the words (tatweel).
        sent = ("# "
                + tool.normalizeArabicAlif(tool.DeleteDiacritic(sent)).replace('ـ', '')
                + " $")
        list_words = tool.words(sent)

        # Retrieve the candidate forms for each word; a word that is not in
        # the dictionary is kept unchanged.
        # NOTE(review): the condition is built by string concatenation from
        # user text; a word containing '"' would break or alter the query.
        # Use a parameterised lookup if DBHandler offers one.
        candidates = {}
        for word in list_words:
            res = hdb.SelectOne('dictionary', 'type="' + word + '"',
                                attribute='vocabularies')
            candidates[word] = word if res is None else res[0]
        # The boundary markers always map to themselves.
        candidates['#'] = '#'
        candidates['$'] = '$'

        # HMM: one row per word, one [-1, form, value] cell per candidate
        # form; the value is 1 for the start marker '#' and 0 otherwise.
        # (presumably [back-pointer, form, score] - confirm against Viterbi)
        matrice = [
            [[-1, possib, 1 if possib == "#" else 0]
             for possib in tool.words(candidates[word])]
            for word in list_words
        ]

        sent = self.Viterbi(matrice, smooth_const)
        # Drop the leading '#' and trailing '$' tokens (tokenise once).
        decoded = tool.words(sent)
        result_teshkeel.append(' '.join(decoded[1:-1]))

        # -2 discounts the two boundary markers.
        result['token'] += len(list_words) - 2
        result['type'] += len(candidates) - 2

    # --------------------- Vocalisation by letter ---------------------
    result_teshkeel = self.LettersVocaliser(result_teshkeel, smooth_const)

    result['result'] = result_teshkeel
    result['time'] = round(time.time() - t1, 2)
    return result
def DeletDiac(request):
    """Strip diacritics from the POSTed ``text`` field and return JSON.

    The text is split with ``MyToolKit.sents``, every resulting piece is
    passed through ``MyToolKit.DeleteDiacritic`` and the pieces are joined
    with ``<br>``.

    Args:
        request: Request object exposing ``POST['text']``
            (presumably a Django ``HttpRequest`` - confirm).

    Returns:
        An ``HttpResponse`` with content type ``application/json`` whose
        body holds ``'results'`` (the joined string) and ``'time'``
        (elapsed seconds rounded to 2 decimals).
    """
    started = time.time()
    toolkit = MyToolKit()

    raw_text = request.POST['text'].strip()
    separators = ["\n", "\r", ".", ":", ",", ';']
    # NOTE(review): only element 0 of what sents() returns is iterated;
    # presumably sents() returns a container whose first item is the list
    # of sentences - confirm against MyToolKit.
    pieces = toolkit.sents(raw_text, separators, subsent=['"', "'", '-'])[0]

    stripped = [toolkit.DeleteDiacritic(piece) for piece in pieces]

    payload = {}
    payload['results'] = '<br>'.join(stripped)
    payload['time'] = round(time.time() - started, 2)
    return HttpResponse(json.dumps(payload), content_type="application/json")
def moushakeel_V2(self, text, smooth_const):
    """Vocalise text with a word-level Viterbi pass, then a letter-level pass.

    The input is split with ``tool.sents``; each piece is stripped of
    diacritics, alif-normalised, stripped of the tatweel character and
    wrapped in ``#`` / ``$`` boundary markers. Every word is looked up in
    the ``dictionary`` table, the candidates are arranged in a matrix and
    decoded with ``self.Viterbi``. The decoded sentences are finally passed
    through ``self.LettersVocaliser``.

    Args:
        text: Raw input text.
        smooth_const: Smoothing constant forwarded to ``self.Viterbi`` and
            ``self.LettersVocaliser``.

    Returns:
        A dict with the keys ``'token'`` (words processed, boundary markers
        excluded, summed over all sentences), ``'type'`` (distinct words per
        sentence, markers excluded, summed over all sentences), ``'result'``
        (the value returned by ``self.LettersVocaliser``) and ``'time'``
        (elapsed seconds rounded to 2 decimals).
    """
    t1 = time.time()
    tool = MyToolKit()
    hdb = DBHandler('data/model.db')
    result = {}
    # NOTE(review): the connection opened here is never closed in this
    # function; DBHandler's closing API is not visible from this block.
    hdb.connect()
    result_teshkeel = []
    result['token'] = 0
    result['type'] = 0

    # ---------------------- Vocalisation by word ----------------------
    sentences = tool.sents(text.strip(), ["\n", "\r", ".", ":", ",", ';'],
                           subsent=['"', "'", '-'])
    for sent in sentences:
        # TODO: revisit this replace, it slightly alters the words (tatweel).
        sent = ("# "
                + tool.normalizeArabicAlif(tool.DeleteDiacritic(sent)).replace('ـ', '')
                + " $")
        list_words = tool.words(sent)

        # Retrieve the candidate forms for each word; a word that is not in
        # the dictionary is kept unchanged.
        # NOTE(review): the condition is built by string concatenation from
        # user text; a word containing '"' would break or alter the query.
        # Use a parameterised lookup if DBHandler offers one.
        candidates = {}
        for word in list_words:
            res = hdb.SelectOne('dictionary', 'type="' + word + '"',
                                attribute='vocabularies')
            candidates[word] = word if res is None else res[0]
        # The boundary markers always map to themselves.
        candidates['#'] = '#'
        candidates['$'] = '$'

        # HMM: one row per word, one [-1, form, value] cell per candidate
        # form; the value is 1 for the start marker '#' and 0 otherwise.
        # (presumably [back-pointer, form, score] - confirm against Viterbi)
        matrice = [
            [[-1, possib, 1 if possib == "#" else 0]
             for possib in tool.words(candidates[word])]
            for word in list_words
        ]

        sent = self.Viterbi(matrice, smooth_const)
        # Drop the leading '#' and trailing '$' tokens (tokenise once).
        decoded = tool.words(sent)
        result_teshkeel.append(' '.join(decoded[1:-1]))

        # -2 discounts the two boundary markers.
        result['token'] += len(list_words) - 2
        result['type'] += len(candidates) - 2

    # --------------------- Vocalisation by letter ---------------------
    result_teshkeel = self.LettersVocaliser(result_teshkeel, smooth_const)

    result['result'] = result_teshkeel
    result['time'] = round(time.time() - t1, 2)
    return result
def moushakeel_V1(self, text, smooth_const):
    """Vocalise text by scoring every candidate sentence and keeping the best.

    The input is split with ``tool.sents``; each piece is stripped of
    diacritics, alif-normalised and wrapped in ``#`` / ``$`` boundary
    markers. Every word is looked up in the ``dictionary`` table, the
    candidate sentences are enumerated with ``self.getPossibilities`` and
    the one with the highest ``self.sentProbaility`` score is kept.

    Args:
        text: Raw input text.
        smooth_const: Smoothing constant forwarded to ``self.sentProbaility``.

    Returns:
        A dict with the keys ``'token'`` (words processed, boundary markers
        excluded, summed over all sentences), ``'type'`` (distinct words per
        sentence, markers excluded, summed over all sentences), ``'result'``
        (list with one vocalised string per sentence) and ``'time'``
        (elapsed seconds rounded to 2 decimals).
    """
    t1 = time.time()
    tool = MyToolKit()
    hdb = DBHandler('data/model.db')
    result = {}
    # NOTE(review): the connection opened here is never closed in this
    # function; DBHandler's closing API is not visible from this block.
    hdb.connect()
    result_teshkeel = []
    result['token'] = 0
    result['type'] = 0

    sentences = tool.sents(text.strip(), ["\n", "\r", ".", ":", ",", ';'],
                           subsent=['"', "'", '-'])
    for sent in sentences:
        marked = "# " + tool.normalizeArabicAlif(tool.DeleteDiacritic(sent)) + " $"
        list_words = tool.words(marked)

        # Retrieve the candidate forms for each word; a word that is not in
        # the dictionary is kept unchanged.
        # NOTE(review): the condition is built by string concatenation from
        # user text; a word containing '"' would break or alter the query.
        # Use a parameterised lookup if DBHandler offers one.
        candidates = {}
        for word in list_words:
            res = hdb.SelectOne('dictionary', 'type="' + word + '"',
                                attribute='vocabularies')
            candidates[word] = word if res is None else res[0]
        # The boundary markers always map to themselves.
        candidates['#'] = '#'
        candidates['$'] = '$'

        possibilities = self.getPossibilities(list_words, candidates)

        # Keep the highest-scoring candidate. If no candidate scores above
        # 0, best_sent stays ''.
        max_p = 0
        best_sent = ''
        for possib in possibilities:
            p = self.sentProbaility(possib, smooth_const)
            if p > max_p:
                # Fixed: the original assigned `p = max_p`, so the running
                # maximum never moved and the last non-zero candidate won.
                max_p = p
                best_sent = possib

        # Drop the leading '#' and trailing '$' tokens.
        best_words = tool.words(best_sent)
        result_teshkeel.append(' '.join(best_words[1:-1]))

        # Accumulate over sentences (the original overwrote the counters, so
        # only the last sentence was reported); -2 discounts the markers.
        result['token'] += len(list_words) - 2
        result['type'] += len(candidates) - 2

    result['result'] = result_teshkeel
    result['time'] = round(time.time() - t1, 2)
    return result