def alif_count_verse(quran_dict, sura, verse, assumptions):
    """Count the assumed-alif letters in a single verse.

    Args:
        quran_dict: nested mapping sura -> verse -> {"arabic": ...} (and more).
        sura: sura number used to index quran_dict.
        verse: verse number used to index quran_dict[sura].
        assumptions: collection of transliterated letters to count (e.g. ["A"]).

    Returns:
        dict with:
            "verse": Markup of the Arabic text with each alif wrapped in a
                     <strong> tag for highlighting in the template,
            "count": number of matching letters,
            "tgv": TGV value of the matched letters.
        On a bad sura/verse lookup (or any other failure), returns an
        error dict with the same keys' shape instead of raising.
    """
    mod = "strong"
    try:
        trans = transString(quran_dict[sura][verse]["arabic"])
        # Count and collect the letters that appear in the assumption set.
        total = sum(1 for letter in trans if letter in assumptions)
        letters = "".join(letter for letter in trans if letter in assumptions)
        return {
            "verse": Markup(quran_dict[sura][verse]["arabic"].replace(
                "ا", "<" + mod + ">ا</" + mod + ">")),
            "count": total,
            "tgv": calc_val(letters, "tgv")
        }
    except Exception:
        # Bug fix: the original built this dict but never returned it, so
        # callers received None on failure. Also corrected the function name
        # in the error message (was 'alif_count_sura').
        return {
            "verse": Markup(
                "<b>Error</b> in function 'alif_count_verse': Invalid sura and verse number "
            ),
            "count": "Error in alif_count_verse function"
        }
def index():
    """Main page view: renders search results on GET, dispatches form
    actions on POST (clear comments, calculate TGV, count alifs, get verse).

    Uses module-level globals so results survive the POST->redirect->GET cycle.
    """
    global verse_obj, alif_count, search_term
    alif_count = ""
    if request.method == "GET":
        return render_template("main_page.html",
                               comments=Word.query.all(),
                               verses=verse_obj,
                               alif_count=alif_count,
                               search_term=search_term)
    else:
        # The clicked submit button's value selects the action.
        execute_this = request.form["submit"]
        logging.error(request.form)
        if execute_this == "Clear":
            # Delete all stored comments.
            n = Word.query.delete()
            db.session.commit()
            print(n)
        elif execute_this == "Calculate TGV":
            word = request.form["contents"]
            addition = calc_val(transString(word), "tgv")
            string = "TGV of " + word + " is: " + str(addition)
            comment = Word(content=string)
            db.session.add(comment)
            db.session.commit()
        elif execute_this == "Count alif":
            req = request.form["contents"].strip()
            # Split "sura verse" (any non-digit separator) into ints.
            # Raw string avoids the invalid-escape warning on "\D".
            req = list(map(int, re.split(r"\D+", req)))
            if len(req) == 1:
                sura = req[0]
                alif_count = alif_count_sura(quran_dict, sura, ["A"])
            else:
                try:
                    sura, verse = req[0], req[1]
                    alif_count = alif_count_verse(quran_dict, sura, verse,
                                                  ["A"])
                except Exception:
                    # Narrowed from a bare except; any lookup/parse failure
                    # yields a user-visible message instead of a 500.
                    alif_count = "Invalid sura-verse input"
        elif execute_this == "Get verse":
            req = request.form["contents"].strip()
            # "number separator number" means a sura:verse reference;
            # anything else is treated as a text search.
            if re.match(r"\d+\D+\d+\D*", req):
                verse_obj_pre = query_verses_number(req, db)
                search_term = 'N/A'
            else:
                search_term = remove_diacritics(req) if detect_arabic(
                    req) else req
                verse_obj_pre = query_verses_text(req, db)
            # Strip diacritics from the Arabic text for display.
            verse_obj = []
            for verse_dict in verse_obj_pre:
                temp = verse_dict.copy()
                temp['ar'] = remove_diacritics(verse_dict['ar'])
                verse_obj.append(temp)
        # Post/Redirect/Get: re-render via GET so refresh doesn't repost.
        return redirect(url_for('index'))
def tgv_matching():
    """TGV-matching view: on GET renders the page; on POST dispatches the
    clicked action (clear comments, calculate TGV, find n-grams whose TGV
    matches a number or a word's TGV).
    """
    global verse_obj, alif_count
    alif_count = ""
    if request.method == "GET":
        return render_template("tgv_matching.html",
                               comments=Word.query.all(),
                               verses=verse_obj,
                               alif_count=alif_count,
                               search_package={})
    else:
        execute_this = request.form["submit"]
        logging.error(request.form)
        if execute_this == "Clear":
            n = Word.query.delete()
            db.session.commit()
            print(n)
        elif execute_this == "Calculate TGV":
            word = request.form["contents"]
            addition = calc_val(transString(word), "tgv")
            string = "TGV of " + word + " is: " + str(addition)
            comment = Word(content=string)
            db.session.add(comment)
            db.session.commit()
        elif execute_this == "Find TGV pairs":
            req = request.form["contents"].strip()
            # Bug fix: was fullmatch('\d*', ...), which matches the EMPTY
            # string and then crashes on int(''). Require at least one digit;
            # empty/non-numeric input falls through to the word branch.
            if re.fullmatch(r'\d+', req):
                # Numeric input: match n-grams against that TGV directly.
                tgv = int(req)
                pairs = [ngram for ngram in all_ngrams
                         if ngram['tgv'] == tgv]
                pairs_dict = build_tgv_match_dict(pairs)
                search_package = {'term': tgv, 'type': 'number'}
            else:
                # Word input: compute its TGV, then match n-grams against it.
                word = req
                tgv = calc_val(transString(word), "tgv")
                pairs = [ngram for ngram in all_ngrams
                         if ngram['tgv'] == tgv]
                pairs_dict = build_tgv_match_dict(pairs)
                search_package = {'term': word, 'type': 'word'}
            return render_template("tgv_matching.html",
                                   comments=Word.query.all(),
                                   pairs=pairs_dict,
                                   search_package=search_package,
                                   tgv=tgv)
        return redirect(url_for('tgv_matching'))
def get_ngrams_verse(sura_nbr, verse_nbr, verse):
    """Return TGV records for every unigram, bigram, and trigram of a verse.

    Each record is a dict with the gram text, its TGV value, and the
    sura/verse numbers it came from.
    """
    tokens = nltk.word_tokenize(verse)
    # Accumulate grams in order: single words, then bigrams, then trigrams.
    grams = list(tokens)
    grams.extend(' '.join(pair) for pair in nltk.bigrams(tokens))
    grams.extend(' '.join(triple) for triple in nltk.trigrams(tokens))
    return [
        {
            "gram": gram,
            "tgv": calc_val(transString(gram), type_v="tgv"),
            "sura_nbr": sura_nbr,
            "verse_nbr": verse_nbr,
        }
        for gram in grams
    ]
def verse2dict(verse):
    '''Parse one pipe-delimited Quran text line into a dict.

    Expected format: "<sura>|<verse>|<arabic>|<english>".
    Returns a dict with keys nSura, nVerse, ar, eng, and translit (the
    transliteration of the Arabic text).
    '''
    # Split once instead of twice (original split the line per field).
    fields = verse.split("|")
    rd = {}
    # Raw strings avoid invalid-escape warnings on "\d" / "\|".
    rd["nSura"] = re.search(r"\d{1,3}\|", verse).group(0)[0:-1]
    rd["nVerse"] = re.search(r"\|\d{1,3}\|", verse).group(0)[1:-1]
    rd["ar"] = fields[2]
    rd["eng"] = fields[3]
    rd["translit"] = transString(rd["ar"])
    return rd
def query_verses_text(req, db):
    """Full-text search for verses by Arabic transliteration or English text.

    Args:
        req: the user's search string; Arabic input is transliterated and
             matched against the `translit` column, otherwise `eng` is used.
        db: SQLAlchemy database handle.

    Returns:
        List of verse dicts (via load_query_into_dict), or [{}] for an
        empty query.
    """
    # Bug fix: strip BEFORE the empty check, so whitespace-only input is
    # treated as empty instead of producing a '% %' LIKE pattern.
    req = req.strip()
    if req == "":
        return [{}]
    if detect_arabic(req):
        search = '%' + transString(req) + '%'
        # Parameterized query (:x1) — user input never concatenated into SQL.
        textual = text("select nSura, nVerse, ar, eng, translit from q_tbl " +
                       "where translit like :x1")
    else:
        search = '%' + req + '%'
        textual = text("select nSura, nVerse, ar, eng, translit from q_tbl " +
                       "where eng like :x1")
    print("req is: ", req)
    print("search is: ", search)
    rv = db.session.execute(textual, {"x1": search}).fetchall()
    return load_query_into_dict(rv)
# --- One-time startup load: populate the Verse table and in-memory data ---
db.session.commit()
# Load the Quran text into a DataFrame (columns include sura, verse, arabic,
# english, seq_index, chron_index — see solver.quran_as_df).
qdf = solver.quran_as_df(quran_file, sura_order_table)
if quran_test:
    # if testing mode, only work with first few rows of data
    n = 111
else:
    # Larger than the total verse count, so the slice covers every row.
    n = 50100
i = 0
for verse in qdf[0:n].itertuples():
    # One ORM row per verse; transliteration is computed at load time.
    temp_verse = Verse(nSura=verse.sura,
                       nVerse=verse.verse,
                       ar=verse.arabic,
                       eng=verse.english,
                       translit=transString(verse.arabic),
                       seq_order=verse.seq_index,
                       chron_order=verse.chron_index)
    db.session.add(temp_verse)
    i += 1
# Single commit after the loop — one transaction for the whole load.
db.session.commit()
# Build Quran dictionary and TGV dictionary
quran_dict = scrape_quran_into_dict(quran_file)
print("assembled quran_dict")
# All uni/bi/tri-grams with their TGV values, for TGV-matching searches.
all_ngrams = get_ngrams_quran(quran_dict)
print("assembled ngrams phase 1...")
# NOTE(review): only the first 6 ngrams are sliced here and the TGV dict
# build below is commented out — presumably leftover debugging; confirm.
ngrams_sub = all_ngrams[0:6]
#tgv_dict = build_tgv_dict(ngrams_sub)
# Script: summarize letter frequencies per sura, attach TGV values, and
# plot count vs. TGV. Uses pandasql to run SQL over local DataFrames.
from pandasql import sqldf
from funcs import calc_val, transString
# Read in datafiles
ldf = pd.read_csv('letter_df.csv')
# Create summary by letter
q = '''select sura, letters as letter, count(*) as letter_count from ldf group by sura, letters order by sura, letters; '''
# sqldf resolves table names (ldf, ldfsum) from globals().
pysqldf = lambda q: sqldf(q, globals())
ldfsum = pysqldf(q)
# TGV value for each letter (via its transliteration).
ldfsum['tgv'] = ldfsum['letter'].apply(
    lambda x: calc_val(transString(x), 'tgv'))
# Drop letters with no TGV value.
ldfsum = ldfsum.query('tgv > 0')
# Create TGV plotting dataset: one row per TGV value, summing counts across
# suras; min(letter) picks a representative letter for the label.
q = '''select min(letter) as letter, tgv, sum(letter_count) as count from ldfsum group by tgv order by tgv; '''
tgv_plotting_df = pysqldf(q)
letters = [transString(l) for l in tgv_plotting_df['letter'].tolist()]
# Create plot (statement continues beyond this chunk)
sns.lmplot(x="tgv", y="count", data=tgv_plotting_df,