def from_index(index_props):
    """
    Build a SiacNote from a row that was read from the search index.

    index_props[0] is used as the note id, index_props[2] is passed through
    unchanged as the fifth tuple element, and index_props[4] holds the
    title, text and source of the note joined by the unit separator (U+001F).
    The remaining SiacNote tuple positions are filled with placeholder
    values (-1 and empty strings).

    Raises IndexError if index_props[4] contains fewer than three fields.
    """
    note_id = index_props[0]
    # if the note was stored in the index, its title, text, and source fields
    # are collapsed into a single field, so one split yields all three
    fields = index_props[4].split("\u001f")
    title = fields[0]
    body = fields[1]
    src = fields[2]
    return SiacNote((note_id, title, body, src, index_props[2], -1, "", "", "", "", -1))
def _most_common_words(self, text):
    """
    Returns the html that is displayed in the right sidebar containing the clickable keywords.

    The text is cleaned with utility.text.clean (using self.stopwords), the
    remaining tokens are counted case-insensitively, and the 15 most frequent
    ones are rendered as comma separated links. Tokens of length 1 and tokens
    matched by self.EXCLUDE_KEYWORDS are ignored.
    Returns a fixed message if there is nothing to show.
    """
    empty_msg = "No keywords for empty result."
    if text is None or len(text) == 0:
        return empty_msg

    cleaned = utility.text.clean(text, self.stopwords)

    # lowercased token -> [spelling of its first occurrence, number of occurrences]
    occurrences = {}
    for word in cleaned.split():
        if word == "" or len(word) == 1 or self.EXCLUDE_KEYWORDS.match(word):
            continue
        key = word.lower()
        if key in occurrences:
            occurrences[key][1] += 1
        else:
            occurrences[key] = [word, 1]

    ranked = sorted(occurrences.items(), key=lambda kv: kv[1][1], reverse=True)

    links = []
    for _, (spelling, _count) in ranked[:15]:
        label = utility.text.trim_if_longer_than(spelling, 25)
        # single quotes would terminate the data-keyword attribute
        keyword = spelling.replace("'", "")
        links.append(f"<a class='keyword' href='#' data-keyword='{keyword}' onclick='event.preventDefault(); searchFor($(this).data(\"keyword\"));'>{label}</a>")

    if not links:
        return empty_msg
    return ", ".join(links)
def _mostCommonWords(self, text):
    """
    Returns the html that is displayed in the right sidebar containing the clickable keywords.

    The text is cleaned with utility.text.clean (using self.stopwords), the
    remaining tokens are counted case-insensitively, and the 15 most frequent
    ones are rendered as comma separated links. Tokens of length 1 and tokens
    matched by self.SOUND_TAG are ignored.
    Returns a fixed message if there is nothing to show.
    """
    empty_msg = "No keywords for empty result."
    if text is None or len(text) == 0:
        return empty_msg

    cleaned = utility.text.clean(text, self.stopwords)

    # lowercased token -> [spelling of its first occurrence, number of occurrences]
    occurrences = {}
    for word in cleaned.split():
        if word == "" or len(word) == 1 or self.SOUND_TAG.match(word):
            continue
        key = word.lower()
        if key in occurrences:
            occurrences[key][1] += 1
        else:
            occurrences[key] = [word, 1]

    ranked = sorted(occurrences.items(), key=lambda kv: kv[1][1], reverse=True)

    links = [
        "<a class='keyword' href='#' onclick='event.preventDefault(); searchFor($(this).text())'>%s</a>" % (spelling,)
        for _, (spelling, _count) in ranked[:15]
    ]
    if not links:
        return empty_msg
    return ", ".join(links)
def removeStopwords(self, text):
    """
    Returns text without the tokens whose lowercased form is contained in self.stopWords.

    The text is split on single spaces, and the kept tokens are joined with
    single spaces again.
    """
    kept = [word for word in text.split(" ") if word.lower() not in self.stopWords]
    return " ".join(kept)
def get_result_html_simple(self, db_list, tag_hover = True, search_on_selection = True):
    """
    Builds the html for a list of notes using the simple note templates
    (NOTE_TMPL_SIMPLE for notes of type "index", NOTE_TMPL_SIAC_SIMPLE otherwise).

    Args:
        db_list: notes to render; each note is read through .id, .mid, .tags,
            .note_type and .get_content()
        tag_hover: passed through to utility.tags.build_tag_string
        search_on_selection: if True, the rendered notes call getSelectionText() on mouseup

    Returns:
        The concatenated html of all rendered notes ("" for an empty list).
    """
    rendered = []
    epochTime = int(time.time() * 1000)
    nids = [r.id for r in db_list]
    if self.showRetentionScores:
        retsByNid = getRetentions(nids)

    for counter, res in enumerate(db_list):
        # NOTE(review): the notes are otherwise accessed through attributes (res.id, res.mid),
        # so res[3] may always raise here and end up in the fallback label - confirm
        # whether res.id was intended.
        try:
            timeDiffString = self._get_time_diff_lbl(res[3], epochTime)
        except Exception:
            timeDiffString = "Could not determine creation date"

        ret = retsByNid[int(res.id)] if self.showRetentionScores and int(res.id) in retsByNid else None
        if ret is not None:
            retMark = "border-color: %s;" % (utility.misc._retToColor(ret))
            retInfo = """<div class='retMark' style='%s'>PR: %s</div> """ % (retMark, int(ret))
        else:
            retInfo = ""

        text = res.get_content()

        # hide fields that should not be shown
        if str(res.mid) in self.fields_to_hide_in_results:
            text = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in self.fields_to_hide_in_results[str(res.mid)]])

        # hide cloze brackets if set in config
        if not self.show_clozes:
            text = utility.text.hide_cloze_brackets(text)

        #remove <div> tags if set in config
        if self.remove_divs and res.note_type != "user":
            text = utility.text.remove_divs(text)

        # Backslashes and backticks are escaped for use inside a JS template literal.
        # "$" is replaced by its html entity so that "${...}" in a note is not
        # interpolated by the template literal (the previous replace was a no-op).
        text = utility.text.clean_field_separators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text = utility.text.try_hide_image_occlusion(text)
        #try to put fields that consist of a single image in their own line
        text = utility.text.newline_before_images(text)

        template = NOTE_TMPL_SIMPLE if res.note_type == "index" else NOTE_TMPL_SIAC_SIMPLE
        rendered.append(template.format(
            counter=counter+1,
            nid=res.id,
            edited="" if str(res.id) not in self.edited else "<i class='fa fa-pencil ml-10 mr-5'></i> " + self._build_edited_info(self.edited[str(res.id)]),
            mouseup="getSelectionText()" if search_on_selection else "",
            text=text,
            ret=retInfo,
            tags=utility.tags.build_tag_string(res.tags, tag_hover, maxLength = 25, maxCount = 2),
            creation=" 🕐 " + timeDiffString))

    return "".join(rendered)
def _build_non_anki_note_html(self, text):
    """
    Renders the content of a user's note so that title, text and source
    are visually distinguishable.

    text is expected to consist of title, body and source, separated by the
    unit separator (U+001F). If text is longer than 5000 characters, the body
    is cut to 5000 characters to reduce the time needed for highlighting,
    extracting keywords, and rendering, and a notice is appended to the body.

    Returns the html string title + body + source.
    """
    sep = "\u001f"
    if len(text) <= 5000:
        parts = text.split(sep)
        title = parts[0]
        body = parts[1]
        src = parts[2]
    else:
        # very long text: the title ends at the first separator, the source starts after the last one
        first_sep = text.find(sep)
        last_sep = text.rfind(sep)
        src = text[last_sep + 1:]
        title = text[:first_sep]
        body = text[first_sep + 1:last_sep][:5000]
        #there might be unclosed tags now, but parsing would be too much overhead, so simply remove div, a and span tags
        #there might be still problems with <p style='...'>
        body = utility.text.remove_tags(body, ["div", "span", "a"])
        tail_start = len(body) - 500
        last_open_bracket = body.rfind("<")
        if last_open_bracket >= tail_start or body.rfind(" ") < tail_start:
            # drop a tag at the end that was opened, but not closed anymore
            if body.rfind(">") < last_open_bracket:
                body = body[:last_open_bracket]
        body += "<br></ul></b></i></em></span></p></p><p style='text-align: center; user-select: none;'><b>(Text was cut - too long to display)</b></p>"

    is_pdf = src is not None and src.lower().strip().endswith(".pdf")
    icon = "<span class='siac-pdf-icon'></span>" if is_pdf else ""
    heading = title if len(title) > 0 else "Unnamed Note"
    # only separate title and body if there is a body to show
    divider = "<hr style='margin-bottom: 5px; border-top: dotted 2px;'>" if len(body.strip()) > 0 else ""
    title_html = "%s<b>%s</b>%s" % (icon, heading, divider)

    if src is not None and len(src) > 0:
        src_html = "<br/><hr style='border-top: dotted 2px;'><i>Source: %s</i>" % (src)
    else:
        src_html = ""
    return title_html + body + src_html
def updateSingle(self, note):
    """
    Used after note has been edited. The edited note should be rerendered.
    To keep things simple, only note text and tags are replaced.

    Args:
        note: indexable, note[0] is the note id, note[1] the note text and note[2] the tags.
            If present, note[4] is used (as a string) as key into
            self.fields_to_hide_in_results.

    Does nothing if there is no editor or the editor has no web view.
    """
    if self._editor is None or self._editor.web is None:
        return
    tags = note[2]
    tagStr = utility.tags.build_tag_string(tags, self.gridView)
    nid = note[0]
    text = note[1]

    # hide fields that should not be shown
    if len(note) > 4 and str(note[4]) in self.fields_to_hide_in_results:
        text = "\u001f".join([
            spl for i, spl in enumerate(text.split("\u001f"))
            if i not in self.fields_to_hide_in_results[str(note[4])]
        ])

    # The text is inserted into a JS template literal below, so backslashes and
    # backticks are escaped. "$" is replaced by its html entity, otherwise "${...}"
    # in a note would be interpolated by JS (the previous replace was a no-op).
    text = utility.text.cleanFieldSeparators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
    text = utility.text.try_hide_image_occlusion(text)
    text = utility.text.newline_before_images(text)

    #find rendered note and replace text and tags
    self._editor.web.eval(""" document.getElementById('%s').innerHTML = `%s`; document.getElementById('tags-%s').innerHTML = `%s`; """ % (nid, text, nid, tagStr))

    self._editor.web.eval("$('#cW-%s').find('.rankingLblAddInfo').hide();" % nid)
    self._editor.web.eval("fixRetMarkWidth(document.getElementById('cW-%s'));" % nid)
    # show the edited stamp, and move the "suspended" label (if the note has one) next to it
    self._editor.web.eval(f"""$('#cW-{nid} .editedStamp').html(` 🕐 Edited just now`).show(); if ($('#siac-susp-lbl-{nid}').length) {{ $('#siac-susp-lbl-{nid}').css('left', '140px').show(); }} """)
def updateSingle(self, note):
    """
    Used after note has been edited. The edited note should be rerendered.
    To keep things simple, only note text and tags are replaced.

    Args:
        note: indexable, note[0] is the note id, note[1] the note text and note[2] the tags.
            If present, note[4] is used (as a string) as key into
            self.fields_to_hide_in_results.

    Does nothing if there is no editor or the editor has no web view.
    """
    if self.editor is None or self.editor.web is None:
        return
    tags = note[2]
    tagStr = self.buildTagString(tags)
    nid = note[0]
    text = note[1]

    # hide fields that should not be shown
    if len(note) > 4 and str(note[4]) in self.fields_to_hide_in_results:
        text = "\u001f".join([
            spl for i, spl in enumerate(text.split("\u001f"))
            if i not in self.fields_to_hide_in_results[str(note[4])]
        ])

    # The text is inserted into a JS template literal below, so backslashes and
    # backticks are escaped. "$" is replaced by its html entity, otherwise "${...}"
    # in a note would be interpolated by JS (the previous replace was a no-op).
    text = self._cleanFieldSeparators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
    text = self.tryHideImageOcclusion(text)
    # try to put fields that consist of a single image in their own line
    text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)

    #find rendered note and replace text and tags
    self.editor.web.eval(""" document.getElementById('%s').innerHTML = `%s`; document.getElementById('tags-%s').innerHTML = `%s`; """ % (nid, text, nid, tagStr))

    self.editor.web.eval("$('#cW-%s').find('.rankingLblAddInfo').hide();" % nid)
    self.editor.web.eval("fixRetMarkWidth(document.getElementById('cW-%s'));" % nid)
    self.editor.web.eval("$('#cW-%s .editedStamp').html(` 🕐 Edited just now`).show();" % nid)
def update_single(self, note):
    """
    Used after note has been edited. The edited note should be rerendered.
    To keep things simple, only note text and tags are replaced.

    Args:
        note: indexable, note[0] is the note id, note[1] the note text and note[2] the tags.
            If present, note[4] is used (as a string) as key into
            self.fields_to_hide_in_results.

    Does nothing if there is no editor or the editor has no web view.
    """
    if self._editor is None or self._editor.web is None:
        return
    tags = note[2]
    tagStr = utility.tags.build_tag_string(tags, self.gridView)
    nid = note[0]
    text = note[1]

    # hide fields that should not be shown
    if len(note) > 4 and str(note[4]) in self.fields_to_hide_in_results:
        text = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in self.fields_to_hide_in_results[str(note[4])]])

    # The text is inserted into a JS template literal below, so backslashes and
    # backticks are escaped. "$" is replaced by its html entity, otherwise "${...}"
    # in a note would be interpolated by JS (the previous replace was a no-op).
    text = utility.text.clean_field_separators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
    text = utility.text.try_hide_image_occlusion(text)

    # hide clozes if set in config
    if not self.show_clozes:
        text = utility.text.hide_cloze_brackets(text)

    text = utility.text.newline_before_images(text)

    if self.remove_divs:
        text = utility.text.remove_divs(text, " ")

    #find rendered note and replace text and tags
    self._editor.web.eval(""" document.getElementById('siac-inner-card-%s').innerHTML = `%s`; document.getElementById('tags-%s').innerHTML = `%s`; """ % (nid, text, nid, tagStr))

    self._editor.web.eval(f"""$('#siac-edited-dsp-{nid}').html(`<i class='fa fa-pencil mr-5 ml-10'></i> Edited just now`); """)
def print_search_results(self, notes, stamp, editor=None, logging=False, printTimingInfo=False, page=1, query_set=None, is_queue=False, is_cached=False):
    """
    Builds the html that gets rendered in the search results div and sends it to the web view.
    Only the notes of the given page (50 notes per page) are rendered.

    Args:
        notes: list of all found notes (read through .id, .mid, .did, .tags, .text, .note_type, ...)
        stamp: if not None, rendering is skipped unless stamp equals self.latest;
            also used to compute the "Took" value
        editor: passed through to self._js
        logging: if True, some progress messages are logged
        printTimingInfo: if True, the sidebar gets a clickable timing info symbol
        page: 1-based index of the page to render
        query_set: terms to highlight, None to disable highlighting
        is_queue: if True, the rendered notes do not react on text selection
        is_cached: if True, the call is not stored in self.previous_calls

    Returns:
        None if the call was skipped because of its stamp, else a tuple
        (time spent highlighting in ms, time spent in the user note section in s).
    """
    if logging:
        log("Entering print_search_results")
        log("Length (searchResults): " + str(len(notes)))

    if stamp is not None and stamp != self.latest:
        return

    # remember the call so that it can be repeated later, keep at most 11 calls
    if not is_cached and len(notes) > 0:
        self.previous_calls.append([notes, None, editor, logging, printTimingInfo, page, query_set, is_queue])
        if len(self.previous_calls) > 11:
            self.previous_calls.pop(0)

    html = ""
    allText = ""
    tags = []
    epochTime = int(time.time() * 1000)
    self.last_had_timing_info = printTimingInfo

    if notes is not None and len(notes) > 0:
        self.lastResults = notes
        self.last_query_set = query_set

    page_offset = (page - 1) * 50
    searchResults = notes[page_offset:page * 50]
    nids = [r.id for r in searchResults]

    if self.showRetentionScores:
        retsByNid = getRetentions(nids)

    # various time stamps to collect information about rendering performance
    start = time.time()
    highlight_total = 0.0
    build_user_note_total = 0.0

    # only the first notes of a page are highlighted immediately, the rest after the page has been rendered
    remaining_to_highlight = {}
    highlight_boundary = 15 if self.gridView else 10

    # for better performance, collect all notes that are .pdfs, and
    # query their reading progress after they have been rendered
    pdfs = []
    check_for_suspended = []

    for idx, res in enumerate(searchResults):
        nid = res.id
        # position of the note in the whole result list
        counter = idx + page_offset
        try:
            timeDiffString = self._getTimeDifferenceString(nid, epochTime)
        except Exception:
            if logging:
                log("Failed to determine creation date: " + str(nid))
            timeDiffString = "Could not determine creation date"

        ret = retsByNid[int(nid)] if self.showRetentionScores and int(nid) in retsByNid else None
        if ret is not None:
            retMark = "background: %s; color: black;" % (utility.misc._retToColor(ret))
            if str(nid) in self.edited:
                retMark = ''.join((retMark, "max-width: 20px;"))
            retInfo = """<div class='retMark' style='%s'>%s</div>""" % (retMark, int(ret))
        else:
            retInfo = ""

        #non-anki notes should be displayed differently, we distinguish between title, text and source here
        #confusing: 'source' on notes from the index means the original note content (without stopwords removed etc.),
        #on SiacNotes, it means the source field.
        build_user_note_start = time.time()
        text = res.get_content()
        progress = ""
        pdf_class = ""
        if res.note_type == "user" and res.is_pdf():
            pdfs.append(nid)
            # placeholder progress bar, replaced once the pdf info has been queried
            p_html = "<div class='siac-prog-sq'></div>" * 10
            progress = f"<div id='ptmp-{nid}' class='siac-prog-tmp'>{p_html} <span> 0 / ?</span></div>"
            pdf_class = "pdf"
        elif res.note_type == "index" and res.did > 0:
            check_for_suspended.append(res.id)
        build_user_note_total += time.time() - build_user_note_start

        # hide fields that should not be shown
        if str(res.mid) in self.fields_to_hide_in_results:
            text = "\u001f".join([
                spl for i, spl in enumerate(text.split("\u001f"))
                if i not in self.fields_to_hide_in_results[str(res.mid)]
            ])

        #remove double fields separators
        text = utility.text.cleanFieldSeparators(text).replace("\\", "\\\\")
        #try to remove image occlusion fields
        text = utility.text.try_hide_image_occlusion(text)
        #try to put fields that consist of a single image in their own line
        text = utility.text.newline_before_images(text)
        #remove <div> tags if set in config
        if self.remove_divs and res.note_type != "user":
            text = utility.text.remove_divs(text, " ")

        #highlight
        highlight_start = time.time()
        if query_set is not None:
            if idx < highlight_boundary:
                text = utility.text.mark_highlights(text, query_set)
            else:
                remaining_to_highlight[nid] = text
        highlight_total += time.time() - highlight_start

        gridclass = "grid" if self.gridView else ""
        if self.gridView and len(text) < 200:
            if self.scale < 0.8:
                gridclass = ' '.join((gridclass, "grid-smaller"))
            else:
                gridclass = ' '.join((gridclass, "grid-small"))
        elif self.gridView and self.scale < 0.8:
            gridclass = ' '.join((gridclass, "grid-small"))
        elif self.gridView and len(text) > 700 and self.scale > 0.8:
            gridclass = ' '.join((gridclass, "grid-large"))
        if self.scale != 1.0:
            gridclass = ' '.join([gridclass, "siac-sc-%s" % str(self.scale).replace(".", "-")])

        # use either the template for addon's notes or the normal
        if res.note_type == "user":
            newNote = noteTemplateUserNote.format(
                grid_class=gridclass,
                counter=counter + 1,
                nid=nid,
                creation=" 🕐 " + timeDiffString,
                edited="" if str(nid) not in self.edited else " 🕐 " + self._buildEditedInfo(self.edited[str(nid)]),
                mouseup="getSelectionText()" if not is_queue else "",
                text=text,
                tags=utility.tags.build_tag_string(res.tags, self.gridView),
                queue=": Q-%s " % (res.position + 1) if res.is_in_queue() else "",
                progress=progress,
                pdf_class=pdf_class,
                ret=retInfo)
        else:
            newNote = noteTemplate.format(
                grid_class=gridclass,
                counter=counter + 1,
                nid=nid,
                creation=" 🕐 " + timeDiffString,
                edited="" if str(nid) not in self.edited else " 🕐 " + self._buildEditedInfo(self.edited[str(nid)]),
                mouseup="getSelectionText()" if not is_queue else "",
                text=text,
                tags=utility.tags.build_tag_string(res.tags, self.gridView),
                ret=retInfo)

        html = f"{html}{newNote}"
        tags = self._addToTags(tags, res.tags)
        # only the first 20 notes of the page contribute to the text given to the info table
        if idx < 20:
            # todo: title for user notes
            allText = f"{allText} {res.text[:5000]}"

    tags.sort()
    # The html is embedded in a JS template literal below, so backticks and "$" must not
    # appear literally. The html entities are decoded again when the browser parses the
    # html (the previous replaces mapped both characters to themselves).
    html = html.replace("`", "&#96;").replace("$", "&#36;")
    pageMax = math.ceil(len(notes) / 50.0)

    index = get_index()
    if index is not None and index.lastResDict is not None:
        index.lastResDict["time-html"] = int((time.time() - start) * 1000)
        index.lastResDict["time-html-highlighting"] = int(highlight_total * 1000)
        index.lastResDict["time-html-build-user-note"] = int(build_user_note_total * 1000)

    if stamp is None and self.last_took is not None:
        took = self.last_took
        stamp = -1
    elif stamp is not None:
        took = utility.misc.get_milisec_stamp() - stamp
        self.last_took = took
    else:
        took = "?"
        # stamp is None here; it must not end up as "None" in the JS call below
        stamp = -1

    timing = "true" if printTimingInfo else "false"

    if not self.hideSidebar:
        infoMap = {
            "Took": "<b>%s</b> ms %s" % (took, " <b style='cursor: pointer' onclick='pycmd(`siac-last-timing`)'>ⓘ</b>" if printTimingInfo else ""),
            "Found": "<b>%s</b> notes" % (len(notes) if len(notes) > 0 else "<span style='color: red;'>0</span>")
        }
        info = self.build_info_table(infoMap, tags, allText)
        cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s);" % (
            html, info[0].replace("`", "&#96;"), json.dumps(info[1]), page, pageMax, len(notes), len(self.previous_calls), stamp, timing)
    else:
        cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s);" % (
            html, page, pageMax, len(notes), len(self.previous_calls), stamp, timing)
    cmd = f"{cmd}updateSwitchBtn({len(notes)});"

    self._js(cmd, editor)

    # highlight the notes that were rendered without highlighting
    if len(remaining_to_highlight) > 0:
        cmd = ""
        for nid, text in remaining_to_highlight.items():
            # same escaping as for the page html, the text ends up in a template literal too
            highlighted = utility.text.mark_highlights(text, query_set).replace("`", "&#96;").replace("$", "&#36;")
            cmd = ''.join((cmd, "document.getElementById('%s').innerHTML = `%s`;" % (nid, highlighted)))
        self._js(cmd, editor)

    # add a label to the notes that get_suspended returns
    if len(check_for_suspended) > 0:
        susp = get_suspended(check_for_suspended)
        if len(susp) > 0:
            cmd = ""
            for nid in susp:
                cmd = f"{cmd}$('#cW-{nid}').after(`<span id='siac-susp-lbl-{nid}' onclick='pycmd(\"siac-unsuspend-modal {nid}\")' class='siac-susp-lbl'>SUSPENDED</span>`);"
                if str(nid) in self.edited:
                    cmd = f"{cmd} $('#siac-susp-lbl-{nid}').css('left', '140px');"
            self._js(cmd, editor)

    # replace the placeholder progress bars of the pdf notes
    if len(pdfs) > 0:
        pdf_info_list = get_pdf_info(pdfs)
        if pdf_info_list is not None and len(pdf_info_list) > 0:
            cmd = ""
            for i in pdf_info_list:
                # i[1] / i[2] is displayed as progress; avoid a division by zero if i[2] is 0
                perc = int(i[1] * 10.0 / i[2]) if i[2] else 0
                prog_bar = "".join(
                    "<div class='siac-prog-sq-filled'></div>" if x < perc else "<div class='siac-prog-sq'></div>"
                    for x in range(0, 10))
                cmd = ''.join((cmd, "document.querySelector('#ptmp-%s').innerHTML = `%s <span>%s / %s</span>`;" % (i[0], prog_bar, i[1], i[2])))
            self._js(cmd, editor)

    return (highlight_total * 1000, build_user_note_total)
def searchProc(self, text, decks, only_user_notes, print_mode):
    """
    Runs a full text search for the given text on the index db (search-data.db in self.dir).

    The text is cleaned (self.clean), expanded by synonyms, split into tokens,
    and the tokens are combined with OR into a match query.

    Args:
        text: the raw query text
        decks: list of deck ids as strings, "-1" in the list means all decks.
            NOTE: "-1" is appended to this list in place; the same list object is
            stored in self.lastSearch and in the returned dict under "decks".
        only_user_notes: if True, only rows with mid = '-1' are returned
        print_mode: "default" or "pdf", decides what happens if the cleaned query is empty

    Returns:
        - a message string or None if the cleaned query is empty (see print_mode)
        - { "results" : [] } if the query is too small to search for
        - otherwise a dict with the found notes under "results" (at most self.limit,
          pinned notes and notes of unselected decks removed) and additional
          information ("query", "decks", "highlighting", "time-*").
          That dict is also stored in self.lastResDict.
    """
    resDict = {}
    start = time.time()
    orig = text
    text = self.clean(text)
    resDict["time-stopwords"] = int((time.time() - start) * 1000)
    self.lastSearch = (text, decks, "default", orig)

    if self.logging:
        log("\nFTS index - Received query: " + text)
        log("Decks (arg): " + str(decks))
        log("Self.pinned: " + str(self.pinned))
        log("Self.limit: " + str(self.limit))

    if len(text) == 0:
        if print_mode == "default":
            # a backtick in the query is replaced by its html entity before it is
            # displayed (the previous replace mapped the backtick to itself)
            UI.empty_result("Query was empty after cleaning.<br/><br/><b>Query:</b> <i>%s</i>" % utility.text.trim_if_longer_than(orig, 100).replace("\u001f", "").replace("`", "&#96;"))
            if mw.addonManager.getConfig(__name__)["hideSidebar"]:
                return "Found 0 notes. Query was empty after cleaning."
            return None
        elif print_mode == "pdf":
            return None

    start = time.time()
    text = utility.text.expand_by_synonyms(text, self.synonyms)
    resDict["time-synonyms"] = int((time.time() - start) * 1000)
    resDict["query"] = text

    if utility.text.text_too_small(text):
        if self.logging:
            log("Returning - Text was < 2 chars: " + text)
        return { "results" : [] }

    tokens = text.split(" ")
    # remove duplicate tokens in longer queries
    if len(tokens) > 10:
        tokens = set(tokens)

    # an upper case "OR" inside a token is lowercased so that it is not mistaken for the operator
    terms = [s.strip().replace("OR", "or") for s in tokens if not utility.text.text_too_small(s)]
    if self.type == "SQLite FTS5":
        # search in both the tags and the text column
        query = " OR ".join(["tags:" + t for t in terms])
        query += " OR " + " OR ".join(["text:" + t for t in terms])
    else:
        query = " OR ".join(terms)

    if len(query) == 0 or query == " OR ":
        if self.logging:
            log("Returning. Query was: " + query)
        return { "results" : [] }

    c = 0
    resDict["decks"] = decks
    allDecks = "-1" in decks
    # rows of user notes are compared against "-1" in the deck filter below - appending it lets them pass
    decks.append("-1")
    rList = list()
    user_note_filter = "AND mid='-1'" if only_user_notes else ""

    conn = sqlite3.connect(self.dir + "search-data.db")
    # The match expression is passed as a bound parameter instead of being formatted
    # into the statement, so a quote in the query cannot break (or alter) the sql.
    # user_note_filter is one of two fixed strings, so it can safely be formatted in.
    if self.type == "SQLite FTS5":
        dbStr = "select nid, text, tags, did, source, bm25(notes) as score, mid, refs from notes where notes match ? %s order by score" % (user_note_filter)
    else:
        conn.create_function("simple_rank", 1, simple_rank)
        dbStr = "select nid, text, tags, did, source, simple_rank(matchinfo(notes)) as score, mid, refs from notes where text match ? %s order by score desc" % (user_note_filter)

    try:
        start = time.time()
        res = conn.execute(dbStr, (query,)).fetchall()
        resDict["time-query"] = int((time.time() - start) * 1000)
    except Exception as e:
        # best effort: a failing query results in an empty result list
        print("Executing match query threw exception: " + str(e))
        res = []
    finally:
        conn.close()

    if self.logging:
        log("dbStr was: " + dbStr)
        log("Result length of db query: " + str(len(res)))

    resDict["highlighting"] = self.highlighting

    # row layout: nid, text, tags, did, source, score, mid, refs
    for r in res:
        if not str(r[0]) in self.pinned and (allDecks or str(r[3]) in decks):
            # mid -1 marks rows that are turned into SiacNotes
            if str(r[6]) == "-1":
                rList.append(SiacNote.from_index(r))
            else:
                rList.append(IndexNote(r))
            c += 1
            if c >= self.limit:
                break

    if self.logging:
        log("Query was: " + query)
        log("Result length (after removing pinned and unselected decks): " + str(len(rList)))

    resDict["results"] = rList[:min(self.limit, len(rList))]
    self.lastResDict = resDict
    return resDict
def get_result_html_simple(self, db_list, tag_hover=True, search_on_selection=True):
    """
    Builds the html for a list of notes using the simple note templates
    (self.noteTemplateSimple, or self.noteTemplateUserNoteSimple for notes whose did is -1).

    Args:
        db_list: list of indexable notes, where
            res[0]: note text
            res[1]: tags
            res[2]: did ("-1" marks a non-anki note)
            res[3]: nid
            res[5]: mid (optional, used as key into self.fields_to_hide_in_results)
        tag_hover: passed through to self.buildTagString
        search_on_selection: if True, the rendered notes call getSelectionText() on mouseup

    Returns:
        The concatenated html of all rendered notes ("" for an empty list).
    """
    rendered = []
    epochTime = int(time.time() * 1000)
    nids = [r[3] for r in db_list]
    if self.showRetentionScores:
        retsByNid = getRetentions(nids)

    for counter, res in enumerate(db_list):
        nid = res[3]
        try:
            timeDiffString = self._getTimeDifferenceString(nid, epochTime)
        except Exception:
            timeDiffString = "Could not determine creation date"

        ret = retsByNid[int(nid)] if self.showRetentionScores and int(nid) in retsByNid else None
        if ret is not None:
            retMark = "background: %s; color: black;" % (self._retToColor(ret))
            if str(nid) in self.edited:
                retMark += "max-width: 20px;"
            retInfo = """<div class='retMark' style='%s'>%s</div> """ % (retMark, int(ret))
        else:
            retInfo = ""

        text = res[0]

        #non-anki notes should be displayed differently, we distinguish between title, text and source here
        if str(res[2]) == "-1":
            text = self._build_non_anki_note_html(text)

        # hide fields that should not be shown
        if len(res) > 5 and str(res[5]) in self.fields_to_hide_in_results:
            text = "\u001f".join([
                spl for i, spl in enumerate(text.split("\u001f"))
                if i not in self.fields_to_hide_in_results[str(res[5])]
            ])

        #remove <div> tags if set in config
        if self.remove_divs:
            text = utility.text.remove_divs(text)

        # Backslashes and backticks are escaped for use inside a JS template literal.
        # "$" is replaced by its html entity so that "${...}" in a note is not
        # interpolated by the template literal (the previous replace was a no-op).
        text = self._cleanFieldSeparators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text = self.tryHideImageOcclusion(text)
        #try to put fields that consist of a single image in their own line
        text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)

        template = self.noteTemplateSimple if str(res[2]) != "-1" else self.noteTemplateUserNoteSimple
        edited_info = "" if str(nid) not in self.edited else " 🕐 " + self._buildEditedInfo(self.edited[str(nid)])
        rendered.append(template % (
            counter + 1,
            " 🕐 " + timeDiffString,
            edited_info,
            retInfo,
            nid, nid,
            "getSelectionText()" if search_on_selection else "",
            nid, nid,
            text,
            self.buildTagString(res[1], tag_hover, maxLength=25, maxCount=2),
            nid))

    return "".join(rendered)
def printSearchResults(self, db_list, stamp, editor=None, logging=False, printTimingInfo=False, page=1, query_set=None, is_queue=False):
    """
    Builds the html that gets rendered in the search results div and sends it to the web view.
    Only the notes of the given page (50 notes per page) are rendered.

    Args:
        db_list: a list of tuples, see SearchIndex.search()
            db_list.0: highlighted note text
            db_list.1: tags
            db_list.2: did
            db_list.3: nid
            db_list.4: score (not used currently)
            db_list.5: mid
            db_list.6: refs (not used currently)
            db_list.7: position in queue (only present if in queue)
        stamp: if not None, rendering is skipped unless stamp equals self.latest;
            also used to compute the "Took" value
        editor: editor whose web view is used; self.editor is used if it is None or has no web view
        logging: if True, some progress messages are logged
        printTimingInfo: if True, the sidebar gets a clickable timing info symbol
        page: 1-based index of the page to render
        query_set: terms to highlight, None to disable highlighting
        is_queue: if True, the rendered user notes do not react on text selection
    """
    if stamp is not None and stamp != self.latest:
        if logging:
            log("PrintSearchResults: Aborting because stamp != latest")
        return

    if logging:
        log("Entering printSearchResults")
        log("Length (searchResults): " + str(len(db_list)))

    html = ""
    allText = ""
    tags = []
    epochTime = int(time.time() * 1000)
    self.last_had_timing_info = printTimingInfo

    if db_list is not None and len(db_list) > 0:
        self.lastResults = db_list
        self.last_query_set = query_set

    page_offset = (page - 1) * 50
    searchResults = db_list[page_offset:page * 50]
    nids = [r[3] for r in searchResults]
    if self.showRetentionScores:
        retsByNid = getRetentions(nids)

    # various time stamps to collect information about rendering performance
    start = time.time()
    highlight_total = 0.0
    build_user_note_total = 0.0

    # only the first notes of a page are highlighted immediately, the rest after the page has been rendered
    remaining_to_highlight = {}
    highlight_boundary = 15 if self.gridView else 10

    # for better performance, collect all notes that are .pdfs, and
    # query their reading progress after they have been rendered
    pdfs = []

    for idx, res in enumerate(searchResults):
        nid = res[3]
        # position of the note in the whole result list
        counter = idx + page_offset
        try:
            timeDiffString = self._getTimeDifferenceString(nid, epochTime)
        except Exception:
            if logging:
                log("Failed to determine creation date: " + str(nid))
            timeDiffString = "Could not determine creation date"

        ret = retsByNid[int(nid)] if self.showRetentionScores and int(nid) in retsByNid else None
        if ret is not None:
            retMark = "background: %s; color: black;" % (self._retToColor(ret))
            if str(nid) in self.edited:
                retMark += "max-width: 20px;"
            retInfo = """<div class='retMark' style='%s'>%s</div> """ % (retMark, int(ret))
        else:
            retInfo = ""

        text = res[0]
        is_user_note = str(res[2]) == "-1"

        #non-anki notes should be displayed differently, we distinguish between title, text and source here
        build_user_note_start = time.time()
        if is_user_note:
            # the source is the third field of the raw text, it has to be read before the html is built
            src = text.split("\u001f")[2]
            text = self._build_non_anki_note_html(text)
            if src.endswith(".pdf"):
                pdfs.append(nid)
        build_user_note_total += time.time() - build_user_note_start

        # hide fields that should not be shown
        if len(res) > 5 and str(res[5]) in self.fields_to_hide_in_results:
            text = "\u001f".join([
                spl for i, spl in enumerate(text.split("\u001f"))
                if i not in self.fields_to_hide_in_results[str(res[5])]
            ])

        #remove double fields separators
        text = self._cleanFieldSeparators(text).replace("\\", "\\\\")
        #try to remove image occlusion fields
        text = self.tryHideImageOcclusion(text)
        #try to put fields that consist of a single image in their own line
        text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)
        #remove <div> tags if set in config
        if self.remove_divs:
            text = utility.text.remove_divs(text, " ")

        #highlight
        highlight_start = time.time()
        if query_set is not None:
            if idx < highlight_boundary:
                text = self._markHighlights(text, query_set)
            else:
                remaining_to_highlight[nid] = text
        highlight_total += time.time() - highlight_start

        gridclass = "grid" if self.gridView else ""
        if self.gridView and len(text) < 200:
            if self.scale < 0.8:
                gridclass = ' '.join((gridclass, "grid-smaller"))
            else:
                gridclass = ' '.join((gridclass, "grid-small"))
        elif self.gridView and self.scale < 0.8:
            gridclass = ' '.join((gridclass, "grid-small"))
        elif self.gridView and len(text) > 700 and self.scale > 0.8:
            gridclass = ' '.join((gridclass, "grid-large"))
        if self.scale != 1.0:
            gridclass = ' '.join([gridclass, "siac-sc-%s" % str(self.scale).replace(".", "-")])

        edited_info = "" if str(nid) not in self.edited else " 🕐 " + self._buildEditedInfo(self.edited[str(nid)])

        # use either the template for addon's notes or the normal
        # (both templates are filled positionally, so number and order of the values matter)
        if is_user_note:
            newNote = self.noteTemplateUserNote % (
                gridclass, counter + 1, nid, counter + 1,
                " 🕐 " + timeDiffString,
                edited_info,
                retInfo,
                nid, nid, nid, nid, nid, nid, nid, nid,
                "getSelectionText()" if not is_queue else "",
                nid, nid, nid, nid,
                text,
                nid,
                self.buildTagString(res[1]),
                nid,
                ": Q-%s " % (res[7] + 1) if len(res) >= 8 and res[7] is not None else "")
        else:
            newNote = self.noteTemplate % (
                gridclass, counter + 1, nid, counter + 1,
                " 🕐 " + timeDiffString,
                edited_info,
                retInfo,
                nid, nid, nid, nid, nid, nid, nid, nid, nid, nid, nid,
                text,
                nid,
                self.buildTagString(res[1]),
                nid)

        html += newNote
        tags = self._addToTags(tags, res[1])
        # only the first 20 notes of the page contribute to the text given to the info table
        if idx < 20:
            allText += " " + res[0][:5000]

    tags.sort()
    # The html is embedded in a JS template literal below, so backticks and "$" must not
    # appear literally. The html entities are decoded again when the browser parses the
    # html (the previous replaces mapped both characters to themselves).
    html = html.replace("`", "&#96;").replace("$", "&#36;")
    pageMax = math.ceil(len(db_list) / 50.0)

    index = get_index()
    if index is not None and index.lastResDict is not None:
        index.lastResDict["time-html"] = int((time.time() - start) * 1000)
        index.lastResDict["time-html-highlighting"] = int(highlight_total * 1000)
        index.lastResDict["time-html-build-user-note"] = int(build_user_note_total * 1000)

    if stamp is None and self.last_took is not None:
        took = self.last_took
    elif stamp is not None:
        took = utility.misc.get_milisec_stamp() - stamp
        self.last_took = took
    else:
        took = "?"

    if not self.hideSidebar:
        infoMap = {
            "Took": "<b>%s</b> ms %s" % (took, " <b style='cursor: pointer' onclick='pycmd(`lastTiming`)'>ⓘ</b>" if printTimingInfo else ""),
            "Found": "<b>%s</b> notes" % (len(db_list) if len(db_list) > 0 else "<span style='color: red;'>0</span>")
        }
        info = self.buildInfoTable(infoMap, tags, allText)
        cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s);" % (
            html, info[0].replace("`", "&#96;"), json.dumps(info[1]), page, pageMax, len(db_list))
    else:
        cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s);" % (
            html, page, pageMax, len(db_list))
    cmd += "updateSwitchBtn(%s)" % len(searchResults)

    # prefer the web view of the given editor, fall back to the one of self.editor
    if editor is not None and editor.web is not None:
        web = editor.web
    elif self.editor is not None and self.editor.web is not None:
        web = self.editor.web
    else:
        web = None

    if web is not None:
        if logging:
            log("printing the result html...")
        web.eval(cmd)

    # highlight the notes that were rendered without highlighting
    if len(remaining_to_highlight) > 0:
        cmd = ""
        for nid, text in remaining_to_highlight.items():
            # same escaping as for the page html, the text ends up in a template literal too
            highlighted = self._markHighlights(text, query_set).replace("`", "&#96;").replace("$", "&#36;")
            cmd = ''.join((cmd, "document.getElementById('%s').innerHTML = `%s`;" % (nid, highlighted)))
        if web is not None:
            web.eval(cmd)

    # append a progress bar to the pdf notes
    if len(pdfs) > 0:
        pdf_info_list = get_pdf_info(pdfs)
        if pdf_info_list is not None and len(pdf_info_list) > 0:
            cmd = ""
            for i in pdf_info_list:
                # i[1] / i[2] is displayed as progress; avoid a division by zero if i[2] is 0
                perc = int(i[1] * 10.0 / i[2]) if i[2] else 0
                prog_bar = "".join(
                    "<div class='siac-prog-sq-filled'></div>" if x < perc else "<div class='siac-prog-sq'></div>"
                    for x in range(0, 10))
                cmd = ''.join((cmd, "document.getElementById('%s').innerHTML += `<br><div style='margin-top: 5px;'>%s %s / %s</div>`;" % (i[0], prog_bar, i[1], i[2])))
            if web is not None:
                web.eval(cmd)
def print_search_results(self, notes, stamp, editor=None, timing_info=False, page=1, query_set=None, is_cached=False):
    """
    Build the html for one page (50 notes) of search results and send it to the web view.

    Args:
        notes: All notes of the current result. Only the slice that belongs to `page` is rendered.
            None is treated like an empty list.
        stamp: Millisecond stamp of the search that triggered this call, or None.
            If it is set and differs from self.latest, the call is dropped.
        editor: Passed through to self._js().
        timing_info: Whether the sidebar should show the button that prints the last timing info.
        page: 1-based number of the page to render.
        query_set: Terms to highlight in the note texts; None disables highlighting.
        is_cached: True if this call repeats a cached call, in which case it is not cached again.

    Returns:
        None if the call was dropped, else a tuple of
        (time spent highlighting in ms, time spent on the per-note setup in seconds).
    """
    if stamp is not None and stamp != self.latest:
        return

    if notes is None:
        notes = []

    # if we were e.g. on page 2 which contains exactly one note (nr. 51 of 51 search results), and deleted that note,
    # the refresh call would still be to rerender page 2 with the updated search results,
    # but page 2 would not exist anymore, so we have to check for that.
    # (>= is needed here: with 50 notes left, page 2 would start at index 50, which is already out of range)
    if page > 1 and (page - 1) * 50 >= len(notes):
        page -= 1

    # if this is true, avoid scrolling to the top of the search results again
    is_rerender = False

    if not is_cached and len(notes) > 0:
        # roughly check if current call equals the last one, to set is_rerender to True
        if len(self.previous_calls) > 0:
            last_call = self.previous_calls[-1]
            last_nids = [n.id for n in last_call[0][:30]]
            if query_set == last_call[5] and page == last_call[4] and last_nids == [n.id for n in notes[:30]]:
                is_rerender = True
        # cache all calls to be able to repeat them
        self.previous_calls.append([notes, None, editor, timing_info, page, query_set])
        if len(self.previous_calls) > 11:
            self.previous_calls.pop(0)

    html_parts = []
    all_text_parts = []
    tags = []
    epochTime = int(time.time() * 1000)
    self.last_had_timing_info = timing_info

    if len(notes) > 0:
        self.lastResults = notes
        self.last_query_set = query_set

    # meta notes at the start of the list are rendered on the first page in addition to the 50 regular notes
    meta_notes_cnt = 0
    while meta_notes_cnt < len(notes) and notes[meta_notes_cnt].note_type == "user" and notes[meta_notes_cnt].is_meta_note():
        meta_notes_cnt += 1
    searchResults = notes[(page - 1) * 50 + min(page - 1, 1) * meta_notes_cnt: page * 50 + meta_notes_cnt]
    nids = [r.id for r in searchResults]

    if self.showRetentionScores:
        retsByNid = getRetentions(nids)

    # various time stamps to collect information about rendering performance
    start = time.time()
    highlight_total = 0.0
    build_user_note_total = 0.0

    # notes beyond this position on the page are highlighted in a second pass, after the page has been rendered
    remaining_to_highlight = {}
    highlight_boundary = 15 if self.gridView else 10

    # for better performance, collect all notes that are .pdfs, and
    # query their reading progress after they have been rendered
    pdfs = []
    check_for_suspended = []
    meta_card_counter = 0

    for pos_on_page, res in enumerate(searchResults):
        nid = res.id
        counter = pos_on_page + (page - 1) * 50
        try:
            timeDiffString = self._get_time_diff_lbl(nid, epochTime)
        except Exception:
            timeDiffString = "Could not determine creation date"

        ret = retsByNid[int(nid)] if self.showRetentionScores and int(nid) in retsByNid else None
        if ret is not None:
            retMark = "border-color: %s;" % (utility.misc._retToColor(ret))
            retInfo = """<div class='retMark' style='%s'>Pass Rate: %s</div>""" % (retMark, int(ret))
        else:
            retInfo = ""

        # non-anki notes should be displayed differently, we distinguish between title, text and source here
        # confusing: 'source' on notes from the index means the original note content (without stopwords removed etc.),
        # on SiacNotes, it means the source field.
        build_user_note_start = time.time()
        text = res.get_content()
        progress = ""
        pdf_class = ""
        if res.note_type == "user":
            icon = "book"
            if res.is_pdf():
                pdfs.append(nid)
                p_html = "<div class='siac-prog-sq'></div>" * 10
                progress = f"<div id='ptmp-{nid}' class='siac-prog-tmp'>{p_html} <span> 0 / ?</span></div><div style='display: inline-block;' id='siac-ex-tmp-{nid}'></div>"
                pdf_class = "pdf"
            elif int(res.id) < 0:
                # meta card
                pdf_class = "meta"
            elif res.is_yt():
                icon = "film"
            elif res.is_file():
                icon = "external-link"
        elif res.note_type == "index" and res.did and res.did > 0:
            check_for_suspended.append(res.id)
        build_user_note_total += time.time() - build_user_note_start

        # hide fields that should not be shown
        if str(res.mid) in self.fields_to_hide_in_results:
            hidden = self.fields_to_hide_in_results[str(res.mid)]
            text = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in hidden])

        # remove double fields separators
        text = utility.text.clean_field_separators(text).replace("\\", "\\\\")

        # try to remove image occlusion fields
        text = utility.text.try_hide_image_occlusion(text)

        # if set in config, try to remove cloze brackets
        if not self.show_clozes:
            text = utility.text.hide_cloze_brackets(text)

        # try to put fields that consist of a single image in their own line
        text = utility.text.newline_before_images(text)

        # remove <div> tags if set in config
        if self.remove_divs and res.note_type != "user":
            text = utility.text.remove_divs(text, " ")

        # highlight the first notes right away, remember the rest for the second pass
        if query_set is not None:
            if pos_on_page < highlight_boundary:
                highlight_start = time.time()
                text = utility.text.mark_highlights(text, query_set)
                highlight_total += time.time() - highlight_start
            else:
                remaining_to_highlight[nid] = text

        gridclass = "grid" if self.gridView else ""
        if self.scale != 1.0:
            gridclass = ' '.join([gridclass, "siac-sc-%s" % str(self.scale).replace(".", "-")])

        creation = " 🕐 " + timeDiffString
        edited = "" if str(nid) not in self.edited else "<i class='fa fa-pencil ml-10 mr-5'></i> " + self._build_edited_info(self.edited[str(nid)])
        tag_html = utility.tags.build_tag_string(res.tags, self.gridView)

        # use either the template for addon's notes or the normal
        if res.note_type == "user":
            template = NOTE_TMPL_SIAC
            if res.is_meta_note():
                template = NOTE_TMPL_META
                meta_card_counter += 1
            newNote = template.format(
                grid_class=gridclass,
                counter=counter + 1 - meta_card_counter,
                nid=nid,
                creation=creation,
                edited=edited,
                mouseup="getSelectionText()",
                text=text,
                tags=tag_html,
                queue=": Q-%s " % (res.position + 1) if res.is_in_queue() else "",
                progress=progress,
                icon=icon,
                pdf_class=pdf_class,
                ret=retInfo)
        else:
            newNote = NOTE_TMPL.format(
                grid_class=gridclass,
                counter=counter + 1 - meta_card_counter,
                nid=nid,
                creation=creation,
                edited=edited,
                mouseup="getSelectionText()",
                text=text,
                tags=tag_html,
                ret=retInfo)

        html_parts.append(newNote)
        tags = self._addToTags(tags, res.tags)
        # only the first 20 notes of the page contribute to the text that is handed to the info table
        if pos_on_page < 20:
            # todo: title for user notes
            all_text_parts.append(f" {res.text[:5000]}")
            if res.note_type == "user":
                all_text_parts.append(f" {res.title}")

    tags.sort()
    allText = "".join(all_text_parts)
    # the html is embedded in a JS template literal below, so backticks and "$" (-> "${...}") must not appear literally
    html = "".join(html_parts).replace("`", "&#96;").replace("$", "&#36;")
    pageMax = math.ceil(len(notes) / 50.0)

    if get_index() is not None and get_index().lastResDict is not None:
        get_index().lastResDict["time-html"] = int((time.time() - start) * 1000)
        get_index().lastResDict["time-html-highlighting"] = int(highlight_total * 1000)
        get_index().lastResDict["time-html-build-user-note"] = int(build_user_note_total * 1000)

    if stamp is not None:
        took = utility.misc.get_milisec_stamp() - stamp
        self.last_took = took
    else:
        took = self.last_took if self.last_took is not None else "?"
        # stamp is written into the JS call as is, and "None" is not a valid JS value
        stamp = -1

    timing = "true" if timing_info else "false"
    rerender = "true" if is_rerender else "false"

    if not self.hideSidebar:
        infoMap = {
            "Took": "<b>%s</b> ms %s" % (took, " <b style='cursor: pointer' onclick='pycmd(`siac-last-timing`)'><i class='fa fa-info-circle'></i></b>" if timing_info else ""),
            "Found": "<b>%s</b> notes" % (len(notes) if len(notes) > 0 else "<span style='color: red;'>0</span>")
        }
        info = self.build_info_table(infoMap, tags, allText)
        cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s, isRerender=%s);" % (html, info[0].replace("`", "&#96;"), json.dumps(info[1]), page, pageMax, len(notes), len(self.previous_calls), stamp, timing, rerender)
    else:
        cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s, isRerender=%s);" % (html, page, pageMax, len(notes), len(self.previous_calls), stamp, timing, rerender)

    self._js(cmd, editor)

    # second pass: highlight the notes that were skipped above
    if len(remaining_to_highlight) > 0:
        cmd = "".join(
            "document.getElementById('siac-inner-card-%s').innerHTML = `%s`;" % (
                r_nid,
                # same escaping as for the main html, this also ends up in a template literal
                utility.text.mark_highlights(r_text, query_set).replace("`", "&#96;").replace("$", "&#36;"))
            for r_nid, r_text in remaining_to_highlight.items())
        self._js(cmd, editor)

    if len(check_for_suspended) > 0:
        susp = get_suspended(check_for_suspended)
        if len(susp) > 0:
            cmd = "".join(
                f"$('#siac-susp-dsp-{s_nid}').html(`<span id='siac-susp-lbl-{s_nid}' onclick='pycmd(\"siac-unsuspend-modal {s_nid}\")' class='siac-susp-lbl'> SUSPENDED </span>`);"
                for s_nid in susp)
            self._js(cmd, editor)

    if len(pdfs) > 0:
        pdf_info_list = get_pdf_info(pdfs)
        if pdf_info_list is not None and len(pdf_info_list) > 0:
            cmd_parts = []
            for i in pdf_info_list:
                # NOTE(review): i looks like (nid, pages read, pages total, extract start, extract end) - confirm against get_pdf_info
                # a falsy total would raise on the division, show an empty bar instead
                perc = int(i[1] * 10.0 / i[2]) if i[2] else 0
                filled = min(max(perc, 0), 10)
                prog_bar = "<div class='siac-prog-sq-filled'></div>" * filled + "<div class='siac-prog-sq'></div>" * (10 - filled)
                cmd_parts.append("document.querySelector('#ptmp-%s').innerHTML = `%s <span>%s / %s</span>`;" % (i[0], prog_bar, i[1], i[2]))
                extract = ""
                ext_start = i[3]
                ext_end = i[4]
                if ext_end and ext_start == ext_end:
                    extract = f"<span class='siac-extract-mark'> [{ext_start}]</span>"
                elif ext_start:
                    extract = f"<span class='siac-extract-mark'> [{ext_start} - {ext_end}]</span>"
                if extract != "":
                    cmd_parts.append("document.querySelector('#siac-ex-tmp-%s').innerHTML = `%s`;" % (i[0], extract))
            self._js("".join(cmd_parts), editor)

    return (highlight_total * 1000, build_user_note_total)
def search_results(db_list: List[IndexNote], query_set: List[str]) -> HTML:
    """
    Build the html for a list of notes, using the simple note templates. Used e.g. in the pdf viewer.

    Args:
        db_list: The notes to render, in display order.
        query_set: Terms to highlight. Nothing is highlighted if this is None
            or if "highlighting" is disabled in the config.

    Returns:
        The concatenated html of all rendered notes, "" for an empty list.
    """
    show_ret = conf_or_def("showRetentionScores", True)
    fields_to_hide_in_results = conf_or_def("fieldsToHideInResults", {})
    hide_clozes = conf_or_def("results.hide_cloze_brackets", False)
    remove_divs = conf_or_def("removeDivsFromOutput", False)
    highlighting = conf_or_def("highlighting", True)

    if show_ret:
        retsByNid = getRetentions([r.id for r in db_list])

    rendered = []
    for counter, res in enumerate(db_list):
        ret = retsByNid[int(res.id)] if show_ret and int(res.id) in retsByNid else None
        if ret is not None:
            retMark = "border-color: %s;" % (utility.misc._retToColor(ret))
            retInfo = """<div class='retMark' style='%s'>PR: %s</div> """ % (retMark, int(ret))
        else:
            retInfo = ""

        text = res.get_content()

        # hide fields that should not be shown
        if str(res.mid) in fields_to_hide_in_results:
            hidden = fields_to_hide_in_results[str(res.mid)]
            text = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in hidden])

        # remove <div> tags if set in config
        if remove_divs and res.note_type != "user":
            text = utility.text.remove_divs(text)

        # remove cloze brackets if set in config
        if hide_clozes and res.note_type != "user":
            text = utility.text.hide_cloze_brackets(text)

        if highlighting and query_set is not None:
            text = utility.text.mark_highlights(text, query_set)

        # backslashes and backticks are escaped for use inside a JS template literal,
        # "$" has to be replaced as well, otherwise "${...}" in a note would be interpolated
        text = utility.text.clean_field_separators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text = utility.text.try_hide_image_occlusion(text)

        # try to put fields that consist of a single image in their own line
        text = utility.text.newline_before_images(text)

        template = NOTE_TMPL_SIMPLE if res.note_type == "index" else NOTE_TMPL_SIAC_SIMPLE
        rendered.append(template.format(
            counter=counter + 1,
            nid=res.id,
            edited="",
            mouseup="",
            text=text,
            ret=retInfo,
            tags=utility.tags.build_tag_string(res.tags, False, False, maxLength=15, maxCount=2),
            creation=""))

    return "".join(rendered)
def searchProc(self, text, decks, only_user_notes, print_mode):
    """
    Run a full text search for the given text on the sqlite index (search-data.db).

    Args:
        text: The raw query text. It is cleaned with self.clean() and expanded by synonyms before searching.
        decks: Deck ids (as strings) to restrict the search to. If it contains "-1", notes of all decks match.
            The list is not modified.
        only_user_notes: If True, only rows with mid '-1' are searched.
        print_mode: "default" or "pdf"; only affects what happens if the query is empty after cleaning.

    Returns:
        A dict with the key "results" (list of tuples (source, tags, did, nid, score, mid, refs),
        at most self.limit entries) plus timing and query information.
        If the query is empty after cleaning, None or (print_mode "default" with hidden sidebar) a message string.
    """
    resDict = {}
    start = time.time()
    orig = text
    text = self.clean(text)
    resDict["time-stopwords"] = int((time.time() - start) * 1000)
    if self.logging:
        log("\nFTS index - Received query: " + text)
        log("Decks (arg): " + str(decks))
        log("Self.pinned: " + str(self.pinned))
        log("Self.limit: " + str(self.limit))
    self.lastSearch = (text, decks, "default")

    if len(text) == 0:
        if print_mode == "default":
            self.output.editor.web.eval(
                "setSearchResults(``, 'Query was empty after cleaning.<br/><br/><b>Query:</b> <i>%s</i>')"
                % utility.text.trim_if_longer_than(orig, 100).replace("\u001f", ""))
            if mw.addonManager.getConfig(__name__)["hideSidebar"]:
                return "Found 0 notes. Query was empty after cleaning."
            return None
        elif print_mode == "pdf":
            return None

    start = time.time()
    text = utility.text.expand_by_synonyms(text, self.synonyms)
    resDict["time-synonyms"] = int((time.time() - start) * 1000)
    resDict["query"] = text
    if utility.text.text_too_small(text):
        if self.logging:
            log("Returning - Text was < 2 chars: " + text)
        return {"results": []}

    tokens = text.split(" ")
    if len(tokens) > 10:
        # long queries: drop duplicate terms
        tokens = set(tokens)
    # an upper case OR would be interpreted as operator by the fts query syntax
    terms = [s.strip().replace("OR", "or") for s in tokens if not utility.text.text_too_small(s)]
    if self.type == "SQLite FTS5":
        query = " OR ".join(["tags:" + t for t in terms])
        query += " OR " + " OR ".join(["text:" + t for t in terms])
    else:
        query = " OR ".join(terms)
    if len(query) == 0 or query == " OR ":
        if self.logging:
            log("Returning. Query was: " + query)
        return {"results": []}

    allDecks = "-1" in decks
    # work on a copy: appending to the caller's list would make "-1" in decks true
    # for every later call that passes the same list
    decks = list(decks) + ["-1"]
    resDict["decks"] = decks
    deck_set = set(decks)
    rList = []
    user_note_filter = "AND mid='-1'" if only_user_notes else ""

    conn = sqlite3.connect(self.dir + "search-data.db")
    try:
        # the match expression is passed as bound parameter, so quotes in the query cannot break the statement
        if self.type == "SQLite FTS5":
            dbStr = "select nid, text, tags, did, source, bm25(notes) as score, mid, refs from notes where notes match ? %s order by score" % (
                user_note_filter)
        else:
            # bm25 results in really slow queries for fts4 for some reason, so we use the simpler ranking there
            conn.create_function("simple_rank", 1, simple_rank)
            dbStr = "select nid, text, tags, did, source, simple_rank(matchinfo(notes)) as score, mid, refs from notes where text match ? %s order by score desc" % (
                user_note_filter)
        try:
            start = time.time()
            res = conn.execute(dbStr, (query,)).fetchall()
            resDict["time-query"] = int((time.time() - start) * 1000)
        except Exception as e:
            # best effort: a query the index cannot handle yields an empty result
            if self.logging:
                log("Executing db query threw exception: " + str(e))
            res = []
    finally:
        conn.close()

    if self.logging:
        log("dbStr was: " + dbStr)
        log("Result length of db query: " + str(len(res)))
    resDict["highlighting"] = self.highlighting

    # drop pinned notes and notes from unselected decks, stop once the limit is reached
    for r in res:
        if not str(r[0]) in self.pinned and (allDecks or str(r[3]) in deck_set):
            rList.append((r[4], r[2], r[3], r[0], r[5], r[6], r[7]))
            if len(rList) >= self.limit:
                break

    if self.logging:
        log("Query was: " + query)
        log("Result length (after removing pinned and unselected decks): " + str(len(rList)))
    resDict["results"] = rList[:min(self.limit, len(rList))]
    self.lastResDict = resDict
    return resDict