def generate_text_object(request, config, note=False):
    """Build the navigation text object for the requested philo_id.

    Resolves the object in the database (following note targets and climbing
    out of virtual containers), then assembles its text, images, metadata,
    citation, and prev/next links into a single dict.
    """
    # A nine-component id is a page id, unless we are resolving a note.
    width = 9 if len(request.philo_id.split()) == 9 and note is not True else 7
    db = DB(config.db_path + '/data/', width=width)
    if note:
        # Resolve the note's anchor to a philo_id inside the same document.
        anchor = request.target.replace('#', '')
        doc_prefix = request.philo_id.split()[0] + ' %'
        cursor = db.dbh.cursor()
        cursor.execute('select philo_id from toms where id=? and philo_id like ? limit 1', (anchor, doc_prefix))
        philo_id = cursor.fetchall()[0]['philo_id'].split()[:7]
        obj = db[philo_id]
    else:
        try:
            obj = db[request.philo_id]
        except ValueError:
            obj = db[' '.join(request.path_components)]
    philo_id = obj.philo_id
    if width != 9:
        # Climb past virtual containers (div1 excepted) to a displayable object.
        while obj['philo_name'] == '__philo_virtual' and obj["philo_type"] != "div1":
            philo_id.pop()
            obj = db[philo_id]
    # Drop trailing zero components from the object id.
    philo_id = list(obj.philo_id)
    while int(philo_id[-1]) == 0:
        philo_id.pop()
    text_object = {
        "query": dict(list(request)),
        "philo_id": ' '.join(str(part) for part in philo_id),
    }
    text_object['prev'] = neighboring_object_id(db, obj.prev, width)
    text_object['next'] = neighboring_object_id(db, obj.__next__, width)
    text_object['metadata_fields'] = {field: obj[field] for field in db.locals['metadata_fields']}
    if width != 9:
        citation_hrefs = citation_links(db, config, obj)
    else:
        # Page objects take their citation links from the parent document.
        doc_obj = db[obj.philo_id[0]]
        citation_hrefs = citation_links(db, config, doc_obj)
    text_object['citation'] = citations(obj, citation_hrefs, config, report="navigation")
    text, imgs = get_text_obj(obj, config, request, db.locals["token_regex"], note=note)
    if config.navigation_formatting_regex:
        for pattern, replacement in config.navigation_formatting_regex:
            text = re.sub(r'%s' % pattern, '%s' % replacement, text)
    text_object['text'] = text
    text_object['imgs'] = imgs
    return text_object
def generate_text_object(request, config, note=False):
    """Assemble the text-navigation payload for a philo_id.

    Locates the requested object (resolving note targets and skipping
    virtual containers), then gathers its text, images, metadata,
    citation, and neighboring-object ids into one dict.
    """
    # Nine id components mean a page object, unless we are chasing a note.
    if len(request.philo_id.split()) == 9 and note is not True:
        width = 9
    else:
        width = 7
    db = DB(config.db_path + '/data/', width=width)
    if note:
        # Look up the note's target anchor within the same document.
        anchor = request.target.replace('#', '')
        doc_prefix = request.philo_id.split()[0] + ' %'
        cursor = db.dbh.cursor()
        cursor.execute('select philo_id from toms where id=? and philo_id like ? limit 1', (anchor, doc_prefix))
        philo_id = cursor.fetchall()[0]['philo_id'].split()[:7]
        obj = db[philo_id]
    else:
        try:
            obj = db[request.philo_id]
        except ValueError:
            obj = db[' '.join(request.path_components)]
    philo_id = obj.philo_id
    if width != 9:
        # Walk up past virtual containers (except div1) to a real object.
        while obj['philo_name'] == '__philo_virtual' and obj["philo_type"] != "div1":
            philo_id.pop()
            obj = db[philo_id]
    # Strip trailing zeros from the id.
    philo_id = list(obj.philo_id)
    while int(philo_id[-1]) == 0:
        philo_id.pop()
    text_object = {"query": dict(list(request)), "philo_id": ' '.join(str(i) for i in philo_id)}
    text_object['prev'] = neighboring_object_id(db, obj.prev, width)
    text_object['next'] = neighboring_object_id(db, obj.__next__, width)
    text_object['metadata_fields'] = {field: obj[field] for field in db.locals['metadata_fields']}
    if width != 9:
        citation_hrefs = citation_links(db, config, obj)
        citation = citations(obj, citation_hrefs, config, report="navigation")
    else:
        # Pages are cited through their parent document via a width-7 handle.
        db = DB(config.db_path + '/data/', width=7)
        doc_obj = db[obj.philo_id[0]]
        citation_hrefs = citation_links(db, config, doc_obj)
        citation = citations(doc_obj, citation_hrefs, config, report="navigation")
    text_object['citation'] = citation
    text, imgs = get_text_obj(obj, config, request, db.locals["token_regex"], note=note)
    if config.navigation_formatting_regex:
        for pattern, replacement in config.navigation_formatting_regex:
            text = re.sub(r'%s' % pattern, '%s' % replacement, text)
    text_object['text'] = text
    text_object['imgs'] = imgs
    return text_object
def group_by_metadata(request, config):
    """Return a JSON group of all "doc" rows whose group_by_field equals the query value."""
    citation_types = json.loads(request.citation)
    db = DB(config.db_path + "/data/")
    cursor = db.dbh.cursor()
    # NOTE(review): group_by_field is interpolated into the SQL statement —
    # sqlite3 cannot bind column names, so it must come from a trusted,
    # validated metadata-field list.
    query = """select * from toms where philo_type="doc" and %s=?""" % request.group_by_field
    cursor.execute(query, (request.query,))
    result_group = []
    for doc in cursor:
        obj = db[doc["philo_id"]]
        links = citation_links(db, config, obj)
        citation = citations(obj, links, config, report="landing_page", citation_type=citation_types)
        result_group.append({"metadata": get_all_metadata(db, doc), "citation": citation})
    return json.dumps({
        "display_count": request.display_count,
        "content_type": request.group_by_field,
        "content": [{"prefix": request.query, "results": result_group}],
    })
def bibliography_results(request, config):
    """Fetch bibliography results"""
    db = DB(config.db_path + "/data/")
    if request.no_metadata:
        hits = db.get_all(db.locals["default_object_level"], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    if request.simple_bibliography == "all":
        # Simple landing-page report: return every record in load order.
        hits.finish()
        start = 1
        end = len(hits)
        page_num = end
    else:
        start, end, page_num = page_interval(request.results_per_page, hits, request.start, request.end)
    bibliography_object = {
        "description": {"start": start, "end": end, "n": page_num, "results_per_page": request.results_per_page},
        "query": dict(list(request)),
        "default_object": db.locals["default_object_level"],
    }
    results = []
    result_type = "doc"
    for hit in hits[start - 1:end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {field: hit[field] for field in db.locals["metadata_fields"]}
        result_type = hit.object_type
        if request.simple_bibliography == "all":
            citation = citations(hit, citation_hrefs, config, report="simple_landing")
        else:
            citation = citations(hit, citation_hrefs, config, report="bibliography", result_type=result_type)
        # Dictionary-style bibliographies also carry the hit's text context.
        include_context = not (config.dictionary_bibliography is False or result_type == "doc")
        entry = {
            "citation": citation,
            "citation_links": citation_hrefs,
            "philo_id": hit.philo_id,
            "metadata_fields": metadata_fields,
        }
        if include_context:
            entry["context"] = get_text_obj(hit, config, request, db.locals["token_regex"], images=False)
        entry["object_type"] = result_type
        results.append(entry)
    bibliography_object["results"] = results
    bibliography_object["results_length"] = len(hits)
    bibliography_object["query_done"] = hits.done
    bibliography_object["result_type"] = result_type
    return bibliography_object, hits
def kwic_hit_object(hit, config, db):
    """Build an individual KWIC (keyword-in-context) concordance entry.

    Returns a dict containing the formatted context, the lower-cased
    highlighted text (used for KWIC sorting), metadata, citation links,
    and the hit's byte offsets.
    """
    # Get all metadata for the hit.
    metadata_fields = {}
    for metadata in db.locals["metadata_fields"]:
        metadata_fields[metadata] = hit[metadata].strip()
    # Get all links and citations.
    citation_hrefs = citation_links(db, config, hit)
    citation = citations(hit, citation_hrefs, config)
    # Length of text needed: one concordance window on each side of the
    # (possibly multi-word) hit.
    byte_distance = hit.bytes[-1] - hit.bytes[0]
    length = config.concordance_length + byte_distance + config.concordance_length
    # Get concordance and align it.
    byte_offsets, start_byte = adjust_bytes(hit.bytes, config.concordance_length)
    conc_text = get_text(hit, start_byte, length, config.db_path)
    conc_text = format_strip(conc_text, db.locals["token_regex"], byte_offsets)
    conc_text = conc_text.replace("\n", " ")
    conc_text = conc_text.replace("\r", "")
    conc_text = conc_text.replace("\t", " ")
    # BUGFIX: highlighted_text must exist even when no highlight span is
    # found (the ValueError path below); previously building kwic_result
    # raised NameError in that case.
    highlighted_text = ""
    highlight_open = '<span class="highlight">'
    highlight_close = "</span>"
    try:
        start_hit = conc_text.index(highlight_open)
        start_output = (
            '<span class="kwic-before"><span class="inner-before">' + conc_text[:start_hit] + "</span></span>"
        )
        end_hit = conc_text.rindex(highlight_close) + len(highlight_close)
        # BUGFIX: use len(highlight_open) (24) instead of the magic 23, which
        # left a stray '>' at the start of the sort key.
        highlighted_text = conc_text[start_hit + len(highlight_open): end_hit - len(highlight_close)].lower()  # for use in KWIC sorting
        end_output = '<span class="kwic-after">' + conc_text[end_hit:] + "</span>"
        conc_text = (
            '<span class="kwic-text">'
            + start_output
            + ' <span class="kwic-highlight">'
            + conc_text[start_hit:end_hit]
            + "</span> "
            + end_output
            + "</span>"
        )
    except ValueError as v:
        # No highlight markers in the stripped text: log and fall through
        # with the unwrapped context.
        import sys
        print("KWIC ERROR:", v, file=sys.stderr)
    if config.kwic_formatting_regex:
        for pattern, replacement in config.kwic_formatting_regex:
            conc_text = re.sub(r"%s" % pattern, "%s" % replacement, conc_text)
    kwic_result = {
        "philo_id": hit.philo_id,
        "context": conc_text,
        "highlighted_text": highlighted_text,
        "metadata_fields": metadata_fields,
        "citation_links": citation_hrefs,
        "citation": citation,
        "bytes": hit.bytes,
    }
    return kwic_result
def concordance_results(request, config):
    """Fetch concordances results."""
    db = DB(config.db_path + '/data/')
    if request.collocation_type:
        first_hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        second_hits = db.query(request["left"], request["method"], request["arg"], **request.metadata)
        hits = CombinedHitlist(first_hits, second_hits)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], sort_order=request["sort_order"], **request.metadata)
    start, end, page_num = page_interval(request['results_per_page'], hits, request.start, request.end)
    concordance_object = {
        "description": {"start": start, "end": end, "results_per_page": request.results_per_page},
        "query": dict(list(request)),
        "default_object": db.locals['default_object_level'],
    }
    # Compile the display-formatting patterns once for the whole result page.
    formatting_regexes = [
        (re.compile(r'%s' % pattern), replacement)
        for pattern, replacement in (config.concordance_formatting_regex or [])
    ]
    results = []
    for hit in hits[start - 1:end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {field: hit[field] for field in db.locals['metadata_fields']}
        citation = citations(hit, citation_hrefs, config, report="concordance")
        context = get_concordance_text(db, hit, config.db_path, config.concordance_length)
        for formatting_regex, replacement in formatting_regexes:
            context = formatting_regex.sub(r'%s' % replacement, context)
        results.append({
            "philo_id": hit.philo_id,
            "citation": citation,
            "citation_links": citation_hrefs,
            "context": context,
            "metadata_fields": metadata_fields,
            "bytes": hit.bytes,
        })
    concordance_object["results"] = results
    concordance_object['results_length'] = len(hits)
    concordance_object["query_done"] = hits.done
    return concordance_object
def concordance_results(request, config):
    """Fetch concordances results."""
    db = DB(config.db_path + '/data/')
    if request.collocation_type:
        # Collocation reports combine the hitlists of the two query terms.
        left_hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        right_hits = db.query(request["left"], request["method"], request["arg"], **request.metadata)
        hits = CombinedHitlist(left_hits, right_hits)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], sort_order=request["sort_order"], **request.metadata)
    start, end, page_num = page_interval(request['results_per_page'], hits, request.start, request.end)
    concordance_object = {
        "description": {"start": start, "end": end, "results_per_page": request.results_per_page},
        "query": dict(list(request)),
        "default_object": db.locals['default_object_level'],
    }
    compiled_patterns = []
    if config.concordance_formatting_regex:
        # Compile display-formatting patterns once, outside the hit loop.
        for pattern, replacement in config.concordance_formatting_regex:
            compiled_patterns.append((re.compile(r'%s' % pattern), replacement))
    results = []
    for hit in hits[start - 1:end]:
        hrefs = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals['metadata_fields']}
        context = get_concordance_text(db, hit, config.db_path, config.concordance_length)
        for compiled, replacement in compiled_patterns:
            context = compiled.sub(r'%s' % replacement, context)
        results.append({
            "philo_id": hit.philo_id,
            "citation": citations(hit, hrefs, config, report="concordance"),
            "citation_links": hrefs,
            "context": context,
            "metadata_fields": fields,
            "bytes": hit.bytes,
        })
    concordance_object["results"] = results
    concordance_object['results_length'] = len(hits)
    concordance_object["query_done"] = hits.done
    return concordance_object
def group_by_metadata(request, config):
    """Group documents matching a single metadata value for the landing page."""
    citation_types = json.loads(request.citation)
    db = DB(config.db_path + "/data/")
    cursor = db.dbh.cursor()
    # Column names cannot be bound as SQL parameters, hence the string
    # interpolation; group_by_field must be a known metadata field.
    cursor.execute("""select * from toms where philo_type="doc" and %s=?""" % request.group_by_field, (request.query,))
    result_group = []
    for doc in cursor:
        doc_object = db[doc["philo_id"]]
        hrefs = citation_links(db, config, doc_object)
        result_group.append({
            "metadata": get_all_metadata(db, doc),
            "citation": citations(doc_object, hrefs, config, report="landing_page", citation_type=citation_types),
        })
    payload = {
        "display_count": request.display_count,
        "content_type": request.group_by_field,
        "content": [{"prefix": request.query, "results": result_group}],
    }
    return json.dumps(payload)
def group_by_range(request_range, request, config):
    """Group "doc" records by date or by initial letter within request_range.

    request_range holds two endpoints: both numeric means a date range,
    otherwise the ordinals of the two characters bound an alphabetical
    range. Returns a JSON string with one sorted result group per initial.
    """
    db = DB(config.db_path + "/data/")
    metadata_queried = request.group_by_field
    citation_types = json.loads(request.citation)
    is_date = False
    try:
        int(request_range[0])
        int(request_range[1])
        is_date = True
    except ValueError:
        pass
    cursor = db.dbh.cursor()
    if is_date:
        content_type = "date"
        query_range = set(range(int(request_range[0]), int(request_range[1])))
        # Dates iterate every doc row so per-date counts can be accumulated.
        query = 'select * from toms where philo_type="doc"'
    else:
        content_type = metadata_queried
        query_range = set(range(ord(request_range[0]), ord(request_range[1]) + 1))  # Ordinal avoids unicode issues...
        # NOTE(review): metadata_queried is interpolated into the SQL (sqlite3
        # cannot bind column names); it must come from a trusted field list.
        query = 'select *, count(*) as count from toms where philo_type="doc" group by %s' % metadata_queried
    try:
        # BUGFIX: each query is now executed exactly once. Previously the
        # grouped query was re-executed unconditionally after the branch,
        # clobbering the per-document date query (breaking date_count) and
        # running the grouped query twice.
        cursor.execute(query)
    except sqlite3.OperationalError:
        # Unknown grouping field: return an empty content list.
        return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": []})
    content = {}
    date_count = defaultdict(int)
    for doc in cursor:
        normalized_test_value = ""
        if doc[metadata_queried] is None:
            continue
        if is_date:
            try:
                initial = int(doc[metadata_queried])
                test_value = initial
                date_count[initial] += 1
            except (TypeError, ValueError):
                # Non-numeric date value: skip the row.
                continue
        else:
            try:
                initial_letter = doc[metadata_queried][0].lower()
            except IndexError:  # we have an empty string
                continue
            try:
                test_value = ord(initial_letter)
                # Accent-stripped ordinal so accented initials group with
                # their base letter.
                normalized_test_value = ord("".join(
                    i for i in unicodedata.normalize("NFKD", initial_letter) if not unicodedata.combining(i)))
            except TypeError:
                continue
            initial = initial_letter.upper()
        # Are we within the range?
        if test_value in query_range or normalized_test_value in query_range:
            if normalized_test_value in query_range:
                initial = "".join(
                    i for i in unicodedata.normalize("NFKD", initial_letter) if not unicodedata.combining(i)).upper()
            obj = db[doc["philo_id"]]
            links = citation_links(db, config, obj)
            citation = citations(obj, links, config, report="landing_page", citation_type=citation_types)
            if initial not in content:
                content[initial] = []
            if is_date:
                try:
                    normalized_field = unaccent.smash_accents(doc["title"]).lower()
                except Exception:
                    # Best-effort normalization; fall back to None for sorting.
                    normalized_field = None
                content[initial].append({
                    "metadata": get_all_metadata(db, doc),
                    "citation": citation,
                    "count": date_count[initial],  # running per-date count at append time
                    "normalized": normalized_field,
                })
            else:
                content[initial].append({
                    "metadata": get_all_metadata(db, doc),
                    "citation": citation,
                    "count": doc["count"],
                    "normalized": unaccent.smash_accents(doc[metadata_queried]).lower(),
                })
    results = []
    for prefix, result_set in sorted(content.items(), key=itemgetter(0)):
        results.append({
            "prefix": prefix,
            "results": sorted(result_set, key=lambda entry: entry["normalized"]),
        })
    return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": results})
def filter_words_by_property(request, config):
    """Filter words by property"""
    db = DB(config.db_path + '/data/')
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    concordance_object = {"query": dict(list(request))}
    # Do these need to be captured in wsgi_handler?
    word_property = request["word_property"]
    word_property_value = request["word_property_value"]
    word_property_total = request["word_property_total"]
    matching_hits = []
    results = []
    position = 0
    more_pages = False
    start = 1 if request.start == 0 else request.start
    for hit in hits:
        # Keep only hits whose word attribute matches the requested value.
        if get_word_attrib(hit, word_property, db) != word_property_value:
            continue
        position += 1
        if position < start:
            continue
        matching_hits.append(hit)
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {field: hit[field] for field in db.locals['metadata_fields']}
        citation = citations(hit, citation_hrefs, config)
        context = get_concordance_text(db, hit, config.db_path, config.concordance_length)
        results.append({
            "philo_id": hit.philo_id,
            "citation": citation,
            "citation_links": citation_hrefs,
            "context": context,
            "metadata_fields": metadata_fields,
            "bytes": hit.bytes,
            "collocate_count": 1,
        })
        if len(matching_hits) == request.results_per_page:
            # We filled a page; assume more matches may follow.
            more_pages = True
            break
    end = start + len(results) - 1
    # Without a full filtered count, estimate the total from what we saw.
    if len(results) < request.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1
    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
        "more_pages": more_pages,
    }
    return concordance_object
def group_by_range(request_range, request, config):
    """Group "doc" records falling within a date or alphabetical range.

    Returns a JSON string with one group of results per distinct initial.
    """
    db = DB(config.db_path + '/data/')
    metadata_queried = request.group_by_field
    citation_types = json.loads(request.citation)
    is_date = False
    try:
        int(request_range[0])
        int(request_range[1])
        is_date = True
    except ValueError:
        pass
    if is_date:
        content_type = "date"
        query_range = set(range(int(request_range[0]), int(request_range[1])))
    else:
        content_type = metadata_queried
        # Ordinal avoids unicode issues...
        query_range = set(range(ord(request_range[0]), ord(request_range[1]) + 1))
    db_cursor = db.dbh.cursor()
    try:
        db_cursor.execute('select *, count(*) as count from toms where philo_type="doc" group by %s' % metadata_queried)
    except sqlite3.OperationalError:
        # The grouping field does not exist: return no content.
        return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": []})
    content = {}
    for doc in db_cursor.fetchall():
        normalized_test_value = ''
        if doc[metadata_queried] is None:
            continue
        if is_date:
            try:
                initial = int(doc[metadata_queried])
                test_value = initial
            except:
                continue
        else:
            try:
                initial_letter = doc[metadata_queried][0].lower()
            except IndexError:  # we have an empty string
                continue
            test_value = ord(initial_letter)
            # Accent-stripped initial so accented letters group with their base.
            stripped = ''.join(i for i in unicodedata.normalize("NFKD", initial_letter) if not unicodedata.combining(i))
            normalized_test_value = ord(stripped)
            initial = initial_letter.upper()
        # Are we within the range?
        if test_value in query_range or normalized_test_value in query_range:
            if normalized_test_value in query_range:
                initial = stripped.upper()
            obj = db[doc["philo_id"]]
            links = citation_links(db, config, obj)
            citation = citations(obj, links, config, report="landing_page", citation_type=citation_types)
            content.setdefault(initial, []).append({
                "metadata": get_all_metadata(db, doc),
                "citation": citation,
                "count": doc['count'],
            })
    results = [
        {"prefix": prefix, "results": grouped}
        for prefix, grouped in sorted(content.items(), key=itemgetter(0))
    ]
    return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": results})
def group_by_range(request_range, request, config):
    """Group "doc" records by date or initial letter within request_range.

    Returns a JSON string with one result group per distinct initial.
    """
    db = DB(config.db_path + '/data/')
    metadata_queried = request.group_by_field
    citation_types = json.loads(request.citation)
    # The range is a date range iff both endpoints parse as integers.
    try:
        int(request_range[0])
        int(request_range[1])
        is_date = True
    except ValueError:
        is_date = False
    if is_date:
        content_type = "date"
        query_range = set(range(int(request_range[0]), int(request_range[1])))
    else:
        content_type = metadata_queried
        query_range = set(range(ord(request_range[0]), ord(request_range[1]) + 1))  # Ordinal avoids unicode issues...
    cursor = db.dbh.cursor()
    try:
        cursor.execute('select *, count(*) as count from toms where philo_type="doc" group by %s' % metadata_queried)
    except sqlite3.OperationalError:
        # Unknown grouping field: return an empty content list.
        return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": []})
    content = {}
    for doc in cursor:
        normalized_test_value = ''
        if doc[metadata_queried] is None:
            continue
        if is_date:
            try:
                initial = int(doc[metadata_queried])
                test_value = initial
            except:
                continue
        else:
            try:
                initial_letter = doc[metadata_queried][0].lower()
            except IndexError:  # we have an empty string
                continue
            test_value = ord(initial_letter)
            normalized_test_value = ord(''.join(
                char for char in unicodedata.normalize("NFKD", initial_letter) if not unicodedata.combining(char)))
            initial = initial_letter.upper()
        # Are we within the range?
        if test_value in query_range or normalized_test_value in query_range:
            if normalized_test_value in query_range:
                initial = ''.join(
                    char for char in unicodedata.normalize("NFKD", initial_letter)
                    if not unicodedata.combining(char)).upper()
            obj = db[doc["philo_id"]]
            links = citation_links(db, config, obj)
            citation = citations(obj, links, config, report="landing_page", citation_type=citation_types)
            if initial not in content:
                content[initial] = []
            content[initial].append({
                "metadata": get_all_metadata(db, doc),
                "citation": citation,
                "count": doc['count'],
            })
    results = []
    for prefix, grouped in sorted(content.items(), key=itemgetter(0)):
        results.append({"prefix": prefix, "results": grouped})
    return json.dumps({"display_count": request.display_count, "content_type": content_type, "content": results})
def generate_toc_object(request, config):
    """Fetch the philo_ids of every div element in a document and build its TOC."""
    db = DB(config.db_path + '/data/')
    connection = db.dbh
    cursor = connection.cursor()
    try:
        obj = db[request.philo_id]
    except ValueError:
        obj = db[' '.join(request.path_components[:-1])]
    doc_id = int(obj.philo_id[0])
    next_doc_id = doc_id + 1
    # Starting rowid for this doc.
    cursor.execute('select rowid from toms where philo_id="%d 0 0 0 0 0 0"' % doc_id)
    start_rowid = cursor.fetchone()[0]
    # The next doc's starting rowid bounds the scan.
    cursor.execute('select rowid from toms where philo_id="%d 0 0 0 0 0 0"' % next_doc_id)
    try:
        end_rowid = cursor.fetchone()[0]
    except TypeError:
        # This is the last doc: fall back to the table's final rowid.
        cursor.execute('select max(rowid) from toms;')
        end_rowid = cursor.fetchone()[0]
    # Fetch every div in the document between the two rowids.
    philo_slices = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
    text_hierarchy = []
    cursor.execute(
        "select * from toms where rowid >= ? and rowid <=? and philo_type>='div' and philo_type<='div3'",
        (start_rowid, end_rowid))
    for row in cursor:
        philo_id = [int(n) for n in row["philo_id"].split(" ")]
        text = HitWrapper.ObjectWrapper(philo_id, db, row=row)
        if text['philo_name'] == '__philo_virtual' and text["philo_type"] != "div1":
            continue
        elif text['word_count'] == 0:
            continue
        else:
            philo_id = text['philo_id']
            philo_type = text['philo_type']
            display_name = ""
            if text['philo_name'] == "front":
                display_name = "Front Matter"
            elif text['philo_name'] == "note":
                continue
            else:
                display_name = text['head']
            if display_name:
                display_name = display_name.strip()
            if not display_name:
                # Fall back through progressively weaker labels.
                if text["type"] and text["n"]:
                    display_name = text['type'] + " " + text["n"]
                else:
                    display_name = text["head"] or text['type'] or text['philo_name'] or text['philo_type']
                    if display_name == "__philo_virtual":
                        display_name = text['philo_type']
            display_name = display_name[0].upper() + display_name[1:]
            link = make_absolute_object_link(config, philo_id.split()[:philo_slices[philo_type]])
            philo_id = ' '.join(philo_id.split()[:philo_slices[philo_type]])
            toc_element = {"philo_id": philo_id, "philo_type": philo_type, "label": display_name, "href": link}
            text_hierarchy.append(toc_element)
    metadata_fields = {
        metadata: obj[metadata]
        for metadata in db.locals['metadata_fields']
        if db.locals['metadata_types'][metadata] == "doc"
    }
    citation_hrefs = citation_links(db, config, obj)
    toc_object = {
        "query": dict(list(request)),
        "philo_id": obj.philo_id,
        "toc": text_hierarchy,
        "metadata_fields": metadata_fields,
        "citation": citations(obj, citation_hrefs, config, report="navigation"),
    }
    return toc_object
def filter_words_by_property(request, config):
    """Filter words by property"""
    db = DB(config.db_path + "/data/")
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    concordance_object = {"query": dict(list(request))}
    # Do these need to be captured in wsgi_handler?
    word_property = request["word_property"]
    word_property_value = request["word_property_value"]
    word_property_total = request["word_property_total"]
    kept = []
    results = []
    position = 0
    more_pages = False
    if request.start == 0:
        start = 1
    else:
        start = request.start
    for hit in hits:
        # Check the word-level attribute of this hit against the filter.
        attribute_value = get_word_attrib(hit, word_property, db)
        if attribute_value != word_property_value:
            continue
        position += 1
        if position < start:
            continue
        kept.append(hit)
        hrefs = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals["metadata_fields"]}
        results.append({
            "philo_id": hit.philo_id,
            "citation": citations(hit, hrefs, config),
            "citation_links": hrefs,
            "context": get_concordance_text(db, hit, config.db_path, config.concordance_length),
            "metadata_fields": fields,
            "bytes": hit.bytes,
            "collocate_count": 1,
        })
        if len(kept) == request.results_per_page:
            # A full page collected; more results may follow.
            more_pages = True
            break
    end = start + len(results) - 1
    # The true filtered total is unknown; estimate from what was scanned.
    if len(results) < request.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1
    concordance_object["results"] = results
    concordance_object["query_done"] = hits.done
    concordance_object["results_length"] = word_property_total
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
        "more_pages": more_pages,
    }
    return concordance_object
def generate_toc_object(request, config):
    """Fetch all div-level philo_ids within a document and build its table of contents."""
    db = DB(config.db_path + "/data/")
    cursor = db.dbh.cursor()
    try:
        obj = db[request.philo_id]
    except ValueError:
        obj = db[" ".join(request.path_components[:-1])]
    doc_id = int(obj.philo_id[0])
    next_doc_id = doc_id + 1
    # Locate the rowid where this document starts...
    cursor.execute('select rowid from toms where philo_id="%d 0 0 0 0 0 0"' % doc_id)
    start_rowid = cursor.fetchone()[0]
    # ...and where the next document starts (or the end of the table).
    cursor.execute('select rowid from toms where philo_id="%d 0 0 0 0 0 0"' % next_doc_id)
    next_doc_row = cursor.fetchone()
    if next_doc_row is None:
        # Last doc in the table: stop at the final rowid.
        cursor.execute("select max(rowid) from toms;")
        end_rowid = cursor.fetchone()[0]
    else:
        end_rowid = next_doc_row[0]
    # Every div between those rowids belongs to this doc's table of contents.
    philo_slices = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
    text_hierarchy = []
    cursor.execute(
        "select * from toms where rowid >= ? and rowid <=? and philo_type>='div' and philo_type<='div3'",
        (start_rowid, end_rowid),
    )
    for row in cursor:
        id_parts = [int(n) for n in row["philo_id"].split(" ")]
        text = HitWrapper.ObjectWrapper(id_parts, db, row=row)
        if text["philo_name"] == "__philo_virtual" and text["philo_type"] != "div1":
            continue
        if text["word_count"] == 0:
            continue
        if text["philo_name"] == "note":
            continue
        philo_id = text["philo_id"]
        philo_type = text["philo_type"]
        if text["philo_name"] == "front":
            display_name = "Front Matter"
        else:
            display_name = text["head"]
        if display_name:
            display_name = display_name.strip()
        if not display_name:
            # Fall back through progressively weaker labels.
            if text["type"] and text["n"]:
                display_name = text["type"] + " " + text["n"]
            else:
                display_name = text["head"] or text["type"] or text["philo_name"] or text["philo_type"]
                if display_name == "__philo_virtual":
                    display_name = text["philo_type"]
        display_name = display_name[0].upper() + display_name[1:]
        truncated_id = philo_id.split()[:philo_slices[philo_type]]
        link = make_absolute_object_link(config, truncated_id)
        text_hierarchy.append({
            "philo_id": " ".join(truncated_id),
            "philo_type": philo_type,
            "label": display_name,
            "href": link,
        })
    metadata_fields = {}
    for metadata in db.locals["metadata_fields"]:
        if db.locals["metadata_types"][metadata] == "doc":
            metadata_fields[metadata] = obj[metadata]
    citation_hrefs = citation_links(db, config, obj)
    citation = citations(obj, citation_hrefs, config, report="table_of_contents")
    return {
        "query": dict(list(request)),
        "philo_id": obj.philo_id,
        "toc": text_hierarchy,
        "metadata_fields": metadata_fields,
        "citation": citation,
    }