def landing_page_bibliography(request, config):
    """Return landing-page bibliography entries with each document's first and last heading.

    Args:
        request: request object; ``request.object_level`` selects the object
            level to list when it is one of "doc", "div1", "div2" or "div3",
            otherwise the database's ``default_object_level`` is used.
        config: web configuration; ``config.db_path`` locates the database.

    Returns:
        A list with one dict per hit containing every field named in
        ``db.locals["metadata_fields"]`` (falsy values replaced by ""),
        ``philo_id`` (the document number for "doc" requests, otherwise the
        id components joined with "/"), ``start_head`` and ``end_head``
        (title-cased first/last non-empty ``head`` found in the document's
        rowid range, or "" when none could be read).
    """
    db = DB(config.db_path + "/data/")
    object_level = request.object_level
    if object_level and object_level in ["doc", "div1", "div2", "div3"]:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals["default_object_level"])

    # Values are bound with placeholders instead of being interpolated into the
    # SQL text, and string literals use standard single quotes.
    rowid_query = "select rowid from toms where philo_id=?"
    head_filter = (
        "select head from toms where rowid between ? and ? "
        "and head is not null and head != '' "
    )
    first_head_query = head_filter + "limit 1"
    last_head_query = head_filter + "order by rowid desc limit 1"

    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {field: hit[field] or "" for field in db.locals["metadata_fields"]}
        if object_level == "doc":
            hit_object["philo_id"] = hit.philo_id[0]
        else:
            hit_object["philo_id"] = "/".join([str(i) for i in hit.philo_id])

        # Locate the rowid span of this document: from its own doc-level row to
        # the doc-level row of the following document.
        doc_id = str(hit.philo_id[0]) + " 0 0 0 0 0 0"
        next_doc_id = str(hit.philo_id[0] + 1) + " 0 0 0 0 0 0"
        c.execute(rowid_query, (doc_id,))
        doc_row = c.fetchone()["rowid"]
        c.execute(rowid_query, (next_doc_id,))
        try:
            next_doc_row = c.fetchone()["rowid"]
        except TypeError:
            # fetchone() returned None: this is the last doc, so just use the
            # last rowid in the table.
            c.execute("select max(rowid) from toms;")
            next_doc_row = c.fetchone()[0]

        # The former `except sqlite3.OperationalError` fallbacks re-ran the very
        # same statement, so they could never succeed where the first attempt
        # failed; each query is now issued once.
        c.execute(first_head_query, (doc_row, next_doc_row))
        try:
            start_head = _title_case_head(c.fetchone())
        except Exception as e:
            # Best effort: a document without headings (fetchone() is None)
            # or an unreadable value yields an empty heading.
            print(repr(e), file=sys.stderr)
            start_head = ""

        c.execute(last_head_query, (doc_row, next_doc_row))
        try:
            end_head = _title_case_head(c.fetchone())
        except Exception:
            end_head = ""

        hit_object["start_head"] = start_head
        hit_object["end_head"] = end_head
        results.append(hit_object)
    return results


def _title_case_head(row):
    """Return ``row["head"]`` lower-cased then title-cased, keeping its str/bytes type."""
    head = row["head"]
    if isinstance(head, bytes):
        # Byte strings are treated as UTF-8 and re-encoded after casing, which
        # is what the original decode/encode round trip produced.
        return head.decode("utf-8").lower().title().encode("utf-8")
    # Text values have no .decode() on Python 3; case them directly instead of
    # failing and silently falling back to "".
    return head.lower().title()
def bibliography_results(request, config):
    """Fetch bibliography results.

    Runs either a full listing (``request.no_metadata``) or a metadata query,
    then builds one result entry per hit in the requested page.

    Returns:
        A ``(bibliography_object, hits)`` tuple, where ``bibliography_object``
        is a dict holding the paging description, the echoed query, the
        default object level, the result entries and hit-list status.
    """
    db = DB(config.db_path + "/data/")
    if request.no_metadata:
        hits = db.get_all(db.locals["default_object_level"], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)

    # A request from the simple landing page report gets all biblio in load order.
    simple_landing = request.simple_bibliography == "all"
    if simple_landing:
        hits.finish()
        start = 1
        end = len(hits)
        page_num = end
    else:
        start, end, page_num = page_interval(request.results_per_page, hits, request.start, request.end)

    description = {
        "start": start,
        "end": end,
        "n": page_num,
        "results_per_page": request.results_per_page,
    }
    bibliography_object = {
        "description": description,
        "query": {name: value for name, value in request},
        "default_object": db.locals["default_object_level"],
    }

    results = []
    result_type = "doc"  # reported as-is when the page contains no hits
    for hit in hits[start - 1 : end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {name: hit[name] for name in db.locals["metadata_fields"]}
        result_type = hit.object_type
        if simple_landing:
            citation = citations(hit, citation_hrefs, config, report="simple_landing")
        else:
            citation = citations(hit, citation_hrefs, config, report="bibliography", result_type=result_type)

        # Text context is only attached for non-doc objects, and only when
        # dictionary_bibliography is not explicitly False.
        with_context = config.dictionary_bibliography is not False and result_type != "doc"
        if with_context:
            context = get_text_obj(hit, config, request, db.locals["token_regex"], images=False)

        entry = {
            "citation": citation,
            "citation_links": citation_hrefs,
            "philo_id": hit.philo_id,
            "metadata_fields": metadata_fields,
        }
        if with_context:
            entry["context"] = context
        entry["object_type"] = result_type
        results.append(entry)

    bibliography_object["results"] = results
    bibliography_object["results_length"] = len(hits)
    bibliography_object["query_done"] = hits.done
    bibliography_object["result_type"] = result_type
    return bibliography_object, hits
def get_total_results(environ, start_response):
    """WSGI endpoint yielding the total number of hits for a request as UTF-8 JSON.

    The hit list is run to completion before counting. Requests without a
    query term fall back to a full listing (no metadata) or a metadata-only
    query.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)

    # The web config is loaded from this script's directory with 'scripts'
    # removed from the path.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    if not request.no_q:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)

    hits.finish()
    yield json.dumps(len(hits)).encode('utf8')
def get_total_results(environ, start_response):
    """WSGI endpoint yielding the total number of hits for a request as UTF-8 JSON.

    The hit list is run to completion before counting. Requests without a
    query term fall back to a full listing (no metadata) or a metadata-only
    query.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)

    # The web config is loaded from this script's directory with 'scripts'
    # removed from the path.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    if not request.no_q:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)

    hits.finish()
    yield json.dumps(len(hits)).encode('utf8')
def frequency_results(request, config, sorted_results=False):
    """Count how often each value of ``request.frequency_field`` occurs in a hit list.

    Reads through the hit list, looks up ``request.frequency_field`` for each
    hit and builds a mapping of unique values to their frequencies.

    Args:
        request: request object. This function reads ``q``, ``no_q``,
            ``no_metadata``, ``metadata``, ``method``, ``arg``, ``start``,
            ``report`` and ``frequency_field`` from it.
        config: web configuration; ``config.db_path`` locates the database.
        sorted_results: when True, the hit list is run to completion first,
            the time limit on the counting loop is disabled, and the results
            are returned as a list sorted by descending count.

    Returns:
        A dict with the keys ``results``, ``more_results``, ``results_length``
        and ``query``, plus ``hits_done`` unless an IndexError interrupts
        processing before it is assigned. ``results`` maps each value to a
        dict with ``count``, ``metadata`` and ``url`` (and
        ``total_word_count`` for word searches), or is a list of
        ``(value, dict)`` pairs when ``sorted_results`` is True.
    """
    db = DB(config.db_path + "/data/")
    biblio_search = False
    if request.q == "" and request.no_q:
        # No query term: count over metadata objects rather than word hits.
        biblio_search = True
        if request.no_metadata:
            hits = db.get_all(db.locals["default_object_level"], sort_order=["rowid"], raw_results=True)
        else:
            hits = db.query(sort_order=["rowid"], raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **request.metadata)
    if sorted_results is True:
        hits.finish()
    cursor = db.dbh.cursor()
    # NOTE(review): the field name is interpolated into the SQL text (column
    # names cannot be bound as parameters) before it is looked up in
    # db.locals["metadata_types"] below -- confirm it is validated upstream.
    cursor.execute("select philo_id, %s from toms where %s is not null" % (request.frequency_field, request.frequency_field))
    # Map each object's id tuple to its field value. Zero components of the
    # space-separated philo_id are dropped, e.g. "1 2 0 0 0 0 0" -> (1, 2).
    metadata_dict = {}
    for i in cursor:
        philo_id, field = i
        philo_id = tuple(int(s) for s in philo_id.split() if int(s))
        metadata_dict[philo_id] = field
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    # Number of leading philo_id components that identify an object of each type.
    obj_dict = {
        "doc": 1,
        "div1": 2,
        "div2": 3,
        "div3": 4,
        "para": 5,
        "sent": 6,
        "word": 7
    }
    metadata_type = db.locals["metadata_types"][request.frequency_field]
    try:
        object_level = obj_dict[metadata_type]
    except KeyError:
        # metadata_type == "div"
        # object_level stays unassigned on this path.
        pass
    try:
        for philo_id in hits[request.start:]:
            if not biblio_search:
                # Keep the first six components plus the element at index 7
                # (index 6 is skipped).
                # NOTE(review): presumably this matches the raw word-hit
                # layout -- confirm against the hit-list format.
                philo_id = tuple(list(philo_id[:6]) + [philo_id[7]])
            if metadata_type == "div":
                key = ""
                # Later (deeper) div levels overwrite earlier matches.
                for div in ["div1", "div2", "div3"]:
                    if philo_id[:obj_dict[div]] in metadata_dict:
                        key = metadata_dict[philo_id[:obj_dict[div]]]
                # Runs at most once (it always ends in a break): when no div
                # matched, try the 4- and then the 5-component prefix.
                while not key:
                    if philo_id[:4] in metadata_dict:
                        key = metadata_dict[philo_id[:4]]
                        break
                    if philo_id[:5] in metadata_dict:
                        key = metadata_dict[philo_id[:5]]
                        break
                    break
                if not key:
                    # No value for this hit: count it as processed and skip it.
                    last_hit_done += 1
                    continue
            else:
                try:
                    key = metadata_dict[philo_id[:object_level]]
                except:
                    # NOTE(review): this bare except also hides the unbound
                    # `object_level` error raised when metadata_type is
                    # neither in obj_dict nor "div"; every hit is then skipped.
                    last_hit_done += 1
                    continue
            if key not in counts:
                # First occurrence of this value: create its entry and the
                # link that re-runs the query restricted to the value.
                counts[key] = {
                    "count": 0,
                    "metadata": {
                        request.frequency_field: key
                    }
                }
                counts[key]["url"] = make_absolute_query_link(
                    config,
                    request,
                    frequency_field="",
                    start="0",
                    end="0",
                    report=request.report,
                    script="",
                    **{request.frequency_field: '"%s"' % key})
                if not biblio_search:
                    # Total word count of the objects matching the current
                    # metadata filters plus this value.
                    query_metadata = dict([
                        (k, v) for k, v in request.metadata.items() if v
                    ])
                    query_metadata[request.frequency_field] = '"%s"' % key
                    local_hits = db.query(**query_metadata)
                    counts[key][
                        "total_word_count"] = local_hits.get_total_word_count(
                        )
            counts[key]["count"] += 1
            # Avoid timeouts by splitting the query: stop once more than a
            # hard-coded 5 seconds has been spent in the loop, unless
            # sorted_results is True.
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5 and sorted_results is False:
                break
        frequency_object["results"] = counts
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            # Every hit was processed: add a "NULL" bucket obtained by
            # re-running the query with the field set to '"NULL"'.
            new_metadata = dict([(k, v) for k, v in request.metadata.items() if v])
            new_metadata[request.frequency_field] = '"NULL"'
            if request.q == "" and request.no_q:
                new_hits = db.query(sort_order=["rowid"], raw_results=True, **new_metadata)
            else:
                new_hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **new_metadata)
            new_hits.finish()
            if len(new_hits):
                null_url = make_absolute_query_link(
                    config,
                    request,
                    frequency_field="",
                    start="0",
                    end="0",
                    report=request.report,
                    script="",
                    **{request.frequency_field: '"NULL"'})
                # This query also runs for biblio searches, where its result
                # is not used.
                local_hits = db.query(**new_metadata)
                if not biblio_search:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {
                            request.frequency_field: '"NULL"'
                        },
                        "total_word_count": local_hits.get_total_word_count(),
                    }
                else:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {
                            request.frequency_field: '"NULL"'
                        },
                    }
            frequency_object["more_results"] = False
        else:
            # Stopped early; the caller can resume from hits_done.
            frequency_object["more_results"] = True
    except IndexError:
        # Any IndexError raised above discards the counts gathered so far.
        frequency_object["results"] = {}
        frequency_object["more_results"] = False
    frequency_object["results_length"] = len(hits)
    frequency_object["query"] = dict([i for i in request])
    if sorted_results is True:
        # Replace the mapping with (value, entry) pairs, highest count first.
        frequency_object["results"] = sorted(
            frequency_object["results"].items(),
            key=lambda x: x[1]["count"],
            reverse=True)
    return frequency_object
def landing_page_bibliography(request, config):
    """Return landing-page bibliography entries with each document's first and last heading.

    Args:
        request: request object; ``request.object_level`` selects the object
            level to list when it is one of "doc", "div1", "div2" or "div3",
            otherwise the database's ``default_object_level`` is used.
        config: web configuration; ``config.db_path`` locates the database.

    Returns:
        A list with one dict per hit containing every field named in
        ``db.locals["metadata_fields"]`` (falsy values replaced by ""),
        ``philo_id`` (the document number for "doc" requests, otherwise the
        id components joined with "/"), ``start_head`` and ``end_head``
        (title-cased first/last non-empty ``head`` found in the document's
        rowid range, or "" when none could be read).
    """
    db = DB(config.db_path + "/data/")
    object_level = request.object_level
    if object_level and object_level in ["doc", "div1", "div2", "div3"]:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals["default_object_level"])

    # Values are bound with placeholders instead of being interpolated into the
    # SQL text, and string literals use standard single quotes.
    rowid_query = "select rowid from toms where philo_id=?"
    head_filter = (
        "select head from toms where rowid between ? and ? "
        "and head is not null and head != '' "
    )
    first_head_query = head_filter + "limit 1"
    last_head_query = head_filter + "order by rowid desc limit 1"

    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {field: hit[field] or "" for field in db.locals["metadata_fields"]}
        if object_level == "doc":
            hit_object["philo_id"] = hit.philo_id[0]
        else:
            hit_object["philo_id"] = "/".join([str(i) for i in hit.philo_id])

        # Locate the rowid span of this document: from its own doc-level row to
        # the doc-level row of the following document.
        doc_id = str(hit.philo_id[0]) + " 0 0 0 0 0 0"
        next_doc_id = str(hit.philo_id[0] + 1) + " 0 0 0 0 0 0"
        c.execute(rowid_query, (doc_id,))
        doc_row = c.fetchone()["rowid"]
        c.execute(rowid_query, (next_doc_id,))
        try:
            next_doc_row = c.fetchone()["rowid"]
        except TypeError:
            # fetchone() returned None: this is the last doc, so just use the
            # last rowid in the table.
            c.execute("select max(rowid) from toms;")
            next_doc_row = c.fetchone()[0]

        # The former `except sqlite3.OperationalError` fallbacks re-ran the very
        # same statement, so they could never succeed where the first attempt
        # failed; each query is now issued once.
        c.execute(first_head_query, (doc_row, next_doc_row))
        try:
            start_head = _title_case_head(c.fetchone())
        except Exception as e:
            # Best effort: a document without headings (fetchone() is None)
            # or an unreadable value yields an empty heading.
            print(repr(e), file=sys.stderr)
            start_head = ""

        c.execute(last_head_query, (doc_row, next_doc_row))
        try:
            end_head = _title_case_head(c.fetchone())
        except Exception:
            end_head = ""

        hit_object["start_head"] = start_head
        hit_object["end_head"] = end_head
        results.append(hit_object)
    return results


def _title_case_head(row):
    """Return ``row["head"]`` lower-cased then title-cased, keeping its str/bytes type."""
    head = row["head"]
    if isinstance(head, bytes):
        # Byte strings are treated as UTF-8 and re-encoded after casing, which
        # is what the original decode/encode round trip produced.
        return head.decode("utf-8").lower().title().encode("utf-8")
    # Text values have no .decode() on Python 3; case them directly instead of
    # failing and silently falling back to "".
    return head.lower().title()