def landing_page_bibliography(request, config):
    """Build the landing-page bibliography: one record per text object.

    For every object at the requested level (doc/div1/div2/div3, falling back
    to the database's default object level), collect its metadata fields, its
    philo_id, and the first and last non-empty section heads of the enclosing
    document.

    Returns a list of dicts, one per object.
    """
    db = DB(config.db_path + '/data/')
    object_level = request.object_level
    if object_level and object_level in ("doc", "div1", "div2", "div3"):
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    cursor = db.dbh.cursor()
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''
        if object_level == "doc":
            # Doc level: a single integer id is enough.
            hit_object['philo_id'] = hit.philo_id[0]
        else:
            hit_object['philo_id'] = '/'.join(str(i) for i in hit.philo_id)
        doc_row, next_doc_row = _doc_rowid_span(cursor, hit.philo_id[0])
        hit_object['start_head'] = _title_cased_head(cursor, doc_row, next_doc_row, first=True)
        hit_object['end_head'] = _title_cased_head(cursor, doc_row, next_doc_row, first=False)
        results.append(hit_object)
    return results


def _doc_rowid_span(cursor, doc_num):
    """Return the (first, last) toms rowids bounding document *doc_num*.

    philo_ids are stored in toms as space-separated 7-component strings;
    bound parameters keep request-derived values out of the SQL text.
    """
    cursor.execute('select rowid from toms where philo_id = ?',
                   ('%d 0 0 0 0 0 0' % doc_num,))
    doc_row = cursor.fetchone()['rowid']
    cursor.execute('select rowid from toms where philo_id = ?',
                   ('%d 0 0 0 0 0 0' % (doc_num + 1),))
    row = cursor.fetchone()
    if row is None:
        # Last document in the database: fall back to the table's last rowid.
        cursor.execute('select max(rowid) from toms')
        return doc_row, cursor.fetchone()[0]
    return doc_row, row['rowid']


def _title_cased_head(cursor, first_row, last_row, first=True):
    """Return the first (or last) non-empty head between two rowids, title-cased.

    Returns '' when no head is found or decoding fails.
    """
    # 'order' is generated locally, never from user input; the rowids are
    # bound parameters.
    order = 'asc' if first else 'desc'
    cursor.execute(
        'select head from toms where rowid between ? and ? '
        'and head is not null and head != "" order by rowid %s limit 1' % order,
        (first_row, last_row))
    row = cursor.fetchone()
    try:
        return row['head'].decode('utf-8').lower().title().encode('utf-8')
    except Exception as err:
        # Best-effort: log and fall back to an empty head.
        print(repr(err), file=sys.stderr)
        return ''
def get_bibliography(environ, start_response):
    """WSGI endpoint: stream the database's bibliography as a JSON payload.

    Mirrors landing_page_bibliography, but reads its parameters from the
    WSGI environ and yields a single JSON string.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    object_level = request.object_level
    if object_level and object_level in object_levels:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''
        if object_level == "doc":
            hit_object['philo_id'] = hit.philo_id[0]
        else:
            hit_object['philo_id'] = '/'.join(str(i) for i in hit.philo_id)
        # philo_ids are stored as space-separated 7-component strings; use
        # bound parameters rather than interpolating values into the SQL.
        doc_id = '%d 0 0 0 0 0 0' % hit.philo_id[0]
        next_doc_id = '%d 0 0 0 0 0 0' % (hit.philo_id[0] + 1)
        c.execute('select rowid from toms where philo_id = ?', (doc_id,))
        doc_row = c.fetchone()['rowid']
        c.execute('select rowid from toms where philo_id = ?', (next_doc_id,))
        row = c.fetchone()
        if row is None:
            # Last doc in the table: fall back to the final rowid.
            c.execute('select max(rowid) from toms;')
            next_doc_row = c.fetchone()[0]
        else:
            next_doc_row = row['rowid']
        c.execute('select head from toms where rowid between ? and ? '
                  'and head is not null limit 1', (doc_row, next_doc_row))
        try:
            start_head = c.fetchone()['head'].decode('utf-8').lower().title().encode('utf-8')
        except Exception:
            # No head found or decoding failed: empty head is acceptable.
            start_head = ''
        c.execute('select head from toms where rowid between ? and ? '
                  'and head is not null order by rowid desc limit 1',
                  (doc_row, next_doc_row))
        try:
            end_head = c.fetchone()['head'].decode('utf-8').lower().title().encode('utf-8')
        except Exception:
            end_head = ''
        hit_object['start_head'] = start_head
        hit_object['end_head'] = end_head
        results.append(hit_object)
    yield json.dumps(results)
def bibliography_results(request, config):
    """Assemble one page of bibliography results for the current query.

    Returns a (bibliography_object, hits) pair: the serializable report
    payload plus the underlying hitlist.
    """
    db = DB(config.db_path + '/data/')
    if request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    if request.simple_bibliography == "all":
        # The simple landing page wants every record in load order.
        hits.finish()
        start = 1
        end = len(hits)
        page_num = end
    else:
        start, end, page_num = page_interval(request.results_per_page, hits,
                                             request.start, request.end)
    bibliography_object = {
        "description": {
            "start": start,
            "end": end,
            "n": page_num,
            "results_per_page": request.results_per_page,
        },
        "query": dict([i for i in request]),
        "default_object": db.locals['default_object_level'],
    }
    results = []
    result_type = "doc"
    for hit in hits[start - 1:end]:
        hrefs = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals['metadata_fields']}
        result_type = hit.object_type
        if request.simple_bibliography == "all":
            citation = citations(hit, hrefs, config, report="simple_landing")
        else:
            citation = citations(hit, hrefs, config, report="bibliography",
                                 result_type=result_type)
        if config.dictionary_bibliography is False or result_type == "doc":
            results.append({
                'citation': citation,
                'citation_links': hrefs,
                'philo_id': hit.philo_id,
                "metadata_fields": fields,
                "object_type": result_type,
            })
        else:
            # Dictionary-style bibliographies embed the object's own text.
            context = get_text_obj(hit, config, request,
                                   db.locals["token_regex"], images=False)
            results.append({
                'citation': citation,
                'citation_links': hrefs,
                'philo_id': hit.philo_id,
                "metadata_fields": fields,
                "context": context,
                "object_type": result_type,
            })
    bibliography_object["results"] = results
    bibliography_object['results_length'] = len(hits)
    bibliography_object['query_done'] = hits.done
    bibliography_object['result_type'] = result_type
    return bibliography_object, hits
def get_frequency(environ, start_response):
    """WSGI endpoint: return frequency counts for the current query as JSON."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    no_word_query = request.q == '' and request.no_q
    if not no_word_query:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    elif request.no_metadata:
        # Pure browse: neither a word query nor a metadata restriction.
        hits = db.get_all(db.locals['default_object_level'])
    else:
        hits = db.query(**request.metadata)
    results = r.generate_frequency(hits, request, db, config)
    yield json.dumps(results)
def get_total_results(environ, start_response):
    """WSGI endpoint: return the total hit count of a query as JSON.

    Runs the query to completion (hits.finish()) so the reported count is
    exact rather than a running estimate.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    hits.finish()
    # The original initialized total_results to 0 first — a dead store.
    total_results = len(hits)
    yield simplejson.dumps(total_results)
def get_total_results(environ, start_response):
    """WSGI endpoint: return the total hit count of a sorted query as JSON.

    Same as the unsorted variant, but honors request["sort_order"] when
    fetching the hitlist. Runs the query to completion so the count is exact.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
        else:
            hits = db.query(sort_order=request["sort_order"], **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    hits.finish()
    # The original initialized total_results to 0 first — a dead store.
    total_results = len(hits)
    yield simplejson.dumps(total_results)
def bibliography_results(request, config):
    """Build one page of bibliography results plus its hitlist.

    Returns a (bibliography_object, hits) pair.
    """
    db = DB(config.db_path + '/data/')
    if request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(**request.metadata)
    start, end, page_num = page_interval(request.results_per_page, hits,
                                         request.start, request.end)
    bibliography_object = {
        "description": {
            "start": start,
            "end": end,
            "n": page_num,
            "results_per_page": request.results_per_page,
        },
        "query": dict([i for i in request]),
        "default_object": db.locals['default_object_level'],
    }
    results = []
    result_type = 'doc'
    for hit in hits[start - 1:end]:
        hrefs = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals['metadata_fields']}
        result_type = hit.type
        # Doc-level hits get a bibliography citation; anything below doc
        # level uses the concordance-style citation.
        report_name = "bibliography" if hit.type == "doc" else "concordance"
        citation = citations(hit, hrefs, config, report=report_name)
        results.append({
            'citation': citation,
            'citation_links': hrefs,
            'philo_id': hit.philo_id,
            "metadata_fields": fields,
        })
    bibliography_object["results"] = results
    bibliography_object['results_length'] = len(hits)
    bibliography_object['query_done'] = hits.done
    bibliography_object['result_type'] = result_type
    return bibliography_object, hits
def get_frequency(environ, start_response):
    """WSGI endpoint: return frequency counts sorted by count as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    # json.dumps produces the quoted form directly. The previous
    # eval('"%s"' % request.frequency_field) evaluated request data as
    # Python code — a code-injection vector whenever the field contains
    # a quote character.
    setattr(request, 'frequency_field', json.dumps(request.frequency_field))
    if request.q == '' and request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    hits.finish()
    results = r.generate_frequency(hits, request, db, config)
    # Most frequent values first. .items() works on both Python 2 and 3,
    # unlike the py2-only .iteritems() it replaces.
    results['results'] = sorted(results['results'].items(),
                                key=lambda x: x[1]['count'], reverse=True)
    yield json.dumps(results)
def frequency_results(request, config, sorted_results=False):
    """reads through a hitlist. looks up request.frequency_field in each hit, and builds up a list of unique values and their frequencies."""
    db = DB(config.db_path + "/data/")
    biblio_search = False
    if request.q == "" and request.no_q:
        # No word query: this is a pure metadata (bibliography) search.
        biblio_search = True
        if request.no_metadata:
            hits = db.get_all(db.locals["default_object_level"], sort_order=["rowid"], raw_results=True)
        else:
            hits = db.query(sort_order=["rowid"], raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **request.metadata)
    if sorted_results is True:
        # A full ranking needs the complete hitlist up front.
        hits.finish()
    cursor = db.dbh.cursor()
    # NOTE(review): frequency_field is interpolated into the SQL as a column
    # name; presumably it is restricted to the database's own metadata
    # fields upstream — confirm against the caller.
    cursor.execute("select philo_id, %s from toms where %s is not null" %
                   (request.frequency_field, request.frequency_field))
    metadata_dict = {}
    for i in cursor:
        philo_id, field = i
        # philo_ids are space-separated numbers; drop the zero components so
        # the tuple key matches the object's own level.
        philo_id = tuple(int(s) for s in philo_id.split() if int(s))
        metadata_dict[philo_id] = field
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    # Number of philo_id components that identify an object at each level.
    obj_dict = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5, "sent": 6, "word": 7}
    metadata_type = db.locals["metadata_types"][request.frequency_field]
    try:
        object_level = obj_dict[metadata_type]
    except KeyError:
        # metadata_type == "div"
        pass
    try:
        for philo_id in hits[request.start:]:
            if not biblio_search:
                # Raw word hits carry extra positions; keep the 6 object
                # components plus the word offset.
                philo_id = tuple(list(philo_id[:6]) + [philo_id[7]])
            if metadata_type == "div":
                # "div" spans several levels: take the deepest div prefix
                # that has a value for this field.
                key = ""
                for div in ["div1", "div2", "div3"]:
                    if philo_id[:obj_dict[div]] in metadata_dict:
                        key = metadata_dict[philo_id[:obj_dict[div]]]
                while not key:
                    # Fall back to para/sent-level prefixes if no div matched.
                    if philo_id[:4] in metadata_dict:
                        key = metadata_dict[philo_id[:4]]
                        break
                    if philo_id[:5] in metadata_dict:
                        key = metadata_dict[philo_id[:5]]
                        break
                    break
                if not key:
                    last_hit_done += 1
                    continue
            else:
                try:
                    key = metadata_dict[philo_id[:object_level]]
                except:
                    # No value for this object: skip it.
                    last_hit_done += 1
                    continue
            if key not in counts:
                counts[key] = {"count": 0, "metadata": {request.frequency_field: key}}
                # Build the click-through URL once per distinct value.
                counts[key]["url"] = make_absolute_query_link(
                    config, request, frequency_field="", start="0", end="0",
                    report=request.report, script="",
                    **{request.frequency_field: '"%s"' % key})
                if not biblio_search:
                    query_metadata = dict([(k, v) for k, v in request.metadata.items() if v])
                    query_metadata[request.frequency_field] = '"%s"' % key
                    local_hits = db.query(**query_metadata)
                    counts[key]["total_word_count"] = local_hits.get_total_word_count()
            counts[key]["count"] += 1
            # avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5 and sorted_results is False:
                break
        frequency_object["results"] = counts
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            # All hits tallied: also count the objects where the field is
            # unset ("NULL" bucket).
            new_metadata = dict([(k, v) for k, v in request.metadata.items() if v])
            new_metadata[request.frequency_field] = '"NULL"'
            if request.q == "" and request.no_q:
                new_hits = db.query(sort_order=["rowid"], raw_results=True, **new_metadata)
            else:
                new_hits = db.query(request["q"], request["method"], request["arg"],
                                    raw_results=True, **new_metadata)
            new_hits.finish()
            if len(new_hits):
                null_url = make_absolute_query_link(
                    config, request, frequency_field="", start="0", end="0",
                    report=request.report, script="",
                    **{request.frequency_field: '"NULL"'})
                local_hits = db.query(**new_metadata)
                if not biblio_search:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {request.frequency_field: '"NULL"'},
                        "total_word_count": local_hits.get_total_word_count(),
                    }
                else:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {request.frequency_field: '"NULL"'},
                    }
            frequency_object["more_results"] = False
        else:
            frequency_object["more_results"] = True
    except IndexError:
        # Ran past the end of a hitlist that is still being filled.
        frequency_object["results"] = {}
        frequency_object["more_results"] = False
    frequency_object["results_length"] = len(hits)
    frequency_object["query"] = dict([i for i in request])
    if sorted_results is True:
        # Rank distinct values by descending count.
        frequency_object["results"] = sorted(
            frequency_object["results"].items(),
            key=lambda x: x[1]["count"],
            reverse=True)
    return frequency_object
def frequency_results(request, config, sorted=False):
    """reads through a hitlist. looks up request.frequency_field in each hit,
    and builds up a list of unique values and their frequencies.

    NOTE: the ``sorted`` flag shadows the builtin of the same name; the
    parameter is kept for backward compatibility, so the final ranking is
    done with list.sort() instead of the (shadowed) sorted() builtin — the
    original call here raised TypeError: 'bool' object is not callable.
    """
    db = DB(config.db_path + '/data/')
    biblio_search = False
    if request.q == '' and request.no_q:
        # No word query: this is a pure metadata (bibliography) search.
        biblio_search = True
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'], sort_order=["rowid"], raw_results=True)
        else:
            hits = db.query(sort_order=["rowid"], raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **request.metadata)
    if sorted:
        # A full ranking needs the complete hitlist up front.
        hits.finish()
    c = db.dbh.cursor()
    # NOTE(review): frequency_field is interpolated as a column name;
    # presumably restricted to the database's own metadata fields upstream.
    c.execute('select philo_id, %s from toms where %s is not null' %
              (request.frequency_field, request.frequency_field))
    metadata_dict = {}
    for philo_id_string, field_value in c.fetchall():
        # philo_ids are space-separated numbers; drop the zero components.
        object_id = tuple(int(s) for s in philo_id_string.split() if int(s))
        metadata_dict[object_id] = field_value
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    # Number of philo_id components identifying an object at each level.
    obj_dict = {'doc': 1, 'div1': 2, 'div2': 3, 'div3': 4, 'para': 5, 'sent': 6, 'word': 7}
    metadata_type = db.locals["metadata_types"][request.frequency_field]
    try:
        object_level = obj_dict[metadata_type]
    except KeyError:
        # metadata_type == "div": handled per-hit below.
        pass
    try:
        for philo_id in hits[request.start:]:
            if not biblio_search:
                # Raw word hits carry extra positions; keep the 6 object
                # components plus the word offset.
                philo_id = tuple(list(philo_id[:6]) + [philo_id[7]])
            if metadata_type == "div":
                # "div" spans several levels: take the deepest div prefix
                # that has a value for this field.
                key = ""
                for div in ["div1", "div2", "div3"]:
                    if philo_id[:obj_dict[div]] in metadata_dict:
                        key = metadata_dict[philo_id[:obj_dict[div]]]
                while not key:
                    if philo_id[:4] in metadata_dict:
                        key = metadata_dict[philo_id[:4]]
                        break
                    if philo_id[:5] in metadata_dict:
                        key = metadata_dict[philo_id[:5]]
                        break
                    break
                if not key:
                    last_hit_done += 1
                    continue
            else:
                try:
                    key = metadata_dict[philo_id[:object_level]]
                except (KeyError, NameError):
                    # KeyError: no value for this object.
                    # NameError: an unknown metadata_type left object_level
                    # unset (preserves the old bare-except behavior).
                    last_hit_done += 1
                    continue
            if key not in counts:
                counts[key] = {"count": 0, 'metadata': {request.frequency_field: key}}
                # Build the click-through URL once per distinct value.
                counts[key]["url"] = make_absolute_query_link(
                    config, request, frequency_field="", start="0", end="0",
                    report=request.report, script='',
                    **{request.frequency_field: '"%s"' % key})
                if not biblio_search:
                    query_metadata = dict([(k, v) for k, v in request.metadata.iteritems() if v])
                    query_metadata[request.frequency_field] = '"%s"' % key
                    local_hits = db.query(**query_metadata)
                    counts[key]["total_word_count"] = local_hits.get_total_word_count()
            counts[key]["count"] += 1
            # avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop;
            # never split when a complete sorted tally was requested.
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5 and not sorted:
                break
        frequency_object['results'] = counts
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            # All hits tallied: also count the objects where the field is
            # unset ("NULL" bucket).
            new_metadata = dict([(k, v) for k, v in request.metadata.iteritems() if v])
            new_metadata[request.frequency_field] = '"NULL"'
            if request.q == '' and request.no_q:
                new_hits = db.query(sort_order=["rowid"], raw_results=True, **new_metadata)
            else:
                new_hits = db.query(request["q"], request["method"], request["arg"],
                                    raw_results=True, **new_metadata)
            new_hits.finish()
            if len(new_hits):
                null_url = make_absolute_query_link(
                    config, request, frequency_field="", start="0", end="0",
                    report=request.report, script='',
                    **{request.frequency_field: '"NULL"'})
                local_hits = db.query(**new_metadata)
                if not biblio_search:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {request.frequency_field: '"NULL"'},
                        "total_word_count": local_hits.get_total_word_count()}
                else:
                    frequency_object["results"]["NULL"] = {
                        "count": len(new_hits),
                        "url": null_url,
                        "metadata": {request.frequency_field: '"NULL"'}}
            frequency_object['more_results'] = False
        else:
            frequency_object['more_results'] = True
    except IndexError:
        # Ran past the end of a hitlist that is still being filled.
        frequency_object['results'] = {}
        frequency_object['more_results'] = False
    frequency_object['results_length'] = len(hits)
    frequency_object['query'] = dict([i for i in request])
    if sorted:
        # Rank by descending count via list.sort(); the builtin sorted() is
        # shadowed by the parameter and cannot be called here.
        ranked = list(frequency_object['results'].iteritems())
        ranked.sort(key=lambda item: item[1]['count'], reverse=True)
        frequency_object["results"] = ranked
    return frequency_object
def frequency_results(request, config, sorted=False):
    """reads through a hitlist. looks up request.frequency_field in each hit,
    and builds up a list of unique values and their frequencies.

    NOTE: the ``sorted`` flag shadows the builtin of the same name; the
    parameter is kept for backward compatibility, so the final ranking is
    done with list.sort() instead of the (shadowed) sorted() builtin — the
    original call here raised TypeError: 'bool' object is not callable.
    """
    import ast  # function-local so the fix is self-contained

    db = DB(config.db_path + '/data/')
    if request.q == '' and request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    if sorted:
        # A full ranking needs the complete hitlist up front.
        hits.finish()
    # frequency_field arrives as a JSON-encoded Python literal (a list of
    # field names). ast.literal_eval parses it without executing arbitrary
    # code, unlike the eval() it replaces — eval on request data is a
    # code-injection vector.
    field_list = ast.literal_eval(simplejson.loads(request.frequency_field))
    counts = defaultdict(int)
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    try:
        for hit in hits[request.start:]:
            key = tuple((field, hit[field]) for field in field_list)
            counts[key] += 1
            # avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5:
                break
        table = {}
        for key, count in counts.iteritems():
            # for each item in the table, we modify the query params to
            # generate a link url.
            # Make a distinct copy for each key in case we modify it below
            metadata = dict(request.metadata)
            # Build a label starting with the first value as the main value
            first_field, first_value = key[0]
            label = first_value or "NULL"
            metadata[first_field] = first_value.encode('utf-8', 'ignore') or "NULL"
            append_to_label = []
            for metadata_key, metadata_value in key[1:]:
                metadata_value = metadata_value.strip()
                if not metadata_value:
                    # replace NULL with '[None]', 'N.A.', 'Untitled', etc.
                    metadata[metadata_key] = "NULL"
                else:
                    # we want to run exact queries on defined values.
                    metadata[metadata_key] = metadata_value.encode('utf-8', 'ignore')
                    append_to_label.append(metadata_value)
            # Add parentheses to other values, as they are secondary
            if append_to_label:
                label = label + ' (' + ', '.join(append_to_label) + ')'
            # Quote metadata to force exact matches on metadata
            for m in metadata:
                # skip metadata already in the original query: it could be a
                # glob search
                if m not in request.metadata:
                    if metadata[m] and m != "date" and metadata[m] != "NULL":
                        if not metadata[m].startswith('"'):
                            metadata[m] = '"%s"' % metadata[m]
            # Now build the url from request.
            url = make_absolute_query_link(config, request, frequency_field="",
                                           start="0", end="0",
                                           report=request.report, script='',
                                           **metadata)
            table[label] = {'count': count, 'url': url, 'metadata': metadata}
        frequency_object['results'] = table
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            frequency_object['more_results'] = False
        else:
            frequency_object['more_results'] = True
    except IndexError:
        # Ran past the end of a hitlist that is still being filled.
        frequency_object['results'] = {}
        frequency_object['more_results'] = False
    frequency_object['results_length'] = len(hits)
    frequency_object['query'] = dict([i for i in request])
    if sorted:
        # Rank by descending count via list.sort(); the builtin sorted() is
        # shadowed by the parameter and cannot be called here.
        ranked = list(frequency_object['results'].iteritems())
        ranked.sort(key=lambda item: item[1]['count'], reverse=True)
        frequency_object["results"] = ranked
    return frequency_object