def get_web_config(environ, start_response):
    """WSGI endpoint that emits the web configuration as UTF-8 encoded JSON.

    Starts a 200 response with JSON and CORS headers, builds a ``WebConfig``
    from this file's directory (with ``scripts`` removed from the path),
    stores the result of ``time_series_tester`` on it and yields the
    serialized configuration. ``environ`` is not read.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    script_dir = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(script_dir.replace("scripts", ""))
    web_config.time_series_status = time_series_tester(web_config)
    payload = web_config.to_json()
    yield payload.encode("utf8")
def get_web_config(_, start_response):
    """WSGI endpoint that emits the web configuration plus metadata fields.

    The first argument (the WSGI environ) is ignored. After starting a 200
    JSON response, the configuration is loaded from this file's directory
    (with ``scripts`` removed), annotated with the ``time_series_tester``
    result, and extended with the ``metadata_fields`` read from
    ``data/db.locals.py`` under the ``available_metadata`` key.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    root_path = here.replace("scripts", "")
    web_config = WebConfig(root_path)
    web_config.time_series_status = time_series_tester(web_config)
    locals_path = os.path.join(root_path, "data/db.locals.py")
    local_settings = MakeDBConfig(locals_path)
    web_config.data["available_metadata"] = local_settings.metadata_fields
    serialized = web_config.to_json()
    yield serialized.encode("utf8")
def time_series(environ, start_response):
    """WSGI endpoint returning the time series report as UTF-8 encoded JSON.

    The report is generated before the response is started, so the 200
    status is only sent once ``generate_time_series`` has returned.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("reports", ""))
    request = WSGIHandler(environ, web_config)
    report = generate_time_series(request, web_config)
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    body = json.dumps(report)
    yield body.encode("utf8")
def term_group(environ, start_response):
    """WSGI endpoint that renders the parsed query as a list of term groups.

    With an empty ``q`` parameter the response is an empty ``term_groups``
    list. Otherwise the query is parsed, grouped and split, and each group is
    rendered as a string: terms and quotes are space-separated, ``OR``
    becomes ``|`` and only the first ``NOT`` of a group is written out.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    if not request["q"]:
        payload = json.dumps({"original_query": "", "term_groups": []})
    else:
        # The hit list itself is not consulted below; the query is still
        # issued exactly as before.
        hits = db.query(
            request["q"],
            request["method"],
            request["arg"],
            sort_order=request["sort_order"],
            **request.metadata
        )
        split_groups = split_terms(group_terms(parse_query(request.q)))
        rendered_groups = []
        for group in split_groups:
            pieces = []
            seen_not = False
            for kind, term in group:
                if kind == "NOT":
                    # Only the first NOT of a group is emitted.
                    if not seen_not:
                        seen_not = True
                        pieces.append(" NOT ")
                elif kind == "OR":
                    pieces.append("|")
                elif kind in ("TERM", "QUOTE"):
                    pieces.append(" %s " % term)
            rendered_groups.append("".join(pieces).strip())
        payload = json.dumps({"term_groups": rendered_groups, "original_query": request.original_q})
    yield payload.encode("utf8")
def lookup_word_service(environ, start_response):
    """WSGI endpoint that looks up the selected word in its text context.

    The byte range searched depends on ``request.report``:

    * ``"concordance"``: the hit at ``request.position`` is fetched and a
      window of three times the configured ``concordance_length`` on each
      side of the hit span is used, as adjusted by ``adjust_bytes``.
    * ``"navigation"``: the byte range of the text object named by
      ``request.philo_id`` is used.

    Yields the UTF-8 encoded result of ``lookup_word``.

    Raises:
        ValueError: if ``request.report`` is neither of the two supported
            values. Previously this fell through and failed later with an
            unbound-local error.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    cursor = db.dbh.cursor()
    if request.report == "concordance":
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        context_size = config["concordance_length"] * 3
        hit = hits[int(request.position)]
        # Named hit_bytes so the builtin ``bytes`` is not shadowed.
        hit_bytes = hit.bytes
        hit_span = hit_bytes[-1] - hit_bytes[0]
        length = context_size + hit_span + context_size
        # Only the adjusted start offset is needed; the adjusted byte list
        # returned alongside it is not used.
        _, start_byte = adjust_bytes(hit_bytes, length)
        end_byte = start_byte + length
        filename = hit.filename
        token = request.selected
    elif request.report == "navigation":
        token = request.selected
        philo_id = request.philo_id.split(" ")
        text_obj = db[philo_id]
        start_byte, end_byte = int(text_obj.start_byte), int(text_obj.end_byte)
        filename = text_obj.filename
    else:
        raise ValueError("unsupported report type for word lookup: %r" % (request.report,))
    token_n = 0
    yield lookup_word(db, cursor, token, token_n, start_byte, end_byte, filename).encode("utf8")
def get_start_end_date(environ, start_response):
    """WSGI endpoint returning the resolved date range and its hit count.

    The start and end dates come from ``start_end_date``. They are written
    into the request metadata as a ``year`` range, the request's own
    ``start_date``/``end_date`` are blanked, and the query is run to
    completion so the total number of hits can be reported. The body is JSON
    although the response is declared as ``text/html``.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "text/html; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    first_date, last_date = start_end_date(
        db, web_config, start_date=request.start_date, end_date=request.end_date
    )
    request.metadata["year"] = "{}-{}".format(first_date, last_date)
    request["start_date"] = ""
    request["end_date"] = ""
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    # Let the search finish before counting the hits.
    hits.finish()
    hit_count = len(hits)
    response = {
        "start_date": first_date,
        "end_date": last_date,
        "total_results": hit_count,
    }
    yield json.dumps(response).encode("utf8")
def navigation(environ, start_response):
    """WSGI endpoint returning the text object for navigation as JSON.

    The response is started with a 200 status before the text object is
    generated by ``generate_text_object``.

    The serialized JSON is encoded to UTF-8 before being yielded: WSGI
    response bodies must be byte strings, and the other endpoints here
    already encode their output the same way.
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
    request = WSGIHandler(environ, config)
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response("200 OK", headers)
    text_object = generate_text_object(request, config)
    yield simplejson.dumps(text_object).encode("utf8")
def bibliography(environ, start_response):
    """WSGI endpoint returning the bibliography report as JSON.

    ``bibliography_results`` returns the report object together with the hit
    list; only the report object is serialized.

    The serialized JSON is encoded to UTF-8 before being yielded: WSGI
    response bodies must be byte strings, and the other endpoints here
    already encode their output the same way.
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
    request = WSGIHandler(environ, config)
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response("200 OK", headers)
    # The second element (the hit list) is not needed for the response.
    bibliography_object, _ = bibliography_results(request, config)
    yield simplejson.dumps(bibliography_object).encode("utf8")
def get_bibliography(environ, start_response):
    """WSGI endpoint returning the landing page bibliography as JSON bytes.

    Starts a 200 JSON response, then serializes whatever
    ``landing_page_bibliography`` returns for the request.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    entries = landing_page_bibliography(request, web_config)
    body = json.dumps(entries)
    yield body.encode("utf8")
def alignment_to_text(environ, start_response):
    """WSGI endpoint that turns a byte range into a link, returned as JSON.

    The link is produced by ``byte_range_to_link`` and wrapped in a
    ``{"link": ...}`` object.

    The serialized JSON is encoded to UTF-8 before being yielded: WSGI
    response bodies must be byte strings, and the other endpoints here
    already encode their output the same way.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    link = byte_range_to_link(db, config, request)
    yield simplejson.dumps({"link": link}).encode("utf8")
def get_header(environ, start_response):
    """WSGI endpoint returning the TEI header for the request as HTML bytes.

    Starts a 200 ``text/html`` response and yields the UTF-8 encoded string
    produced by ``get_tei_header``.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "text/html; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    tei_header = get_tei_header(request, web_config)
    yield tei_header.encode("utf8")
def get_frequency(environ, start_response):
    """WSGI endpoint returning the word frequency report as JSON bytes.

    Starts a 200 JSON response, then serializes the object produced by
    ``generate_word_frequency`` for the request.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    frequencies = generate_word_frequency(request, web_config)
    body = json.dumps(frequencies)
    yield body.encode("utf8")
def alignment_to_text(environ, start_response):
    """WSGI endpoint that turns a byte range into a link, returned as JSON.

    Starts a 200 JSON response and yields ``{"link": ...}`` where the link
    comes from ``byte_range_to_link``, serialized and UTF-8 encoded.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    response = {"link": byte_range_to_link(db, web_config, request)}
    yield dumps(response).encode("utf8")
def get_notes(environ, start_response):
    """WSGI endpoint returning a text object generated in note mode as JSON.

    Starts a 200 JSON response, opens the database, and serializes the
    object returned by ``generate_text_object(..., note=True)``.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    # The database handle is created here but not passed on.
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    note_object = generate_text_object(request, web_config, note=True)
    body = json.dumps(note_object)
    yield body.encode("utf8")
def get_frequency(environ, start_response):
    """WSGI endpoint returning the frequency report as JSON bytes.

    Delegates to ``frequency_results``, which is expected to read through
    the hit list, look up the requested frequency field in each hit and
    count the unique values (per the original description of this
    endpoint).
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    frequencies = frequency_results(request, web_config)
    body = json.dumps(frequencies)
    yield body.encode("utf8")
def metadata_list(environ, start_response):
    """WSGI endpoint returning metadata autocomplete suggestions.

    Passes the request's ``term`` and ``field`` to ``autocomplete_metadata``
    and yields its string result UTF-8 encoded.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    suggestions = autocomplete_metadata(request.term, request.field, db)
    yield suggestions.encode("utf8")
def term_list(environ, start_response):
    """WSGI endpoint returning up to 100 word suggestions for a term.

    When the ``term`` parameter arrives as a list, only its last element is
    used. The result of ``format_query`` is truncated to its first 100
    entries and returned as JSON bytes.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    raw_term = request.term
    lookup_term = raw_term[-1] if isinstance(raw_term, list) else raw_term
    suggestions = format_query(lookup_term, db, web_config)
    yield json.dumps(suggestions[:100]).encode("utf8")
def term_list(environ, start_response):
    """WSGI endpoint returning the first expanded form of the query as JSON.

    Runs the query to completion, asks ``get_expanded_query`` for the
    expanded terms of the hit list and returns the first element of that
    result.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    # Let the search finish before asking for the expanded query.
    hits.finish()
    expansions = get_expanded_query(hits)
    first_expansion = expansions[0]
    yield json.dumps(first_expansion).encode("utf8")
def get_text_object(environ, start_response):
    """WSGI endpoint returning the text object for ``philo_id`` as JSON bytes.

    ``request.philo_id`` is a space-separated id string. It is padded with
    trailing ``0`` components up to seven components before the text object
    is generated.

    The padding is computed from the number of components. It was previously
    computed from the character length of the string, which under-padded any
    id with more than one component or with multi-digit components.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    missing = 7 - len(request.philo_id.split())
    if missing > 0:
        request.philo_id += missing * " 0"
    # NOTE(review): the wrapper is not used afterwards; it is still built in
    # case its construction touches the database — confirm it can be dropped.
    obj = ObjectWrapper(request["philo_id"].split(), db)
    text_object = generate_text_object(request, config)
    yield json.dumps(text_object).encode("utf8")
def get_more_context(environ, start_response):
    """WSGI endpoint returning an enlarged context for one hit as JSON bytes.

    The hit is selected by the ``hit_num`` request parameter and its context
    is fetched with three times the configured ``concordance_length``.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    hit_index = int(request.hit_num)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    wide_context = web_config["concordance_length"] * 3
    selected_hit = hits[hit_index]
    context_text = get_concordance_text(db, selected_hit, web_config.db_path, wide_context)
    yield json.dumps(context_text).encode("utf8")
def get_sorted_kwic(environ, start_response):
    """WSGI endpoint that sorts KWIC results supplied in the request body.

    The body is a JSON object with ``results``, ``query_string``,
    ``sort_keys``, ``start`` and ``end``. Empty sort keys are dropped and
    ``query_string`` replaces ``QUERY_STRING`` in ``environ`` before the
    request is parsed. The output of ``get_sorted_hits`` is returned as JSON
    bytes.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    raw_body = environ["wsgi.input"].read()
    posted = json.loads(raw_body.decode("utf8", "ignore"))
    kwic_results = posted["results"]
    posted_query = posted["query_string"]
    active_keys = [key for key in posted["sort_keys"] if key]
    # The request handler parses the query string carried in the body.
    environ["QUERY_STRING"] = posted_query
    request = WSGIHandler(environ, web_config)
    ordered = get_sorted_hits(
        kwic_results, active_keys, request, web_config, db, posted["start"], posted["end"]
    )
    yield json.dumps(ordered).encode("utf8")
def get_table_of_contents(environ, start_response):
    """WSGI endpoint returning the table of contents as JSON bytes.

    The TOC object from ``generate_toc_object`` gets an extra
    ``current_obj_position`` key: the index of the first TOC entry whose
    ``philo_id`` equals the whitespace-normalized requested id, or 0 when
    none matches.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    # Split and re-join to collapse any irregular whitespace in the id.
    wanted_id = " ".join(request["philo_id"].split())
    toc = generate_toc_object(request, web_config)
    toc["current_obj_position"] = next(
        (index for index, entry in enumerate(toc["toc"]) if entry["philo_id"] == wanted_id),
        0,
    )
    yield json.dumps(toc).encode("utf8")
def landing_page_content(environ, start_response):
    """WSGI endpoint returning landing page content grouped by range or metadata.

    When the ``is_range`` parameter equals ``"true"``, the ``query``
    parameter is lower-cased and split on ``-`` into a range that is passed
    to ``group_by_range``; otherwise ``group_by_metadata`` is used. The
    string result is yielded UTF-8 encoded.

    A bytes ``query`` is decoded as UTF-8 before being split. Previously the
    decoded value was discarded and the raw value was split instead.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    request = WSGIHandler(environ, config)
    if request.is_range == "true":
        query = request.query
        if isinstance(query, bytes):
            query = query.decode("utf8")
        request_range = query.lower().split("-")
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results.encode("utf8")
def get_start_end_date(environ, start_response):
    """WSGI endpoint returning the resolved start and end dates.

    The dates come from ``start_end_date``, called with the request's own
    ``start_date`` and ``end_date``. The body is JSON although the response
    is declared as ``text/html``.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "text/html; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    first_date, last_date = start_end_date(
        db, web_config, start_date=request.start_date, end_date=request.end_date
    )
    response = {"start_date": first_date, "end_date": last_date}
    yield json.dumps(response).encode("utf8")
def get_table_of_contents(environ, start_response):
    """WSGI endpoint returning the table of contents as JSON bytes.

    Adds ``current_obj_position`` to the TOC object: the index of the first
    entry whose ``philo_id`` matches the whitespace-normalized requested id,
    defaulting to 0 when there is no match.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    # Split and re-join to collapse any irregular whitespace in the id.
    wanted_id = " ".join(request["philo_id"].split())
    toc = generate_toc_object(request, web_config)
    matching_positions = (
        index for index, entry in enumerate(toc["toc"]) if entry["philo_id"] == wanted_id
    )
    toc["current_obj_position"] = next(matching_positions, 0)
    yield json.dumps(toc).encode("utf8")
def landing_page_content(environ, start_response):
    """WSGI endpoint returning landing page content grouped by range or metadata.

    When the ``is_range`` parameter equals ``"true"``, the ``query``
    parameter is lower-cased and split on ``-`` into a range. A single value
    is used as both ends of the range. The range is passed to
    ``group_by_range``; otherwise ``group_by_metadata`` is used. The string
    result is yielded UTF-8 encoded.

    A bytes ``query`` is decoded as UTF-8 before being split. Previously the
    decoded value was discarded and the raw value was split instead.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    request = WSGIHandler(environ, config)
    if request.is_range == "true":
        query = request.query
        if isinstance(query, bytes):
            query = query.decode("utf8")
        request_range = query.lower().split("-")
        if len(request_range) == 1:
            # No "-" in the query: treat the single value as start and end.
            request_range.append(request_range[0])
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results.encode("utf8")
def access_request(environ, start_response):
    """WSGI endpoint reporting whether the client is granted access.

    ``login_access`` decides on access and returns the headers to send, so
    the response is only started after it has run. On denial the client's
    incoming address and domain name are included in the JSON body.
    """
    base_headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    request = WSGIHandler(environ, web_config)
    granted, response_headers = login_access(environ, request, web_config, base_headers)
    start_response("200 OK", response_headers)
    if granted:
        yield json.dumps({"access": True}).encode("utf8")
        return
    client_address, client_domain = access_control.get_client_info(environ)
    denial = {
        "access": False,
        "incoming_address": client_address,
        "domain_name": client_domain,
    }
    yield json.dumps(denial).encode("utf8")
def access_request(environ, start_response):
    """WSGI endpoint reporting whether the client is granted access.

    ``login_access`` decides on access and returns the headers to send, so
    the response is only started after it has run. On denial the client's
    incoming address and domain name are included in the JSON body.

    Both branches yield UTF-8 encoded bytes. Previously the access-granted
    branch yielded an unencoded string while the denied branch encoded its
    output.
    """
    status = "200 OK"
    headers = [
        ("Content-type", "application/json; charset=UTF-8"),
        ("Access-Control-Allow-Origin", "*"),
    ]
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    # NOTE(review): the database handle is not used below; it is still opened
    # in case constructing it matters — confirm it can be dropped.
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    access, headers = login_access(environ, request, config, headers)
    start_response(status, headers)
    if access:
        yield json.dumps({"access": True}).encode("utf8")
    else:
        incoming_address, domain_name = access_control.get_client_info(environ)
        yield json.dumps({
            "access": False,
            "incoming_address": incoming_address,
            "domain_name": domain_name,
        }).encode("utf8")
def get_total_results(environ, start_response):
    """WSGI endpoint returning the total number of hits as a JSON number.

    Three cases are handled: a word query (``no_q`` false), a
    metadata-only query, and no query at all, which lists every object at
    the database's ``default_object_level``. The hit list is run to
    completion before it is counted.
    """
    start_response(
        "200 OK",
        [
            ("Content-type", "application/json; charset=UTF-8"),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    here = os.path.abspath(os.path.dirname(__file__))
    web_config = WebConfig(here.replace("scripts", ""))
    db = DB(web_config.db_path + "/data/")
    request = WSGIHandler(environ, web_config)
    if not request.no_q:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals["default_object_level"], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    # Let the search finish before counting the hits.
    hits.finish()
    hit_count = len(hits)
    yield json.dumps(hit_count).encode("utf8")
def resolve_cite_service(environ, start_response):
    """WSGI endpoint that redirects a citation string to the best matching URL.

    The part of ``request.q`` before the first ``" - "`` is treated as the
    milestone. Every prefix of the milestone that ends at a separator (a
    space not followed by ``.``, or a ``.`` not followed by a space), plus
    the full milestone, is looked up in the ``abbrev`` column of ``toms``.
    The last prefix that matches wins. Within the matched document, the first
    navigation entry whose ``head`` equals ``request.q`` gives the target
    object.

    Always answers with ``302 Found``. The ``Location`` is the matched
    object's absolute link, or ``config["db_url"]`` when no abbreviation or
    no head matches. Previously a citation with no matching abbreviation
    raised ``TypeError`` by subscripting ``None`` instead of using that
    fallback.

    Progress is traced to ``sys.stderr``.
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    c = db.dbh.cursor()
    q = request.q
    # Fallback target when nothing more specific can be resolved.
    best_url = config["db_url"]

    # str.split returns [q] when " - " is absent, so this covers both cases.
    milestone = q.split(" - ")[0]

    milestone_segments = []
    milestone_prefixes = []
    last_segment = 0
    for separator in re.finditer(r" (?!\.)|\.(?! )", milestone):
        milestone_prefixes.append(milestone[:separator.start()])
        milestone_segments.append(milestone[last_segment:separator.start()])
        last_segment = separator.end()
    milestone_segments.append(milestone[last_segment:])
    milestone_prefixes.append(milestone)
    print("SEGMENTS", repr(milestone_segments), file=sys.stderr)
    print("PREFIXES", repr(milestone_prefixes), file=sys.stderr)

    # Prefixes go from shortest to longest and the loop does not stop at the
    # first match, so the longest matching prefix is kept.
    abbrev_match = None
    for v in milestone_prefixes:
        print("QUERYING for abbrev = ", v, file=sys.stderr)
        abbrev_q = c.execute("SELECT * FROM toms WHERE abbrev = ?;", (v, )).fetchone()
        if abbrev_q:
            abbrev_match = abbrev_q

    if abbrev_match is not None:
        print("ABBREV", abbrev_match["abbrev"], abbrev_match["philo_id"], file=sys.stderr)
        doc_obj = ObjectWrapper(abbrev_match["philo_id"].split(), db)
        nav = nav_query(doc_obj, db)
        best_match = None
        for n in nav:
            if n["head"] == request.q:
                print("MATCH", n["philo_id"], n["n"], n["head"], file=sys.stderr)
                best_match = n
                break
        if best_match:
            # Number of leading philo_id components that identify an object
            # of each type.
            type_offsets = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
            t = best_match["philo_type"]
            short_id = best_match["philo_id"].split()[:type_offsets[t]]
            best_url = f.make_absolute_object_link(config, short_id)

    print("BEST_URL", best_url, file=sys.stderr)
    status = "302 Found"
    headers = [("Location", best_url)]
    start_response(status, headers)
    return ""