def get_start_end_date(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    start_date, end_date = start_end_date(db, config, start_date=request.start_date, end_date=request.end_date)
    request.metadata["year"] = "{}-{}".format(start_date, end_date)
    request["start_date"] = ""
    request["end_date"] = ""
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    total_results = 0
    hits.finish()
    total_results = len(hits)
    yield json.dumps({"start_date": start_date, "end_date": end_date, "total_results": total_results}).encode("utf8")
def bibliography(environ, start_response):
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('reports', ''))
    request = WSGIHandler(environ, config)
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', headers)
    bibliography_object, hits = bibliography_results(request, config)
    yield simplejson.dumps(bibliography_object)
Esempio n. 3
0
def get_bibliography(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    request = WSGIHandler(environ, config)
    results = landing_page_bibliography(request, config)
    yield json.dumps(results).encode('utf8')
Esempio n. 4
0
def alignment_to_text(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    link = byte_range_to_link(db, config, request)
    yield simplejson.dumps({"link": link})
Esempio n. 5
0
def metadata_list(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    metadata = request.term
    field = request.field
    yield autocomplete_metadata(metadata, field, db).encode("utf8")
Esempio n. 6
0
def get_start_end_date(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    start_date, end_date = start_end_date(db,
                                          config,
                                          start_date=request.start_date,
                                          end_date=request.end_date)
    yield json.dumps({
        "start_date": start_date,
        "end_date": end_date
    }).encode('utf8')
def landing_page_content(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    request = WSGIHandler(environ, config)
    if request.is_range == "true":
        if isinstance(request.query, bytes):
            request_range = request.query.decode("utf8")
        request_range = request.query.lower().split("-")
        if len(request_range) == 1:
            request_range.append(request_range[0])
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results.encode("utf8")
Esempio n. 8
0
def access_request(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    access, headers = login_access(environ, request, config, headers)
    start_response(status, headers)
    if access:
        yield json.dumps({'access': True})
    else:
        incoming_address, domain_name = access_control.get_client_info(environ)
        yield json.dumps({
            'access': False,
            "incoming_address": incoming_address,
            "domain_name": domain_name
        }).encode('utf8')
Esempio n. 9
0
def resolve_cite_service(environ, start_response):
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    c = db.dbh.cursor()
    q = request.q

    best_url = config["db_url"]

    if " - " in q:
        milestone = q.split(" - ")[0]
    else:
        milestone = q

    milestone_segments = []
    last_segment = 0
    milestone_prefixes = []
    for separator in re.finditer(r" (?!\.)|\.(?! )", milestone):
        milestone_prefixes += [milestone[:separator.start()]]
        milestone_segments += [milestone[last_segment:separator.start()]]
        last_segment = separator.end()
    milestone_segments += [milestone[last_segment:]]
    milestone_prefixes += [milestone]

    print("SEGMENTS", repr(milestone_segments), file=sys.stderr)
    print("PREFIXES", repr(milestone_prefixes), file=sys.stderr)

    abbrev_match = None
    for pos, v in enumerate(milestone_prefixes):
        print("QUERYING for abbrev = ", v, file=sys.stderr)
        abbrev_q = c.execute("SELECT * FROM toms WHERE abbrev = ?;",
                             (v, )).fetchone()
        if abbrev_q:
            abbrev_match = abbrev_q

    print("ABBREV",
          abbrev_match["abbrev"],
          abbrev_match["philo_id"],
          file=sys.stderr)
    doc_obj = ObjectWrapper(abbrev_match["philo_id"].split(), db)

    nav = nav_query(doc_obj, db)

    best_match = None
    for n in nav:
        if n["head"] == request.q:
            print("MATCH", n["philo_id"], n["n"], n["head"], file=sys.stderr)
            best_match = n
            break

    if best_match:
        type_offsets = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
        t = best_match["philo_type"]
        short_id = best_match["philo_id"].split()[:type_offsets[t]]
        best_url = f.make_absolute_object_link(config, short_id)
        print("BEST_URL", best_url, file=sys.stderr)

    status = "302 Found"
    redirect = config["db_url"]
    headers = [("Location", best_url)]
    start_response(status, headers)

    return ""
Esempio n. 10
0
def get_neighboring_words(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)

    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    try:
        index = int(request.hits_done)
    except:
        index = 0

    max_time = int(request.max_time)

    kwic_words = []
    start_time = timeit.default_timer()
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    cursor = db.dbh.cursor()

    for hit in hits[index:]:
        word_id = ' '.join([str(i) for i in hit.philo_id])
        query = 'select rowid, philo_name, parent from words where philo_id="%s" limit 1' % word_id
        cursor.execute(query)
        results = cursor.fetchone()

        highlighted_text = kwic_hit_object(hit, config, db)["highlighted_text"]
        highlighted_text = highlighted_text.translate(remove_punctuation_map)
        highlighted_text = highlighted_text.strip()

        result_obj = {
            "left": "",
            "right": "",
            "index": index,
            "q": highlighted_text
        }

        left_rowid = results["rowid"] - 10
        right_rowid = results["rowid"] + 10

        cursor.execute(
            'select philo_name, philo_id from words where rowid between ? and ?',
            (left_rowid, results['rowid'] - 1))
        result_obj["left"] = []
        for i in cursor:
            result_obj["left"].append(i['philo_name'])
        result_obj["left"].reverse()
        result_obj["left"] = ' '.join(result_obj["left"])

        cursor.execute(
            'select philo_name, philo_id from words where rowid between ? and ?',
            (results['rowid'] + 1, right_rowid))
        result_obj["right"] = []
        for i in cursor:
            result_obj["right"].append(i['philo_name'])
        result_obj["right"] = ' '.join(result_obj["right"])

        for metadata in config.kwic_metadata_sorting_fields:
            result_obj[metadata] = hit[metadata].lower()

        kwic_words.append(result_obj)

        index += 1

        elapsed = timeit.default_timer() - start_time
        if elapsed > max_time:  # avoid timeouts by splitting the query if more than 10 seconds has been spent in the loop
            break

    yield json.dumps({
        "results": kwic_words,
        "hits_done": index
    }).encode('utf8')