Esempio n. 1
0
def philo_dispatcher(environ, start_response):
    """Dispatch a WSGI request to a report handler or to ``angular``.

    JSON requests (by content type or by ``format``) are routed on the last
    component of ``PATH_INFO``: ``table-of-contents`` goes to
    ``reports.table_of_contents``, any other path goes to
    ``reports.navigation``, and an empty path falls back to the report named
    by the ``report`` form field.  Every other request is served by
    ``angular``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable, forwarded to the
            selected handler.

    Yields:
        The complete response body encoded as UTF-8.
    """
    loop = get_event_loop()
    # Schedule the cleanup coroutine up front; it is driven to completion in
    # the ``finally`` clause so it also runs when a handler raises or the
    # server closes this generator early.
    clean_task = loop.create_task(clean_up())
    try:
        config = WebConfig(path)
        request = WSGIHandler(environ, config)
        if request.content_type == "application/json" or request.format == "json":
            # A missing or empty PATH_INFO simply means "no path components".
            path_info = environ.get("PATH_INFO") or ""
            path_components = [c for c in path_info.split("/") if c]
            if path_components:
                if path_components[-1] == "table-of-contents":
                    report = reports.table_of_contents
                else:
                    report = reports.navigation
            else:
                # NOTE(review): the report name comes straight from the
                # request and is used for attribute lookup on ``reports``;
                # consider restricting it to a known list of report names.
                report = getattr(reports, FieldStorage().getvalue("report"))
            response = "".join(report(environ, start_response))
        else:
            response = angular(environ, start_response)
        yield response.encode("utf8")
    finally:
        loop.run_until_complete(clean_task)
Esempio n. 2
0
def lookup_word_service(environ, start_response):
    """Look up the selected word inside a concordance hit or a text object.

    For the ``concordance`` report the byte window is built around the hit at
    ``request.position``; for the ``navigation`` report it is the byte range
    of the text object addressed by ``request.philo_id``.  The single yielded
    value is whatever ``lookup_word`` returns for that window.
    """
    start_response(
        '200 OK',
        [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")])
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    cursor = db.dbh.cursor()

    if request.report == "concordance":
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        padding = config['concordance_length'] * 3
        hit = hits[int(request.position)]
        hit_bytes = hit.bytes
        # Window = padding on each side of the span covered by the hit.
        window = padding + (hit.bytes[-1] - hit.bytes[0]) + padding
        hit_bytes, start_byte = adjust_bytes(hit_bytes, window)
        end_byte = start_byte + window
        filename = hit.filename
        token = request.selected
    elif request.report == "navigation":
        token = request.selected
        text_obj = db[request.philo_id.split(" ")]
        start_byte = int(text_obj.start_byte)
        end_byte = int(text_obj.end_byte)
        filename = text_obj.filename
    # NOTE(review): any other report value leaves ``token`` and the byte
    # range unbound, so the call below raises.

    yield lookup_word(db, cursor, token, 0, start_byte, end_byte, filename)
def resolve_cite_service(environ, start_response):
    """Redirect a citation query to the best matching object URL.

    The part of ``request.q`` before the first ``" - "`` is treated as a
    milestone.  Each prefix of the milestone (cut at the separators matched
    by the regex below) is looked up in ``toms.abbrev``; the last prefix that
    matches selects the document.  The document's navigation entries are then
    scanned for one whose ``head`` equals the full query, and the response is
    a ``302 Found`` pointing at that object.  When nothing matches, the
    redirect target is ``config['db_url']``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable.

    Returns:
        An empty string (the response carries no body).
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    c = db.dbh.cursor()
    q = request.q

    # Fallback redirect target, used whenever no better match is found.
    best_url = config['db_url']

    # Everything before the first " - " (the whole query if there is none).
    milestone = q.split(" - ")[0]

    milestone_segments = []
    last_segment = 0
    milestone_prefixes = []
    for separator in re.finditer(r' (?!\.)|\.(?! )', milestone):
        milestone_prefixes.append(milestone[:separator.start()])
        milestone_segments.append(milestone[last_segment:separator.start()])
        last_segment = separator.end()
    milestone_segments.append(milestone[last_segment:])
    milestone_prefixes.append(milestone)

    print >> sys.stderr, "SEGMENTS", repr(milestone_segments)
    print >> sys.stderr, "PREFIXES", repr(milestone_prefixes)

    # Prefixes grow in length, so the last hit is the longest matching one.
    abbrev_match = None
    for prefix in milestone_prefixes:
        print >> sys.stderr, "QUERYING for abbrev = ", prefix
        abbrev_q = c.execute("SELECT * FROM toms WHERE abbrev = ?;", (prefix, )).fetchone()
        if abbrev_q:
            abbrev_match = abbrev_q

    best_match = None
    # Without a matching abbreviation there is no document to search; keep
    # the fallback URL instead of failing on a ``None`` row.
    if abbrev_match is not None:
        print >> sys.stderr, "ABBREV", abbrev_match["abbrev"], abbrev_match["philo_id"]
        doc_obj = ObjectWrapper(abbrev_match['philo_id'].split(), db)

        for n in nav_query(doc_obj, db):
            if n["head"] == request.q:
                print >> sys.stderr, "MATCH", n["philo_id"], n["n"], n["head"]
                best_match = n
                break

    if best_match:
        # Number of leading philo_id components that address each object type.
        type_offsets = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
        t = best_match['philo_type']
        short_id = best_match["philo_id"].split()[:type_offsets[t]]
        best_url = f.make_absolute_object_link(config, short_id)
        print >> sys.stderr, "BEST_URL", best_url

    start_response('302 Found', [('Location', best_url)])

    return ""
Esempio n. 4
0
def get_filter_list(environ, start_response):
    """Yield the result of ``build_filter_list`` as UTF-8 encoded JSON."""
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    # Application directory: this script's directory with 'scripts' removed.
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    request = WSGIHandler(environ, config)
    payload = json.dumps(build_filter_list(request, config))
    yield payload.encode('utf8')
Esempio n. 5
0
def get_frequency(environ, start_response):
    """Yield the word-frequency object for the request as a JSON string.

    Sends a ``200 OK`` JSON response header, wraps the request and yields the
    ``simplejson`` dump of the value returned by ``generate_word_frequency``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    request = WSGIHandler(environ, config)
    word_frequency_object = generate_word_frequency(request, config)
    # Serialize with ``simplejson``, as the sibling JSON handlers do.
    yield simplejson.dumps(word_frequency_object)
def get_header(environ, start_response):
    """Yield the value returned by ``get_tei_header`` as an HTML response."""
    start_response(
        '200 OK',
        [
            ('Content-type', 'text/html; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    # Application directory: this script's directory with 'scripts' removed.
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    yield get_tei_header(WSGIHandler(environ, config), config)
def get_bibliography(environ, start_response):
    """Yield the ``landing_page_bibliography`` result as a JSON string."""
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    request = WSGIHandler(environ, config)
    bibliography = landing_page_bibliography(request, config)
    yield simplejson.dumps(bibliography)
def alignment_to_text(environ, start_response):
    """Yield ``{"link": ...}`` as JSON, with the link from ``byte_range_to_link``."""
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    body = {"link": byte_range_to_link(db, config, request)}
    yield simplejson.dumps(body)
Esempio n. 9
0
def get_notes(environ, start_response):
    """Yield the text object built with ``note=True`` as UTF-8 encoded JSON."""
    start_response(
        '200 OK',
        [
            ('Content-type', 'application/json; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    # The database handle is opened here but not referenced afterwards.
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    note_object = generate_text_object(request, config, note=True)
    serialized = json.dumps(note_object)
    yield serialized.encode('utf8')
Esempio n. 10
0
def metadata_list(environ, start_response):
    """Yield the ``autocomplete_metadata`` result for the request, UTF-8 encoded.

    The term and field passed to ``autocomplete_metadata`` are read from
    ``request.term`` and ``request.field``.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    suggestions = autocomplete_metadata(request.term, request.field, db)
    yield suggestions.encode('utf8')
def get_frequency(environ, start_response):
    """Yield the sorted ``frequency_results`` for the request as UTF-8 JSON.

    ``frequency_results`` is called with ``sorted_results=True``; its return
    value is dumped with ``json`` and encoded before being yielded.
    """
    start_response(
        '200 OK',
        [
            ('Content-type', 'application/json; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    request = WSGIHandler(environ, config)
    frequencies = frequency_results(request, config, sorted_results=True)
    serialized = json.dumps(frequencies)
    yield serialized.encode('utf8')
def term_list(environ, start_response):
    """Yield the first element of the expanded query for the request as JSON.

    The query is run, ``finish()`` is called on the hitlist, and the first
    item returned by ``get_expanded_query`` is serialized.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hitlist = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hitlist.finish()
    first_expansion = get_expanded_query(hitlist)[0]
    yield simplejson.dumps(first_expansion)
def term_list(environ, start_response):
    """Yield up to 100 entries from ``format_query`` for the request term as JSON.

    When ``request.term`` is a list, only its last element is used.
    """
    start_response(
        '200 OK',
        [
            ('Content-type', 'application/json; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    raw_term = request.term
    term = raw_term[-1] if isinstance(raw_term, list) else raw_term
    matches = format_query(term, db, config)
    yield simplejson.dumps(matches[:100])
Esempio n. 14
0
def get_start_end_date(environ, start_response):
    """Yield the start and end dates from ``start_end_date`` as a JSON object.

    The requested ``start_date`` and ``end_date`` are passed to
    ``start_end_date`` and the two values it returns are serialized under the
    same two keys.
    """
    # NOTE(review): the body is JSON but the declared content type is
    # text/html; confirm whether clients depend on that before changing it.
    response_headers = [
        ('Content-type', 'text/html; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    first_date, last_date = start_end_date(
        db, config, start_date=request.start_date, end_date=request.end_date)
    body = {"start_date": first_date, "end_date": last_date}
    yield simplejson.dumps(body)
def get_sorted_kwic(environ, start_response):
    """Yield the ``get_sorted_hits`` result for a posted KWIC sort request as JSON.

    The request body is a JSON object read from ``wsgi.input``.  The keys
    read from it are ``results``, ``query_string``, ``sort_keys``, ``start``
    and ``end``.  The posted query string replaces ``QUERY_STRING`` in
    ``environ`` before the request wrapper is built.
    """
    start_response(
        '200 OK',
        [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")])
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    posted = simplejson.loads(environ['wsgi.input'].read())
    all_results = posted['results']
    query_string = posted['query_string']
    # Drop empty sort keys.
    sort_keys = [key for key in posted["sort_keys"] if key]
    environ['QUERY_STRING'] = query_string.encode('utf8')
    request = WSGIHandler(environ, config)
    sorted_hits = get_sorted_hits(
        all_results, sort_keys, request, config, db, posted['start'], posted['end'])
    yield simplejson.dumps(sorted_hits)
Esempio n. 16
0
def get_text_object(environ, start_response):
    """Yield the text object for the requested ``philo_id`` as a JSON string.

    ``request.philo_id`` is a space-separated id string; it is right-padded
    with ``0`` components up to 7 levels before the text object is generated
    by ``generate_text_object``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    # Pad by the number of id components, not by the character length of the
    # id string (which also counts the separating spaces and extra digits).
    missing = 7 - len(request.philo_id.split())
    if missing > 0:
        request.philo_id += missing * " 0"
    # NOTE(review): ``obj`` is not used below; the construction is kept in
    # case it validates the id or has other side effects -- confirm.
    obj = ObjectWrapper(request['philo_id'].split(), db)
    text_object = generate_text_object(request, config)
    yield simplejson.dumps(text_object)
Esempio n. 17
0
def get_more_context(environ, start_response):
    """Yield a wider concordance context for one hit as UTF-8 encoded JSON.

    The hit is selected by ``request.hit_num`` and the context size passed to
    ``get_concordance_text`` is three times ``config['concordance_length']``.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hit_index = int(request.hit_num)
    hitlist = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    widened_size = config['concordance_length'] * 3
    context = get_concordance_text(db, hitlist[hit_index], config.db_path, widened_size)
    serialized = json.dumps(context)
    yield serialized.encode('utf8')
def landing_page_content(environ, start_response):
    """Yield landing-page results grouped by range or by metadata.

    When ``request.is_range`` equals ``'true'``, ``request.query`` is
    lower-cased and split on ``-`` and the parts are passed to
    ``group_by_range``; otherwise ``group_by_metadata`` is used.  The value
    returned by the grouping function is yielded unchanged.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    request = WSGIHandler(environ, config)
    if request.is_range == 'true':
        request_range = request.query
        # Only byte strings need decoding; text values are used as they are,
        # so the range is always bound before it is split.
        if isinstance(request_range, bytes):
            request_range = request_range.decode("utf8")
        request_range = request_range.lower().split('-')
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results
Esempio n. 19
0
def get_table_of_contents(environ, start_response):
    """Yield the table of contents, annotated with the current object's position.

    The TOC object from ``generate_toc_object`` gets a
    ``current_obj_position`` key holding the index of the first TOC element
    whose ``philo_id`` equals the requested one (whitespace-normalized), or
    ``0`` when no element matches.  The result is UTF-8 encoded JSON.
    """
    start_response(
        '200 OK',
        [
            ('Content-type', 'application/json; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    request = WSGIHandler(environ, config)
    # Split and re-join to collapse any irregular whitespace in the id.
    wanted_id = ' '.join(request['philo_id'].split())
    toc_object = generate_toc_object(request, config)
    matching_positions = (
        position
        for position, element in enumerate(toc_object['toc'])
        if element['philo_id'] == wanted_id
    )
    toc_object['current_obj_position'] = next(matching_positions, 0)
    yield json.dumps(toc_object).encode('utf8')
Esempio n. 20
0
def angular(environ, start_response):
    """Return the HTML page for the application, or a misconfiguration page.

    ``config`` is not defined in this function; it is resolved from the
    enclosing scope.  With a valid configuration and access control enabled,
    an unauthenticated request is checked through
    ``access_control.check_access`` and, when a token comes back, its two
    parts are sent as ``hash`` and ``timestamp`` cookies.  The response
    status is always ``200 OK``.
    """
    headers = [
        ('Content-type', 'text/html; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    if config.valid_config:
        request = WSGIHandler(environ, config)
        if config.access_control and not request.authenticated:
            token = access_control.check_access(environ, config)
            if token:
                cookie_hash, cookie_timestamp = token
                headers.append(("Set-Cookie", "hash=%s" % cookie_hash))
                headers.append(("Set-Cookie", "timestamp=%s" % cookie_timestamp))
        html = build_html_page(config)
    else:
        # The webconfig file could not be loaded; show its traceback instead.
        # TODO handle errors in db.locals.py
        html = build_misconfig_page(config.traceback, 'webconfig.cfg')
    start_response('200 OK', headers)
    return html
def access_request(environ, start_response):
    """Yield a JSON object reporting whether the client was granted access.

    ``login_access`` returns the access flag and the headers to send.  When
    access is denied, the client's address and domain name from
    ``access_control.get_client_info`` are included in the response.
    """
    default_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    # The database handle is opened here but not referenced afterwards.
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    access, headers = login_access(environ, request, config, default_headers)
    start_response('200 OK', headers)
    if not access:
        incoming_address, domain_name = access_control.get_client_info(environ)
        body = {
            'access': False,
            "incoming_address": incoming_address,
            "domain_name": domain_name
        }
    else:
        body = {'access': True}
    yield simplejson.dumps(body)
Esempio n. 22
0
def term_group(environ, start_response):
    """Yield the query split into display-ready term groups, as UTF-8 JSON.

    An empty query produces empty ``original_query`` and ``term_groups``
    values.  Otherwise the parsed query is grouped and split, and each group
    is rendered as a string: ``NOT`` appears once per group, ``OR`` becomes
    ``|``, and terms and quotes are inserted as they are.
    """
    start_response(
        '200 OK',
        [
            ('Content-type', 'application/json; charset=UTF-8'),
            ("Access-Control-Allow-Origin", "*"),
        ],
    )
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if request["q"]:
        # The hitlist itself is not used below; the query is still issued.
        hits = db.query(request["q"],
                        request["method"],
                        request["arg"],
                        sort_order=request["sort_order"],
                        **request.metadata)
        all_groups = split_terms(group_terms(parse_query(request.q)))
        term_groups = []
        for group in all_groups:
            pieces = []
            seen_not = False
            for kind, term in group:
                if kind == 'NOT':
                    # Only the first NOT of a group is rendered.
                    if seen_not is False:
                        seen_not = True
                        pieces.append(' NOT ')
                elif kind == 'OR':
                    pieces.append('|')
                elif kind in ("TERM", "QUOTE"):
                    pieces.append(' %s ' % term)
            term_groups.append(''.join(pieces).strip())
        body = {
            "term_groups": term_groups,
            "original_query": request.original_q
        }
    else:
        body = {"original_query": "", "term_groups": []}
    yield json.dumps(body).encode('utf8')
Esempio n. 23
0
def get_total_results(environ, start_response):
    """Yield the total number of hits for the request as a JSON number.

    With no query and no metadata, all objects at the default object level
    are counted; with metadata only, a metadata query is run; otherwise the
    full query is run.  ``finish()`` is called on the hitlist before its
    length is taken.
    """
    response_headers = [
        ('Content-type', 'application/json; charset=UTF-8'),
        ("Access-Control-Allow-Origin", "*"),
    ]
    start_response('200 OK', response_headers)
    app_dir = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_dir)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if not request.no_q:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    hits.finish()
    yield simplejson.dumps(len(hits))
def philo_dispatcher(environ, start_response):
    """Dispatch a WSGI request to a report handler or to ``angular``.

    ``clean_up`` is called first.  JSON requests (by content type or by
    ``format``) are routed on the last component of ``PATH_INFO``:
    ``table-of-contents`` goes to ``reports.table_of_contents``, any other
    path goes to ``reports.navigation``, and an empty path falls back to the
    report named by the ``report`` form field.  Every other request is
    served by ``angular``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable, forwarded to the
            selected handler.

    Yields:
        The complete response body as a single string.
    """
    clean_up()
    config = WebConfig(path)
    request = WSGIHandler(environ, config)
    if request.content_type == "application/json" or request.format == "json":
        # A missing or empty PATH_INFO simply means "no path components".
        path_info = environ.get("PATH_INFO") or ""
        path_components = [c for c in path_info.split("/") if c]
        if path_components:
            if path_components[-1] == "table-of-contents":
                report = reports.table_of_contents
            else:
                report = reports.navigation
        else:
            # NOTE(review): the report name comes straight from the request
            # and is used for attribute lookup on ``reports``; consider
            # restricting it to a known list of report names.
            report = getattr(reports, FieldStorage().getvalue('report'))
        yield ''.join(report(environ, start_response))
    else:
        yield angular(environ, start_response)
def get_neighboring_words(environ, start_response):
    """Yield KWIC neighbours (10 words each side) for a slice of the hitlist.

    Processing starts at hit number ``request.hits_done`` (``0`` when that
    value is missing or not an integer) and stops once more than
    ``request.max_time`` seconds have elapsed, so the caller can resume with
    the returned ``hits_done``.

    Args:
        environ: WSGI environment mapping.
        start_response: WSGI ``start_response`` callable.

    Yields:
        UTF-8 encoded JSON with ``results`` (one object per processed hit:
        ``left``, ``right``, ``index``, ``q`` and one key per configured KWIC
        sorting field) and ``hits_done`` (index of the next unprocessed hit).
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)

    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    try:
        index = int(request.hits_done)
    except (AttributeError, TypeError, ValueError):
        # Best effort: start from the first hit when no usable offset is given.
        index = 0

    max_time = int(request.max_time)

    kwic_words = []
    start_time = timeit.default_timer()
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    c = db.dbh.cursor()

    for hit in hits[index:]:
        word_id = ' '.join(str(i) for i in hit.philo_id)
        # Bound parameter, like the other queries in this function.
        c.execute(
            'select rowid, philo_name, parent from words where philo_id=? limit 1',
            (word_id, ))
        results = c.fetchone()
        rowid = results["rowid"]

        highlighted_text = kwic_hit_object(hit, config, db)["highlighted_text"]
        highlighted_text = highlighted_text.translate(remove_punctuation_map)
        highlighted_text = highlighted_text.strip()

        # Key order is kept stable because it determines the JSON output.
        result_obj = {
            "left": "",
            "right": "",
            "index": index,
            "q": highlighted_text
        }

        # Left context: the 10 rows before the hit, nearest word first.
        c.execute(
            'select philo_name, philo_id from words where rowid between ? and ?',
            (rowid - 10, rowid - 1))
        left_words = [row['philo_name'] for row in c.fetchall()]
        left_words.reverse()
        result_obj["left"] = ' '.join(left_words)

        # Right context: the 10 rows after the hit, in reading order.
        c.execute(
            'select philo_name, philo_id from words where rowid between ? and ?',
            (rowid + 1, rowid + 10))
        result_obj["right"] = ' '.join(
            row['philo_name'] for row in c.fetchall())

        for metadata in config.kwic_metadata_sorting_fields:
            result_obj[metadata] = hit[metadata].lower()

        kwic_words.append(result_obj)

        index += 1

        # Avoid timeouts: stop once the caller-supplied time budget is spent.
        elapsed = timeit.default_timer() - start_time
        if elapsed > max_time:
            break

    yield json.dumps({
        "results": kwic_words,
        "hits_done": index
    }).encode('utf8')