Beispiel #1
0
def get_more_context(environ, start_response):
    """WSGI generator yielding enlarged concordance contexts as JSON.

    Sends a ``200 OK`` JSON response (with a wildcard CORS header), parses
    the request through ``parse_cgi`` and calls ``r.fetch_concordance`` for
    each hit index in the requested window, using three times the
    configured ``concordance_length`` as the context size.

    Args:
        environ: WSGI environment dict. ``SCRIPT_FILENAME`` is rewritten in
            place with ``scripts/get_more_context.py`` stripped out.
        start_response: WSGI ``start_response`` callable.

    Yields:
        One JSON-encoded list holding the result of each
        ``r.fetch_concordance`` call, in hit order.
    """
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_more_context.py', '')
    params = urlparse.parse_qs(environ["QUERY_STRING"],
                               keep_blank_values=True)
    # NOTE(review): hit_num is parsed (and must be an integer) but is never
    # used afterwards -- confirm whether it can be dropped.
    hit_num = int(params.get('hit_num', [0])[0])
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()

    # The window starts one position before q['start'] (a start of 0 stays
    # 0) and includes the index equal to the requested end.
    first_index = 0 if q['start'] == 0 else q['start'] - 1
    stop_index = (q['end'] or q['results_per_page']) + 1

    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    context_size = config['concordance_length'] * 3

    contexts = []
    for index in range(first_index, stop_index):
        try:
            context = r.fetch_concordance(hits[index],
                                          environ["SCRIPT_FILENAME"],
                                          context_size)
        except IndexError:
            # Stop at the first IndexError (e.g. index past the last hit).
            break
        contexts.append(context)
    yield json.dumps(contexts)
Beispiel #2
0
def concordance(environ, start_response):
    """Serve the concordance report, either as JSON or as a rendered page.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        * ``q['format'] == "json"``: a JSON string with one entry per hit of
          the requested page (citation, short citation, text, hit count,
          philo id, page start and byte offsets).
        * empty query string: the result of ``bibliography(...)``.
        * otherwise: the result of ``render_concordance(...)``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()

    if q['format'] != "json":
        if q['q'] == '':
            return bibliography(f, path, db, dbname, q, environ)
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
        return render_concordance(hits, db, dbname, q, path, config)

    # JSON output: the format check takes precedence over the empty-query
    # check, so an empty query is still run through db.query here.
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    start, end, n = f.link.page_interval(q['results_per_page'], hits,
                                         q["start"], q["end"])
    formatted_results = []
    for hit in hits[start - 1:end]:
        text = fetch_concordance(hit, path, config.concordance_length)
        # NOTE(review): the per-hit metadata is collected but never added to
        # the result below -- confirm whether it was meant to be exported.
        full_metadata = dict((name, hit[name]) for name in config.metadata)
        entry = {
            "citation": f.cite.make_abs_doc_cite_mobile(db, hit),
            "shrtcit": f.cite.make_abs_doc_shrtcit_mobile(db, hit),
            "text": text,
            # len(hits) is re-read for every entry, as in the original.
            "hit_count": len(hits),
            "philo_id": hit.philo_id,
            "start": start,
            "offsets": hit.bytes
        }
        formatted_results.append(entry)
    return json.dumps(formatted_results)
def export_results(environ, start_response):
    """WSGI generator that writes an export file and yields its URL.

    Sends a ``200 OK`` plain-text response (with a wildcard CORS header),
    exports the concordance results through ``export_concordance`` and
    writes them with ``write_json``, ``write_csv`` or ``write_tab``
    depending on the ``output_format`` query parameter.

    Args:
        environ: WSGI environment dict. ``SCRIPT_FILENAME`` is rewritten in
            place with ``scripts/export_results.py`` stripped out.
        start_response: WSGI ``start_response`` callable.

    Yields:
        The URL of the written file:
        ``config.db_url + "/data/exports/" + <uuid4> + "." + <format>``.

    Raises:
        ValueError: while iterating, if the report is neither
            ``"concordance"`` nor unset, or if ``output_format`` is not one
            of ``json``, ``csv`` or ``tab``. These cases used to fail later
            with an unbound-variable error, after the export had already
            been computed.
    """
    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/export_results.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    output_format = cgi.get('output_format', [''])[0]
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    path = os.path.abspath(os.path.dirname(__file__)).replace('scripts', "")

    # Validate before doing any work, so that an unsupported request fails
    # with a clear message instead of a NameError/UnboundLocalError.
    if not (q['report'] == "concordance" or q['report'] is None):
        raise ValueError("export is only supported for concordance reports, "
                         "got %r" % (q['report'],))
    if output_format not in ("json", "csv", "tab"):
        raise ValueError("unsupported output_format %r "
                         "(expected 'json', 'csv' or 'tab')" %
                         (output_format,))

    results_string, flat_list = export_concordance(db, config, q, path)

    unique_filename = str(uuid.uuid4())
    if output_format == "json":
        write_json(path, unique_filename, results_string)
    elif output_format == "csv":
        write_csv(path, unique_filename, flat_list)
    else:
        write_tab(path, unique_filename, flat_list)
    # The file extension is the format name itself for all three formats.
    yield (config.db_url + "/data/exports/" + unique_filename + "." +
           output_format)
Beispiel #4
0
def get_results_bibliography(environ, start_response):
    """WSGI generator yielding distinct citations with their hit counts.

    Sends a ``200 OK`` JSON response (with a wildcard CORS header), builds
    one citation per ``id`` query parameter through
    ``f.cite.biblio_citation`` and counts how often each citation occurs.

    Args:
        environ: WSGI environment dict. ``SCRIPT_FILENAME`` is rewritten in
            place with ``scripts/get_results_bibliography.py`` stripped out.
        start_response: WSGI ``start_response`` callable.

    Yields:
        One JSON-encoded list of ``[citation, count]`` pairs, ordered by the
        first occurrence of each citation.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_results_bibliography.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    philo_ids = cgi.get('id', [])
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()

    citations = []  # distinct citations, in first-seen order
    citation_counter = defaultdict(int)
    for philo_id in philo_ids:
        # Only the first seven comma-separated components identify the object.
        obj = ObjectWrapper(philo_id.split(',')[:7], db)
        obj.bytes = []
        citation = f.cite.biblio_citation(db, config, obj)
        if citation not in citation_counter:
            citations.append(citation)
        citation_counter[citation] += 1

    yield json.dumps([[cite, citation_counter[cite]] for cite in citations])
Beispiel #5
0
def collocation(environ, start_response):
    """Serve the collocation report.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        The result of ``render_collocation(...)`` for a non-empty query, or
        the result of ``bibliography(...)`` when the query string is empty.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] != '':
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
        return render_collocation(hits, db, dbname, q, path, config)
    ## the default should be an error message
    return bibliography(f, path, db, dbname, q, environ)
Beispiel #6
0
def time_series(environ, start_response):
    """Serve the time-series report.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        The result of ``bibliography(...)`` when the query string is empty;
        otherwise the result of ``render_time_series(...)`` for the query
        after it has been passed through ``handle_dates``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] == '':
        return bibliography(f, path, db, dbname, q, environ)
    # handle_dates returns the query object used for the search below.
    q = handle_dates(q, db)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    return render_time_series(hits, db, dbname, q, path, config)
Beispiel #7
0
def navigation(environ, start_response):
    """Serve a text object as JSON, a table of contents, or an object page.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        * ``q['format'] == "json"``: a JSON string with the current id, the
          object text, the previous/next ids and two citations.
        * object of type ``'doc'``: the ``t_o_c.mako`` template rendering.
        * otherwise: the ``object.mako`` template rendering, which also
          receives the object text and the previous/next ids.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    obj = db[path_components]
    config = f.WebConfig()

    # These ids are taken from the object as first looked up, i.e. before
    # the JSON branch may swap ``obj`` for another one.
    prev_id = ' '.join(obj.prev.split()[:7])
    next_id = ' '.join(obj.next.split()[:7])
    current_id = obj.philo_id[:7]

    if q['format'] == "json":
        if check_philo_virtual(db, path_components):
            # Look the object up again with the last path component dropped.
            obj = db[path_components[:-1]]
        payload = {'current': current_id}
        payload['text'] = f.get_text_obj(obj, path, query_args=q['byte'])
        payload['prev'] = prev_id
        payload['next'] = next_id
        payload['shrtcit'] = f.cite.make_abs_doc_shrtcit_mobile(db, obj)
        payload['citation'] = f.cite.make_abs_doc_cite_mobile(db, obj)
        return json.dumps(payload)

    if obj.philo_type == 'doc':
        report = "t_o_c"
        template_name = 't_o_c.mako'
        extra_args = {}
    else:
        report = "navigation"
        template_name = 'object.mako'
        extra_args = {
            'obj_text': f.get_text_obj(obj, path, query_args=q['byte']),
            'prev': prev_id,
            'next': next_id,
        }

    # concatenate_files runs before f.concatenate.report_files is read for
    # the template call, matching the original statement order.
    concatenate_files(path, report, debug=db.locals["debug"])
    return render_template(obj=obj,
                           philo_id=obj.philo_id[0],
                           dbname=dbname,
                           f=f,
                           navigate_doc=navigate_doc,
                           db=db,
                           q=q,
                           config=config,
                           template_name=template_name,
                           report=report,
                           ressources=f.concatenate.report_files,
                           **extra_args)
Beispiel #8
0
def get_table_of_contents(environ, start_response):
    """WSGI generator yielding a document's table of contents.

    Sends a ``200 OK`` HTML response (with a wildcard CORS header), walks
    the entries returned by ``r.navigate_doc`` and emits one
    ``<div class="toc-divN">`` link per entry.

    Entries whose type is neither ``div2`` nor ``div3`` are rendered as
    ``div1`` in both output formats. The JSON branch previously emitted a
    closing ``</div>`` without an opening tag for such entries; it now
    follows the HTML branch, which always fell back to ``div1``.

    Args:
        environ: WSGI environment dict. ``SCRIPT_FILENAME`` is rewritten in
            place with ``scripts/get_table_of_contents.py`` stripped out.
        start_response: WSGI ``start_response`` callable.

    Yields:
        * ``q['format'] == "json"``: a JSON string with the keys ``toc``
          (the HTML markup) and ``citation``.
        * otherwise: the HTML markup encoded as UTF-8.
    """
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_table_of_contents.py', '')
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    obj = ObjectWrapper(q['philo_id'].split(), db)
    results = r.navigate_doc(obj, db)
    as_json = q['format'] == "json"

    if as_json:
        opening = '<div class="toc-%(level)s">'
        link = '<a href="%s" id="%s" style="text-decoration: none;">%s</a></div>'
    else:
        opening = ('<div class="toc-%(level)s">'
                   '<span class="bullet-point-%(level)s"></span>')
        link = '<a href="%s" id="%s">%s</a></div>'

    # Collect the fragments and join once instead of growing a string.
    entries = []
    for philo_id, philo_type, head in results:
        link_id = philo_id.replace(' ', '_')
        href = f.link.make_absolute_object_link(config,
                                                philo_id.split()[:7])
        level = philo_type if philo_type in ("div2", "div3") else "div1"
        entries.append(opening % {'level': level} +
                       link % (href, link_id, head or philo_type.upper()))
    html = ''.join(entries)

    if as_json:
        yield json.dumps({
            'toc': html,
            'citation': f.cite.make_abs_doc_cite_biblio_mobile(db, obj)
        })
    else:
        yield html.encode('utf-8', 'ignore')
Beispiel #9
0
def landing_page(environ, start_response):
    """Render the landing page template.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        The result of rendering ``landing_page.mako`` via
        ``render_template``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    config = f.WebConfig()
    concatenate_files(os.getcwd(), "landing_page", debug=db.locals["debug"])
    # Built after concatenate_files so report_files is read at the same
    # point as in the original call.
    template_args = {
        'db': db,
        'dbname': dbname,
        'form': True,
        'q': q,
        'template_name': 'landing_page.mako',
        'config': config,
        'report': "landing_page",
        'ressources': f.concatenate.report_files,
    }
    return render_template(**template_args)
Beispiel #10
0
def access(environ, start_response):
    """Render the access-denied page for the requesting client.

    Args:
        environ: WSGI environment dict; ``REMOTE_ADDR`` and ``HTTP_HOST``
            are read from it.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        The result of rendering ``access_denied.mako`` via
        ``render_template``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    config = f.WebConfig()
    client_address = environ['REMOTE_ADDR']
    # NOTE(review): the local machine name is looked up but not used; the
    # template receives the request's HTTP_HOST instead.
    hostname = socket.gethostname()
    template_args = {
        'db': db,
        'dbname': dbname,
        'config': config,
        'form': True,
        'client_address': client_address,
        'q': q,
        'hostname': environ['HTTP_HOST'],
        'report': 'access',
        'template_name': 'access_denied.mako',
    }
    return render_template(**template_args)
Beispiel #11
0
def kwic(environ, start_response):
    """Serve the KWIC (keyword in context) report as JSON or a rendered page.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        * ``q['format'] == "json"``: a JSON string with one entry per result
          of ``fetch_kwic`` (citation, text, philo id, page start and hit
          count).
        * empty query string: the result of ``bibliography(...)``.
        * otherwise: the result of ``render_kwic(...)``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()

    if q['format'] != "json":
        if q['q'] == '':
            return bibliography(f, path, db, dbname, q, environ)
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
        return render_kwic(hits, db, dbname, q, path, config)

    # JSON output: the format check takes precedence over the empty-query
    # check.
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    start, end, n = f.link.page_interval(q['results_per_page'], hits,
                                         q["start"], q["end"])
    kwic_results = fetch_kwic(hits, path, q, f.link.byte_query, db,
                              start - 1, end, length=250)
    formatted_results = []
    for entry in kwic_results:
        formatted_results.append({
            "citation": entry[0],
            "text": entry[1],
            "philo_id": entry[2],
            "start": start,
            # len(hits) is re-read for every entry, as in the original.
            "hit_count": len(hits),
        })
    return json.dumps(formatted_results)
Beispiel #12
0
def error_handling(db, dbname, q):
    """Render the requested report with an empty ``NoHits`` result.

    The report name is taken from ``q["error_report"]`` when it is truthy
    and from ``q['report']`` otherwise.

    Args:
        db: database object, passed through to the render functions.
        dbname: database name, passed through to the render functions.
        q: parsed query mapping; for the ``time_series`` report it is first
            passed through ``r.handle_dates``.

    Returns:
        The result of the matching ``r.render_*`` function, or ``None`` when
        the report is not one of ``concordance``, ``kwic``, ``collocation``
        or ``time_series``.
    """
    # Built once; the original constructed an identical object twice.
    hits = NoHits()
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    report = q["error_report"] or q['report']
    if report == "concordance":
        return r.render_concordance(hits, db, dbname, q, path, config)
    if report == "kwic":
        return r.render_kwic(hits, db, dbname, q, path, config)
    if report == "collocation":
        return r.render_collocation(hits, db, dbname, q, path, config)
    if report == "time_series":
        q = r.handle_dates(q, db)
        return r.render_time_series(hits, db, dbname, q, path, config)
    # Unknown report names produce no output, as before; made explicit.
    return None
def landing_page_content(environ, start_response):
    """WSGI generator yielding landing-page listings as JSON.

    Sends a ``200 OK`` JSON response (with a wildcard CORS header) and
    dispatches on the ``landing_page_content_type`` query parameter to
    ``generate_author_list``, ``generate_title_list`` or
    ``generate_year_list``.

    Args:
        environ: WSGI environment dict. ``SCRIPT_FILENAME`` is rewritten in
            place with ``scripts/landing_page_content.py`` stripped out.
        start_response: WSGI ``start_response`` callable.

    Yields:
        One JSON-encoded value: the generated list, or an empty string when
        the content type is none of ``author``, ``title`` or ``year``.
    """
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/landing_page_content.py', '')
    params = urlparse.parse_qs(environ["QUERY_STRING"],
                               keep_blank_values=True)
    # NOTE(review): with an empty-list default, a request missing either
    # parameter fails with IndexError -- confirm the intended fallback.
    content_type = params.get('landing_page_content_type', [])[0]
    q_range = params.get('range', [])[0].lower().split('-')
    if content_type != "year":
        # Expand the two single-character bounds into every character in
        # between, bounds included.
        first_code = ord(q_range[0])
        last_code = ord(q_range[1])
        letter_range = set(chr(code)
                           for code in range(first_code, last_code + 1))
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    cursor = db.dbh.cursor()
    if content_type == "year":
        content = generate_year_list(cursor, q_range, db, config)
    elif content_type == "author":
        content = generate_author_list(cursor, letter_range, db, config)
    elif content_type == "title":
        content = generate_title_list(cursor, letter_range, db, config)
    else:
        content = ''
    yield json.dumps(content)
Beispiel #14
0
def fetch_bibliography(f, path, db, dbname, q, environ):
    """Fetch bibliography hits and return them raw or as a rendered page.

    Args:
        f: helper namespace providing ``WebConfig``, ``biblio_criteria`` and
            ``concatenate.report_files``; only used for non-JSON output.
        path: path handed to ``concatenate_files``.
        db: database object providing ``get_all``, ``query`` and ``locals``.
        dbname: database name passed to the template.
        q: parsed query mapping (``no_q``, ``metadata``, ``format``,
            ``results_per_page`` are read).
        environ: WSGI environment dict (not used by this function).

    Returns:
        The hits object itself when ``q['format'] == "json"``; otherwise the
        result of rendering ``bibliography.mako`` via ``render_template``.
    """
    # Without a query, list every object at the default level; otherwise
    # search on the metadata criteria only.
    hits = (db.get_all(db.locals['default_object_level'])
            if q["no_q"] else db.query(**q["metadata"]))
    if q['format'] == "json":
        return hits

    concatenate_files(path, "bibliography", debug=db.locals["debug"])
    config = f.WebConfig()
    biblio_criteria = f.biblio_criteria(q, config)
    template_args = {
        'results': hits,
        'db': db,
        'dbname': dbname,
        'q': q,
        'template_name': 'bibliography.mako',
        'results_per_page': q['results_per_page'],
        'f': f,
        'biblio_criteria': biblio_criteria,
        'config': config,
        'report': "bibliography",
        'ressources': f.concatenate.report_files,
    }
    return render_template(**template_args)
def concordance_from_collocation(environ, start_response):
    """Serve the concordance listing reached from a collocation report.

    Args:
        environ: WSGI environment dict, forwarded to ``wsgi_response``.
        start_response: WSGI ``start_response`` callable, forwarded to
            ``wsgi_response``.

    Returns:
        The result of ``bibliography(...)`` when the query string is empty;
        otherwise the result of rendering
        ``concordance_from_collocation.mako`` via ``render_template``.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] == '':
        return bibliography(f, path, db, dbname, q, environ)

    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    colloc_results = fetch_colloc_concordance(hits, path, q, db, config)

    # One labelled <span> per non-empty metadata criterion, using the
    # configured alias as the label when one exists.
    # NOTE(review): values are interpolated into the markup without HTML
    # escaping -- confirm whether the template or a caller escapes them.
    criteria_spans = []
    for field, value in q["metadata"].iteritems():
        if not value:
            continue
        label = field
        if field in config.metadata_aliases:
            label = config.metadata_aliases[field]
        span = '<span class="biblio_criteria">%s: <b>%s</b></span>' % (
            label.title(),
            value.decode('utf-8', 'ignore'),
        )
        criteria_spans.append(span)
    biblio_criteria = ' '.join(criteria_spans)

    concatenate_files(path,
                      "concordance_from_collocation",
                      debug=db.locals["debug"])
    template_args = {
        'results': colloc_results,
        'db': db,
        'dbname': dbname,
        'q': q,
        'colloc_concordance': colloc_concordance,
        'f': f,
        'path': path,
        'results_per_page': q['results_per_page'],
        'config': config,
        'report': "concordance_from_collocation",
        'biblio_criteria': biblio_criteria,
        'template_name': "concordance_from_collocation.mako",
        'ressources': f.concatenate.report_files,
    }
    return render_template(**template_args)
import cgi
from functions.wsgi_handler import parse_cgi
from bibliography import bibliography
from render_template import render_template
from concordance import fetch_concordance
from kwic import fetch_kwic
from mako.template import Template

if __name__ == "__main__":
    environ = os.environ
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'reports/concordance_switcher.py', '')
    form = cgi.FieldStorage()
    dbname = os.path.basename(environ["SCRIPT_FILENAME"])
    path = os.getcwd().replace('reports', '')
    config = f.WebConfig()
    db, path_components, q = parse_cgi(environ)
    try:
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    except:
        hits = noHits()
    print "Content-Type: text/html\n"
    if q['report'] == 'concordance':
        mytemplate = Template(filename=path +
                              "templates/concordance_short.mako")
        print mytemplate.render(results=hits,
                                db=db,
                                dbname=dbname,
                                q=q,
                                fetch_concordance=fetch_concordance,
                                f=f,
Beispiel #17
0
def fetch_collocation(results,
                      path,
                      q,
                      db,
                      word_filter=True,
                      filter_num=100,
                      full_report=True,
                      stopwords=True):
    """Count the words found to the left and right of each hit.

    Args:
        results: sliceable hit list; only
            ``results[q['interval_start']:q['interval_end']]`` is processed.
        path: database root; the filter lists are read from
            ``<path>/data/stopwords.txt`` or
            ``<path>/data/frequencies/word_frequencies``.
        q: parsed query mapping (``q``, ``word_num``, ``interval_start`` and
            ``interval_end`` are read).
        db: database object, passed through to ``tokenize``.
        word_filter: when true, words from a filter list are added to the
            set handed to ``tokenize``; the query string is always in it.
        filter_num: line limit applied when reading the word-frequency
            file. Ignored for the stopword file, which is read in full.
        full_report: selects the shape of the return value.
        stopwords: when true, prefer the stopword file if it exists and
            fall back to the word-frequency file otherwise.

    Returns:
        ``(all_collocates, left_collocates, right_collocates)`` when
        ``full_report`` is true, otherwise ``all_collocates`` alone. Each is
        a ``defaultdict(int)`` mapping a word to its count.
    """
    config = f.WebConfig()
    length = config['concordance_length']
    within_x_words = q['word_num']

    ## set up filtering with stopwords or 100 most frequent terms ##
    filter_list = set([q['q']])
    if word_filter:
        filter_list_path = path + '/data/frequencies/word_frequencies'
        if stopwords:
            stopwords_path = path + '/data/stopwords.txt'
            if os.path.isfile(stopwords_path):
                filter_list_path = stopwords_path
                filter_num = float("inf")
        # The context manager closes the file; it used to be left open.
        with open(filter_list_path) as filter_words_file:
            for line_count, line in enumerate(filter_words_file, 1):
                fields = line.split()
                if not fields:
                    # Blank lines still count towards the line limit.
                    continue
                filter_list.add(fields[0].decode('utf-8', 'ignore'))
                # NOTE(review): the limit is checked after adding, so up to
                # filter_num + 1 lines are read -- kept as is; confirm
                # whether exactly filter_num was intended.
                if line_count > filter_num:
                    break

    ## start going though hits ##
    left_collocates = defaultdict(int)
    right_collocates = defaultdict(int)
    all_collocates = defaultdict(int)

    for hit in results[q['interval_start']:q['interval_end']]:
        hit_bytes, byte_start = adjust_bytes(hit.bytes, length)
        conc_text = f.get_text(hit, byte_start, length, path)

        ## Isolate left and right concordances
        conc_left = convert_entities(conc_text[:hit_bytes[0]].decode(
            'utf-8', 'ignore'))
        conc_left = begin_match.sub('', conc_left)
        conc_left = start_cutoff_match.sub('', conc_left)
        conc_right = convert_entities(conc_text[hit_bytes[-1]:].decode(
            'utf-8', 'ignore'))
        conc_right = end_match.sub('', conc_right)
        conc_right = left_truncate.sub('', conc_right)
        conc_left = strip_tags(conc_left)
        conc_right = strip_tags(conc_right)

        left_words = tokenize(conc_left, filter_list, within_x_words, 'left',
                              db)
        right_words = tokenize(conc_right, filter_list, within_x_words,
                               'right', db)

        for l_word in left_words:
            left_collocates[l_word] += 1
            all_collocates[l_word] += 1

        for r_word in right_words:
            right_collocates[r_word] += 1
            all_collocates[r_word] += 1

    if full_report:
        return all_collocates, left_collocates, right_collocates
    return all_collocates