def get_more_context(environ, start_response):
    """Return expanded concordance context for a range of hits as JSON.

    Yields a single JSON array of HTML strings, one per hit in the
    requested interval, each rendered with three times the configured
    concordance length.
    """
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    # Strip the script suffix so SCRIPT_FILENAME points at the DB root.
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_more_context.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # Parsed for parity with the original request contract; not read below.
    hit_num = int(cgi.get('hit_num', [0])[0])
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # Convert the 1-based start position into a 0-based index.
    first = 0 if q['start'] == 0 else q['start'] - 1
    last = (q['end'] or q['results_per_page']) + 1
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    context_size = config['concordance_length'] * 3
    html_list = []
    for index in range(first, last):
        try:
            html_list.append(r.fetch_concordance(
                hits[index], environ["SCRIPT_FILENAME"], context_size))
        except IndexError:
            # Ran past the end of the hit list.
            break
    yield json.dumps(html_list)
def concordance(environ, start_response):
    """Concordance report entry point.

    Returns JSON-formatted hits when q['format'] == "json" (mobile
    clients), a bibliography listing when the query term is empty, and
    the rendered HTML concordance report otherwise.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['format'] == "json":
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
        start, end, n = f.link.page_interval(q['results_per_page'], hits,
                                             q["start"], q["end"])
        # Hoisted: len(hits) is loop-invariant.
        hit_count = len(hits)
        formatted_results = []
        # page_interval returns 1-based positions; the slice is 0-based.
        for hit in hits[start - 1:end]:
            # FIX: the original also built a `full_metadata` dict from
            # config.metadata for every hit but never used it; that dead
            # (and potentially costly) per-hit work has been removed.
            text = fetch_concordance(hit, path, config.concordance_length)
            formatted_results.append({
                "citation": f.cite.make_abs_doc_cite_mobile(db, hit),
                "shrtcit": f.cite.make_abs_doc_shrtcit_mobile(db, hit),
                "text": text,
                "hit_count": hit_count,
                "philo_id": hit.philo_id,
                "start": start,
                "offsets": hit.bytes
            })
        return json.dumps(formatted_results)
    if q['q'] == '':
        # No search term: fall back to a bibliography listing.
        return bibliography(f, path, db, dbname, q, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    return render_concordance(hits, db, dbname, q, path, config)
def export_results(environ, start_response):
    """Export concordance results to a file under data/exports/ and yield
    the download link.

    Supported output formats: "json", "csv", "tab". Yields an empty
    string when the report or format is unsupported (the original raised
    NameError after the headers had already been sent).
    """
    start_response('200 OK',
                   [('Content-type', 'text/plain; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/export_results.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    output_format = cgi.get('output_format', [''])[0]
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    path = os.path.abspath(os.path.dirname(__file__)).replace('scripts', "")
    # FIX: initialize link so an unrecognized report/format yields ''
    # instead of raising NameError at the final yield.
    link = ''
    # FIX: `q['report'] == None` -> `is None` (identity test for None).
    if q['report'] == "concordance" or q['report'] is None:
        results_string, flat_list = export_concordance(db, config, q, path)
        # Random file name so concurrent exports never collide.
        unique_filename = str(uuid.uuid4())
        if output_format == "json":
            write_json(path, unique_filename, results_string)
            link = config.db_url + "/data/exports/" + unique_filename + ".json"
        elif output_format == "csv":
            write_csv(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.csv'
        elif output_format == "tab":
            write_tab(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.tab'
    yield link
def get_results_bibliography(environ, start_response):
    """Return, as JSON, the distinct citations for a set of philo_ids plus
    how many submitted ids share each citation.

    Response shape: [[citation_html, count], ...] in first-seen order.
    """
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_results_bibliography.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    philo_ids = cgi.get('id', [])
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # FIX: removed dead locals from the original (default_object_level,
    # a db-root path, an unused cursor, and a running count never read).
    citations = []  # preserves first-seen order of distinct citations
    citation_counter = defaultdict(int)
    for philo_id in philo_ids:
        # Only the first 7 comma-separated components identify the object.
        obj = ObjectWrapper(philo_id.split(',')[:7], db)
        obj.bytes = []
        citation = f.cite.biblio_citation(db, config, obj)
        if citation not in citation_counter:
            citations.append(citation)
        citation_counter[citation] += 1
    citations_with_count = [[cite, citation_counter[cite]]
                            for cite in citations]
    yield json.dumps(citations_with_count)
def collocation(environ, start_response):
    """Collocation report: render collocates for the query term.

    An empty query falls back to a bibliography listing (the default
    should arguably be an error message).
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    working_path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] == '':
        return bibliography(f, working_path, db, dbname, q, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    return render_collocation(hits, db, dbname, q, working_path, config)
def time_series(environ, start_response):
    """Time-series report: distribution of hits over date ranges.

    An empty query falls back to a bibliography listing.
    """
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    base_path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] == '':
        return bibliography(f, base_path, db, dbname, q, environ)
    # Normalize the requested date interval before querying.
    q = handle_dates(q, db)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    return render_time_series(hits, db, dbname, q, base_path, config)
def navigation(environ, start_response):
    """Object navigation: JSON text payload for mobile clients, a table of
    contents for whole documents, and the object reader page otherwise."""
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    obj = db[path_components]
    config = f.WebConfig()
    # FIX: renamed locals `prev`/`next` -> `prev_id`/`next_id` so the
    # builtin next() is no longer shadowed; template keywords unchanged.
    prev_id = ' '.join(obj.prev.split()[:7])
    next_id = ' '.join(obj.next.split()[:7])
    current = obj.philo_id[:7]
    if q['format'] == "json":
        if check_philo_virtual(db, path_components):
            # Virtual objects carry no text of their own; use the parent.
            obj = db[path_components[:-1]]
        obj_text = f.get_text_obj(obj, path, query_args=q['byte'])
        return json.dumps({
            'current': current,
            'text': obj_text,
            'prev': prev_id,
            'next': next_id,
            'shrtcit': f.cite.make_abs_doc_shrtcit_mobile(db, obj),
            'citation': f.cite.make_abs_doc_cite_mobile(db, obj)
        })
    if obj.philo_type == 'doc':
        # Whole documents get a table of contents rather than text.
        concatenate_files(path, "t_o_c", debug=db.locals["debug"])
        return render_template(obj=obj,
                               philo_id=obj.philo_id[0],
                               dbname=dbname,
                               f=f,
                               navigate_doc=navigate_doc,
                               db=db,
                               q=q,
                               config=config,
                               template_name='t_o_c.mako',
                               report="t_o_c",
                               ressources=f.concatenate.report_files)
    obj_text = f.get_text_obj(obj, path, query_args=q['byte'])
    concatenate_files(path, "navigation", debug=db.locals["debug"])
    return render_template(obj=obj,
                           philo_id=obj.philo_id[0],
                           dbname=dbname,
                           f=f,
                           navigate_doc=navigate_doc,
                           db=db,
                           q=q,
                           obj_text=obj_text,
                           prev=prev_id,
                           next=next_id,
                           config=config,
                           template_name='object.mako',
                           report="navigation",
                           ressources=f.concatenate.report_files)
def get_table_of_contents(environ, start_response):
    # WSGI endpoint: emit the table of contents for a document, either as
    # a JSON payload (mobile clients) or as raw HTML for in-page insertion.
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    # Strip the script suffix so SCRIPT_FILENAME points at the DB root.
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_table_of_contents.py', '')
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    path = db.locals['db_path']
    # The DB root is the parent of the /data directory.
    path = path[:path.rfind("/data")]
    obj = ObjectWrapper(q['philo_id'].split(), db)
    results = r.navigate_doc(obj, db)
    if q['format'] == "json":
        html = ''
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            # Links are built from the first 7 id components only.
            href = f.link.make_absolute_object_link(config,
                                                    philo_id.split()[:7])
            # NOTE(review): the div1 test is a separate `if`, not part of
            # the elif chain below; philo_types other than div1/div2/div3
            # get a closing </div> with no opening tag — confirm intended.
            if philo_type == "div1":
                html += '<div class="toc-div1">'
            if philo_type == "div2":
                html += '<div class="toc-div2">'
            elif philo_type == "div3":
                html += '<div class="toc-div3">'
            html += '<a href="%s" id="%s" style="text-decoration: none;">%s</a></div>' % (
                href, link_id, head or philo_type.upper())
        wrapper = json.dumps({
            'toc': html,
            'citation': f.cite.make_abs_doc_cite_biblio_mobile(db, obj)
        })
        yield wrapper
    else:
        # Bulleted variant used by the full web interface.
        div1_markup = '<div class="toc-div1"><span class="bullet-point-div1"></span>'
        div2_markup = '<div class="toc-div2"><span class="bullet-point-div2"></span>'
        div3_markup = '<div class="toc-div3"><span class="bullet-point-div3"></span>'
        html = ['']
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            href = f.link.make_absolute_object_link(config,
                                                    philo_id.split()[:7])
            if philo_type == "div2":
                space = div2_markup
            elif philo_type == "div3":
                space = div3_markup
            else:
                # div1 and any other type share the div1 bullet markup.
                space = div1_markup
            html.append(space + '<a href="%s" id="%s">%s</a></div>' % (
                href, link_id, head or philo_type.upper()))
        html = ''.join(html)
        # Python 2 str: drop any bytes that cannot be encoded.
        yield html.encode('utf-8', 'ignore')
def landing_page(environ, start_response):
    """Render the database landing page."""
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    config = f.WebConfig()
    working_dir = os.getcwd()
    # Bundle the landing-page static resources before rendering.
    concatenate_files(working_dir, "landing_page", debug=db.locals["debug"])
    return render_template(db=db,
                           dbname=dbname,
                           form=True,
                           q=q,
                           template_name='landing_page.mako',
                           config=config,
                           report="landing_page",
                           ressources=f.concatenate.report_files)
def access(environ, start_response):
    """Render the access-denied page for a client that failed the
    access-control check."""
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    config = f.WebConfig()
    incoming_address = environ['REMOTE_ADDR']
    # FIX: removed unused `hostname = socket.gethostname()`; the template
    # receives the HTTP Host header instead, exactly as before.
    return render_template(db=db,
                           dbname=dbname,
                           config=config,
                           form=True,
                           client_address=incoming_address,
                           q=q,
                           hostname=environ['HTTP_HOST'],
                           report='access',
                           template_name='access_denied.mako')
def kwic(environ, start_response):
    """Keyword-in-context report; returns JSON for mobile clients, a
    bibliography for empty queries, and the rendered KWIC page otherwise."""
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    base_path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['format'] == "json":
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
        start, end, n = f.link.page_interval(q['results_per_page'], hits,
                                             q["start"], q["end"])
        kwic_results = fetch_kwic(hits, base_path, q, f.link.byte_query, db,
                                  start - 1, end, length=250)
        total = len(hits)
        formatted_results = []
        for item in kwic_results:
            # item layout: (citation, text, philo_id, ...)
            formatted_results.append({"citation": item[0],
                                      "text": item[1],
                                      "philo_id": item[2],
                                      "start": start,
                                      "hit_count": total})
        return json.dumps(formatted_results)
    if q['q'] == '':
        return bibliography(f, base_path, db, dbname, q, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    return render_kwic(hits, db, dbname, q, base_path, config)
def error_handling(db, dbname, q):
    """Render the requested report with an empty hit list, used to display
    an error state instead of results.

    Returns None for unrecognized report names (unchanged behavior).
    """
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    # FIX: the original assigned `hits = NoHits()` twice; once is enough.
    hits = NoHits()
    # Fall back to the regular report name when no explicit error report
    # was requested (truthiness test, same as the original if/else).
    report = q["error_report"] or q['report']
    if report == "concordance":
        return r.render_concordance(hits, db, dbname, q, path, config)
    elif report == "kwic":
        return r.render_kwic(hits, db, dbname, q, path, config)
    elif report == "collocation":
        return r.render_collocation(hits, db, dbname, q, path, config)
    elif report == "time_series":
        # Time series still needs normalized dates even with no hits.
        q = r.handle_dates(q, db)
        return r.render_time_series(hits, db, dbname, q, path, config)
def landing_page_content(environ, start_response):
    """Return landing-page browse content (authors, titles or years) for a
    requested alphabetical or numeric range, as JSON."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/landing_page_content.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # FIX: default to [''] instead of [] so a missing parameter no longer
    # raises IndexError on the [0] subscript.
    content_type = cgi.get('landing_page_content_type', [''])[0]
    q_range = cgi.get('range', [''])[0].lower().split('-')
    # FIX: letter_range is always bound, and only expanded when the range
    # actually has two non-empty bounds (e.g. "a-d" -> {'a','b','c','d'}).
    letter_range = set()
    if content_type != "year" and len(q_range) == 2 and q_range[0] and q_range[1]:
        letter_range = set(chr(i)
                           for i in range(ord(q_range[0]), ord(q_range[1]) + 1))
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    c = db.dbh.cursor()
    content = ''
    if content_type == "author":
        content = generate_author_list(c, letter_range, db, config)
    elif content_type == "title":
        content = generate_title_list(c, letter_range, db, config)
    elif content_type == "year":
        content = generate_year_list(c, q_range, db, config)
    yield json.dumps(content)
def fetch_bibliography(f, path, db, dbname, q, environ):
    """Fetch (and optionally render) a bibliography.

    With no query term every object at the default level is listed;
    otherwise hits come from a metadata-only query. JSON requests get the
    raw hit list, everything else the rendered bibliography page.
    """
    if q["no_q"]:
        hits = db.get_all(db.locals['default_object_level'])
    else:
        hits = db.query(**q["metadata"])
    if q['format'] == "json":
        return hits
    concatenate_files(path, "bibliography", debug=db.locals["debug"])
    config = f.WebConfig()
    biblio_criteria = f.biblio_criteria(q, config)
    return render_template(results=hits,
                           db=db,
                           dbname=dbname,
                           q=q,
                           template_name='bibliography.mako',
                           results_per_page=q['results_per_page'],
                           f=f,
                           biblio_criteria=biblio_criteria,
                           config=config,
                           report="bibliography",
                           ressources=f.concatenate.report_files)
def concordance_from_collocation(environ, start_response):
    """Render the concordance filtered down to hits that collocate with a
    selected term; empty queries fall back to a bibliography."""
    db, dbname, path_components, q = wsgi_response(environ, start_response)
    path = os.getcwd().replace('functions/', '')
    config = f.WebConfig()
    if q['q'] == '':
        return bibliography(f, path, db, dbname, q, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    colloc_results = fetch_colloc_concordance(hits, path, q, db, config)
    # Build the human-readable search-criteria banner.
    criteria_spans = []
    for field, value in q["metadata"].iteritems():
        if not value:
            continue
        if field in config.metadata_aliases:
            field = config.metadata_aliases[field]
        criteria_spans.append(
            '<span class="biblio_criteria">%s: <b>%s</b></span>' % (
                field.title(),
                value.decode('utf-8', 'ignore'),
            ))
    biblio_criteria = ' '.join(criteria_spans)
    concatenate_files(path, "concordance_from_collocation",
                      debug=db.locals["debug"])
    return render_template(results=colloc_results,
                           db=db,
                           dbname=dbname,
                           q=q,
                           colloc_concordance=colloc_concordance,
                           f=f,
                           path=path,
                           results_per_page=q['results_per_page'],
                           config=config,
                           report="concordance_from_collocation",
                           biblio_criteria=biblio_criteria,
                           template_name="concordance_from_collocation.mako",
                           ressources=f.concatenate.report_files)
import cgi from functions.wsgi_handler import parse_cgi from bibliography import bibliography from render_template import render_template from concordance import fetch_concordance from kwic import fetch_kwic from mako.template import Template if __name__ == "__main__": environ = os.environ environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace( 'reports/concordance_switcher.py', '') form = cgi.FieldStorage() dbname = os.path.basename(environ["SCRIPT_FILENAME"]) path = os.getcwd().replace('reports', '') config = f.WebConfig() db, path_components, q = parse_cgi(environ) try: hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"]) except: hits = noHits() print "Content-Type: text/html\n" if q['report'] == 'concordance': mytemplate = Template(filename=path + "templates/concordance_short.mako") print mytemplate.render(results=hits, db=db, dbname=dbname, q=q, fetch_concordance=fetch_concordance, f=f,
def fetch_collocation(results, path, q, db, word_filter=True, filter_num=100,
                      full_report=True, stopwords=True):
    # Count words collocating with the query term within `q['word_num']`
    # words on either side of each hit in the requested interval.
    # Returns (all, left, right) defaultdicts when full_report is true,
    # otherwise just the combined counts.
    config = f.WebConfig()
    length = config['concordance_length']
    within_x_words = q['word_num']
    ## set up filtering with stopwords or 100 most frequent terms ##
    # The query term itself is always filtered out of the collocates.
    filter_list = set([q['q']])
    if word_filter:
        if stopwords:
            filter_list_path = path + '/data/stopwords.txt'
            if os.path.isfile(filter_list_path):
                filter_words_file = open(filter_list_path)
                # A stopword list is used in full, so disable the cap.
                filter_num = float("inf")
            else:
                # No stopword file: fall back to the frequency list.
                filter_list_path = path + '/data/frequencies/word_frequencies'
                filter_words_file = open(filter_list_path)
        else:
            filter_list_path = path + '/data/frequencies/word_frequencies'
            filter_words_file = open(filter_list_path)
        # NOTE(review): the file handle is never closed — presumably relies
        # on interpreter cleanup; confirm a `with` block is safe to add.
        line_count = 0
        for line in filter_words_file:
            line_count += 1
            try:
                # First whitespace-separated token on each line is the word.
                word = line.split()[0]
            except IndexError:
                # Skip blank lines.
                continue
            filter_list.add(word.decode('utf-8', 'ignore'))
            if line_count > filter_num:
                break
    ## start going though hits ##
    left_collocates = defaultdict(int)
    right_collocates = defaultdict(int)
    all_collocates = defaultdict(int)
    # NOTE(review): `count` is never read below — dead variable.
    count = 0
    for hit in results[q['interval_start']:q['interval_end']]:
        # NOTE: `bytes` shadows the builtin of the same name inside this
        # loop; it holds the hit's match offsets adjusted to the window.
        bytes, byte_start = adjust_bytes(hit.bytes, length)
        conc_text = f.get_text(hit, byte_start, length, path)
        ## Isolate left and right concordances
        conc_left = convert_entities(conc_text[:bytes[0]].decode(
            'utf-8', 'ignore'))
        conc_left = begin_match.sub('', conc_left)
        conc_left = start_cutoff_match.sub('', conc_left)
        conc_right = convert_entities(conc_text[bytes[-1]:].decode(
            'utf-8', 'ignore'))
        conc_right = end_match.sub('', conc_right)
        conc_right = left_truncate.sub('', conc_right)
        conc_left = strip_tags(conc_left)
        conc_right = strip_tags(conc_right)
        # Tokenize each side, dropping filtered words and anything beyond
        # the within_x_words window.
        left_words = tokenize(conc_left, filter_list, within_x_words, 'left',
                              db)
        right_words = tokenize(conc_right, filter_list, within_x_words,
                               'right', db)
        for l_word in left_words:
            left_collocates[l_word] += 1
            all_collocates[l_word] += 1
        for r_word in right_words:
            right_collocates[r_word] += 1
            all_collocates[r_word] += 1
    if full_report:
        return all_collocates, left_collocates, right_collocates
    else:
        return all_collocates