def get_results_bibliography(environ, start_response):
    """Yield a JSON list of [citation, count] pairs for the philo_ids in the query string.

    Citations are emitted in first-seen order; duplicate ids increment the
    count of the existing citation instead of repeating it.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_results_bibliography.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    philo_ids = cgi.get('id', [])
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # (Removed dead code: obj_level, path, a db cursor, and a shadowed
    # running `count` were computed but never used.)
    citations = []  # unique citations, insertion-ordered
    citation_counter = defaultdict(int)
    for philo_id in philo_ids:
        obj = ObjectWrapper(philo_id.split(',')[:7], db)
        obj.bytes = []
        citation = f.cite.biblio_citation(db, config, obj)
        if citation not in citation_counter:
            citations.append(citation)
        citation_counter[citation] += 1
    citations_with_count = [[cite, citation_counter[cite]] for cite in citations]
    yield json.dumps(citations_with_count)
def get_more_context(environ, start_response):
    """Yield a JSON list of expanded-context concordance HTML for the current page of hits."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_more_context.py', '')
    # (Removed dead code: `hit_num` was parsed from the query string but never used.)
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # Widen the page by one hit on each side so context for the neighbouring
    # results is also available.
    if q['start'] == 0:
        start = 0
    else:
        start = q['start'] - 1
    end = (q['end'] or q['results_per_page']) + 1
    hit_range = range(start, end)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    context_size = config['concordance_length'] * 3  # triple the normal concordance window
    html_list = []
    for i in hit_range:
        try:
            html_list.append(
                r.fetch_concordance(hits[i], environ["SCRIPT_FILENAME"], context_size))
        except IndexError:  # ran past the last available hit
            break
    yield json.dumps(html_list)
def get_header(environ, start_response):
    """Yield a document's XML header as HTML-escaped text.

    Falls back to scraping the header straight out of the source file when
    lxml fails to locate the header element.
    """
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace('scripts/get_header.py', '')
    db, path_components, q = parse_cgi(environ)
    path = db.locals['db_path']
    path = path[:path.rfind("/data")]
    obj = ObjectWrapper(q['philo_id'].split(), db)
    filename = path + '/data/TEXT/' + obj.filename
    sys.stderr.write("FILENAME %s\n" % filename)
    parser = etree.XMLParser(remove_blank_text=True, recover=True)
    xml_tree = etree.parse(filename, parser)
    header = xml_tree.find(header_name)  # header_name: presumably a module-level tag name
    try:
        header_text = etree.tostring(header, pretty_print=True)
    except TypeError:
        ## workaround for when lxml doesn't find the header for whatever reason
        header_text = ''
        in_header = 0
        for line in open(filename):
            if re.search('<%s' % header_name, line):
                in_header = 1
            if in_header:
                header_text += line
            if re.search('</%s' % header_name, line):
                break
    # Escape the markup so the header renders as text instead of being
    # interpreted as HTML.  The original `replace('<', '<')` was a no-op
    # (mangled entity escaping); restore the intended &lt;/&gt; entities.
    yield header_text.replace('<', '&lt;').replace('>', '&gt;')
def export_results(environ, start_response):
    """Write concordance results to a uniquely-named export file and yield its URL.

    Yields an empty string when the report or output format is unrecognized
    (previously this raised NameError on an unbound `link`).
    """
    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/export_results.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    output_format = cgi.get('output_format', [''])[0]
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    path = os.path.abspath(os.path.dirname(__file__)).replace('scripts', "")
    link = ''  # fall back to an empty link instead of NameError
    if q['report'] == "concordance" or q['report'] is None:
        results_string, flat_list = export_concordance(db, config, q, path)
        unique_filename = str(uuid.uuid4())  # collision-proof export file name
        if output_format == "json":
            write_json(path, unique_filename, results_string)
            link = config.db_url + "/data/exports/" + unique_filename + ".json"
        elif output_format == "csv":
            write_csv(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.csv'
        elif output_format == "tab":
            write_tab(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.tab'
    yield link
def get_frequency(environ, start_response):
    """Yield frequency counts grouped by a metadata field as JSON.

    In the "json" (mobile) format, results are sorted by descending count and
    each entry carries a label, count, and a concordance URL.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace('scripts/get_frequency.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    frequency_field = cgi.get('frequency_field', [''])[0]
    db, path_components, q = parse_cgi(environ)
    q['field'] = frequency_field
    if q['q'] == '' and q["no_q"]:
        hits = db.get_all(db.locals['default_object_level'])
    else:
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if q["format"] == "json":
        # Wait for results to be written to disk, but bail out once the search
        # is finished: the old loop spun forever when a query had zero hits.
        while not len(hits):
            if hits.done:
                break
            time.sleep(0.5)
        q["interval_start"] = 0
        q["interval_end"] = len(hits)
        bib_values = dict([(i, j) for i, j in q['metadata'].iteritems() if j])
        field, results = r.generate_frequency(hits, q, db)
        new_results = []
        # key=lambda item: ... replaces the Python-2-only tuple-unpacking lambda.
        for label, result in sorted(results.iteritems(),
                                    key=lambda item: item[1]["count"], reverse=True):
            if frequency_field == "title":
                author = get_author(label, db)
                if author:
                    label = label + " (%s)" % author.decode('utf-8', 'ignore')
            formatted_result = {"search_term": q['q'],
                                "frequency_field": frequency_field,
                                "results": label,
                                "count": result["count"],
                                "url": "dispatcher.py/" + result["url"].replace('./', ''),
                                "bib_values": bib_values}
            new_results.append(formatted_result)
        yield json.dumps(new_results)
    else:
        field, results = r.generate_frequency(hits, q, db)
        yield json.dumps(results, indent=2)
def export_results(environ, start_response):
    """Write concordance results to a uniquely-named export file and yield its URL.

    Yields an empty string when the report or output format is unrecognized
    (previously this raised NameError on an unbound `link`).
    """
    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/export_results.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    output_format = cgi.get('output_format', [''])[0]
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    path = os.path.abspath(os.path.dirname(__file__)).replace('scripts', "")
    link = ''  # fall back to an empty link instead of NameError
    if q['report'] == "concordance" or q['report'] is None:
        results_string, flat_list = export_concordance(db, config, q, path)
        unique_filename = str(uuid.uuid4())  # collision-proof export file name
        if output_format == "json":
            write_json(path, unique_filename, results_string)
            link = config.db_url + "/data/exports/" + unique_filename + ".json"
        elif output_format == "csv":
            write_csv(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.csv'
        elif output_format == "tab":
            write_tab(path, unique_filename, flat_list)
            link = config.db_url + "/data/exports/" + unique_filename + '.tab'
    yield link
def error(environ, start_response):
    """Render the generic error page, recovering the db handle when WSGI setup fails."""
    try:
        db, dbname, path_components, q = wsgi_response(environ, start_response)
    except AssertionError:
        # wsgi_response couldn't resolve the request; derive the db name from
        # the script path and fall back to plain CGI parsing.
        script = environ["SCRIPT_FILENAME"]
        dbname = os.path.basename(script.replace("/dispatcher.py", ""))
        db, path_components, q = parse_cgi(environ)
    return error_handling(db, dbname, q)
def error(environ, start_response):
    """Render the generic error page, recovering the db handle when WSGI setup fails."""
    try:
        db, dbname, path_components, q = wsgi_response(environ, start_response)
        return error_handling(db, dbname, q)
    except AssertionError:
        pass
    # wsgi_response couldn't resolve the request; derive the db name from the
    # script path and fall back to plain CGI parsing.
    dbname = os.path.basename(environ["SCRIPT_FILENAME"].replace("/dispatcher.py", ""))
    db, path_components, q = parse_cgi(environ)
    return error_handling(db, dbname, q)
def term_list(environ, start_response):
    """Yield autocomplete suggestions for the 'term' query-string parameter."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace('scripts/term_list.py', '')
    db, path_components, q = parse_cgi(environ)
    params = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    term = params.get('term', [''])[0]
    yield autocomplete_term(term, db)
def term_list(environ, start_response):
    """Yield autocomplete suggestions for the 'term' query-string parameter."""
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)
    script = environ["SCRIPT_FILENAME"]
    environ["SCRIPT_FILENAME"] = script.replace('scripts/term_list.py', '')
    db, path_components, q = parse_cgi(environ)
    query_params = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    search_term = query_params.get('term', [''])[0]
    yield autocomplete_term(search_term, db)
def get_table_of_contents(environ, start_response):
    """Render a document's table of contents, as JSON (mobile) or raw HTML."""
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_table_of_contents.py', '')
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # (Removed dead code: a `path` derived from db_path was never used.)
    obj = ObjectWrapper(q['philo_id'].split(), db)
    results = r.navigate_doc(obj, db)
    if q['format'] == "json":
        html = ''
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            href = f.link.make_absolute_object_link(config, philo_id.split()[:7])
            # Default to the div1 class for any other type so the closing
            # </div> below always has a matching opening tag (the old
            # if/if/elif chain emitted none for unknown types; the HTML
            # branch already defaulted to div1).
            if philo_type == "div2":
                html += '<div class="toc-div2">'
            elif philo_type == "div3":
                html += '<div class="toc-div3">'
            else:
                html += '<div class="toc-div1">'
            html += '<a href="%s" id="%s" style="text-decoration: none;">%s</a></div>' % (
                href, link_id, head or philo_type.upper())
        wrapper = json.dumps({
            'toc': html,
            'citation': f.cite.make_abs_doc_cite_biblio_mobile(db, obj)
        })
        yield wrapper
    else:
        markup = {
            "div2": '<div class="toc-div2"><span class="bullet-point-div2"></span>',
            "div3": '<div class="toc-div3"><span class="bullet-point-div3"></span>',
        }
        default_markup = '<div class="toc-div1"><span class="bullet-point-div1"></span>'
        html = ['']
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            href = f.link.make_absolute_object_link(config, philo_id.split()[:7])
            space = markup.get(philo_type, default_markup)
            html.append(space + '<a href="%s" id="%s">%s</a></div>' % (
                href, link_id, head or philo_type.upper()))
        html = ''.join(html)
        yield html.encode('utf-8', 'ignore')
def time_series_fetcher(environ, start_response):
    """Yield [absolute_frequency, date_counts] for a time-series query as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/time_series_fetcher.py', '')
    # (Removed dead code: the query string was parsed into an unused `cgi` dict.)
    db, path_components, q = parse_cgi(environ)
    q = r.handle_dates(q, db)  # normalize the requested date range
    results = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    absolute_frequency, date_counts = r.generate_time_series(q, db, results)
    # Both values arrive as JSON strings; decode so the client gets one array.
    yield json.dumps([json.loads(absolute_frequency), json.loads(date_counts)])
def go_to_obj(environ, start_response):
    """Yield an object's text plus its prev/next sibling ids as JSON."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace('scripts/go_to_obj.py', '')
    db, path_components, q = parse_cgi(environ)
    # Strip the trailing "/data" suffix to get the database root.
    db_root = db.locals['db_path']
    db_root = db_root[:db_root.rfind("/data")]
    philo_obj = ObjectWrapper(q['philo_id'].split(), db)
    prev_obj = ' '.join(philo_obj.prev.split()[:7])
    next_obj = ' '.join(philo_obj.next.split()[:7])
    text = f.get_text_obj(philo_obj, db_root)
    yield json.dumps({'text': text, "prev": prev_obj, "next": next_obj})
def get_collocate(environ, start_response):
    """Yield collocation results for the current query as indented JSON."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_collocate.py', '')
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    collocates = r.fetch_collocation(hits, environ["SCRIPT_FILENAME"], q, db,
                                     full_report=False)
    yield json.dumps(collocates, indent=2)
def collocation_fetcher(environ, start_response):
    """Yield collocation tables: full (all/left/right) or condensed report."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/collocation_fetcher.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # SECURITY: this flag used to be eval()'d straight from the query string,
    # which allows arbitrary code execution.  Compare the string instead;
    # anything other than an explicit 'False' keeps the default full report.
    full_report = cgi.get('full_report', ['True'])[0] != 'False'
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if full_report:
        all_colloc, left_colloc, right_colloc = r.fetch_collocation(
            hits, environ["SCRIPT_FILENAME"], q, db)
        yield dumps([all_colloc, left_colloc, right_colloc])
    else:
        results = r.fetch_collocation(hits, environ["SCRIPT_FILENAME"], q, db,
                                      full_report=False)
        yield dumps(results)
def get_total_results(environ, start_response):
    """Yield the total hit count for a query as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_total_results.py', '')
    # (Removed dead code: the query string was parsed into an unused `cgi` dict.)
    db, path_components, q = parse_cgi(environ)
    if q['q'] == '' and q["no_q"]:
        hits = db.get_all(db.locals['default_object_level'])
    else:
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    # Poll until results appear or the search finishes.  Reading len() before
    # checking hits.done fixes the old bug where a query that completed before
    # the first done-check reported 0 hits; the sleep stops the busy spin.
    total_results = len(hits)
    while total_results == 0 and not hits.done:
        time.sleep(0.05)
        total_results = len(hits)
    yield json.dumps(total_results)
def get_table_of_contents(environ, start_response):
    """Render a document's table of contents, as JSON (mobile) or raw HTML."""
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_table_of_contents.py', '')
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # (Removed dead code: a `path` derived from db_path was never used.)
    obj = ObjectWrapper(q['philo_id'].split(), db)
    results = r.navigate_doc(obj, db)
    if q['format'] == "json":
        html = ''
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            href = f.link.make_absolute_object_link(config, philo_id.split()[:7])
            # Default to the div1 class for any other type so the closing
            # </div> below always has a matching opening tag (the old
            # if/if/elif chain emitted none for unknown types; the HTML
            # branch already defaulted to div1).
            if philo_type == "div2":
                html += '<div class="toc-div2">'
            elif philo_type == "div3":
                html += '<div class="toc-div3">'
            else:
                html += '<div class="toc-div1">'
            html += '<a href="%s" id="%s" style="text-decoration: none;">%s</a></div>' % (
                href, link_id, head or philo_type.upper())
        wrapper = json.dumps({
            'toc': html,
            'citation': f.cite.make_abs_doc_cite_biblio_mobile(db, obj)
        })
        yield wrapper
    else:
        markup = {
            "div2": '<div class="toc-div2"><span class="bullet-point-div2"></span>',
            "div3": '<div class="toc-div3"><span class="bullet-point-div3"></span>',
        }
        default_markup = '<div class="toc-div1"><span class="bullet-point-div1"></span>'
        html = ['']
        for philo_id, philo_type, head in results:
            link_id = philo_id.replace(' ', '_')
            href = f.link.make_absolute_object_link(config, philo_id.split()[:7])
            space = markup.get(philo_type, default_markup)
            html.append(space + '<a href="%s" id="%s">%s</a></div>' % (
                href, link_id, head or philo_type.upper()))
        html = ''.join(html)
        yield html.encode('utf-8', 'ignore')
def get_total_results(environ, start_response):
    """Yield the total hit count for a query as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_total_results.py', '')
    # (Removed dead code: the query string was parsed into an unused `cgi` dict.)
    db, path_components, q = parse_cgi(environ)
    if q['q'] == '' and q["no_q"]:
        hits = db.get_all(db.locals['default_object_level'])
    else:
        hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    # Poll until results appear or the search finishes.  Reading len() before
    # checking hits.done fixes the old bug where a query that completed before
    # the first done-check reported 0 hits; the sleep stops the busy spin.
    total_results = len(hits)
    while total_results == 0 and not hits.done:
        time.sleep(0.05)
        total_results = len(hits)
    yield json.dumps(total_results)
def landing_page_content(environ, start_response):
    """Yield landing-page listings (authors, titles, or years) for a range as JSON.

    Yields an empty string for the content when the content type is missing
    or unrecognized (previously a missing parameter raised IndexError).
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/landing_page_content.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # Default to [''] so an absent parameter yields '' instead of IndexError
    # on an empty list.
    content_type = cgi.get('landing_page_content_type', [''])[0]
    q_range = cgi.get('range', [''])[0].lower().split('-')
    # Expand e.g. "a-f" into the set of letters it covers; guard against a
    # missing or malformed range instead of crashing on ord('').
    if content_type != "year" and len(q_range) == 2 and q_range[0] and q_range[1]:
        letter_range = set([chr(i) for i in range(ord(q_range[0]), ord(q_range[1]) + 1)])
    else:
        letter_range = set()
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    c = db.dbh.cursor()
    content = ''
    if content_type == "author":
        content = generate_author_list(c, letter_range, db, config)
    elif content_type == "title":
        content = generate_title_list(c, letter_range, db, config)
    elif content_type == "year":
        content = generate_year_list(c, q_range, db, config)
    yield json.dumps(content)
def get_bibliography(environ, start_response):
    """Yield bibliographic metadata for every object at the requested level as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_bibliography.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # Default must be [''] (a one-element list), not '': the old ''[0]
    # raised IndexError whenever the parameter was absent.
    object_level = cgi.get('object_level', [''])[0]
    db, path_components, q = parse_cgi(environ)
    # object_levels: presumably a module-level whitelist of valid levels.
    if object_level and object_level in object_levels:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''  # normalize missing values to ''
        hit_object['philo_id'] = hit.philo_id
        results.append(hit_object)
    yield json.dumps(results)
def collocation_fetcher(environ, start_response):
    """Yield collocation tables: full (all/left/right) or condensed report."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/collocation_fetcher.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # SECURITY: this flag used to be eval()'d straight from the query string,
    # which allows arbitrary code execution.  Compare the string instead;
    # anything other than an explicit 'False' keeps the default full report.
    full_report = cgi.get('full_report', ['True'])[0] != 'False'
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if full_report:
        all_colloc, left_colloc, right_colloc = r.fetch_collocation(
            hits, environ["SCRIPT_FILENAME"], q, db)
        yield dumps([all_colloc, left_colloc, right_colloc])
    else:
        results = r.fetch_collocation(hits, environ["SCRIPT_FILENAME"], q, db,
                                      full_report=False)
        yield dumps(results)
def get_bibliography(environ, start_response):
    """Yield bibliographic metadata for every object at the requested level as JSON."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_bibliography.py', '')
    cgi = urlparse.parse_qs(environ["QUERY_STRING"], keep_blank_values=True)
    # Default must be [''] (a one-element list), not '': the old ''[0]
    # raised IndexError whenever the parameter was absent.
    object_level = cgi.get('object_level', [''])[0]
    db, path_components, q = parse_cgi(environ)
    # object_levels: presumably a module-level whitelist of valid levels.
    if object_level and object_level in object_levels:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''  # normalize missing values to ''
        hit_object['philo_id'] = hit.philo_id
        results.append(hit_object)
    yield json.dumps(results)
def get_more_context(environ, start_response):
    """Yield a JSON list of expanded-context concordance HTML for the current page of hits."""
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace(
        'scripts/get_more_context.py', '')
    # (Removed dead code: `hit_num` was parsed from the query string but never used.)
    db, path_components, q = parse_cgi(environ)
    config = f.WebConfig()
    # Widen the page by one hit on each side so context for the neighbouring
    # results is also available.
    if q['start'] == 0:
        start = 0
    else:
        start = q['start'] - 1
    end = (q['end'] or q['results_per_page']) + 1
    hit_range = range(start, end)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    context_size = config['concordance_length'] * 3  # triple the normal concordance window
    html_list = []
    for i in hit_range:
        try:
            html_list.append(
                r.fetch_concordance(hits[i], environ["SCRIPT_FILENAME"], context_size))
        except IndexError:  # ran past the last available hit
            break
    yield json.dumps(html_list)
#!/usr/bin/env python import os import sys sys.path.append('..') from functions.wsgi_handler import parse_cgi import reports as r import cgi import json if __name__ == "__main__": environ = os.environ environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace('scripts/get_collocate.py', '') db, path_components, q = parse_cgi(environ) hits = db.query(q["q"],q["method"],q["arg"],**q["metadata"]) results = r.fetch_collocation(hits, environ["SCRIPT_FILENAME"], q, db, full_report=False) results_with_links = [] for word, num in results: url = r.link_to_concordance(q, word, 'all', num) results_with_links.append((word, num, url)) print "Content-Type: text/html\n" print json.dumps(results_with_links,indent=2)
from functions.wsgi_handler import parse_cgi from bibliography import bibliography from render_template import render_template from concordance import fetch_concordance from kwic import fetch_kwic from mako.template import Template if __name__ == "__main__": environ = os.environ environ["SCRIPT_FILENAME"] = environ["SCRIPT_FILENAME"].replace( 'reports/concordance_switcher.py', '') form = cgi.FieldStorage() dbname = os.path.basename(environ["SCRIPT_FILENAME"]) path = os.getcwd().replace('reports', '') config = f.WebConfig() db, path_components, q = parse_cgi(environ) try: hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"]) except: hits = noHits() print "Content-Type: text/html\n" if q['report'] == 'concordance': mytemplate = Template(filename=path + "templates/concordance_short.mako") print mytemplate.render(results=hits, db=db, dbname=dbname, q=q, fetch_concordance=fetch_concordance, f=f, config=config,