def colloc_concordance(hit, path, q, context_size):
    """Return the concordance text for ``hit`` with the collocate highlighted.

    The text is obtained from ``fetch_concordance(hit, path, context_size)``.
    The collocate is read from ``q['collocate']`` (a UTF-8 byte string;
    undecodable bytes are ignored) and matched case-insensitively.

    A match requires a non-word character on both sides of the collocate.
    Because the leading ``.*`` is greedy and ``.`` does not cross newlines,
    at most the last such occurrence on each line is wrapped in
    ``<span class="collocate">…</span>``.

    Returns the (possibly unchanged) concordance text.
    """
    conc_text = fetch_concordance(hit, path, context_size)
    collocate = q['collocate'].decode('utf-8', 'ignore')
    # The collocate is user-supplied text, not a pattern: escape it so that
    # characters such as "." "(" or "*" are matched literally instead of
    # raising re.error or matching unintended text.
    collocate_match = re.compile(r'(.*\W)(%s)(\W.*)' % re.escape(collocate),
                                 flags=re.U | re.I)
    return collocate_match.sub(r'\1<span class="collocate">\2</span>\3',
                               conc_text)
def colloc_concordance(hit, path, q, db):
    """Return the concordance text for ``hit`` with the collocate highlighted.

    The text from ``fetch_concordance(hit, path, q)`` is split with the
    module-level ``token_regex``. Empty pieces are dropped; every remaining
    piece whose lowercased form equals the decoded ``q['collocate']`` is
    wrapped in ``<span class="collocate">…</span>``, and the pieces are
    joined back together with no separator.

    Note that only the token is lowercased, so the collocate itself must
    already be lowercase for a match to occur. ``db`` is accepted but not
    used by this function.
    """
    conc_text = fetch_concordance(hit, path, q)
    # Decode the collocate once up front rather than once per token.
    collocate = q['collocate'].decode('utf-8', 'ignore')
    highlighted = []
    for token in token_regex.split(conc_text):
        if not token:
            continue
        if token.lower() == collocate:
            token = '<span class="collocate">%s</span>' % token
        highlighted.append(token)
    return ''.join(highlighted)
예제 #3
0
def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    """Build one page of concordance results filtered by a word property.

    Walks ``hits`` and keeps those for which
    ``get_word_attrib(hit, q["word_property"], db)`` equals
    ``q["word_property_value"]``. Matching hits numbered below the start
    position are skipped, and iteration stops as soon as
    ``q.results_per_page`` results have been collected.

    Args:
        hits: iterable of hits; its ``done`` attribute is copied to the output.
        path: not used by this function.
        q: query object. It is iterated as key/value pairs, indexed for
            ``"word_property"`` and ``"word_property_value"``, and read for
            the ``start`` and ``results_per_page`` attributes.
        db: passed to the helper functions; ``db.locals['metadata_fields']``
            lists the metadata fields copied from each hit.
        config: passed to ``citation_links``; its ``db_path`` and
            ``concordance_length`` attributes are passed to
            ``fetch_concordance``.
        word_filter, filter_num, stopwords: accepted but not used here.

    Returns:
        A dict with the keys ``"query"``, ``"results"``, ``"query_done"``,
        ``"results_length"`` and ``"description"``.
    """
    concordance_object = {"query": dict(pair for pair in q)}

    # Do these need to be captured in wsgi_handler?
    word_property = q["word_property"]
    word_property_value = q["word_property_value"]

    results = []
    position = 0  # 1-based index of the current hit among the matching hits
    more_pages = False

    # A start of 0 is treated as the first result.
    start = 1 if q.start == 0 else q.start

    for hit in hits:
        if get_word_attrib(hit, word_property, db) == word_property_value:
            position += 1
            if position < start:
                # Matching hit that belongs to an earlier page.
                continue
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {field: hit[field] for field in db.locals['metadata_fields']}
            citation = concordance_citation(hit, citation_hrefs)
            context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
            results.append({
                "philo_id": hit.philo_id,
                "citation": citation,
                "citation_links": citation_hrefs,
                "context": context,
                "metadata_fields": metadata_fields,
                "bytes": hit.bytes,
                "collocate_count": 1,
            })

        # Checked for every hit that was not skipped above, matching or not.
        if len(results) == q.results_per_page:
            more_pages = True
            break

    end = start + len(results) - 1
    if len(results) < q.results_per_page:
        # Short page: `end` is the last matching hit that was seen.
        word_property_total = end
    else:
        # Full page: report one past `end` (presumably so the client knows
        # to offer another page -- TODO confirm against the consumer).
        word_property_total = end + 1

    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": q.results_per_page,
        "more_pages": more_pages,
    }
    # Same trace line the original emitted with `print >> sys.stderr`.
    sys.stderr.write("DONE\n")
    return concordance_object
import os
import re
import sys
sys.path.append('..')
from functions.wsgi_handler import parse_cgi
from reports.concordance import fetch_concordance
from reports.theme_rheme import adjust_results
import cgi
import json
from philologic.DB import DB


    
if __name__ == "__main__":
    # CGI entry point: look up one hit of the current query and write its
    # concordance text to stdout as an HTML response.
    environ = os.environ
    # Cut the script path off right after the "philo4/<name>/" segment.
    path = re.sub('(philo4/[^/]+/).*', '\\1', environ['SCRIPT_FILENAME'])
    form = cgi.FieldStorage()
    num = int(form.getvalue('hit_num'))
    length = int(form.getvalue('length'))
    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])
    if q['report'] == 'theme_rheme':
        # theme_rheme reports take the text from the adjusted hit list.
        new_hits, full_report = adjust_results(hits, path, q, length=length)
        conc_text = new_hits[num].concordance
    else:
        conc_text = fetch_concordance(hits[num], path, q, length=length)
    # Header line followed by the blank line that ends the headers.
    sys.stdout.write("Content-Type: text/html\n\n")
    sys.stdout.write(conc_text.encode('utf-8', 'ignore') + "\n")
    
def colloc_concordance(hit, path, q, context_size):
    """Fetch the concordance for ``hit`` and mark up the collocate in it.

    ``q['collocate']`` is decoded from UTF-8 (undecodable bytes are ignored)
    and searched for, case-insensitively, in the text returned by
    ``fetch_concordance(hit, path, context_size)``.

    The collocate must have a non-word character on each side to match.
    The greedy ``.*`` prefix, which does not span newlines, means that at
    most the last occurrence on each line gets wrapped in
    ``<span class="collocate">…</span>``.

    Returns the resulting text; it is unchanged when nothing matches.
    """
    conc_text = fetch_concordance(hit, path, context_size)
    # Escape the collocate before building the pattern: it is literal query
    # text, and unescaped metacharacters would either fail to compile or
    # match something other than the collocate.
    literal = re.escape(q['collocate'].decode('utf-8', 'ignore'))
    collocate_match = re.compile(r'(.*\W)(%s)(\W.*)' % literal, flags=re.U | re.I)
    return collocate_match.sub(r'\1<span class="collocate">\2</span>\3', conc_text)
예제 #6
0
#!/usr/bin/env python

import os
import re
import sys

sys.path.append('..')
from functions.wsgi_handler import parse_cgi
from reports.concordance import fetch_concordance
from reports.theme_rheme import adjust_results
import cgi
import json
from philologic.DB import DB

if __name__ == "__main__":
    # Script entry point: resolve the requested hit for the current query
    # and emit its concordance text as an HTML CGI response.
    environ = os.environ
    script_filename = environ['SCRIPT_FILENAME']
    # Keep the path only up to and including the "philo4/<name>/" segment.
    path = re.sub('(philo4/[^/]+/).*', '\\1', script_filename)

    form = cgi.FieldStorage()
    num = int(form.getvalue('hit_num'))
    length = int(form.getvalue('length'))

    db, path_components, q = parse_cgi(environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q["metadata"])

    if q['report'] == 'theme_rheme':
        # For theme_rheme the concordance is read off the adjusted hits.
        new_hits, full_report = adjust_results(hits, path, q, length=length)
        conc_text = new_hits[num].concordance
    else:
        conc_text = fetch_concordance(hits[num], path, q, length=length)

    # The extra newline terminates the header section of the response.
    sys.stdout.write("Content-Type: text/html\n" + "\n")
    body = conc_text.encode('utf-8', 'ignore')
    sys.stdout.write(body + "\n")