def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    """Build one page of concordance results for hits matching a word property.

    Walks ``hits`` in order and keeps each hit for which
    ``get_word_attrib(hit, q["word_property"], db)`` equals
    ``q["word_property_value"]``.  Matches that come before ``q.start`` are
    counted but skipped, and the scan stops as soon as ``q.results_per_page``
    results have been collected.

    Args:
        hits: iterable of hit objects; must also expose a ``done`` attribute.
            Each hit is indexed by metadata field name and provides
            ``philo_id`` and ``bytes``.
        path: unused in this function; kept for call compatibility.
        q: request object.  Iterating it must yield key/value pairs; it is
            indexed for ``word_property`` / ``word_property_value`` and
            provides ``start`` and ``results_per_page`` attributes.
        db: database handle passed to the helper functions;
            ``db.locals['metadata_fields']`` lists the metadata fields copied
            into every result.
        config: configuration object providing ``db_path`` and
            ``concordance_length``.
        word_filter, filter_num, stopwords: unused in this function; kept for
            call compatibility.

    Returns:
        A dict with the keys ``query``, ``results``, ``query_done``,
        ``results_length`` and ``description``.
    """
    concordance_object = {"query": dict(pair for pair in q)}

    word_property = q["word_property"]
    word_property_value = q["word_property_value"]
    page_size = q.results_per_page

    # A start of 0 is treated as "first result"; positions are 1-based.
    start = 1 if q.start == 0 else q.start

    results = []
    matched = 0
    more_pages = False

    for hit in hits:
        if get_word_attrib(hit, word_property, db) != word_property_value:
            continue

        matched += 1
        if matched < start:
            # Matching hit that belongs to an earlier page.
            continue

        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = dict((field, hit[field]) for field in db.locals['metadata_fields'])
        citation = concordance_citation(hit, citation_hrefs)
        context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
        results.append({
            "philo_id": hit.philo_id,
            "citation": citation,
            "citation_links": citation_hrefs,
            "context": context,
            "metadata_fields": metadata_fields,
            "bytes": hit.bytes,
            "collocate_count": 1,
        })

        if len(results) == page_size:
            # The page is full; the remaining hits are not inspected, so
            # more_pages only signals that the scan was cut short.
            more_pages = True
            break

    end = start + len(results) - 1
    if len(results) < page_size:
        # The scan ran through all of hits, so nothing lies beyond `end`.
        results_length = end
    else:
        # Full page: report one more than `end` so callers offer a next page.
        results_length = end + 1

    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = results_length
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": page_size,
        "more_pages": more_pages,
    }

    # Same trace output as the former print statement, but valid on both
    # Python 2 and Python 3.
    sys.stderr.write("DONE\n")
    return concordance_object
def generate_kwic_results(db, q, config, link_to_hit="div1"):
    """Run the query described by ``q`` and return one page of KWIC results.

    The page boundaries come from ``f.link.page_interval``; every hit on the
    page is turned into a dict holding its id, formatted context, stripped
    metadata values, citation data and byte offsets.

    ``link_to_hit`` is documented as the text object the metadata link leads
    to; it is not referenced in this function body.

    Returns a dict with the keys ``description``, ``query``, ``results``,
    ``results_length`` and ``query_done``.
    """
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    start, end, n = f.link.page_interval(q.results_per_page, hits, q.start, q.end)

    page_info = {"start": start, "end": end, "results_per_page": q.results_per_page}
    kwic_object = {"description": page_info, "query": dict([pair for pair in q])}

    # Amount of context requested on each side of the hit.
    context_size = config.concordance_length

    kwic_results = []
    for hit in hits[start - 1:end]:
        # Collect every configured metadata field, whitespace-stripped.
        metadata_fields = dict(
            (field, hit[field].strip()) for field in db.locals['metadata_fields']
        )

        # Links and citation for this hit.
        citation_hrefs = citation_links(db, config, hit)
        citation = concordance_citation(hit, citation_hrefs)

        # Text window: context on both sides plus the span covered by the hit.
        hit_span = hit.bytes[-1] - hit.bytes[0]
        window = context_size + hit_span + context_size

        # Fetch the text and align it around the hit.
        offsets, byte_start = adjust_bytes(hit.bytes, context_size)
        raw_text = f.get_text(hit, byte_start, window, config.db_path)
        stripped_text = format_strip(raw_text, offsets)
        kwic_line = KWIC_formatter(stripped_text, len(hit.bytes))

        kwic_results.append({
            "philo_id": hit.philo_id,
            "context": kwic_line,
            "metadata_fields": metadata_fields,
            "citation_links": citation_hrefs,
            "citation": citation,
            "bytes": hit.bytes,
        })

    kwic_object['results'] = kwic_results
    kwic_object['results_length'] = len(hits)
    kwic_object["query_done"] = hits.done
    return kwic_object