def term_list(environ, start_response):
    """WSGI endpoint: return the expanded form of a query's first term group as JSON.

    Runs the query described by the request parameters, expands it to the
    matching search terms, and yields the first term group JSON-encoded as
    the sole chunk of the response body.
    """
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + "/data/")
    q = WSGIHandler(db, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    # Wait for the search to complete before reading the expanded query;
    # the other term_list variants in this file all call finish() first.
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0])
def term_list(environ, start_response):
    """WSGI endpoint: emit the first group of expanded query terms as UTF-8 JSON."""
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)
    # Config lives one directory above this script's "scripts" directory.
    app_root = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_root)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()  # block until the search has produced its results
    first_group = get_expanded_query(hits)[0]
    yield json.dumps(first_group).encode('utf8')
def term_list(environ, start_response):
    """WSGI endpoint: emit the first group of expanded query terms as JSON."""
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    req = WSGIHandler(db, environ)
    hits = db.query(req["q"], req["method"], req["arg"], **req.metadata)
    hits.finish()  # block until the search has produced its results
    yield json.dumps(get_expanded_query(hits)[0])
def term_list(environ, start_response):
    """WSGI endpoint: emit the first group of expanded query terms as JSON."""
    start_response('200 OK',
                   [('Content-type', 'application/json; charset=UTF-8'),
                    ("Access-Control-Allow-Origin", "*")])
    # Config lives one directory above this script's "scripts" directory.
    app_root = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_root)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()  # block until the search has produced its results
    first_group = get_expanded_query(hits)[0]
    yield simplejson.dumps(first_group)
def get_all_words(db, request):
    """Expand query to all search terms."""
    def _strip_accents(token):
        # NFKD-decompose, then drop the combining marks (accents).
        decomposed = unicodedata.normalize("NFKD", token)
        return ''.join([ch for ch in decomposed if not unicodedata.combining(ch)])

    query_terms = request["q"].replace('"', '')
    hits = db.query(query_terms)
    hits.finish()  # wait for the search so the expansion is complete
    return [[_strip_accents(term) for term in group]
            for group in get_expanded_query(hits)]
def get_all_words(db, request):
    """Expand query to all search terms."""
    def _strip_accents(token):
        # Decode UTF-8 bytes, NFKD-decompose, drop combining marks (accents),
        # then re-encode to UTF-8 bytes (Python 2 str in / str out).
        decomposed = unicodedata.normalize("NFKD", token.decode('utf8'))
        return u''.join([ch for ch in decomposed if not unicodedata.combining(ch)]).encode("utf-8")

    query_terms = request["q"].replace('"', '')
    hits = db.query(query_terms)
    hits.finish()  # wait for the search so the expansion is complete
    return [[_strip_accents(term) for term in group]
            for group in get_expanded_query(hits)]
def collocation_results(request, config):
    """Fetch collocation results.

    Counts, for every hit of the query, the words co-occurring with it
    (within `collocate_distance` words when given, otherwise within the
    same sentence), excluding the search terms themselves and an optional
    stop-word filter list. Processing is time-boxed (request.max_time,
    default 10s); when the budget is exhausted the partial state is
    returned with more_results=True so the client can resume at hits_done.

    Returns a dict with keys: query, filter_list, collocates,
    results_length, more_results, hits_done.
    """
    db = DB(config.db_path + '/data/')
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], **request.metadata)
    hits.finish()
    collocation_object = {"query": dict([i for i in request])}

    try:
        collocate_distance = int(request['collocate_distance'])
    except ValueError:
        # Empty string: the keyword was not specified in the URL.
        collocate_distance = None

    if request.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(request, config)
    collocation_object['filter_list'] = filter_list
    filter_list = set(filter_list)

    # Add the expanded search terms themselves to the filter so a query
    # word is never counted as its own collocate.
    query_words = []
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.append(word)
    query_words = set(query_words)
    filter_list = filter_list.union(query_words)

    # Re-run the query in raw mode to get word-level philo_ids.
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], raw_results=True, **request.metadata)
    hits.finish()

    stored_sentence_id = None
    stored_sentence_counts = defaultdict(int)
    sentence_hit_count = 1
    hits_done = request.start or 0
    max_time = request.max_time or 10
    all_collocates = defaultdict(lambda: {'count': 0})
    cursor = db.dbh.cursor()
    start_time = timeit.default_timer()
    try:
        for hit in hits[hits_done:]:
            # philo_id of the word object: first 6 components + word offset
            # (hit[6] is deliberately skipped in this id format).
            word_id = ' '.join([str(i) for i in hit[:6]]) + ' ' + str(hit[7])
            # Parameterized query: never interpolate values into SQL.
            cursor.execute("select parent, rowid from words where philo_id=? limit 1",
                           (word_id,))
            result = cursor.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                # New sentence: (re)build the collocate counts for its window.
                rowid = int(result['rowid'])
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = defaultdict(int)
                if collocate_distance:
                    begin_rowid = max(rowid - collocate_distance, 0)
                    end_rowid = rowid + collocate_distance
                    cursor.execute("select philo_name from words where parent=? and rowid between ? and ?",
                                   (parent, begin_rowid, end_rowid))
                else:
                    cursor.execute("select philo_name from words where parent=?",
                                   (parent,))
                for i in cursor.fetchall():
                    collocate = i["philo_name"]
                    if collocate not in filter_list:
                        stored_sentence_counts[collocate] += 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                # Only count a collocate once per distinct occurrence; a word
                # must appear at least as often as the hit itself did.
                if stored_sentence_counts[word] < sentence_hit_count:
                    continue
                all_collocates[word]['count'] += 1
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # Avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop.
            if elapsed > int(max_time):
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)

    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]
    return collocation_object
def collocation_results(request, config):
    """Fetch collocation results.

    For each hit of the query, tallies co-occurring words — inside a
    +/- collocate_distance word window when given, otherwise within the
    whole sentence — excluding the expanded search terms and an optional
    filter list. Work is bounded by request.max_time seconds (default 10);
    a partial result carries more_results=True and the resume offset in
    hits_done.

    Returns a dict with keys: query, filter_list, collocates,
    results_length, more_results, hits_done.
    """
    db = DB(config.db_path + '/data/')
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], **request.metadata)
    hits.finish()
    collocation_object = {"query": dict([i for i in request])}

    try:
        collocate_distance = int(request['collocate_distance'])
    except ValueError:
        # Empty string: the keyword was not specified in the URL.
        collocate_distance = None

    if request.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(request, config)
    collocation_object['filter_list'] = filter_list
    filter_list = set(filter_list)

    # Filter out the search terms themselves so a query word is never
    # reported as its own collocate.
    query_words = []
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.append(word)
    query_words = set(query_words)
    filter_list = filter_list.union(query_words)

    # Re-run the query in raw mode to get word-level philo_ids.
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], raw_results=True, **request.metadata)
    hits.finish()

    stored_sentence_id = None
    stored_sentence_counts = defaultdict(int)
    sentence_hit_count = 1
    hits_done = request.start or 0
    max_time = request.max_time or 10
    all_collocates = defaultdict(lambda: {'count': 0})
    cursor = db.dbh.cursor()
    start_time = timeit.default_timer()
    try:
        for hit in hits[hits_done:]:
            # philo_id of the word object: first 6 components + word offset
            # (hit[6] is deliberately skipped in this id format).
            word_id = ' '.join([str(i) for i in hit[:6]]) + ' ' + str(hit[7])
            # Parameterized query: never interpolate values into SQL.
            cursor.execute("select parent, rowid from words where philo_id=? limit 1",
                           (word_id,))
            result = cursor.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                # New sentence: (re)build the collocate counts for its window.
                rowid = int(result['rowid'])
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = defaultdict(int)
                if collocate_distance:
                    begin_rowid = max(rowid - collocate_distance, 0)
                    end_rowid = rowid + collocate_distance
                    cursor.execute("select philo_name from words where parent=? and rowid between ? and ?",
                                   (parent, begin_rowid, end_rowid))
                else:
                    cursor.execute("select philo_name from words where parent=?",
                                   (parent,))
                for i in cursor.fetchall():
                    collocate = i["philo_name"]
                    if collocate not in filter_list:
                        stored_sentence_counts[collocate] += 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                # A word only counts if it occurs at least as many times as
                # the hit has occurred in this sentence.
                if stored_sentence_counts[word] < sentence_hit_count:
                    continue
                all_collocates[word]['count'] += 1
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # Avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop.
            if elapsed > int(max_time):
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)

    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]
    return collocation_object
def fetch_collocation(hits, q, db, config):
    """Tally sentence-level collocates for the hits of a query.

    For each hit, counts the other words in the hit's sentence, skipping the
    expanded query terms and the configured filter list, and attaches a
    concordance URL to each collocate the first time it is seen. Work is
    bounded by q.max_time seconds (default 10); a partial result carries
    more_results=True and the resume offset in hits_done.

    Returns a dict with keys: query, filter_list, collocates,
    results_length, more_results, hits_done.
    """
    collocation_object = {"query": dict([i for i in q])}
    # NOTE(review): these two reads are unused below; kept so any side
    # effects / validation of the lookups are preserved.
    length = config['concordance_length']
    try:
        within_x_words = int(q['word_num'])
    except ValueError:
        # Empty string: the keyword was not specified in the URL.
        within_x_words = 5

    if q.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(q, config)
    collocation_object['filter_list'] = list(filter_list)

    ## Start going through hits ##
    all_collocates = {}
    c = db.dbh.cursor()

    # Build set of search terms to filter out.
    query_words = set([])
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.add(word)

    stored_sentence_id = None
    stored_sentence_counts = {}
    sentence_hit_count = 1
    hits_done = q.start or 0
    start_time = timeit.default_timer()
    max_time = q.max_time or 10
    try:
        for hit in hits[hits_done:]:
            word_id = ' '.join([str(i) for i in hit.philo_id])
            # Parameterized query: never interpolate values into SQL.
            c.execute("select philo_name, parent from words where philo_id=?",
                      (word_id,))
            result = c.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                # New sentence: count every word it contains.
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = {}
                c.execute("select philo_name from words where parent=?",
                          (parent,))
                for i in c.fetchall():
                    name = i['philo_name']
                    stored_sentence_counts[name] = stored_sentence_counts.get(name, 0) + 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                # Skip query terms, words occurring fewer times than the hit
                # itself, and filtered words.
                if word in query_words or stored_sentence_counts[word] < sentence_hit_count:
                    continue
                if word in filter_list:
                    continue
                query_string = q['q'] + ' "%s"' % word
                method = 'cooc'
                if word in all_collocates:
                    all_collocates[word]['count'] += 1
                else:
                    all_link = f.link.make_absolute_query_link(config, q, report="concordance", q=query_string, method=method, start='0', end='0')
                    all_collocates[word] = {"count": 1, "url": all_link}
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # Avoid timeouts by splitting the query if more than q.max_time
            # (in seconds) has been spent in the loop.
            if elapsed > int(max_time):
                break
    except IndexError:
        # Bug fix: was `collocation['hits_done']` (undefined name -> NameError).
        collocation_object['hits_done'] = len(hits)

    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]
    return collocation_object