Esempio n. 1
0
def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    hits = db.query(request["q"], request["method"], request["arg"],
                    **request.metadata)
    parsed = parse_query(request.q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    term_groups = []
    for g in all_groups:
        term_group = ''
        not_started = False
        for kind, term in g:
            if kind == 'NOT':
                if not_started == False:
                    not_started = True
                    term_group += ' NOT '
            elif kind == 'OR':
                term_group += '|'
            elif kind == "TERM":
                term_group += ' %s ' % term
            elif kind == "QUOTE":
                term_group += ' %s ' % term
        term_group = term_group.strip()
        term_groups.append(term_group)
    yield json.dumps(term_groups)
Esempio n. 2
0
def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if not request["q"]:
        dump = json.dumps({"original_query": "", "term_groups": []})
    else:
        hits = db.query(request["q"], request["method"], request["arg"], sort_order=request["sort_order"], **request.metadata)
        parsed = parse_query(request.q)
        group = group_terms(parsed)
        all_groups = split_terms(group)
        term_groups = []
        for g in all_groups:
            term_group = ''
            not_started = False
            for kind, term in g:
                if kind == 'NOT':
                    if not_started is False:
                        not_started = True
                        term_group += ' NOT '
                elif kind == 'OR':
                    term_group += '|'
                elif kind == "TERM":
                    term_group += ' %s ' % term
                elif kind == "QUOTE":
                    term_group += ' %s ' % term
            term_group = term_group.strip()
            term_groups.append(term_group)
        dump = json.dumps({"term_groups": term_groups, "original_query": request.original_q})
    yield dump.encode('utf8')
Esempio n. 3
0
def format_query(q, db, config):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)

    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  # we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''

    frequency_file = config.db_path + "/data/frequencies/normalized_word_frequencies"

    if kind == "TERM":
        expanded_token = token + '.*'
        grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE,
                              db.locals['lowercase_index'])
    elif kind == "QUOTE":
        expanded_token = token[:-1] + '.*' + token[-1]
        grep_proc = grep_exact(expanded_token, frequency_file, subprocess.PIPE)
    elif kind == "NOT" or kind == "OR":
        return []

    matches = []
    len_token = len(token)
    for line in grep_proc.stdout:
        word = line.split(b'\t')[1].strip().decode('utf8')
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)

    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)

    return output_string
def format_query(q, db, config):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)

    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  # we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''

    frequency_file = config.db_path + "/data/frequencies/normalized_word_frequencies"

    if kind == "TERM":
        expanded_token = token + '.*'
        grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE,
                              db.locals['lowercase_index'])
    elif kind == "QUOTE":
        expanded_token = token[:-1] + '.*' + token[-1]
        grep_proc = grep_exact(expanded_token, frequency_file, subprocess.PIPE)
    elif kind == "NOT" or kind == "OR":
        return []

    matches = []
    len_token = len(token.decode('utf-8'))
    for line in grep_proc.stdout:
        word = line.split('\t')[1].strip()
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)

    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)

    return output_string
Esempio n. 5
0
def format_query(q, db):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)

    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  ## we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''

    if kind == "OR":
        return []
    if kind == "QUOTE":
        token = token.replace('"', '')
    frequency_file = db.locals[
        "db_path"] + "/frequencies/normalized_word_frequencies"

    expanded_token = token + '.*'
    grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE)

    matches = []
    len_token = len(token.decode('utf-8'))
    for line in grep_proc.stdout:
        word = line.split('\t')[1]
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)

    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)

    return output_string
Esempio n. 6
0
def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(
        os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if not request["q"]:
        dump = json.dumps({"original_query": "", "term_groups": []})
    else:
        hits = db.query(request["q"],
                        request["method"],
                        request["arg"],
                        sort_order=request["sort_order"],
                        **request.metadata)
        parsed = parse_query(request.q)
        group = group_terms(parsed)
        all_groups = split_terms(group)
        term_groups = []
        for g in all_groups:
            term_group = ''
            not_started = False
            for kind, term in g:
                if kind == 'NOT':
                    if not_started is False:
                        not_started = True
                        term_group += ' NOT '
                elif kind == 'OR':
                    term_group += '|'
                elif kind == "TERM":
                    term_group += ' %s ' % term
                elif kind == "QUOTE":
                    term_group += ' %s ' % term
            term_group = term_group.strip()
            term_groups.append(term_group)
        dump = json.dumps({
            "term_groups": term_groups,
            "original_query": request.original_q
        })
    yield dump.encode('utf8')
Esempio n. 7
0
def format_query(q, db):
    parsed = parse_query(q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    
    # We extract every word tuple
    word_groups = []
    for g in all_groups:
        for inner_g in g:
            word_groups.append(inner_g)
    last_group = word_groups.pop()  ## we take the last tuple for autocomplete
    token = last_group[1]
    kind = last_group[0]
    if word_groups:
        prefix = ' '.join([i[1] for i in word_groups]) + " "
    else:
        prefix = ''

    if kind == "OR":
        return []
    if kind == "QUOTE":
        token = token.replace('"', '')
    frequency_file = db.locals["db_path"]+"/frequencies/normalized_word_frequencies"
    
    expanded_token = token + '.*'
    grep_proc = grep_word(expanded_token, frequency_file, subprocess.PIPE)
    
    matches = []
    len_token = len(token.decode('utf-8'))
    for line in grep_proc.stdout:
        word = line.split('\t')[1]
        highlighted_word = highlighter(word, len_token)
        matches.append(highlighted_word)

    output_string = []
    for m in matches:
        if kind == "QUOTE":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix +  m)
    
    return output_string