コード例 #1
0
def _search_pharse_func_tester(pharse, doc_id):
    """Run a phrase search by hand and print whatever it returns.

    Every whitespace-separated token of ``pharse`` is passed through
    ``Token_Preprocessing_Engine.process_token``; the resulting term list is
    handed to ``search_pharse`` together with ``doc_id`` and the result is
    written out with ``send_stdout``.
    """
    engine = Token_Preprocessing_Engine()
    processed = [engine.process_token(word) for word in pharse.split()]
    send_stdout(search_pharse(processed, doc_id))
コード例 #2
0
def process_query(query):
    """Split ``query`` on whitespace and normalize each token.

    When the module-level ``STEMMER`` flag is truthy, tokens are run through
    ``Token_Preprocessing_Engine.process_token``; otherwise they are simply
    lowercased. Returns the normalized tokens as a list, in query order.
    """
    if STEMMER:
        # One engine instance is shared by all tokens of the query.
        engine = Token_Preprocessing_Engine()
        return [engine.process_token(word) for word in query.split()]
    return [word.lower() for word in query.split()]
コード例 #3
0
def main():
    """Rank the indexed entries against the command-line query and print the top k.

    Steps:
      1. Parse the command-line arguments and validate ``args.score``.
      2. Open the index file ``INDEX_FILE`` located under ``args.path``.
      3. Normalize ``args.terms`` (via ``Token_Preprocessing_Engine`` when
         ``STEMMER`` is truthy, otherwise by lowercasing).
      4. Load the index with ``read_index`` and score with ``cosine_score``.
      5. Print the ``args.k`` highest-scoring ids through ``send_stdout``,
         followed by their score when ``args.score == 'y'``.

    On an invalid score flag, a missing index file, or a failure inside
    ``read_index`` an error message is printed and the process terminates
    through ``sys.exit()``.
    """
    # NOTE(review): every error path below calls sys.exit() without an
    # argument, i.e. exit status 0 -- kept as-is for compatibility; consider
    # a non-zero status if no caller depends on it.

    # read arguments
    args = parse_arguments()
    if args.score not in ['y', 'n']:
        send_stdout('Error! arg "scores" should be either y or n')
        sys.exit()

    # open index file; the path is built outside the try so it is always
    # bound when the error message is formatted
    path = join(args.path, INDEX_FILE)
    try:
        f = open(path)
    except FileNotFoundError:
        send_stdout('Error! Index file "{}" does not exits.'.format(path))
        sys.exit()

    # The context manager closes the index file on every way out of the
    # block, including the sys.exit() below and unexpected exceptions.
    with f:
        # initialize query stemmer (Lemmatizer)
        if STEMMER:
            st = Token_Preprocessing_Engine()
            query = [st.process_token(t) for t in args.terms]
        else:
            query = [t.lower() for t in args.terms]

        # read index
        try:
            read_index(f)
        except Exception:
            # Deliberately not a bare ``except``: Ctrl-C (KeyboardInterrupt)
            # and SystemExit must not be reported as a malformed index.
            send_stdout('Error! Invalided index file format.')
            sys.exit()

        # compute vector space scores, best first
        score = cosine_score(query)
        ranked = sorted(score.items(), key=lambda x: x[1], reverse=True)

        # max(..., 0) keeps a negative k printing nothing, exactly like the
        # former range(min(k, len)) loop, instead of slicing from the end.
        for d, s in ranked[:max(args.k, 0)]:
            if args.score == 'y':
                send_stdout('{id} \t {score}'.format(id=d, score=s))
            else:
                send_stdout('{id}'.format(id=d))