예제 #1
0
def search

# Interactive search loop: every input line is one query. A quote is printed
# (highlighted) when each query token matches at least one element of it.
for query in fileinput.input():
    # Strip only the line terminator. Slicing with [:-1] would also eat the
    # last real character of a final line that has no trailing newline.
    query_tokens = query.rstrip('\n').split(' ')
    print(green('QUERY: ') + query)
    for quote in quotes:
        # all()/any() stop at the first decisive result instead of scanning
        # the remaining elements of the quote after a match was found.
        if all(
            any(match_tokens(tok, quot_tok) for quot_tok in quote)
            for tok in query_tokens
        ):
            print('   ', my_highlight(query_tokens, quote))
            
#print(lemtas)
예제 #2
0
                        good_token = False
            else:
                good_token = False
            if good_token :
                good = True
                break
        if good: 
            for j in range(len(q)): 
                r.append(Fore.RED + Style.BRIGHT + L[i+j]+ Fore.BLACK + Style.RESET_ALL)
            i+=len(q)
        else:
            r.append(w) 
    return ' '.join(r)


print("Ready for input\n")
# Interactive loop: parse each input line into word groups, fetch and rank the
# matching documents, then print them with the query highlighted.
for query in fileinput.input():
    query_tokens = []
    # Strip only the line terminator; [:-1] would drop the last real character
    # of a final line that lacks a trailing newline.
    # Splitting on '"' alternates segments: even indexes lie outside quotation
    # marks, odd indexes lie inside them.
    for idx, segment in enumerate(query.rstrip('\n').split('"')):
        words = [w for w in segment.split(' ') if w != '']
        if idx % 2 == 0:
            # Unquoted words each form their own single-word group.
            query_tokens += [[w] for w in words]
        else:
            # A quoted segment is kept together as one multi-word group.
            query_tokens += [words]

    print(query_tokens)
    print(green('QUERY: ') + query)
    # Highest-ranked documents first.
    good_documents = sorted(
        [get_dokument(f) for f in find_quotes(query_tokens)],
        key=lambda x: rank_document(query_tokens, x),
        reverse=True,
    )
    good_documents = [word_tokenize(x) for x in good_documents]
    for quote in good_documents:
        print('   ', my_highlight(query_tokens, quote))
예제 #3
0
# NOTE(review): `lines` is initialised here but never read or written in the
# visible part of this script.
lines = []
# One entry per line of tokenized_quotes.txt, each entry being that line's
# whitespace-split tokens; search() scans this list.
tokens = []


def match(query, line):
    """Return True when every item of *query* is contained in *line*.

    An empty *query* matches any *line*.
    """
    for word in query:
        if word not in line:
            return False
    return True


def search(q):
    """Return the entries of the global ``tokens`` list that match query *q*.

    Entries are returned in the order they appear in ``tokens``; whether an
    entry qualifies is decided by ``match(q, entry)``.
    """
    hits = []
    for candidate in tokens:
        if match(q, candidate):
            hits.append(candidate)
    return hits


# Load the corpus: each line of the file becomes one list of whitespace-split
# tokens. The context manager closes the file as soon as loading is done.
with open('tokenized_quotes.txt') as quotes_file:
    for line in quotes_file:
        tokens.append(line.split())

print('No, dalej!')

t0 = time.time()

# Answer one query per stdin line until end of input.
for x in sys.stdin:
    L = x.split()
    print(green('QUERY: ') + ' '.join(L))
    # search() walks the whole corpus, so it is called exactly once per query.
    for res in search(L):
        print('   ', highlight(L, res))
    yellow_line()
    print()
# Total wall-clock time spent in the query loop (includes waiting for input).
print(time.time() - t0)