Example #1
0
def word_list():
    """Replace (on ``PUT``) and return the stored word list.

    On ``PUT`` the new words are taken from the ``word_list`` form field(s)
    or, when no non-empty form value is present, from the ``word_list`` key
    of the JSON request body. If at least one word is supplied, every
    existing ``Word`` is deleted and one ``Word`` is saved per supplied
    word. A missing or empty list leaves the stored words untouched.

    Returns:
        A JSON array containing the ``word`` of every stored ``Word``,
        ordered by ``_id``. This is returned for every request method.

    Raises:
        An HTTP 400 error if the JSON body cannot be parsed, or if its
        ``word_list`` value is neither a string nor a list.
    """
    if flask.request.method == 'PUT':
        # getlist() keeps every submitted form value whole. Iterating the
        # single string returned by form.get() would save one Word per
        # character instead of one per word.
        new_words = [
            value for value in flask.request.form.getlist('word_list')
            if value
        ]

        if not new_words:
            payload = flask.request.get_json(force=True)
            # The body may be valid JSON without being an object, or may
            # simply lack the key; both mean "no words supplied".
            if isinstance(payload, dict):
                new_words = payload.get('word_list')
            else:
                new_words = None

            if new_words is None:
                new_words = []
            elif isinstance(new_words, str):
                # A lone JSON string is one word, not a sequence of letters.
                new_words = [new_words]
            elif not isinstance(new_words, list):
                flask.abort(400)

        # Only wipe the collection when there is a replacement list, so an
        # empty PUT cannot erase the stored words.
        if new_words:
            Word.objects().delete()
            for new_word in new_words:
                Word(word=new_word).save()

    return flask.jsonify(
        [word.word for word in Word.objects.all().order_by('_id')])
Example #2
0
    def calc_sim(self, document, query_document):
        """Return a cosine-style similarity between a document and a query.

        Each entry of ``query_document.words`` is looked up as a ``Word`` by
        its ``text``; entries with no matching ``Word`` are ignored. For the
        remaining words, ``self.calc_weight`` supplies the weight of the word
        in ``document`` and in ``query_document``.

        The result is the sum of the weight products divided by the product
        of the square roots of the summed squared weights, with ``0.0001``
        added to the denominator so that it is never zero.
        """
        dot_product = 0
        document_norm_sq = 0
        query_norm_sq = 0

        for token in query_document.words:
            known_word = Word.objects(text=token).first()
            if known_word:
                in_document = self.calc_weight(known_word, document)
                in_query = self.calc_weight(known_word, query_document)

                dot_product += in_document * in_query
                document_norm_sq += in_document**2
                query_norm_sq += in_query**2

        norm_product = (math.sqrt(document_norm_sq) *
                        math.sqrt(query_norm_sq))
        return dot_product / (norm_product + 0.0001)
Example #3
0
while True:
    query_text = input("Search:")

    query_hash = text_processor.hash(query_text)
    query_words = text_processor.tokenize(query_text)

    print("Words:", query_words)

    query_document = Document(hash=query_hash,
                              text=query_text,
                              words=query_words)

    documents = []
    for word_text in query_document.words:
        word = Word.objects(text=word_text).first()
        if not word:
            print("Word {word} not found!".format(word=word_text))
            continue

        print("[{word}] - {count} documents found.".format(
            word=word_text, count=len(word.documents)))

        documents.extend(word.documents)

    documents = set(documents)

    top_documents = []
    for document in tqdm(documents, total=len(documents)):
        sim_document_query = text_processor.calc_sim(document, query_document)
        if sim_document_query > 0: