Example #1
0
def findSimilarQuestions(questionBody):
    """Find stored questions whose topics resemble those of ``questionBody``.

    The question text is analyzed with ``analyzeQuestion``; every detected
    topic label is turned into a case-insensitive match against the
    ``topics.label`` field and the clauses are OR-ed together into a single
    query for ``Question.findGeneric``. Each returned question is then scored
    against the detected topics using ``createTwoVectorsFromTopics`` and
    ``cosineSimilarity``.

    Args:
        questionBody: The question text handed to ``analyzeQuestion``.

    Returns:
        ``None`` when no topics are detected or the query yields nothing.
        Otherwise a list of ``{'question': ..., 'similarity_rate': ...}``
        dicts sorted by ``similarity_rate`` in descending order, where the
        rate is the cosine similarity scaled to 0-100 and rounded to two
        decimals. The ``entity_tags``, ``topics`` and ``categories`` keys are
        removed from each returned question (the question dicts are mutated
        in place).

    Raises:
        Any exception raised by the analysis, query or scoring helpers is
        propagated unchanged.
    """
    # Local import keeps this block self-contained.
    import re

    # Only the topics drive the similarity search.
    _, topics, _ = analyzeQuestion(questionBody)

    # No topic is found: nothing to search for.
    if not topics:
        return None

    # One clause per topic. The label is escaped so that regex metacharacters
    # in it (e.g. "C++") are matched literally rather than interpreted as
    # pattern syntax.
    query = {
        "$or": [
            {
                "topics": {
                    "$elemMatch": {
                        "label": {
                            "$regex": re.escape(topic["label"]),
                            "$options": "i"
                        }
                    }
                }
            }
            for topic in topics
        ]
    }

    # Results after checking topic similarity
    questionsFromSimilarTopic = Question.findGeneric(query)

    # Query returns None (or nothing at all)
    if not questionsFromSimilarTopic:
        return None

    # Score every candidate against the topics of the incoming question.
    foundQuestions = []
    for question in questionsFromSimilarTopic:
        currentVector, questionVector = createTwoVectorsFromTopics(
            topics, question['topics'])
        similarityRate = round(
            cosineSimilarity(currentVector, questionVector) * 100, 2)

        foundQuestions.append({
            'question': question,
            'similarity_rate': similarityRate
        })

    # Most similar questions first.
    foundQuestions.sort(key=lambda found: found['similarity_rate'],
                        reverse=True)

    # These fields are not needed in the response; tolerate questions that
    # were stored without some of them instead of failing with KeyError.
    for found in foundQuestions:
        for field in ('entity_tags', 'topics', 'categories'):
            found['question'].pop(field, None)

    return foundQuestions