def findSimilarQuestions(questionBody):
    """Find stored questions whose topics resemble those of ``questionBody``.

    The question body is analyzed with ``analyzeQuestion``; every topic label
    it yields is turned into a case-insensitive match against the ``topics``
    labels of stored questions. Each candidate returned by
    ``Question.findGeneric`` is then scored with ``cosineSimilarity`` over the
    vectors produced by ``createTwoVectorsFromTopics``.

    Args:
        questionBody: Question content, passed as-is to ``analyzeQuestion``.

    Returns:
        ``None`` when the analysis yields no topics or the lookup returns
        nothing. Otherwise a list of
        ``{'question': <question>, 'similarity_rate': <float>}`` entries sorted
        by ``similarity_rate`` in descending order, where the rate is the
        cosine similarity multiplied by 100 and rounded to two decimals.

    Raises:
        Any exception raised by the helpers propagates unchanged.

    Note:
        The ``entity_tags``, ``topics`` and ``categories`` keys are removed
        in place from each question object returned by the lookup.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import re

    # Analyzing the question (only the topics are needed here)
    _entity_tags, topics, _categories = analyzeQuestion(questionBody)

    # No topic is found: nothing to search for, so skip building the query
    if not topics:
        return None

    # Search query: one $elemMatch clause per topic. The label is escaped so
    # that characters such as "(", ")", "+" or "." are matched literally
    # instead of being interpreted as regular-expression syntax.
    query = {
        "$or": [
            {
                "topics": {
                    "$elemMatch": {
                        "label": {
                            "$regex": re.escape(topic["label"]),
                            "$options": "i",
                        }
                    }
                }
            }
            for topic in topics
        ]
    }

    # Results after checking topic similarity
    questionsFromSimilarTopic = Question.findGeneric(query)

    # Query returned nothing
    if not questionsFromSimilarTopic:
        return None

    # Score algorithm
    foundQuestions = []
    for question in questionsFromSimilarTopic:
        currentVector, questionVector = createTwoVectorsFromTopics(
            topics, question['topics'])
        cosSimilarity = round(
            cosineSimilarity(currentVector, questionVector) * 100, 2)
        foundQuestions.append({
            'question': question,
            'similarity_rate': cosSimilarity,
        })

    # Sorting according to the topic score (highest first; stable for ties)
    foundQuestions.sort(key=lambda found: found['similarity_rate'],
                        reverse=True)

    # We do not need them in the response (probably). pop() with a default
    # tolerates stored questions that lack one of these keys.
    for found in foundQuestions:
        for key in ('entity_tags', 'topics', 'categories'):
            found['question'].pop(key, None)

    return foundQuestions