def get_questions(self):
    """Return all questions whose paper is linked to this object.

    Queries the session this object is attached to for ``Question`` rows
    that sit on a ``Paper`` (``Question.paper_id == Paper.id``) whose
    ``course_id`` equals ``self.id``.

    Returns:
        list: The matching ``Question`` instances, as produced by ``.all()``.
    """
    Question = get_model("Question")
    Paper = get_model("Paper")

    # Both conditions are combined in one filter; no explicit join is issued.
    paper_in_course = Paper.course_id == self.id
    question_on_paper = Question.paper_id == Paper.id

    query = object_session(self).query(Question)
    query = query.filter(paper_in_course & question_on_paper)
    return query.all()
def find_similar_questions(self, question):
    """Score every known question against ``question`` by cosine similarity.

    The tf-idf representation is built on demand (via ``self.vectorize()``)
    when ``self.vectorizer`` is not set yet.

    Args:
        question: Object exposing an ``id`` that matches the ``id`` of one
            entry in ``self.questions``.

    Returns:
        list: One ``Similar`` instance per entry of ``self.questions``, in
        the same order. The entry for ``question`` itself is included.

    Raises:
        ValueError: If no entry of ``self.questions`` has ``question.id``.
            Previously the first question's vector was silently used in
            that case, yielding similarities for the wrong question.
    """
    # Compute the tf-idf if not already completed
    if not self.vectorizer:
        self.vectorize()

    # Locate the position of the query question among the known questions
    question_index = next(
        (i for i, q in enumerate(self.questions) if question.id == q.id),
        None,
    )
    if question_index is None:
        raise ValueError(
            "question {!r} is not among the vectorized questions".format(
                question.id
            )
        )

    # Grab our question vector
    # NOTE(review): assumes rows of self.tfidf_documents are in the same
    # order as self.questions - confirm against vectorize().
    query = self.tfidf_documents[question_index, :]

    # Similarity of every document against the query, flattened to 1-D
    similarity = cosine_similarity(self.tfidf_documents, query).flatten()

    # Grab the similar model
    Similar = get_model("Similar")

    # Generate the similarity objects
    return [
        Similar(
            question_id=question.id,
            similar_question_id=q.id,
            similarity=s,
        )
        for q, s in zip(self.questions, similarity)
    ]
def popular_questions(self):
    """Find the most popular questions.

    Ranks questions by the sum of their ``Similar.similarity`` values,
    keeps the top 25 whose paper has ``course_id == self.id``, and then
    drops any question that is listed as similar to a question already
    selected, so that only one question per similar group is returned.

    Returns:
        list: ``Question`` instances in descending order of cumulative
        similarity, with near-duplicates of earlier entries removed.
    """
    session = object_session(self)
    Similar = get_model("Similar")
    Question = get_model("Question")
    Paper = get_model("Paper")

    # Reference SQL:
    #   select question_id, sum(similarity) as similarity
    #   from similar_questions
    #   where similarity > 0.6 and question_id != similar_question_id
    #   group by question_id order by similarity DESC;
    # NOTE(review): the query below applies neither the 0.6 threshold nor
    # the self-match exclusion from the reference SQL - confirm intent.
    popular = (
        session.query(
            Similar.question_id.label("question_id"),
            func.sum(Similar.similarity).label("cum_similarity"),
        )
        .group_by(Similar.question_id)
        .subquery()
    )

    questions = (
        session.query(Question)
        .join(popular, Question.id == popular.c.question_id)
        .join(Paper, Paper.id == Question.paper_id)
        .filter(Paper.course_id == self.id)
        .order_by(popular.c.cum_similarity.desc())
        .limit(25)
        .all()
    )

    # Now we pick only one of a similar group of questions.
    # A graph DB would be handy right about now.
    grouped = []
    for question in questions:
        # A candidate is skipped when ANY already selected question lists
        # it as similar. The previous loop overwrote its flag on every
        # iteration, so only the last selected question was considered.
        already_covered = any(
            find(
                selected.similar,
                lambda s: s.similar_question_id == question.id,
            )
            for selected in grouped
        )
        if not already_covered:
            grouped.append(question)

    return grouped
def popular_questions(self):
    """Find the most popular questions.

    Ranks questions by the sum of their ``Similar.similarity`` values,
    keeps the top 25 whose paper has ``course_id == self.id``, and then
    drops any question that is listed as similar to a question already
    selected, so that only one question per similar group is returned.

    Returns:
        list: ``Question`` instances in descending order of cumulative
        similarity, with near-duplicates of earlier entries removed.
    """
    session = object_session(self)
    Similar = get_model("Similar")
    Question = get_model("Question")
    Paper = get_model("Paper")

    # Reference SQL:
    #   select question_id, sum(similarity) as similarity
    #   from similar_questions
    #   where similarity > 0.6 and question_id != similar_question_id
    #   group by question_id order by similarity DESC;
    # NOTE(review): the query below applies neither the 0.6 threshold nor
    # the self-match exclusion from the reference SQL - confirm intent.
    popular = (
        session.query(
            Similar.question_id.label("question_id"),
            func.sum(Similar.similarity).label("cum_similarity"),
        )
        .group_by(Similar.question_id)
        .subquery()
    )

    questions = (
        session.query(Question)
        .join(popular, Question.id == popular.c.question_id)
        .join(Paper, Paper.id == Question.paper_id)
        .filter(Paper.course_id == self.id)
        .order_by(popular.c.cum_similarity.desc())
        .limit(25)
        .all()
    )

    # Now we pick only one of a similar group of questions.
    # A graph DB would be handy right about now.
    grouped = []
    for question in questions:
        # A candidate is skipped when ANY already selected question lists
        # it as similar. The previous loop overwrote its flag on every
        # iteration, so only the last selected question was considered.
        already_covered = any(
            find(
                selected.similar,
                lambda s: s.similar_question_id == question.id,
            )
            for selected in grouped
        )
        if not already_covered:
            grouped.append(question)

    return grouped
def find_similar_questions(self, question):
    """Score every known question against ``question`` by cosine similarity.

    The tf-idf representation is built on demand (via ``self.vectorize()``)
    when ``self.vectorizer`` is not set yet.

    Args:
        question: Object exposing an ``id`` that matches the ``id`` of one
            entry in ``self.questions``.

    Returns:
        list: One ``Similar`` instance per entry of ``self.questions``, in
        the same order. The entry for ``question`` itself is included.

    Raises:
        ValueError: If no entry of ``self.questions`` has ``question.id``.
            Previously the first question's vector was silently used in
            that case, yielding similarities for the wrong question.
    """
    # Compute the tf-idf if not already completed
    if not self.vectorizer:
        self.vectorize()

    # Locate the position of the query question among the known questions
    question_index = next(
        (i for i, q in enumerate(self.questions) if question.id == q.id),
        None,
    )
    if question_index is None:
        raise ValueError(
            "question {!r} is not among the vectorized questions".format(
                question.id
            )
        )

    # Grab our question vector
    # NOTE(review): assumes rows of self.tfidf_documents are in the same
    # order as self.questions - confirm against vectorize().
    query = self.tfidf_documents[question_index, :]

    # Similarity of every document against the query, flattened to 1-D
    similarity = cosine_similarity(self.tfidf_documents, query).flatten()

    # Grab the similar model
    Similar = get_model("Similar")

    # Generate the similarity objects
    return [
        Similar(
            question_id=question.id,
            similar_question_id=q.id,
            similarity=s,
        )
        for q, s in zip(self.questions, similarity)
    ]