def getWholePost(db, postId):
    """Return (title, text) for the question post with id *postId*.

    The text is the question title, question body, all answer bodies and
    the tag string, joined by blank lines.  Returns (None, None) when no
    question with that id is found.

    db     -- database handle passed through to the util iterators
    postId -- id of the question post to assemble
    """
    title = None
    wholePost = None
    for question in util.iterateQuestions(db, postList=[postId]):
        # Pass the id wrapped in a list: the __iter__ call site in this
        # file calls util.iterateAnswers(db, [question.id]), so the
        # second argument is a list of post ids, not a single id.
        answers = "\n\n".join(
            [answer.body for answer in util.iterateAnswers(db, [postId])])
        title = question.title
        wholePost = "\n\n".join(
            [question.title, question.body, answers, question.tags])
    return (title, wholePost)
def getWholePost(db, postId):
    """Return (title, text) for the question post with id *postId*.

    The text joins the question title, question body, the bodies of all
    answers, and the tag string with blank lines.  Yields (None, None)
    when the id matches no question.

    db     -- database handle forwarded to the util iterators
    postId -- id of the question post to assemble
    """
    title = None
    wholePost = None
    for question in util.iterateQuestions(db, postList=[postId]):
        # NOTE(review): wrapped postId in a list to match the other
        # util.iterateAnswers call site (iterateAnswers(db, [question.id]))
        # — the bare-id form looked like an inconsistency/bug.
        answers = "\n\n".join(
            [answer.body for answer in util.iterateAnswers(db, [postId])])
        title = question.title
        wholePost = "\n\n".join(
            [question.title, question.body, answers, question.tags])
    return (title, wholePost)
def __iter__(self):
    """Yield one bag-of-words vector per question post.

    Side effects per yielded document: records the question id in
    self.corpusToPost under the running counter, advances self.ctr,
    updates self.dictionary in place (allow_update=True), and in debug
    mode prints a progress line to stderr every 5000 posts.
    """
    for question in util.iterateQuestions(self.db, self.topic, self.postList):
        # Collect the answer bodies directly; the intermediate list of
        # answer objects is not needed for anything else.
        answer_bodies = [a.body for a in util.iterateAnswers(self.db, [question.id])]
        tokens = tokenizePost(question.title, question.body,
                              answer_bodies, question.tags)
        if Config.debug and self.ctr > 0 and self.ctr % 5000 == 0:
            now = time.time()
            print >>sys.stderr, "Posts imported:", self.ctr, "(in %0.1fs, %0.2fpost/s)" % (
                (now - self.t0), self.ctr / (now - self.tbegin))
            self.t0 = now
        self.corpusToPost[self.ctr] = question.id
        self.ctr += 1
        unicode_tokens = [u for u in self.unicodifyTokens(tokens)]
        yield self.dictionary.doc2bow(unicode_tokens, allow_update=True)
def __iter__(self):
    """Stream the corpus as bag-of-words vectors, one per question.

    For every question this records question.id in self.corpusToPost,
    increments self.ctr, grows self.dictionary (allow_update=True), and
    — when Config.debug is set — reports throughput on stderr once per
    5000 imported posts.
    """
    for question in util.iterateQuestions(self.db, self.topic, self.postList):
        fetched = util.iterateAnswers(self.db, [question.id])
        bodies = [answer.body for answer in fetched]
        tokens = tokenizePost(question.title, question.body, bodies,
                              question.tags)
        show_progress = (Config.debug and self.ctr > 0
                         and self.ctr % 5000 == 0)
        if show_progress:
            now = time.time()
            print >>sys.stderr, "Posts imported:", self.ctr, "(in %0.1fs, %0.2fpost/s)" % (
                (now - self.t0), self.ctr / (now - self.tbegin))
            # Reset the interval timer; tbegin keeps the overall rate.
            self.t0 = now
        self.corpusToPost[self.ctr] = question.id
        self.ctr += 1
        yield self.dictionary.doc2bow(
            [tok for tok in self.unicodifyTokens(tokens)],
            allow_update=True)