def getWholePost(db, postId):
    """Return a ``(title, wholePost)`` tuple for the question ``postId``.

    ``wholePost`` is the question title, the question body, the bodies of all
    answers yielded by ``util.iterateAnswers`` and the question tags, joined
    with blank lines. When ``util.iterateQuestions`` yields nothing, both
    tuple elements are ``None``; when it yields several questions, the last
    one wins.
    """
    result = (None, None)
    for question in util.iterateQuestions(db, postList=[postId]):
        # NOTE(review): iterateAnswers receives a bare id here while
        # iterateQuestions gets a list -- confirm util accepts a scalar.
        answerBodies = [reply.body for reply in util.iterateAnswers(db, postId)]
        sections = [
            question.title,
            question.body,
            "\n\n".join(answerBodies),
            question.tags,
        ]
        result = (question.title, "\n\n".join(sections))
    return result
Beispiel #2
0
def getWholePost(db, postId):
    """Build the full text of a post together with its title.

    Returns ``(title, wholePost)``. ``wholePost`` concatenates, separated by
    blank lines: the question title, the question body, every answer body
    obtained from ``util.iterateAnswers`` and the question tags. If
    ``util.iterateQuestions`` produces no question, ``(None, None)`` is
    returned; if it produces more than one, the values of the last question
    are returned.
    """
    separator = "\n\n"
    title, wholePost = None, None
    for question in util.iterateQuestions(db, postList=[postId]):
        # NOTE(review): a bare id is passed to iterateAnswers, whereas
        # iterateQuestions is given a list -- verify this is intended.
        joinedAnswers = separator.join(
            [reply.body for reply in util.iterateAnswers(db, postId)])
        title = question.title
        pieces = [question.title, question.body, joinedAnswers, question.tags]
        wholePost = separator.join(pieces)
    return (title, wholePost)
 def __iter__(self):
     """Yield one ``doc2bow`` result per question and track its post id.

     For every question from ``util.iterateQuestions`` the title, body,
     answer bodies and tags are passed to ``tokenizePost``. The position of
     the document (``self.ctr``) is mapped to ``question.id`` in
     ``self.corpusToPost`` before the counter is incremented. The tokens are
     run through ``self.unicodifyTokens`` and handed to
     ``self.dictionary.doc2bow`` with ``allow_update=True``.

     When ``Config.debug`` is set, a progress line is printed to stderr each
     time the counter reaches a positive multiple of 5000.
     """
     for question in util.iterateQuestions(self.db, self.topic, self.postList):
         answers = list(util.iterateAnswers(self.db, [question.id]))
         tokens = tokenizePost(
             question.title,
             question.body,
             [reply.body for reply in answers],
             question.tags,
         )

         # Progress report: seconds since the previous report (self.t0) and
         # the overall rate measured from self.tbegin.
         if Config.debug and self.ctr > 0 and not self.ctr % 5000:
             now = time.time()
             print >>sys.stderr, "Posts imported:", self.ctr, \
                 "(in %0.1fs, %0.2fpost/s)" % (now - self.t0, self.ctr / (now - self.tbegin))
             self.t0 = now

         # Record which post this corpus position belongs to, then advance.
         self.corpusToPost[self.ctr] = question.id
         self.ctr += 1

         yield self.dictionary.doc2bow(list(self.unicodifyTokens(tokens)), allow_update=True)
Beispiel #4
0
    def __iter__(self):
        """Stream the corpus: one ``doc2bow`` result per question.

        Each question returned by ``util.iterateQuestions`` is tokenized
        (title, body, bodies of its answers, tags) via ``tokenizePost``.
        ``self.corpusToPost`` receives the mapping from the current counter
        value to ``question.id``, the counter is incremented, and the
        tokens -- after ``self.unicodifyTokens`` -- are passed to
        ``self.dictionary.doc2bow`` with ``allow_update=True``.

        With ``Config.debug`` enabled, a progress line goes to stderr
        whenever the counter is a positive multiple of 5000.
        """
        questions = util.iterateQuestions(self.db, self.topic, self.postList)
        for question in questions:
            answers = list(util.iterateAnswers(self.db, [question.id]))
            tokens = tokenizePost(
                question.title,
                question.body,
                [reply.body for reply in answers],
                question.tags)

            # Report elapsed time since the last report (self.t0) and the
            # average rate since self.tbegin.
            if Config.debug and self.ctr > 0 and not self.ctr % 5000:
                now = time.time()
                print >> sys.stderr, "Posts imported:", self.ctr, \
                    "(in %0.1fs, %0.2fpost/s)" % (now - self.t0, self.ctr / (now - self.tbegin))
                self.t0 = now

            # Map this corpus position to its post before moving on.
            self.corpusToPost[self.ctr] = question.id
            self.ctr += 1

            yield self.dictionary.doc2bow(
                list(self.unicodifyTokens(tokens)), allow_update=True)