Ejemplo n.º 1
0
    def ExtractAllFeatures(self, hours):
        """Collect the per-question features into a list of Feature records.

        The method fills in ``F2_QuesAskedByQuestionaire`` on every entry of
        ``self.QuestionAnswerPairs`` that corresponds to a question post, calls
        the helper methods that are expected to populate the remaining
        ``F*`` attributes, and finally copies all attributes of each pair into
        a fresh ``Features.Feature`` object.

        Args:
            hours: Forwarded unchanged to
                ``FindNumAnswerAndTheirScoresInXHours`` and
                ``NumCommentsInQAOfHighRepUser``. Presumably the size of the
                time window, in hours, after a question is posted -- confirm
                against those helpers.

        Returns:
            The ``featureList`` of a newly created ``Features.Features``
            object, with one ``Features.Feature`` appended per value in
            ``self.QuestionAnswerPairs``.

        Raises:
            KeyError: If a question's ``Id`` is not present in
                ``self.QuestionAnswerPairs`` (assuming it is a plain dict), or
                if ``self.POSTS`` is missing from ``self.documentDict``.
        """
        # Container whose featureList accumulates the records returned below.
        features = Features.Features()

        # 1. Visit the posts in the order given by SortByPostCreationTime
        #    (ascending creation timestamp, per the helper's name).
        allPosts = self.documentDict[self.POSTS]
        postDict, sortedKeys = self.SortByPostCreationTime(allPosts)

        # 2. Questioner's reputation (presumably sets F1 on each pair --
        #    the helper is not visible here).
        self.GetQuestionersReputation()

        # 3. Number of questions each user had asked *before* the current one.
        userDict = {}  # userID -> QA.User carrying the running question count
        for postKey in sortedKeys:
            post = postDict[postKey]
            if post.PostTypeId != '1':  # only PostTypeId '1' is a question
                continue

            userID = post.OwnerUserId
            user = userDict.get(userID)
            if user is None:
                # First question seen from this user: nothing asked before it.
                user = QA.User(userID)
                user.questionAsked = 0
                userDict[userID] = user
            else:
                user.questionAsked += 1

            # Record the count on the question/answer pair for this post.
            self.QuestionAnswerPairs[
                post.Id].F2_QuesAskedByQuestionaire = user.questionAsked

        # 4. Number of answers within the given hours and the sum of their
        #    scores (presumably F3/F4 and the best-score features -- confirm).
        self.FindNumAnswerAndTheirScoresInXHours(hours)

        # 8. Number of comments in the Q/A of the highest-reputation answerer.
        self.NumCommentsInQAOfHighRepUser(hours)

        # Copy every pair's attributes into a standalone Feature record.
        for qaPairs in self.QuestionAnswerPairs.values():
            feature = Features.Feature()

            feature.F1_QuestionersReputation = qaPairs.F1_QuestionersReputation
            feature.F2_QuesAskedByQuestionaire = qaPairs.F2_QuesAskedByQuestionaire
            feature.F3_NumAnswerToQuestionInXHours = qaPairs.F3_NumAnswerToQuestionInXHours
            feature.F4_SumScores = qaPairs.F4_SumScores
            feature.F5_BestScoreAnswerLength = qaPairs.F5_BestScoreAnswerLength
            feature.F6_BestScoreNumComments = qaPairs.F6_BestScoreNumComments
            feature.F7_BestScoreTimeDiff = qaPairs.F7_BestScoreTimeDiff
            feature.F8_ReputedUserNumComments = qaPairs.F8_ReputedUserNumComments

            feature.Y_Label_FrequentlyViewed = qaPairs.Y_Label_FrequentlyViewed

            features.featureList.append(feature)

        return features.featureList