Code Example #1
File: DataAdd.py  Project: laganojunior/Malanalyzer
def addUserData(username, db):
    """
    Adds a username and all of its ratings to a MalDB.
    
    Arguments:
    username - the username to add
    db       - the MalDB instance to add the data to
    """
    
    animelist = WebGrab.getAnimeList(username)
    userid = WebGrab.getUserId(username)

    addAnimeList(db, userid, username, animelist)
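
How this might be called, a minimal sketch: the MalDB import path and the no-argument constructor below are assumptions for illustration; only addUserData and the WebGrab lookups come from the snippet above.

import DataAdd                    # module containing addUserData above
from MalDB import MalDB           # hypothetical import path for the MalDB class

db = MalDB()                      # assumed constructor; the real class may take arguments
DataAdd.addUserData("example_user", db)   # fetches the list via WebGrab and stores it in db
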
Code Example #2
File: Extractor.py  Project: laganojunior/Malanalyzer
    def post(self):
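        # (This appears to be the POST handler of a Google App Engine webapp
        # RequestHandler; self.request/self.response and the db.Blob/db.put calls
        # below are the App Engine request and datastore APIs.)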
        self.response.headers["Content-Type"] = "text/html"

        username = self.request.get("username")

        self.response.out.write("Getting %s" % username)
        logging.debug("Getting %s" % username)

        # Get the user's anime list
        animelist = WebGrab.getAnimeList(username)

        # Limit the number of animes to use
        if len(animelist) > MAX_ANIMES_TO_USE:
            animelist = random.sample(animelist, MAX_ANIMES_TO_USE)

        # Walk the list once to accumulate rating statistics and build an
        # id -> title map; the id -> normalized-rating map is filled in below
        ratingMap = {}
        nameMap = {}
        ratingSum = 0.0
        ratingSumSquares = 0.0
        trueCount = 0
        for anime in animelist:
            animeid = anime["id"]
            rating = anime["score"]

            ratingSum += rating
            ratingSumSquares += rating * rating

            nameMap[str(animeid)] = anime["title"]

            if rating != 0:
                trueCount += 1

        if trueCount != 0:
            mean = ratingSum / trueCount
            stddev = math.sqrt((ratingSumSquares / trueCount) - mean * mean)
        else:
            mean = 0
            stddev = 0

        # Normalize all ratings
        if stddev < 0.1:
            # Standard deviation indicates essentially no variance, so give
            # every anime the normalized average (0.0)
            for anime in animelist:
                ratingMap[str(anime["id"])] = 0.0
        else:
            for anime in animelist:
                rating = anime["score"]
                animeid = str(anime["id"])
                if rating == 0:
                    # No rating, default to average
                    ratingMap[animeid] = 0.0
                else:
                    ratingMap[animeid] = (rating - mean) / stddev

        # Get anime objects, creating new ones if necessary
        animes = self.getAnimeObjects(nameMap)

        # Get all topic objects, making new ones as needed
        topics = self.getTopicObjects(ratingMap.keys(), animes)

        # Deserialize the topic maps
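        # (topic.animes stores str() of a Python dict -- see db.Blob(str(...)) below --
        # so eval rebuilds the id -> weight mapping; this relies on the blob having
        # been written by this same app.)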
        topicMaps = [0] * len(topics)
        for i, topic in enumerate(topics):
            topicMaps[i] = eval(str(topic.animes))

        # Get the topic weights for this user
        topicWeights = [0.1] * len(topics)
        for i, topic in enumerate(topics):
            for animeid in ratingMap:
                if animeid in topicMaps[i]:
                    topicWeights[i] += topicMaps[i][animeid] * ratingMap[animeid]

        # Normalize by averaging over all ratings
        for i, weight in enumerate(topicWeights):
            topicWeights[i] /= len(ratingMap)

        # Using the user's topic weights, compute the prediction error for
        # each rated anime
        ratingErrors = {}
        for animeid in ratingMap:
            ratingSum = 0.0
            for i, weight in enumerate(topicWeights):
                if animeid in topicMaps[i]:
                    ratingSum += weight * topicMaps[i][animeid]

            ratingErrors[animeid] = ratingSum - ratingMap[animeid]

        # Update the topic->anime weights with a regularized gradient-descent step
        for i, topic in enumerate(topics):

            key_union = set(ratingErrors.keys()) | set(topicMaps[i].keys())
            for animeid in key_union:
                if animeid not in topicMaps[i]:
                    topicMaps[i][animeid] = 0.0

                if animeid not in ratingErrors:
                    ratingErrors[animeid] = 0.0

                topicMaps[i][animeid] -= LEARNING_RATE * (
                    ratingErrors[animeid] * topicWeights[i] + REGULARIZATION_FACTOR * topicMaps[i][animeid]
                )

                # Make sure the weight meets the threshold for keeping it
                if abs(topicMaps[i][animeid]) < THRESHOLD_WEIGHT:
                    del topicMaps[i][animeid]

            # Serialize the updated map back into the topic entity
            topic.animes = db.Blob(str(topicMaps[i]))

        # Batch update everything
        db.put(animes + topics)
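
The handler above interleaves App Engine request handling with the numerical work. The sketch below isolates the two core steps for readability: z-score normalization of a user's ratings (a score of 0 is treated as "unrated") and the regularized gradient-descent update applied to one topic's anime weights. The function names and constant values here are illustrative only; they are not taken from the project.

import math

# Illustrative constants; the project defines LEARNING_RATE, REGULARIZATION_FACTOR
# and THRESHOLD_WEIGHT elsewhere, with values not shown in this snippet.
LEARNING_RATE = 0.01
REGULARIZATION_FACTOR = 0.1
THRESHOLD_WEIGHT = 1e-4

def normalize_ratings(scores):
    """Z-score the rated entries; unrated (0) entries map to the mean, i.e. 0.0."""
    rated = [s for s in scores.values() if s != 0]
    if not rated:
        return {k: 0.0 for k in scores}
    mean = sum(rated) / len(rated)
    variance = sum(s * s for s in rated) / len(rated) - mean * mean
    stddev = math.sqrt(max(variance, 0.0))
    if stddev < 0.1:
        # Effectively no variance: treat every rating as average
        return {k: 0.0 for k in scores}
    return {k: (0.0 if s == 0 else (s - mean) / stddev) for k, s in scores.items()}

def gradient_step(topic_map, topic_weight, rating_errors):
    """One regularized gradient-descent update of a topic's anime -> weight map."""
    for anime_id in set(rating_errors) | set(topic_map):
        w = topic_map.get(anime_id, 0.0)
        err = rating_errors.get(anime_id, 0.0)
        w -= LEARNING_RATE * (err * topic_weight + REGULARIZATION_FACTOR * w)
        if abs(w) < THRESHOLD_WEIGHT:
            topic_map.pop(anime_id, None)   # prune weights that have shrunk to ~0
        else:
            topic_map[anime_id] = w
    return topic_map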