def tfUserTag(userId):
    """Build the tag term-frequency vector for a single user.

    Every movie returned by ``di.getUserMovies`` is wrapped in a ``Movie``
    (with its tags attached) and added to a ``User`` object.  For each unique
    tag of the user, the per-movie contribution is the sum of the time weights
    of the matching tags divided by the number of tags on that movie; the
    contributions of all movies are added up.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue`` output of the raw ``{tagId: score}`` mapping.
    """
    usrObj = User(userId)

    for row in di.getUserMovies(userId):
        film_id = row[0]
        # The actor rank is irrelevant for a user profile, so 0 is passed.
        film = Movie(film_id, 0)
        for tag_row in di.getMovieTags(film_id):
            film.addTag(tag_row[0], tag_row[1])
        usrObj.addMovie(film)

    usrObj.setUnqTags()

    tf_by_tag = {}
    for wanted in usrObj.getUnqTags():
        score = 0
        for film in usrObj.getMovies():
            matched_weight = 0
            tag_count = 0
            for tag in film.getTags():
                tag_count += 1
                if tag.getId() == wanted:
                    matched_weight += tag.getTimeWeight()
            if tag_count == 0:
                # Movies without any tag contribute nothing.
                continue
            score += matched_weight / tag_count
        tf_by_tag[wanted] = score

    return utils.normalizeVector(utils.sortByValue(tf_by_tag))
def tfIdfActorTag(actorId, idfActVector):
    """Return the TF-IDF tag vector of an actor.

    The TF vector comes from ``tfCalc.tfActorTag(actorId)``; each of its
    entries is multiplied by the entry of ``idfActVector`` with the same tag
    key.  The products are passed through ``utils.sortByValue`` and then
    ``utils.normalizeVector``.

    A tag present in the TF vector but missing from ``idfActVector`` raises
    ``KeyError``.
    """
    tf_scores = tfCalc.tfActorTag(actorId)
    weighted = {
        tag_key: tf_scores[tag_key] * idfActVector[tag_key]
        for tag_key in tf_scores.keys()
    }
    return utils.normalizeVector(utils.sortByValue(weighted))
def tfIdfGenreTag(genre, idfGenVector):
    """Return the TF-IDF tag vector of a genre.

    The TF vector comes from ``tfCalc.tfGenreTag(genre)``; each of its entries
    is multiplied by the entry of ``idfGenVector`` with the same tag key.  The
    products are passed through ``utils.sortByValue`` and then
    ``utils.normalizeVector``.

    A tag present in the TF vector but missing from ``idfGenVector`` raises
    ``KeyError``.
    """
    tf_scores = tfCalc.tfGenreTag(genre)
    weighted = {
        tag_key: tf_scores[tag_key] * idfGenVector[tag_key]
        for tag_key in tf_scores.keys()
    }
    return utils.normalizeVector(utils.sortByValue(weighted))
def tfIdfUserTag(userId, idfUserVector):
    """Return the TF-IDF tag vector of a user.

    The TF vector comes from ``tfCalc.tfUserTag(userId)``; each of its entries
    is multiplied by the entry of ``idfUserVector`` with the same tag key.  The
    products are passed through ``utils.sortByValue`` and then
    ``utils.normalizeVector``.

    A tag present in the TF vector but missing from ``idfUserVector`` raises
    ``KeyError``.
    """
    tf_scores = tfCalc.tfUserTag(userId)
    weighted = {
        tag_key: tf_scores[tag_key] * idfUserVector[tag_key]
        for tag_key in tf_scores.keys()
    }
    return utils.normalizeVector(utils.sortByValue(weighted))


# Disabled manual check left over from development:
#idfVect = idfMovieTag()
#tfIdfVect = tfIdfMovieTag('3189', idfVect)
#print((tfIdfVect))
def tfActorTag(actorId):
    """Build the rank-weighted tag term-frequency vector for a single actor.

    Each entry of ``di.getActorMovies`` holds the movie id first and the
    actor's rank in that movie second.  The movies (with their tags attached)
    are added to an ``Actor`` object.  For each unique tag of the actor, the
    per-movie contribution is the movie's rank weight times the summed time
    weights of the matching tags, divided by the number of tags on the movie;
    the contributions of all movies are added up.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue`` output of the raw ``{tagId: score}`` mapping.
    """
    appearances = di.getActorMovies(actorId)
    actor = Actor(actorId)

    for entry in appearances:
        film_id, actor_rank = entry[0], entry[1]
        film = Movie(film_id, actor_rank)
        # Attach every (tag id, timestamp) pair recorded for this movie.
        for tag_row in di.getMovieTags(film_id):
            film.addTag(tag_row[0], tag_row[1])
        actor.addMovie(film)

    actor.setUnqTags()

    tf_by_tag = {}
    for wanted in actor.getUnqTags():
        score = 0
        for film in actor.getMovies():
            film_tags = film.getTags()
            rank_weight = film.getRWeight()
            matched_weight = 0
            tag_count = 0
            for tag_count, tag in enumerate(film_tags, 1):
                if tag.getId() == wanted:
                    matched_weight += tag.getTimeWeight()
            # Some movies carry no tags at all; skip them to avoid dividing by zero.
            if tag_count != 0:
                score += (rank_weight * matched_weight) / tag_count
        tf_by_tag[wanted] = score

    return utils.normalizeVector(utils.sortByValue(tf_by_tag))
def tfGenreTag(genre):
    """Build the tag term-frequency vector for a genre.

    The genre object is obtained from ``createGenObj(genre)``.  For each of its
    unique tags, the per-movie contribution is the sum of the time weights of
    the matching tags divided by the number of tags on that movie; the
    contributions of all movies of the genre are added up.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue`` output of the raw ``{tagId: score}`` mapping.
    """
    genObj = createGenObj(genre)

    tf_by_tag = {}
    for wanted in genObj.getUnqTags():
        score = 0
        for film in genObj.getMovies():
            matched_weight = 0
            tag_count = 0
            for tag in film.getTags():
                if tag.getId() == wanted:
                    matched_weight += tag.getTimeWeight()
                tag_count += 1
            if not tag_count:
                # Untagged movies contribute nothing.
                continue
            score += matched_weight / tag_count
        tf_by_tag[wanted] = score

    ranked = utils.sortByValue(tf_by_tag)
    return utils.normalizeVector(ranked)
def getVals(cls, tweet):
    """Vectorize ``tweet`` with TF-IDF weights and update the class statistics.

    Side effects on ``cls``:
      * ``uniq_words[w]`` is incremented once per tweet containing ``w``
        (new words start at 1),
      * new words get the next free slot in ``pos_map`` and ``pos_in_map``
        advances,
      * ``num_prev_tweet`` is incremented by one,
      * ``idfMap`` is created/refreshed for every word of the tweet.

    The vector handed to ``tweet.setVector`` maps ``pos_map[word]`` to
    ``term count * idf`` and is passed through ``utils.normalizeVector``
    first.  For a word already in ``idfMap`` the score stored *before* this
    call is used for the vector; the refreshed score only takes effect for
    later tweets.

    Returns the number of words this tweet added to ``cls.uniq_words``.
    """
    before = len(cls.uniq_words)

    tfMap = {}  # term -> number of occurrences inside this tweet
    for w in tweet.getTokens():
        if w in tfMap:
            tfMap[w] += 1
            continue
        # When the word comes up for the first time in this tweet, we also
        # count the tweet towards the word's tweet-level counter, so repeated
        # occurrences inside one tweet are counted only once there.
        tfMap[w] = 1
        if w in cls.uniq_words:
            cls.uniq_words[w] += 1
        else:
            cls.uniq_words[w] = 1
            cls.pos_map[w] = cls.pos_in_map
            cls.pos_in_map += 1

    cls.num_prev_tweet += 1
    size_increase = len(cls.uniq_words) - before

    vec = {}
    # .items() instead of .iteritems(): works on Python 2 and Python 3.
    for k, v in tfMap.items():
        if k in cls.idfMap:
            idf_score = cls.idfMap[k]  # score computed from earlier tweets
            # Force true division: with integer division the ratio truncates
            # to 0 for words seen in (nearly) every tweet and log10(0) raises
            # ValueError.
            ratio = float(cls.num_prev_tweet) / (cls.uniq_words[k] + 1)
            cls.idfMap[k] = math.log10(ratio)
        else:
            idf_score = math.log10(cls.num_prev_tweet)
            cls.idfMap[k] = idf_score
        vec[cls.pos_map[k]] = v * idf_score

    vec = utils.normalizeVector(vec)
    tweet.setVector(vec)
    return size_increase