def tfUserTag(userId):
    """Build the normalized, time-weighted tag TF vector for a user.

    Every movie returned by ``di.getUserMovies(userId)`` is wrapped in a
    ``Movie`` (rank fixed to 0, since the actor rank is irrelevant for users)
    and populated with the tags returned by ``di.getMovieTags``.  For each
    unique tag of the user, the per-movie contribution is the sum of the time
    weights of that tag's occurrences divided by the number of tags on the
    movie; contributions are added up over all of the user's movies.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue``-sorted tag -> score mapping.
    """
    usrObj = User(userId)

    # Rows from the data layer: element 0 is used as the movie id.
    for movieRow in di.getUserMovies(userId):
        currentId = movieRow[0]
        mv = Movie(currentId, 0)
        # Tag rows: element 0 is the tag id, element 1 its timestamp.
        for tagRow in di.getMovieTags(currentId):
            mv.addTag(tagRow[0], tagRow[1])
        usrObj.addMovie(mv)

    usrObj.setUnqTags()

    tfVector = {}
    for tagId in usrObj.getUnqTags():
        tagScore = 0
        for movie in usrObj.getMovies():
            matchedWeight = 0
            tagCount = 0
            for tag in movie.getTags():
                tagCount = tagCount + 1
                if tag.getId() == tagId:
                    matchedWeight = matchedWeight + tag.getTimeWeight()
            # Movies without any tags contribute nothing (avoids dividing by 0).
            if tagCount != 0:
                tagScore = tagScore + matchedWeight / tagCount
        tfVector[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(tfVector))
def movieTagSpace(movieId):
    """Rank actors outside a movie's cast by distance in the movie's tag space.

    The tag space is made of the tags returned by ``di.getMovieTags(movieId)``.
    Each actor is represented by its TF-IDF weight
    (``idf.tfIdfActorTag``) for every one of those tags, 0 where the actor does
    not have the tag.  The movie itself is represented by the element-wise sum
    of its cast members' vectors.  Every actor that is not in the cast is
    scored by the Euclidean distance between its vector and the movie vector.

    Returns the last ten entries of ``utils.sortByValue`` applied to the
    ``actor name -> distance`` mapping.
    """
    tagIds = di.getMovieTags(movieId)
    tagLen = len(tagIds)
    actorlist = di.getAllActors()
    actorNames = di.getAllActorNames()
    idfActVector = idf.idfActorTag()
    movieActors = [castRow[0] for castRow in di.getMovieActorIds(movieId)]

    def tagRow(actorId):
        # One actor's TF-IDF weights restricted to the movie's tags.
        weights = idf.tfIdfActorTag(actorId, idfActVector)
        row = []
        for tagEntry in tagIds:
            if tagEntry[0] in weights.keys():
                row.append(weights[tagEntry[0]])
            else:
                row.append(0)
        return row

    # Vectors of the movie's own cast.
    mat = [tagRow(castId) for castId in movieActors]

    # Vectors of every other actor; cast members keep an all-zero placeholder
    # row so that row indices stay aligned with actorlist / actorNames.
    newMat = []
    for actorRow in actorlist:
        if actorRow[0] in movieActors:
            newMat.append([0 for _ in range(tagLen)])
        else:
            newMat.append(tagRow(actorRow[0]))

    # Movie vector: column-wise sum over the cast rows.
    actVect = [0 for _ in range(tagLen)]
    for castVector in mat:
        for col in range(tagLen):
            actVect[col] = actVect[col] + castVector[col]

    dist = {}
    for idx in range(len(newMat)):
        if actorlist[idx][0] in movieActors:
            continue
        dist[actorNames[idx][0]] = distance.euclidean(newMat[idx], actVect)

    ranked = utils.sortByValue(dist)
    return ranked[-10:]
def simActors(actId):
    """Return the ten closing entries of the actor-distance ranking for ``actId``.

    Looks up the row of the module-level ``actorTags`` matrix that belongs to
    ``actId`` (rows are matched through ``actorList[i][0]``), computes the
    Euclidean distance from that row to every other actor's row and returns
    the last ten entries of ``utils.sortByValue`` applied to the
    ``actor name -> distance`` mapping.

    Raises:
        ValueError: if ``actId`` does not occur in ``actorList``.  Previously
            this case surfaced as an ``UnboundLocalError`` on ``givenActor``.
    """
    givenActor = None
    # No early exit on purpose: as before, the last matching row wins if an
    # id happens to appear more than once.
    for i in range(len(actorTags)):
        if actId == actorList[i][0]:
            givenActor = actorTags[i]

    if givenActor is None:
        raise ValueError("unknown actor id: %r" % (actId,))

    d = {}
    for i in range(len(actorList)):
        if actId != actorList[i][0]:
            d[actorNames[i][0]] = distance.euclidean(givenActor, actorTags[i])

    return utils.sortByValue(d)[-10:]
def tfIdfActorTag(actorId, idfActVector):
    """Combine an actor's tag TF vector with the given IDF vector.

    Each tag weight from ``tfCalc.tfActorTag(actorId)`` is multiplied by
    ``idfActVector[tag]``; the product mapping is sorted with
    ``utils.sortByValue`` and passed through ``utils.normalizeVector``.
    A tag missing from ``idfActVector`` raises ``KeyError``.
    """
    tfVector = tfCalc.tfActorTag(actorId)
    tfIdfVector = {
        tag: tfVector[tag] * idfActVector[tag] for tag in tfVector.keys()
    }
    return utils.normalizeVector(utils.sortByValue(tfIdfVector))
def tfIdfGenreTag(genre, idfGenVector):
    """Combine a genre's tag TF vector with the given IDF vector.

    Each tag weight from ``tfCalc.tfGenreTag(genre)`` is multiplied by
    ``idfGenVector[tag]``; the product mapping is sorted with
    ``utils.sortByValue`` and passed through ``utils.normalizeVector``.
    A tag missing from ``idfGenVector`` raises ``KeyError``.
    """
    tfVector = tfCalc.tfGenreTag(genre)
    tfIdfVector = {
        tag: tfVector[tag] * idfGenVector[tag] for tag in tfVector.keys()
    }
    return utils.normalizeVector(utils.sortByValue(tfIdfVector))
def getSimilarity(mat, query, usrMovies):
    """Rank movies the user has not seen by dot product with ``query``.

    Walks the first ``movieLen`` rows of the module-level ``movies`` table
    (element 0 of a row is used as the movie id), skips ids contained in
    ``usrMovies`` and scores the rest as ``np.dot(query, mat[i])``.

    Returns the movie ids in the order produced by ``utils.sortByValue`` on
    the ``movie id -> score`` mapping.
    """
    res = {}
    for idx in range(movieLen):
        candidate = movies[idx][0]
        if candidate in usrMovies:
            continue
        res[candidate] = np.dot(query, mat[idx])

    return [entry[0] for entry in utils.sortByValue(res)]
def tfIdfUserTag(userId, idfUserVector):
    """Combine a user's tag TF vector with the given IDF vector.

    Each tag weight from ``tfCalc.tfUserTag(userId)`` is multiplied by
    ``idfUserVector[tag]``; the product mapping is sorted with
    ``utils.sortByValue`` and passed through ``utils.normalizeVector``.
    A tag missing from ``idfUserVector`` raises ``KeyError``.
    """
    tfVector = tfCalc.tfUserTag(userId)
    tfIdfVector = {
        tag: tfVector[tag] * idfUserVector[tag] for tag in tfVector.keys()
    }
    return utils.normalizeVector(utils.sortByValue(tfIdfVector))


# idfVect = idfMovieTag()
# tfIdfVect = tfIdfMovieTag('3189', idfVect)
# print((tfIdfVect))
def simActors2(actId):
    """Rank actors by distance to ``actId`` in a 5-dimensional SVD space.

    Runs ``np.linalg.svd`` on the module-level ``actorTags`` matrix, keeps the
    first five columns of ``u`` as each actor's coordinates and measures the
    Euclidean distance from the coordinates of ``actId`` to those of every
    other actor in ``actorList``.  If ``actId`` matches no row, the reference
    vector stays all zeros.

    Returns the last ten entries of ``utils.sortByValue`` applied to the
    ``actor name -> distance`` mapping.
    """
    numSemantics = 5
    u, s, v = np.linalg.svd(actorTags, 0)

    rowCount = len(u)
    x = np.zeros((rowCount, numSemantics))
    givenActor = np.zeros(numSemantics)

    for row in range(rowCount):
        isTarget = actId == actorList[row][0]
        for col in range(numSemantics):
            coordinate = u[row][col]
            if isTarget:
                givenActor[col] = coordinate
            x[row][col] = coordinate

    d = {}
    for row in range(len(actorList)):
        if actId == actorList[row][0]:
            continue
        d[actorNames[row][0]] = distance.euclidean(givenActor, x[row])

    ranked = utils.sortByValue(d)
    return ranked[-10:]
def tfActorTag(actorId):
    """Build the normalized, rank- and time-weighted tag TF vector for an actor.

    Every row from ``di.getActorMovies(actorId)`` (element 0: movie id,
    element 1: the actor's rank in that movie) becomes a ``Movie`` populated
    with the tags returned by ``di.getMovieTags``.  For each unique tag of the
    actor, the per-movie contribution is the movie's rank weight times the
    summed time weights of that tag's occurrences, divided by the number of
    tags on the movie; contributions are added up over all movies.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue``-sorted tag -> score mapping.
    """
    movies = di.getActorMovies(actorId)
    actor = Actor(actorId)

    for movieRow in movies:
        currentId = movieRow[0]
        mv = Movie(currentId, movieRow[1])
        # Tag rows: element 0 is the tag id, element 1 its timestamp.
        for tagRow in di.getMovieTags(currentId):
            mv.addTag(tagRow[0], tagRow[1])
        actor.addMovie(mv)

    actor.setUnqTags()

    tfVector = {}
    for tagId in actor.getUnqTags():
        tagScore = 0
        for movie in actor.getMovies():
            movieTags = movie.getTags()
            rankWeight = movie.getRWeight()
            matchedWeight = 0
            tagCount = 0
            for tag in movieTags:
                if tag.getId() == tagId:
                    matchedWeight = matchedWeight + tag.getTimeWeight()
                tagCount = tagCount + 1
            # Some movies carry no tags at all; skip them to avoid dividing by 0.
            if tagCount != 0:
                tagScore = tagScore + (rankWeight * matchedWeight) / tagCount
        tfVector[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(tfVector))
def rankedList(arr, list, seeds, n):
    """Return up to ``n`` ids, in ``utils.sortByValue`` order, that are not seeds.

    Args:
        arr: scores, positionally aligned with ``list``.
        list: rows whose element 0 is the id for the score at the same index.
            (The name shadows the builtin; it is kept because it is part of
            the existing signature.)
        seeds: container of ids to leave out of the result.
        n: maximum number of ids to return.

    Returns:
        A list of at most ``n`` ids taken from the front of
        ``utils.sortByValue({id: score})``, skipping ids found in ``seeds``.
        An empty list when ``n`` is not positive.
    """
    # The previous loop checked the limit only after appending, so n <= 0
    # still yielded one id.
    if n <= 0:
        return []

    d = {}
    for i, score in enumerate(arr):
        d[list[i][0]] = score

    retList = []
    # Iterate over the sorted entries themselves: when ids repeat, the dict has
    # fewer entries than ``arr`` and indexing by range(len(arr)) overran it.
    for entry in utils.sortByValue(d):
        if entry[0] in seeds:
            continue
        retList.append(entry[0])
        if len(retList) >= n:
            break
    return retList
def tfGenreTag(genre):
    """Build the normalized, time-weighted tag TF vector for a genre.

    The genre object comes from ``createGenObj(genre)``.  For each of its
    unique tags, the per-movie contribution is the sum of the time weights of
    that tag's occurrences divided by the number of tags on the movie;
    contributions are added up over all of the genre's movies.

    Returns the result of ``utils.normalizeVector`` applied to the
    ``utils.sortByValue``-sorted tag -> score mapping.
    """
    genObj = createGenObj(genre)

    tfVector = {}
    for tagId in genObj.getUnqTags():
        tagScore = 0
        for movie in genObj.getMovies():
            matchedWeight = 0
            tagCount = 0
            for tag in movie.getTags():
                tagCount = tagCount + 1
                if tag.getId() == tagId:
                    matchedWeight = matchedWeight + tag.getTimeWeight()
            # Movies without any tags contribute nothing (avoids dividing by 0).
            if tagCount != 0:
                tagScore = tagScore + matchedWeight / tagCount
        tfVector[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(tfVector))
u = [0 for i in range(0,movieVectors)] result = [[0 for i in range(movieVectors)] for j in range(0,movies)] #pre-calculating ri values for movie in rel: for movieVector in range(0,movieVectors): if(semMatrix[movie][movieVector] != 0): r[movieVector] += 1 #pre-calculating ni values for movie in range(0,movies): for movieVector in range(0,movieVectors): if(semMatrix[movie][movieVector] != 0): n[movieVector] += 1 print(n) #pre-calculating pi and ui values for the formula for movieVector in range(0,movieVectors): p[movieVector] = (r[movieVector] + n[movieVector]/N)/(R + 1) u[movieVector] = (n[movieVector] - r[movieVector] +n[movieVector]/N)/(N - R + 1) print(p) print(u) #computing the values of sim(mi) for each movie mi for movieVector in range(0,movieVectors): q = math.log((p[movieVector]*(1-u[movieVector]))/(u[movieVector]*(1-p[movieVector]))) for movie in range(0,movies): result[movie][movieVector] = semMatrix[movie][movieVector] * q print("result = ",result) for movie in range(0,movies): ranks[movie] = sum(result[movie]) return ranks revisedRanks = getRevisedRanks(semMatrix, rel, irrel) revisedRanks = utils.sortByValue(revisedRanks) print(revisedRanks)
def getRevisedRanks(semMatrix, rel, irr, objList, usrMovies, queryVect):
    """Re-weight the query from relevance feedback and re-rank the objects.

    For every semantic (column of ``semMatrix``) the function counts in how
    many relevant rows (``rel``), irrelevant rows (``irr``) and rows overall
    the column is non-zero, turns those counts into smoothed, sum-normalized
    estimates ``p`` and ``u``, and scales the matching ``queryVect`` entry by
    ``log(p(1-u) / (u(1-p)))``.  A factor of 1 is used when that ratio is
    undefined or equal to 1.

    Args:
        semMatrix: object-by-semantic matrix; row ``i`` belongs to ``objList[i]``.
        rel: row indices marked relevant.
        irr: row indices marked irrelevant.
        objList: rows whose element 0 is the object id.
        usrMovies: ids to exclude from the ranking.
        queryVect: query vector; it is MODIFIED IN PLACE.

    Returns:
        Object ids not in ``usrMovies``, in the order produced by
        ``utils.sortByValue`` on ``id -> np.dot(queryVect, semMatrix[i])``.

    Prints ``p``, ``u`` and the updated query vector as a side effect.
    """
    N = len(objList)  # number of objects (movies)
    numSemantics = len(semMatrix[0])
    R = len(rel)
    Q = len(irr)

    def nonZeroCounts(rowIndices):
        # Per semantic: number of the given rows with a non-zero entry.
        counts = np.zeros(numSemantics)
        for row in rowIndices:
            for sem in range(numSemantics):
                if semMatrix[row][sem] != 0:
                    counts[sem] += 1
        return counts

    r = nonZeroCounts(rel)       # ri: relevant rows containing the semantic
    q = nonZeroCounts(irr)       # qi: irrelevant rows containing the semantic
    n = nonZeroCounts(range(N))  # ni: all rows containing the semantic

    # Smoothed estimates; n / N acts as the smoothing term.
    p = (r + n / N) / (R + 1)
    u = (q + n / N) / (Q + 1)

    pSum = np.sum(p)
    uSum = np.sum(u)
    # Guard the normalization against an all-zero vector.
    if pSum == 0:
        pSum = 1
    if uSum == 0:
        uSum = 1
    p = p / pSum
    u = u / uSum
    print("\np =", p)
    print("\nu =", u)

    for sem in range(numSemantics):
        nr = p[sem] * (1 - u[sem])
        dr = u[sem] * (1 - p[sem])
        if nr == 0 or dr == 0 or nr == dr:
            # Ratio undefined or neutral: leave this query component as is.
            sim = 1
        else:
            sim = math.log(nr / dr)
        queryVect[sem] = sim * queryVect[sem]

    print("\nNew Query Vector:")
    utils.printVect(queryVect)

    ranks = {}
    for idx in range(N):
        objId = objList[idx][0]
        if objId in usrMovies:
            continue
        ranks[objId] = np.dot(queryVect, semMatrix[idx])

    return [entry[0] for entry in utils.sortByValue(ranks)]