Ejemplo n.º 1
0
def tfUserTag(userId):
    """Build the normalized term-frequency tag vector of a user.

    Every movie returned by ``di.getUserMovies`` is loaded together with
    its tags (``di.getMovieTags``) into a ``User`` object.  For each unique
    tag of the user, the score is the sum over the user's movies of
    (summed time weight of that tag in the movie) / (number of tags on the
    movie).  Movies without tags contribute nothing.

    Args:
        userId: identifier passed to ``User`` and ``di.getUserMovies``.

    Returns:
        Whatever ``utils.normalizeVector`` produces for the scores after
        they went through ``utils.sortByValue``.
    """
    user = User(userId)
    for movieRow in di.getUserMovies(userId):
        movieId = movieRow[0]
        # The actor rank is not needed for a user profile, so it is set to 0.
        film = Movie(movieId, 0)
        for tagRow in di.getMovieTags(movieId):
            # tagRow is (tagId, timeStamp, ...)
            film.addTag(tagRow[0], tagRow[1])
        user.addMovie(film)

    user.setUnqTags()
    scores = {}
    for tagId in user.getUnqTags():
        tagScore = 0
        for film in user.getMovies():
            filmTags = [tag for tag in film.getTags()]
            if not filmTags:
                # A movie without tags cannot contribute to any tag score.
                continue
            matchedWeight = sum(
                tag.getTimeWeight() for tag in filmTags
                if tag.getId() == tagId)
            tagScore = tagScore + matchedWeight / len(filmTags)
        scores[tagId] = tagScore
    return utils.normalizeVector(utils.sortByValue(scores))
Ejemplo n.º 2
0
def movieTagSpace(movieId):
    """Compare actors outside a movie's cast to the cast, in the movie's tag space.

    The movie is represented by the component-wise sum of the TF-IDF tag
    vectors of its actors, restricted to the tags of the movie itself.
    Every actor that is not in the movie gets the Euclidean distance between
    that sum and the actor's own restricted TF-IDF vector.

    Args:
        movieId: identifier passed to ``di.getMovieTags`` and
            ``di.getMovieActorIds``.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the
        ``{actor name: distance}`` mapping.
        NOTE(review): which end of the ordering this is depends on
        ``utils.sortByValue`` -- confirm.
    """
    movieTagRows = di.getMovieTags(movieId)
    allActors = di.getAllActors()
    allActorNames = di.getAllActorNames()
    idfVector = idf.idfActorTag()
    castIds = [row[0] for row in di.getMovieActorIds(movieId)]
    tagKeys = [row[0] for row in movieTagRows]

    def tagWeights(actorId):
        # TF-IDF weights of the actor for the movie's tags (0 when absent).
        weights = idf.tfIdfActorTag(actorId, idfVector)
        return [
            weights[key] if key in weights.keys() else 0 for key in tagKeys
        ]

    # Sum the cast's vectors component by component.
    movieVector = [0] * len(tagKeys)
    for castId in castIds:
        movieVector = [
            total + weight
            for total, weight in zip(movieVector, tagWeights(castId))
        ]

    # NOTE(review): allActorNames is assumed to be index-aligned with
    # allActors -- confirm against the data-access layer.
    candidates = [(idx, row[0]) for idx, row in enumerate(allActors)
                  if row[0] not in castIds]
    candidateVectors = [tagWeights(actorId) for _, actorId in candidates]

    dist = {}
    for (idx, _), vector in zip(candidates, candidateVectors):
        dist[allActorNames[idx][0]] = distance.euclidean(vector, movieVector)
    return utils.sortByValue(dist)[-10:]
Ejemplo n.º 3
0
def simActors(actId):
    """Rank the other actors by Euclidean distance to the given actor.

    Reads the module-level ``actorList``, ``actorTags`` and ``actorNames``.
    NOTE(review): these are assumed to be index-aligned (row ``i`` of each
    describes the same actor) -- confirm where they are built.

    Args:
        actId: actor identifier, compared against ``actorList[i][0]``.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the
        ``{actor name: distance}`` mapping of every other actor.

    Raises:
        ValueError: if ``actId`` does not occur in ``actorList``.  The
            previous code failed with an ``UnboundLocalError`` in that case.
    """
    givenActor = None
    for row, tagVector in zip(actorList, actorTags):
        if actId == row[0]:
            # No break: if an id occurs twice the last occurrence wins,
            # exactly as before.
            givenActor = tagVector
    if givenActor is None:
        raise ValueError("unknown actor id: {!r}".format(actId))

    distances = {}
    for idx, row in enumerate(actorList):
        if actId != row[0]:
            distances[actorNames[idx][0]] = distance.euclidean(
                givenActor, actorTags[idx])
    return utils.sortByValue(distances)[-10:]
Ejemplo n.º 4
0
def tfIdfActorTag(actorId, idfActVector):
    """Return the normalized TF-IDF tag vector of an actor.

    Args:
        actorId: identifier passed to ``tfCalc.tfActorTag``.
        idfActVector: mapping from tag to IDF weight; it must contain every
            tag of the actor's TF vector.

    Returns:
        The per-tag products TF * IDF, passed through ``utils.sortByValue``
        and then ``utils.normalizeVector``.
    """
    termFreq = tfCalc.tfActorTag(actorId)
    weighted = {
        tag: termFreq[tag] * idfActVector[tag] for tag in termFreq.keys()
    }
    return utils.normalizeVector(utils.sortByValue(weighted))
Ejemplo n.º 5
0
def tfIdfGenreTag(genre, idfGenVector):
    """Return the normalized TF-IDF tag vector of a genre.

    Args:
        genre: genre passed to ``tfCalc.tfGenreTag``.
        idfGenVector: mapping from tag to IDF weight; it must contain every
            tag of the genre's TF vector.

    Returns:
        The per-tag products TF * IDF, passed through ``utils.sortByValue``
        and then ``utils.normalizeVector``.
    """
    termFreq = tfCalc.tfGenreTag(genre)
    weighted = {}
    for tag in termFreq.keys():
        weighted[tag] = termFreq[tag] * idfGenVector[tag]
    ordered = utils.sortByValue(weighted)
    return utils.normalizeVector(ordered)
def getSimilarity(mat, query, usrMovies):
    """Rank the movies the user has not seen by dot product with a query.

    Reads the module-level ``movies`` and ``movieLen``.

    Args:
        mat: per-movie vectors; ``mat[i]`` belongs to ``movies[i]``.
        query: query vector, dotted with each row of ``mat``.
        usrMovies: container of movie ids to leave out of the result.

    Returns:
        List of movie ids in the order produced by ``utils.sortByValue``.
    """
    scores = {}
    for idx in range(movieLen):
        movieId = movies[idx][0]
        if movieId in usrMovies:
            continue
        scores[movieId] = np.dot(query, mat[idx])
    return [entry[0] for entry in utils.sortByValue(scores)]
Ejemplo n.º 7
0
def tfIdfUserTag(userId, idfUserVector):
    """Return the normalized TF-IDF tag vector of a user.

    Args:
        userId: identifier passed to ``tfCalc.tfUserTag``.
        idfUserVector: mapping from tag to IDF weight; it must contain every
            tag of the user's TF vector.

    Returns:
        The per-tag products TF * IDF, passed through ``utils.sortByValue``
        and then ``utils.normalizeVector``.
    """
    termFreq = tfCalc.tfUserTag(userId)
    weighted = dict(
        (tag, termFreq[tag] * idfUserVector[tag]) for tag in termFreq.keys())
    return utils.normalizeVector(utils.sortByValue(weighted))


#idfVect = idfMovieTag()
#tfIdfVect = tfIdfMovieTag('3189', idfVect)
#print((tfIdfVect))
Ejemplo n.º 8
0
def simActors2(actId, numSemantics=5):
    """Rank the other actors by distance to an actor in SVD latent space.

    The module-level ``actorTags`` matrix is decomposed with a reduced SVD
    and each actor is represented by the first ``numSemantics`` columns of
    its row of ``U``.  Reads the module-level ``actorList`` and
    ``actorNames`` as well.
    NOTE(review): the three globals are assumed to be index-aligned --
    confirm where they are built.

    Args:
        actId: actor identifier, compared against ``actorList[i][0]``.
        numSemantics: number of leading latent dimensions to keep
            (default 5, the previously hard-coded value).  It is clamped to
            the number of columns the SVD yields; the old code raised
            ``IndexError`` when fewer than five were available.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the
        ``{actor name: distance}`` mapping of every other actor.

    Raises:
        ValueError: if ``actId`` does not occur in ``actorList``.  The old
            code silently measured distances from the zero vector instead.
    """
    matches = [idx for idx, row in enumerate(actorList) if actId == row[0]]
    if not matches:
        raise ValueError("unknown actor id: {!r}".format(actId))

    u, _, _ = np.linalg.svd(actorTags, full_matrices=False)
    width = min(numSemantics, u.shape[1])
    # Copy into a float64 buffer, as the element-wise loop used to do.
    latent = np.zeros((len(u), width))
    latent[:] = u[:, :width]

    # If an id occurs twice the last occurrence wins, exactly as before.
    givenActor = latent[matches[-1]]

    distances = {}
    for idx, row in enumerate(actorList):
        if actId != row[0]:
            distances[actorNames[idx][0]] = distance.euclidean(
                givenActor, latent[idx])
    return utils.sortByValue(distances)[-10:]
Ejemplo n.º 9
0
def tfActorTag(actorId):
    """Build the normalized term-frequency tag vector of an actor.

    Every movie returned by ``di.getActorMovies`` is loaded with the
    actor's rank in it and with its tags (``di.getMovieTags``).  For each
    unique tag of the actor, the score is the sum over the actor's movies of
    (rank weight of the movie * summed time weight of that tag in the
    movie) / (number of tags on the movie).

    Args:
        actorId: identifier passed to ``Actor`` and ``di.getActorMovies``.

    Returns:
        Whatever ``utils.normalizeVector`` produces for the scores after
        they went through ``utils.sortByValue``.
    """
    actor = Actor(actorId)
    for movieRow in di.getActorMovies(actorId):
        # movieRow is (movieId, actor rank in that movie, ...)
        movieId, rank = movieRow[0], movieRow[1]
        film = Movie(movieId, rank)
        for tagRow in di.getMovieTags(movieId):
            # tagRow is (tagId, timeStamp, ...)
            film.addTag(tagRow[0], tagRow[1])
        actor.addMovie(film)

    actor.setUnqTags()
    scores = {}
    for tagId in actor.getUnqTags():
        tagScore = 0
        for film in actor.getMovies():
            filmTags = [tag for tag in film.getTags()]
            rankWeight = film.getRWeight()
            if not filmTags:
                # There are movies with no tags; they contribute nothing.
                continue
            matchedWeight = sum(
                tag.getTimeWeight() for tag in filmTags
                if tag.getId() == tagId)
            tagScore = tagScore + (rankWeight * matchedWeight) / len(filmTags)
        scores[tagId] = tagScore
    return utils.normalizeVector(utils.sortByValue(scores))
def rankedList(arr, list, seeds, n):
    """Return up to ``n`` top-ranked ids that are not seeds.

    Args:
        arr: scores; ``arr[i]`` belongs to ``list[i]``.
        list: rows whose first element is the id of the scored object.
            (The name shadows the builtin but is kept because it is part of
            the public signature.)
        seeds: container of ids to exclude from the result.
        n: maximum number of ids to return.

    Returns:
        At most ``n`` ids, in the order produced by ``utils.sortByValue``.
        An empty list when ``n <= 0``; the old code still returned one id in
        that case because it tested the limit only after appending.
    """
    if n <= 0:
        return []

    scores = {}
    for idx, score in enumerate(arr):
        scores[list[idx][0]] = score

    ranked = []
    # Iterate over the sorted entries themselves instead of range(len(arr)):
    # duplicate ids collapse in the dict, so it may hold fewer entries than
    # ``arr`` and positional indexing would raise IndexError.
    for entry in utils.sortByValue(scores):
        objId = entry[0]
        if objId in seeds:
            continue
        ranked.append(objId)
        if len(ranked) >= n:
            break
    return ranked
Ejemplo n.º 11
0
def tfGenreTag(genre):
    """Build the normalized term-frequency tag vector of a genre.

    The genre object comes from ``createGenObj``.  For each of its unique
    tags, the score is the sum over the genre's movies of (summed time
    weight of that tag in the movie) / (number of tags on the movie).
    Movies without tags contribute nothing.

    Args:
        genre: genre passed to ``createGenObj``.

    Returns:
        Whatever ``utils.normalizeVector`` produces for the scores after
        they went through ``utils.sortByValue``.
    """
    genObj = createGenObj(genre)
    scores = {}
    for tagId in genObj.getUnqTags():
        tagScore = 0
        for film in genObj.getMovies():
            filmTags = [tag for tag in film.getTags()]
            if not filmTags:
                continue
            matchedWeight = sum(
                tag.getTimeWeight() for tag in filmTags
                if tag.getId() == tagId)
            tagScore = tagScore + matchedWeight / len(filmTags)
        scores[tagId] = tagScore
    return utils.normalizeVector(utils.sortByValue(scores))
Ejemplo n.º 12
0
	u = [0 for i in range(0,movieVectors)]
	result = [[0 for i in range(movieVectors)] for j in range(0,movies)]
	#pre-calculating ri values	
	for movie in rel:
			for movieVector in range(0,movieVectors):
				if(semMatrix[movie][movieVector] != 0):
					r[movieVector] += 1
	#pre-calculating ni values		
	for movie in range(0,movies):
		for movieVector in range(0,movieVectors):
			if(semMatrix[movie][movieVector] != 0):
				n[movieVector] += 1
	print(n)
	#pre-calculating pi and ui values for the formula
	for movieVector in range(0,movieVectors):
		p[movieVector] = (r[movieVector] + n[movieVector]/N)/(R + 1)
		u[movieVector] = (n[movieVector] - r[movieVector] +n[movieVector]/N)/(N - R + 1)
	print(p)
	print(u)
	#computing the values of sim(mi) for each movie mi
	for movieVector in range(0,movieVectors):
		q = math.log((p[movieVector]*(1-u[movieVector]))/(u[movieVector]*(1-p[movieVector])))
		for movie in range(0,movies):
			result[movie][movieVector] = semMatrix[movie][movieVector] * q
	print("result = ",result)
	for movie in range(0,movies):
		ranks[movie] = sum(result[movie])
	return ranks
# Script section: compute the revised ranks from semMatrix and the feedback
# sets rel / irrel, order them with utils.sortByValue and print the result.
revisedRanks = getRevisedRanks(semMatrix, rel, irrel)
revisedRanks = utils.sortByValue(revisedRanks)
print(revisedRanks)
Ejemplo n.º 13
0
def getRevisedRanks(semMatrix, rel, irr, objList, usrMovies, queryVect):
    """Re-weight the query from relevance feedback and re-rank the objects.

    For every semantic (column of ``semMatrix``) the function counts the
    relevant rows, the irrelevant rows and all rows with a non-zero entry.
    The counts become smoothed ratios ``p`` and ``u``, each normalised to
    sum to 1 (unless the sum is 0).  Every component of ``queryVect`` is
    multiplied by ``log(p * (1 - u) / (u * (1 - p)))``, or by 1 when that
    ratio is undefined or equal to 1.  Objects not in ``usrMovies`` are then
    scored by the dot product of the new query with their row.

    Args:
        semMatrix: rows of semantic weights; row ``i`` belongs to
            ``objList[i]``.
        rel: row indices marked relevant.
        irr: row indices marked irrelevant.
        objList: rows whose first element is the object id.
        usrMovies: container of object ids to leave out of the ranking.
        queryVect: query vector; it is modified in place.

    Returns:
        List of object ids in the order produced by ``utils.sortByValue``.
    """
    N = len(objList)  # Number of objects
    numSemantics = len(semMatrix[0])
    R = len(rel)
    Q = len(irr)

    def nonZeroCounts(rowIds):
        # Per semantic, how many of the given rows have a non-zero entry.
        counts = np.zeros(numSemantics)
        for rowId in rowIds:
            for sem in range(numSemantics):
                if semMatrix[rowId][sem] != 0:
                    counts[sem] += 1
        return counts

    relCounts = nonZeroCounts(rel)
    irrCounts = nonZeroCounts(irr)
    allCounts = nonZeroCounts(range(N))

    # Smoothed estimates; n / N acts as the smoothing term.
    p = (relCounts + allCounts / N) / (R + 1)
    u = (irrCounts + allCounts / N) / (Q + 1)
    pSum = np.sum(p)
    uSum = np.sum(u)
    if pSum == 0:
        pSum = 1
    if uSum == 0:
        uSum = 1
    p = p / pSum
    u = u / uSum
    print("\np =", p)
    print("\nu =", u)

    for sem in range(numSemantics):
        nr = p[sem] * (1 - u[sem])
        dr = u[sem] * (1 - p[sem])
        if nr == 0 or dr == 0 or nr == dr:
            # Undefined or neutral ratio: leave this component unchanged.
            sim = 1
        else:
            sim = math.log(nr / dr)
        queryVect[sem] = sim * queryVect[sem]
    print("\nNew Query Vector:")
    utils.printVect(queryVect)

    scores = {}
    for idx, objRow in enumerate(objList):
        if objRow[0] not in usrMovies:
            scores[objRow[0]] = np.dot(queryVect, semMatrix[idx])
    return [entry[0] for entry in utils.sortByValue(scores)]