Ejemplo n.º 1
0
def tfUserTag(userId):
    """Return the normalized, time-weighted tag vector of a user.

    Every movie fetched for the user is loaded together with its
    (tag id, timestamp) rows. For each unique tag of the user, the score is
    the sum over the user's movies of

        (sum of time weights of that tag on the movie) / (tags on the movie)

    Movies without tags contribute nothing. The resulting {tag id: score}
    mapping is passed through utils.sortByValue and utils.normalizeVector.
    """
    profile = User(userId)
    for watched in di.getUserMovies(userId):
        movieKey = watched[0]
        # The actor rank has no meaning for a user profile, so it is set to 0.
        film = Movie(movieKey, 0)
        for tagRow in di.getMovieTags(movieKey):
            film.addTag(tagRow[0], tagRow[1])
        profile.addMovie(film)

    profile.setUnqTags()
    scores = {}
    for wantedTag in profile.getUnqTags():
        tagScore = 0
        for film in profile.getMovies():
            matchedWeight = 0
            tagCount = 0  # stays 0 when the movie carries no tags
            for tagCount, candidate in enumerate(film.getTags(), start=1):
                if candidate.getId() == wantedTag:
                    matchedWeight = matchedWeight + candidate.getTimeWeight()
            if tagCount == 0:
                continue
            tagScore = tagScore + matchedWeight / tagCount
        scores[wantedTag] = tagScore

    return utils.normalizeVector(utils.sortByValue(scores))
Ejemplo n.º 2
0
def movieTagSpace(movieId):
    """Compare actors outside a movie's cast with the cast, in the movie's tag space.

    The movie's tags define the dimensions. Each cast member's tf-idf tag
    weights (from idf.tfIdfActorTag) are projected onto those dimensions and
    summed into one cast vector. Every actor who is not in the cast gets the
    same projection and is compared to the cast vector by Euclidean distance.

    Returns the last ten entries of utils.sortByValue applied to the
    {actor name: distance} mapping.
    """
    tagRows = di.getMovieTags(movieId)
    tagCount = len(tagRows)
    allActors = di.getAllActors()
    allNames = di.getAllActorNames()
    idfVector = idf.idfActorTag()
    castIds = [row[0] for row in di.getMovieActorIds(movieId)]

    def projectOntoMovieTags(actorId):
        # tf-idf weight of every movie tag for one actor; 0 where the actor
        # has no weight for the tag.
        weights = idf.tfIdfActorTag(actorId, idfVector)
        known = weights.keys()
        return [
            weights[row[0]] if row[0] in known else 0
            for row in tagRows
        ]

    # One projected row per cast member.
    castRows = [projectOntoMovieTags(castId) for castId in castIds]

    # One row per actor in the full list; cast members keep an all-zero row
    # that is never used for a distance.
    candidateRows = []
    for actorRow in allActors:
        if actorRow[0] in castIds:
            candidateRows.append([0 for _ in range(tagCount)])
        else:
            candidateRows.append(projectOntoMovieTags(actorRow[0]))

    # Column-wise sum of the cast rows.
    castVector = [0 for _ in range(tagCount)]
    for castRow in castRows:
        for col in range(tagCount):
            castVector[col] = castVector[col] + castRow[col]

    distances = {}
    for idx, candidate in enumerate(candidateRows):
        if allActors[idx][0] in castIds:
            continue
        distances[allNames[idx][0]] = distance.euclidean(candidate, castVector)
    return utils.sortByValue(distances)[-10:]
Ejemplo n.º 3
0
def ldaInputTags(genre):
    """Return one list of tag ids per tagged movie of the given genre.

    Each inner list holds the first column of every row returned by
    di.getMovieTags for that movie, in order. Movies without any tag rows
    are left out.
    """
    documents = []
    for movieRow in di.getGenreMovies(genre):
        tagIds = [tagRow[0] for tagRow in di.getMovieTags(movieRow[0])]
        if tagIds:
            documents.append(tagIds)
    return documents
Ejemplo n.º 4
0
def tfMovTag(movieId):
    """Build a Movie with all of its tags and return utils.getGenreMovieTags for it."""
    # The actor rank is not needed here, so the movie is created with rank 0.
    film = Movie(movieId, 0)
    for tagRow in di.getMovieTags(movieId):
        # Each row is (tag id, timestamp, ...).
        film.addTag(tagRow[0], tagRow[1])
    return utils.getGenreMovieTags(film)


#print(tfMovTag('3854'))
Ejemplo n.º 5
0
def tfActorTag(actorId):
    """Return the normalized, rank- and time-weighted tag vector of an actor.

    Every movie of the actor is loaded with the actor's rank in it and with
    the movie's (tag id, timestamp) rows. For each unique tag of the actor,
    the score is the sum over the actor's movies of

        rank weight * (sum of time weights of that tag) / (tags on the movie)

    Movies without tags contribute nothing. The resulting {tag id: score}
    mapping is passed through utils.sortByValue and utils.normalizeVector.
    """
    credits = di.getActorMovies(actorId)
    performer = Actor(actorId)
    for credit in credits:
        # Each entry is (movie id, actor rank in that movie).
        film = Movie(credit[0], credit[1])
        for tagRow in di.getMovieTags(credit[0]):
            film.addTag(tagRow[0], tagRow[1])
        performer.addMovie(film)

    performer.setUnqTags()
    scores = {}
    for wantedTag in performer.getUnqTags():
        tagScore = 0
        for film in performer.getMovies():
            filmTags = film.getTags()
            rankWeight = film.getRWeight()
            matchedWeight = 0
            tagCount = 0  # stays 0 when the movie carries no tags
            for tagCount, candidate in enumerate(filmTags, start=1):
                if candidate.getId() == wantedTag:
                    matchedWeight = matchedWeight + candidate.getTimeWeight()
            # Some movies have no tags at all; skip them to avoid dividing by 0.
            if tagCount == 0:
                continue
            tagScore = tagScore + (rankWeight * matchedWeight) / tagCount
        scores[wantedTag] = tagScore

    return utils.normalizeVector(utils.sortByValue(scores))
def formLdaMat1(numSemantics):
    """Fit an LDA model on the movie x tag count matrix and return doc_topic_.

    Relies on the module-level `movies`, `tagIds` and `allTagLen`. Each matrix
    row has `allTagLen` columns; a cell counts how many tag rows of the movie
    share their id with the tag at that column position in `tagIds`.

    numSemantics is the number of topics requested from lda.LDA.
    """
    movieRows = tuple(movies)
    tagRows = tuple(tagIds)

    counts = []
    for movieRow in movieRows:
        row = [0 for _ in range(allTagLen)]
        assigned = tuple(di.getMovieTags(movieRow[0]))
        for tagRow in tagRows:
            for hit in assigned:
                if tagRow[0] != hit[0]:
                    continue
                # The column is the tag's position in the full tag list.
                row[tagRows.index(tagRow)] += 1
        counts.append(row)

    topicModel = lda.LDA(n_topics=numSemantics, n_iter=100, random_state=1)
    topicModel.fit(np.array(counts))
    return topicModel.doc_topic_
Ejemplo n.º 7
0
def createGenObj(genre):
    """Return a Genre populated with its movies, their tags, and its unique tags.

    Every movie returned by di.getGenreMovies is created with rank 0, given
    one tag per (tag id, timestamp) row from di.getMovieTags, and added to the
    Genre. setUnqTags() is called once after all movies are added.
    """
    bucket = Genre(genre)
    for movieRow in di.getGenreMovies(genre):
        movieKey = movieRow[0]
        # The actor rank is not needed for genres, so it is set to 0.
        film = Movie(movieKey, 0)
        for tagRow in di.getMovieTags(movieKey):
            film.addTag(tagRow[0], tagRow[1])
        bucket.addMovie(film)
    bucket.setUnqTags()
    return bucket
Ejemplo n.º 8
0
import dbInfo as db
import numpy as np
import persPageRank as ppr

# Build a movie-by-movie similarity matrix from per-movie tag term
# frequencies, then fetch the rated movies of the requested user.
userId = input("\nGive User Id: ")

movies = db.getAllMovies()
movieNames = db.getAllMovieNames()

# tfmovies maps movie id -> {tag id: share of the movie's tag rows with that id}.
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    tfvect = {}
    # A movie without tag rows keeps an empty vector; dividing by
    # len(Taglist) == 0 would raise ZeroDivisionError.
    if len(Taglist) != 0:
        # The distinct tag ids of a movie arrive as one comma-separated string.
        UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
        for tag in UnqTags:
            tffact = sum(1 for t in Taglist if t[0] == tag)
            # Key by the whole tag id. `tag` is a string, so tag[0] would be
            # only its first character: multi-character ids would collide and
            # would not be found when the matrix is filled below.
            tfvect[tag] = tffact / len(Taglist)
    tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()
# Rows follow the order of `movies`, columns the order of `tagids`.
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    movieVector = tfmovies[movies[i][0]]
    for j, tagRow in enumerate(tagids):
        if tagRow[0] in movieVector:
            movietf[i][j] = movieVector[tagRow[0]]

# Entry (a, b) is the dot product of the tf rows of movies a and b.
matrix = np.matmul(movietf, np.transpose(movietf))
seedList = db.getUserMoviesRates(userId)
seeds = []
import dbInfo as di
import numpy as np
import lda
import sys

# Print arrays without summarization. Current NumPy releases raise ValueError
# for threshold=np.nan; sys.maxsize is the documented value for full output.
np.set_printoptions(threshold=sys.maxsize)

movies = di.getAllMovies()
movies1 = tuple(movies)
tags = di.getAllTags()
tags1 = tuple(tags)
n1 = len(movies1)
n2 = len(tags)

# movTag[m][t] counts how many tag rows of movie m carry the id of tag t,
# where t is the tag's position in the full tag list.
movTag = []
for movie in movies1:
    arr = [0] * n2
    tgs = tuple(di.getMovieTags(movie[0]))
    for tag in tags1:
        for tg in tgs:
            if tag[0] == tg[0]:
                # Look the column up once per hit instead of twice.
                column = tags1.index(tag)
                arr[column] += 1
                print("i = ", column)
    movTag.append(arr)
movTag = np.array(movTag)
print(movTag)
#model = lda.LDA(n_topics = 500, n_iter = 100, random_state = 1)
#model.fit(movTag)
#doc2topic = model.doc_topic_

Ejemplo n.º 10
0
# Load every row of the movie-actor CSV into the movie_actor table.
with open('../testdata/movie-actor.csv', encoding='utf8') as movie_actor:
    entries = csv.DictReader(movie_actor)
    for entry in entries:
        # Column order of the insert: movie id, actor id, actor rank in the movie.
        rowValues = [
            entry['movieid'],
            entry['actorid'],
            entry['actor_movie_rank'],
        ]
        myDbCurr.execute("insert into movie_actor values(?,?,?)", rowValues)

myDbConn.commit()
print("6")
# Add tags to actors
# Each actor row receives a comma-separated string of the distinct tag ids
# found on the actor's movies, in first-seen order ('' when there are none).
actors = di.getAllActors()
for actor in actors:
    movies = di.getActorMovies(actor[0])
    seenTagIds = set()
    orderedTagIds = []
    for movie in movies:
        movieTags = di.getMovieTags(movie[0])
        for movieTag in movieTags:
            tagId = movieTag[0]
            # Compare whole ids. A substring test against the joined string
            # would treat '12' as already present once '123' is stored.
            if tagId not in seenTagIds:
                seenTagIds.add(tagId)
                orderedTagIds.append(tagId)
    tags = ','.join(orderedTagIds)
    myDbCurr.execute("update imdb_actor_info set tags = ? where actor_id = ?", (tags, actor[0]))
myDbConn.commit()
print("5")
# Add tags to Genres
myDbCurr.execute("select distinct genre from mlmovies")
genres = myDbCurr.fetchall()
for genre in genres:
	movies = di.getGenreMovies(genre[0])
	tags = ''
	for movie in movies: