def vectActMovTag():
    """Build a nested actor -> movie -> tag mapping.

    For every (actor, movie, tag) combination the entry is the movie's value
    from ``di.getAllMovieYrs()`` (presumably its release year) when the movie
    is listed among the actor's movies and the tag id is among the movie's
    tag ids; otherwise the entry is 0.

    Returns:
        A tuple ``(vect, actors, movies, years)`` where ``vect`` is a
        ``defaultdict`` indexed as ``vect[actorId][movieId][tagId]`` and the
        other three items are the raw results of ``di.getAllActors()``,
        ``di.getAllMovies()`` and ``di.getAllYears()``.
    """
    actors = di.getAllActors()
    tags = di.getAllTags()
    movies = di.getAllMovies()
    years = di.getAllYears()
    movYears = {row[0]: row[1] for row in di.getAllMovieYrs()}

    # Movie ids per actor, fetched once up front so the triple loop below
    # does not query the data layer repeatedly.
    actMoviesDb = {
        act[0]: [row[0] for row in di.getActorMovieIds(act[0])]
        for act in actors
    }

    vect = defaultdict(lambda: defaultdict(dict))
    for mov in movies:
        movId = mov[0]
        # Tag ids are stored as one comma-separated string.
        movTags = di.getMovieTagIds(movId)[0][0].split(",")
        for act in actors:
            actId = act[0]
            actedIn = movId in actMoviesDb[actId]
            for tag in tags:
                tagId = tag[0]
                # Both branches use the same actor -> movie -> tag key order.
                # The zero default used to be written under
                # vect[movie][actor][tag], which left the actor-keyed part of
                # the structure without its zero entries.
                if actedIn and tagId in movTags:
                    vect[actId][movId][tagId] = movYears[movId]
                else:
                    vect[actId][movId][tagId] = 0
    return (vect, actors, movies, years)
def loadDataset(filename, trainingSet=None, testSet=None):
    """Build labelled training rows and randomly-labelled test rows.

    Each row of the CSV file is read as ``[movieId, label, ...]``. For every
    CSV row a feature row is built from ``idf.tfIdfMovieTag`` with the CSV
    label stored in the last column, and appended to ``trainingSet``. Every
    movie from ``di.getAllMovies()`` whose id does not occur in the CSV gets
    a feature row with a random ``'0'``/``'1'`` placeholder label in the last
    column, appended to ``testSet``.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list to append training rows to. A fresh list
            is created when omitted.
        testSet: Optional list to append test rows to. A fresh list is
            created when omitted.

    Returns:
        A tuple ``(test, trainingSet, testSet)`` where ``test`` holds the
        ids of the movies that went into ``testSet``.
    """
    # Fresh lists per call: list defaults in the signature are shared between
    # calls and would keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module do its own newline handling.
    with open(filename, 'r', newline='') as csvfile:
        # csv.reader yields [] for blank lines; those carry no movie id.
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    # Rows are built on demand rather than taken from a pre-sized matrix, so
    # a CSV with more rows than movies (or with duplicate/unknown ids) can no
    # longer run off the end of the scratch buffers.
    for row in dataset:
        features = [''] * (allTagLen + 1)
        for j, weight in enumerate(idf.tfIdfMovieTag(row[0], idfMovArr)):
            features[j] = weight
        features[allTagLen] = row[1]
        trainingSet.append(features)

    trainIds = {int(row[0]) for row in dataset}
    labels = ['0', '1']
    test = []
    for mov in movies:
        movId = mov[0]
        if int(movId) in trainIds:
            continue
        test.append(movId)
        features = [''] * (allTagLen + 1)
        for j, weight in enumerate(idf.tfIdfMovieTag(movId, idfMovArr)):
            features[j] = weight
        # Placeholder label: the real class of an unlabelled movie is unknown.
        features[allTagLen] = random.choice(labels)
        testSet.append(features)
    return test, trainingSet, testSet
def loadDataset(filename, trainingSet=None, testSet=None):
    """Build training/test feature rows with labels kept in separate lists.

    Each row of the CSV file is read as ``[movieId, label, ...]``. For every
    CSV row a feature row is built from ``idf.tfIdfMovieTag`` and appended to
    ``trainingSet``, with the CSV label stored at the same position in
    ``labels``. Every movie from ``di.getAllMovies()`` whose id does not
    occur in the CSV gets a feature row appended to ``testSet`` and a random
    ``'0'``/``'1'`` placeholder stored in ``target``.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list to append training rows to. A fresh list
            is created when omitted.
        testSet: Optional list to append test rows to. A fresh list is
            created when omitted.

    Returns:
        A tuple ``(trainingSet, testSet, labels, target, test)``. ``target``
        always has one slot per movie; slots beyond the number of test
        movies stay ``''``. ``test`` holds the ids of the test movies.
    """
    # Fresh lists per call: list defaults in the signature are shared between
    # calls and would keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module do its own newline handling.
    with open(filename, 'r', newline='') as csvfile:
        # csv.reader yields [] for blank lines; those carry no movie id.
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    labels = []
    for row in dataset:
        features = [''] * allTagLen
        for j, weight in enumerate(idf.tfIdfMovieTag(row[0], idfMovArr)):
            features[j] = weight
        labels.append(row[1])
        trainingSet.append(features)

    trainIds = {int(row[0]) for row in dataset}
    label = ['0', '1']
    # Kept at len(movies) slots so the returned shape matches what existing
    # callers receive; only the first len(test) slots are filled.
    target = [''] * len(movies)
    test = []
    k = 0
    for mov in movies:
        movId = mov[0]
        if int(movId) in trainIds:
            continue
        test.append(movId)
        features = [''] * allTagLen
        for j, weight in enumerate(idf.tfIdfMovieTag(movId, idfMovArr)):
            features[j] = weight
        # Placeholder label: the real class of an unlabelled movie is unknown.
        target[k] = random.choice(label)
        testSet.append(features)
        k += 1
    return trainingSet, testSet, labels, target, test
def idfMovieTag():
    """Compute an inverse-document-frequency value for every tag.

    Movies act as the documents: for each tag from ``di.getAllTags()`` the
    value is ``log(number of movies / number of movies carrying the tag)``,
    or 0 when no movie carries the tag.

    Returns:
        A NumPy array with one value per tag, in ``di.getAllTags()`` order.
    """
    tagRows = di.getAllTags()
    movieRows = di.getAllMovies()
    totalMovies = len(movieRows)
    idfValues = np.zeros(len(tagRows))

    # One list of tag ids per movie; the ids arrive as a comma-separated
    # string in the first cell of the first row.
    tagsPerMovie = [
        di.getMovieTagIds(row[0])[0][0].split(",") for row in movieRows
    ]

    for pos, tagRow in enumerate(tagRows):
        hits = sum(1 for movieTags in tagsPerMovie if tagRow[0] in movieTags)
        # A tag that never occurs keeps the value 0 instead of dividing by 0.
        idfValues[pos] = math.log(totalMovies / hits) if hits != 0 else 0
    return idfValues
def vectTagMovRat():
    """Build a nested tag -> movie -> rating mapping of 0/1 flags.

    An entry is 1 when the tag id is among the movie's tag ids and the
    rating value does not exceed the movie's value from
    ``di.getAllMovieRtngs()`` (presumably its average rating); otherwise
    it is 0.

    Returns:
        A tuple ``(vect, tags, movies, ratings)`` where ``vect`` is a
        ``defaultdict`` indexed as ``vect[tagId][movieId][rating]`` and the
        other three items are the raw results of ``di.getAllTags()``,
        ``di.getAllMovies()`` and ``di.getAllRatings()``.
    """
    tags = di.getAllTags()
    movies = di.getAllMovies()
    ratings = di.getAllRatings()
    ratingOfMovie = dict((row[0], row[1]) for row in di.getAllMovieRtngs())

    vect = defaultdict(lambda: defaultdict(dict))
    for movieRow in movies:
        movieId = movieRow[0]
        # Tag ids are stored as one comma-separated string.
        tagIdsOfMovie = di.getMovieTagIds(movieId)[0][0].split(",")
        for tagRow in tags:
            tagId = tagRow[0]
            tagged = tagId in tagIdsOfMovie
            for ratingRow in ratings:
                rating = ratingRow[0]
                # The movie's rating is only looked up for tagged movies.
                if tagged and rating <= ratingOfMovie[movieId]:
                    flag = 1
                else:
                    flag = 0
                vect[tagId][movieId][rating] = flag
    return (vect, tags, movies, ratings)
def vectActMovYr():
    """Build a nested actor -> movie -> year mapping of 0/1 flags.

    Only actors with at least one movie present in ``di.getAllMovies()`` are
    included. An entry is 1 when the movie is listed among the actor's
    movies and the movie's value from ``di.getAllMovieYrs()`` equals the
    year; otherwise it is 0.

    Returns:
        A tuple ``(vect, actors, movies, years)`` where ``vect`` is a
        ``defaultdict`` indexed as ``vect[actorId][movieId][year]`` and the
        other three items are the raw results of ``di.getAllActors()``,
        ``di.getAllMovies()`` and ``di.getAllYears()``.
    """
    actors = di.getAllActors()
    movies = di.getAllMovies()
    years = di.getAllYears()
    yearOfMovie = dict((row[0], row[1]) for row in di.getAllMovieYrs())
    knownMovieIds = [row[0] for row in movies]

    # Keep an actor only if at least one of their movies is a known movie.
    moviesByActor = {}
    for actorRow in actors:
        actedIn = [row[0] for row in di.getActorMovieIds(actorRow[0])]
        if any(movieId in knownMovieIds for movieId in actedIn):
            moviesByActor[actorRow[0]] = actedIn
    print("\ngot the actor movies\n")

    vect = defaultdict(lambda: defaultdict(dict))
    for actorId, actedIn in moviesByActor.items():
        for movieId in knownMovieIds:
            appears = movieId in actedIn
            for yearRow in years:
                year = yearRow[0]
                # The movie's year is only looked up for movies the actor
                # appears in.
                if appears and yearOfMovie[movieId] == year:
                    vect[actorId][movieId][year] = 1
                else:
                    vect[actorId][movieId][year] = 0
    return (vect, actors, movies, years)
# Script: prompt for a user id, rank movies with mr.pprRes using the movies
# returned by di.getusrMovTime as seeds, and print the top numMovies results.
import sys  # needed for sys.exit() below; it was called without being imported

import dbInfo as di
import probFeedback as pf
import movieRecomm as mr
import numpy as np

userId = input("\nGive User Id: ")
numMovies = 5  # how many ranked movies to print

movies = di.getAllMovies()
movieNames = di.getAllMovieNames()
moviesArr = [mov[0] for mov in movies]

# Multiply the matrix from mr.formPPRMatrix() by its own transpose; the
# product is what mr.pprRes receives below.
mat = mr.formPPRMatrix()
matrix = np.matmul(mat, np.transpose(mat))

usrSeenMovies = di.getusrMovTime(userId)
if len(usrSeenMovies) == 0:
    print("user has not watched any movies to give suggestions")
    sys.exit()

usrMovies = [mov[0] for mov in usrSeenMovies]
print("\nseeds =", usrMovies)

rankedRes = mr.pprRes(matrix, usrMovies)

print("\nRank\tMovie Id\tMovie Name\n")
# Never index past the end of the ranking when it has fewer than numMovies
# entries.
for i in range(min(numMovies, len(rankedRes))):
    # movieNames is indexed with the movie's position in moviesArr.
    movIdx = moviesArr.index(rankedRes[i])
    print(i+1, ":\t", rankedRes[i], "\t\t", movieNames[movIdx][0])
# Script: prompt for a user id, build a movie x tag term-frequency matrix,
# multiply it by its own transpose, and fetch the user's movies from
# db.getUserMoviesRates.
import dbInfo as db
import numpy as np
import persPageRank as ppr

userId = input("\nGive User Id: ")
movies = db.getAllMovies()
movieNames = db.getAllMovieNames()

# tfmovies[movieId][tagId] = share of the movie's tag rows that carry tagId.
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    # Distinct tag ids arrive as one comma-separated string.
    UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
    tfvect = {}
    for tag in UnqTags:
        tffact = sum(1 for t in Taglist if t[0] == tag)
        # Key by the whole tag id. `tag` is a string, so `tag[0]` was only its
        # first character: multi-character ids collided and never matched the
        # ids looked up below, and an empty string raised IndexError.
        # An empty Taglist yields 0.0 instead of dividing by zero.
        tfvect[tag] = tffact / len(Taglist) if Taglist else 0.0
    tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()

# Rows follow the order of `movies`, columns the order of `tagids`; cells for
# tags a movie does not carry stay 0.
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    tfvect = tfmovies[movies[i][0]]
    for j in range(len(tagids)):
        if tagids[j][0] in tfvect:
            movietf[i][j] = tfvect[tagids[j][0]]

matrix = np.matmul(movietf, np.transpose(movietf))
seedList = db.getUserMoviesRates(userId)
seeds = []