Esempi in Python per getAllTags, esempi in Python per dbInfo.getAllTags

Esempio n. 1

0

Mostra file

def loadDataset(filename, trainingSet=None, testSet=None):
	"""Build TF-IDF feature rows for labelled (training) and unlabelled (test) movies.

	The CSV at ``filename`` is read with ``csv.reader``; column 0 of each row is
	used as a movie id and column 1 as its label.  Every movie returned by
	``di.getAllMovies()`` whose id does not occur in the CSV becomes a test row.

	Each produced row has ``allTagLen + 1`` cells: the values returned by
	``idf.tfIdfMovieTag`` followed (at index ``allTagLen``) by the label.  Test
	rows get a placeholder label picked at random from ``'0'`` / ``'1'``.

	Args:
		filename: path of the CSV file with the labelled movies.
		trainingSet: optional list to append training rows to.
		testSet: optional list to append test rows to.

	Returns:
		``(test, trainingSet, testSet)`` where ``test`` is the list of movie ids
		that ended up in the test set.
	"""
	# Fresh lists per call: the former ``[]`` defaults were shared between calls,
	# so rows from an earlier call leaked into later results.
	if trainingSet is None:
		trainingSet = []
	if testSet is None:
		testSet = []

	with open(filename, 'r') as csvfile:
		dataset = list(csv.reader(csvfile))

	movies = di.getAllMovies()
	allTagLen = len(di.getAllTags())
	idfMovArr = idf.idfMovieTag()

	# Rows are built one at a time instead of indexing into a buffer sized by
	# len(movies); that buffer overflowed when the CSV had more rows than movies.
	for record in dataset:
		row = ['' for _ in range(allTagLen + 1)]
		idfVect = idf.tfIdfMovieTag(record[0], idfMovArr)
		for j in range(len(idfVect)):
			row[j] = idfVect[j]
		row[allTagLen] = record[1]
		trainingSet.append(row)

	# A set gives constant-time membership tests in the loop below.
	trainIds = {int(record[0]) for record in dataset}

	labels = ['0', '1']
	test = []
	for movie in movies:
		if int(movie[0]) in trainIds:
			continue
		test.append(movie[0])
		row = ['' for _ in range(allTagLen + 1)]
		idfVect = idf.tfIdfMovieTag(movie[0], idfMovArr)
		for j in range(len(idfVect)):
			row[j] = idfVect[j]
		# The real label is unknown for test movies; a random one is stored.
		row[allTagLen] = random.choice(labels)
		testSet.append(row)
	return test, trainingSet, testSet

Esempio n. 2

0

Mostra file

File: tensorDecomp.py Progetto: giriraj34/MovieRecommendationSystem

def vectActMovTag():
    """Build a nested actor -> movie -> tag mapping from the database.

    For every (actor, movie, tag) combination the entry is the movie's year
    (looked up from ``di.getAllMovieYrs()``) when the movie is one of the
    actor's movies and the tag is one of the movie's tag ids; otherwise it is 0.

    Returns:
        ``(vect, actors, movies, years)`` where ``vect`` is indexed as
        ``vect[actor_id][movie_id][tag_id]`` and the other three values are
        the rows returned by ``di.getAllActors()``, ``di.getAllMovies()`` and
        ``di.getAllYears()``.
    """
    actors = di.getAllActors()
    tags = di.getAllTags()
    movies = di.getAllMovies()
    years = di.getAllYears()
    movYears = {row[0]: row[1] for row in di.getAllMovieYrs()}

    # Movie ids per actor, fetched once so the triple loop does no queries.
    actMoviesDb = {}
    for act in actors:
        actMoviesDb[act[0]] = [mov[0] for mov in di.getActorMovieIds(act[0])]

    vect = defaultdict(lambda: defaultdict(dict))
    for mov in movies:
        movTags = di.getMovieTagIds(mov[0])[0][0].split(",")
        for act in actors:
            actedIn = mov[0] in actMoviesDb[act[0]]
            for tag in tags:
                # Both branches use the same actor/movie/tag key order.  The
                # zero fill used to be written as vect[movie][actor][tag], so
                # it never lined up with the non-zero entries.
                if actedIn and (tag[0] in movTags):
                    vect[act[0]][mov[0]][tag[0]] = movYears[mov[0]]
                else:
                    vect[act[0]][mov[0]][tag[0]] = 0
    return (vect, actors, movies, years)

Esempio n. 3

0

Mostra file

File: similarity.py Progetto: giriraj34/MovieRecommendationSystem

def getActorTagMatrix():
    """Return an actors x tags matrix of per-actor tag weights.

    Row ``i`` corresponds to the ``i``-th row of ``di.getAllActors()`` and
    column ``j`` to the ``j``-th row of ``di.getAllTags()``.  A cell holds the
    value ``idf.tfIdfActorTag`` reports for that tag id, or 0 when the tag id
    is absent from the actor's vector.

    Returns:
        numpy.ndarray of shape ``(len(actors), len(tags))``.
    """
    tagIds = di.getAllTags()
    actorlist = di.getAllActors()
    # The actor-name query that used to be issued here was never read.
    actorTags = np.zeros((len(actorlist), len(tagIds)))
    idfActVector = idf.idfActorTag()
    for i, actor in enumerate(actorlist):
        actVect = idf.tfIdfActorTag(actor[0], idfActVector)
        for j, tagRow in enumerate(tagIds):
            if tagRow[0] in actVect:
                actorTags[i][j] = actVect[tagRow[0]]
    return actorTags

Esempio n. 4

0

Mostra file

def idfActorTag():
    """Compute an inverse-document-frequency weight per tag over all actors.

    For each tag id the weight is ``log(actorCount / tagCount)`` where
    ``tagCount`` is the number of actors whose first ``di.getActorTags`` row
    contains the tag id; tags that no actor has get weight 0.

    Returns:
        dict mapping tag id -> weight.
    """
    idfActVect = {}
    allTags = di.getAllTags()
    allActors = di.getAllActors()
    actorCount = len(allActors)
    if not allTags:
        # Nothing to weigh; also avoids querying actors needlessly.
        return idfActVect

    # One query per actor instead of one per (tag, actor) pair: the result
    # does not depend on the tag being counted.
    actorTagRows = [di.getActorTags(actor[0])[0] for actor in allActors]

    for tag in allTags:
        tagCount = 0
        for row in actorTagRows:
            if tag[0] in row:
                tagCount += 1
        idfActVect[tag[0]] = math.log(actorCount / tagCount) if tagCount else 0
    return idfActVect

Esempio n. 5

0

Mostra file

def idfUserTag():
    """Compute an inverse-document-frequency weight per tag over all users.

    For each tag id the weight is ``log(userCount / tagCount)`` where
    ``tagCount`` is the number of users whose first ``di.getUserTags`` row
    contains the tag id; tags that no user has get weight 0.

    Returns:
        dict mapping tag id -> weight.
    """
    idfUserVect = {}
    allTags = di.getAllTags()
    allUsers = di.getAllUsers()
    userCount = len(allUsers)
    if not allTags:
        # Nothing to weigh; also avoids querying users needlessly.
        return idfUserVect

    # One query per user instead of one per (tag, user) pair: the result
    # does not depend on the tag being counted.
    userTagRows = [di.getUserTags(user[0])[0] for user in allUsers]

    for tag in allTags:
        tagCount = 0
        for row in userTagRows:
            if tag[0] in row:
                tagCount += 1
        idfUserVect[tag[0]] = math.log(userCount / tagCount) if tagCount else 0
    return idfUserVect

Esempio n. 6

0

Mostra file

def idfGenreTag():
    """Compute an inverse-document-frequency weight per tag over all genres.

    For each tag id the weight is ``log(genreCount / tagCount)`` where
    ``tagCount`` is the number of genres whose first ``di.getGenreTags`` row
    contains the tag id; tags that no genre has get weight 0.

    Returns:
        dict mapping tag id -> weight.
    """
    idfGenVect = {}
    allTags = di.getAllTags()
    allGenres = di.getAllGenres()
    genreCount = len(allGenres)
    if not allTags:
        # Nothing to weigh; also avoids querying genres needlessly.
        return idfGenVect

    # One query per genre instead of one per (tag, genre) pair: the result
    # does not depend on the tag being counted.
    genreTagRows = [di.getGenreTags(genre[0])[0] for genre in allGenres]

    for tag in allTags:
        tagCount = 0
        for row in genreTagRows:
            if tag[0] in row:
                tagCount += 1
        idfGenVect[tag[0]] = math.log(genreCount / tagCount) if tagCount else 0
    return idfGenVect

Esempio n. 7

0

Mostra file

File: svm-1.py Progetto: giriraj34/MovieRecommendationSystem

def loadDataset(filename, trainingSet=None, testSet=None):
    """Build TF-IDF feature rows and label lists for training and test movies.

    The CSV at ``filename`` is read with ``csv.reader``; column 0 of each row
    is used as a movie id and column 1 as its label.  Every movie returned by
    ``di.getAllMovies()`` whose id does not occur in the CSV becomes a test
    row.  Feature rows have ``allTagLen`` cells filled from
    ``idf.tfIdfMovieTag``; labels are kept in separate lists.

    Args:
        filename: path of the CSV file with the labelled movies.
        trainingSet: optional list to append training rows to.
        testSet: optional list to append test rows to.

    Returns:
        ``(trainingSet, testSet, labels, target, test)``:
        ``labels`` holds one CSV label per training row; ``target`` has one
        slot per movie, of which the first ``len(test)`` carry a random
        ``'0'``/``'1'`` placeholder and the rest stay ``''``; ``test`` lists
        the movie ids of the test rows.
    """
    # Fresh lists per call: the former ``[]`` defaults were shared between
    # calls, so rows from an earlier call leaked into later results.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r') as csvfile:
        dataset = list(csv.reader(csvfile))

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    labels = ['' for _ in range(len(dataset))]
    for i, record in enumerate(dataset):
        row = ['' for _ in range(allTagLen)]
        idfVect = idf.tfIdfMovieTag(record[0], idfMovArr)
        for j in range(len(idfVect)):
            row[j] = idfVect[j]
        labels[i] = record[1]
        trainingSet.append(row)

    # A set gives constant-time membership tests in the loop below.
    trainIds = {int(record[0]) for record in dataset}

    # Kept at len(movies) slots, as before, so callers see the same shape.
    target = ['' for _ in range(len(movies))]
    label = ['0', '1']
    test = []
    k = 0
    for movie in movies:
        if int(movie[0]) in trainIds:
            continue
        test.append(movie[0])
        row = ['' for _ in range(allTagLen)]
        idfVect = idf.tfIdfMovieTag(movie[0], idfMovArr)
        for j in range(len(idfVect)):
            row[j] = idfVect[j]
        # The real label is unknown for test movies; a random one is stored.
        target[k] = random.choice(label)
        testSet.append(row)
        k += 1
    return trainingSet, testSet, labels, target, test

Esempio n. 8

0

Mostra file

File: utils.py Progetto: giriraj34/MovieRecommendationSystem

def getGenreMovieTags(movie):
    """Return the movie's time-weighted tag frequencies as a dense list.

    For every id in ``movie.getUnqTags()`` the time weights
    (``getTimeWeight()``) of the movie's tags with that id are summed and
    divided by the total number of tags on the movie.  The result has one
    entry per row of ``di.getAllTags()``, in that order, with 0 for tag ids
    the movie does not carry.
    """
    allTagRows = di.getAllTags()
    uniqueIds = movie.getUnqTags()
    movieTags = movie.getTags()
    tagTotal = len(movieTags)

    weightById = {}
    for uid in uniqueIds:
        acc = 0
        for entry in movieTags:
            if entry.getId() == uid:
                acc = acc + entry.getTimeWeight()
        weightById[uid] = acc / tagTotal

    # Lay the sparse weights out in the global tag order.
    return [weightById[row[0]] if row[0] in weightById else 0
            for row in allTagRows]

Esempio n. 9

0

Mostra file

def idfMovieTag():
    """Compute an inverse-document-frequency weight per tag over all movies.

    Entry ``i`` of the result belongs to the ``i``-th row of
    ``di.getAllTags()`` and equals ``log(movieCount / n)`` where ``n`` is the
    number of movies whose comma-separated tag-id list contains that tag id;
    it is 0 when no movie carries the tag.

    Returns:
        numpy.ndarray with one weight per tag row.
    """
    tagRows = di.getAllTags()
    movieRows = di.getAllMovies()
    totalMovies = len(movieRows)
    weights = np.zeros(len(tagRows))

    # Tag-id list of every movie, fetched once up front.
    tagsPerMovie = [di.getMovieTagIds(row[0])[0][0].split(",")
                    for row in movieRows]

    for pos, tagRow in enumerate(tagRows):
        hits = 0
        for tagList in tagsPerMovie:
            if tagRow[0] in tagList:
                hits += 1
        weights[pos] = math.log(totalMovies / hits) if hits != 0 else 0
    return weights

Esempio n. 10

0

Mostra file

File: tensorDecomp.py Progetto: giriraj34/MovieRecommendationSystem

def vectTagMovRat():
    """Build a nested tag -> movie -> rating indicator mapping.

    ``vect[tag_id][movie_id][rating]`` is 1 when the tag id is among the
    movie's tag ids and the rating is at most the movie's value from
    ``di.getAllMovieRtngs()``; otherwise it is 0.

    Returns:
        ``(vect, tags, movies, ratings)`` where the last three are the rows
        returned by ``di.getAllTags()``, ``di.getAllMovies()`` and
        ``di.getAllRatings()``.
    """
    tags = di.getAllTags()
    movies = di.getAllMovies()
    ratings = di.getAllRatings()
    avgRatings = {row[0]: row[1] for row in di.getAllMovieRtngs()}

    vect = defaultdict(lambda: defaultdict(dict))
    for mov in movies:
        movId = mov[0]
        movTags = di.getMovieTagIds(movId)[0][0].split(",")
        for tag in tags:
            tagId = tag[0]
            for rtng in ratings:
                level = rtng[0]
                # Default to 0, then flip to 1 when both conditions hold.
                vect[tagId][movId][level] = 0
                if tagId in movTags and level <= avgRatings[movId]:
                    vect[tagId][movId][level] = 1
    return (vect, tags, movies, ratings)

Esempio n. 11

0

Mostra file

File: task4.py Progetto: giriraj34/MovieRecommendationSystem

# Build a term-frequency vector per movie, turn the vectors into a dense
# movie x tag matrix, derive a movie-movie similarity matrix from it and
# collect the names of the movies the user has already rated.
movies = db.getAllMovies()
movieNames = db.getAllMovieNames()
tfmovies = {}
for movieId in movies:
    Taglist = db.getMovieTags(movieId[0])
    UnqTags = db.getMovieTagIds(movieId[0])[0][0].split(",")
    tfvect = {}
    # A movie without any tag rows keeps an empty vector; dividing by
    # len(Taglist) would otherwise raise ZeroDivisionError.
    if Taglist:
        for tag in UnqTags:
            # NOTE(review): assumes column 0 of a getMovieTags row is the tag
            # id in the same string form as the split ids -- confirm.
            tffact = sum(1 for t in Taglist if t[0] == tag)
            # Key by the whole tag id.  `tag` is a string, so the former
            # `tag[0]` kept only its first character and the lookup against
            # the full ids below missed every multi-character id.
            tfvect[tag] = tffact / len(Taglist)
    tfmovies[movieId[0]] = tfvect

tagids = db.getAllTags()
movietf = np.zeros((len(tfmovies), len(tagids)))
for i in range(len(tfmovies)):
    movieVect = tfmovies[movies[i][0]]
    for j in range(len(tagids)):
        if tagids[j][0] in movieVect:
            movietf[i][j] = movieVect[tagids[j][0]]

# Dot products between the tag vectors of every pair of movies.
matrix = np.matmul(movietf, np.transpose(movietf))

seedList = db.getUserMoviesRates(userId)
seeds = [seed[0] for seed in seedList]
seedNames = []
for i, movieRow in enumerate(movies):
    if movieRow[0] in seeds:
        seedNames.append(movieNames[i][0])

Esempio n. 12

0

Mostra file

    di.delRows("mltags", "movie_id", mov)
    di.delRows("movie_actor", "movie_id", mov)
    di.delRows("movie_info", "movie_id", mov)

# Select the ids of all users whose numeric id is at or below 71550.
# NOTE(review): 71550 is a hard-coded cut-off; where it comes from is not
# visible in this excerpt -- confirm before reuse.
allUsers = di.getAllUsers()
delUsers = []
for usr in allUsers:
    if (int(usr[0]) <= 71550):
        delUsers.append(usr[0])
# Report how many users were selected.
print("delUsers", len(delUsers))
for usr in delUsers:
    di.delRows("mlratings", "user_id", usr)
    di.delRows("mltags", "user_id", usr)
    di.delRows("mlusers", "user_id", usr)
    print("usr ="******"actor = ", act[0])
        di.delRows("imdb_actor_info", "actor_id", act[0])
for tag in allTags:
    if (tag[0] not in mlTg):

Esempio n. 13

0

Mostra file

import dbInfo as db
import numpy as np
import utils
import tfCalc as tf
import warnings
warnings.filterwarnings("ignore")

# Tag rows are fetched once when the module is imported; lenTags is the
# number of rows returned.
allTags = db.getAllTags()
lenTags = len(allTags)


# Generates the matrix that is used as input to SVD.
def genSVDMatrix(genrelist):
    """Return one tag-weight row per movie of the given genre selection.

    A genre object is created with ``tf.createGenObj(genrelist)``; each of
    its movies is converted to a row by ``utils.getGenreMovieTags``.
    """
    genreObj = tf.createGenObj(genrelist)
    movieObjs = genreObj.getMovies()
    # Start from a zero-filled grid, then replace each row in movie order.
    rows = [[0] * lenTags for _ in range(len(movieObjs))]
    for pos, movieObj in enumerate(movieObjs):
        rows[pos] = utils.getGenreMovieTags(movieObj)
    return rows


def svdCalc(mat, numSem):
    """Return the first ``numSem`` rows of the third SVD factor of ``mat``.

    ``mat`` is decomposed with ``np.linalg.svd(mat, full_matrices=False)``;
    the leading ``numSem`` rows of the last returned factor are copied into a
    new ``(numSem, columns)`` array.
    """
    _, _, vt = np.linalg.svd(mat, full_matrices=False)
    width = len(vt[0])
    sem = np.zeros((numSem, width))
    for r in range(numSem):
        for c in range(width):
            sem[r, c] = vt[r, c]
    return sem

Esempio n. 14

0

Mostra file

File: movieRecomm.py Progetto: giriraj34/MovieRecommendationSystem

import dbInfo as di
import utils
import lda
import sys
from operator import itemgetter
import tensorDecomp as td
import persPageRank as ppr
import tfCalc as tf
import tfIdfCalc as idf
import numpy as np
from scipy.stats import mode

# Movie and tag rows are loaded once when the module is imported; their
# counts are kept alongside them.
movies = di.getAllMovies()
tagIds = di.getAllTags()
allTagLen = len(tagIds)
movieLen = len(movies)

def formSvdMat(numSemantics):
	"""Return the leading ``numSemantics`` left-singular components per movie.

	A movies x tags matrix is filled row by row from ``idf.tfIdfMovieTag`` and
	decomposed with ``np.linalg.svd``; the first ``numSemantics`` columns of
	the first returned factor are copied into a new array.  When the matrix
	has fewer rows or columns than ``numSemantics`` a message is printed and
	the process exits via ``sys.exit()``.

	Returns:
		numpy.ndarray of shape ``(movieLen, numSemantics)``.
	"""
	tfidf = np.zeros((movieLen, allTagLen))
	tooFewRows = len(tfidf) < numSemantics
	if tooFewRows or len(tfidf[0]) < numSemantics:
		print("cant report top semantics")
		sys.exit()

	idfWeights = idf.idfMovieTag()
	for row in range(movieLen):
		tfidf[row] = idf.tfIdfMovieTag(movies[row][0], idfWeights)

	leftVectors = np.linalg.svd(tfidf, full_matrices=False)[0]
	movieFacts = np.zeros((movieLen, numSemantics))
	# Copy the leading columns in one assignment.
	movieFacts[:, :] = leftVectors[:, :numSemantics]
	return movieFacts