def movieTagSpace(movieId):
    """Score actors outside a movie's cast against the cast's tag profile.

    Every actor is represented by a vector with one slot per tag row
    returned by ``di.getMovieTags(movieId)``. A slot holds the weight that
    ``idf.tfIdfActorTag`` reports for that tag, or 0 when the actor has no
    entry for it. The vectors of the movie's own actors (from
    ``di.getMovieActorIds(movieId)``) are added element-wise into a single
    profile; every other actor is scored by the Euclidean distance between
    its vector and that profile.

    Args:
        movieId: Identifier forwarded to ``di.getMovieTags`` and
            ``di.getMovieActorIds``.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the mapping
        ``{actorNames[i][0]: distance}``.
        NOTE(review): whether these are the closest or the farthest actors
        depends on the sort direction of ``utils.sortByValue`` - confirm.
    """
    movieTagRows = di.getMovieTags(movieId)
    numTags = len(movieTagRows)
    allActorRows = di.getAllActors()
    # NOTE(review): assumes name rows line up index-for-index with
    # allActorRows - confirm against dbInfo.
    allNameRows = di.getAllActorNames()
    idfVector = idf.idfActorTag()
    castIds = [row[0] for row in di.getMovieActorIds(movieId)]

    def restrictToMovieTags(actorId):
        # One actor's weights laid out in the movie's tag order.
        weights = idf.tfIdfActorTag(actorId, idfVector)
        vector = [0] * numTags
        for pos, tagRow in enumerate(movieTagRows):
            if tagRow[0] in weights:
                vector[pos] = weights[tagRow[0]]
        return vector

    # Cast vectors are computed before the candidates' so the helper is
    # called in the same actor order as before.
    castVectors = [restrictToMovieTags(actorId) for actorId in castIds]
    candidates = [
        (idx, restrictToMovieTags(actorRow[0]))
        for idx, actorRow in enumerate(allActorRows)
        if actorRow[0] not in castIds
    ]

    # Explicit left-to-right accumulation keeps the floating-point result
    # identical to a plain running sum.
    profile = [0] * numTags
    for vector in castVectors:
        for pos, weight in enumerate(vector):
            profile[pos] = profile[pos] + weight

    distances = {}
    for idx, vector in candidates:
        distances[allNameRows[idx][0]] = distance.euclidean(vector, profile)
    return utils.sortByValue(distances)[-10:]
def getActorTagMatrix():
    """Build the actor-by-tag weight matrix.

    Rows follow the order of ``di.getAllActors()`` and columns follow the
    order of ``di.getAllTags()``. Cell ``[r, c]`` holds the weight that
    ``idf.tfIdfActorTag`` reports for that actor and tag, and stays 0.0
    when the actor's result has no entry for the tag.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(actors), len(tags))``.
    """
    # Pull the tag ids out once instead of re-indexing each row per actor.
    tagIds = [row[0] for row in di.getAllTags()]
    actorlist = di.getAllActors()
    idfActVector = idf.idfActorTag()
    actorTags = np.zeros((len(actorlist), len(tagIds)))
    for rowIdx, actor in enumerate(actorlist):
        actVect = idf.tfIdfActorTag(actor[0], idfActVector)
        for colIdx, tagId in enumerate(tagIds):
            if tagId in actVect:
                actorTags[rowIdx, colIdx] = actVect[tagId]
    return actorTags
import dbInfo as di
import tfIdfCalc as idf
import numpy as np
import utils
import similarity
from scipy.spatial import distance

# Module-level data loaded once at import time and read by simActors and
# simActors2. simActors indexes all three sequences with the same position.
# NOTE(review): assumes getActorTagMatrix, getAllActors and getAllActorNames
# all return rows in the same actor order - confirm.
actorTags = similarity.getActorTagMatrix()
actorList = di.getAllActors()
actorNames = di.getAllActorNames()

def simActors(actId):
    """Score every other actor by Euclidean distance to the given actor.

    Args:
        actId: Actor identifier, compared against the first field of each
            row in the module-level ``actorList``.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to a mapping
        from ``actorNames[i][0]`` to the distance between row ``i`` of
        ``actorTags`` and the given actor's row.
        NOTE(review): whether these are the closest or the farthest actors
        depends on the sort direction of ``utils.sortByValue`` - confirm.

    Raises:
        ValueError: If ``actId`` matches no row of ``actorList``.
    """
    givenActor = None
    for i, tagVector in enumerate(actorTags):
        # No early exit: if an id appears twice, the last matching row wins.
        if actId == actorList[i][0]:
            givenActor = tagVector
    if givenActor is None:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError("unknown actor id: %r" % (actId,))

    d = {}
    for i, actor in enumerate(actorList):
        if actId != actor[0]:
            d[actorNames[i][0]] = distance.euclidean(givenActor, actorTags[i])
    return utils.sortByValue(d)[-10:]

def simActors2(actId):
	"""Copy the leading SVD columns of ``actorTags`` for all actors and for ``actId``.

	Decomposes the module-level ``actorTags`` matrix, copies the first
	``numSemantics`` columns of ``u`` into ``x``, and copies the row whose
	``actorList`` id equals ``actId`` into ``givenActor``.

	NOTE(review): the visible body ends after filling ``x`` and
	``givenActor`` without using or returning them; the remainder of the
	function appears to be missing from this source.
	"""
	# Number of leading columns of u that are kept.
	numSemantics = 5
	# The second positional argument of np.linalg.svd is full_matrices;
	# 0 requests the reduced decomposition.
	u,s,v = np.linalg.svd(actorTags,0)
	x=np.zeros((len(u),numSemantics))
	# Stays all zeros if actId matches no row of actorList.
	givenActor = np.zeros(numSemantics)
	for i in range(len(u)):
		for j in range(numSemantics):
			if(actId == actorList[i][0]):
				givenActor[j] = u[i][j]
			x[i][j] = u[i][j]
# Example no. 4
# 0
import dbInfo as db
import numpy as np
import svd
import utils
import similarity
#np.set_printoptions(threshold=np.nan)

# Actor-actor similarity: product of the actor-tag matrix with its transpose.
actorTags = similarity.getActorTagMatrix()
actorTagsT = np.transpose(actorTags)
mat = np.matmul(actorTags, actorTagsT)

# Decompose the similarity matrix; 3 is the count passed to svd.svdCalc.
svdSem = svd.svdCalc(mat, 3)
allActors = db.getAllActors()
actorNames = db.getAllActorNames()

print(
    "\n\nActor-Actor similarity matrix:\n\n",
    mat,
    "\n\nsize of matrix :",
    mat.shape,
)

print("\n\nTop 3 Latent Semantics:\n")
for sem in svdSem:
    ranked = utils.rankSem(sem, allActors)
    print("\n\n", ranked)

# Group actors using the transposed semantics.
groups = utils.form_groups_semantics(np.transpose(svdSem), actorNames, 3)
print("\n\n3 Non overlapping groups:")
for grp, members in groups.items():
    print("\n\n", grp, ":", members)
#	print(tl.unfold(factors[i],1))
# Unfold each of the three factor matrices along mode 1 so that iterating
# the result yields one entry per latent semantic.
# NOTE(review): `tl`, `factors`, `actors`, `movies` and `numGroups` are not
# defined in the visible part of the file - presumably bound earlier; confirm.
actorSemantics = tl.unfold(factors[0], 1)
movieSemantics = tl.unfold(factors[1], 1)
yearSemantics = tl.unfold(factors[2], 1)

print("\n\nActor Semantics:")
for sem in actorSemantics:
    print("\n\n", utils.rankSem(sem, actors))
print("\n\nMovie Semantics:")
for sem in movieSemantics:
    print("\n\n", utils.rankSem(sem, movies))
print("\n\nYear Semantics:")
for sem in yearSemantics:
    # NOTE(review): `years` is read here before the assignment further down;
    # it must already be bound earlier in the file (not visible) - confirm.
    print("\n\n", utils.rankSem(sem, years))

# Label lists handed to the grouping helper for each mode.
actList = di.getAllActorNames()
movList = di.getAllMovieNames()
years = di.getAllYears()

# Group the entries of each mode using its factor matrix.
actGroups = utils.form_groups_semantics(factors[0], actList, numGroups)
movGroups = utils.form_groups_semantics(factors[1], movList, numGroups)
yearGroups = utils.form_groups_semantics(factors[2], years, numGroups)

# NOTE(review): the headings below hard-code "5" while the group count passed
# above is `numGroups` - confirm the two agree.
print("\n\n5 Non overlapping Actor groups:")
for grp in actGroups.keys():
    print("\n\n", grp, ":", actGroups[grp])
print("\n\n5 Non overlapping Movie groups:")
for grp in movGroups.keys():
    print("\n\n", grp, ":", movGroups[grp])
print("\n\n5 Non overlapping Year groups:")
for grp in yearGroups.keys():