def movieTagSpace(movieId):
    """Score actors outside a movie's cast against the cast in the movie's tag space.

    The tag space is the list of tags returned by ``di.getMovieTags(movieId)``.
    Every cast member (``di.getMovieActorIds(movieId)``) is turned into a
    vector holding the weight ``idf.tfIdfActorTag`` reports for each of those
    tags (0 where the actor has no entry for a tag).  The cast vectors are
    added component-wise into a single reference vector.  Each actor from
    ``di.getAllActors()`` who is not in the cast gets a vector built the same
    way and is scored by its Euclidean distance to the reference vector.

    Args:
        movieId: Identifier passed to ``di.getMovieTags`` and
            ``di.getMovieActorIds``.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the
        ``{actor name: distance}`` mapping.
        NOTE(review): whether these are the closest or the farthest actors
        depends on the sort direction of ``utils.sortByValue``, which is not
        visible from this function -- confirm.
    """
    tag_rows = di.getMovieTags(movieId)
    all_actors = di.getAllActors()
    all_names = di.getAllActorNames()
    idf_vector = idf.idfActorTag()
    cast_ids = [row[0] for row in di.getMovieActorIds(movieId)]

    # The first field of every tag row is the key used to look weights up.
    tag_keys = [row[0] for row in tag_rows]

    def tag_weights(actor_id):
        # One weight per tag of the movie, 0 where the actor has no entry.
        scores = idf.tfIdfActorTag(actor_id, idf_vector)
        return [scores[key] if key in scores else 0 for key in tag_keys]

    # Component-wise sum of the cast vectors, accumulated one actor at a time.
    cast_vector = [0] * len(tag_keys)
    for member_id in cast_ids:
        for pos, weight in enumerate(tag_weights(member_id)):
            cast_vector[pos] = cast_vector[pos] + weight

    # all_names is indexed in parallel with all_actors.
    distances = {}
    for idx, actor_row in enumerate(all_actors):
        actor_id = actor_row[0]
        if actor_id in cast_ids:
            continue
        distances[all_names[idx][0]] = distance.euclidean(
            tag_weights(actor_id), cast_vector
        )

    return utils.sortByValue(distances)[-10:]
def getActorTagMatrix():
    """Build the actor-by-tag weight matrix.

    Row ``i`` corresponds to entry ``i`` of ``di.getAllActors()`` and column
    ``j`` to entry ``j`` of ``di.getAllTags()``.  Cell ``(i, j)`` holds the
    value ``idf.tfIdfActorTag`` reports for that actor under that tag, and
    stays 0.0 when the actor has no entry for the tag.

    Returns:
        numpy.ndarray: Float array with one row per actor and one column per
        tag.
    """
    # The first field of every tag row is the key used to look weights up.
    tag_keys = [row[0] for row in di.getAllTags()]
    actors = di.getAllActors()
    idf_vector = idf.idfActorTag()

    actor_tags = np.zeros((len(actors), len(tag_keys)))
    for row_idx, actor in enumerate(actors):
        weights = idf.tfIdfActorTag(actor[0], idf_vector)
        for col_idx, key in enumerate(tag_keys):
            if key in weights:
                actor_tags[row_idx, col_idx] = weights[key]
    return actor_tags
import dbInfo as di
import tfIdfCalc as idf
import numpy as np
import utils
import similarity
from scipy.spatial import distance

# Loaded once when the module is imported and shared by the functions below.
# The code indexes all three with the same position, so row i of actorTags is
# taken to describe actorList[i] / actorNames[i].
actorTags = similarity.getActorTagMatrix()
actorList = di.getAllActors()
actorNames = di.getAllActorNames()


def simActors(actId):
    """Score every other actor by Euclidean distance to ``actId`` in tag space.

    Args:
        actId: Actor identifier, compared against the first field of each
            ``actorList`` row.

    Returns:
        The last ten entries of ``utils.sortByValue`` applied to the
        ``{actor name: distance}`` mapping.
        NOTE(review): whether these are the closest or the farthest actors
        depends on the sort direction of ``utils.sortByValue``, which is not
        visible here -- confirm.

    Raises:
        UnboundLocalError: If ``actId`` matches no row, the reference vector
            is never assigned and the first distance computation fails.
    """
    # If several rows match, the last one wins.
    for idx, tag_row in enumerate(actorTags):
        if actId == actorList[idx][0]:
            reference = tag_row

    distances = {}
    for idx, actor_row in enumerate(actorList):
        if actId == actor_row[0]:
            continue
        distances[actorNames[idx][0]] = distance.euclidean(
            reference, actorTags[idx]
        )
    return utils.sortByValue(distances)[-10:]


def simActors2(actId):
    """Project the actors onto the first five left singular vectors of ``actorTags``.

    Runs a reduced SVD of ``actorTags``, copies the first ``numSemantics``
    columns of the left singular vectors into a new array, and copies the row
    belonging to ``actId`` into a separate vector.

    NOTE(review): no ``return`` statement is visible, so as written the
    function discards both arrays and returns ``None``.  It looks unfinished
    -- confirm against the full file.

    Args:
        actId: Actor identifier, compared against the first field of each
            ``actorList`` row.
    """
    numSemantics = 5
    left_vectors, _singular_values, _right_vectors = np.linalg.svd(
        actorTags, full_matrices=False
    )
    reduced = np.zeros((len(left_vectors), numSemantics))
    target = np.zeros(numSemantics)
    for row_idx, row in enumerate(left_vectors):
        is_target = actId == actorList[row_idx][0]
        for col_idx in range(numSemantics):
            if is_target:
                target[col_idx] = row[col_idx]
            reduced[row_idx][col_idx] = row[col_idx]
import dbInfo as db
import numpy as np
import svd
import utils
import similarity

# Script: build the actor-actor similarity matrix from the actor-tag matrix,
# print it, print the three semantics returned by svd.svdCalc, and print the
# groups formed from them.

# Product of the actor-tag matrix with its own transpose: entry (i, j) is the
# dot product of the tag vectors of actors i and j.
actorTags = similarity.getActorTagMatrix()
mat = actorTags @ np.transpose(actorTags)

svdSem = svd.svdCalc(mat, 3)
allActors = db.getAllActors()
actorNames = db.getAllActorNames()

print(
    "\n\nActor-Actor similarity matrix:\n\n",
    mat,
    "\n\nsize of matrix :",
    mat.shape,
)

print("\n\nTop 3 Latent Semantics:\n")
for sem in svdSem:
    print("\n\n", utils.rankSem(sem, allActors))

# form_groups_semantics is handed the transpose of what svdCalc returned.
groups = utils.form_groups_semantics(np.transpose(svdSem), actorNames, 3)

print("\n\n3 Non overlapping groups:")
for grp, members in groups.items():
    print("\n\n", grp, ":", members)
# print(tl.unfold(factors[i],1)) actorSemantics = tl.unfold(factors[0], 1) movieSemantics = tl.unfold(factors[1], 1) yearSemantics = tl.unfold(factors[2], 1) print("\n\nActor Semantics:") for sem in actorSemantics: print("\n\n", utils.rankSem(sem, actors)) print("\n\nMovie Semantics:") for sem in movieSemantics: print("\n\n", utils.rankSem(sem, movies)) print("\n\nYear Semantics:") for sem in yearSemantics: print("\n\n", utils.rankSem(sem, years)) actList = di.getAllActorNames() movList = di.getAllMovieNames() years = di.getAllYears() actGroups = utils.form_groups_semantics(factors[0], actList, numGroups) movGroups = utils.form_groups_semantics(factors[1], movList, numGroups) yearGroups = utils.form_groups_semantics(factors[2], years, numGroups) print("\n\n5 Non overlapping Actor groups:") for grp in actGroups.keys(): print("\n\n", grp, ":", actGroups[grp]) print("\n\n5 Non overlapping Movie groups:") for grp in movGroups.keys(): print("\n\n", grp, ":", movGroups[grp]) print("\n\n5 Non overlapping Year groups:") for grp in yearGroups.keys():