def loadDataset(filename, trainingSet=None, testSet=None):
    """Build labelled training rows and placeholder-labelled test rows.

    Every non-blank CSV row supplies a movie id in column 0 and a label in
    column 1.  Each one becomes a training row: the vector returned by
    ``idf.tfIdfMovieTag`` for that id, with the label stored in the last
    slot (index ``len(di.getAllTags())``).  Every movie returned by
    ``di.getAllMovies()`` whose id, compared as ``int``, does not occur in
    the CSV becomes a test row built the same way, except that its last
    slot holds a label picked at random from ``'0'``/``'1'``.

    NOTE(review): a second ``loadDataset`` with a different return order is
    defined later in this file and rebinds the name; confirm which of the
    two callers expect.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list that training rows are appended to in
            place.  A fresh list is created when omitted.
        testSet: Optional list that test rows are appended to in place.
            A fresh list is created when omitted.

    Returns:
        A tuple ``(test, trainingSet, testSet)`` where ``test`` holds the
        ids (as returned by ``di.getAllMovies()``) of the movies placed in
        ``testSet``, in the same order.
    """
    # None sentinels instead of mutable [] defaults: with [] the same two
    # lists were shared by every call relying on the defaults, so rows
    # accumulated from one call to the next.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' is what the csv module asks for when opening files; blank
    # lines are dropped because they have no id/label columns to read.
    with open(filename, 'r', newline='') as csvfile:
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    row_width = len(di.getAllTags()) + 1  # one slot per tag plus the label
    idfMovArr = idf.idfMovieTag()

    def build_row(movie_id):
        # Rows are created on demand rather than taken from a pre-sized
        # buffer, so a CSV id that is duplicated or absent from `movies`
        # can no longer run past the end of that buffer.  Slots the vector
        # does not reach stay '' exactly as before.
        row = [''] * row_width
        for pos, weight in enumerate(idf.tfIdfMovieTag(movie_id, idfMovArr)):
            row[pos] = weight
        return row

    for record in dataset:
        row = build_row(record[0])
        row[-1] = record[1]
        trainingSet.append(row)

    # A set keeps the train/test split linear in the number of movies.
    train_ids = {int(record[0]) for record in dataset}

    test = []
    labels = ['0', '1']
    for movie in movies:
        movie_id = movie[0]
        if int(movie_id) in train_ids:
            continue
        test.append(movie_id)
        row = build_row(movie_id)
        # The genre lookup that once filled this slot is disabled in the
        # original code; a random placeholder label is stored instead.
        row[-1] = random.choice(labels)
        testSet.append(row)
    return test, trainingSet, testSet
def formSvdMat(numSemantics):
    """Return the first ``numSemantics`` columns of U from an SVD of the movie/tag matrix.

    A ``movieLen`` x ``allTagLen`` matrix is filled row by row with the
    vector ``idf.tfIdfMovieTag`` returns for ``movies[i][0]``, decomposed
    with ``np.linalg.svd(..., full_matrices=False)``, and the leading
    ``numSemantics`` columns of U are copied into the result.

    Prints ``"cant report top semantics"`` and exits the process when the
    matrix has fewer rows or fewer columns than ``numSemantics``.

    Args:
        numSemantics: Number of leading columns of U to keep.

    Returns:
        A ``movieLen`` x ``numSemantics`` NumPy array.
    """
    tag_matrix = np.zeros((movieLen, allTagLen))
    row_count = len(tag_matrix)
    if row_count < numSemantics or len(tag_matrix[0]) < numSemantics:
        print("cant report top semantics")
        sys.exit()

    idf_table = idf.idfMovieTag()
    for row in range(movieLen):
        tag_matrix[row] = idf.tfIdfMovieTag(movies[row][0], idf_table)

    # Only U is used; the singular values and V are discarded.
    left_vectors, _singular_values, _right_vectors = np.linalg.svd(
        tag_matrix, full_matrices=False
    )

    movieFacts = np.zeros((movieLen, numSemantics))
    movieFacts[:, :] = left_vectors[:movieLen, :numSemantics]
    return movieFacts
def loadDataset(filename, trainingSet=None, testSet=None):
    """Build feature rows for labelled (CSV) movies and for all other movies.

    Every non-blank CSV row supplies a movie id in column 0 and a label in
    column 1.  For each one, the vector returned by ``idf.tfIdfMovieTag``
    is copied into a row of ``len(di.getAllTags())`` slots and appended to
    ``trainingSet``; the label goes into the separate ``labels`` list.
    Every movie returned by ``di.getAllMovies()`` whose id, compared as
    ``int``, does not occur in the CSV gets a row built the same way and
    appended to ``testSet``, together with a label picked at random from
    ``'0'``/``'1'`` in ``target``.

    Args:
        filename: Path of the CSV file to read.
        trainingSet: Optional list that training rows are appended to in
            place.  A fresh list is created when omitted.
        testSet: Optional list that test rows are appended to in place.
            A fresh list is created when omitted.

    Returns:
        A tuple ``(trainingSet, testSet, labels, target, test)``:
        ``labels`` holds the CSV column-1 value of each training row;
        ``target`` has one slot per movie from ``di.getAllMovies()``, of
        which the first ``len(test)`` hold the random labels and the rest
        stay ``''``; ``test`` holds the ids of the movies placed in
        ``testSet``, in the same order.
    """
    # None sentinels instead of mutable [] defaults: with [] the same two
    # lists were shared by every call relying on the defaults, so rows
    # accumulated from one call to the next.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' is what the csv module asks for when opening files; blank
    # lines are dropped because they have no id/label columns to read.
    with open(filename, 'r', newline='') as csvfile:
        dataset = [row for row in csv.reader(csvfile) if row]

    movies = di.getAllMovies()
    allTagLen = len(di.getAllTags())
    idfMovArr = idf.idfMovieTag()

    def build_row(movie_id):
        # Slots the vector does not reach stay '' exactly as before.
        row = [''] * allTagLen
        for pos, weight in enumerate(idf.tfIdfMovieTag(movie_id, idfMovArr)):
            row[pos] = weight
        return row

    labels = []
    for record in dataset:
        row = build_row(record[0])
        labels.append(record[1])
        trainingSet.append(row)

    # A set keeps the train/test split linear in the number of movies.
    train_ids = {int(record[0]) for record in dataset}

    test = []
    label = ['0', '1']
    # Kept at one slot per movie (not per test movie) so the returned
    # shape is unchanged for existing callers.
    target = [''] * len(movies)
    for movie in movies:
        movie_id = movie[0]
        if int(movie_id) in train_ids:
            continue
        test.append(movie_id)
        row = build_row(movie_id)
        # Placeholder label: the genre lookup is disabled in the original.
        target[len(test) - 1] = random.choice(label)
        testSet.append(row)
    return trainingSet, testSet, labels, target, test
def formPPRMatrix():
    """Return a ``movieLen`` x ``allTagLen`` matrix of per-movie tag vectors.

    Row ``i`` is the vector ``idf.tfIdfMovieTag`` returns for
    ``movies[i][0]``, computed against the table from
    ``idf.idfMovieTag()``.
    """
    tag_matrix = np.zeros((movieLen, allTagLen))
    idf_table = idf.idfMovieTag()
    for row, movie in enumerate(movies):
        tag_matrix[row] = idf.tfIdfMovieTag(movie[0], idf_table)
    return tag_matrix
if __name__ == "__main__":
    # NOTE(review): everything below treats the value returned by
    # loadDataset as a flat sequence of movie ids (it is searched with
    # .index() and each element is handed to idf.tfIdfMovieTag) -- confirm
    # that matches the loadDataset actually in scope.
    trainingSet = []
    movies = loadDataset('foo.csv')
    numsemantics = 500
    allMovies = di.getAllMovies()
    tagIds = di.getAllTags()
    allTagLen = len(tagIds)
    movNames = di.getAllMovieNames()

    # dtype=object so complete names are stored.  An array created from
    # empty strings gets a one-character string dtype, which silently cut
    # every assigned name down to its first character.
    movieNames = np.array([''] * len(movies), dtype=object)
    for i, mov in enumerate(allMovies):
        # One scan of `movies` per entry instead of `in` followed by
        # `.index`; ids that are not in `movies` are skipped as before.
        try:
            idx = movies.index(mov[0])
        except ValueError:
            continue
        movieNames[idx] = movNames[i][0]

    # One row per loaded movie, filled with its idf.tfIdfMovieTag vector.
    mat = np.zeros((len(movies), allTagLen))
    idfMovArr = idf.idfMovieTag()
    for i, movie_id in enumerate(movies):
        mat[i] = idf.tfIdfMovieTag(movie_id, idfMovArr)
    a = svd.svdUout(mat, numsemantics)

    d = 5
    xmax = 20
    points = np.array(a)

    l = int(input("Enter number of layers:"))
    k = int(input("Enter number of hashes per layer:"))
    r = int(input("Enter number of neighbours:"))
    q = input("Enter query movieid:")  # kept as the raw string typed in
    ks = [k]
    ls = [l]
    query = []