def get_best_current_recommendation(business, user):
    """Return the recommended rating of ``business`` for ``user``.

    A stored ``Recommendation`` row for the pair is returned when one can be
    fetched. Otherwise the value is the dot product of the user's and the
    business's latent-factor vectors plus the normalization factor from
    ``getNormFactors``, rounded to the nearest half and limited to the range
    [1.0, 4.0]. When either side has no stored factors, the value from
    ``getBusAverageRating`` is stored as the recommendation and returned
    instead.

    Args:
        business: Object used in the ``business=`` lookups; its ``id`` is read.
        user: Object used in the ``user=`` lookups; its ``id`` is read.

    Returns:
        The stored recommendation, the business average, or the rounded and
        clamped prediction.
    """
    try:
        return Recommendation.objects.get(
            user=user, business=business
        ).recommendation
    except (Recommendation.DoesNotExist,
            Recommendation.MultipleObjectsReturned):
        # No single cached row to use: fall through and compute the value.
        # Any other error (bad arguments, database failure) now propagates
        # instead of being silently swallowed.
        pass

    NumFactors = 42
    print(business.id)
    normalizationFactor = getNormFactors(user.id, business.id)
    businessAverage = getBusAverageRating(business)

    # Dense vector of the user's factors; slots without a row stay 0.
    ufset = UserFactor.objects.filter(user=user)
    myFactors = np.zeros(NumFactors)
    for uf in ufset:
        myFactors[uf.latentFactor] = uf.relation
    if ufset.count() == 0:
        logger.debug("Getting business average since the user has no factors")
        print("Getting business average since the user has no factors")
        Recommendation.objects.create(
            user=user, business=business, recommendation=businessAverage
        )
        return businessAverage

    # Same construction for the business side.
    bfset = BusinessFactor.objects.filter(business=business)
    busFactors = np.zeros(NumFactors)
    for bf in bfset:
        busFactors[bf.latentFactor] = bf.relation
    if bfset.count() == 0:
        logger.debug("Getting business average since the business has no factors")
        print("Getting business average since the business has no factors")
        # Cache the fallback so the next call returns it directly.
        Recommendation.objects.create(
            user=user, business=business, recommendation=businessAverage
        )
        return businessAverage

    logger.debug("Getting recommendation from actual predictions")
    print("Getting recommendation from actual predictions")
    prediction = np.dot(myFactors, busFactors) + normalizationFactor
    rec = round(prediction * 2) / 2  # round to the nearest half

    # NOTE(review): unlike the two fallbacks above, this value is not stored
    # as a Recommendation -- confirm that is intended.
    if rec > 4.0:
        return 4.0
    if rec < 1.0:
        return 1.0
    return rec
def getAllRatMatrix(N, M, allRatings):
    """Convert ratings into ``[user_slot, business_slot, value]`` triples.

    Every distinct user id and business id met in ``allRatings`` is given
    the next free array slot, in order of first appearance. The stored value
    is the rating minus ``getNormFactors(user_id, business_id)``. The number
    of ratings processed so far is printed before every 100th rating,
    starting with 0.

    Args:
        N: Length of the slot -> user id array.
        M: Length of the slot -> business id array.
        allRatings: Iterable of objects exposing ``user.id``,
            ``business.id`` and ``rating``.

    Returns:
        A tuple ``(allRatMatrix, bid2arrID, uid2arrID, arrID2bid,
        arrID2uid)``: the list of triples, the id -> slot dicts, and the
        slot -> id arrays created with ``numpy.zeros``.
    """
    arrID2uid = numpy.zeros(N)
    arrID2bid = numpy.zeros(M)
    uid2arrID = {}
    bid2arrID = {}
    allRatMatrix = []

    for processed, rating in enumerate(allRatings):
        if processed % 100 == 0:
            print(processed)

        user_id = rating.user.id
        business_id = rating.business.id
        norm = getNormFactors(user_id, business_id)

        # The next free slot always equals the number of ids mapped so far.
        business_slot = bid2arrID.get(business_id)
        if business_slot is None:
            business_slot = len(bid2arrID)
            bid2arrID[business_id] = business_slot
            arrID2bid[business_slot] = business_id

        user_slot = uid2arrID.get(user_id)
        if user_slot is None:
            user_slot = len(uid2arrID)
            uid2arrID[user_id] = user_slot
            arrID2uid[user_slot] = user_id

        allRatMatrix.append(
            [user_slot, business_slot, float(rating.rating - norm)]
        )

    return allRatMatrix, bid2arrID, uid2arrID, arrID2bid, arrID2uid
def get_best_current_recommendation(business, user):
    """Predict the rating ``user`` would give ``business``.

    The prediction is the dot product of the two latent-factor vectors plus
    the normalization factor from ``getNormFactors``, rounded to the nearest
    half and limited to the range [1.0, 4.0]. The raw prediction is printed
    before rounding.

    Returns:
        ``0`` when the user or the business has no stored factors,
        otherwise the rounded and clamped prediction.
    """
    NumFactors = 42

    # User side: dense vector, slots without a stored factor stay 0.
    user_rows = UserFactor.objects.filter(user=user)
    user_vector = np.zeros(NumFactors)
    for row in user_rows:
        slot = row.latentFactor
        user_vector[slot] = row.relation
    if not user_rows.count():
        return 0

    # Business side, built the same way.
    business_rows = BusinessFactor.objects.filter(business=business)
    business_vector = np.zeros(NumFactors)
    for row in business_rows:
        slot = row.latentFactor
        business_vector[slot] = row.relation
    if not business_rows.count():
        return 0

    raw_score = np.dot(user_vector, business_vector)
    prediction = raw_score + getNormFactors(user.id, business.id)
    print(prediction)
    print("PREDICTION!!!\n")

    half_steps = round(prediction * 2)
    rec = half_steps / 2  # nearest half
    if rec > 4.0:
        return 4.0
    if rec < 1.0:
        return 1.0
    return rec
def _clamp_to_rating_scale(value):
    """Return ``(rounded, as_float)`` forms of ``value`` limited to [1, 4]."""
    if value > 4:
        return 4, 4.0
    if value < 1:
        return 1, 1.0
    return round(value), float(value)


def _score_held_out_fold(inFold, nP, nQ, arrID2uid, arrID2bid, pred_fp, ctr):
    """Score one held-out fold and log every prediction to ``pred_fp``.

    Each row of ``inFold`` is ``[user_slot, business_slot, value]``. The
    normalization factor from ``getNormFactors`` is added back to the stored
    value and to ``numpy.dot(nP[user_slot], nQ[business_slot])`` before the
    two are compared.

    Returns:
        ``(rssFloat, distFloat, rssRounded, distRounded, ctr)``: the summed
        squared and absolute errors for the float and the rounded values,
        and the updated count of rows written.
    """
    distFloat = 0.0
    rssFloat = 0.0
    rssRounded = 0
    distRounded = 0
    for r in inFold:
        uid = r[0]
        bid = r[1]
        NormFactor = getNormFactors(arrID2uid[uid], arrID2bid[bid])
        # Kept in a local instead of writing back into the row, so the
        # caller's fold data is never modified.
        actual = r[2] + NormFactor
        prediction = numpy.dot(nP[uid], nQ[bid]) + NormFactor

        roundR, floatR = _clamp_to_rating_scale(actual)
        roundP, floatP = _clamp_to_rating_scale(prediction)

        rssFloat += math.pow(abs(floatP - floatR), 2)
        distFloat += abs(floatP - floatR)
        rssRounded += math.pow(abs(roundP - roundR), 2)
        distRounded += abs(roundP - roundR)

        pred_fp.write(
            str(abs(floatP - floatR)) + " " + str(floatR) + " "
            + str(floatP) + " " + '\n'
        )
        if ctr % 1000 == 0:
            pred_fp.flush()
        ctr += 1
    return rssFloat, distFloat, rssRounded, distRounded, ctr


def run_nmf_mult_k(K, Steps, Alpha):
    """Cross-validate ``run_nmf_internal`` for every factor count in ``K``.

    All ratings are converted with ``getAllRatMatrix`` and split with
    ``get_folds``. For each ``k`` in ``K`` and each of 5 folds, the model is
    trained on the other four folds and scored on the held-out one. Per-``k``
    averages are appended to a results file under ``settings.RESULTS_DIR``,
    and every individual prediction goes to a matching ``predictions_`` file.
    ``/tmp/debug-ratings.txt`` is opened for writing and handed to
    ``run_nmf_internal`` as ``fp``.

    Args:
        K: Non-empty sequence of factor counts; the first and last entries
            are used in the output file names.
        Steps: Passed through to ``run_nmf_internal`` and recorded in the
            results header.
        Alpha: Passed through to ``run_nmf_internal`` and recorded in the
            results header.

    Returns:
        None. Results are written to files and progress is printed.
    """
    N = User.objects.count()
    M = Business.objects.count()
    allRatings = Rating.objects.all()
    user_rating_threshold = 0
    bus_rating_threshold = 0
    num_folds = 5

    run_tag = (
        "u" + str(user_rating_threshold)
        + "_b" + str(bus_rating_threshold)
        + "_s" + str(Steps)
        + "_k" + str(K[0]) + "-" + str(K[-1])
    )
    resultFile = settings.RESULTS_DIR + run_tag
    predictionFile = settings.RESULTS_DIR + "predictions_" + run_tag
    print(resultFile)

    # time.clock() was removed in Python 3.8; fall back to time.time() on
    # interpreters that predate perf_counter().
    timer = getattr(time, "perf_counter", time.time)

    # Context managers close the files even if a fold raises.
    with open(resultFile, "w") as fp, open(predictionFile, "w") as pred_fp:
        # The header needs its own line; without the newline the first data
        # row was appended to this comment line.
        # NOTE(review): the rows written below hold only difference, actual
        # and predicted -- confirm whether the K and f columns should be
        # written or dropped from this header.
        pred_fp.write("#K, f, Difference, Actual, Predicted\n")
        fp.writelines([
            "#NumUsers = " + str(N + 1) + '\n',
            "#NumBusinesses = " + str(M + 1) + '\n',
            "#UserThresh = " + str(user_rating_threshold) + '\n',
            "#BusinessThresh = " + str(bus_rating_threshold) + '\n',
            "#NumRatings = " + str(allRatings.count()) + '\n',
            "#Steps = " + str(Steps) + '\n',
            "#Alpha = " + str(Alpha) + '\n',
            "#TimeStart = " + str(time.asctime()) + '\n',
            '#\n',
            '#K, AvgRSSRounded, AvgDistRounded, AvgRSSFloat, AvgDistFloat\n',
        ])
        fp.flush()

        print("Moving data to an array...")
        with open("/tmp/debug-ratings.txt", "w") as fp2:
            allRatMatrix, _, _, arrID2bid, arrID2uid = getAllRatMatrix(
                N, M, allRatings
            )
            print("Generating Folds...")
            folds = get_folds(allRatMatrix)
            print("Fold Generation Complete...")

            for k in K:
                print("Running on K=" + str(k) + " Starting at time= "
                      + time.asctime())
                sumDistRounded = 0
                sumRSSRounded = 0
                sumDistFloat = 0.0
                sumRSSFloat = 0.0
                ctr = 0

                for f in range(num_folds):
                    # Train on every fold except f, score on fold f.
                    outFold = [
                        row
                        for idx in range(num_folds)
                        if idx != f
                        for row in folds[idx]
                    ]
                    inFold = folds[f]

                    time_before = timer()
                    nP, nQ = run_nmf_internal(
                        outFold, N, M, k, Steps, Alpha, fp=fp2
                    )
                    del outFold  # release the training rows before scoring
                    elapsed = timer() - time_before
                    print("\tK=" + str(k) + " Fold=" + str(f)
                          + " TimeElapsed=" + str(elapsed / 60) + " minutes")

                    (rssFloat, distFloat, rssRounded, distRounded,
                     ctr) = _score_held_out_fold(
                        inFold, nP, nQ, arrID2uid, arrID2bid, pred_fp, ctr
                    )

                    inFoldLen = len(inFold)
                    sumDistRounded += distRounded / inFoldLen
                    sumRSSRounded += rssRounded / inFoldLen
                    sumDistFloat += distFloat / inFoldLen
                    sumRSSFloat += rssFloat / inFoldLen
                    print("\t\t RSS_float=" + str(rssFloat / inFoldLen)
                          + " Distance_float=" + str(distFloat / inFoldLen))
                    print("\t\t RSS_rounded=" + str(rssRounded / inFoldLen)
                          + " Distance_rounded="
                          + str(distRounded / inFoldLen))

                result_1 = (str(sumRSSRounded / num_folds) + ", "
                            + str(sumDistRounded / num_folds))
                result_2 = (str(sumRSSFloat / num_folds) + ", "
                            + str(sumDistFloat / num_folds))
                fp.write(str(k) + ", " + result_1 + ", " + result_2 + '\n')
                fp.flush()
                print("\n\n\nK=" + str(k) + "Rounded: " + result_1
                      + " Float: " + result_2)

        fp.write("#TimeEnd = " + str(time.asctime()) + '\n')