Beispiel #1
0
def find_categories_best_k(k):
    """Dump the businesses most related to each latent factor to text files.

    Runs ``get_p_q_best`` with ``k`` latent factors, then walks ``Q`` one
    latent factor at a time. Every business whose weight for that factor is
    above 10% of the factor's maximum weight is considered relevant. For each
    factor a file ``settings.RESULTS_DIR + "latent_<n>"`` is written with the
    name, address and keywords of the relevant businesses.

    Args:
        k: Number of latent factors, forwarded to ``get_p_q_best``.
    """
    Steps = 5000
    Alpha = 0.05
    # arrID2bid maps an array index to a business id; the user-side outputs
    # of the factorization are not needed here.
    _, Q, arrID2bid, _ = get_p_q_best(k, Steps, Alpha)

    # zip(*Q) transposes Q, so each item holds one latent factor's weight for
    # every business (position i belongs to business arrID2bid[i]).
    for latentNum, weights in enumerate(zip(*Q)):
        cutOff = 0.1 * max(weights)
        print("  Cutoff is: " + str(cutOff))

        # Keep every business above the cutoff, strongest first. The previous
        # hand-written sorted insert duplicated entries and silently dropped
        # any business weaker than the ones already collected. list.sort is
        # stable, so equal weights keep their original relative order.
        relevantBus = [
            (arrID2bid[idx], weight)
            for idx, weight in enumerate(weights)
            if weight > cutOff
        ]
        relevantBus.sort(key=lambda pair: pair[1], reverse=True)

        businessIds = []
        for bid, weight in relevantBus:
            print(weight)
            businessIds.append(bid)
        print("\n")
        print("\n")
        print("\n")

        # For this latent variable we now have all business ids; write out
        # the labels associated with these businesses. The context manager
        # closes the file even if a lookup or write fails.
        with open(settings.RESULTS_DIR + "latent_" + str(latentNum), 'w') as fp:
            for b in Business.objects.filter(pk__in=businessIds):
                fp.write(b.name.encode("utf8") + "\n")
                fp.write(b.address.encode("utf8") + "\n")
                for keyword in b.keywords.all():
                    fp.write(str(keyword) + "\n")
                fp.write("\n")
Beispiel #2
0
def build_pred_server():
    """Run matrix factorization and persist factors and recommendations.

    Calls ``get_p_q_best`` with fixed hyper-parameters, then stores:

    * one ``UserFactor`` per (user, latent factor) entry of ``P``,
    * one ``BusinessFactor`` per (business, latent factor) entry of ``Q``,
    * one ``Recommendation`` per (user, business) cell of ``P . Q^T``.

    ``arrID2uid`` / ``arrID2bid`` map matrix indices to database ids. An id
    of 0 marks a user that hasn't rated anything or a business that hasn't
    been rated; such rows and columns are skipped everywhere.
    """
    k = 42
    Steps = 5000
    Alpha = 0.05
    print("BEFORE")
    P, Q, arrID2bid, arrID2uid = get_p_q_best(k, Steps, Alpha)
    print("AFTER)")

    # Matrix row index -> User. Filled while saving the factors so the
    # prediction loop below does not have to query the database again.
    users = {}
    factors = []
    # enumerate keeps the row index in step with P even when a row is
    # skipped; the old manual counter was not advanced on `continue`, so
    # every row after the first skipped one was skipped as well.
    for rowIdx, row in enumerate(P):
        actualUID = arrID2uid[rowIdx]
        # this user hasn't rated anything
        if actualUID == 0:
            continue
        usr = User.objects.get(id=actualUID)
        users[rowIdx] = usr
        for factorIdx, col in enumerate(row):
            factors.append(
                UserFactor(user=usr, latentFactor=factorIdx, relation=col))
    UserFactor.objects.bulk_create(factors)

    # Matrix row index of Q -> Business, reused for the prediction columns.
    businesses = {}
    factors = []
    for rowIdx, row in enumerate(Q):
        actualBID = arrID2bid[rowIdx]
        # this business hasn't been rated
        if actualBID == 0:
            continue
        bus = Business.objects.get(id=actualBID)
        businesses[rowIdx] = bus
        for factorIdx, col in enumerate(row):
            factors.append(
                BusinessFactor(business=bus, latentFactor=factorIdx,
                               relation=col))
    BusinessFactor.objects.bulk_create(factors)

    # Rows of the product follow the rows of P (users), columns follow the
    # rows of Q (businesses).
    Predictions = numpy.dot(P, numpy.transpose(Q))
    predictions = []
    for userIdx, row in enumerate(Predictions):
        print(len(row))
        usr = users.get(userIdx)
        if usr is None:
            # id 0: no such user to attach recommendations to
            continue
        for busIdx, cell in enumerate(row):
            # Look the business up per column; previously it was fetched
            # once with index 0, so every recommendation pointed at the
            # first business.
            bus = businesses.get(busIdx)
            if bus is None:
                continue
            predictions.append(
                Recommendation(business=bus, recommendation=cell,
                               username=usr))
    Recommendation.objects.bulk_create(predictions)
    transaction.commit()
Beispiel #3
0
Datei: views.py Projekt: zouf/as2
def build_predictions():
    """Run matrix factorization and persist factors and recommendations.

    Calls ``get_p_q_best`` with fixed hyper-parameters, then stores:

    * one ``UserFactor`` per (user, latent factor) entry of ``P``,
    * one ``BusinessFactor`` per (business, latent factor) entry of ``Q``,
    * one ``Recommendation`` per (user, business) cell of ``P . Q^T``.

    ``arrID2uid`` / ``arrID2bid`` map matrix indices to database ids. An id
    of 0 marks a user that hasn't rated anything or a business that hasn't
    been rated; such rows and columns are skipped everywhere.

    Returns early, after the factors have been saved, when the prediction
    matrix is empty or contains only zeros.
    """
    k = 42
    Steps = 5000
    Alpha = 0.05
    print("Run Matrix Factorization to get predictions")
    print("Alpha: "+str(Alpha)+"\n")
    print("Steps: "+str(Steps)+"\n")
    print("K: "+str(k)+"\n")
    P, Q, arrID2bid, arrID2uid = get_p_q_best(k, Steps, Alpha)
    print("Done running Matrix Factorization")

    # Matrix row index -> User. Filled while saving the factors so the
    # prediction loop below does not have to query the database again.
    users = {}
    factors = []
    print("Creating user factors and saving them")
    # enumerate keeps the row index in step with P even when a row is
    # skipped; the old manual counter was not advanced on `continue`, so
    # every row after the first skipped one was skipped as well.
    for rowIdx, row in enumerate(P):
        actualUID = arrID2uid[rowIdx]
        # this user hasn't rated anything
        if actualUID == 0:
            continue
        usr = User.objects.get(id=actualUID)
        users[rowIdx] = usr
        for factorIdx, col in enumerate(row):
            factors.append(
                UserFactor(user=usr, latentFactor=factorIdx, relation=col))
    UserFactor.objects.bulk_create(factors)

    # Matrix row index of Q -> Business, reused for the prediction columns.
    businesses = {}
    factors = []
    print("Creating business factors and saving them")
    for rowIdx, row in enumerate(Q):
        actualBID = arrID2bid[rowIdx]
        # this business hasn't been rated
        if actualBID == 0:
            continue
        bus = Business.objects.get(id=actualBID)
        businesses[rowIdx] = bus
        for factorIdx, col in enumerate(row):
            factors.append(
                BusinessFactor(business=bus, latentFactor=factorIdx,
                               relation=col))
    BusinessFactor.objects.bulk_create(factors)
    print(factors)

    # Rows of the product follow the rows of P (users), columns follow the
    # rows of Q (businesses).
    Predictions = numpy.dot(P, numpy.transpose(Q))
    # `Predictions == 0` is an elementwise comparison on an array and cannot
    # be used as an `if` condition for a multi-element matrix (ValueError).
    # NOTE(review): interpreted as "nothing was predicted", i.e. the matrix
    # is empty or all zeros -- confirm this matches the intended check.
    if not numpy.any(Predictions):
        logger.debug('Predictions == 0. Returning')
        print('Predictions == 0. Returning')
        return
    print(Predictions)

    predictions = []
    for userIdx, row in enumerate(Predictions):
        print(len(row))
        usr = users.get(userIdx)
        if usr is None:
            # id 0: no such user to attach recommendations to
            continue
        for busIdx, cell in enumerate(row):
            # Look the business up per column; previously it was fetched
            # once with index 0, so every recommendation pointed at the
            # first business.
            bus = businesses.get(busIdx)
            if bus is None:
                continue
            predictions.append(
                Recommendation(business=bus, recommendation=cell,
                               username=usr))
    Recommendation.objects.bulk_create(predictions)
    transaction.commit()