def main():
    """Score per-user logistic-regression rankings built on PCA features.

    The training matrix is transposed so that each of its columns becomes
    one sample; those samples are projected with PCA onto
    ``int(train.shape[0] * 0.01)`` components.  For every row ``u`` of the
    training matrix a fresh ``LogisticRegression`` is fitted with row ``u``
    as the target, the positive-class probabilities are reduced by the
    training values (pushing already-seen entries down the ranking), and
    the resulting descending order is scored against the non-zero entries
    of ``test[u]``.

    NOTE(review): rows are presumably users and columns items, and
    ``apk``/``prec``/``recall`` presumably compute average precision,
    precision@k and recall@k -- confirm against their definitions.

    Returns:
        A length-9 numpy array holding the running sums divided by the
        number of processed rows: the ``apk`` score at cutoff 200 followed
        by a (``prec``, ``recall``) pair for each k in 3, 5, 10, 20.
    """
    cutoffs = [3, 5, 10, 20]
    map_cutoff = 200

    train, test = load_data()
    train = train.as_matrix()
    test = test.as_matrix()

    samples = train.T
    totals = np.zeros(9)
    n_rows = train.shape[0]
    n_components = int(n_rows * 0.01)
    print(n_components)

    features = PCA(n_components=n_components).fit_transform(samples)

    for u in xrange(n_rows):
        target = samples[:, u]
        held_out = test[u]

        model = LogisticRegression(random_state=42, C=0.001, solver='lbfgs')
        model.fit(features, target)

        buy_proba = model.predict_proba(features)[:, 1].ravel()
        # Subtracting the training values demotes entries already present
        # in the training row before ranking from best to worst.
        ranking = (buy_proba - target.ravel()).argsort()[::-1]
        bought = held_out.nonzero()[0]

        metrics = [apk(bought, ranking, map_cutoff)]
        for k in cutoffs:
            metrics.append(prec(bought, ranking, k))
            metrics.append(recall(bought, ranking, k))
        totals += np.array(metrics)

        # Progress report: running averages every 50 rows.
        if u % 50 == 0:
            print(totals / (u + 1))

    return totals / (u + 1)
def main():
    """Score a popularity baseline that ranks columns by their training sums.

    The same column totals of the training matrix are used as the score
    vector for every row.  For row ``u`` the entries that are positive in
    ``train[u]`` are forced to ``-inf`` so they sort last, and the
    resulting descending order is scored against the non-zero entries of
    ``test[u]``.

    NOTE(review): rows are presumably users and columns items, and
    ``apk``/``prec``/``recall`` presumably compute average precision,
    precision@k and recall@k -- confirm against their definitions.

    Returns:
        A length-9 numpy array holding the running sums divided by the
        number of processed rows: the ``apk`` score at cutoff 200 followed
        by a (``prec``, ``recall``) pair for each k in 3, 5, 10, 20.
    """
    ks = [3, 5, 10, 20]
    mapk = 200
    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()
    pred = train.sum(axis=0)
    res = np.zeros(9)
    for u in xrange(train.shape[0]):
        truth = test[u]
        # Work on a float copy of the row.  The previous code masked a view
        # of ``train``, which overwrote the training data in place and
        # failed outright on integer matrices (-inf has no int encoding).
        y = train[u].astype(float)
        y[y > 0] = float('-inf')
        pruned_buy_proba = pred + y
        pred_order = pruned_buy_proba.argsort()[::-1]
        actual_bought = truth.nonzero()[0]
        score = apk(actual_bought, pred_order, mapk)
        tmp = [score]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)
        # Progress report: running averages every 50 rows.
        if u % 50 == 0:
            print(res / (u + 1))
    return res / (u + 1)
# Example #3
0
def main():
    """Score per-column logistic-regression rankings built on PCA features.

    The rows of the training matrix are projected with PCA onto
    ``int(train.shape[1] * 0.01)`` components.  For every column ``i`` a
    fresh ``LogisticRegression`` is fitted with that column as the target,
    and its positive-class probabilities for all rows are collected.  The
    probabilities, reduced by the training values, are ranked per row in
    descending order and scored against the non-zero entries of the
    matching test row.

    NOTE(review): rows are presumably users and columns items, and
    ``apk``/``prec``/``recall`` presumably compute average precision,
    precision@k and recall@k -- confirm against their definitions.

    Returns:
        A length-9 numpy array holding the running sums divided by the
        number of processed rows: the ``apk`` score at cutoff 200 followed
        by a (``prec``, ``recall``) pair for each k in 3, 5, 10, 20.
    """
    ks = [3, 5, 10, 20]
    mapk = 200
    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()
    x = train
    pca = PCA(n_components=int(train.shape[1] * 0.01))
    pca.fit(train)
    new_x = pca.transform(train)

    # One probability vector (over all rows) per column of ``train``.
    proba_per_column = []
    for i in xrange(train.shape[1]):
        y = x[:, i]
        clf = LogisticRegression(random_state=42, C=0.001, solver='lbfgs')
        clf.fit(new_x, y)
        pred_buy_proba = clf.predict_proba(new_x)[:, 1].ravel()
        proba_per_column.append(pred_buy_proba)
    # Transpose so the layout matches ``train`` (same rows, same columns).
    proba = np.array(proba_per_column).T

    # Reverse along axis 1 so each row is ordered from best to worst.
    # A plain ``[::-1]`` would flip the row axis instead, pairing every
    # row with another row's ascending ranking.
    pred = (proba - train).argsort(axis=1)[:, ::-1]

    res = np.zeros(9)
    for u in xrange(train.shape[0]):
        truth = test[u]
        pred_order = pred[u]
        actual_bought = truth.nonzero()[0]
        score = apk(actual_bought, pred_order, mapk)
        tmp = [score]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)
        # Progress report: running averages every 50 rows.
        if u % 50 == 0:
            print(res / (u + 1))
    return res / (u + 1)
def main():
    """Score rankings from a single SGD classifier trained across all rows.

    The training matrix is transposed so that each of its columns becomes
    one sample.  For every row ``u`` of the training matrix the feature
    matrix is the transposed training matrix with ``train[u]`` appended as
    ``train.shape[0]`` extra identical columns, and the target is
    ``train[u]`` itself.  One ``SGDClassifier`` is updated with
    ``partial_fit`` over all rows for ``epoch`` passes, then used to rank
    the samples for each row; the ranking is scored against the non-zero
    entries of ``test[u]``.

    NOTE(review): the target vector is also part of the features (both as
    a column of the transposed matrix and as the appended copies), so the
    classifier sees its own labels -- confirm this is intended.
    NOTE(review): rows are presumably users and columns items, and
    ``apk``/``prec``/``recall`` presumably compute average precision,
    precision@k and recall@k -- confirm against their definitions.

    Returns:
        A length-9 numpy array holding the running sums divided by the
        number of processed rows: the ``apk`` score at cutoff 200 followed
        by a (``prec``, ``recall``) pair for each k in 3, 5, 10, 20.
    """
    ks = [3, 5, 10, 20]
    mapk = 200
    epoch = 1
    res = np.zeros(9)
    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()
    x = train.T
    clf = SGDClassifier(random_state=42, loss='log')

    def build_features(y):
        # Shared by training and scoring: ``x`` followed by ``y`` repeated
        # once per column of ``x``.
        return np.append(x,
                         np.repeat(y[:, np.newaxis], x.shape[1], axis=1),
                         axis=1)

    for i in xrange(epoch):
        for u in xrange(train.shape[0]):
            y = x[:, u]
            clf.partial_fit(build_features(y), y, classes=[0, 1])

    for u in xrange(train.shape[0]):
        y = x[:, u]
        truth = test[u]
        # Only the probabilities are needed; the former ``clf.predict``
        # call discarded its result and has been dropped.
        pred_buy_proba = clf.predict_proba(build_features(y))[:, 1].ravel()
        # Subtracting the training values demotes entries already present
        # in the training row before ranking from best to worst.
        pruned_buy_proba = pred_buy_proba - y.ravel()
        pred_order = pruned_buy_proba.argsort()[::-1]
        actual_bought = truth.nonzero()[0]
        score = apk(actual_bought, pred_order, mapk)
        tmp = [score]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)
        # Progress report: running averages every 50 rows.
        if u % 50 == 0:
            print(res / (u + 1))

    return res / (u + 1)