def main():
    """Score per-row logistic-regression rankings built on PCA features.

    The training matrix is transposed and reduced with PCA to
    ``int(0.01 * train.shape[0])`` components.  For every row index ``u`` of
    the training matrix a fresh ``LogisticRegression`` is fit with the PCA
    output as features and column ``u`` of the transposed matrix as labels.
    The second column of ``predict_proba`` minus those labels is sorted in
    descending order and compared against the nonzero positions of
    ``test[u]``.

    The running mean of the metrics is printed every 50 rows.

    Returns:
        A 9-element array: the ``apk`` score followed by (``prec``,
        ``recall``) pairs for each cutoff in ``[3, 5, 10, 20]``, averaged
        over all processed rows.
    """
    # NOTE(review): rows are presumably users and columns items (the original
    # called the row count ``number_of_user``) -- confirm against load_data.
    cutoffs = [3, 5, 10, 20]
    map_cutoff = 200

    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()

    transposed = train.T
    totals = np.zeros(9)
    n_components = int(train.shape[0] * 0.01)
    print(n_components)

    reducer = PCA(n_components=n_components)
    features = reducer.fit_transform(transposed)

    for u in range(train.shape[0]):
        labels = transposed[:, u]
        held_out = test[u]

        model = LogisticRegression(random_state=42, C=0.001, solver='lbfgs')
        model.fit(features, labels)

        proba = model.predict_proba(features)[:, 1].ravel()
        # Subtracting the training labels lowers the score of entries that
        # are already positive in the training data.
        adjusted = proba - labels.ravel()
        ranking = adjusted.argsort()[::-1]

        bought = held_out.nonzero()[0]
        metrics = [apk(bought, ranking, map_cutoff)]
        for k in cutoffs:
            metrics.append(prec(bought, ranking, k))
            metrics.append(recall(bought, ranking, k))
        totals += np.array(metrics)

        if u % 50 == 0:
            print(totals / (u + 1))

    return totals / (u + 1)
def main():
    """Score a popularity baseline: rank columns by their training column sum.

    Every row of the training matrix gets the same base score vector
    (``train.sum(axis=0)``).  For row ``u``, positions that are positive in
    ``train[u]`` are pushed to ``-inf`` so they sort last, the result is
    sorted in descending order and compared against the nonzero positions of
    ``test[u]``.

    The running mean of the metrics is printed every 50 rows.

    Returns:
        An array of ``1 + 2 * len(ks)`` values: the ``apk`` score followed
        by (``prec``, ``recall``) pairs for each cutoff in ``ks``, averaged
        over all rows.  All values are NaN when the training matrix has no
        rows.
    """
    # NOTE(review): rows are presumably users and columns items -- confirm
    # against load_data.
    ks = [3, 5, 10, 20]
    mapk = 200

    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()

    n_rows = train.shape[0]
    res = np.zeros(1 + 2 * len(ks))
    if n_rows == 0:
        # Nothing to average over; the mean is undefined.
        return np.full(res.shape, np.nan)

    pred = train.sum(axis=0)

    for u in range(n_rows):
        seen = train[u]
        truth = test[u]

        # Build the mask as a new float array instead of assigning -inf into
        # ``seen``: that slice is a view of the training matrix, so in-place
        # assignment would corrupt it (and fails outright on integer dtypes).
        penalty = np.where(seen > 0, -np.inf, seen)
        pruned_buy_proba = pred + penalty
        pred_order = pruned_buy_proba.argsort()[::-1]

        actual_bought = truth.nonzero()[0]
        tmp = [apk(actual_bought, pred_order, mapk)]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)

        if u % 50 == 0:
            print(res / (u + 1))

    return res / n_rows
def main():
    """Score per-column logistic-regression rankings built on PCA features.

    The training matrix is reduced with PCA to
    ``int(0.01 * train.shape[1])`` components.  For every column ``i`` a
    fresh ``LogisticRegression`` is fit with the PCA output as features and
    column ``i`` of the training matrix as labels; the second column of
    ``predict_proba`` is collected per column.  The resulting score matrix
    minus the training matrix is ranked per row in descending order and each
    row's ranking is compared against the nonzero positions of ``test[u]``.

    The running mean of the metrics is printed every 50 rows.

    Returns:
        An array of ``1 + 2 * len(ks)`` values: the ``apk`` score followed
        by (``prec``, ``recall``) pairs for each cutoff in ``ks``, averaged
        over all rows.
    """
    # NOTE(review): rows are presumably users and columns items -- confirm
    # against load_data.
    ks = [3, 5, 10, 20]
    mapk = 200

    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()
    n_rows, n_cols = train.shape

    pca = PCA(n_components=int(n_cols * 0.01))
    pca.fit(train)
    new_x = pca.transform(train)

    column_probas = []
    for i in range(n_cols):
        clf = LogisticRegression(random_state=42, C=0.001, solver='lbfgs')
        clf.fit(new_x, train[:, i])
        column_probas.append(clf.predict_proba(new_x)[:, 1].ravel())

    # One row per training row, one column per training column.
    scores = np.array(column_probas).T - train
    # Reverse along axis 1 so every row is ranked best-first.  A plain
    # ``[::-1]`` would flip the row order instead, pairing each row with
    # another row's ascending ranking.
    pred = scores.argsort(axis=1)[:, ::-1]

    res = np.zeros(1 + 2 * len(ks))
    for u in range(n_rows):
        truth = test[u]
        pred_order = pred[u]

        actual_bought = truth.nonzero()[0]
        tmp = [apk(actual_bought, pred_order, mapk)]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)

        if u % 50 == 0:
            print(res / (u + 1))

    return res / n_rows
def main():
    """Score rankings from one SGD logistic model trained across all rows.

    A single ``SGDClassifier`` is updated with ``partial_fit`` once per row
    of the training matrix (for ``epoch`` passes).  For row ``u`` the
    feature matrix is the transposed training matrix with column ``u``
    repeated alongside it, and the labels are column ``u`` itself.  At
    evaluation time the second column of ``predict_proba`` minus those
    labels is sorted in descending order and compared against the nonzero
    positions of ``test[u]``.

    The running mean of the metrics is printed every 50 rows.

    Returns:
        An array of ``1 + 2 * len(ks)`` values: the ``apk`` score followed
        by (``prec``, ``recall``) pairs for each cutoff in ``ks``, averaged
        over all rows.  All values are NaN when the training matrix has no
        rows.
    """
    # NOTE(review): rows are presumably users and columns items -- confirm
    # against load_data.
    ks = [3, 5, 10, 20]
    mapk = 200
    epoch = 1

    train, test = load_data()
    train, test = train.as_matrix(), test.as_matrix()
    x = train.T

    n_rows = train.shape[0]
    res = np.zeros(1 + 2 * len(ks))
    if n_rows == 0:
        # Nothing to train on or average over; the mean is undefined.
        return np.full(res.shape, np.nan)

    def build_features(y):
        # Append ``x.shape[1]`` copies of the label column to the right of x.
        repeated = np.repeat(y[:, np.newaxis], x.shape[1], axis=1)
        return np.append(x, repeated, axis=1)

    # NOTE(review): newer scikit-learn releases name this loss 'log_loss' --
    # confirm against the installed version before upgrading.
    clf = SGDClassifier(random_state=42, loss='log')
    for _ in range(epoch):
        for u in range(n_rows):
            y = x[:, u]
            clf.partial_fit(build_features(y), y, classes=[0, 1])

    for u in range(n_rows):
        y = x[:, u]
        truth = test[u]

        # Only the probabilities are needed; the original also called
        # clf.predict() here and threw the result away.
        pred_buy_proba = clf.predict_proba(build_features(y))[:, 1].ravel()
        pruned_buy_proba = pred_buy_proba - y.ravel()
        pred_order = pruned_buy_proba.argsort()[::-1]

        actual_bought = truth.nonzero()[0]
        tmp = [apk(actual_bought, pred_order, mapk)]
        for k in ks:
            tmp.append(prec(actual_bought, pred_order, k))
            tmp.append(recall(actual_bought, pred_order, k))
        res += np.array(tmp)

        if u % 50 == 0:
            print(res / (u + 1))

    return res / n_rows