# Train an L1-regularized LinearSVC on a StandardScaler -> PCA(10) pipeline,
# persist the fitted GridSearchCV, and report training + held-out metrics.
# NOTE(review): `parms`, `X`, `Y`, `feature_names`, `summary`, `TestModel`,
# `util` and `__fname__` are defined elsewhere in this script — confirm.
import pickle

tr = StandardScaler()
pca = PCA(n_components=10)
# penalty='l1' requires the primal formulation, hence dual=False.
lr = LinearSVC(penalty='l1', dual=False)
pipe = Pipeline([('tr', tr), ('pca', pca), ('lr', lr)])

clf = GridSearchCV(pipe, parms, scoring='f1', n_jobs=5)
clf.fit(X, Y)

# `with` guarantees the model file is closed even if pickling raises
# (the original leaked the handle on error).
with open('%s.model' % __fname__, 'wb') as f:
    pickle.dump(clf, f)

pred = clf.predict(X)
summary.clf_summary(clf, feature_names)
summary.summary(Y, pred)  # metrics on the training set itself

# TestModel() presumably evaluates on held-out data — verify against its def.
F1, P, R = TestModel()
util.notify_me('%s.F1:%.2f,P:%.2f,R:%.2f'
               % (__fname__, F1 * 100, P * 100, R * 100))
# Load the items that need to be recommended: fan feature files out to a
# process pool, union the per-file recommendation sets, then write the
# (user_id, item_id) pairs as CSV and notify.
rec_set = set()
pool = mp.Pool(com.__n_process)
re_str = r'feature_total\.merge\.\d+\.csv$'
f_list = util.FilterFile(re_str)
rec_set_list = pool.map(GenRecDataFromFeatureFile,
                        [(sys.argv[1], f) for f in f_list])
for r in rec_set_list:
    rec_set |= r

# Open the output only once the data is ready, and use `with` so the file
# is closed even if a write fails (the original opened it up-front and
# leaked the handle on error).
with open(fout, 'wb') as fo:
    fw = csv.writer(fo, delimiter=',')
    fw.writerow(['user_id', 'item_id'])
    for uid, tid in rec_set:
        fw.writerow([uid, tid])

nrows = len(rec_set)
# Messages kept byte-identical (including the original's spelling).
print('recommand %d record.' % nrows)
util.notify_me('recommand data are done! %d record.' % nrows)
# NOTE(review): this fragment begins mid `if shelter == ...` chain inside an
# unseen try/loop — the opening branches, loop header and indentation levels
# are outside this view, so the code is left byte-identical below.
# What the visible code shows: a per-shelter dog count is fetched
# (check_lerefugefmv / check_spcamontreal), logged, and compared with the
# previous run's value; when the count grew (and this is not the first run)
# a French notification is sent via util.notify_me and the new count is
# stored in new_dict. On util.MyException the previous count is kept and the
# error logged. Presumably a loop over shelters, throttled by sleep(1), with
# the counts persisted afterwards via util.write_prec_values — TODO confirm.
elif shelter == "lerefugefmv": present_value = check_lerefugefmv(url) elif shelter == "spcamontreal": present_value = check_spcamontreal(url) logger.info("%s: %s chiens" % (shelter, present_value)) if present_value > previous_value and not is_first_run: title = "Refuge %s" % shelter diff = present_value - previous_value if diff == 1: msg = "%s nouveau chien\n%s" % (diff, url) elif diff > 1: msg = "%s nouveaux chiens\n%s" % (diff, url) logger.critical(msg.strip()) util.notify_me(title, msg) new_dict[shelter] = present_value except util.MyException as e: new_dict[shelter] = previous_value logger.error(e) #util.notify_me("Adoptions", e) time.sleep(1) util.write_prec_values(new_dict) logger.debug("")
# Train an L1 logistic regression on a rebalanced subsample (every positive
# plus ~10% of the rest), persist the fitted GridSearchCV, report metrics.
import pickle

X, Y = GetData()
# Keep all Y == 1 rows and a ~10% random slice of the remainder to
# rebalance classes before fitting.
samples = (np.random.rand(len(Y)) < 0.1) | (Y == 1)
X = X[samples]
Y = Y[samples]
feature_names = X.columns  # assumes X is a pandas DataFrame — TODO confirm

parms = {
    'C': [1.05],  # np.logspace(-1, 0, 4),
    # 'class_weight': [{0: 1, 1: 1}]
    # [{0:1,1:50},{0:1,1:70},{0:1,1:85},{0:1,1:100},{0:1,1:120},{0:1,1:150}]
}
lr = LogisticRegression(penalty='l1')
clf = GridSearchCV(lr, parms, scoring='f1', n_jobs=1)
clf.fit(X, Y)

# `with` guarantees the model file is closed even if pickling raises
# (the original leaked the handle on error).
with open('%s.model' % __fname__, 'wb') as f:
    pickle.dump(clf, f)

pred = clf.predict(X)
summary.clf_summary(clf, feature_names)
summary.summary(Y, pred)  # metrics on the (subsampled) training set
TestModel()
util.notify_me('%s is finished' % __fname__)
# Grid-search C for an L1 logistic regression, persist the fitted
# GridSearchCV, and report metrics on the training data.
import pickle

feature_names = X.columns  # assumes X is a pandas DataFrame — TODO confirm
parms = {
    'C': np.logspace(-1, 0, 4),
    # 'class_weight': [{0: 1, 1: 10}]
    # [{0:1,1:50},{0:1,1:70},{0:1,1:85},{0:1,1:100},{0:1,1:120},{0:1,1:150}]
}
lr = LogisticRegression(penalty='l1')
clf = GridSearchCV(lr, parms, scoring='f1', n_jobs=10)
clf.fit(X, Y)

# `with` guarantees the model file is closed even if pickling raises
# (the original leaked the handle on error).
with open('%s.model' % __fname__, 'wb') as f:
    pickle.dump(clf, f)

print(__doc__)  # echo the module docstring describing this experiment
pred = clf.predict(X)
summary.clf_summary(clf, feature_names)
summary.summary(Y, pred)  # metrics on the training set itself
TestModel()
util.notify_me('%s is finished' % __fname__)