def init_start_urls(self):
    """Populate ``self.start_urls`` from matches with an unexpected row count.

    Groups ``Match500RecentModel`` rows by ``match_id`` and keeps only the
    ids whose row count differs from 40, then appends one URL per id, built
    with ``self.gen_match_urls_with_id``.

    NOTE(review): 40 is presumably the number of "recent" rows a fully
    crawled match should have -- confirm against the crawler/parser.
    """
    db = loadSession()
    match_id_col = Match500RecentModel.match_id
    incomplete_rows = (
        db.query(distinct(match_id_col).label('match_id'))
        .group_by(match_id_col)
        .having(func.count(1) != 40)
        .all()
    )
    for row in incomplete_rows:
        # Each result row is a 1-tuple holding the match id.
        match_url = self.gen_match_urls_with_id(str(row[0]))
        self.start_urls.append(match_url)
# Single-argument parenthesised print behaves identically under the Python 2
# print statement and the Python 3 print function.
print(classification_report(y_test, lr_y_predict))
# print lr.coef_.T
# print lr_y_predict_prob
#
# print 'Accuracy of LR Classifier:',lr.score(X_test,y_test)

# 2. Backtest the model against the existing data ("pseudo" backtest).
from adagu_bodan.model import loadSession
from adagu_bodan.model.bodan_500 import Bodan500

session = loadSession()
# `!= None` is deliberate: SQLAlchemy overloads the operator to emit
# IS NOT NULL, whereas `is not None` would be evaluated by Python itself.
bodans = session.query(Bodan500).filter(
    Bodan500.away_goal != None,  # noqa: E711
    Bodan500.zero_one != None,  # noqa: E711
).all()


def predict_win_no_win(bodan):
    """Predict a class for one ``Bodan500`` row and store it on the row.

    Builds the feature vector from the first 25 entries of ``column_names``,
    scales it with ``ss``, asks ``lr`` for class probabilities, and writes
    the chosen class to ``bodan.predict_win_no_win`` (also printing it).

    The chosen class is the first entry of ``lr.classes_`` whose probability
    exceeds ``threshold``; when no probability exceeds it, ``np.argmax`` of
    an all-False row is 0, so ``lr.classes_[0]`` is used.

    NOTE(review): the assignment only mutates the in-memory object; nothing
    visible here commits the session -- confirm whether that is intended.
    """
    x_column_names = column_names[0:25]
    # getattr goes through the ORM attribute machinery, so expired or
    # not-yet-loaded columns are fetched instead of raising KeyError as a
    # direct ``__dict__`` lookup would.
    features = [getattr(bodan, name) for name in x_column_names]
    # scikit-learn expects a 2-D (n_samples, n_features) array; a bare 1-D
    # vector is rejected by current releases, so shape it as one sample.
    X = np.array(features).reshape(1, -1)
    X = ss.transform(X)
    y_predict_prob = lr.predict_proba(X)
    y_predict = lr.classes_[np.argmax(y_predict_prob > threshold, axis=1)]
    bodan.predict_win_no_win = y_predict[0]
    print(y_predict[0])


for bodan in bodans:
    predict_win_no_win(bodan)
def save_errs(self):
    """Persist every object in ``self.spider_errs`` with a single commit.

    Opens a session via ``loadSession``, registers all collected error
    records with it, and commits them together.
    """
    db = loadSession()
    db.add_all(self.spider_errs)
    db.commit()