def generate_param_config(ticks, isindex, look_back, num_features, start_date, data_source):
    """Build a DataFrame of the best parameters found for the given ticks.

    Steps, in order:

    1. Fetch price data through ``GetPriceData.batch_get_n_days_backward``
       using ``start_date`` and ``look_back``.
    2. Split the fetched frame by its ``tick`` column, dropping that column
       from every per-tick frame.
    3. Run ``generate_features_and_labels`` on every ``(tick, frame)`` pair
       with ``const.ta_factors`` and the ``'next_day'`` predict type.
    4. Pass only the FIRST result to ``get_best_features`` together with
       ``num_features``, then hand its output to ``batch_find_best_params``.

    :param ticks: forwarded to ``GetPriceData`` as ``sid``.
    :param isindex: forwarded to ``GetPriceData`` as ``index``.
    :param look_back: second argument of ``batch_get_n_days_backward``.
    :param num_features: second argument of ``get_best_features``.
    :param start_date: first argument of ``batch_get_n_days_backward``.
    :param data_source: forwarded to ``GetPriceData``.
    :return: ``pd.DataFrame`` built from the parameter search result; a
        non-list result is wrapped in a one-element list first.
    :raises IndexError: if the fetched data contains no tick groups.
    """
    price_source = GetPriceData(sid=ticks, index=isindex, data_source=data_source)
    history = price_source.batch_get_n_days_backward(start_date, look_back)

    # One (tick, frame) pair per tick; the redundant 'tick' column is removed.
    per_tick = []
    for tick, frame in history.groupby(['tick']):
        per_tick.append((tick, frame.drop(['tick'], axis=1)))

    # Read the factor list once, as the original partial() binding did.
    ta_factors = const.ta_factors
    prepared = []
    for pair in per_tick:
        prepared.append(
            generate_features_and_labels(
                pair, ta_factors=ta_factors, predict_type='next_day'))

    # Only the first tick's features/labels take part in feature selection.
    selected = get_best_features(prepared[0], num_features)
    best = batch_find_best_params(selected)

    rows = best if isinstance(best, list) else [best]
    return pd.DataFrame(rows)
def __predict(data_source, end_date, get_config_func, index, look_back, predict_type, start_date, ticks):
    """Predict with an SVC for every tick between ``start_date`` and ``end_date``.

    For each tick the function builds features/labels, looks up the feature
    selection and classifier parameters through ``get_config_func``, runs
    ``batch_predict`` and collects a frame with the columns ``prediction``,
    ``fact`` and ``tick`` into ``results``.

    A tick whose data raises ``LengthError`` is logged and skipped, so a
    failure can never reuse the data generator of a previously processed
    tick (or hit an unbound name on the first tick).

    :param data_source: forwarded to ``GetPriceData``.
    :param end_date: end of the prediction range.
    :param get_config_func: passed to
        ``generate_feature_and_params_from_config`` for every tick.
    :param index: forwarded to ``GetPriceData`` as ``index``.
    :param look_back: first argument of ``batch_get_data``.
    :param predict_type: forwarded to ``generate_features_and_labels``.
    :param start_date: start of the prediction range.
    :param ticks: forwarded to ``GetPriceData`` as ``sid``.

    NOTE(review): no ``return`` statement is visible in this section of the
    file; ``results`` is only accumulated here. Confirm where it is consumed.
    """
    # 1. Fetch the data.
    ins = GetPriceData(sid=ticks, index=index, data_source=data_source)
    data = ins.batch_get_data(look_back, start_date, end_date)

    # Split the single long frame into one frame per tick.
    data = [(tick, dta.drop(['tick'], axis=1))
            for tick, dta in data.groupby(['tick'])]

    # TODO: read the config first, then generate the features.
    features_and_labels = [
        generate_features_and_labels(dta, const.ta_factors, predict_type)
        for dta in data]
    data_for_pred = [
        generate_feature_and_params_from_config(x, get_config_func)
        for x in features_and_labels]

    results = []
    # NOTE(review): one classifier instance is shared by all ticks, so a
    # parameter set for one tick stays in effect unless a later tick's
    # clf_params overrides it.
    clf = SVC()
    for tick, X, y, window_size, clf_params in data_for_pred:
        try:
            data_gen = generate_data(
                X, y, window_size=window_size,
                start_date=start_date, end_date=end_date)
            clf.set_params(**clf_params)
        except LengthError as e:
            logger.critical('tick = {}'.format(tick))
            logger.exception(e)
            # Without usable data there is nothing to predict for this tick.
            continue

        preds = batch_predict(clf, data_gen, probability=False)
        preds_list = list(preds)
        # Predictions are indexed by the trade calendar rows of the range.
        preds_idx = trade_cal.loc[start_date: end_date].index
        preds = pd.Series(preds_list, index=preds_idx)
        # Labels are shifted by one row before slicing to the range.
        facts = y.shift(1).loc[start_date: end_date]

        df = pd.DataFrame({'prediction': preds, 'fact': facts})
        df.loc[:, 'tick'] = tick
        results.append(df)