def train_regressors(directory, assas, indices):
    """Train one regressor per entry of ``assas`` and pickle each result.

    For every ``assa`` the training data is loaded with ``predict.load`` from
    ``directory + "matrices/cut_train_" + assa``, passed to ``predict.train``
    and the result is dumped to
    ``directory + "test_reg_dict_" + assa + ".p"``.

    Args:
        directory: Path prefix. It is concatenated verbatim (no separator is
            inserted), so it has to end with one already.
        assas: Iterable of text identifiers. Each one is printed as a
            progress marker and used in the input and output file names.
        indices: Mapping looked up with the UTF-8 encoded form of each
            identifier (``codecs.encode(assa, "utf_8")``); the value found is
            forwarded to ``predict.train``.

    Returns:
        Whatever ``predict.train`` returned for the last identifier, or an
        empty dict when ``assas`` is empty.
    """
    reg = {}
    for assa in assas:
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(assa)
        generic_path = directory + "matrices/cut_train_" + assa
        data, val_ind = predict.load(generic_path)
        val_ind = numpy.array(val_ind)
        data = numpy.array(data)
        reg = predict.train(data, val_ind, indices[codecs.encode(assa, "utf_8")])
        # Pickle streams are bytes: binary mode is mandatory on Python 3 and
        # avoids newline translation on Windows.
        with open(directory + "test_reg_dict_" + assa + ".p", "wb") as fp:
            pickle.dump(reg, fp)
    return reg
def train_regressors(directory, assas, indices):
    """Train one regressor per entry of ``assas`` and pickle each result.

    For every ``assa`` the training data is loaded with
    ``predict.variant_load`` from
    ``directory + 'matrices/variant_train_' + assa``, passed to
    ``predict.train`` and the result is dumped to
    ``directory + 'rf_reg_dict_' + assa + '.p'``.

    Args:
        directory: Path prefix. It is concatenated verbatim (no separator is
            inserted), so it has to end with one already.
        assas: Iterable of text identifiers. Each one is printed as a
            progress marker and used in the input and output file names.
        indices: Mapping looked up with the UTF-8 encoded form of each
            identifier (``codecs.encode(assa, 'utf_8')``); the value found is
            forwarded to ``predict.train``.

    Returns:
        Whatever ``predict.train`` returned for the last identifier, or an
        empty dict when ``assas`` is empty.
    """
    reg = {}
    for assa in assas:
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(assa)
        generic_path = directory + 'matrices/variant_train_' + assa
        data, val_ind = predict.variant_load(generic_path)
        val_ind = numpy.array(val_ind)
        # NOTE: a column restriction ([:, :10]) was once tried here and is
        # currently disabled; the full matrix is used.
        data = numpy.array(data)
        reg = predict.train(data, val_ind, indices[codecs.encode(assa, 'utf_8')])
        # Pickle streams are bytes: binary mode is mandatory on Python 3 and
        # avoids newline translation on Windows.
        with open(directory + 'rf_reg_dict_' + assa + '.p', 'wb') as fp:
            pickle.dump(reg, fp)
    return reg
def train_command(args):
    """Process the data, train on it, and plot the outcome.

    The data comes from ``process_data(normalise_data=True)`` and is handed
    to ``train``; the results are drawn with ``GPPlot``. The figure is
    written to ``args.fig_name`` only when ``args.save_figures`` is truthy.

    Args:
        args: Object exposing ``save_figures`` and ``fig_name`` attributes
            (presumably a parsed command-line namespace -- confirm with the
            caller).
    """
    (
        dataset,
        prediction_points,
        ground_truth,
        tide_height_mask,
    ) = process_data(normalise_data=True)

    # The fourth element returned by train() is not needed here.
    predicted, gp_mean, gp_var, _ = train(prediction_points, dataset)

    # Keep only the true tide heights selected by the mask (the non-NaN
    # points, per the original author's note).
    row_selector = tide_height_mask.values
    ground_truth_kept = ground_truth.loc[row_selector]

    figure = GPPlot(
        dataset,
        ground_truth_kept,
        gp_mean,
        gp_var,
        [predicted],
        TIDE_HEIGHT,
    )
    figure.init_plot()
    figure.plot()

    if not args.save_figures:
        return
    figure.savefig(args.fig_name)
import predict

# download_data() hands back three paths; only the training path is consumed
# by the loop below.
(
    train_data_path,
    predict_data_path,
    predict_output_path,
) = predict.download_data()

# Train every (model_id, model_type) pair listed in predict.MODEL_CONFIGS.
# force_training=True presumably bypasses any previously trained model --
# confirm against predict.train.
for model_id, model_type in predict.MODEL_CONFIGS:
    predict.train(
        train_data_path,
        model_id,
        model_type,
        force_training=True,
    )
# Load one dataset per category; every call shares the same API key and
# share-price frame.
general_df = load_dataset(
    dataset='general',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)
banks_df = load_dataset(
    dataset='banks',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)
insurance_df = load_dataset(
    dataset='insurance',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)

# TRAIN
# The two models differ in winsor_quantile, max_depth and subsample; the
# remaining hyper-parameters are identical.
general_model = train(
    general_df,
    winsor_quantile=0.01,
    model_name='general_model',
    feature_name='general',
    param={
        'learning_rate': 0.01,
        'max_depth': 3,
        'subsample': 0.5,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)
banks_model = train(
    banks_df,
    winsor_quantile=0.05,
    model_name='banks_model',
    feature_name='banks',
    param={
        'learning_rate': 0.01,
        'max_depth': 2,
        'subsample': 0.8,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)