def train_regressors(directory, assas, indices):
    """Train one regressor per entry of ``assas`` and pickle each one to disk.

    For every ``assa`` the training data is read with ``predict.load`` from
    ``directory + "matrices/cut_train_" + assa``, a model is fitted with
    ``predict.train`` and the result is dumped to
    ``directory + "test_reg_dict_" + assa + ".p"``.

    Args:
        directory: Path prefix. It is concatenated as-is with the relative
            file names, so it is expected to end with a path separator.
        assas: Iterable of name strings, one per model to train.
        indices: Mapping looked up with the UTF-8 encoded name; the value
            found is forwarded to ``predict.train``.

    Returns:
        Whatever ``predict.train`` returned for the LAST entry of ``assas``
        (earlier models are only available from their pickle files), or an
        empty dict when ``assas`` is empty.
    """
    reg = {}

    for assa in assas:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(assa)
        generic_path = directory + "matrices/cut_train_" + assa
        data, val_ind = predict.load(generic_path)
        val_ind = numpy.array(val_ind)
        data = numpy.array(data)

        reg = predict.train(data, val_ind, indices[codecs.encode(assa, "utf_8")])

        # Pickle streams are binary data: text mode can corrupt them through
        # newline translation and is rejected outright on Python 3.
        with open(directory + "test_reg_dict_" + assa + ".p", "wb") as fp:
            pickle.dump(reg, fp)

    return reg
def train_regressors(directory, assas, indices):
    """Train one regressor per entry of ``assas`` on the "variant" matrices.

    For every ``assa`` the training data is read with
    ``predict.variant_load`` from
    ``directory + 'matrices/variant_train_' + assa``, a model is fitted with
    ``predict.train`` and the result is dumped to
    ``directory + 'rf_reg_dict_' + assa + '.p'``.

    Args:
        directory: Path prefix. It is concatenated as-is with the relative
            file names, so it is expected to end with a path separator.
        assas: Iterable of name strings, one per model to train.
        indices: Mapping looked up with the UTF-8 encoded name; the value
            found is forwarded to ``predict.train``.

    Returns:
        Whatever ``predict.train`` returned for the LAST entry of ``assas``
        (earlier models are only available from their pickle files), or an
        empty dict when ``assas`` is empty.
    """
    reg = {}

    for assa in assas:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(assa)
        generic_path = directory + 'matrices/variant_train_' + assa
        data, val_ind = predict.variant_load(generic_path)
        val_ind = numpy.array(val_ind)
        data = numpy.array(data)

        reg = predict.train(data, val_ind, indices[codecs.encode(assa, 'utf_8')])

        # Pickle streams are binary data: text mode can corrupt them through
        # newline translation and is rejected outright on Python 3.
        with open(directory + 'rf_reg_dict_' + assa + '.p', 'wb') as fp:
            pickle.dump(reg, fp)

    return reg
# Example #3
# 0
def train_command(args):
    """Train on the processed data, plot the result and optionally save it.

    The data comes from ``process_data(normalise_data=True)``; ``train`` is
    called with the points to predict and the data, and its first three
    return values (predictions, mean, variance) are handed to ``GPPlot``.

    Args:
        args: Object exposing ``save_figures`` (truthy to write the figure)
            and ``fig_name`` (passed to ``savefig`` when saving).
    """
    observed, query_points, ground_truth, non_nan_mask = process_data(
        normalise_data=True
    )
    predictions, mean, var, _unused = train(query_points, observed)

    # Keep only the true tide-height rows located at the non-NaN points.
    truth_at_valid_points = ground_truth.loc[non_nan_mask.values]

    figure = GPPlot(
        observed, truth_at_valid_points, mean, var, [predictions], TIDE_HEIGHT
    )
    figure.init_plot()
    figure.plot()

    if not args.save_figures:
        return
    figure.savefig(args.fig_name)
# Example #4
# 0
import predict

# download_data() yields three paths; only the training path is used below.
(train_data_path,
 predict_data_path,
 predict_output_path) = predict.download_data()

# Train every configured (model_id, model_type) pair on the training data.
# NOTE(review): force_training=True presumably retrains even if a saved
# model already exists - confirm against predict.train.
for model_id, model_type in predict.MODEL_CONFIGS:
    predict.train(
        train_data_path,
        model_id,
        model_type,
        force_training=True,
    )
# Load one dataset per company category, all sharing the same API key and
# share-price frame.
general_df = load_dataset(
    dataset='general',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)
banks_df = load_dataset(
    dataset='banks',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)
insurance_df = load_dataset(
    dataset='insurance',
    simfin_api_key=SIMFIN_API_KEY,
    shareprices_df=shareprices_df,
)

# Train one model per dataset; each gets its own winsorisation quantile and
# its own hyper-parameter dictionary.
general_model = train(
    general_df,
    winsor_quantile=0.01,
    model_name='general_model',
    feature_name='general',
    param={
        'learning_rate': 0.01,
        'max_depth': 3,
        'subsample': 0.5,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)

banks_model = train(
    banks_df,
    winsor_quantile=0.05,
    model_name='banks_model',
    feature_name='banks',
    param={
        'learning_rate': 0.01,
        'max_depth': 2,
        'subsample': 0.8,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)