def split(df):
    """Split a data frame into train/test feature and target arrays.

    Columns 0-3 of ``df`` are used as the features and column 4 as the
    target.  The arrays are handed to ``tss`` with ``test_size=0.25`` and
    ``random_state=0``.

    Returns:
        The four values produced by ``tss``, in the order
        ``(X_train, X_test, y_train, y_test)``.
    """
    features = df.iloc[:, 0:4].values  # independent variables
    target = df.iloc[:, 4].values  # dependent variable = mpg
    train_x, test_x, train_y, test_y = tss(
        features,
        target,
        test_size=0.25,
        random_state=0,
    )
    return train_x, test_x, train_y, test_y
def __evaluate_model_less(X, eta_vec, min_child_weight_vec, max_depth_vec,
                          gamma_vec, subsample_vec, colsample_bytree_vec,
                          number_of_splits=5,
                          metric_func=mean_squared_error):
    """Return the mean out-of-sample error of a SARIMAX model over splits of X.

    For every ``(train, test)`` index pair produced by
    ``tss(n_splits=number_of_splits).split(X)`` a SARIMAX model is fitted on
    the training part, a forecast as long as the test part is produced, and
    ``metric_func(test, forecast)`` is accumulated.  The accumulated error is
    divided by ``number_of_splits`` and returned as a ``float``.

    NOTE(review): ``eta_vec``, ``min_child_weight_vec``, ``max_depth_vec``,
    ``gamma_vec``, ``subsample_vec`` and ``colsample_bytree_vec`` are never
    read by this function.  ``arima_order`` and ``seasonal_order`` are neither
    parameters nor locals, so they must be resolvable from the enclosing
    scope when the function runs - confirm against the rest of the file.
    """
    splitter = tss(n_splits=number_of_splits)
    accumulated = 0
    for train_index, test_index in splitter.split(X):
        history = X[train_index]
        holdout = X[test_index]
        fitted = SARIMAX(
            history,
            order=arima_order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        ).fit(disp=0)
        forecast = fitted.forecast(steps=len(holdout))
        # Accumulate the out-of-sample error of this split.
        accumulated = metric_func(holdout, forecast) + accumulated
    return float(accumulated / number_of_splits)
def validation(x, y, nsplits=5, nrepeats=4):
    """Yield repeated K-fold train/validation/test samples built from ``x`` and ``y``.

    The keys of ``x`` are split with ``KFold(n_splits=nsplits)``; this is
    repeated ``nrepeats`` times.  For each fold the non-test keys are further
    divided into training and validation keys by ``tss`` (called with its
    default arguments), and ``makeSample`` builds the corresponding samples.

    Yields:
        A 9-tuple ``(x_train, y_train, x_val, y_val, x_test, y_test,
        train_keys, val_keys, test_keys)``.
    """
    ids = np.array(list(x.keys()))
    for _ in range(nrepeats):
        folds = KFold(n_splits=nsplits)
        for remaining_idx, held_out_idx in folds.split(ids):
            train_keys, val_keys = tss(ids[remaining_idx])
            test_keys = ids[held_out_idx]
            x_train, y_train = makeSample(x, y, train_keys)
            x_val, y_val = makeSample(x, y, val_keys)
            x_test, y_test = makeSample(x, y, test_keys)
            yield (
                x_train, y_train,
                x_val, y_val,
                x_test, y_test,
                train_keys, val_keys, test_keys,
            )
def _fit_prophet(series, changepoint_prior_scale, seasonality_prior_scale,
                 fourier_order):
    """Fit a linear-growth Prophet model with a custom yearly seasonality.

    Returns ``(model, fitted)``: the Prophet instance and the value returned
    by its ``fit`` call on ``series``.
    """
    model = Prophet(
        changepoint_prior_scale=changepoint_prior_scale,
        growth='linear',
    ).add_seasonality(
        name='yearly',
        period=365.25,
        prior_scale=seasonality_prior_scale,
        fourier_order=fourier_order,
    )
    # Prophet is fitted on a frame with the observations in 'y' and the
    # time stamps (taken from the series index) in 'ds'.
    frame = series.to_frame()
    frame = frame.rename(columns={frame.columns[0]: 'y'})
    frame['ds'] = frame.index
    return model, model.fit(frame)


def calculate_weights(allData, material, number_of_splits=5,
                      metric=mean_squared_error):
    """Grid-search the weights that best blend three Prophet forecasts.

    Three Prophet models (internal sales, external market, competitor market)
    are fitted on each training split produced by
    ``tss(n_splits=number_of_splits).split(sales_data)``.  For every weight
    triple ``(w_int, w_ext, w_comp)`` on a 0.01 grid that sums to 1, the
    blended forecast
    ``w_int*internal + w_ext*external + w_comp*competitor*mean_percentage``
    is scored against the internal test data with ``metric``; the scores are
    summed over all splits.

    Args:
        allData: object providing ``get_dataframes_for_material(material)``.
        material: material identifier, passed through unchanged.
        number_of_splits: number of splits requested from ``tss``.
        metric: callable ``metric(actual, predicted)``; lower is better.

    Returns:
        The weight triple with the lowest accumulated error, or ``(0, 0, 0)``
        if no triple scores below infinity.
    """
    (market_data_by_competitor, sales_data, market_data, _stock,
     market_percentage) = allData.get_dataframes_for_material(material)
    fourier_order, changepoint_prior_scale, seasonality_prior_scale = \
        get_best_param_from_results(material, const.internal_sales_SAP)
    fourier_order_ext, changepoint_prior_scale_ext, seasonality_prior_scale_ext = \
        get_best_param_from_results(material, const.external_sales_IMS)
    fourier_order_comp, changepoint_prior_scale_comp, seasonality_prior_scale_comp = \
        get_best_param_from_results(material, const.market_comp_sales_IMS)

    # Build the grid from integer hundredths.  Testing ``x + y + z == 1`` on
    # floating-point multiples of 0.01 silently drops valid triples whose sum
    # is off by a rounding error.  Each component stays in [0, 0.99], and the
    # iteration order (first weight, then second) matches the original grid.
    steps = 100
    possible_weights = [
        (i / steps, j / steps, (steps - i - j) / steps)
        for i in range(steps)
        for j in range(steps)
        if 0 <= steps - i - j < steps
    ]
    best_weights = (0, 0, 0)
    best_score = float('inf')

    market_data = market_data[-len(sales_data) + 1:]
    market_data = market_data[market_data.columns.values[0]]
    market_data_by_competitor = market_data_by_competitor[-len(sales_data) + 1:]
    sales_data = sales_data[sales_data.columns.values[0]]
    mean_percentage = market_percentage[-6:].mean()

    error = dict.fromkeys(possible_weights, 0)
    for train_index, test_index in tss(n_splits=number_of_splits).split(sales_data):
        train_set_internal = sales_data[train_index]
        test_set_internal = sales_data[test_index]
        # The market series are selected with the training index re-indexed
        # by all but its last entry (kept exactly as in the original code).
        market_index = train_index[train_index[:-1]]
        train_set_external = market_data[market_index]
        train_set_competitor = market_data_by_competitor[market_index]

        model_internal, model_fit_internal = _fit_prophet(
            train_set_internal, changepoint_prior_scale,
            seasonality_prior_scale, fourier_order)
        model_external, model_fit_external = _fit_prophet(
            train_set_external, changepoint_prior_scale_ext,
            seasonality_prior_scale_ext, fourier_order_ext)
        model_market, model_fit_market = _fit_prophet(
            train_set_competitor, changepoint_prior_scale_comp,
            seasonality_prior_scale_comp, fourier_order_comp)

        horizon = len(test_index)
        future_internal = model_internal.make_future_dataframe(
            periods=horizon, freq='MS')
        # The market models get one extra future period; only the last
        # ``horizon`` predictions of each model are used below.
        future_external = model_external.make_future_dataframe(
            periods=horizon + 1, freq='MS')
        future_market = model_market.make_future_dataframe(
            periods=horizon + 1, freq='MS')

        yhat_internal = model_fit_internal.predict(future_internal)['yhat'][-horizon:]
        yhat_external = model_fit_external.predict(future_external)['yhat'][-horizon:]
        yhat_market = model_fit_market.predict(future_market)['yhat'][-horizon:]

        for pw in possible_weights:
            blended = (pw[0] * yhat_internal
                       + pw[1] * yhat_external
                       + pw[2] * yhat_market * mean_percentage)
            error[pw] = metric(test_set_internal, blended) + error[pw]

    for candidate, score in error.items():
        if score < best_score:
            best_score = score
            best_weights = candidate
    return best_weights
def __evaluate_model_less(X, changepoint_prior_scale_par,
                          seasonality_prior_scale_par, fourier_order_par,
                          number_of_splits=5, test_window=12,
                          metric=mean_squared_error):
    """Return the mean out-of-sample error of a Prophet model over splits of X.

    For every ``(train, test)`` index pair from
    ``tss(n_splits=number_of_splits).split(X)`` a linear-growth Prophet model
    with a custom ``'yearly'`` seasonality is fitted on the training part and
    asked to predict ``len(test)`` further periods.  The last ``len(test)``
    predictions are scored against the test part with ``metric``.  The summed
    score divided by ``number_of_splits`` is returned as a ``float``.

    NOTE(review): ``test_window`` is never read.  ``make_future_dataframe`` is
    called without ``freq``, so Prophet's default frequency applies - confirm
    that matches the spacing of ``X``.
    """
    accumulated = 0
    splitter = tss(n_splits=number_of_splits)
    for train_index, test_index in splitter.split(X):
        history = X[train_index]
        holdout = X[test_index]
        horizon = len(holdout)

        model = Prophet(
            changepoint_prior_scale=changepoint_prior_scale_par,
            growth='linear',
        )
        model = model.add_seasonality(
            name='yearly',
            period=365.25,
            prior_scale=seasonality_prior_scale_par,
            fourier_order=fourier_order_par,
        )

        # Observations go in 'y', the series index in 'ds'.
        frame = history.to_frame()
        frame = frame.rename(columns={frame.columns[0]: 'y'})
        frame['ds'] = frame.index
        fitted = model.fit(frame)

        future = model.make_future_dataframe(periods=horizon)
        predicted = fitted.predict(future)['yhat'][-horizon:]
        # Accumulate the out-of-sample error of this split.
        accumulated = metric(holdout, predicted) + accumulated
    return float(accumulated / number_of_splits)
X_test = [] Y_test = [] for features,label in testing_data: X_test.append(features) Y_test.append(label) X_test = np.array(X_test) X_test = X_test.reshape(len(X_test),-1) Y_test = lab_enco.fit_transform(Y_test) print(lab_enco.classes_)''' from sklearn.model_selection import train_test_split as tss X_train,X_test,Y_train,Y_test = tss(X_train,Y_train,test_size = 0.3,random_state = 0) X_train,X_val,Y_train,Y_val = tss(X_train,Y_train,test_size = 0.1,random_state = 0) from keras.models import Sequential from keras.layers import Dense cancer_classifier = Sequential() cancer_classifier.add(Dense(512,input_dim = len(X_train[0]),activation = 'relu')) cancer_classifier.add(Dense(512,activation = 'relu')) cancer_classifier.add(Dense(512,activation = 'relu')) cancer_classifier.add(Dense(512,activation = 'relu')) cancer_classifier.add(Dense(len(Y_train[0]),activation = 'softmax')) cancer_classifier.compile(optimizer = 'adam',loss = 'categorical_crossentropy',metrics = ['accuracy']) classifier = cancer_classifier.fit(X_train,Y_train,batch_size = 30,epochs = 10,validation_data = (X_val,Y_val),verbose = 1)
def train_test_split(x, y, th=0.2, random_state=None):
    """Split the keys of ``x`` into train and test sets and build both samples.

    Args:
        x: mapping whose keys identify the samples.
        y: targets, passed through to ``makeSample``.
        th: forwarded to ``tss`` as ``test_size``.  Previously this argument
            was accepted but ignored, so the split always used the splitter's
            default test size.
            NOTE(review): assumes ``th`` is meant to be the test fraction -
            confirm with callers.
        random_state: forwarded to ``tss`` to control the shuffling.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as produced by ``makeSample``.
    """
    ids = list(x.keys())
    train_ids, test_ids = tss(ids, test_size=th, random_state=random_state)
    x_train, y_train = makeSample(x, y, train_ids)
    x_test, y_test = makeSample(x, y, test_ids)
    return x_train, y_train, x_test, y_test
def calculate_weights(allData, material, number_of_splits=5):
    """Grid-search the weights that best blend three SARIMAX forecasts.

    Three SARIMAX models (internal sales, external market, competitor market)
    are fitted on each training split produced by
    ``tss(n_splits=number_of_splits).split(sales_data)``.  For every weight
    triple ``(w_int, w_ext, w_comp)`` on a 0.01 grid that sums to 1, the
    blended forecast
    ``w_int*internal + w_ext*external + w_comp*competitor*mean_percentage``
    is scored against the internal test data with ``mean_squared_error``; the
    scores are summed over all splits.

    Args:
        allData: object providing ``get_dataframes_for_material``.
        material: material identifier.
        number_of_splits: number of splits requested from ``tss``.

    Returns:
        The weight triple with the lowest accumulated error, or ``(0, 0, 0)``
        if no triple scores below infinity.

    Side effects:
        Prints the three ARIMA orders that were looked up.
    """
    (market_data_by_competitor, sales_data, market_data, _stock,
     market_percentage) = allData.get_dataframes_for_material(str(material))
    # NOTE(review): the first lookup passes ``str(material)``, the other two
    # pass ``material`` unchanged - kept as in the original; confirm intent.
    arima_order, seasonal_order = get_best_param_from_results(
        str(material), const.internal_sales_SAP)
    arima_order_ext, seasonal_order_ext = get_best_param_from_results(
        material, const.external_sales_IMS)
    arima_order_comp, seasonal_order_comp = get_best_param_from_results(
        material, const.market_comp_sales_IMS)
    print(arima_order)
    print(arima_order_comp)
    print(arima_order_ext)

    # Build the grid from integer hundredths.  Testing ``x + y + z == 1`` on
    # floating-point multiples of 0.01 silently drops valid triples whose sum
    # is off by a rounding error.  Each component stays in [0, 0.99], and the
    # iteration order (first weight, then second) matches the original grid.
    steps = 100
    possible_weights = [
        (i / steps, j / steps, (steps - i - j) / steps)
        for i in range(steps)
        for j in range(steps)
        if 0 <= steps - i - j < steps
    ]
    best_weights = (0, 0, 0)
    best_score = float('inf')

    market_data = market_data[-len(sales_data) + 1:]
    market_data = market_data[market_data.columns.values[0]]
    market_data_by_competitor = market_data_by_competitor[-len(sales_data) + 1:]
    sales_data = sales_data[sales_data.columns.values[0]]
    mean_percentage = market_percentage[-6:].mean()

    error = dict.fromkeys(possible_weights, 0)
    for train_index, test_index in tss(
            n_splits=number_of_splits).split(sales_data):
        train_set_internal = sales_data[train_index]
        test_set_internal = sales_data[test_index]
        # The market series are selected with the training index re-indexed
        # by all but its last entry (kept exactly as in the original code).
        market_index = train_index[train_index[:-1]]
        train_set_external = market_data[market_index]
        train_set_competitor = market_data_by_competitor[market_index]

        model_internal = SARIMAX(train_set_internal,
                                 order=arima_order,
                                 seasonal_order=seasonal_order,
                                 enforce_stationarity=False,
                                 enforce_invertibility=False)
        model_fit_internal = model_internal.fit(disp=0)
        model_external = SARIMAX(train_set_external,
                                 order=arima_order_ext,
                                 seasonal_order=seasonal_order_ext,
                                 enforce_stationarity=False,
                                 enforce_invertibility=False)
        model_fit_external = model_external.fit(disp=0)
        model_market = SARIMAX(train_set_competitor,
                               order=arima_order_comp,
                               seasonal_order=seasonal_order_comp,
                               enforce_stationarity=False,
                               enforce_invertibility=False)
        model_fit_market = model_market.fit(disp=0)

        horizon = len(test_index)
        yhat_internal = model_fit_internal.forecast(steps=horizon)
        # The market models forecast one extra step and the first forecast
        # value is discarded.
        yhat_external = model_fit_external.forecast(steps=(horizon + 1))[1:]
        yhat_market = model_fit_market.forecast(steps=(horizon + 1))[1:]

        # Accumulate the out-of-sample error of every candidate blend.
        for pw in possible_weights:
            blended = (pw[0] * yhat_internal
                       + pw[1] * yhat_external
                       + pw[2] * yhat_market * mean_percentage)
            error[pw] = mean_squared_error(test_set_internal, blended) + error[pw]

    for candidate, score in error.items():
        if score < best_score:
            best_score = score
            best_weights = candidate
    return best_weights