def model_train():
    all_types = os.listdir(root + '/type_err_feature/')
    result = pd.DataFrame()
    for t in all_types:
        all_cities = os.listdir(root + '/type_err_feature/' + t + '/center/')
        for c in all_cities:
            if os.path.exists(root + '/type_err_feature/' + t + '/center/' + c + '/demand_data.csv'):
                file = open(root + '/type_err_feature/' + t + '/center/' + c + '/demand_data.csv')
                data = pd.read_csv(file, encoding='utf-8')
                rec_start = data.loc[0, 'WEEK']
                col = ['demand']
                data = data.loc[:, col]
                if data['demand'].unique().tolist() != [0]:
                    scaler = MaxAbsScaler()
                    data = scaler.fit_transform(data)
                    data = pd.DataFrame(data[:, 0], columns=['demand'])
                    train_sample = int(0.7 * data.shape[0]) + 1
                    steps = 3
                    X, y = pre_process(data, steps)
                    X = scale(X, axis=1, with_std=False, with_mean=False)
                    tr_X, tr_y = X[:train_sample], y[:train_sample]
                    t_X, t_y = X[train_sample:], y[train_sample:]
                    tr_X, t_X = (np.reshape(tr_X, (tr_X.shape[0], 1, tr_X.shape[1])),
                                 np.reshape(t_X, (t_X.shape[0], 1, t_X.shape[1])))
                    print(t, c)
                    path = root + '/type_err_feature/' + t + '/center/' + c
                    tr_predict, t_predict, well_learnt = lstm_train(path, tr_X, tr_y, t_X, t_y, scaler)
                    data = scaler.inverse_transform(data)
                    plot_predict(data, steps, path, tr_predict, t_predict)
                    result = result.append(save_data(c, t, data, tr_predict, t_predict, rec_start, well_learnt))
    result.to_csv(root + '/info/parts_prepare.csv', header=True, index=False, encoding='utf-8')
def train_and_estimate_once(np_x: np.ndarray, np_y: np.ndarray, stock_np_x: np.ndarray,
                            num_cols: int, scalery: MaxAbsScaler, rand_state: int) -> float:
    """
    Train an XGBoost market-cap estimator on the entire market less one stock,
    then make a prediction with that estimator for the held-out stock.
    Importantly, rand_state slightly tweaks the training; the return value of
    this function is averaged over multiple rand_states.
    """
    model = XGBRegressor(max_depth=num_cols // 4,
                         learning_rate=.5,
                         n_estimators=20,
                         subsample=.8,
                         random_state=rand_state)
    model.fit(np_x, np_y)
    scaled_pred = model.predict(stock_np_x)
    real_pred = scalery.inverse_transform(scaled_pred.reshape(-1, 1))
    return float(real_pred[0][0])
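# The docstring above says the result is averaged over multiple rand_states.
# A minimal sketch of how that averaging might look, assuming np_x, np_y,
# stock_np_x, num_cols and scalery are already prepared by the surrounding
# pipeline (the seed list is illustrative, not from the original code):
def average_estimate(np_x, np_y, stock_np_x, num_cols, scalery, seeds=(0, 1, 2, 3, 4)):
    estimates = [
        train_and_estimate_once(np_x, np_y, stock_np_x, num_cols, scalery, s)
        for s in seeds
    ]
    return sum(estimates) / len(estimates)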
# test_generator = get_generator_cyclic(Xvalid, Xvalid2, y_valid, readin)
for e in range(epochs):
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print('Epoch', e, 'Time: ', current_time)
    batches = 0
    while batches < len(X) / readin:
        Xtrain_1, Xtrain_2, ytrain_1 = next(training_generator)
        # Xtest_1, Xtest_2, ytest_1 = next(test_generator)
        # model.fit([Xtrain_1, Xtrain_2], ytrain_1, callbacks=[callback],
        #           validation_data=([Xtest_1, Xtest_2], ytest_1),
        #           batch_size=batch_size, verbose=0)
        model.fit([Xtrain_1, Xtrain_2], ytrain_1, callbacks=[callback],
                  batch_size=batch_size, verbose=0)
        batches += 1

    # Calculate and print the running validation once per epoch
    losssc, msesc, maesc = model.evaluate([Xvalid / 255., Xvalid2], y_valid, verbose=0)
    mae = sc_y.inverse_transform(np.array(maesc).reshape(1, -1))[0][0]
    if mae < mae_best:
        modelbest = model
        mae_best = mae
    print('Mean absolute error at {:4.0f} is: {:4.2f}'.format(e, mae))

modelbest.save('cnn3v28')

# In[32]:

save_notebookparams = 1
if save_notebookparams:
    pkl_filename = "depthforcemodelparam_cnn3v28_pb.pkl"
    randata = {}
    randata['nsamps'] = nsamps
def fitnenss(learning_rate, num_layers, num_nodes, optimiser, batch_size,
             steps_epoch, month, quarter, year, dayyear, weekyear, day, residualV):

    def predict(x, encoder_predict_model, decoder_predict_model, num_steps_to_predict):
        y_predicted = []
        # Encode the values as a state vector
        states = encoder_predict_model.predict(x)
        # The states must be a list
        if not isinstance(states, list):
            states = [states]
        # Generate the first value of the decoder input sequence
        decoder_input = np.zeros((x.shape[0], 1, num_y_signals))
        for _ in range(num_steps_to_predict):
            outputs_and_states = decoder_predict_model.predict(
                [decoder_input] + states, batch_size=1)
            output = outputs_and_states[0]
            states = outputs_and_states[1:]
            # add predicted value
            y_predicted.append(output)
        return np.concatenate(y_predicted, axis=1)

    groupList = results[["group"]].groupby("group").sum().index  # distinct groups
    index = pd.date_range(start="2012-03-04", end="2016-12-04", freq='w')
    residual = pd.DataFrame({"residual": np.zeros(len(index))}, index=index)
    qtys = []
    if residualV:
        qtys = ["residual"]
    total = pd.DataFrame(index=index)
    # Time-related features
    if dayyear:
        total = total.assign(Yearday=total.index.dayofyear)
    if weekyear:
        total = total.assign(Yearweek=total.index.weekofyear)
    if day:
        total = total.assign(Monthday=total.index.day)
    if year:
        total = total.assign(Year=total.index.year)
    if month:
        total = total.assign(month=total.index.month)
    if quarter:
        total = total.assign(quarter=total.index.quarter)
    k = 0
    groupsList = []
    for i, g in products:
        product = products.get_group(i)
        productName = product["product"].array[0]
        productGroup = product["group"].array[0]
        del product["group"]
        del product["product"]
        product = product.groupby("date").sum()
        product = product.astype("float64")
        # Non-zero weekly data points in the last two years; products with too few
        # data points are accumulated into the residual column instead of being
        # predicted individually.
        dataPoints = product.resample("w").sum()["2014-12-04":"2016-12-04"].astype(bool).sum(axis=0).values[0]
        # Fill empty spots and make all series the same size
        product = product.resample('w').sum().reindex(index).fillna(0)
        product["quantity"] = product["quantity"].apply(lambda x: 0.0 if x < 0 else x)
        if dataPoints < 20:
            residual = residual.add(product.values)
        else:
            qtColName = productName + "_qty"
            ctColName = productName + "_group"
            groupsList.append(mapGroups(productGroup, groupList))
            tempDf = pd.DataFrame({
                qtColName: product["quantity"],
                ctColName: mapGroups(productGroup, groupList)
            })
            total = pd.concat([total, tempDf], axis=1, sort=False)
            qtys.append(qtColName)
    if residualV:
        total = pd.concat([total, residual], axis=1, sort=False)

    products_2 = results.groupby("product")
    from sklearn.metrics import mean_squared_error
    index = pd.date_range(start="2012-03-04", end="2016-12-04", freq='w')
    prods = []
    for i in range(1, len(qtys)):
        prods.append(qtys[i][:-4])
    t = 0
    k = 0
    naive = {}
    for i, g in products_2:
        product = products_2.get_group(i)
        prod = product["product"].array[0]
        product = product.groupby("date").sum()
        del product["group"]
        del product["product"]
        product = product.resample('w').sum().reindex(index).fillna(0)
        product["quantity"] = product["quantity"].apply(lambda x: 0.0 if x < 0 else x)
        coef = 0.8
        delimiter = int((1 - coef) * index.shape[0])
        validate = product.tail(delimiter)
        prediction = product[index.shape[0] - delimiter * 2:index.shape[0] - delimiter]
        if prod in prods:
            rmse = np.sqrt(mean_squared_error(prediction.values.astype("float"),
                                              validate.values.astype("float")))
            naive[prod] = rmse
            t = t + rmse
            k = k + 1

    product = total
    target = product[qtys]
    # Trimming the end of the series because of the NaN values resulting from the shift
    x_data = product.values
    y_data = target.values
    data_count = len(x_data)
    train_split = 0.8
    num_train = int(train_split * data_count)
    num_test = data_count - num_train

    # Creating the test and the train data
    x_train = x_data[:num_train]
    x_test = x_data[num_train:]
    y_train = y_data[0:num_train]
    y_test = y_data[num_train:]
    num_x_signals = x_data.shape[1]
    num_y_signals = y_data.shape[1]

    # Normalize the data
    x_scaler = MaxAbsScaler()
    x_train_scaled = x_scaler.fit_transform(x_train)
    x_test_scaled = x_scaler.transform(x_test)
    y_scaler = MaxAbsScaler()
    y_train_scaled = y_scaler.fit_transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    def batch_generator(batch_size, input_seq_len, target_seq_len):
        """
        Generator function for creating random batches of training data.
        """
        while True:
            x_shape = (batch_size, input_seq_len, num_x_signals)
            y_shape = (batch_size, target_seq_len, num_y_signals)
            encoder_input = np.zeros(shape=x_shape, dtype=np.float16)
            decoder_output = np.zeros(shape=y_shape, dtype=np.float16)
            decoder_input = np.zeros(shape=y_shape, dtype=np.float16)
            total_length = input_seq_len + target_seq_len
            for i in range(batch_size):
                idx = np.random.randint(num_train - total_length)
                encoder_input[i] = x_train_scaled[idx:idx + input_seq_len]
                decoder_output[i] = y_train_scaled[idx + input_seq_len:idx + total_length]
            yield ([encoder_input, decoder_input], decoder_output)

    print('learning rate: {0:.1e}'.format(learning_rate))
    print('Number of layers:', num_layers)
    print('Number of nodes:', num_nodes)
    stri = ""
    if month:
        stri = stri + "Month "
    if quarter:
        stri = stri + "Quarter "
    if year:
        stri = stri + "Year "
    if dayyear:
        stri = stri + "Dayyear "
    if weekyear:
        stri = stri + "Weekyear "
    if day:
        stri = stri + "Day "
    if residualV:
        stri = stri + "Residual "
    print("State " + stri)
    if optimiser == 0:
        print("Optimiser: RMSProp")
    else:
        print("Optimiser: Adam")
    print('Batch size:', batch_size)
    print('Steps epoch', steps_epoch)

    generator = batch_generator(batch_size=batch_size, input_seq_len=15, target_seq_len=15)
    input_seq_len = 15
    target_seq_len = 15
    validation_data = ([
        np.expand_dims(x_test_scaled[:input_seq_len], axis=0),
        np.zeros(shape=(1, target_seq_len, num_y_signals), dtype=np.float16)
    ], np.expand_dims(y_test_scaled[input_seq_len:input_seq_len + target_seq_len], axis=0))

    layers = []
    for i in range(num_layers):
        layers.append(num_nodes)

    # Encoder
    encoder_inputs = keras.layers.Input(shape=(None, num_x_signals))
    encoder_cells = []
    for hidden_neurons in layers:
        encoder_cells.append(keras.layers.GRUCell(hidden_neurons))
    encoder = keras.layers.RNN(encoder_cells, return_state=True)
    encoder_outputs_and_states = encoder(encoder_inputs)
    encoder_states = encoder_outputs_and_states[1:]

    # Decoder
    decoder_inputs = keras.layers.Input(shape=(None, num_y_signals))
    decoder_cells = []
    for hidden_neurons in layers:
        decoder_cells.append(keras.layers.GRUCell(hidden_neurons))
    decoder = keras.layers.RNN(decoder_cells, return_sequences=True, return_state=True)
    decoder_outputs_and_states = decoder(decoder_inputs, initial_state=encoder_states)
    decoder_outputs = decoder_outputs_and_states[0]
    decoder_dense = keras.layers.Dense(num_y_signals, activation='linear')
    decoder_outputs = decoder_dense(decoder_outputs)

    if optimiser == 0:
        optimiser = keras.optimizers.RMSprop(lr=learning_rate)
    else:
        optimiser = keras.optimizers.Adam(lr=learning_rate)
    model = keras.models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
    model.compile(optimizer=optimiser, loss="mse")

    log_dir = log_dir_name(learning_rate, num_layers, num_nodes, optimiser,
                           batch_size, steps_epoch, stri)
    callback_early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    checkpoint_name = "checks/lr_{0:.0e}_layers_{1}_nodes_{2}_{3}_batch{4}_epoch{5}{6}.keras".format(
        learning_rate, num_layers, num_nodes, optimiser, batch_size, steps_epoch, stri)
    path_checkpoint = checkpoint_name
    callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)
    callback_log = [
        callback_early_stopping,
        callback_checkpoint,
        TensorBoard(log_dir=log_dir,
                    histogram_freq=0,
                    write_graph=True,
                    write_grads=False,
                    write_images=False)
    ]

    # Use Keras to train the model.
    model.fit_generator(generator=generator,
                        epochs=200,
                        steps_per_epoch=steps_epoch,
                        validation_data=validation_data,
                        callbacks=callback_log)
    try:
        model.load_weights(path_checkpoint)
    except Exception as error:
        print("Error trying to load checkpoint.")
        print(error)
    loss = model.evaluate(validation_data[0], validation_data[1])
    print()
    print("Loss: " + str(loss))
    print()

    encoder_predict_model = keras.models.Model(encoder_inputs, encoder_states)
    decoder_states_inputs = []
    for hidden_neurons in layers[::-1]:
        # One state per GRU cell
        decoder_states_inputs.append(keras.layers.Input(shape=(hidden_neurons,)))
    decoder_outputs_and_states = decoder(decoder_inputs,
                                         initial_state=decoder_states_inputs,
                                         training=True)
    decoder_outputs = decoder_outputs_and_states[0]
    decoder_states = decoder_outputs_and_states[1:]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_predict_model = keras.models.Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)

    horizon = 50
    X_for_pred = np.expand_dims(x_train_scaled[x_train_scaled.shape[0] - input_seq_len:], axis=0)
    y_test_predicted = predict(X_for_pred, encoder_predict_model, decoder_predict_model, horizon)
    pred = y_scaler.inverse_transform(y_test_predicted[0]).T
    true = y_test[:horizon].T
    t_rmse = 0
    rnn = {}
    for i in range(len(pred)):
        pred[i][pred[i] < 0] = 0
        rmse = np.sqrt(mean_squared_error(true[i], pred[i]))
        t_rmse = t_rmse + rmse
        rnn[qtys[i][:-4]] = rmse

    valsArr_old = [
        66.6851666549545, 67.88958611597897, 69.21681535327671, 66.48135020266528,
        64.27709785778202, 60.72749709928205, 63.701465954928295, 65.17733810711289,
        64.78680046203984, 54.03158148686864, 50.996898683969604, 54.064319848800224,
        42.69338976116232, 7.7145067699141645, 58.73075744706745, 42.21435723311817,
        65.8428871293241, 55.54466048398387, 37.459384554198074, 65.91935078588978,
        59.44024944642978, 66.75112087030772, 55.109361656982436, 48.873453068349015,
        46.018164004825145, 63.55124830274102, 58.51154392727636, 65.20080941236168,
        20.582912304727387, 65.47745592815562, 29.05502729752538, 31.88538428636566,
        66.19052965893539, 25.209622432309466, 65.30813282106377, 48.47010482324254,
        55.35896540587586, 67.8970786052471, 63.95296694703776, 65.58669446483145,
        69.07507501099678, 53.27732807353019, 59.13327495000382, 49.961020439339,
        36.43109621690061, 58.25244506735996, 73.47924335249874, 48.24591638102002,
        15.306627521117743, 54.619525753773665, 33.460658318138734, 63.38118223469059,
        21.4684065315376, 51.57643764331531, 38.35348241078786, 57.49193432508641,
        52.378319918141834, 3.2970115809005662, 45.070092238160676, 50.820191022437356,
        20.009408502370174, 66.76133851423023, 15.381852603930446, 55.844775832748425,
        63.3844295901193, 62.06249668996134, 18.796244866954524, 61.151276543758314,
        69.76259052461357, 32.22915185003426, 52.164895526117284, 55.51180174042507
    ]
    rnn["total"] = t_rmse / len(pred)
    valsArr = []
    for f in naive:
        val = (1 - (rnn[f] / naive[f])) * 100
        # print(f + ": " + str(np.round(val, decimals=2)) + "%")
        valsArr.append(val)
    print("Current : " + str(np.array(valsArr[:]).mean()))
    print("Old: " + str(np.array(valsArr_old[:]).mean()))
    percentage = np.array(valsArr).mean()
    global highest_per
    if percentage > highest_per:
        model.save(path_best_model)
        highest_per = percentage
    print("Highest Percentage: " + str(highest_per))
    print()
    del model
    K.clear_session()
    return -percentage
# boxplots
# for col in data.columns:
#     plt.figure()
#     plt.boxplot(data[col])
#     plt.title(col)

# scaling the data
data_scaled = data.copy()
scaler = MaxAbsScaler()
data_scaled.loc[:, :] = scaler.fit_transform(data)
scaler_params = scaler.get_params()

# We will eventually need the physical (unscaled) data to display the results
extract_scaling_function = np.ones((1, data_scaled.shape[1]))
extract_scaling_function = scaler.inverse_transform(extract_scaling_function)

pd.set_option('display.max_columns', 7)
# print(data_scaled.iloc[:3, :])

## Shuffling data
# data = data.sample(frac=1, random_state=0).reset_index(drop=True)

## Separating inputs from outputs
# X_data = np.array(data.iloc[:, :6])
# y_data = np.array(data.iloc[:, -1])

# The dataset
datasets = {}
y = data_scaled['Residuary resistance'].values.reshape(-1, 1)
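# Standalone check of the "row of ones" trick used above (demo data is made up):
# for a fitted MaxAbsScaler, inverse_transform(np.ones((1, n))) returns each
# column's maximum absolute value, i.e. exactly the per-column factors needed to
# convert scaled values back to physical units.
import numpy as np
from sklearn.preprocessing import MaxAbsScaler

_demo = np.array([[1.0, -4.0], [2.0, 2.0], [-0.5, 3.0]])
_s = MaxAbsScaler().fit(_demo)
print(_s.inverse_transform(np.ones((1, _demo.shape[1]))))  # [[2. 4.]]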
class ANOMIGAN():
    def __init__(self):
        self.testfile = None   # TEST_FILE
        self.data = None       # TRAIN_FILE
        self.num_feature = 30
        self.X_test = 0
        self.X_gen = 0
        self.scaler = MaxAbsScaler()
        self.input_shape = (-1, -1)
        self.latent_dim = 100
        self.C = tf.placeholder(tf.float32, [None, 512])
        self.C_prime = tf.placeholder(tf.float32, [None, 512])

        # models
        self.generator = None
        # self.discriminator = None  # would shadow the discriminator() method below
        self.preTrainedModel = Sequential()

        # hyperparameters for loss
        self.lambda_a = 0.5
        self.lambda_b = 1 - self.lambda_a
        self.confidence = 1.0
        self.batch_size = 32
        self.num_variance = 5

        # temp lists
        self.bList = []
        self.aList = []
        self.bFpr = []
        self.bTpr = []
        self.bThresholds = []
        self.aFpr = []
        self.aTpr = []
        self.aThresholds = []
        self.t_var = {}

    ######## drawing functions ###############
    def butter_lowpass_filter(self, data, cutoff, fs, order=5):
        b, a = self.butter_lowpass(cutoff, fs, order=order)
        y = lfilter(b, a, data)
        return y

    def butter_lowpass(self, cutoff, fs, order=5):
        nyq = 0.5 * fs
        normal_cutoff = cutoff / nyq
        b, a = butter(order, normal_cutoff, btype='low', analog=False)
        return b, a

    def drawLoss(self, S_loss_list, E_loss_list):
        # Filter requirements.
        order = 6
        fs = 30.0        # sample rate, Hz
        cutoff = 3.667   # desired cutoff frequency of the filter, Hz
        s_filter = self.butter_lowpass_filter(S_loss_list, cutoff, fs, order)
        d_filter = self.butter_lowpass_filter(E_loss_list, cutoff, fs, order)
        ylim = [0, 3]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Epochs", fontsize=20)
        ax.set_ylabel("Loss", fontsize=20)
        ax.plot(s_filter, label='Discriminator', color='blue', linewidth=1, linestyle='--')
        ax.plot(d_filter, label='Encoder', color='green', linewidth=1, alpha=0.5)
        ax.legend(loc=1, fontsize=15)
        plt.show()

    def drawAccuracyPlot(self):
        ylim = [0, 105]
        xlim = [0, 10]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Random Iterative Steps", fontsize=20)
        ax.set_ylabel("Accuracy", fontsize=20)
        plt.plot(self.bList, label='Original Samples', color='blue', linewidth=1, linestyle='--')
        plt.plot(self.aList, label='Generated Samples', color='green', linewidth=1)
        plt.legend()
        plt.show()

    def drawRocPlot(self):
        fpr1, tpr1, thresholds1 = self.bFpr, self.bTpr, self.bThresholds
        roc_auc1 = auc(fpr1, tpr1)
        fpr2, tpr2, thresholds2 = self.aFpr, self.aTpr, self.aThresholds
        roc_auc2 = auc(fpr2, tpr2)
        plt.figure()
        plt.plot(fpr1, tpr1, color='blue', linestyle='--', linewidth=2,
                 label='ROC curve with original samples (area = %0.2f)' % roc_auc1)
        plt.plot(fpr2, tpr2, color='green', linewidth=1,
                 label='ROC curve with generated samples (area = %0.2f)' % roc_auc2)
        plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle=':')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")
        plt.show()

    ######## target classifier model functions ###############
    def get_pretrainModel(self):
        # self.preTrainedModel: use an API to load the target pretrained model
        pass

    def get_target_features(self):
        X = 1   # Define input features X
        Y = 1   # Define labels of input features X
        return X, Y

    ######## AnomiGAN model functions ###############
    def discriminator(self, x):
        with tf.variable_scope("discriminator"):
            x_reshaped = tf.reshape(x, (-1, self.num_feature, 1))
            conv1 = tf.layers.conv1d(x_reshaped, filters=32, kernel_size=4, strides=2,
                                     padding='VALID', activation=tf.nn.relu)
            conv2 = tf.layers.conv1d(conv1, filters=10, kernel_size=2, strides=1,
                                     padding='SAME', activation=tf.nn.tanh)
            conv3 = tf.layers.conv1d(conv1, filters=20, kernel_size=2, strides=1,
                                     padding='SAME', activation=tf.nn.tanh)
            conv4 = tf.layers.conv1d(conv1, filters=30, kernel_size=2, strides=1,
                                     padding='SAME', activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv4)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def operation_mode(self, x, message, mode):
        if mode == 1:
            dtype = x.dtype
            x_btensor = tf.cast(x, tf.int32)
            m_btensor = tf.cast(message, tf.int32)
            xor = tf.bitwise.bitwise_xor(x_btensor, m_btensor)
            random = tf.cast(xor, dtype)
        else:
            random = x * message % np.amax(x)
        return random

    def encoder(self, x, message, mode):
        with tf.variable_scope("encoder"):
            random = self.operation_mode(x, message, mode)
            x_flatten = tf.layers.flatten(random)
            fc1 = tf.reshape(x_flatten, (-1, self.num_feature, 1))
            conv1d_t1 = tf.layers.conv1d(fc1, filters=64, kernel_size=4, strides=2,
                                         padding='VALID', activation=tf.nn.relu)
            bn1 = tf.layers.batch_normalization(conv1d_t1)
            conv1d_t2 = tf.layers.conv1d(bn1, filters=32, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn2 = tf.layers.batch_normalization(conv1d_t2)
            conv1d_t3 = tf.layers.conv1d(bn2, filters=16, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn3 = tf.layers.batch_normalization(conv1d_t3)
            conv1d_t4 = tf.layers.conv1d(bn3, filters=8, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn4 = tf.layers.batch_normalization(conv1d_t4)
            conv1d_t5 = tf.layers.conv1d(bn4, filters=4, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn5 = tf.layers.batch_normalization(conv1d_t5)
            conv1d_t6 = tf.layers.conv1d(bn5, filters=8, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn6 = tf.layers.batch_normalization(conv1d_t6)
            conv1d_t7 = tf.layers.conv1d(bn6, filters=16, kernel_size=2, strides=1,
                                         padding='SAME', activation=tf.nn.tanh)
            bn7 = tf.layers.batch_normalization(conv1d_t7)
            conv1d_t8 = tf.layers.conv1d(bn7, filters=self.num_feature, kernel_size=2,
                                         strides=1, padding='SAME', activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv1d_t8)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def get_solvers(self, learning_rate=1e-3, beta1=0.5):
        E_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        S_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        return E_solver, S_solver

    def train(self, sess, E_train_step, S_train_step, E_loss, S_loss, epochs=3000, batch_size=10):
        X, Y = self.get_target_features()
        S_loss_list, E_loss_list = [], []
        for it in range(epochs):
            minibatch, labels = self.get_shuffle_batch(X, Y, batch_size)
            minibatch = minibatch.reshape(batch_size, -1)
            # store encoder parameters during the last 2000 iterations
            if it > (epochs - 2000):
                self.store_parameters(sess)
            # randomize original data
            fake = np.random.normal(0, 1, (batch_size, 30))
            randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
            loss = self.target_classifier(randomized, labels, batch_size)
            _, S_loss_curr = sess.run([S_train_step, S_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})
            _, E_loss_curr = sess.run([E_train_step, E_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})
            S_loss_list.append(S_loss_curr)
            E_loss_list.append(np.mean(E_loss_curr))
        # self.drawLoss(S_loss_list, E_loss_list)
        print("Train Finished")

    def target_classifier(self, fake, fake_label, batch_size=32):
        cvscores = []
        scores = self.preTrainedModel.evaluate(fake, fake_label, verbose=0)
        output = np.mean(scores[1])
        return output

    def calculate_loss(self, C, C_prime, logit_real, logit_fake, loss):
        real_label = tf.ones_like(logit_real)
        fake_label = tf.zeros_like(logit_fake)
        loss_S_real = tf.nn.sigmoid_cross_entropy_with_logits(labels=real_label, logits=logit_real)
        loss_S_fake = tf.nn.sigmoid_cross_entropy_with_logits(labels=fake_label, logits=logit_fake)
        loss_S = (tf.reduce_mean(loss_S_real)
                  + (tf.reduce_mean(loss_S_fake) * (1 - tf.reduce_mean(loss))))
        C_flatten = tf.layers.flatten(C)
        C_prime_flatten = tf.layers.flatten(C_prime)
        distance = tf.sqrt(tf.reduce_sum(tf.square(C_flatten - C_prime_flatten), axis=1))
        distance = tf.reduce_mean(distance)
        loss_E = (self.lambda_a * (distance * self.confidence)) + (self.lambda_b * loss_S)
        return loss_E, loss_S

    def get_session(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)
        return session

    def get_shuffle_batch(self, X, Y, batch_size=32):
        idx = random.randint(1, len(X) - batch_size)
        return X[idx:idx + batch_size], Y[idx:idx + batch_size]

    def get_next_batch(self, X, Y, start, end, batch_size=32):
        X_train = []
        Y_train = []
        start = 0
        end = batch_size
        for i in range(len(X) - batch_size):
            start += i
            end += i
            X_train.append(X[start:end])
            Y_train.append(Y[start:end])
        return X_train, Y_train

    def anonymize_sample(self, sess, batch_size):
        minibatch, labels = self.get_target_features()
        batch_size = len(labels)
        cvscores1 = []
        fake = np.random.normal(0, 1, (batch_size, 30))
        randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
        scores1 = self.preTrainedModel.evaluate(randomized, labels, verbose=0)
        cvscores1.append(scores1[1] * 100)
        self.get_inversed(randomized)

    def get_inversed(self, normalized):
        np.set_printoptions(precision=6, suppress=True)
        inversed = self.scaler.inverse_transform(normalized)
        np.savetxt('fileout.txt', inversed, delimiter=',', fmt='%1.3f')
        return inversed

    def store_parameters(self, sess):
        for i in range(1, 7):
            name = 'Encoder/conv1d_' + str(i) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            self.t_var[name] = sess.run(conv)

    def add_variance(self, sess, num_var):
        for i in range(num_var):
            num = random.randint(1, 7)
            name = 'Encoder/conv1d_' + str(num) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            var = np.var(self.t_var.get(name), axis=0)
            sess.run(tf.assign(conv, conv + var))

    def get_pvalue(self, a, b):
        a = a.flatten()
        b = b.flatten()
        t, p = stats.pearsonr(a, b)
        return t, p

    def main(self):
        self.get_pretrainModel()
        tf.reset_default_graph()
        self.C = tf.placeholder(tf.float32, [None, self.num_feature])
        self.random = tf.placeholder(tf.float32, [None, self.num_feature])
        self.C_prime = self.encoder(self.C, self.random, mode=2)
        self.loss = tf.placeholder(tf.float32)
        with tf.variable_scope("") as scope:
            logit_real = self.discriminator(self.C)
            scope.reuse_variables()
            logit_fake = self.discriminator(self.C_prime)
        encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
        steganalayzer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
        E_solver, S_solver = self.get_solvers()
        E_loss, S_loss = self.calculate_loss(self.C, self.C_prime, logit_real, logit_fake, self.loss)
        E_train_step = E_solver.minimize(E_loss, var_list=encoder_vars)
        S_train_step = S_solver.minimize(S_loss, var_list=steganalayzer_vars)
        tf.executing_eagerly()
        sess = self.get_session()
        sess.run(tf.global_variables_initializer())
        self.train(sess, E_train_step, S_train_step, E_loss, S_loss)
        self.add_variance(sess, self.num_variance)
        self.anonymize_sample(sess, self.batch_size)


if __name__ == '__main__':
    anomigan = ANOMIGAN()
    anomigan.main()
class Simulation(object):
    """Class gluing all the pieces together. Performs the whole simulation.

    :param dataset: Dataset which extends :py:obj:`mutabledataset.SimMixin`
    :param AgentCl: Class defining agent behavior, namely `benefit` and `cost`.
    :param learner: Class defining learner behavior, namely `fit` and `predict`.
    :param split: Defines the portion used for fitting the learner. The rest is used
        for determining the `eps` value, regarding the epsilon equilibrium.
        Simulation is done on the whole dataset.
    :param cost_distribution: Passed on to AgentTransformer.
    :param cost_distribution_dep: Passed on to AgentTransformer.
    :param no_neighbors: Passed on to AgentTransformer.
    :param max_it: Passed on to AgentTransformer.
    :param collect_incentive_data: Passed on to AgentTransformer.
    """

    def __init__(self,
                 dataset,
                 AgentCl,
                 learner,
                 cost_distribution,
                 split=[0.5],
                 collect_incentive_data=False,
                 no_neighbors=60,
                 cost_distribution_dep=None,
                 max_it=130):
        self.dataset = dataset
        self.no_neighbors = no_neighbors
        self.cost_distribution = cost_distribution
        self.max_it = max_it
        self.learner = learner
        self.split = split
        self.AgentCl = AgentCl
        self.collect_incentive_data = collect_incentive_data
        self.cost_distribution_dep = cost_distribution_dep

    def no_classes(self, dataset):
        """
        :param dataset: Some AIF360 dataset
        :returns: Number of distinct labels (classes)
        """
        return len(set(dataset.labels.ravel()))

    def start_simulation(self, runs=1, scale=True):
        """
        :param runs: Run simulation multiple times with the same parameters
        :param scale: Perform scaling on dataset features.
        :returns: Modified dataset including new ground truth labels
        :rtype: :py:obj:`simulation.SimulationResultSet`
        """
        res_list = []
        for i in range(runs):
            res_list.append(self._simulate(scale))
        return SimulationResultSet(res_list, runs=runs)

    def _simulate(self, scale):
        """
        Private entrypoint to perform a single simulation

        :param scale: Perform scaling on dataset features
        :returns: Modified dataset including new ground truth labels
        :rtype: :py:obj:`simulation.SimulationResult`
        """
        self.scaler = MaxAbsScaler()
        dataset = self.dataset.copy(deepcopy=True)

        # we need at least one example for each class in each of the two splits
        while True:
            train, test = dataset.split(self.split, shuffle=False)
            if self.no_classes(train) >= 2 and self.no_classes(test) >= 2:
                break
        train_indices = list(map(int, train.instance_names))
        test_indices = list(map(int, test.instance_names))
        self.train, self.test = train, test
        if scale:
            train.features = self.scaler.fit_transform(train.features)
            test.features = self.scaler.transform(test.features)
            dataset.features = self.scaler.transform(dataset.features)
        dataset.infer_domain()

        # learner moves
        self.learner.fit(train)
        ft_names = dataset.protected_attribute_names
        ft_indices = list(map(lambda x: x not in ft_names, dataset.feature_names))
        self.Y_predicted = self.learner.predict(dataset.features)
        self.Y_predicted_pr = self.learner.predict_proba(dataset.features)

        # agents move
        at = AgentTransformer(self.AgentCl,
                              self.learner,
                              self.cost_distribution,
                              collect_incentive_data=self.collect_incentive_data,
                              no_neighbors=self.no_neighbors,
                              cost_distribution_dep=self.cost_distribution_dep,
                              max_it=self.max_it)
        dataset_ = at.transform(dataset)
        train_ = utils.dataset_from_matrix(
            np.hstack((dataset_.features[train_indices, :], dataset_.labels[train_indices])),
            dataset)
        test_ = utils.dataset_from_matrix(
            np.hstack((dataset_.features[test_indices, :], dataset_.labels[test_indices])),
            dataset)
        acc_h = self.learner.accuracy(test)

        # update changed features
        # dataset_ = dataset_from_matrix(np.hstack((np.vstack((train_.features, test_.features)),
        #                                           np.vstack((train_.labels, test_.labels)))), dataset)
        self.Y_new_predicted = self.learner.predict(dataset_.features)
        self.Y_new_predicted_pr = self.learner.predict_proba(dataset_.features)
        acc_h_post = self.learner.accuracy(test_)

        # fit data again, see if accuracy changes
        self.learner.fit(train_)
        acc_h_star_post = self.learner.accuracy(test_)

        # construct datasets for features, including the predicted label
        if scale:
            dataset.features = self.scaler.inverse_transform(dataset.features)
        dataset_df = dataset.convert_to_dataframe(de_dummy_code=True)[0]
        dataset_df['credit_h'] = pd.Series(self.Y_predicted, index=dataset_df.index)
        dataset_df['credit_h_pr'] = pd.Series(self.Y_predicted_pr, index=dataset_df.index)
        if scale:
            dataset_.features = self.scaler.inverse_transform(dataset_.features)
        dataset_new_df = dataset_.convert_to_dataframe(de_dummy_code=True)[0]
        dataset_new_df['credit_h'] = pd.Series(self.Y_new_predicted, index=dataset_new_df.index)
        dataset_new_df['credit_h_pr'] = pd.Series(self.Y_new_predicted_pr, index=dataset_new_df.index)

        res = SimulationResult()
        res.df = dataset_df
        res.df_new = dataset_new_df
        res.eps = abs(acc_h_star_post - acc_h_post)
        res.acc_h = acc_h
        res.acc_h_post = acc_h_post
        res.acc_h_star_post = acc_h_star_post
        res.incentives = at.incentives
        return res
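# Minimal usage sketch of the Simulation class above. The dataset, agent class,
# learner and cost distribution are hypothetical placeholders; the real objects
# must implement the interfaces described in the class docstring.
# my_dataset = ...    # something extending mutabledataset.SimMixin
# MyAgent = ...       # class providing `benefit` and `cost`
# my_learner = ...    # object providing `fit`, `predict`, `predict_proba`, `accuracy`
# sim = Simulation(dataset=my_dataset,
#                  AgentCl=MyAgent,
#                  learner=my_learner,
#                  cost_distribution=lambda size: np.random.normal(0.5, 0.1, size))
# result_set = sim.start_simulation(runs=3, scale=True)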
rfr = RandomForestRegressor(n_estimators=100, max_features="sqrt")
knn = neighbors.KNeighborsRegressor(n_neighbors, weights='uniform')
reg = ElasticNet(alpha=.5)
mlp = MLPRegressor(hidden_layer_sizes=(100, 100, 100))

prediction = {}
prediction_valid = {}
for clf, name in [(knn, 'KNeighborsRegressor'),
                  (reg, 'ElasticNet'),
                  # (svc, 'Support Vector Classification'),
                  (rfr, 'Random Forest'),
                  (mlp, 'NeuralNet')]:
    clf.fit(X_train_valid, y_train_valid[:, 0])

    prediction[str(name)] = clf.predict(X_test)
    avg_error = np.mean(
        (((scaler_output.inverse_transform(y_test.reshape(-1, 1))
           - scaler_output.inverse_transform(prediction[str(name)].reshape(-1, 1)))
          / scaler_output.inverse_transform(y_test.reshape(-1, 1))) ** 2.))
    r2_test = r2_score(y_test, prediction[str(name)])
    print("Mean error (test): ", avg_error, "R2:", r2_test, name)

    prediction_valid[str(name)] = clf.predict(X_valid)
    avg_error_valid = np.mean(
        (((scaler_output.inverse_transform(y_valid.reshape(-1, 1))
           - scaler_output.inverse_transform(prediction_valid[str(name)].reshape(-1, 1)))
          / scaler_output.inverse_transform(y_valid.reshape(-1, 1))) ** 2.))
    r2_valid = r2_score(y_valid, prediction_valid[str(name)])
    print("Mean error (valid): ", avg_error_valid, "R2:", r2_valid, name)

    if name == 'KNeighborsRegressor':
        normalized_std_KNN.append(avg_error)
        normalized_std_valid_KNN.append(avg_error_valid)
        R2_KNN.append(r2_test)
        R2_valid_KNN.append(r2_valid)
    if name == 'ElasticNet':
        normalized_std_EN.append(avg_error)
        normalized_std_valid_EN.append(avg_error_valid)
        R2_EN.append(r2_test)
class DEAP_CMAES:
    def __init__(self,
                 centroid=None,
                 sigma=None,
                 popSize=200,   # lambda_ in the algorithm
                 evalFunc=defaultEvaluate,
                 hofn=5):
        global randomizers
        randomizers = InitRandomizers()
        if centroid is None:
            centroid = defaultInitializer()
        if sigma is None:
            sigma = 0.20
        self.scaler = MaxAbsScaler()
        self.scaler.fit([centroid, ])
        # Reset centroid
        centroid = self.scaler.transform([centroid, ])[0]
        hof = tools.HallOfFame(hofn)
        self.hof = hof
        self.popSize = popSize
        toolbox = base.Toolbox()
        stats = tools.Statistics(key=lambda ind: 1.0 / ind.fitness.values[0])
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)
        self.stats = stats
        # Our fitness already takes into account all the molecules simultaneously.
        # Therefore, there is no need for a multi-objective optimization.
        creator.create("FitnessMin", base.Fitness, weights=(1.0, ))
        creator.create("Individual", list, fitness=creator.FitnessMin)
        toolbox.register("evaluate", evalFunc, scaler=self.scaler)
        strategy = cma.Strategy(centroid=centroid, sigma=sigma, lambda_=popSize)
        toolbox.register("generate", strategy.generate, creator.Individual)
        toolbox.register("update", strategy.update)
        self.toolbox = toolbox

    def run(self, nGens, nprocs=1):
        # Start worker processes
        if nprocs > 1:
            pool = multiprocessing.Pool(processes=nprocs)
            self.toolbox.register("map", pool.map)
        # Run CMA-ES and store the final output
        self.output = algorithms.eaGenerateUpdate(self.toolbox,
                                                  ngen=nGens,
                                                  stats=self.stats,
                                                  halloffame=self.hof,
                                                  verbose=True)

    def getBest(self):
        # Return the best multiprofile
        best = self.hof[0]
        best = self.scaler.inverse_transform([best, ])[0]
        mp = multiProfile()
        mp.setOptimizableParameters(slice(0, len(best), 1), list(best))
        mp.minimizeProfiles()
        return mp
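# A minimal driver sketch for the DEAP_CMAES wrapper above, assuming the
# constructor defaults (defaultInitializer, defaultEvaluate) are defined in the
# surrounding module; the generation/process counts are illustrative only.
# es = DEAP_CMAES(popSize=200, sigma=0.2)
# es.run(nGens=50, nprocs=4)       # optimize for 50 generations on 4 processes
# best_profile = es.getBest()      # best individual, rescaled back to physical units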
def trainNeuralNetworkModel(dataFrame, targetColumn, featureNames, seed=43):

    def get_lr(optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

    dataFrame = dataFrame[featureNames]
    FEATURE_NAMES = list(dataFrame.columns)
    FEATURE_NAMES.remove(targetColumn)
    COLUMNS = list(dataFrame.columns)
    LABEL = targetColumn
    Y_dataFrame = dataFrame[[targetColumn]]
    Y_values = Y_dataFrame.values
    X_dataFrame = dataFrame.drop(targetColumn, axis=1)
    X_values = X_dataFrame.values
    print(X_dataFrame.describe())
    FEATURE_DEFAULTS = ((X_dataFrame.max() + X_dataFrame.min()) * 0.5).to_dict()

    # preprocessorY = MinMaxScaler()
    # preprocessorY = StandardScaler()
    preprocessorY = MaxAbsScaler()
    preprocessorY.fit(Y_values)
    preprocessorX = MinMaxScaler()
    # preprocessorX = StandardScaler()
    preprocessorX.fit(X_values)
    Y_values = preprocessorY.transform(Y_values)
    X_values = preprocessorX.transform(X_values)

    device = torch.device('cpu')
    # device = torch.device('cuda')  # Uncomment this to run on GPU

    # Create model
    in_size = len(FEATURE_NAMES)
    # model = ConvolutionalNet(in_size).to(device)
    model = LinearNet(in_size).to(device)
    # model = ImprovedLinearNet(in_size).to(device)
    learning_rate = 0.01
    # loss_fn = torch.nn.SmoothL1Loss()
    # loss_fn = QuantileRegressionLoss(0.5)
    # loss_fn = HuberRegressionLoss(0.15)
    # loss_fn = torch.nn.MSELoss(size_average=False)
    loss_fn = torch.nn.L1Loss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 amsgrad=True, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=250, gamma=0.5)
    batch_size = 256
    average_nbr_corrects = 0
    N = 100
    alpha = 2. / (N + 1)
    current_nbr_corrects = 0

    X_numpyTrainVal, X_numpyTest, Y_numpyTrainVal, Y_numpyTest = train_test_split(
        X_values, Y_values, test_size=0.1)
    X_torchTest = torch.from_numpy(X_numpyTest.astype(np.float32)).to(device)
    Y_torchTest = torch.from_numpy(Y_numpyTest.astype(np.float32)).to(device)
    X_torchTest_s = torch.split(X_torchTest, batch_size, dim=0)
    Y_torchTest_s = torch.split(Y_torchTest, batch_size, dim=0)

    for t in range(6000):
        model.train()
        X_numpyTrain, X_numpyVal, Y_numpyTrain, Y_numpyVal = train_test_split(
            X_numpyTrainVal, Y_numpyTrainVal, test_size=0.25)
        X_torchTrain = torch.from_numpy(X_numpyTrain.astype(np.float32)).to(device)
        X_torchVal = torch.from_numpy(X_numpyVal.astype(np.float32)).to(device)
        Y_torchTrain = torch.from_numpy(Y_numpyTrain.astype(np.float32)).to(device)
        Y_torchVal = torch.from_numpy(Y_numpyVal.astype(np.float32)).to(device)
        train_size = X_numpyTrain.shape[0]
        val_size = X_numpyVal.shape[0]
        train_index_s = torch.randperm(train_size)
        X_torchTrain_s = X_torchTrain[train_index_s]
        Y_torchTrain_s = Y_torchTrain[train_index_s]
        val_index_s = torch.randperm(val_size)
        X_torchVal_s = X_torchVal[val_index_s]
        Y_torchVal_s = Y_torchVal[val_index_s]
        X_torchTrain_s = torch.split(X_torchTrain, batch_size, dim=0)
        Y_torchTrain_s = torch.split(Y_torchTrain, batch_size, dim=0)
        X_torchVal_s = torch.split(X_torchVal, batch_size, dim=0)
        Y_torchVal_s = torch.split(Y_torchVal, batch_size, dim=0)
        length = (len(X_torchVal_s) - 1) * batch_size

        # Train
        for i in range(len(Y_torchTrain_s) - 1):
            x = X_torchTrain_s[i]
            y = Y_torchTrain_s[i]
            y_pred = model(x)
            loss = loss_fn((y_pred - y) / y, torch.zeros(y.shape))
            model.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validate
        model.eval()
        ValLoss = 0
        Y_torchPredict = torch.zeros(Y_torchVal.shape, dtype=torch.float32).to(device)
        Y_torchPredict_s = torch.split(Y_torchPredict, batch_size, dim=0)
        for i in range(len(Y_torchPredict_s) - 1):
            x = X_torchVal_s[i]
            y = Y_torchVal_s[i]
            y_pred = model(x)
            Y_torchPredict_s[i].copy_(y_pred)
            ValLoss += loss_fn(y_pred, y)
        ValLoss /= (len(Y_torchPredict_s) - 1)
        Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()
        threshold = 0.1
        eps = 0.001
        ValTrue_s = np.sum(
            np.abs((Y_numpyPredict - Y_numpyVal) / (Y_numpyVal + eps)) <= threshold)
        ValFalse_s = np.sum(
            np.abs((Y_numpyPredict - Y_numpyVal) / (Y_numpyVal + eps)) > threshold)
        ValAccuracy = float(ValTrue_s) / (ValTrue_s + ValFalse_s)

        TestLoss = 0
        TestAccuracy = 0
        if Y_torchTest.nelement() > 0:
            model.eval()
            TestLoss = 0
            Y_torchPredict = torch.zeros(Y_torchTest.shape, dtype=torch.float32).to(device)
            Y_torchPredict_s = torch.split(Y_torchPredict, batch_size, dim=0)
            for i in range(len(Y_torchPredict_s) - 1):
                x = X_torchTest_s[i]
                y = Y_torchTest_s[i]
                y_pred = model(x)
                Y_torchPredict_s[i].copy_(y_pred)
                TestLoss += loss_fn(y_pred, y)
            TestLoss /= (len(Y_torchPredict_s) - 1)
            Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()
            threshold = 0.1
            eps = 0.001
            TestTrue_s = np.sum(
                np.abs((Y_numpyPredict - Y_numpyTest) / (Y_numpyTest + eps)) <= threshold)
            TestFalse_s = np.sum(
                np.abs((Y_numpyPredict - Y_numpyTest) / (Y_numpyTest + eps)) > threshold)
            TestAccuracy = float(TestTrue_s) / (TestTrue_s + TestFalse_s)

        print("epoch: {:6d}, lr: {:8.6f}, val_loss: {:6.4f}, val_acc: {:6.4f}, "
              "test_loss: {:6.4f}, test_acc: {:6.4f}".format(
                  t, get_lr(optimizer), ValLoss, ValAccuracy, TestLoss, TestAccuracy))

    # Check model
    model.eval()
    X_numpyTotal = X_values
    Y_numpyTotal = Y_values
    X_torchTotal = torch.from_numpy(X_numpyTotal.astype(np.float32)).to(device)
    Y_torchTotal = torch.from_numpy(Y_numpyTotal.astype(np.float32)).to(device)
    Y_torchPredict = model(X_torchTotal)
    Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()
    Y_numpyTotal = Y_torchTotal.cpu().detach().numpy()
    eps = 0.001
    Y_relErr = np.abs(Y_numpyPredict - Y_numpyTotal) / (Y_numpyTotal + eps)
    for threshold in [0.025, 0.05, 0.10, 0.15]:
        bad_s = np.sum((Y_relErr > threshold))
        good_s = np.sum((Y_relErr <= threshold))
        total_s = Y_relErr.size
        print("threshold = {:5}, good = {:10}, bad = {:10}, err = {:4}".format(
            threshold, good_s, bad_s, good_s / (good_s + bad_s)))

    Y_numpyPredict = preprocessorY.inverse_transform(Y_numpyPredict)
    Y_numpyTotal = preprocessorY.inverse_transform(Y_numpyTotal)

    modelPacket = dict()
    modelPacket['model'] = model
    modelPacket['preprocessorX'] = preprocessorX
    modelPacket['preprocessorY'] = preprocessorY
    modelPacket['feature_names'] = FEATURE_NAMES
    modelPacket['feature_defaults'] = FEATURE_DEFAULTS

    return modelPacket, (Y_numpyPredict, Y_numpyTotal)
def XGBoost(dataFrame, targetColumn, featureNames):
    dataFrame_short = dataFrame[featureNames]
    FEATURE_NAMES = list(dataFrame_short.columns)
    FEATURE_NAMES.remove(targetColumn)
    COLUMNS = list(dataFrame_short.columns)
    LABEL = targetColumn
    Y_dataFrame = dataFrame_short[[targetColumn]]
    Y_values = Y_dataFrame.values
    X_dataFrame = dataFrame_short.drop(targetColumn, axis=1)
    X_values = X_dataFrame.values
    print(X_dataFrame.describe())
    FEATURE_DEFAULTS = ((X_dataFrame.max() + X_dataFrame.min()) * 0.5).to_dict()

    # preprocessorY = MinMaxScaler()
    # preprocessorY = StandardScaler()
    preprocessorY = MaxAbsScaler()
    preprocessorY.fit(Y_values)
    preprocessorX = MinMaxScaler()
    # preprocessorX = StandardScaler()
    preprocessorX.fit(X_values)
    Y_values = preprocessorY.transform(Y_values)
    X_values = preprocessorX.transform(X_values)

    Y_values_indexes = np.arange(0, len(Y_values), 1)
    X_numpyTrainVal, X_numpyTest, Y_numpyTrainVal_indexes, Y_numpyTest_indexes = train_test_split(
        X_values, Y_values_indexes, test_size=0.1)
    Y_numpyTrainVal = Y_values[Y_numpyTrainVal_indexes]
    Y_numpyTest = Y_values[Y_numpyTest_indexes]

    model = xgboost.XGBRegressor(max_depth=20,
                                 objective='reg:squarederror',
                                 n_estimators=120,
                                 learning_rate=0.1,
                                 n_jobs=-1)
    model.fit(X_numpyTrainVal, Y_numpyTrainVal)  # training
    Y_numpyPredict = model.predict(X_numpyTest)
    X_numpyTotal = X_values
    Y_numpyTotal = Y_values

    eps = 0.001
    Y_relErr = np.abs(Y_numpyPredict - Y_numpyTest.flatten()) / (Y_numpyTest.flatten() + eps)
    for threshold in [0.025, 0.05, 0.10, 0.15]:
        bad_s = np.sum((Y_relErr > threshold))
        good_s = np.sum((Y_relErr <= threshold))
        total_s = Y_relErr.size
        print("threshold = {:5}, good = {:10}, bad = {:10}, err = {:4}".format(
            threshold, good_s, bad_s, bad_s / (good_s + bad_s)))

    Y_numpyPredict = preprocessorY.inverse_transform(Y_numpyPredict.reshape(-1, 1))
    Y_numpyTest = preprocessorY.inverse_transform(Y_numpyTest.reshape(-1, 1))

    modelPacket = dict()
    modelPacket['model'] = model
    modelPacket['preprocessorX'] = preprocessorX
    modelPacket['preprocessorY'] = preprocessorY
    modelPacket['feature_names'] = FEATURE_NAMES
    modelPacket['feature_defaults'] = FEATURE_DEFAULTS

    threshold = 10
    print()
    Y_relativeError = np.abs(Y_numpyPredict - Y_numpyTest) * 100 / Y_numpyTest
    allValues = dataFrame.loc[Y_numpyTest_indexes]
    mask = Y_relativeError > threshold
    badValues = allValues[mask]
    mask = Y_relativeError <= threshold
    goodValues = allValues[mask]
    # print(badValues)
    f_bad_values = open("bad_values.txt", 'w')
    f_bad_values.write(str(badValues[featureNames + ['source_url']]))
    f_bad_values.close()

    allValues = Y_numpyTest
    mask = Y_relativeError > threshold
    badValues = Y_numpyTest[mask]
    mask = Y_relativeError <= threshold
    goodValues = Y_numpyTest[mask]

    bins = range(1, 20)
    bins = [i * 0.5e6 for i in bins]
    figure, axes = plt.subplots(3, 1)
    axes[1].axis('tight')
    axes[1].axis('off')
    resultValues = axes[0].hist([allValues, goodValues, badValues],
                                bins=bins,
                                histtype='bar',
                                color=['green', 'yellow', 'red'])
    allValues = resultValues[0][0]
    goodValues = resultValues[0][1]
    badValues = resultValues[0][2]
    accuracy = goodValues * 100 / (allValues + 0.01)
    col_label = [
        '{:5d}'.format(int((bins[i + 0] + bins[i + 1]) / 2))
        for i in range(len(bins) - 1)
    ]
    cell_text = [['{:2.1f}'.format(acc_) for acc_ in accuracy], ]
    table_ = axes[1].table(cellText=cell_text, colLabels=col_label, loc='center')
    table_.auto_set_font_size(False)
    table_.set_fontsize(8)

    Y_numpyTest_max = np.max(Y_numpyTest)
    Y_numpyTest_min = np.min(Y_numpyTest)
    # axes[2].set_position([Y_numpyTotal_min - Y_numpyTotal_width * 0.1,
    #                       Y_numpyTotal_min - Y_numpyTotal_width * 0.1,
    #                       Y_numpyTotal_width * 0.2, Y_numpyTotal_width * 0.2])
    axes[2].plot(Y_numpyTest, Y_numpyTest, c='blue')
    axes[2].plot(Y_numpyTest, Y_numpyTest * (1.0 + 0.1), c='red')
    axes[2].plot(Y_numpyTest, Y_numpyTest * (1.0 - 0.1), c='red')
    axes[2].scatter(Y_numpyPredict, Y_numpyTest)
    plt.show()

    # figure, axes = plt.subplots(3, 1)
    # clust_data = np.random.random((10, 3))
    # collabel = ("col 1", "col 2", "col 3")
    # axs[0].axis('tight')
    # axs[0].axis('off')
    # the_table = axs[0].table(cellText=clust_data, colLabels=collabel, loc='center')
    # axs[1].plot(clust_data[:, 0], clust_data[:, 1])
    # plt.show()

    return modelPacket, (Y_numpyPredict, Y_numpyTotal)
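# Hypothetical follow-up showing how the returned packet could be applied to new
# rows, reusing the fitted preprocessors so the model sees the same scaling as
# during training. The target name 'price' and new_dataFrame are placeholders.
# packet, _ = XGBoost(dataFrame, targetColumn='price', featureNames=feature_list)
# new_X = packet['preprocessorX'].transform(new_dataFrame[packet['feature_names']].values)
# new_y = packet['preprocessorY'].inverse_transform(
#     packet['model'].predict(new_X).reshape(-1, 1))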
print(question_AR)
# question_AR = Reshape((-1, args.input_length))(question_AR)
# print(question_AR)
# question_AR = TimeDistributed(Dense(1, kernel_regularizer=regularizers.l2(0.01)))(question_AR)
question_AR = TimeDistributed(Dense(1))(question_AR)
print(question_AR)
question_AR = Flatten()(question_AR)
main_output = add([main_output, question_AR])
# main_output = Dense(y_train.shape[1])(main_output)
# question_AR = Reshape(input_X_train.shape[2])(question_AR)
# question_AR = Flatten()(question_AR)
print(main_output)

# y_valid_ = y_valid * math.sqrt(scaler.var_[-1]) + scaler.mean_[-1]
# y_test_ = y_test * math.sqrt(scaler.var_[-1]) + scaler.mean_[-1]
y_valid_ = scaler.inverse_transform(y_valid)
y_test_ = scaler.inverse_transform(y_test)

opt = optimizers.RMSprop(lr=0.0001, decay=1e-8)
model = Model(inputs=[memory, question], outputs=[main_output])
model.compile(loss='mae', optimizer=opt, metrics=['mse', 'mae', 'mape', RRSE])
filepath = ('./model/memdnn_AR_inputlength_' + str(args.input_length)
            + '_cnn_' + str(args.CNN_unit) + '_cnnkernel_' + str(args.CNN_kernel)
            + '_gru_' + str(args.GRU_unit) + '_horizon_' + str(args.horizon) + '.h5')
checkpoint = ModelCheckpoint(filepath, monitor='val_loss',
                             save_weights_only=False, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=100)
callbacks_list = [checkpoint, earlystop]
model.reset_states()

plt.figure(figsize=(16, 10))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss', fontsize=18)
plt.ylabel('Mean Squared Error (Loss)', fontsize=18)
plt.xlabel('Epoch', fontsize=18)
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

# Model predictions on the test set
y_pred = model.predict(X_test, batch_size=batch_size)
y_pred.shape
y_pred
y_new_inverse = scalery.inverse_transform(y_pred)
y_new_inverse
y_real = scalery.inverse_transform(y_test)
y_real

for i in range(len(y_pred)):
    print(y_new_inverse[i])
for i in range(len(y_val)):
    print(y_real[i])