def compute_reconstruction_metrics(self):
    """Evaluate reconstruction quality for the most recent epoch.

    Loads the original/reconstruction pair saved for the last epoch and
    returns a report context dict with BCE, MSE, L1 norm, mutual
    information and FID entries.
    """
    last_epoch = self.get_last_epoch()
    originals = self.load_data(last_epoch)
    reconstructions = self.load_recon(last_epoch)
    # TODO(nina): Rewrite mi and fid in pytorch
    mi_value = metrics.mutual_information(reconstructions, originals)
    fid_value = metrics.frechet_inception_distance(reconstructions, originals)
    # The remaining metrics operate on torch tensors.
    originals = torch.Tensor(originals)
    reconstructions = torch.Tensor(reconstructions)
    # Placeholder
    return {
        'title': 'Vaetree Report',
        'bce': metrics.binary_cross_entropy(reconstructions, originals),
        'mse': metrics.mse(reconstructions, originals),
        'l1_norm': metrics.l1_norm(reconstructions, originals),
        'mutual_information': mi_value,
        'fid': fid_value,
    }
def iterate(dir, folder1, folder2):
    '''
    Compares corresponding images from two folders on a sample of pairs.

    Gets two sorted image lists (one per folder) and evaluates roughly 10
    evenly spaced corresponding pairs rather than every pair, to keep the
    comparison fast.

    :param str dir: root directory containing both folders
    :param str folder1: path to folder1 with images to compare
    :param str folder2: path to folder2 with images to compare
    :return: tuple of (MSE, SSIM), each averaged over the sampled pairs
        and rounded to 4 significant digits
    '''
    list1, list2 = getImgList(dir, folder1, folder2)
    mse = 0
    ssim = 0
    count = 0  # how many pairs were actually compared
    # step of len/10 + 1 samples ~10 evenly spaced pairs (and guards
    # against a zero step for short lists)
    step = int(len(list1) / 10) + 1
    for i in range(0, len(list1), step):
        count += 1
        path1 = "{}/{}/{}".format(dir, folder1, list1[i])
        path2 = "{}/{}/{}".format(dir, folder2, list2[i])
        mse += metrics.mse(path1, path2)
        ssim += metrics.ssim(path1, path2)
    # :.4g gives 4 significant digits (the old comment claimed 3 decimals)
    mse = float(f'{mse/count:.4g}')
    ssim = float(f'{ssim/count:.4g}')
    return (mse, ssim)
def validate_model(net, criterion, validloader, num_ens=1):
    """Calculate ensemble MSE and NLL Loss.

    Runs the (stochastic) network `num_ens` times per batch, averages the
    ensemble outputs, and accumulates validation loss, per-batch MSE, KL
    divergence and log-Gaussian predictive loss.

    Returns a 4-tuple: (mean valid loss per batch, mean MSE, mean KL,
    mean predictive loss).
    """
    net.eval()
    valid_loss = 0.0
    mses = []
    kl_list = []
    pred_list = []
    for i, (inputs, targets) in enumerate(validloader):
        inputs, targets = inputs.to(device), targets.to(device)
        # One output slice per ensemble member.
        outputs = torch.zeros(inputs.shape[0], net.outputs, num_ens).to(device)
        kl = 0.0
        for j in range(num_ens):
            # each forward pass returns (prediction, kl term)
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = net_out
        kl = kl / num_ens
        kl_list.append(kl.item())
        # Average over the ensemble dimension.
        outputs = torch.mean(outputs, dim=2)
        if outputs.shape[1] == 1:
            # flatten single-output predictions to a 1-D vector
            outputs = outputs.reshape([outputs.shape[0]])
        # Predictive log-Gaussian loss, scaled by the training-set size
        # stored on the criterion.
        pred = metrics.log_gaussian_loss(outputs, targets,
                                         net.log_noise.exp(),
                                         net.outputs) * criterion.train_size
        pred_list.append(pred.cpu().data.numpy())
        loss = criterion(outputs, targets, net.log_noise.exp(), net.outputs,
                         kl, kl_weight=1)
        mses.append(metrics.mse(outputs.data, targets).cpu().data.numpy())
        valid_loss += loss.cpu().data.numpy()
    return valid_loss/len(validloader), np.mean(np.array(mses)), \
        np.mean(np.array(kl_list)), np.mean(np.array(pred_list))
def validate_model(net, criterion, validloader, num_ens=1):
    """Calculate ensemble MSE and NLL Loss.

    Variant for a net whose output concatenates the predictive mean
    (column 0) and a log-noise term (column 1) — presumably
    net.outputs == 2 here; TODO confirm against the caller.

    Returns (mean valid loss per batch, mean MSE).
    """
    net.eval()
    valid_loss = 0.0
    mses = []
    for i, (inputs, targets) in enumerate(validloader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = torch.zeros(inputs.shape[0], net.outputs, num_ens).to(device)
        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = net_out
        # Average over ensemble members.
        outputs = torch.mean(outputs, dim=2)
        # NOTE(review): if outputs.shape[1] == 1 this reshape makes
        # `outputs` 1-D, and the 2-D indexing below ([:, :1]) would then
        # raise — this branch looks unreachable/contradictory for the
        # mean+noise layout; confirm intended behavior.
        if outputs.shape[1] == 1:
            outputs = outputs.reshape([outputs.shape[0]])
        # Split mean prediction and log-noise columns.
        samples = outputs[:, :1].reshape([outputs.shape[0]])
        noises = outputs[:, 1:].reshape([outputs.shape[0]])
        loss = criterion(samples, targets, noises.exp(), 1, kl)
        mses.append(metrics.mse(samples.data, targets).cpu().data.numpy())
        valid_loss += loss.cpu().data.numpy()
    return valid_loss/len(validloader), np.mean(np.array(mses))
def model_assessment(self, X_va, y_va, model):
    """
    Computes assessment measures for each fold evaluation.

    Parameters
    ----------
    X_va: numpy.ndarray
        the validation design matrix
    y_va: numpy.ndarray
        the validation target column vector
    model: nn.NeuralNetwork
        the model to use for the validation phase

    Returns
    -------
    assessment : dict
        dictionary with structure { metric: estimated value}.
    """
    predictions = model.predict(X_va)
    # Further metrics can be added to this dictionary later on.
    return {'mse': metrics.mse(y_va, predictions)}
def elasticnet_cv(self, nsplits: int, lam: float = None, l1_ratio: float = None):
    """
    Runs a K-fold cross validation of an elastic net on the data set and
    returns the cross-validated performance.

    :param nsplits: number of cv splits
    :param lam: tuning parameter (chosen by ElasticNetCV when omitted)
    :param l1_ratio: balance of l1/l2 penalization; 0 means ridge, 1 means
        lasso (chosen by ElasticNetCV when omitted)
    :return: the cross-validated mse
    """
    # Fill in any missing hyper-parameter via an inner ElasticNetCV fit.
    if lam is None or l1_ratio is None:
        tuned = ElasticNetCV(
            cv=nsplits,
            l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.95, 0.99, 1]).fit(self.x, self.y)
        lam = tuned.alpha_ if lam is None else lam
        l1_ratio = tuned.l1_ratio_ if l1_ratio is None else l1_ratio
    splitter = KFold(n_splits=nsplits)
    fold_errors = []
    for train_idx, test_idx in splitter.split(self.x):
        fit = ElasticNet(alpha=lam, l1_ratio=l1_ratio).fit(
            self.x[train_idx, :], self.y[train_idx])
        predictions = fit.predict(self.x[test_idx, :])
        fold_errors.append(mse(self.y[test_idx], predictions))
    return np.mean(fold_errors)
def eval_test(csvspath, runspath, fid, test_data, test_consts, out_path,
              batch_size=8):
    """Evaluate the best checkpoint of every run listed in `csvspath`.

    For each run csv, loads its best-epoch weights, generates predictions
    for `test_consts` in batches, and writes FID (vs `test_data`) and MSE
    (vs `test_consts`) rows to `<out_path>/values.csv`.
    """
    z = noise.uniform(7)
    df = pd.DataFrame(columns=["FID", "MSE"])
    # NOTE(review): progressbar2 spells this keyword `max_value`;
    # `maxvalue` may be silently ignored — confirm against the installed
    # package.
    bar = progressbar.ProgressBar(maxvalue=len(os.listdir(csvspath)),
                                  redirect_stdout=True)
    for csv in bar(os.listdir(csvspath)):
        wpath = get_best_epoch_weights(
            csvspath + os.sep + csv,
            runspath + os.sep + csv.replace(".csv", ""))
        model = K.models.load_model(wpath)
        preds = []
        # Predict in batches to bound memory use.
        for i in range(0, len(test_consts), batch_size):
            preds += list(
                model.predict([z(batch_size), test_consts[i:i + batch_size]]))
        tfid = fid(test_data, preds, batch_size=batch_size)
        tmse = mse(preds, test_consts)
        # DataFrame.append was removed in pandas 2.0; concatenate a
        # one-row frame instead (same ignore_index semantics).
        df = pd.concat([df, pd.DataFrame([{"FID": tfid, "MSE": tmse}])],
                       ignore_index=True)
    df.to_csv(out_path + os.sep + "values.csv")
def train(args):
    """Train or test the SRCNN model depending on args.is_train.

    Training: reads patches from <checkpoint_dir>/train.h5 and runs
    mini-batch optimization for args.epochs epochs, printing the MSE loss
    every `display_step` epochs. Testing: reads <checkpoint_dir>/test.h5,
    runs the network, re-assembles the patch grid and saves the image to
    <sample_dir>/test_image.png.
    """
    # input_setup returns the patch-grid shape (nx, ny) only in test mode.
    if args.is_train:
        input_setup(args)
    else:
        nx, ny = input_setup(args)
    counter = 0
    start_time = time.time()
    if args.is_train:
        print("Training...")
        data_dir = os.path.join('./{}'.format(args.checkpoint_dir), "train.h5")
        train_data, train_label = read_data(data_dir)
        display_step = 5  # print the loss every 5 epochs
        for step in range(args.epochs):
            batch_idxs = len(train_data) // args.batch_size
            for idx in range(0, batch_idxs):
                batch_images = train_data[idx * args.batch_size:(idx + 1) * args.batch_size]
                batch_labels = train_label[idx * args.batch_size:(idx + 1) * args.batch_size]
                run_optimization(batch_images, batch_labels)
                if step % display_step == 0:
                    pred = srcnn(batch_images)
                    loss = mse(pred, batch_labels)
                    #psnr_loss = psnr(batch_labels, pred)
                    #acc = accuracy(pred, batch_y)
                    #print("step: %i, loss: %f", "psnr_loss: %f" %(step, loss, psnr_loss))
                    #print("Step:'{0}', Loss:'{1}', PSNR: '{2}'".format(step, loss, psnr_loss))
                    print("step: %i, loss: %f" % (step, loss))
    else:
        print("Testing...")
        data_dir = os.path.join('./{}'.format(args.checkpoint_dir), "test.h5")
        test_data, test_label = read_data(data_dir)
        result = srcnn(test_data)
        # Re-assemble the predicted patches into the full image.
        result = merge(result, [nx, ny])
        result = result.squeeze()
        image_path = os.path.join(os.getcwd(), args.sample_dir)
        image_path = os.path.join(image_path, "test_image.png")
        print(result.shape)
        imsave(result, image_path)
def Eval():
    """Mean per-image MSE between predictions in `<model_name>/train` and
    the ground-truth labels in cfg.path_label (both scaled to [0, 1])."""
    total_error = 0
    for name in cfg.names_val:
        prediction = imread(join(model_name, 'train', name)) / 255
        label = imread(join(cfg.path_label, name)) / 255
        total_error += mse(prediction=prediction, label=label)
    return total_error / len(cfg.names_val)
def run_optimization(x, y):
    """Single optimization step for the SRCNN model.

    Records the forward pass on a gradient tape, computes the MSE loss
    against the targets, and applies one optimizer update to the model's
    trainable variables.
    """
    with tf.GradientTape() as tape:
        # Forward pass + loss, recorded for automatic differentiation.
        loss = mse(srcnn(x, is_training=True), y)
    variables = srcnn.trainable_variables
    gradients = tape.gradient(loss, variables)
    # Update weights and biases following the gradients.
    optimizer.apply_gradients(zip(gradients, variables))
def lm_cv(self, nsplits: int):
    """
    Runs a K-fold cross validation of an ordinary linear regression on
    the data set and returns the cross-validated performance.

    :param nsplits: number of cv splits
    :return: the cross-validated mse
    """
    splitter = KFold(n_splits=nsplits)
    fold_errors = []
    for train_idx, test_idx in splitter.split(self.x):
        fit = LinearRegression().fit(self.x[train_idx, :], self.y[train_idx])
        predictions = fit.predict(self.x[test_idx, :])
        fold_errors.append(mse(self.y[test_idx], predictions))
    return np.mean(fold_errors)
def eval_test(csvspath, runspath, fid, test_data, test_consts, out_path,
              batch_size=8):
    """Evaluate the best checkpoint of every run, grouped by lambda.

    Each run csv is named `..._<lambda>.csv`; the lambda is parsed from
    the filename and FID/MSE are accumulated per lambda in `vals` and
    written row by row to `<out_path>/values.csv`.
    """
    # Per-lambda accumulator: lambda -> [fid values, mse values].
    vals = {}
    z = noise.uniform(7)
    df = pd.DataFrame(columns=["Lambda", "FID", "MSE"])
    bar = progressbar.ProgressBar(maxvalue=len(os.listdir(csvspath)),
                                  redirect_stdout=True)
    for csv in bar(os.listdir(csvspath)):
        # Parse the lambda value out of the csv filename.
        lmbda = csv.split("_")[-1].replace(".csv", "")
        if lmbda[-1] == ".":
            lmbda = lmbda[:-1]
        lmbda = float(lmbda)
        if lmbda not in vals:
            vals[lmbda] = [[], []]
        wpath = get_best_epoch_weights(
            csvspath + os.sep + csv,
            runspath + os.sep + csv.replace(".csv", ""))
        model = K.models.load_model(wpath)
        preds = []
        # Predict in batches to bound memory use.
        for i in range(0, len(test_consts), batch_size):
            preds += list(
                model.predict([z(batch_size), test_consts[i:i + batch_size]]))
        tfid = fid(test_data, preds, batch_size=batch_size)
        tmse = mse(preds, test_consts)
        vals[lmbda][0].append(tfid)
        vals[lmbda][1].append(tmse)
        # DataFrame.append was removed in pandas 2.0; concatenate a
        # one-row frame instead (same ignore_index semantics).
        df = pd.concat(
            [df, pd.DataFrame([{"Lambda": float(lmbda),
                                "FID": tfid,
                                "MSE": tmse}])],
            ignore_index=True)
    df.to_csv(out_path + os.sep + "values.csv")
def train_model(net, optimizer, criterion, trainloader, num_ens=1):
    """One training epoch for a Bayesian ensemble regressor.

    The net's output concatenates a predictive-mean column and a
    log-noise column; `num_ens` stochastic forward passes are averaged
    before splitting. Sets cfg.record_now on a fixed batch cadence so
    other components can log.

    Returns (mean train loss per batch, mean MSE, mean KL).
    """
    net.train()
    training_loss = 0.0
    mses = []
    kl_list = []
    freq = cfg.recording_freq_per_epoch
    print(len(trainloader))
    # Convert "recordings per epoch" into "record every `freq` batches".
    # NOTE(review): if recording_freq_per_epoch > len(trainloader) this
    # becomes 0 and `i % freq` below raises ZeroDivisionError — confirm
    # config invariants.
    freq = len(trainloader) // freq
    for i, (inputs, targets) in enumerate(trainloader, 1):
        cfg.curr_batch_no = i
        if i % freq == 0:
            cfg.record_now = True
        else:
            cfg.record_now = False
        optimizer.zero_grad()
        inputs, targets = inputs.to(device), targets.to(device)
        # One output slice per ensemble member.
        outputs = torch.zeros(inputs.shape[0], net.outputs, num_ens).to(device)
        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = net_out
        kl = kl / num_ens
        kl_list.append(kl.item())
        outputs = torch.mean(outputs, dim=2)
        if outputs.shape[1] == 1:
            outputs = outputs.reshape([outputs.shape[0]])
        # Split mean prediction and log-noise columns.
        samples = outputs[:, :1].reshape([outputs.shape[0]])
        noises = outputs[:, 1:].reshape([outputs.shape[0]])
        loss = criterion(samples, targets, noises.exp(), 1, kl)
        loss.backward()
        optimizer.step()
        # (removed dead local `b = outputs[:, :1].data` — it was never read)
        mses.append(metrics.mse(samples.data, targets).cpu().data.numpy())
        training_loss += loss.cpu().data.numpy()
    return training_loss / len(trainloader), np.mean(np.array(mses)), np.mean(
        np.array(kl_list))
def k_folds(x_train, y_train, k=10):
    """K-fold cross validation (default k=10) of a linear regression.

    Returns the mean of the MSE obtained on the validation chunk of each
    fold:
    1st   ___ ___ ___     ___ ___
         | 1 | 2 | 3 |   |k-1| k |
         |Val|Tra|Tra|...|Tra|Tra|
    2nd   ___ ___ ___     ___ ___
         | 1 | 2 | 3 |   |k-1| k |
         |Tra|Val|Tra|...|Tra|Tra|
    k-th  ___ ___ ___     ___ ___
         | 1 | 2 | 3 |   |k-1| k |
         |Tra|Tra|Tra|...|Tra|Val|
    """
    l_regression = LinearRegression()
    chunk_size = int(len(x_train) / k)
    mse_list = []
    for i in range(0, len(x_train), chunk_size):
        # Clamp the last chunk to the end of the data.
        end = i + chunk_size if i + chunk_size <= len(x_train) else len(x_train)
        new_x_valid = x_train[i:end]
        new_y_valid = y_train[i:end]
        new_x_train = np.concatenate([x_train[:i], x_train[end:]])
        new_y_train = np.concatenate([y_train[:i], y_train[end:]])
        l_regression.fit(new_x_train, new_y_train)
        # BUG FIX: the predict() result was discarded and the nonexistent
        # attribute `l_regression.predicted` was read (sklearn estimators
        # have no such attribute), raising AttributeError at runtime.
        predictions = l_regression.predict(new_x_valid)
        mse_list.append(mse(new_y_valid, predictions))
    mean_mse = np.mean(mse_list)
    return mean_mse
def lasso_cv(self, nsplits: int, lam: float = None):
    """
    Runs a K-fold cross validation of a lasso regression on the data set
    and returns the cross-validated performance.

    :param nsplits: number of cv splits
    :param lam: tuning parameter (chosen by LassoCV when omitted)
    :return: the cross-validated mse
    """
    # Pick the penalty via an inner LassoCV fit when not supplied.
    if lam is None:
        lam = LassoCV(cv=nsplits).fit(self.x, self.y).alpha_
    splitter = KFold(n_splits=nsplits)
    fold_errors = []
    for train_idx, test_idx in splitter.split(self.x):
        fit = Lasso(alpha=lam).fit(self.x[train_idx, :], self.y[train_idx])
        predictions = fit.predict(self.x[test_idx, :])
        fold_errors.append(mse(self.y[test_idx], predictions))
    return np.mean(fold_errors)
# Stationarity diagnostics on the differenced training series.
adfuller(train_set_diff)
acf_pacf(train_set_diff)
# series is stationary; move on to searching for optimal parameters
# best_params.append(sarima_best_params(train_set, 7, 1, 0, 7, 3))
# build the model
model = sm.tsa.SARIMAX(train_set,
                       order=(best_params[j][0], 1, best_params[j][1]),
                       seasonal_order=(best_params[j][2], 0,
                                       best_params[j][3], 7)).fit(disp=False)
# print model information
print(model.summary())
# check the model residuals for randomness
ljungbox(model.resid)
acf_pacf(model.resid)
# SARIMA forecast for the specific hour
hour_pred = model.forecast(2)[-1]
# append the hourly forecast to the overall forecast
total_pred.append(hour_pred)
# forecast evaluation metrics
nnf = nnfmetrics(total_pred, test_set, plan_set)
# BUG FIX: the results were previously bound to the names `mse`, `mape`
# and `acc`, shadowing the metric functions themselves. This fragment
# runs inside a loop (it indexes best_params[j]), so the next iteration
# would have called a float. Renamed to *_val, consistent with the other
# forecast scripts in this codebase.
mse_val = mse(test_set, total_pred)
mape_val = mape(test_set, total_pred)
acc_val = accuracy(test_set, total_pred)
print('Оценка по NNFMETRICS = ', nnf)
print('Оценка по MSE = ', mse_val)
print('Оценка по MAPE = ', mape_val)
print('Точность прогноза = ', acc_val)
# plot the results
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
#Classic approach print('> Run classic approach') t0 = time() classic_img,ckms = km_clusterize(img, n_clusters) classic_time = time() - t0 #Fuzzy approach print('> Run fuzzy approach') t0 = time() fuzzy_img, fcms = fuzzy_clusterize(img, n_clusters) fuzzy_time = time() - t0 print('> Evaluating') mse_incremental = mse(img, incremental_img) mse_classic = mse(img, classic_img) mse_fuzzy = mse(img, fuzzy_img) print('> MSE Incremental: %.4f'%(mse_incremental)) print('> MSE Classic: %.4f'%(mse_classic)) print('> MSE Fuzzy: %.4f'%(mse_fuzzy)) psnr_incremental = psnr(img, incremental_img) psnr_classic = psnr(img, classic_img) psnr_fuzzy = psnr(img, fuzzy_img) print('> PSNR Incremental: %.4f'%(psnr_incremental)) print('> PSNR Classic: %.4f'%(psnr_classic)) print('> PSNR Fuzzy: %.4f'%(psnr_fuzzy)) print('> Elapsed time') print('> Incremental: %.4f'%(incremental_time)) print('> Classic: %.4f'%(classic_time))
def train(self, X, y, X_va=None, y_va=None):
    """
    This function implements the neural network's training routine:
    mini-batch SGD with momentum and weight decay, per-epoch MSE/MEE
    tracking (plus accuracy for classifiers), and optional GL/PQ early
    stopping on the validation error.

    Parameters
    ----------
    X : numpy.ndarray
        the design matrix
    y : numpy.ndarray
        the target column vector
    X_va: numpy.ndarray
        the design matrix used for the validation
        (Default value = None)
    y_va: numpy.ndarray
        the target column vector used for the validation
        (Default value = None)

    Returns
    -------
    """
    # Momentum accumulators, one per layer.
    velocity_W = [0 for i in range(self.n_layers)]
    velocity_b = [0 for i in range(self.n_layers)]
    self.error_per_epochs = []
    self.error_per_epochs_old = []
    self.error_per_batch = []
    self.mee_per_epochs = []
    if X_va is not None:
        self.error_per_epochs_va = []
        self.mee_per_epochs_va = []
    else:
        self.error_per_epochs_va = None
        self.mee_per_epochs_va = None
    if self.task == 'classifier':
        self.accuracy_per_epochs = []
        self.accuracy_per_epochs_va = []
    # Epoch indices at which each early-stop criterion first fired.
    self.stop_GL = None
    self.stop_PQ = None
    stop_GL = False
    stop_PQ = False

    # for e in tqdm(range(self.epochs), desc='TRAINING'):
    for e in range(self.epochs):
        error_per_batch = []

        # Shuffle X and y together so rows stay aligned.
        dataset = np.hstack((X, y))
        np.random.shuffle(dataset)
        X, y = np.hsplit(dataset, [X.shape[1]])

        for b_start in np.arange(0, X.shape[0], self.batch_size):
            # BACK-PROPAGATION ALGORITHM ##################################
            x_batch = X[b_start:b_start + self.batch_size, :]
            y_batch = y[b_start:b_start + self.batch_size, :]

            error = self.forward_propagation(x_batch, y_batch)
            self.error_per_batch.append(error)
            error_per_batch.append(error)

            self.back_propagation(x_batch, y_batch)

            # WEIGHTS' UPDATE #############################################
            for layer in range(self.n_layers):
                weight_decay = reg.regularization(self.W[layer],
                                                  self.reg_lambda,
                                                  self.reg_method)

                # Momentum update, with the learning rate scaled by the
                # actual batch size (the last batch may be smaller).
                velocity_b[layer] = (self.alpha * velocity_b[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_b[layer]
                self.b[layer] += velocity_b[layer]
                velocity_W[layer] = (self.alpha * velocity_W[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_W[layer]
                self.W[layer] += velocity_W[layer] - weight_decay

        ###############################################################
        # COMPUTING OVERALL MSE ###########################################
        self.error_per_epochs_old.append(
            np.sum(error_per_batch)/X.shape[0])
        y_pred = self.predict(X)
        self.error_per_epochs.append(metrics.mse(y, y_pred))
        self.mee_per_epochs.append(metrics.mee(y, y_pred))
        if X_va is not None:
            y_pred_va = self.predict(X_va)
            self.error_per_epochs_va.append(
                metrics.mse(y_va, y_pred_va))
            self.mee_per_epochs_va.append(
                metrics.mee(y_va, y_pred_va))

        if self.task == 'classifier':
            # Threshold raw outputs at 0.5 to get binary predictions.
            y_pred_bin = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred).reshape(-1, 1)
            y_pred_bin_va = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)
            bin_assess = metrics.BinaryClassifierAssessment(
                y, y_pred_bin, printing=False)
            bin_assess_va = metrics.BinaryClassifierAssessment(
                y_va, y_pred_bin_va, printing=False)
            self.accuracy_per_epochs.append(bin_assess.accuracy)
            self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)

        # CHECKING FOR EARLY STOPPING #####################################
        # Criteria are evaluated every 5th epoch after a warm-up period.
        if self.early_stop is not None \
                and e > self.early_stop_min_epochs \
                and (e + 1) % 5 == 0:
            # Generalization loss: relative increase (in %) of the current
            # validation error over the best seen so far.
            generalization_loss = 100 \
                * ((self.error_per_epochs_va[e] /
                    min(self.error_per_epochs_va)) - 1)

            # GL method
            if generalization_loss > self.epsilon:
                stop_GL = True

            # PQ method
            if self.early_stop != 'GL':  # PQ or 'testing'
                # Training progress over the last 5-epoch strip.
                min_e_per_strip = min(
                    self.error_per_epochs_va[e - 4:e + 1])
                sum_per_strip = sum(self.error_per_epochs_va[e - 4:e + 1])
                progress = 1000 * \
                    ((sum_per_strip / (5 * min_e_per_strip)) - 1)
                progress_quotient = generalization_loss / progress

                if progress_quotient > self.epsilon:
                    stop_PQ = True

            # stopping: remember the first epoch each criterion fired
            if stop_GL and self.stop_GL is None:
                self.stop_GL = e
            if stop_PQ and self.stop_PQ is None:
                self.stop_PQ = e
            # In 'testing' mode the criteria are recorded but training
            # continues to the last epoch.
            if self.early_stop != 'testing' and (stop_GL or stop_PQ):
                break
# Round-trip BGR -> HSV -> BGR with the hand-rolled color_models
# implementation and with OpenCV, saving each intermediate image, then
# time and compare two brighten pipelines (direct BGR vs via HSV).
converted_image = color_models.hsv_to_bgr(hsv_image)
cv2.imwrite('converted_image.png', converted_image)
hsvcv2_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV)
cv2.imwrite('hsvcv2_image.png', hsvcv2_image)
convertedcv2_image = cv2.cvtColor(hsvcv2_image, cv2.COLOR_HSV2BGR)
cv2.imwrite('convertedcv2_image.png', convertedcv2_image)

iterations = 1  # bump this for a more stable timing comparison

# Pipeline 1: brighten directly in BGR vs via the hand-rolled HSV path.
time_start = time()
for i in range(iterations):
    image1 = color_models.bgr_brighten(bgr_image, 1.2)
    image2 = color_models.hsv_to_bgr(
        color_models.hsv_brighten(color_models.bgr_to_hsv(bgr_image), 1.2))
time1 = time() - time_start
# MSE between the two brighten results (how close the HSV path is).
mse1 = metrics.mse(image1, image2)
cv2.imwrite('bgr_brighten.png', image1)
cv2.imwrite('converted_brighten.png', image2)

# Pipeline 2: same comparison but using OpenCV for the HSV conversions.
time_start = time()
for i in range(iterations):
    image1 = color_models.bgr_brighten(bgr_image, 1.2)
    image2 = cv2.cvtColor(
        color_models.hsv_brighten(
            cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV), 1.2),
        cv2.COLOR_HSV2BGR)
time2 = time() - time_start
mse2 = metrics.mse(image1, image2)
cv2.imwrite('convertedcv2_brighten.png', image2)

print(time1, time2)
print(mse1, mse2)
# Train and evaluate the regression model on pre-built .npy datasets,
# then report timing for each pipeline stage (t0..t4 are assumed to be
# set earlier in this script — TODO confirm).
train_set = np.load('./data/train_regr_set.npy')
train_answers = np.load('./data/train_regr_answers.npy')
model = models.regression_model(train_set[0].shape)
model.fit(train_set, train_answers, batch_size=64, epochs=5, verbose=2,
          validation_split=0.1)
test_set = np.load('./data/test_regr_set.npy')
test_answers = np.load('./data/test_regr_answers.npy')
predictions = model.predict(test_set)
print('MSE:', metrics.mse(predictions, test_answers))
# NOTE(review): the label says "MAE/RMSE" but metrics.rmse is called —
# confirm which metric was intended.
print('MAE/RMSE:', metrics.rmse(predictions, test_answers))
model.save('./models/regr.h5')
t5 = time.time()

# Time for work
print('Time for extract sequences from input data:', t1 - t0)
print('Time for classification dataset', t2 - t1)
print('Time for regression dataset', t3 - t2)
print('Time for classification model', t4 - t3)
print('Time for regression model', t5 - t4)
plan = dataset.plan # длина периода сезонной составляющей p = 168 # тренировочный датасет train_set, данные с 14.01.2019 по 25.11.2019 train_start_date = datetime.datetime(2019, 1, 14, 0, 0, 0) train_end_date = datetime.datetime(2019, 11, 25, 23, 0, 0) train_set = fact[train_start_date:train_end_date] # тестовый датасет test_set, данные за прогнозный день 27.11.2019 pred_date = datetime.date(2019, 11, 27) pred_end_date = train_end_date + datetime.timedelta(days=2) pred_start_date = pred_end_date - datetime.timedelta(hours=23) test_set = fact[pred_start_date:pred_end_date] # план предприятия plan_set для сверки, данные за прогнозный день 27.11.2019 plan_set = plan[pred_start_date:pred_end_date] # выбор метода method = additiveHoltWinters # найденные постоянные сглаживания best_params = [[0.06846567, 0.00032291, 0.26071966]] # прогнозирование pred = method(best_params[0], train_set, p, 48)[24:] # оценка прогноза по метрикам nnf_val = nnfmetrics(pred, test_set, plan_set) mse_val = mse(test_set, pred) mape_val = mape(test_set, pred) acc_val = accuracy(test_set, pred) print('Оценка по NNFMETRICS = ', nnf_val) print('Оценка по MSE = ', mse_val) print('Оценка по MAPE = ', mape_val) print('Точность прогноза = ', acc_val) # отрисовка графика plot_results(pred_date.strftime('%d-%m-%Y'), test_set, pred, plan_set)
def run_base_model_nfm(dfTrain, dfTest, folds, kdfm_params):
    """Run K-fold training/evaluation of the DeepAFM ("KDFM") model.

    Builds the feature dictionary and text vocabulary, trains one model
    per fold, accumulates out-of-fold and averaged test predictions,
    reports per-fold MSE, writes a submission file and plots the
    per-epoch mse/mae curves.
    """
    fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           xm_cols=config.XM_COLS)
    data_parser = DataParser(feat_dict=fd)
    # newly added: vocabulary built from the word file
    word2idx, idx2word = build_vocab(config.word_file)
    # Xi_train: feature column indices
    # Xv_train: the corresponding feature values
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain)
    Xt_train, Xm_train = read_text_data(
        config.TRAIN_FILE, word2idx, config.num_unroll_steps)
    # read data  TODO: config vs pnn_params
    Xi_test, Xv_test, y_test = data_parser.parse(df=dfTest)
    Xt_test, Xm_test = read_text_data(config.TEST_FILE, word2idx,
                                      config.num_unroll_steps)
    kdfm_params['feature_size_one_hot'] = fd.feat_dim
    kdfm_params['word_embeddings'] = load_embedding(
        config.embedding_size, filename=config.embedding_file)

    # read data  TODO: change
    # Out-of-fold predictions and test predictions (averaged over folds).
    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    results_cv = np.zeros(len(folds), dtype=float)
    results_epoch_train = np.zeros((len(folds), kdfm_params['epoch']),
                                   dtype=float)
    results_epoch_valid = np.zeros((len(folds), kdfm_params['epoch']),
                                   dtype=float)
    results_epoch_train_mae = np.zeros((len(folds), kdfm_params['epoch']),
                                       dtype=float)
    results_epoch_valid_mae = np.zeros((len(folds), kdfm_params['epoch']),
                                       dtype=float)

    def _get(x, l):
        # Select the rows of list x at the indices in l.
        return [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_, Xt_train_, Xm_train_ = \
            _get(Xi_train, train_idx), _get(Xv_train, train_idx), \
            _get(y_train, train_idx), \
            _get(Xt_train, train_idx), _get(Xm_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_, Xt_valid_, Xm_valid_ = \
            _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), \
            _get(y_train, valid_idx), \
            _get(Xt_train, valid_idx), _get(Xm_train, valid_idx)

        kdfm = DeepAFM(**kdfm_params)
        # These "m" feature lists are intentionally empty placeholders
        # for this base-model run — presumably unused by DeepAFM here;
        # TODO confirm.
        Xim_train_ = []
        Xvm_train_ = []
        Xim_valid_ = []
        Xvm_vaild_ = []
        Xim_test = []
        Xvm_test = []
        kdfm.fit(Xi_train_, Xv_train_, Xim_train_, Xvm_train_,
                 Xt_train_, y_train_,
                 Xi_valid_, Xv_valid_, Xim_valid_, Xvm_vaild_,
                 Xt_valid_, y_valid_)

        y_train_meta[valid_idx, 0] = kdfm.predict(
            Xi_valid_, Xv_valid_, Xim_valid_, Xvm_vaild_, Xt_valid_)
        y_test_meta[:, 0] += kdfm.predict(
            Xi_test, Xv_test, Xim_test, Xvm_test, Xt_test)

        results_cv[i] = mse_norm(y_valid_, y_train_meta[valid_idx])
        results_epoch_train[i] = kdfm.train_result
        results_epoch_valid[i] = kdfm.valid_result
        results_epoch_train_mae[i] = kdfm.mae_train_result
        results_epoch_valid_mae[i] = kdfm.mae_valid_result

    # Average the test predictions over all folds.
    y_test_meta /= float(len(folds))
    mse_test = mse(y_test, y_test_meta)

    # save result
    if kdfm_params["use_afm"] and kdfm_params["use_deep"]:
        clf_str = "KDFM"
    elif kdfm_params["use_afm"]:
        clf_str = "AFM"
    elif kdfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)" % (clf_str, results_cv.mean(), results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv" % (clf_str, results_cv.mean(),
                                            results_cv.std())
    _make_submission(y_test, y_test_meta, mse_test, filename)

    _plot_fig(results_epoch_train, results_epoch_valid, clf_str + 'mse', "mse")
    _plot_fig(results_epoch_train_mae, results_epoch_valid_mae,
              clf_str + 'mae', "mae")
def regression_report(y_true, y_pred):
    """Print a suite of regression metrics comparing predictions with the
    ground truth: MSE, RMSE, MAPE, MPE and R2."""
    report = (
        ("MSE", metrics.mse),
        ("RMSE", metrics.rmse),
        ("MAPE", metrics.mape),
        ("MPE", metrics.mpe),
        ("R2", metrics.r2),
    )
    for label, metric_fn in report:
        print(label, metric_fn(y_true, y_pred).item())
def eval_metrics(self, y_true, pred):
    """Assemble the evaluation-time metric dict: the model's loss plus
    MSE and Pearson correlation of the predictions against y_true."""
    predictions = pred['y']
    return {
        'loss': pred['loss'],
        'mse': metrics.mse(y_true, predictions),
        'pearson': metrics.pearson(y_true, predictions),
    }
def compare_with_RF(self):
    """Train a baseline RandomForest on the training split and log its
    error on the test split."""
    clf = RandomForestClassifier(n_estimators=20,
                                 criterion="entropy",
                                 min_samples_split=10)
    clf.fit(self.train_data, self.train_labels)
    preds = clf.predict(self.test_data)
    # BUG FIX: predictions made on test_data were scored against
    # train_labels (wrong split, and typically a length mismatch);
    # compare against the test labels instead.
    logging.info("ERROR_RF = {}".format(mse(preds, self.test_labels)))
def run(cfg, dataset_path, logs_dir, checkpoints_dir, checkpoints=True):
    """Train a conditional GAN as described by the config object `cfg`.

    Builds the discriminator/generator graphs, alternates D and G updates
    for cfg.epochs epochs, logs losses, validation metrics (MSE/FID),
    weight histograms and sample images to TensorBoard, and (optionally)
    saves per-epoch Keras checkpoints of G and D.
    """
    with tf.Session() as sess:
        # Batch providers for training data, generation constraints and
        # (optionally) the validation set.
        get_data = from_files.get_data_provider(dataset_path, cfg.batch_size)
        get_gen = from_files.get_genconst_provider(dataset_path, cfg.batch_size)
        if cfg.validation:
            get_valid_data = from_files.get_valid_provider(dataset_path, cfg.batch_size)
            get_valid_const = from_files.get_validconst_provider(dataset_path, cfg.batch_size)
        else:
            get_valid_data = None
            get_valid_const = None
        get_noise = cfg.noise_provider(**cfg.noise_provider_args)
        # FID is a no-op (constant 0) when no inception model is supplied.
        if cfg.fid_model is not None:
            fid = create_fid_func(cfg.fid_model, sess)
        else:
            fid = lambda x, y: 0

        # Building models
        Z = tf.placeholder(tf.float32, (None, cfg.zx, cfg.zx, cfg.nz,), name="Z")
        X = tf.placeholder(tf.float32, (None, cfg.npx, cfg.npx, cfg.channels,), name="X")
        C = tf.placeholder(tf.float32, (None, cfg.npx, cfg.npx, cfg.channels,), name="C")
        Cf = tf.placeholder(tf.float32, (None, cfg.npx, cfg.npx, cfg.channels,), name="Cf")
        D = cfg.discriminator(X, C, **cfg.disc_args)
        G = cfg.generator(Z, Cf, **cfg.gen_args)
        D_out = D.output
        G_out = G.output
        with tf.name_scope("DG"):
            # Discriminator applied to generated samples.
            DG = D([G_out, Cf])

        # Objectives (non-saturating GAN losses, epsilon for log safety)
        with tf.name_scope("D_real_objective"):
            D_real_objective = tf.reduce_mean(-tf.log(D_out + 1e-8))
        with tf.name_scope("D_fake_objective"):
            D_fake_objective = tf.reduce_mean(-tf.log(1 - DG + 1e-8))
        with tf.name_scope("D_objective"):
            D_objective = D_real_objective + D_fake_objective
        with tf.name_scope("G_objective"):
            G_real_objective = tf.reduce_mean(-tf.log(DG + 1e-8))
            G_objective = G_real_objective

        # Optimizers (each restricted to its own network's weights)
        D_optimizer = cfg.disc_optimizer(**cfg.disc_optimizer_args)
        G_optimizer = cfg.gen_optimizer(**cfg.gen_optimizer_args)
        D_cost = D_optimizer.minimize(D_objective, var_list=D.trainable_weights)
        G_cost = G_optimizer.minimize(G_objective, var_list=G.trainable_weights)

        # Logging costs
        drealcostsumm = tf.summary.scalar("D_real_cost", D_real_objective)
        dfakecostsumm = tf.summary.scalar("D_fake_cost", D_fake_objective)
        gcostsumm = tf.summary.scalar("G_cost", G_real_objective)

        # Logging images
        constimgpl = tf.placeholder(tf.float32, shape=(1, cfg.npx, cfg.npx, 3))
        consttrueimgpl = tf.placeholder(tf.float32, shape=(1, cfg.npx, cfg.npx, 3))
        imgpl = tf.placeholder(tf.float32, shape=(1, cfg.npx, cfg.npx, cfg.channels))
        trueimgpl = tf.placeholder(tf.float32, shape=(1, cfg.npx, cfg.npx, cfg.channels))
        imgsummaries = [
            tf.summary.image("Generated_const", constimgpl),
            tf.summary.image("Ground_truth_const", consttrueimgpl),
            tf.summary.image("Generated", imgpl),
            tf.summary.image("Ground_truth", trueimgpl)
        ]
        imgsumm = tf.summary.merge(imgsummaries)

        # Logging weights histograms
        # NOTE(review): `i` is reset per layer but never incremented, so
        # every histogram tag ends in "0" — confirm whether per-weight
        # numbering was intended.
        weightsumms = []
        for layer in D.layers:
            i = 0
            for vect in layer.trainable_weights:
                weightsumms.append(tf.summary.histogram(
                    "D_" + layer.name + str(i), vect))
        for layer in G.layers:
            i = 0
            for vect in layer.trainable_weights:
                weightsumms.append(tf.summary.histogram(
                    "G_" + layer.name + str(i), vect))
        weightsum = tf.summary.merge(weightsumms)

        # Setting up the training
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(
            logs_dir + os.sep + cfg.name, tf.get_default_graph())
        os.mkdir(checkpoints_dir)
        writer.flush()

        # Do the actual training
        for epoch in range(cfg.epochs):
            bar = progressbar.ProgressBar(maxvalue=cfg.epoch_iters,
                                          redirect_stdout=True)
            print("----------------------------------------------------Epoch " + str(epoch) + "----------------------------------------------------")
            for it in bar(range(int(cfg.dataset_size / cfg.batch_size))):
                # Training D
                x_real, c_real = get_data()
                c_fake = get_gen()
                noise = get_noise(cfg.batch_size)
                _, drealout, dfakeout = sess.run(
                    [D_cost, drealcostsumm, dfakecostsumm],
                    feed_dict={X: x_real, Z: noise, C: c_real, Cf: c_fake})

                # Training G
                c_fake = get_gen()
                noise = get_noise(cfg.batch_size)
                _, gout = sess.run([G_cost, gcostsumm],
                                   feed_dict={Z: noise, Cf: c_fake})

                # Logging losses (t = global step index)
                t = int(cfg.dataset_size / cfg.batch_size) * epoch + it
                writer.add_summary(drealout, t)
                writer.add_summary(dfakeout, t)
                writer.add_summary(gout, t)

            # Epoch ended
            # Logging metrics on validation set
            curmets = {}
            generated = []
            valid_data = []
            valid_consts = []
            bar = progressbar.ProgressBar(
                maxvalue=int(cfg.valid_size / cfg.batch_size),
                redirect_stdout=True)
            print("Generating on validation")
            for i in bar(range(int(cfg.valid_size / cfg.batch_size))):
                real_imgs = get_valid_data()
                consts = get_valid_const()
                # Skip a trailing partial batch.
                if len(real_imgs) == cfg.batch_size:
                    noise = get_noise(cfg.batch_size)
                    generated.extend(list(sess.run(
                        G_out, feed_dict={Z: noise, Cf: consts})))
                    valid_data.extend(list(real_imgs))
                    valid_consts.extend(list(consts))
            generated = np.asarray(generated)
            valid_data = np.asarray(valid_data)
            valid_consts = np.asarray(valid_consts)
            for m in cfg.metrics:
                if m not in curmets:
                    curmets[m] = []
                curmets[m].append(cfg.metrics[m](valid_data, generated))
            metricslist = [
                tf.Summary.Value(tag="MSE",
                                 simple_value=metrics.mse(generated, valid_consts)),
                tf.Summary.Value(tag="FID",
                                 simple_value=fid(valid_data, generated))
            ]
            for m in curmets.keys():
                metricslist.append(tf.Summary.Value(
                    tag=m, simple_value=np.mean(curmets[m])))
            metricsout = tf.Summary(value=metricslist)

            # Logging weights histograms
            weightout = sess.run(weightsum)

            # Logging images (uses the last training batch of this epoch)
            print("Logging images")
            true_img = np.expand_dims(x_real[0], axis=0)
            const = np.expand_dims(c_real[0], axis=0)
            noise = get_noise(1)
            img = sess.run(G_out, feed_dict={Z: noise, Cf: const})
            imgout = sess.run(
                imgsumm,
                feed_dict={
                    trueimgpl: true_img,
                    imgpl: img,
                    constimgpl: log.constraints_image(img, const),
                    consttrueimgpl: log.constraints_image(true_img, const)
                })
            writer.flush()

            # Writing all logs as tensorboard
            writer.add_summary(metricsout, epoch)
            writer.add_summary(imgout, epoch)
            writer.add_summary(weightout, epoch)
            writer.flush()

            # Saving weights
            if checkpoints:
                G.save(checkpoints_dir + os.sep + "G_" + str(epoch) + ".hdf5",
                       include_optimizer=False)
                D.save(checkpoints_dir + os.sep + "D_" + str(epoch) + ".hdf5",
                       include_optimizer=False)

        # Run end
        writer.close()
    tf.reset_default_graph()
# автокорреляции и частные автокорреляции acf_pacf(remainder) # поиск лучших параметров # sarima_best_params(remainder, 24, 0, 0, 4, 3) # найденные параметры для 27.11.2019: best_params = [2, 3, 1, 2] # построение модели model = sm.tsa.SARIMAX(remainder, order=(best_params[0], 0, best_params[1]), seasonal_order=(best_params[2], 0, best_params[3], 24)).fit(disp=False) # вывод информации о модели print(model.summary()) # проверка остатков модели на случайность ljungbox(model.resid) acf_pacf(model.resid) # SARIMA прогноз остатков remainder_pred = model.forecast(48) # итоговый прогноз total_pred = trend_pred + seasonal_pred + remainder_pred[24:] # оценка прогноза по метрикам nnf_val = nnfmetrics(total_pred, test_set, plan_set) mse_val = mse(test_set, total_pred) mape_val = mape(test_set, total_pred) acc_val = accuracy(test_set, total_pred) print('Оценка по NNFMETRICS = ', nnf_val) print('Оценка по MSE = ', mse_val) print('Оценка по MAPE = ', mape_val) print('Точность прогноза = ', acc_val) # отрисовка графика plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
def check_results(self, correct_answers):
    """Log the MSE between the stored predictions and the expected answers."""
    error = mse(self.predictions, correct_answers)
    logging.info("ERROR = {}".format(error))
def train_metrics(self, y_true, pred):
    """Training-time metric dict: the model's loss plus MSE of the
    predictions against the targets."""
    result = {'loss': pred['loss']}
    result['mse'] = metrics.mse(y_true, pred['y'])
    return result
def pls(matrix,vector,factores,ejeX=None,printR2=False,grafico=False,name=' ',rango=' '):
    # in: matrix=list of lists, vector=list, factores=int
    # in(opt): ejeX=list (x axis / wavelengths), printR2=boolean,
    #          grafico=boolean (plot results), name=string, rango=string
    # out: float (R^2 of validation vs prediction)
    # fn: Execute Partial Least Squares with `matrix` as training data,
    #     `vector` as target values and `factores` as number of
    #     iterations. Even-indexed samples go to the model split, odd
    #     to validation. (NOTE: Python 2 code — print statements.)
    Y=vector[:]
    X=matrix[:]
    X_model=[]
    X_val=[]
    Y_model=[]
    Y_val=[]
    Y_pred=[]
    if(len(X)!=len(Y)):
        sys.exit('Distinta cantidad de muestras entre Espectros y SS')
    # Alternate samples between model (even) and validation (odd) splits.
    for i in range(len(X)):
        if(i%2==0):
            X_model.append(X[i])
            Y_model.append(Y[i])
        else:
            X_val.append(X[i])
            Y_val.append(Y[i])
    pls=plsLib.PLS(factores)
    pls.learn(X_model,Y_model)
    for i in X_val:
        Y_pred.append(float(pls.pred(i)))
    # Linear fit of predictions vs validation targets to get R^2.
    slope, intercept, r_value, p_value, std_err = stats.linregress(Y_val,Y_pred)
    if(printR2==True):
        print 'R^2: '+str(r_value**2)
        print 'R^2: '+str(metrics.r2_corr(Y_val,Y_pred))+' MSE: '+str(metrics.mse(Y_val,Y_pred))
        # Table of (validation, prediction) pairs.
        auxPrint=[[Y_val[i],round(Y_pred[i],2)] for i in range(len(Y_val))]
        auxPrint.insert(0,['Validation','Prediction'])
        pp=pprint.PrettyPrinter(indent=1)
        pp.pprint(auxPrint)
    if(grafico==True):
        # Top panel: validation vs prediction series, annotated with
        # R^2 and MSE.
        fig=plt.figure()
        ax1=plt.subplot2grid((2,2),(0,0),colspan=2)
        plt.plot(Y_val,'r')
        plt.plot(Y_pred,'g')
        plt.legend(('Validation','Prediction'),loc=0)
        plt.text(0.5,0.9,"R^2="+str(round(r_value**2,3)),horizontalalignment='center',verticalalignment='center',transform=ax1.transAxes,size='large',weight='heavy')
        plt.text(0.5,0.8,'MSE='+str(metrics.mse(Y_val,Y_pred)),horizontalalignment='center',verticalalignment='center',transform=ax1.transAxes,size='large',weight='heavy')
        if(name!=None):
            plt.title('Prediction of SS '+name+' '+rango, size='large',weight='normal', family='serif')
        xMax=max(max(Y_val),max(Y_pred))
        xMin=min(min(Y_val),min(Y_pred))
        # Bottom-left panel: prediction vs validation scatter.
        ax2=plt.subplot2grid((2,2),(1,0))
        plt.scatter(Y_val,Y_pred)
        ax2.set_ylabel('Prediction', size='medium',weight='normal', family='serif')
        ax2.set_xlabel('Validation', size='medium',weight='normal', family='serif')
        plt.xlim(xMin,xMax)
        plt.ylim(xMin,xMax)
        plt.autoscale(enable=True,axis='both',tight='False')
        plt.axis('scaled')
        # Bottom-right panel: the input spectra.
        ax3=plt.subplot2grid((2,2),(1,1))
        plt.title('Fruits Spectra '+name+' '+rango, size='large',weight='normal', family='serif')
        if(ejeX!=None):
            plt.plot(ejeX,transpose(X))
            ax3.set_xlabel('Longitud de onda [nm]', size='medium',weight='normal', family='serif')
        else:
            plt.plot(transpose(X))
        plt.show()
    return r_value**2
tabulate(data, headers=["Date", "Sales", "Moving Average", "α=0.1", "α=0.5"], tablefmt='fancy_grid', floatfmt='.4f')) sales = data['Sales'] elements = seasonal_decompose(sales, model='multiplicative') data.plot() elements.plot() plt.show() print( tabulate( [[ "MSE", mse(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]), mse(data['Sales'], data['α=0.1']), mse(data['Sales'], data['α=0.5']) ], [ "MAPE", mape(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]), mape(data['Sales'], data['α=0.1']), mape(data['Sales'], data['α=0.5']) ], [ "LAD", lad(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]), lad(data['Sales'], data['α=0.1']), lad(data['Sales'], data['α=0.5']) ]],