def test():
    # testing the output of the vision model
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    with open(ids_fn, "r") as fo:
        ids = [x.strip('\n') for x in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [x.strip('\n') for x in fo.readlines()]
    value_bow, gtKwDict, captionsDict = data_io.get_semValues(
        ddir.labels_csv, ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    vis_multi_sem = np.zeros([len(ids_sem), len(mapping)])
    vis_multi_exact, gt_multi = [], []
    for i in range(len(ids)):
        # caption = ' '.join(captions_dict[ids[i]])
        idx = [ids[i]]
        vis_vec = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        # GT for evaluating exact match kw pred metrics
        caption_vec = np.stack([caption_bow_vec1[x] for x in idx], axis=0)
        vis_vec_mapped = vis_vec[0][mapping]
        vis_multi_exact.append(vis_vec)
        gt_multi.append(caption_vec)
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            vis_multi_sem[ids_sem.index(idxnew)] = vis_vec_mapped
    vis_multi_exact, gt_multi = np.concatenate(
        vis_multi_exact, axis=0), np.concatenate(gt_multi, axis=0)
    eer, ap, prec10, precN = utils.get_metrics(vis_multi_exact.T, gt_multi.T)
    pcont = "Overall ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f" % (
        eer, ap, prec10, precN)
    print(pcont)
    with open("vis_exact.csv", "a+") as fo:
        fo.write(str(prec10 * 100) + ',' + str(precN * 100) + ',' +
                 str(eer * 100) + ',' + str(ap * 100) + '\n')
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        vis_multi_sem, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open("vision_sem.csv", "a+") as fo:
        fo.write(str(spearman) + ',' + str(prec10 * 100) + ',' +
                 str(precN * 100) + ',' + str(eer * 100) + ',' +
                 str(ap * 100) + '\n')
def eval_model(args):
    with tf.Session() as sess:
        iterator = BigEarthNet(args['test_tf_record_files'],
                               args['batch_size'], 1, 0,
                               args['label_type']).batch_iterator
        nb_iteration = int(
            np.ceil(float(args['test_size']) / args['batch_size']))
        iterator_ins = iterator.get_next()

        model = importlib.import_module(
            'models.' + args['model_name']).DNN_model(args['label_type'],
                                                      args['modality'])
        model.create_network()

        variables_to_restore = tf.global_variables()
        metric_names, metric_means, metric_update_ops = get_metrics(
            model.multi_hot_label, model.predictions, model.probabilities)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        model_saver = tf.train.Saver(max_to_keep=0,
                                     var_list=variables_to_restore)
        model_file = args['model_file']
        model_saver.restore(sess, model_file)

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(
            os.path.join(args['out_dir'], 'logs', 'test'), sess.graph)

        iteration_idx = 0
        progress_bar = tf.contrib.keras.utils.Progbar(target=nb_iteration)
        eval_res = {}
        while True:
            try:
                batch_dict = sess.run(iterator_ins)
                iteration_idx += 1
                progress_bar.update(iteration_idx)
            except tf.errors.OutOfRangeError:
                print()
                means = sess.run(metric_means[0])
                for idx, name in enumerate(metric_names[0]):
                    eval_res[name] = str(means[idx])
                    print(name, means[idx])
                break
            sess_res = sess.run([metric_update_ops, summary_op] +
                                metric_means[1],
                                feed_dict=model.feed_dict(batch_dict))
            summary_writer.add_summary(sess_res[1], iteration_idx)
            metric_means_res = sess_res[2:]
            for idx, name in enumerate(metric_names[1]):
                eval_res[name] = str(metric_means_res[idx])
                print(name, metric_means_res[idx])

        # json.dump writes text, so the file must be opened in text mode
        with open(os.path.join(args['out_dir'], 'eval_result.json'), 'w') as f:
            json.dump(eval_res, f)
def collect(self):
    # Request data from the ambari Collect Host API.
    # Request exactly the system-level information we need from the node;
    # beans is returned as a list.
    try:
        count = 0
        # In case the metrics we need are not yet present at the JMX url,
        # sleep and retry in a loop until the KEY metrics appear
        # (at most 5 attempts).
        while count < 5:
            beans = utils.get_metrics(self._url)
            if 'init_total_count_tables' not in beans:
                count += 1
                time.sleep(1)
                continue
            else:
                break
    except Exception:
        logger.info("Can't scrape metrics from url: {0}".format(self._url))
        return
    # set up all metrics with labels and descriptions.
    self._setup_labels(beans)
    # add metric value to every metric.
    self._get_metrics(beans)
    # update hiveserver2 metrics with common metrics
    common_metrics = common_metrics_info(self._cluster, beans, "hive",
                                         "hiveserver2")
    self._hadoop_hiveserver2_metrics.update(common_metrics())
    for service in self._merge_list:
        for metric in self._hadoop_hiveserver2_metrics[service]:
            yield self._hadoop_hiveserver2_metrics[service][metric]
def main():
    cluster = "cluster_indata"
    beans = utils.get_metrics("http://10.110.13.164:50070/jmx")
    component = "hdfs"
    service = "namenode"
    common_metrics = common_metrics_info(cluster, beans, component, service)
    print(common_metrics())
def run_cv_pred(X, y, clf, n_folds):
    """
    Run n-fold cross validation returning a prediction for every row of X
    :param X: A scipy sparse feature matrix
    :param y: The target labels corresponding to rows of X
    :param clf: The classifier to fit and predict with
    :param n_folds: The number of cross-validation folds
    :return: An array of predictions with the same shape as y
    """
    # Construct a kfolds object
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
    splits = skf.split(X, y)
    y_pred = y.copy()

    # Iterate through folds
    for idx, (train_index, test_index) in enumerate(splits):
        X_train, X_test = X[train_index], X[test_index]
        y_train = y[train_index]
        # Initialize a classifier with key word arguments
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        macro, micro = utils.get_metrics(preds, y[test_index])
        print('run ', idx)
        print('macro: ', macro)
        print('micro: ', micro)
        y_pred[test_index] = preds

    return y_pred
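# A minimal usage sketch for run_cv_pred, assuming scikit-learn is available
# and that this module's utils.get_metrics is importable. The synthetic data
# and the names X_demo / y_demo are hypothetical, purely for illustration.
if __name__ == '__main__':
    import numpy as np
    from scipy.sparse import csr_matrix
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X_demo = csr_matrix(rng.rand(100, 20))  # 100 rows of 20 sparse features
    y_demo = rng.randint(0, 2, size=100)    # binary targets
    cv_preds = run_cv_pred(X_demo, y_demo, LogisticRegression(), n_folds=5)
    assert cv_preds.shape == y_demo.shape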
def testSem():
    network.eval()
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    with open(ids_fn, "r") as fo:
        ids = [x.strip('\n') for x in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [x.strip('\n') for x in fo.readlines()]
    mapping = data_io.get_mapping(ddir.flickr8k_keywords, ddir.keywords_test)
    value_bow = data_io.get_semValues(ddir.labels_csv, ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    pred_multi = np.zeros([len(ids_sem), len(mapping)])
    for i in range(len(ids)):
        idx = [ids[i]]
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
            Xs = np.transpose(Xs, (0, 2, 1))
            Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
            caption_Ys = np.stack([caption_bow_vec[x] for x in idx], axis=0)
            pred = getKWprob(Xs)
            predMapped = pred[0][mapping]
            pred_multi[ids_sem.index(idxnew)] = predMapped
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        pred_multi, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open(saveLog, "a+") as fo:
        fo.write(pcont + "\n")
def evaluate_test_sample(X, y, clf, nreps, name, results, train_pct):
    """
    Calculate results for this clf at various train / test split percentages
    :param X: features
    :param y: targets
    :param clf: detector
    :param nreps: number of random repetitions
    :param name: name of the detector
    :param results: A tuple of Pandas DataFrames containing (macro, micro) F1 results
    :param train_pct: The percentage of the data used for training
    :return: A tuple of Pandas DataFrames containing (macro, micro) F1 results
    """
    seed = 0
    for rep in range(nreps):
        # setting a random seed will cause the same sample to be generated each time
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_pct, random_state=seed, stratify=y)
        seed += 1
        clf.fit(X_train, y_train)
        try:
            # Gradient boosted trees do not accept sparse matrices in the predict function currently
            preds = clf.predict(X_test)
        except TypeError:
            preds = clf.predict(X_test.todense())
        macro, micro = utils.get_metrics(y_test, preds, auc=False)
        results[0].loc[name, rep] = macro
        results[1].loc[name, rep] = micro
    return results
def train(self, ep: int):
    '''Train the model.

    Args:
        ep(int): the current epoch
    '''
    self.model.train()
    size = len(self.train_loader)
    # Run the training loop
    for step, (X, y, _) in tqdm(enumerate(self.train_loader),
                                desc='Epoch {:3d}'.format(ep),
                                total=size):
        X = X.to(self.device)  # type: torch.Tensor
        y = y.to(self.device)  # type: torch.Tensor
        self.optimizer.zero_grad()
        y_ = self.model(X)  # type: torch.Tensor
        loss = self.criterion(y_, y)  # type: torch.Tensor
        loss.backward()
        self.optimizer.step()
        y_ = y_.argmax(dim=1).cpu().numpy()
        y = y.cpu().numpy()
        # Compute running metrics
        miou, _, pacc = get_metrics(y, y_)
        # Log to tensorboard
        n_iter = ep * size + step
        self.writer.add_scalar('train/pacc', pacc, n_iter)
        self.writer.add_scalar('train/mIoU', miou, n_iter)
        self.writer.add_scalar('train/loss', loss.item(), n_iter)
def validate(self, ep: int):
    '''Validate the model.

    Args:
        ep(int): the current epoch
    '''
    mious, paccs = [], []
    total_loss = 0
    self.model.eval()
    with torch.no_grad():
        for X, y, _ in tqdm(self.val_loader, desc='Validating'):
            X, y = X.to(self.device), y.to(self.device)
            y_ = self.model(X)
            loss = self.criterion(y_, y)
            total_loss += loss.item()
            y_ = y_.argmax(dim=1)
            y_gd = y.cpu().numpy()
            y_pred = y_.cpu().numpy()
            miou, _, pacc = get_metrics(y_gd, y_pred)
            mious.append(miou)
            paccs.append(pacc)
    avg_loss = total_loss / len(self.val_loader)
    miou = np.average(mious)
    pacc = np.average(paccs)
    print(ep, miou, pacc)
    # Log to tensorboard
    self.writer.add_scalar('test/pacc', pacc, ep)
    self.writer.add_scalar('test/mIoU', miou, ep)
    self.writer.add_scalar('test/avg_loss', avg_loss, ep)
def collect(self):
    self._clear_init()
    # Send an HTTP request to fetch metric data from the JMX URL.
    # The response holds the JSON array of the corresponding JMX beans.
    try:
        # Issue the HTTP request for the JMX JSON data
        beans = utils.get_metrics(self._url)
    except Exception:
        logger.info("Can't scrape metrics from url: {0}".format(self._url))
    else:
        # For each MBean we monitor, set up the labels and descriptions
        # of its metrics
        self._setup_metrics_labels(beans)
        # Set each metric value
        self._get_metrics(beans)
        # Merge the common metrics into the NameNode-specific metrics
        common_metrics = common_metrics_info(self._cluster, beans, "hdfs",
                                             "namenode")
        self._hadoop_namenode_metrics.update(common_metrics())
    # Walk every metric category (NameNode-specific as well as common)
    # and yield each metric with its labels
    for service in self._merge_list:
        for metric in self._hadoop_namenode_metrics[service]:
            yield self._hadoop_namenode_metrics[service][metric]
def initialised_embedding_scenario():
    emd1 = pd.read_csv('../../local_results/tf0.emd',
                       header=None, index_col=0, skiprows=1, sep=" ")
    emd1.index.name = None
    emd2 = pd.read_csv('../../local_results/tf_1in10000_init.emd',
                       header=None, index_col=0, skiprows=1, sep=" ")
    emd2.index.name = None
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    temp1 = emd1.join(rf_features, how='left')
    y_train = temp1['target_churned'].values.astype(int)
    print('training counts', pd.Series(y_train).value_counts())
    test_emd = utils.subtract_intersection(emd2, emd1)
    # temp2 = test_emd.join(rf_features, how='left')
    temp2 = emd2.join(rf_features, how='left')
    y_test = temp2['target_churned'].values.astype(int)
    print('test counts', pd.Series(y_test).value_counts())
    for clf in classifiers:
        clf.fit(emd1.values, y_train)
        preds = clf.predict_proba(emd2.values)[:, 1]
        # preds = clf.predict_proba(test_emd.values)[:, 1]
        print(len(preds), preds.sum())
        macro, micro = utils.get_metrics(y_test, preds)
        print(macro, micro)
def collect(self):
    # Request data from the ambari Collect Host API.
    # Request exactly the system-level information we need from the node;
    # beans is returned as a list.
    try:
        beans = utils.get_metrics(self._url)
    except Exception:
        logger.info("Can't scrape metrics from url: {0}".format(self._url))
    else:
        # set up all metrics with labels and descriptions.
        self._setup_labels(beans)
        # add metric value to every metric.
        self._get_metrics(beans)
        # update regionserver metrics with common metrics
        common_metrics = common_metrics_info(self._cluster, beans, "hbase",
                                             "regionserver")
        self._hadoop_regionserver_metrics.update(common_metrics())
    for service in self._merge_list:
        for metric in self._hadoop_regionserver_metrics[service]:
            yield self._hadoop_regionserver_metrics[service][metric]
def run_repetitions(X, y, names, clf, reps, train_pct=0.8):
    """
    Run repeated experiments on random train test splits of the data
    :param X: an iterable of numpy arrays
    :param y: a numpy array of target variables
    :param names: the names of the data sets. Size should match X
    :param clf: a scikit-learn classifier
    :param reps: the number of repetitions to run for each dataset
    :param train_pct: the percentage of the data to use for training. The rest will be held out for the test set.
    :return: a DataFrame of macro F1 results with one row per dataset
    """
    results = np.zeros(shape=(len(X), reps))
    min_split = min(train_pct, 1 - train_pct)
    assert len(y) * min_split > 1, \
        'Only {} data points is not enough for a train split of {}'.format(
            len(y), train_pct)
    for rep in range(reps):
        for idx, dataset in enumerate(X):
            try:
                X_train, X_test, y_train, y_test = train_test_split(
                    dataset, y, train_size=train_pct, stratify=y)
            except ValueError:
                print('could not stratify as too many classes for train '
                      'percentage {}'.format(train_pct))
                print('performing unstratified train test split instead')
                X_train, X_test, y_train, y_test = train_test_split(
                    dataset, y, train_size=train_pct)
            clf.fit(X_train, y_train)
            probs = clf.predict_proba(X_test)
            macro, micro = utils.get_metrics(y_test, probs, auc=False)
            results[idx, rep] = macro
    train = []
    std_error = sem(results, axis=1)
    mean = results.mean(axis=1)
    for idx, dataset in enumerate(X):
        clf.fit(dataset, y)
        probs = clf.predict_proba(dataset)
        macro, micro = utils.get_metrics(y, probs, auc=False)
        train.append(macro)
    df = pd.DataFrame(data=results, index=names)
    df['mean'] = mean
    df['train'] = train
    df['sde'] = std_error
    return df
def main(argv):
    config_file = argv[1]

    global env
    env = Env()
    env.read_env(_get_env_path(config_file))

    yaml.add_implicit_resolver('!envvar', envvar_matcher,
                               Loader=yaml.FullLoader)
    yaml.add_constructor('!envvar', envvar_constructor,
                         Loader=yaml.FullLoader)
    config = yaml.load(open(config_file), Loader=yaml.FullLoader)

    if argv[2] == "upload":
        return upload(config, argv)

    ffrom = datetime.date(*map(int, argv[2].split("-")))
    tto = datetime.date(*map(int, argv[3].split("-")))
    extra_args = argv[4:]
    skip_metrics = "--skip-metrics" in extra_args

    js_metrics = codecs.open(config["js_metrics"], "w", encoding="utf-8")
    js_metrics.write("metrics = [\n")
    for metric in get_metrics(config):
        metric_class = plugins.get_metric_class(metric["type"])
        if not metric_class:
            print("Error: no metric of type %s exists" % metric["type"],
                  file=sys.stderr)
            sys.exit(3)
        metric_obj = metric_class(config, metric, metric["name"])
        if not skip_metrics:
            print("Generating metric %s" % metric["name"])
            data = metric_obj.generate(ffrom, tto)
            save_data(data, metric["name"], config)
        js_metrics.write(metric_obj.js_line())
    js_metrics.write("];\n")
    js_metrics.write("\n")
    js_metrics.write("\n")
    js_metrics.write("pills = [\n")
    for pill in config["pills"]:
        js_metrics.write("jQuery.parseJSON('%s'),\n" % json.dumps(pill))
    js_metrics.write("];\n")
    js_metrics.write("\n")
    print("Saving js_metrics file")
    js_metrics.write("periodChoices = [\n")
    for period_choice in config["period_choices"]:
        js_metrics.write('    {value: %(value)s, name: "%(name)s"},\n' %
                         period_choice)
    js_metrics.write("];\n")
def test_index_factor_return():
    print("test index factor return")
    files = [f for f in os.listdir(const.INDEX_FACTOR_DIR)]
    pnl = utils.get_all_panel(const.INDEX_FACTOR_DIR, files)
    # Compute the returns
    pnl.ix[:, :, 'return'] = pnl.minor_xs('close').pct_change()
    # Compute the factor return of the momentum factor
    k = 60
    df = pnl.minor_xs('return').rolling(window=k).mean()
    return_df = pnl.minor_xs('return')
    daily_return = analysis.factor_return(df, return_df, threshold=0.2)
    daily_return = daily_return[daily_return != 0]
    # print(daily_return)
    utils.get_metrics(daily_return)
    acc_ret = utils.get_accumulated_return(daily_return)
    acc_ret.plot()
    plt.show()
def test(epoch, subset):
    network.eval()
    pcont4 = " "
    ids_fn = ddir.sp_dev_ids_fn if subset == "dev" else ddir.sp_test_ids_fn
    with open(ids_fn, "r") as fo:
        ids = [x.strip() for x in fo.readlines()]
    pred_multi, grt_multi, pred_multiBoW, grt_multiBoW, vis_multi = \
        [], [], [], [], []
    for i in range(0, len(ids), args.test_batch_size):
        idx = ids[i:i + args.test_batch_size]
        Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        Xs = np.transpose(Xs, (0, 2, 1))
        # GT from vision model
        vision_Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        # GT for evaluating exact match kw pred metrics
        caption_Ys1 = np.stack([caption_bow_vec1[x] for x in idx], axis=0)
        # GT for bow loss
        caption_Ys2 = np.stack([caption_bow_vec2[x] for x in idx], axis=0)
        if args.mt:
            l, lBoW, pred, predBoW = run_net(Xs, vision_Ys, caption_Ys2)
        else:
            l, pred = run_net(Xs, vision_Ys)
        pred_multi.append(pred)
        grt_multi.append(caption_Ys1)
        if args.mt:
            pred_multiBoW.append(predBoW)
            grt_multiBoW.append(caption_Ys2)
    if args.mt:
        pred_multiBoW, grt_multiBoW = np.concatenate(
            pred_multiBoW, axis=0), np.concatenate(grt_multiBoW, axis=0)
        pred_multiBoW = np.concatenate(
            (pred_multiBoW,
             np.zeros((pred_multiBoW.shape[0], grt_multiBoW.shape[1] -
                       pred_multiBoW.shape[1])).astype(np.float32)),
            axis=1)
        if subset == 'test':
            # On keyword spotting
            # precisionBoW, recallBoW, fscoreBoW = utils.get_fscore(pred_multiBoW >= args.threshold, grt_multiBoW)
            # pcont3 = "Threshold = %.1f: precision BoW: %.3f, recall BoW: %.3f, fscore BoW: %.3f" % (args.threshold, precisionBoW, recallBoW, fscoreBoW)
            eer, ap, prec10, precN = utils.get_metrics(pred_multiBoW.T,
                                                       grt_multiBoW.T)
            pcont5 = "Overall ratings (on BoW): EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f" % (
                eer, ap, prec10, precN)
            with open("aux_exact.csv", "a+") as fo:
                fo.write(args.mtType + ',' + str(args.alpha) + ',' +
                         str(args.n_bow2) + ',' + str(prec10 * 100) + ',' +
                         str(precN * 100) + ',' + str(eer * 100) + ',' +
                         str(ap * 100) + '\n')
            # print(pcont3 + "\n")
            print(pcont5 + "\n")
            # with open(saveLog, "a+") as fo:
            #     fo.write("\n" + pcont5 + "\n")
    return 0
def train_fg(model, optim, loss, features, labels, train_g, test_g, test_mask,
             device, n_epochs, thresh, compute_metrics=True):
    """
    A full graph version of RGCN training
    """
    duration = []
    for epoch in range(n_epochs):
        tic = time.time()
        loss_val = 0.

        pred = model(train_g, features.to(device))
        l = loss(pred, labels)
        optim.zero_grad()
        l.backward()
        optim.step()
        loss_val += l

        duration.append(time.time() - tic)
        metric = evaluate(model, train_g, features, labels, device)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | f1 {:.4f} ".format(
            epoch, np.mean(duration), loss_val, metric))

    class_preds, pred_proba = get_model_class_predictions(model, test_g,
                                                          features, labels,
                                                          device,
                                                          threshold=thresh)

    if compute_metrics:
        acc, f1, p, r, roc, pr, ap, cm = get_metrics(class_preds, pred_proba,
                                                     labels.numpy(),
                                                     test_mask.numpy(), './')
        print("Metrics")
        print("""Confusion Matrix:
{}
f1: {:.4f}, precision: {:.4f}, recall: {:.4f}, acc: {:.4f}, roc: {:.4f}, pr: {:.4f}, ap: {:.4f}
""".format(cm, f1, p, r, acc, roc, pr, ap))

    return model, class_preds, pred_proba
def run_cv_pred(X, y, clf, n_folds, name, results, debug=True):
    """
    Run n-fold cross validation returning a prediction for every row of X
    :param X: A scipy sparse feature matrix
    :param y: The target labels corresponding to rows of X
    :param clf: The classifier to fit and predict with
    :param n_folds: The number of cross-validation folds
    :return: predicted probabilities for every row of X and the updated results
    """
    # Construct a kfolds object
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
    splits = skf.split(X, y)
    y_pred = np.zeros(shape=(len(y), 2))

    # Iterate through folds
    for idx, (train_index, test_index) in enumerate(splits):
        X_train, X_test = X[train_index, :], X[test_index, :]
        assert len(set(train_index).intersection(test_index)) == 0
        y_train = y[train_index]
        # Initialize a classifier with key word arguments
        clf.fit(X_train, y_train)
        try:
            # Gradient boosted trees do not accept sparse matrices in the predict function currently
            preds = clf.predict_proba(X_test)
        except TypeError:
            preds = clf.predict_proba(X_test.todense())
        macro, micro = utils.get_metrics(y[test_index], preds)
        results[0].loc[name, idx] = macro
        results[1].loc[name, idx] = micro
        y_pred[test_index, :] = preds

    # add on training results
    clf.fit(X, y)
    try:
        # Gradient boosted trees do not accept sparse matrices in the predict function currently
        preds = clf.predict_proba(X)
    except TypeError:
        preds = clf.predict_proba(X.todense())
    macro, micro = utils.get_metrics(y, preds)
    results[0].loc[name, n_folds] = macro
    results[1].loc[name, n_folds] = micro
    # y_pred[test_index] = preds
    return y_pred, results
def collect_metric(counter):
    counter += 1
    timer = threading.Timer(_period, collect_metric, (counter, ))
    timer.start()
    if counter >= count:
        timer.cancel()
    try:
        data = utils.get_metrics(self.db_info)
        internal_metrics.append(data)
    except Exception as err:
        print("[GET Metrics]Exception:", err)
def collect_metric(counter):
    counter += 1
    timer = threading.Timer(_period, collect_metric, (counter, ))
    timer.start()
    if counter >= count:
        timer.cancel()
    try:
        data = utils.get_metrics(self.db_info)
        internal_metrics.append(data)
    except MySQLdb.Error as e:
        print("[GET Metrics]Exception:%s" % e)
def test_accuracy(x, y, testx, testy, dist='Multinomial'):
    py = []
    if dist in ('Multinomial', 'MultivariateBernoulli'):
        probs = get_class_conditional_probs(x, y, dist=dist)
        for q in testx:
            py.append(classify(x, y, q, dist=dist, probs=probs))
    elif dist == 'Normal':
        mean, var = get_class_conditional_probs(x, y, dist=dist)
        for q in testx:
            py.append(classify(x, y, q, dist=dist, mean=mean, var=var))
    return utils.get_metrics(testy, np.array(py))
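# The snippets in this collection call several project-local get_metrics
# helpers with differing signatures. For reference, a minimal sketch of the
# (macro, micro) variant used by the classification snippets might look like
# the following; it is an illustrative assumption built on scikit-learn, not
# the actual utils implementation.
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score


def get_metrics_sketch(y_true, y_pred, auc=False):
    """Return a (macro, micro) score pair for predictions y_pred."""
    y_pred = np.asarray(y_pred)
    if auc:
        # y_pred holds probabilities; score the positive-class column
        pos = y_pred[:, 1] if y_pred.ndim == 2 else y_pred
        score = roc_auc_score(y_true, pos)
        return score, score
    if y_pred.ndim == 2:
        # reduce a probability matrix to hard labels before computing F1
        y_pred = y_pred.argmax(axis=1)
    macro = f1_score(y_true, y_pred, average='macro')
    micro = f1_score(y_true, y_pred, average='micro')
    return macro, micro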
def draw_output(img_path: str, imgs: dict, show_output: bool = True):
    '''Draw the original image and the ground truth into a single file.

    Args:
        img_path(str): path of the original image, used to derive the label path
        imgs(dict<str: np.ndarray>): the output images
    '''
    assert 'Predict Image' in imgs, 'the prediction image is required'
    output_path = './output'
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    gd_path = img_path.replace('img', 'label')
    gd_array = np.array(
        Image.open(gd_path)) if os.path.exists(gd_path) else None
    # Add the ground truth to the images
    if gd_array is not None:
        imgs['Ground Truth'] = gd_array
    pred_img = imgs['Predict Image']
    # Draw the legend
    legend_array = np.zeros((100, 800), dtype=np.uint8)
    for i in range(config.num_classes):
        legend_array[:, i * 200:(i + 1) * 200] = i
    legend_array = legend_array / (config.num_classes - 1) * 255
    imgs['Legend'] = legend_array
    # Width is fixed at 15; height is a multiple of 6
    n_rows = int(np.ceil(len(imgs) / 3))
    fig_w, fig_h = 15, 6 * n_rows
    fig = plt.figure(figsize=(fig_w, fig_h))
    # Draw the images in an n_rows x 3 grid
    for i, (title, img) in enumerate(imgs.items()):
        ax = fig.add_subplot(n_rows, 3, i + 1)
        ax.set_title(title)
        if len(img.shape) == 2:
            ax.imshow(img / 3 * 255, cmap='bone')
        else:
            ax.imshow(img)
    # Compute the metrics
    if gd_array is not None:
        miou, ious, acc = get_metrics(gd_array, pred_img)
        fig.suptitle('$mIoU={:.2f}, acc={:.2f}$\n$IoUs={}$'.format(
            miou, acc, ['%.2f' % x for x in ious]))
    # Get the original file name and derive the output directory from it
    filename = os.path.basename(img_path)
    _, _, _, parent_img = extract_info_from_filename(filename)
    # Group outputs by the parent file name
    output_path = os.path.join(output_path, parent_img.replace('.png', ''))
    output_filename = os.path.join(output_path, filename)
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    fig.savefig(output_filename)
    if show_output:
        print('Output has been saved to {}.'.format(output_filename))
def predict(config, args):
    gpu_manage(args)
    dataset = Dataset(args.test_dir)
    data_loader = DataLoader(dataset=dataset, num_workers=config.threads,
                             batch_size=1, shuffle=False)

    gen = UNet(in_ch=config.in_ch, out_ch=config.out_ch, gpu_ids=args.gpu_ids)
    param = torch.load(args.pretrained)
    gen.load_state_dict(param)

    criterionMSE = nn.MSELoss()
    if args.cuda:
        gen = gen.cuda(0)
        criterionMSE = criterionMSE.cuda(0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    avg_mse = 0
    avg_psnr = 0
    avg_ssim = 0
    with torch.no_grad():
        for i, batch in enumerate(tqdm(data_loader)):
            input_, ground_truth = Variable(batch[0]), Variable(batch[1])
            filename = batch[2][0]
            input_ = F.interpolate(input_, size=256).to(device)
            ground_truth = F.interpolate(ground_truth, size=256).to(device)

            output = gen(input_)

            save_image_from_tensors(input_, output, ground_truth,
                                    config.out_dir, i, 0, filename)

            mse, psnr, ssim = get_metrics(output, ground_truth, criterionMSE)
            print(filename)
            print('MSE: {:.4f}'.format(mse))
            print('PSNR: {:.4f} dB'.format(psnr))
            # SSIM is dimensionless, so it is reported without units
            print('SSIM: {:.4f}'.format(ssim))

            avg_mse += mse
            avg_psnr += psnr
            avg_ssim += ssim

    avg_mse = avg_mse / len(data_loader)
    avg_psnr = avg_psnr / len(data_loader)
    avg_ssim = avg_ssim / len(data_loader)
    print('Average MSE: {:.4f}'.format(avg_mse))
    print('Average PSNR: {:.4f} dB'.format(avg_psnr))
    print('Average SSIM: {:.4f}'.format(avg_ssim))
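# For reference, a minimal sketch of an image-quality get_metrics in the
# spirit of the call above, assuming 4D (N, C, H, W) tensors scaled to
# [0, 1]; this is an illustrative assumption, not the project's actual
# helper, and the global SSIM below is a crude stand-in for the windowed
# version (see skimage.metrics.structural_similarity).
import math
import torch


def get_metrics_sketch(output: torch.Tensor, target: torch.Tensor,
                       criterion_mse: torch.nn.Module):
    """Return (mse, psnr, ssim) for a pair of image batches."""
    mse = criterion_mse(output, target).item()
    # PSNR for a peak signal value of 1.0
    psnr = 10 * math.log10(1.0 / mse) if mse > 0 else float('inf')
    # Global SSIM using batch-wide statistics
    mu_x, mu_y = output.mean(), target.mean()
    var_x, var_y = output.var(), target.var()
    cov_xy = ((output - mu_x) * (target - mu_y)).mean()
    c1, c2 = 0.01 ** 2, 0.03 ** 2
    ssim = ((2 * mu_x * mu_y + c1) * (2 * cov_xy + c2) /
            ((mu_x ** 2 + mu_y ** 2 + c1) * (var_x + var_y + c2)))
    return mse, psnr, ssim.item()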
def trainval(data_path, transformer_checkpoint_path, model_checkpoint_path):
    data = pd.read_csv(data_path)
    data["text"] = data["text"].apply(preprocess_text)

    # train
    transformer = TfidfVectorizer(ngram_range=(1, 2), max_features=100000)
    X_train = transformer.fit_transform(
        data[data["split"] == "train"]["text"].values)
    model = LogisticRegression(C=5e1, solver='lbfgs', random_state=42,
                               n_jobs=8)
    model.fit(X_train, data[data["split"] == "train"]["sentiment"])

    # validation
    X_val = transformer.transform(data[data["split"] == "val"]["text"].values)
    group_val = data[data["split"] == "val"]["source"].values
    y_val = data[data["split"] == "val"]["sentiment"].values
    preds = model.predict(X_val)
    get_metrics(y_val, preds, group_val)

    joblib.dump(transformer, transformer_checkpoint_path)
    joblib.dump(model, model_checkpoint_path)
def testSem():
    network.eval()
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    with open(ids_fn, "r") as fo:
        ids = [x.strip('\n') for x in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [x.strip('\n') for x in fo.readlines()]
    # with open(os.path.join(ddir.flickr8k_dir, "word_ids/captions_dict.pkl"), 'rb') as f:
    #     captions_dict = pkl.load(f)
    predKwList = []
    value_bow, gtKwDict, captionsDict = data_io.get_semValues(
        ddir.labels_csv, ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    pred_multi, pred_multiBoW = np.zeros(
        [len(ids_sem), len(mapping)]), np.zeros([len(ids_sem), len(mapping)])
    vis_multi = np.zeros([len(ids_sem), len(mapping)])
    for i in range(len(ids)):
        # caption = ' '.join(captions_dict[ids[i]])
        idx = [ids[i]]
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
            Xs = np.transpose(Xs, (0, 2, 1))
            Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
            visMapped = Ys[0][mapping]
            if args.mt:
                pred, predBoW = getKWprob(Xs)
                predBoWMapped = predBoW[0][mapping]
                # predBoWMapped only exists in the multitask case, so the
                # assignment has to stay inside this branch
                pred_multiBoW[ids_sem.index(idxnew)] = predBoWMapped
            else:
                pred = getKWprob(Xs)
            predMapped = pred[0][mapping]
            pred_multi[ids_sem.index(idxnew)] = predMapped
            vis_multi[ids_sem.index(idxnew)] = visMapped
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        pred_multiBoW, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open("aux_sem.csv", "a+") as fo:
        fo.write(args.mtType + ',' + str(args.alpha) + ',' + str(args.n_bow2) +
                 ',' + str(spearman) + ',' + str(prec10 * 100) + ',' +
                 str(precN * 100) + ',' + str(eer * 100) + ',' +
                 str(ap * 100) + '\n')
def run_repetitions(data, target, clf, names, reps, train_pct=0.8):
    """
    Run repeated experiments on random train test splits of the data
    :param data: an iterable of numpy arrays
    :param target: a numpy array of target variables
    :param clf: a scikit-learn classifier
    :param names: the names of the data sets. Size should match data
    :param reps: the number of repetitions to run for each dataset
    :param train_pct: the percentage of the data to use for training. The rest will be held out for the test set.
    :return: a DataFrame of results with one row per dataset
    """
    results = np.zeros(shape=(len(data), reps))
    for rep in range(reps):
        msk = np.random.rand(len(target)) < train_pct
        y_train = target[msk]
        y_test = target[~msk]
        for idx, dataset in enumerate(data):
            X_train = dataset[msk, :]
            X_test = dataset[~msk, :]
            clf.fit(X_train, y_train)
            probs = clf.predict_proba(X_test)
            res = utils.get_metrics(y_test, probs)[0]
            print('rep{0} '.format(idx), res)
            results[idx, rep] = res
    train = []
    mean = results.mean(axis=1)
    for idx, dataset in enumerate(data):
        clf.fit(dataset, target)
        probs = clf.predict_proba(dataset)
        res = utils.get_metrics(target, probs)[0]
        train.append(res)
    df = pd.DataFrame(data=results, index=names)
    df['mean'] = mean
    df['train'] = train
    return df
def test_accuracy(x, y, testx, testy, b, r, hashing_type='hamming',
                  bucket_width=None):
    bands = init_bands(x, y, b, r, hashing_type=hashing_type,
                       bucket_width=bucket_width)
    missed_points = 0
    py = []
    for q in testx:
        res = classify(q, y, bands)
        if not res:
            if missed_points == 0:
                print('Warning: Some of the points might get missed because '
                      'their hash doesn\'t match with hash of any other '
                      'points in training data.')
            missed_points += 1
            py.append(-10)
            continue
        py.append(res)
    if missed_points > 0:
        print('Total %d points were missed during classification' %
              missed_points)
        # Score only the points that were actually classified
        indices = np.where(np.array(py) != -10)
        return utils.get_metrics(np.array(testy)[indices],
                                 np.array(py)[indices])
    return utils.get_metrics(testy, py)
def evaluate(epoch, val_loader, model, loss_fn, log_writer=None):
    model.eval()
    avg_loss = 0.0
    avg_preci = 0.0
    avg_recall = 0.0
    all_labels = []
    all_preds = []
    for batch_id, data in enumerate(val_loader()):
        xd, yd = data
        xd = xd.unsqueeze(1)
        label = yd
        logits = model(xd)
        loss_val = loss_fn(logits, label)
        pred = F.softmax(logits)
        all_labels += [label.numpy()]
        all_preds += [pred.numpy()]
        preci, recall = get_metrics(label, pred)
        # Keep running means of the loss and the metrics (updated once
        # per batch)
        avg_preci = (avg_preci * batch_id + preci) / (1 + batch_id)
        avg_recall = (avg_recall * batch_id + recall) / (1 + batch_id)
        avg_loss = (avg_loss * batch_id + loss_val.numpy()[0]) / (1 + batch_id)

        msg = f'eval epoch:{epoch}, batch:{batch_id}'
        msg += f'|{len(val_loader)}'
        msg += f',loss:{avg_loss:.3}'
        msg += f',recall:{avg_recall:.3}'
        msg += f',preci:{avg_preci:.3}'
        if batch_id % 20 == 0:
            logger.info(msg)
        if log_writer is not None:
            log_writer.add_scalar(tag="eval loss", step=batch_id,
                                  value=avg_loss)
            log_writer.add_scalar(tag="eval preci", step=batch_id,
                                  value=avg_preci)
            log_writer.add_scalar(tag="eval recall", step=batch_id,
                                  value=avg_recall)

    all_preds = np.concatenate(all_preds, 0)
    all_labels = np.concatenate(all_labels, 0)
    mAP_scores = average_precision_score(all_labels, all_preds, average=None)
    return avg_loss, avg_preci, avg_recall, mAP_scores
def train(self, x1, x2, x3):
    """
    Computes the triplet loss and minimizes it with the Adam optimizer.
    Returns the train op together with the summary, metric and loss ops.
    :param x1: anchor input
    :param x2: positive input
    :param x3: negative input
    :return: train_op, summary_op, metrics_update_op, loss_op, mean_loss_op
    """
    with tf.name_scope("train"):
        loss_op = self.triplet_loss(x1, x2, x3)
        train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(loss_op)
        summary_op, mean_loss_op, metrics_update_op = utils.get_metrics(loss_op)
        tf.summary.scalar('loss', loss_op)
    return train_op, summary_op, metrics_update_op, loss_op, mean_loss_op
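# For context, a minimal sketch of the margin-based triplet loss that
# self.triplet_loss above presumably computes, written against the same TF1
# API; the margin value and the squared-Euclidean distance are assumptions.
import tensorflow as tf


def triplet_loss_sketch(anchor, positive, negative, margin=1.0):
    """max(0, d(a, p) - d(a, n) + margin), averaged over the batch."""
    d_pos = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    d_neg = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    return tf.reduce_mean(tf.maximum(0.0, d_pos - d_neg + margin))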
def collect_metric(counter):
    counter += 1
    timer = threading.Timer(_period, collect_metric, (counter,))
    timer.start()
    db = database(self.db_info["host"], self.db_info["port"],
                  self.db_info["user"], self.db_info["password"], "sbtest")
    if counter >= count:
        timer.cancel()
    try:
        data = utils.get_metrics(db)
        internal_metrics.append(data)
    except Exception as err:
        logger.info("[GET Metrics]Exception: %s", err)
def train_save_evaluate(params, kb, train_set, dev_set, ind2emoji,
                        embeddings_array, dataset_name):
    # If the minibatch is larger than the number of emojis we have, we can't
    # fill train/test batches
    if params.mb > len(ind2emoji):
        print(str.format(
            "Skipping: k={}, batch={}, epochs={}, ratio={}, dropout={}",
            params.out_dim, params.pos_ex, params.max_epochs,
            params.neg_ratio, params.dropout))
        print("Can't have an mb > len(ind2emoji)")
        return "N/A"
    else:
        print(str.format(
            "Training: k={}, batch={}, epochs={}, ratio={}, dropout={}",
            params.out_dim, params.pos_ex, params.max_epochs,
            params.neg_ratio, params.dropout))

    model_folder = params.model_folder(dataset_name=dataset_name)
    model_path = model_folder + "/model.pt"

    dsets = {"train": train_set, "dev": dev_set}
    predictions = dict()
    results = dict()

    if os.path.exists(model_path):
        predictions = pk.load(open(model_folder + "/results.p", "rb"))
    else:
        model = Emoji2Vec(model_params=params, num_emojis=kb.dim_size(0),
                          embeddings_array=embeddings_array)
        model.train(kb=kb, epochs=params.max_epochs,
                    learning_rate=params.learning_rate)
        os.makedirs(model_folder)
        torch.save(model.nn, model_folder + "/model.pt")
        e2v = model.create_gensim_files(model_folder=model_folder,
                                        ind2emoj=ind2emoji,
                                        out_dim=params.out_dim)
        if params.in_dim != params.out_dim:
            embeddings_array = model.nn.project_embeddings(embeddings_array)

        for dset_name in dsets:
            _, pred_values, _, true_values = generate_predictions(
                e2v=e2v, dset=dsets[dset_name],
                phr_embeddings=embeddings_array, ind2emoji=ind2emoji,
                threshold=params.class_threshold)
            predictions[dset_name] = {
                "y_true": true_values,
                "y_pred": pred_values,
            }
        pk.dump(predictions, open(model_folder + "/results.p", "wb"))

    for dset_name in dsets:
        true_labels = [bool(x) for x in predictions[dset_name]["y_true"]]
        pred_labels = [x >= params.class_threshold
                       for x in predictions[dset_name]["y_pred"]]
        true_values = predictions[dset_name]["y_true"]
        pred_values = predictions[dset_name]["y_pred"]

        # Calculate metrics
        acc, f1, auc = get_metrics(pred_labels, pred_values, true_labels,
                                   true_values)
        print(str.format("{}: Accuracy(>{}): {}, f1: {}, auc: {}", dset_name,
                         params.class_threshold, acc, f1, auc))
        results[dset_name] = {"accuracy": acc, "f1": f1, "auc": auc}

    return results["dev"]