def add_sign(self, gt: GroundTruth, img: np.ndarray, mask: np.ndarray):
    self.area.append(gt.rectangle.get_area())
    self.form_factor.append(float(gt.rectangle.width / gt.rectangle.height))
    self.filling_ratio.append(get_filling_ratio(gt.rectangle, mask))
    # Accumulate per-channel histograms in both color spaces.
    hists_rgb = get_histogram(img, gt, mask, False)
    hists_hsv = get_histogram(img, gt, mask, True)
    for i in range(3):
        self.histogram[:, 0, i] += hists_rgb[i][:, 0]
        self.histogram[:, 1, i] += hists_hsv[i][:, 0]
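The snippet above assumes a get_filling_ratio helper that is not shown. A minimal sketch of one plausible implementation, assuming the rectangle exposes top_left as a (row, col) pair plus width, height, and get_area() — these field names are assumptions, not the original API:

import numpy as np

def get_filling_ratio(rectangle, mask: np.ndarray) -> float:
    # Fraction of nonzero mask pixels inside the rectangle's bounding box.
    y, x = rectangle.top_left  # assumed (row, col) of the top-left corner
    window = mask[y:y + rectangle.height, x:x + rectangle.width]
    return float(np.count_nonzero(window)) / rectangle.get_area()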
def score_histogram(question, p, tol=1e-3):
    # TODO pc is added to pu then renormalized, see
    # https://discord.com/channels/694850840200216657/694850840707596320/798616689918738453
    # TODO pc is actually calculated by logistic best fit
    # Normalized resolution
    x_norm = get_norm_resolution(question)
    # Is the question range open or closed?
    pc = get_histogram(question)['c'].values.tolist()
    assert len(pc) == len(p)
    n_bins = len(p) - 1
    dx = 1 / n_bins
    closed = abs(sum(pc) * dx - 1) < tol  # for lack of anything better
    # Community distribution
    pc_star = interpolate(pc, x_norm, closed)
    # Uniform distribution
    pu = 1 / n_bins if closed else (1 - .15) / n_bins
    # Own prediction
    p_star = interpolate(p, x_norm, closed)
    # Number of predictions
    N = question['number_of_predictions']
    my_sc = scoring_(p_star, pc_star, pu, N)
    comm_sc = scoring_(pc_star, pc_star, pu, N)
    return my_sc - comm_sc
def do_split_10h(records, speakers, max_seconds_per_speaker, min_seconds_per_speaker, total_seconds):
    """Greedily select speakers while staying within the total time budget."""
    scaler = 1.0 / 16000  # sampling rate
    speaker2time = get_histogram(records,
                                 lambda_key=lambda r: r.speaker.id,
                                 lambda_value=lambda r: r.length * scaler)
    # NB: the `speakers` argument is ignored; the speaker set is rebuilt from records.
    speakers = set(r.speaker.id for r in records)
    speakers = sorted(speakers)
    random.shuffle(speakers)

    time_taken = 0.0
    speakers_taken = []
    for speaker in speakers:
        current_speaker_time = speaker2time[speaker]
        if (min_seconds_per_speaker <= current_speaker_time <= max_seconds_per_speaker
                and current_speaker_time < total_seconds - time_taken):
            speakers_taken.append(speaker)
            time_taken += current_speaker_time

    speakers_taken = set(speakers_taken)
    records_filtered = [r for r in records if r.speaker.id in speakers_taken]
    return records_filtered
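do_split_10h (and the LibriSpeech stats script further down) uses get_histogram as a generic key/value aggregator over records. A minimal sketch consistent with that usage; the real implementation may differ:

from collections import defaultdict

def get_histogram(records, lambda_key, lambda_value):
    # Sum lambda_value(r) into a bucket keyed by lambda_key(r).
    histogram = defaultdict(float)
    for r in records:
        histogram[lambda_key(r)] += lambda_value(r)
    return histogram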
def update_overview_graphs(bin_size, si_range, length_range, area_range, prec_property, prec_type):
    heavy_precipitation_filter = prec_type == "Heavy"
    filtered_df = events_df[events_df["si"] > 0.0] if heavy_precipitation_filter else events_df
    filtered_ts_df = ts_events_df[ts_events_df["si_ev"] > 0.0] if heavy_precipitation_filter else ts_events_df
    # TODO move filtration into a separate function
    mask = ((filtered_df["si"] >= si_range[0]) & (filtered_df["si"] <= si_range[1])
            & (filtered_df["length"] >= length_range[0]) & (filtered_df["length"] <= length_range[1])
            & (filtered_df["area"] >= area_range[0]) & (filtered_df["area"] <= area_range[1]))
    ts_mask = ((filtered_ts_df["si_ev"] >= si_range[0]) & (filtered_ts_df["si_ev"] <= si_range[1])
               & (filtered_ts_df["length"] >= length_range[0]) & (filtered_ts_df["length"] <= length_range[1])
               & (filtered_ts_df["area"] >= area_range[0]) & (filtered_ts_df["area"] <= area_range[1]))
    filtered_df = filtered_df.loc[mask, :]
    filtered_ts_df = filtered_ts_df.loc[ts_mask, :]

    # Overview
    u_events_graph = utils.get_stacked_histogram(filtered_df, bin_size=bin_size)
    u_events_graph.update_layout(
        title=f"Number of {prec_type.lower()} precipitation events (bin size: {bin_size})")
    if prec_property in ["maxPrec", "meanPre"]:
        u_property_graph = utils.get_histogram(filtered_ts_df, bin_size=bin_size, column_name=prec_property)
    else:
        u_property_graph = utils.get_histogram(filtered_df, bin_size=bin_size, column_name=prec_property)
    u_property_graph.update_layout(
        title=f"Property {prec_property} of {prec_type.lower()} precipitation events")
    return u_events_graph, u_property_graph
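Here utils.get_histogram(df, bin_size, column_name) evidently returns a figure object with an update_layout method. One plausible reading, sketched with plotly express; the fixed-width xbins choice is an assumption:

import plotly.express as px

def get_histogram(df, bin_size, column_name):
    # Histogram of one dataframe column with bins of width bin_size.
    fig = px.histogram(df, x=column_name)
    fig.update_traces(xbins=dict(size=bin_size))
    return fig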
def quantize(im_orig, n_quant, n_iter):
    """Performs optimal quantization of a grayscale or RGB image.

    :param im_orig: Input grayscale or RGB image to be quantized (float64 image with values in [0, 1]).
    :param n_quant: Number of intensities the output im_quant image should have.
    :param n_iter: Maximum number of iterations of the optimization procedure (may converge earlier).
    :return: Quantized output image (copy of the original image).
    """
    if n_quant <= 0 or n_iter <= 0:
        raise ValueError("Error: n_quant and n_iter must be positive")
    if im_orig.ndim == 3:
        img = utils.rgb2yiq(im_orig)
        img_hist = utils.get_histogram(img[:, :, 0])
    else:
        img = im_orig.copy()
        img_hist = utils.get_histogram(img)

    q = np.zeros(n_quant).astype(np.float64)
    z = _calculate_initial_z(img_hist, n_quant)
    last_iter_z = z.copy()
    for i in range(n_iter):
        q = _get_q(z, q, img_hist)
        z = _get_z(q, z)
        # Checks for convergence.
        if np.array_equal(last_iter_z, z):
            break
        last_iter_z = z.copy()

    lookup_table = np.zeros(256)
    for i in range(len(z) - 1):
        start = int(np.round(z[i]))
        end = int(z[i + 1]) + 1
        lookup_table[start:end] = q[i]

    if im_orig.ndim == 3:
        img[:, :, 0] = utils.normalize_image(lookup_table[np.rint(img[:, :, 0] * 255).astype(np.uint8)])
        return utils.yiq2rgb(img)
    return utils.normalize_image(lookup_table[np.rint(img * 255).astype(np.uint8)])
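quantize (and the display scripts below) relies on utils.get_histogram returning a 256-bin intensity histogram. A minimal sketch under that assumption, scaling float images in [0, 1] up to [0, 255] first:

import numpy as np

def get_histogram(image: np.ndarray) -> np.ndarray:
    # 256-bin intensity histogram of a grayscale image.
    if image.dtype != np.uint8:
        image = np.rint(image * 255).astype(np.uint8)
    return np.bincount(image.ravel(), minlength=256)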
def submit_recalibrated(qid, ir_model, session=None, username=USERNAME, password=PASSWORD, debug=False):
    if session is None:
        metaculus = ergo.Metaculus()
        metaculus.login_via_username_and_password(username=username, password=password)
    else:
        metaculus = session
    question = metaculus.get_question(qid)
    comm_cdf = get_histogram(question.data)['c'].cumsum() / 200
    cdf = ir_model.predict(comm_cdf)
    lm = LogisticMixture()
    lm.fit(cdf)
    low_closed, high_closed = [question.possibilities.get(side) != 'tail' for side in ('low', 'high')]
    data = lm.get_prediction_data(low_closed, high_closed)
    if not debug and is_open(question):
        print(f"\t{question.name}")
        metaculus.predict(q_id=str(qid), data=data)
    return lm.func(np.linspace(0, 1, len(cdf)), *lm.opt), cdf, comm_cdf.squeeze().values
'''
Created on Aug 1, 2019
@author: jsaavedr
Image Histogram
'''
import numpy as np
import matplotlib.pyplot as plt
import pai_io
import utils

if __name__ == '__main__':
    filename = '../images/gray/ten_coins.png'
    image = pai_io.imread(filename)
    #image = np.zeros((400, 400), dtype=np.uint8)
    #image[100:170, 200:270] = 255
    h = utils.get_histogram(image)
    fig, xs = plt.subplots(1, 2)
    xs[0].set_axis_off()
    xs[0].imshow(image, cmap='gray', vmin=0, vmax=255)
    xs[1].bar(x=np.arange(256), height=h)
    plt.show()
print('Training NMI: {}'.format(training_nmi))
print('Training ACC: {}'.format(training_all_acc))
logger.log_value('All Training acc', training_all_acc, step=epoch)

# Calculate and log all information.
model_weights = retrieval_layer.out.eval(sess)
weight_norm_mean, weight_norm_var = utils.eval_feature_norm_var(model_weights.T)
category_mean = utils.get_category_mean(training_normed_embedding, class_id, num_training_category)
# L2-normalize each weight column.
for i in range(model_weights.shape[1]):
    model_weights[:, i] = model_weights[:, i] / np.sqrt(np.sum(model_weights[:, i]**2) + 1e-4)
mean_weight_diff = np.sum((category_mean - model_weights.T)**2)
training_histogram, bad_wrong_histogram, bad_correct_histogram, bad_num_list, num_list = \
    utils.get_histogram(training_embedding, training_normed_embedding, model_weights, class_id)
norm_list, cos_dis_list = utils.get_norm_and_number(
    training_embedding, training_normed_embedding, model_weights, class_id)
_, cos_dis_to_mean_list = utils.get_norm_and_number(
    training_embedding, training_normed_embedding, category_mean.T, class_id)
weights_dis_mat = np.matmul(model_weights.T, model_weights)
mean_dis_mat = np.matmul(category_mean, category_mean.T)
mean_norm = np.mean(norm_list)
var_norm = np.var(norm_list)
mean_cos_val = np.mean(cos_dis_to_mean_list)
var_cos_val = np.var(cos_dis_to_mean_list)
logger.log_value('Train Norm Mean', mean_norm, step=epoch)
logger.log_value('Train Norm Var', var_norm, step=epoch)
logger.log_value('Train Cos Mean', mean_cos_val, step=epoch)
'''
Created on Aug 6, 2019
@author: jsaavedr
Histogram equalization
'''
import matplotlib.pyplot as plt
import pai_io
import utils
import numpy as np

if __name__ == '__main__':
    #filename = '../images/gray/im_3.tif'
    filename = '../images/gray/mri.tif'
    #filename = '../images/gray/Lowcontr.tif'
    #filename = '../images/gray/low_contraste_1.jpg'
    image = pai_io.imread(filename, as_gray=True)
    h = utils.get_histogram(image)
    im_eq = utils.equalize_image(image)
    h_eq = utils.get_histogram(im_eq)
    fig, xs = plt.subplots(2, 2)
    for i in range(2):
        xs[0, i].set_axis_off()
    xs[0, 0].imshow(image, cmap='gray', vmin=0, vmax=255)
    xs[0, 0].set_title('Original')
    xs[0, 1].imshow(im_eq, cmap='gray', vmin=0, vmax=255)
    xs[0, 1].set_title('Equalized')
    xs[1, 0].bar(x=np.arange(256), height=h)
    xs[1, 1].bar(x=np.arange(256), height=h_eq)
    plt.show()
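utils.equalize_image is not shown in these snippets. A textbook cumulative-histogram equalization consistent with how it is called here (uint8 grayscale in, uint8 out) — a sketch, not necessarily the original code:

import numpy as np

def equalize_image(image: np.ndarray) -> np.ndarray:
    # Map each intensity through the normalized cumulative histogram.
    hist = np.bincount(image.ravel(), minlength=256)
    cdf = hist.cumsum()
    cdf_min = cdf[cdf > 0][0]  # first occupied bin
    lut = np.clip(np.rint((cdf - cdf_min) / float(cdf[-1] - cdf_min) * 255), 0, 255).astype(np.uint8)
    return lut[image]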
utils.insert_to_collection(db, 'train', utils.read_from_csvfile(f_train, col_types_train))
utils.insert_to_collection(db, 'test', utils.read_from_csvfile(f_test, col_types_test))

coll_name = 'train'
# command line: spark-submit run.py
# spark, df = load_all_data(db, coll_name)
# command line: spark-submit --packages org.mongodb.spark:mongo-spark-connector_2.11:2.0.0 run.py
spark, df = load_with_mongodb_connector(dbname, coll_name)
print('dataframe count: {}'.format(df.count()))
print(df.take(2))
df.printSchema()
df.toPandas().describe()
utils.get_histogram(df, ["Sex", "Age"], bins=10)
# stats_numeric = spark.get_stats_summary_numeric_fields(df, numeric_fields)
# utils.pretty_print_stats(stats_numeric)
stats_text = spark.get_stats_summary_text_fields(df, text_fields)
utils.pretty_print_stats(stats_text)
print()
corr_matrix = spark.get_correlation_matrix(df, numeric_fields)
df_corr = utils.create_pandas_dataframe(corr_matrix, numeric_fields, numeric_fields)
print(df_corr)
print()
cov_matrix = spark.get_covariance_matrix(df, numeric_fields)
df_cov = utils.create_pandas_dataframe(cov_matrix, numeric_fields, numeric_fields)
kmeans = MiniBatchKMeans(n_clusters=k, init="k-means++", n_init=10,
                         max_iter=100, init_size=1000, batch_size=1000)
print(" [!] Kmeans fitting")
kmeans.fit(all_sift)
if SAVE:
    save_pickle(prefix + "kmeans.pkl", kmeans)

print(" [!] Kmeans prediction")
train_predicted = kmeans.predict(reduced_train_sift)
test_predicted = kmeans.predict(reduced_test_sift)

print(" [!] Making histogram")
if not os.path.isfile(prefix + "train_hist_features.pkl"):
    train_hist_features = get_histogram(k, train_sift, train_predicted)
    if SAVE and not os.path.isfile(prefix + "train_hist_features.pkl"):
        print(" [!] Saving histogram 1")
        save_pickle(prefix + "train_hist_features.pkl", train_hist_features)
    del train_hist_features

test_hist_features = get_histogram(k, test_sift, test_predicted)
if SAVE and not os.path.isfile(prefix + "test_hist_features.pkl"):
    print(" [!] Saving histogram 2")
    save_pickle(prefix + "test_hist_features.pkl", test_hist_features)
del test_hist_features

if not os.path.isfile(prefix + "train_spp_hist.pkl"):
    print(" [!] Making SPP histogram 1")
    parser.add_argument('--meta_path', type=str)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = get_args()
    speakers = get_speakers(pathlib.Path(args.meta_path) / 'SPEAKERS.TXT')
    fname2length = traverse_tree(args.root)
    records = full_records(speakers, fname2length, subset_name=None)
    print(f'Utterances: {len(records)}')

    time_by_gender = get_histogram(records,
                                   lambda_key=lambda r: r.speaker.gender,
                                   lambda_value=lambda r: r.length / 16000)
    print('Time by gender, seconds', time_by_gender)

    time_by_subset = get_histogram(records,
                                   lambda_key=lambda r: r.speaker.subset,
                                   lambda_value=lambda r: r.length / 16000)
    print('Time by subset, seconds', time_by_subset)

    speaker_freq = get_histogram(records,
                                 lambda_key=lambda r: r.speaker.id,
                                 lambda_value=lambda r: 1)
    print('Number of uniq speakers', len(speaker_freq))

    book_lengths = get_histogram(records, lambda_key=lambda r: r.book,
def predict_ir(ir_model, q):
    return ir_model.predict(get_histogram(q)['c'].values.tolist())
def F_star(question):
    """CDF evaluated at the resolution: sum the density bins at x <= resolution,
    scaled by the bin count (a Riemann-sum approximation of the integral)."""
    h = get_histogram(question)
    return h[h['x'] <= question['resolution']]['c'].sum() / h.shape[0]