Example #1
def add_sign(self, gt: GroundTruth, img: np.ndarray, mask: np.ndarray):
    self.area.append(gt.rectangle.get_area())
    self.form_factor.append(float(gt.rectangle.width /
                                  gt.rectangle.height))
    self.filling_ratio.append(get_filling_ratio(gt.rectangle, mask))
    hists_rgb = get_histogram(img, gt, mask, False)
    hists_hsv = get_histogram(img, gt, mask, True)
    for i in range(3):
        self.histogram[:, 0, i] += hists_rgb[i][:, 0]
        self.histogram[:, 1, i] += hists_hsv[i][:, 0]
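For context, a minimal sketch of the container this method updates. The (256, 2, 3) histogram shape is inferred from the indexing above (256 bins, RGB vs. HSV, 3 channels); the class name and list attributes are hypothetical:

import numpy as np

class SignStats:  # hypothetical name
    def __init__(self):
        self.area = []
        self.form_factor = []
        self.filling_ratio = []
        # 256 intensity bins x {0: RGB, 1: HSV} x 3 channels
        self.histogram = np.zeros((256, 2, 3), dtype=np.float64)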
Example #2
def score_histogram(question, p, tol=1e-3):
    # TODO pc is added to pu then renormalized, see https://discord.com/channels/694850840200216657/694850840707596320/798616689918738453
    # TODO pc is actually calculated by logistic best fit
    # Normalized resolution
    x_norm = get_norm_resolution(question)

    # Is question range open or closed?
    pc = get_histogram(question)['c'].values.tolist()
    assert len(pc) == len(p)
    n_bins = len(p) - 1
    dx = 1 / n_bins
    closed = abs(sum(pc) * dx - 1) < tol  # for lack of anything better

    # Community distribution
    pc_star = interpolate(pc, x_norm, closed)

    # Uniform distribution
    pu = 1 / n_bins if closed else (1 - 0.15) / n_bins

    # Own prediction
    p_star = interpolate(p, x_norm, closed)

    # Number of predictions
    N = question['number_of_predictions']

    my_sc = scoring_(p_star, pc_star, pu, N)
    comm_sc = scoring_(pc_star, pc_star, pu, N)

    return my_sc - comm_sc
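Returning my_sc - comm_sc makes the result a score relative to the community: zero means the prediction scores exactly as well as the community distribution itself. The closed/open heuristic is a Riemann-sum check: with the 200 bins Metaculus histograms use (see Example #6), dx = 1/200 and a closed question's density values sum to roughly 200, so sum(pc) * dx ≈ 1; an open question reserves probability mass outside the range (the 15% in the pu line), so the sum falls short of 1.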
Example #3
def do_split_10h(records, speakers, max_seconds_per_speaker,
                 min_seconds_per_speaker, total_seconds):
    """
    Greedily select speakers while staying within the total time budget.
    """
    scaler = 1.0 / 16000  # convert sample counts to seconds (16 kHz audio)
    speaker2time = get_histogram(records,
                                 lambda_key=lambda r: r.speaker.id,
                                 lambda_value=lambda r: r.length * scaler)

    speakers = sorted({r.speaker.id for r in records})
    random.shuffle(speakers)

    time_taken = 0.0
    speakers_taken = []

    for speaker in speakers:
        current_speaker_time = speaker2time[speaker]
        if (min_seconds_per_speaker <= current_speaker_time <= max_seconds_per_speaker
                and current_speaker_time < total_seconds - time_taken):
            speakers_taken.append(speaker)
            time_taken += current_speaker_time

    speakers_taken = set(speakers_taken)

    records_filtered = [r for r in records if r.speaker.id in speakers_taken]
    return records_filtered
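get_histogram here is a keyed accumulator rather than an image histogram; a minimal sketch consistent with this call site and the ones in Example #12:

from collections import defaultdict

def get_histogram(records, lambda_key, lambda_value):
    # Sum lambda_value(r) over the records in each lambda_key(r) group.
    histogram = defaultdict(float)
    for r in records:
        histogram[lambda_key(r)] += lambda_value(r)
    return histogram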
Example #4
def update_overview_graphs(bin_size, si_range, length_range, area_range,
                           prec_property, prec_type):
    heavy_precipitation_filter = prec_type == "Heavy"
    filtered_df = events_df[
        events_df["si"] > 0.0] if heavy_precipitation_filter else events_df
    filtered_ts_df = ts_events_df[
        ts_events_df["si_ev"] >
        0.0] if heavy_precipitation_filter else ts_events_df
    # TODO move filtration into a separate function (see the sketch after this example)
    mask = ((filtered_df["si"] >= si_range[0])
            & (filtered_df["si"] <= si_range[1])
            & (filtered_df["length"] >= length_range[0])
            & (filtered_df["length"] <= length_range[1])
            & (filtered_df["area"] >= area_range[0])
            & (filtered_df["area"] <= area_range[1]))
    ts_mask = ((filtered_ts_df["si_ev"] >= si_range[0])
               & (filtered_ts_df["si_ev"] <= si_range[1])
               & (filtered_ts_df["length"] >= length_range[0])
               & (filtered_ts_df["length"] <= length_range[1])
               & (filtered_ts_df["area"] >= area_range[0])
               & (filtered_ts_df["area"] <= area_range[1]))
    filtered_df = filtered_df.loc[mask, :]
    filtered_ts_df = filtered_ts_df.loc[ts_mask, :]

    # Overview
    u_events_graph = utils.get_stacked_histogram(filtered_df,
                                                 bin_size=bin_size)
    u_events_graph.update_layout(
        title=
        f"Number of {prec_type.lower()} precipitation events (bin size: {bin_size})"
    )

    if prec_property in ["maxPrec", "meanPre"]:
        u_property_graph = utils.get_histogram(filtered_ts_df,
                                               bin_size=bin_size,
                                               column_name=prec_property)
    else:
        u_property_graph = utils.get_histogram(filtered_df,
                                               bin_size=bin_size,
                                               column_name=prec_property)
    u_property_graph.update_layout(
        title=
        f"Property {prec_property} of {prec_type.lower()} precipitation events"
    )

    return u_events_graph, u_property_graph
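One way to discharge the TODO above is to factor the repeated range mask into a helper; the column names come from the call sites, while the helper itself is only a sketch:

def filter_by_ranges(df, si_column, si_range, length_range, area_range):
    # Keep rows whose si, length and area all lie inside their (min, max)
    # ranges; si_column is "si" for events_df and "si_ev" for ts_events_df.
    mask = ((df[si_column] >= si_range[0]) & (df[si_column] <= si_range[1])
            & (df["length"] >= length_range[0])
            & (df["length"] <= length_range[1])
            & (df["area"] >= area_range[0]) & (df["area"] <= area_range[1]))
    return df.loc[mask, :]

Each frame would then be filtered with one call, e.g. filtered_df = filter_by_ranges(filtered_df, "si", si_range, length_range, area_range).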
Example #5
def quantize(im_orig, n_quant, n_iter):
    """Performs optimal quantization of a grayscale or RGB image.
    :param im_orig: Input grayscale or RGB image to be quantized (float64 image with values in [0, 1]).
    :param n_quant: Number of intensities the output im_quant image should have.
    :param n_iter: Maximum number of iterations of the optimization procedure (may converge earlier).
    :return: Quantized output image (a new image; the input is not modified).
    """
    if n_quant <= 0 or n_iter <= 0:
        raise ValueError("Error: n_quant and n_iter must be positive")

    if im_orig.ndim == 3:
        img = utils.rgb2yiq(im_orig)
        img_hist = utils.get_histogram(img[:, :, 0])
    else:
        img = im_orig.copy()
        img_hist = utils.get_histogram(img)

    q = np.zeros(n_quant, dtype=np.float64)
    z = _calculate_initial_z(img_hist, n_quant)
    last_iter_z = z.copy()
    for i in range(n_iter):
        q = _get_q(z, q, img_hist)
        z = _get_z(q, z)

        # Checks for convergence.
        if np.array_equal(last_iter_z, z):
            break
        last_iter_z = z.copy()

    lookup_table = np.zeros(256)
    for i in range(len(z) - 1):
        start = int(np.round(z[i]))
        end = int(z[i + 1]) + 1
        lookup_table[start:end] = q[i]

    if im_orig.ndim == 3:
        img[:, :, 0] = utils.normalize_image(lookup_table[np.rint(img[:, :, 0] * 255).astype(np.uint8)])
        return utils.yiq2rgb(img)
    return utils.normalize_image(lookup_table[np.rint(img * 255).astype(np.uint8)])
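The last two lines rely on NumPy fancy indexing: every pixel is rounded to an integer in [0, 255] and used as an index into lookup_table. A self-contained illustration of that mapping step:

import numpy as np

lookup_table = np.linspace(0, 1, 256)          # toy table: identity on [0, 1]
img = np.array([[0.0, 0.5], [0.25, 1.0]])      # float64 image in [0, 1]
indices = np.rint(img * 255).astype(np.uint8)  # integer intensity levels
mapped = lookup_table[indices]                 # per-pixel table lookup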
Example #6
def submit_recalibrated(qid, ir_model, session=None, username=USERNAME, password=PASSWORD, debug=False):
  if session is None:
    metaculus = ergo.Metaculus()
    metaculus.login_via_username_and_password(username=username, password=password)
  else:
    metaculus = session
  question = metaculus.get_question(qid)
  comm_cdf = get_histogram(question.data)['c'].cumsum() / 200
  cdf = ir_model.predict(comm_cdf)
  lm = LogisticMixture()
  lm.fit(cdf)
  low_closed, high_closed = [question.possibilities.get(side) != 'tail' for side in ('low', 'high')]
  data = lm.get_prediction_data(low_closed, high_closed)
  
  if not debug and is_open(question):
    print(f"\t{question.name}")
    metaculus.predict(q_id=str(qid), data=data)

  return lm.func(np.linspace(0, 1, len(cdf)), *lm.opt), cdf, comm_cdf.squeeze().values
Example #7
'''
Created on Aug 1, 2019

@author: jsaavedr
Image Histogram
'''

import numpy as np
import matplotlib.pyplot as plt
import pai_io
import utils

if __name__ == '__main__':
    filename = '../images/gray/ten_coins.png'
    image = pai_io.imread(filename)
    #image = np.zeros((400,400), dtype = np.uint8)
    #image[100:170, 200:270] = 255
    h = utils.get_histogram(image)
    fig, xs = plt.subplots(1, 2)
    xs[0].set_axis_off()
    xs[0].imshow(image, cmap='gray', vmin=0, vmax=255)
    xs[1].bar(x=np.arange(256), height=h)
    plt.show()
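utils.get_histogram is not shown here; for an 8-bit grayscale image it presumably reduces to counting how often each intensity occurs, e.g.:

import numpy as np

def get_histogram(image):
    # 256-bin count of pixel intensities for a uint8 grayscale image.
    return np.bincount(image.ravel(), minlength=256)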
Example #8
    print('Training NMI: {}'.format(training_nmi))
    print('Training ACC: {}'.format(training_all_acc))
    logger.log_value('All Training acc', training_all_acc, step=epoch)

    #calculate and log all information
    model_weights = retrieval_layer.out.eval(sess)
    weight_norm_mean, weight_norm_var = utils.eval_feature_norm_var(
        model_weights.T)
    category_mean = utils.get_category_mean(training_normed_embedding,
                                            class_id, num_training_category)
    for i in range(model_weights.shape[1]):
        model_weights[:, i] = model_weights[:, i] / np.sqrt(
            np.sum(model_weights[:, i]**2) + 1e-4)
    mean_weight_diff = np.sum((category_mean - model_weights.T)**2)
    training_histogram, bad_wrong_histogram, bad_correct_histogram, bad_num_list, num_list =\
            utils.get_histogram(training_embedding, training_normed_embedding, model_weights, class_id)
    norm_list, cos_dis_list = utils.get_norm_and_number(
        training_embedding, training_normed_embedding, model_weights, class_id)
    _, cos_dis_to_mean_list = utils.get_norm_and_number(
        training_embedding, training_normed_embedding, category_mean.T,
        class_id)
    weights_dis_mat = np.matmul(model_weights.T, model_weights)
    mean_dis_mat = np.matmul(category_mean, category_mean.T)
    mean_norm = np.mean(norm_list)
    var_norm = np.var(norm_list)
    mean_cos_val = np.mean(cos_dis_to_mean_list)
    var_cos_val = np.var(cos_dis_to_mean_list)

    logger.log_value('Train Norm Mean', mean_norm, step=epoch)
    logger.log_value('Train Norm Var', var_norm, step=epoch)
    logger.log_value('Train Cos Mean', mean_cos_val, step=epoch)
Example #9
'''
Created on Aug 6, 2019

@author: jsaavedr

Histogram equalization
'''
import matplotlib.pyplot as plt
import pai_io
import utils
import numpy as np

if __name__ == '__main__':
    #filename = '../images/gray/im_3.tif'
    filename = '../images/gray/mri.tif'
    #filename = '../images/gray/Lowcontr.tif'
    #filename = '../images/gray/low_contraste_1.jpg'
    image = pai_io.imread(filename, as_gray=True)
    h = utils.get_histogram(image)
    im_eq = utils.equalize_image(image)
    h_eq = utils.get_histogram(im_eq)
    fig, xs = plt.subplots(2, 2)
    for i in range(2):
        xs[0, i].set_axis_off()
    xs[0, 0].imshow(image, cmap='gray', vmin=0, vmax=255)
    xs[0, 0].set_title('Original')
    xs[0, 1].imshow(im_eq, cmap='gray', vmin=0, vmax=255)
    xs[0, 1].set_title('Equalized')
    xs[1, 0].bar(x=np.arange(256), height=h)
    xs[1, 1].bar(x=np.arange(256), height=h_eq)
    plt.show()
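utils.equalize_image is likewise not shown; a common textbook recipe it presumably follows maps each intensity through the normalized cumulative histogram:

import numpy as np

def equalize_image(image):
    # Histogram equalization for a uint8 grayscale image: map each
    # intensity through the normalized cumulative histogram (CDF).
    h = np.bincount(image.ravel(), minlength=256)
    cdf = np.cumsum(h) / image.size
    lut = np.rint(cdf * 255).astype(np.uint8)
    return lut[image]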
Example #10
    utils.insert_to_collection(
        db, 'train', utils.read_from_csvfile(f_train, col_types_train))
    utils.insert_to_collection(db, 'test',
                               utils.read_from_csvfile(f_test, col_types_test))

    coll_name = 'train'
    # command line: spark-submit run.py
    # spark, df = load_all_data(db, coll_name)

    # command line: spark-submit --packages org.mongodb.spark:mongo-spark-connector_2.11:2.0.0 run.py
    spark, df = load_with_mongodb_connector(dbname, coll_name)

    print 'dataframe count: {}'.format(df.count())
    print df.take(2)
    df.printSchema()
    df.toPandas().describe()
    utils.get_histogram(df, ["Sex", "Age"], bins=10)

    # stats_numeric = spark.get_stats_summary_numeric_fields(df, numeric_fields)
    # utils.pretty_print_stats(stats_numeric)
    stats_text = spark.get_stats_summary_text_fields(df, text_fields)
    utils.pretty_print_stats(stats_text)

    print
    corr_matrix = spark.get_correlation_matrix(df, numeric_fields)
    df_corr = utils.create_pandas_dataframe(corr_matrix, numeric_fields,
                                            numeric_fields)
    print df_corr
    print
    cov_matrix = spark.get_covariance_matrix(df, numeric_fields)
    df_cov = utils.create_pandas_dataframe(cov_matrix, numeric_fields,
                                           numeric_fields)
Example #11
            kmeans = MiniBatchKMeans(
                n_clusters=k, init="k-means++", n_init=10, max_iter=100, init_size=1000, batch_size=1000
            )

            print " [!] Kmeans fitting"
            kmeans.fit(all_sift)
            if SAVE:
                save_pickle(prefix + "kmeans.pkl", kmeans)

    print " [!] Kmeans prediction"
    train_predicted = kmeans.predict(reduced_train_sift)
    test_predicted = kmeans.predict(reduced_test_sift)

    print " [!] Making histogram"
    if not os.path.isfile(prefix + "train_hist_features.pkl"):
        train_hist_features = get_histogram(k, train_sift, train_predicted)
        if SAVE and not os.path.isfile(prefix + "train_hist_features.pkl"):
            print " [!] Saving histogram 1"
            save_pickle(prefix + "train_hist_features.pkl", train_hist_features)

        del train_hist_features

        test_hist_features = get_histogram(k, test_sift, test_predicted)
        if SAVE and not os.path.isfile(prefix + "test_hist_features.pkl"):
            print " [!] Saving histogram 2"
            save_pickle(prefix + "test_hist_features.pkl", test_hist_features)

        del test_hist_features

    if not os.path.isfile(prefix + "train_spp_hist.pkl"):
        print " [!] Making SPP histogram 1"
Example #12
    parser.add_argument('--meta_path', type=str)

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = get_args()
    speakers = get_speakers(pathlib.Path(args.meta_path) / 'SPEAKERS.TXT')

    fname2length = traverse_tree(args.root)
    records = full_records(speakers, fname2length, subset_name=None)
    print(f'Utterances: {len(records)}')

    time_by_gender = get_histogram(records,
                                   lambda_key=lambda r: r.speaker.gender,
                                   lambda_value=lambda r: r.length / 16000)
    print('Time by gender, seconds', time_by_gender)

    time_by_subset = get_histogram(records,
                                   lambda_key=lambda r: r.speaker.subset,
                                   lambda_value=lambda r: r.length / 16000)
    print('Time by subset, seconds', time_by_subset)

    speaker_freq = get_histogram(records,
                                 lambda_key=lambda r: r.speaker.id,
                                 lambda_value=lambda r: 1)
    print('Number of uniq speakers', len(speaker_freq))

    book_lengths = get_histogram(records,
                                 lambda_key=lambda r: r.book,
Example #13
def predict_ir(ir_model, q):
  return ir_model.predict(get_histogram(q)['c'].values.tolist())
Example #14
def F_star(question):
  """CDF evaluated at resolution"""
  h = get_histogram(question)
  return h[h['x'] <= question['resolution']]['c'].sum() / h.shape[0]
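Dividing by h.shape[0] plays the role of the bin width dx = 1/n here, so the sum of the community density values up to the resolution approximates the integral of the density over [0, resolution], i.e. the CDF evaluated at the resolution.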