Example No. 1
    def decode(self, bit_msg, morse_format):
        if not bit_msg or '1' not in bit_msg:
            raise ValueError(f'Invalid bit sequence: "{bit_msg}"')

        len_set = {'0': set(), '1': set()}

        # Strip leading/trailing zeroes (transmission noise)
        bit_msg = bit_msg.strip('0')

        parsed = []
        prev = None
        count = 1
        for bit in bit_msg:
            if bit not in ('0', '1'):
                raise ValueError(f'Invalid bit: "{bit}"')
            if bit == prev:
                count += 1
                continue
            if prev is not None:  # Skip the initial None sentinel
                len_set[prev].add(count)
                parsed.append((prev, count))
            count = 1
            prev = bit
        # Process last bit sequence
        len_set[bit].add(count)
        parsed.append((bit, count))

        pulse_satisfies_jenks = len(len_set['1']) > 2
        if pulse_satisfies_jenks:
            # jenks_breaks expects an indexable sequence, so convert the set
            pulse_breaks = jenkspy.jenks_breaks(sorted(len_set['1']), 2)
        else:
            pulse_breaks = [0] + sorted(len_set['1'])

        space_satisfies_jenks = len(len_set['0']) > 3
        if space_satisfies_jenks:
            space_breaks = jenkspy.jenks_breaks(sorted(len_set['0']), 3)
        else:
            space_breaks = sorted(len_set['0'])

        normalized = []
        for bit, count in parsed:
            if bit == '1':
                is_dot = count <= pulse_breaks[1]  # first break
                seq = morse_format.dot if is_dot else morse_format.dash
            else:
                if len(space_breaks) < 2:
                    is_intra_char, is_inter_word = True, False
                elif len(space_breaks) < 3:
                    is_intra_char = count < space_breaks[1]  # first break
                    is_inter_word = False  # too few breaks to detect words
                else:
                    is_intra_char = count < space_breaks[1]  # first break
                    is_inter_word = count >= space_breaks[2]  # second break
                if is_intra_char:
                    seq = morse_format.intra_char
                elif is_inter_word:
                    seq = morse_format.inter_word
                else:
                    seq = morse_format.inter_char
            normalized.append(seq)

        return ''.join(normalized)
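A minimal usage sketch for the decoder above, assuming it lives on a hypothetical MorseDecoder class and that morse_format is any object exposing dot, dash, intra_char, inter_char and inter_word attributes:

from collections import namedtuple

# Hypothetical symbol container; the attribute names match what decode() reads.
MorseFormat = namedtuple(
    'MorseFormat', ['dot', 'dash', 'intra_char', 'inter_char', 'inter_word'])
fmt = MorseFormat(dot='.', dash='-', intra_char='', inter_char=' ',
                  inter_word=' / ')

decoder = MorseDecoder()  # hypothetical class holding decode()
# Runs of 1s become dots/dashes; runs of 0s become the three space types:
print(decoder.decode('10111010001110111', fmt))  # '.-. --'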
Example No. 2
def goodness_of_variance_fit(array, classes):
    # get the break points
    classes = jenkspy.jenks_breaks(array, classes)

    # do the actual classification
    classified = np.array([classify(i, classes) for i in array])

    # max value of zones
    maxz = max(classified)

    # nested list of zone indices
    zone_indices = [[
        idx for idx, val in enumerate(classified) if zone + 1 == val
    ] for zone in range(maxz)]

    # sum of squared deviations from array mean
    sdam = np.sum((array - array.mean())**2)

    # sorted polygon stats
    array_sort = [
        np.array([array[index] for index in zone]) for zone in zone_indices
    ]

    # sum of squared deviations about the class means (SDCM)
    sdcm = sum(
        np.sum((zone_array - zone_array.mean())**2)
        for zone_array in array_sort
    )

    # goodness of variance fit
    gvf = (sdam - sdcm) / sdam

    return gvf
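The function relies on a classify helper that is not shown in this example (Examples No. 18 and No. 22 use the same name). A common definition, assumed here rather than taken from the original code, maps each value to the 1-based index of the first break it does not exceed:

def classify(value, breaks):
    # Return the 1-based class of `value` given ascending break points.
    for i in range(1, len(breaks)):
        if value <= breaks[i]:
            return i
    return len(breaks) - 1  # values above the last break go in the top class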
Example No. 3
def optimal_jenk(image, threshold, gvf=0.0, nclasses=2):
    """
    Find GVF at incrementing class numbers until threshold
    GVF is reached

    Parameters
    ----------
    image: np.array
        unrolled (raveled) spatial slice of the SWE cube
    threshold: float
        cutoff value for how high to optimize gvf
    gvf: float
        initial gvf value (default = 0.0)
    nclasses: int
        how many classes to start with

    Returns
    -------
    (nclasses, bounds)
    nclasses: int
        optimal number of classes
    bounds: list
        Generated jenks boundaries for given image
    """
    while gvf < threshold:
        # `govf` is assumed to be goodness_of_variance_fit (Example No. 2)
        gvf = govf(image, nclasses)
        if gvf < threshold:
            nclasses += 1
    bounds = jenkspy.jenks_breaks(image, nclasses)
    return nclasses, bounds
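A quick usage sketch on synthetic data, assuming goodness_of_variance_fit from Example No. 2 (and the classify stand-in sketched there) is available and bound to the govf name the loop calls:

import numpy as np
import jenkspy

govf = goodness_of_variance_fit  # alias assumed by optimal_jenk

np.random.seed(0)
# Two well-separated populations, e.g. land vs. ocean pixels:
image = np.concatenate([np.random.normal(5, 1, 500),
                        np.random.normal(50, 5, 500)])
nclasses, bounds = optimal_jenk(image, threshold=0.8)
print(nclasses, bounds)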
Example No. 4
def emperical_histogram(observes, k):
    """Build an empirical histogram of the observations.

    observes: list of observations
    k: number of bins; Jenks natural breaks optimization selects the bin edges
    """
    observes.sort()
    breaks = jenkspy.jenks_breaks(observes, nb_class=k) 
    print(observes)
    print(breaks)

    omega = []
    phi = []
    curr_sum = 0
    curr_count = 0 
    brk_idx = 1
    curr_break = breaks[brk_idx]
    N = len(observes)

    for v in observes:
        # Close the current bin when v crosses the next break; the last
        # bin is closed after the loop so the maximum value is kept.
        if v >= curr_break and brk_idx < k and curr_count > 0:
            omega.append(curr_sum / curr_count)
            phi.append(curr_count / N)
            curr_sum = 0
            curr_count = 0
            brk_idx += 1
            curr_break = breaks[brk_idx]
        curr_sum += v
        curr_count += 1

    # Flush the final bin so that phi sums to 1
    omega.append(curr_sum / curr_count)
    phi.append(curr_count / N)

    print("Precision of the phi values: " + str(abs(1 - sum(phi))))

    return multinomial(omega, phi)
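multinomial is an external helper that is not shown. A minimal stand-in, purely an assumption so the example runs end to end (with jenkspy imported at module level as in the other examples), can simply bundle the bin means and bin probabilities:

from collections import namedtuple

# Hypothetical stand-in for the unshown multinomial helper:
# omega holds the bin means, phi the bin probabilities.
Multinomial = namedtuple('Multinomial', ['omega', 'phi'])

def multinomial(omega, phi):
    return Multinomial(omega, phi)

dist = emperical_histogram([1.0, 1.2, 5.1, 5.3, 9.8, 10.1], k=3)
print(dist.omega, dist.phi)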
Example No. 5
def cluster_data(confirmed, clusters_config=None):
    if len(confirmed) == 0:
        return {"data": [], "clusters": []}

    if clusters_config is None:
        clusters_config = {"clusters": CLUSTERS, "labels": CLUSTERS_LABELS}

    df = pd.DataFrame(confirmed)
    df = df.dropna(how="any", axis=0, subset=["confirmed"])

    breaks = jenkspy.jenks_breaks(df["confirmed"],
                                  nb_class=clusters_config["clusters"])

    # `round` here is sigfig.round (i.e. `from sigfig import round`);
    # the built-in round() has no `sigfigs` keyword.
    rounded_breaks = list(map(lambda limit: round(limit, sigfigs=3), breaks))

    df["group"] = pd.cut(
        df["confirmed"],
        bins=breaks,
        labels=clusters_config["labels"],
        include_lowest=True,
    )
    df = df.where(pd.notnull(df), None)  # convert NaN to None
    non_inclusive_lower_limits = list(
        map(lambda limit: 0 if limit == 0 else limit + 1,
            rounded_breaks))  # add 1 to all limits (except 0)
    return {
        "data": df.to_dict("records"),
        "clusters": list(zip(non_inclusive_lower_limits, rounded_breaks[1:])),
    }
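A usage sketch with inline data and an explicit config so the module-level CLUSTERS constants are not needed (region names are illustrative; the sigfig package must be importable for the rounding step above):

confirmed = [{"region": "A", "confirmed": 10},
             {"region": "B", "confirmed": 15},
             {"region": "C", "confirmed": 120},
             {"region": "D", "confirmed": 3500}]
result = cluster_data(confirmed,
                      clusters_config={"clusters": 2,
                                       "labels": ["low", "high"]})
print(result["clusters"])  # list of (lower, upper) limits per cluster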
Example No. 6
def assess(df, column):
    import jenkspy
    import pandas as pd

    pd.set_option('mode.chained_assignment', None)
    if len(df[column]) > 6:
        breaks = jenkspy.jenks_breaks(df[column], 5)
        df['p'] = 0
        df.loc[df[column] <= breaks[1], 'p'] = 0.75
        df.loc[(df[column] > breaks[1]) & (df[column] <= breaks[2]), 'p'] = 0.25
        df.loc[(df[column] > breaks[2]) & (df[column] <= breaks[3]), 'p'] = 0.20
        df.loc[(df[column] > breaks[3]) & (df[column] <= breaks[4]), 'p'] = 0.15
        df.loc[(df[column] > breaks[4]) & (df[column] <= breaks[5]), 'p'] = 0.10

    else:
        # Too few rows for five jenks classes: score fixed proportion bands.
        sum_df = df[column].sum()
        df['prop'] = df[column] / sum_df
        df['p'] = 0
        df.loc[df['prop'] <= 0.1, 'p'] = 0.75
        df.loc[(df['prop'] > 0.1) & (df['prop'] <= (0.8 / 3) + 0.1), 'p'] = 0.25
        df.loc[(df['prop'] > (0.8 / 3) + 0.1)
               & (df['prop'] <= (0.8 / 3) * 2 + 0.1), 'p'] = 0.20
        df.loc[(df['prop'] > (0.8 / 3) * 2 + 0.1)
               & (df['prop'] <= 0.9), 'p'] = 0.15
        df.loc[df['prop'] > 0.9, 'p'] = 0.10

    return df['p']
Example No. 7
def jenks_clustering(Ms, clique_start_nr=1, n_clusters=2):
    '''Return the cluster assignments.

    Parameters
    ----------
    Ms : pd.Series
        1D arrays to be clustered
    clique_start_nr : int
        Number from which to start the clique numbering
    n_clusters : int
        Number of clusters to cluster data into

    Returns
    -------
    result : pd.Series
        Series containing which cluster number each of the vertices belong to (result[vertex] = cluster_nr)
    '''

    if n_clusters > 1:
        breaks = np.unique( jenkspy.jenks_breaks(list(Ms), nb_class=n_clusters) )
    else:
        breaks = np.array([min(Ms), max(Ms)])
    
    labels_list = [int(i) + int(clique_start_nr) for i in range(len(breaks) - 1)]
    
    return pd.cut(Ms, bins=breaks, labels=labels_list, include_lowest=True)
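A short usage sketch with a toy series (index labels are illustrative; numpy, pandas and jenkspy are assumed imported at module level):

import pandas as pd

Ms = pd.Series([0.1, 0.15, 0.9, 0.95, 5.0, 5.2],
               index=['v1', 'v2', 'v3', 'v4', 'v5', 'v6'])
clusters = jenks_clustering(Ms, clique_start_nr=1, n_clusters=3)
print(clusters)  # v1/v2 -> 1, v3/v4 -> 2, v5/v6 -> 3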
Example No. 8
def Jenks_algorithm_to_bin_in_3_classes(y_train, y_test):
    """
    Implements the Jenks algorithm to cluster the continuous target variable.
    This algorithm allows us to find natural breaks in a 1D-array.
    Three classes are selected, aiming to represent a low-value pass,
    a medium-value pass and a high-value pass.

    Parameters
    -----------
       y_train: the continuous target array of the training set
       y_test: the continuous target array of the test set
    Returns
    -----------
       y_train_binned: the binned training targets (3 distinct classes)
       y_test_binned: the binned test targets (3 distinct classes)
    """
    breaks = jenkspy.jenks_breaks(y_train, nb_class=3)
    y_train_binned = pd.cut(y_train,
                            bins=breaks,
                            labels=[0, 1, 2],
                            include_lowest=True)

    y_test_binned = y_test.copy()
    y_test_binned.loc[y_test < breaks[1]] = 0
    y_test_binned.loc[(y_test >= breaks[1]) & (y_test < breaks[2])] = 1
    y_test_binned.loc[y_test >= breaks[2]] = 2
    return y_train_binned, y_test_binned
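A quick sketch with synthetic continuous targets (the cluster centers are illustrative):

import numpy as np
import pandas as pd

np.random.seed(0)
y_train = pd.Series(np.concatenate(
    [np.random.normal(m, 0.5, 50) for m in (1.0, 10.0, 100.0)]))
y_test = pd.Series([1.2, 9.5, 98.0])
y_train_binned, y_test_binned = Jenks_algorithm_to_bin_in_3_classes(
    y_train, y_test)
print(y_test_binned.tolist())  # expected classes: 0, 1, 2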
Example No. 9
    def define_levels(self, nb_class, disc_func):
        zi = self.zi
        _min = np.nanmin(zi)

        if not nb_class:
            #            nb_class = int(get_opt_nb_class(len(zi)) - 2)
            nb_class = 8
        if not disc_func or "prog_geom" in disc_func:
            levels = [_min] + [np.nanmax(zi) / i
                               for i in range(1, nb_class + 1)][::-1]
        elif "equal_interval" in disc_func:
            _bin = np.nanmax(zi) / nb_class
            levels = [_min] + [_bin * i for i in range(1, nb_class + 1)]
        elif "percentiles" in disc_func:
            levels = np.percentile(
                np.concatenate((zi[zi.nonzero()], np.array([_min]))),
                np.linspace(0.0, 100.0, nb_class + 1))
        elif "jenks" in disc_func:
            levels = list(
                jenks_breaks(np.concatenate(([_min], zi[zi.nonzero()])),
                             nb_class))
            levels[0] = levels[0] - _min * 0.01
        elif "head_tail" in disc_func:
            levels = head_tail_breaks(
                np.concatenate(([_min], zi[zi.nonzero()])))
        elif "maximal_breaks" in disc_func:
            levels = maximal_breaks(np.concatenate(([_min], zi[zi.nonzero()])),
                                    nb_class)
        else:
            raise ValueError('unknown discretization function: %s' % disc_func)

        return levels
Example No. 10
 def goodness_of_variance_fit(self, array, classes):
     """对一维数据进行聚类
     :param array: type:array. definition:一维数组
     :param classes: type:int. definition:聚类的个数
     :return: gvf:(The Goodness of Variance Fit)type:float. definiti:方差拟合优度,值越大效果越好。
              classes:聚类的阈值区间
     """
     # get the break points
     classes = classes
     classes = jenks_breaks(array, classes)
     classified = np.array([self.classify(i, classes) for i in array])
     # 获取区间最大值
     maxz = max(classified)
     zone_indices = [[
         idx for idx, val in enumerate(classified) if zone + 1 == val
     ] for zone in range(maxz)]
     # sum of squared deviations from the array mean
     sdam = np.sum((array - array.mean())**2)
     array_sort = [
         np.array([array[index] for index in zone]) for zone in zone_indices
     ]
     sdcm = sum(
         np.sum((zone_array - zone_array.mean())**2)
         for zone_array in array_sort
     )
     gvf = (sdam - sdcm) / sdam
     return gvf, classes
Example No. 11
def ver_poligonos(request):

    poligonos_para_analizar = Poligono.objects.all()

    poligonos_clasificados = {'ide': [], 'pob': [], 'clase': []}

    for row in poligonos_para_analizar:
        poligonos_clasificados['ide'].append(row.id)
        poligonos_clasificados['pob'].append(row.poblacion)

    categorias = jenkspy.jenks_breaks(poligonos_clasificados['pob'],
                                      nb_class=3)
    print(categorias)
    print(len(poligonos_clasificados['ide']))

    for i in range(len(poligonos_clasificados['ide'])):
        if poligonos_clasificados['pob'][i] <= categorias[1]:
            poligonos_clasificados['clase'].append('clase1')
        elif poligonos_clasificados['pob'][i] <= categorias[2]:
            poligonos_clasificados['clase'].append('clase2')
        else:  # everything above the second break falls in the top class
            poligonos_clasificados['clase'].append('clase3')

    # ------------------------------------------------------------------------

    poligonos_py = Poligono.objects.all()
    poligonos_json = serialize('geojson', poligonos_py)
    poligonos = json.loads(poligonos_json)

    return render(request, 'mapa/ver_poligonos.html', {
        'poligonos': poligonos,
        'poligonos_clasificados': poligonos_clasificados
    })
Example No. 12
def get_split_point(values, classes, mode, result_decimal):
    rets = []

    if mode == 'equal_count':  # quantiles (equal count)
        df = pd.DataFrame(np.array(values), columns=['data'])
        for i in range(classes):
            p = (i + 1) / classes
            rets.append(df.quantile(p, axis=0, interpolation='linear')['data'])

    elif mode == 'equal_interval':  # equal interval
        value_interval = (max(values) - min(values)) / classes
        for i in range(classes):
            rets.append(min(values) + value_interval * (i + 1))

    elif mode == 'nature_breaks':  # natural breaks (jenks)
        rets = jenkspy.jenks_breaks(values, classes)
        rets.pop(0)  # drop the leading minimum; keep the upper bounds

    elif mode == 'standard_deviation':  # standard deviation
        avg_value = sum(values) / len(values)
        p = pd.Series(values).std()
        delta = (math.ceil(classes / 2) - 1) * 0.5
        for i in range(classes - 1):
            rets.append(avg_value + p * (i * 0.5 - delta))
        rets.append(max(values))

    return [round(x, int(result_decimal)) for x in rets]
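A comparison sketch across the four modes on toy data, assuming the module-level imports the function relies on (math, numpy as np, pandas as pd, jenkspy):

values = [1, 2, 2, 3, 10, 11, 12, 50, 55, 60]
for mode in ('equal_count', 'equal_interval',
             'nature_breaks', 'standard_deviation'):
    print(mode, get_split_point(values, classes=3, mode=mode,
                                result_decimal=2))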
Example No. 13
def mask_ocean_winter(swe_matrix, day=0, nclasses=3):
    """
    Use a winter day to mask ocean pixels out of coastal imagery in arctic.

    There is a clear difference between winter land pixels and ocean pixels
    that classification can sort out for us using a simple jenks classification.
    Data should have already moved through "vector_clean" and "apply_filter"

    Parameters
    ----------
    swe_matrix: np.array
        swe time cube
    day: int
        julian date of time series to use for classification (should be winter)
    nclasses: int
        number of classes to use in jenks classification, defaults to 3
    """
    winter_day = swe_matrix[day, :, :]
    breaks_jenk = jenkspy.jenks_breaks(winter_day.ravel(), nclasses)
    # Assign each pixel to a jenks class; class 1 (the lowest values)
    # is assumed to be ocean.
    classes_jenk = np.digitize(winter_day, breaks_jenk)
    mask = classes_jenk == 1
    winter_day[mask] = -8888
    matrix_mask = np.zeros(swe_matrix.shape, dtype=bool)
    matrix_mask[:, :, :] = winter_day[np.newaxis, :, :] == -8888
    swe_matrix[matrix_mask] = -8888
    return swe_matrix
Example No. 14
def get_breaks(atoms, n_layers):
    atom_zs = np.array([atom.z for atom in atoms])
    try:
        breaks = jenkspy.jenks_breaks(atom_zs, nb_class=n_layers)
    except ValueError:
        # jenks needs more distinct values than classes; fall back to raw zs
        breaks = atom_zs
    return get_unique_list(sorted(breaks))
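get_unique_list is defined elsewhere; a minimal order-preserving stand-in (an assumption, since the original helper is not shown):

def get_unique_list(seq):
    # Drop duplicates while preserving order.
    return list(dict.fromkeys(seq))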
Example No. 15
def decision(lista):
    import jenkspy as jk

    # Drop NaNs before computing the breaks
    lista_sem_na = lista[~np.isnan(lista)]
    if len(lista_sem_na) < 4:
        # Too few values for a 3-class jenks fit: use the maximum as cutoff
        quebra = max(lista_sem_na)
    else:
        # Second-highest break of a 3-class jenks classification
        quebra = jk.jenks_breaks(lista_sem_na, nb_class=3)[-2]
    return quebra
Example No. 16
def jenks_break(act_score_map, num):

    values = []
    act_score_map = sorted(act_score_map.items(),
                           key=lambda x: x[1],
                           reverse=True)
    for _, val in act_score_map:
        values.append(val)
    return jenkspy.jenks_breaks(values, nb_class=num)[1:]
Example No. 17
def natural_banding():
    fn = sys.argv[1]
    num_of_breaks = int(sys.argv[2])

    input_list = []
    with open(fn, 'r') as f:
        for line in f:
            input_list.append(float(line))
    print(jenkspy.jenks_breaks(input_list, num_of_breaks))
Example No. 18
def jenks_discretize(values, number_of_bins=None):
    import jenkspy
    import math
    if number_of_bins is None:
        number_of_bins = min(len(set(values)), round(math.sqrt(len(values))))
    #print(number_of_bins)
    breaks = jenkspy.jenks_breaks(values, int(number_of_bins))
    values_in_bins = [classify(value, breaks) for value in values]
    return values_in_bins
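A usage sketch; classify is the same unshown helper as in Example No. 2, and the stand-in sketched there works here too:

values = [1, 2, 2, 3, 10, 11, 12, 50, 55, 60]
# default bin count: min(#unique, round(sqrt(n))) -> 3 here
print(jenks_discretize(values))
print(jenks_discretize(values, number_of_bins=3))  # [1, 1, 1, 1, 2, 2, 2, 3, 3, 3]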
Example No. 19
 def _discretize_continuous(ar, func=None, num_bins=None):
     '''Assign continuous vector to bins;
     available functions:
     - quantile, uniform, kmeans (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.KBinsDiscretizer.html)
     - jenks (natural breaks, via jenkspy)
     '''
     if func is None:  # no discretization is needed
         return (ar)
     if num_bins is None:
         # by default, num_bins = cube_root(n) or # unique(n)
         num_bins = min(math.floor(len(ar)**(1. / 3.)), len(set(ar)))
     elif num_bins == 0:
         raise ValueError('# bins should be > 0')
     else:
         num_bins = min(num_bins, len(set(ar)))
     if func in ['quantile', 'uniform', 'kmeans']:
         warnings.simplefilter('ignore')  # ignore nan warning
         # remove NaNs temporarily since KBinsDiscretizer currently doesn't handle NaNs
         temp_ar = np.array([x for x in ar if x == x])
         nans = np.array([not x == x for x in ar])
         discretizer = KBinsDiscretizer(n_bins=num_bins,
                                        encode='ordinal',
                                        strategy=func)
         temp_discretized_result = discretizer.fit_transform(
             temp_ar.reshape(len(temp_ar), 1)).reshape(len(temp_ar))
         # assign NaNs to a separate bin
         discretized_result = np.zeros(len(ar))
         discretized_result[~nans] = temp_discretized_result
         discretized_result[nans] = np.nan
         warnings.resetwarnings()
     elif func == 'jenks':
         warnings.simplefilter('ignore')  # ignore nan warning
         breaks = jenkspy.jenks_breaks(ar, nb_class=num_bins)
         # widen the last upper bound so the maximum value lands in the last bin
         breaks[-1] += 1
         discretized_result = []
         for val in ar:
             if np.isnan(val):
                 discretized_result.append(np.nan)
                 continue
             for bound_i in range(1, len(breaks)):
                 if val >= breaks[bound_i - 1] and val < breaks[bound_i]:
                     discretized_result.append(bound_i - 1)
                     break
         discretized_result = np.array(discretized_result, dtype=float)
         warnings.resetwarnings()
     else:
         raise ValueError('Discretization function not available...')
     # tidy up data in case some bins are empty
     counter = 0
     for i in range(int(np.nanmax(discretized_result) + 1)):
         if i not in discretized_result: continue
         discretized_result[discretized_result == i] = counter
         counter += 1
     # assign missing values to a separate bin
     discretized_result[np.isnan(
         discretized_result)] = np.nanmax(discretized_result) + 1
     return (discretized_result)
Example No. 20
 def test_errors(self):
     # Using wrong 'nb_class' argument:
     with self.assertRaises(ValueError):
         jenks_breaks([1, 2, 3, 4], 32)
     with self.assertRaises(ValueError):
         jenks_breaks(self.data2, -5)
     # Using a wrong 'values' argument:
     with self.assertRaises(TypeError):
         jenks_breaks("a sequence of characters", 4)
     with self.assertRaises(TypeError):
         jenks_breaks(['a', 'b', 'c', 'd'], 3)
Example No. 21
def recovery_rate_for_all(df, dgf):
    #------------------------Find ruptures in time------------------------------------------------
    # temp_arr = {}
    # for cell_id in dgf.id.unique():
    #     print("******************************")
    #     print("ID = {}".format(cell_id))
    #     dchange = dgf[(dgf.id == cell_id)][["date_c","rad_corr"]].drop_duplicates()
    #     dates_rpt = changepoint_detection_singlecell(dchange, cell_id, penalty=15, create_plot=False)
    #     print("dates = {}".format(dates_rpt))
    #     temp_arr[cell_id] = dates_rpt
    # code.interact(local=locals())

    #-----------------------Create baseline for recovery tracking---------------------------------------
    di = pd.read_hdf("yemen_groups.h5", key="zeal")
    dbase = df[(df.date_c < "2015-03-26")].groupby(
        ["id", "Latitude", "Longitude"]).mean()[["rad_corr"]].reset_index()
    dbase = dbase.rename(columns={"rad_corr": "rad_base"})
    dbase = pd.merge(dbase, di, left_on=["id"], right_on=["id"], how="left")
    # base_mean = dbase.rad_base.values[0]

    # #---------------------- PICKLED ALREADY----------------
    # dm = df[(df.date_c>="2016-06-04")]
    # dm = dm.groupby(["id",pd.Grouper(freq="1M",key="date_c")]).mean()[["rad_corr"]].reset_index()
    # dm = dm.rename(columns={"rad_corr":"rad_month"})

    # temp_dict = {}
    # for cell_id in df.id.unique():
    #     print("******************************")
    #     print("ID = {}".format(cell_id))
    #     dm_rec = dm[(dm.id == cell_id)].reset_index().drop(columns=["index"])
    #     rate_of_recovery = run_OLS(dm_rec, column=["rad_month"])
    #     temp_dict[cell_id] = rate_of_recovery
    #     print("ROR = {}".format(rate_of_recovery))

    dr = pd.read_hdf("id_ror.h5", key="zeal")
    dr = pd.merge(dr[["id", "ror"]].drop_duplicates(),
                  dbase,
                  left_on=["id"],
                  right_on=["id"],
                  how="left")
    dr["ror_norm"] = (dr["ror"] - min(dr["ror"])) / (max(dr["ror"] -
                                                         min(dr["ror"])))
    breaks = jenkspy.jenks_breaks(dr.ror.values, nb_class=3)
    dr["ror_group"] = dr.ror.apply(lambda x: "Low" if breaks[0] <= x < breaks[
        1] else "Medium" if breaks[1] <= x < breaks[2] else "High")
    dr_gdf = create_geodataframe(dr, buffered=True, radius=462, cap_style=3)

    #---------create plots---------------------------------------------
    # fig, ax1 = plt.subplots(figsize=(4,5))
    # plot_geospatial_heatmap_with_event_locs(geo_df=dr_gdf, col_name="ror_norm", events_data=None, title=None, cmap=cm.seismic, cmap_type="seismic", marker_color=None, events_data_type="locations_points", needs_colormapping=False, add_title=False, event_locs_included=False, include_colorbar=True, with_streetmap=True, ax=ax1)
    # plt.rc('font', size=14)
    # plt.tight_layout()
    # plt.show()

    code.interact(local=locals())
    return None
Example No. 22
def goodness_of_variance_fit(array, classes):
    classes = jenkspy.jenks_breaks(array, nb_class=classes)
    classified = np.array([classify(i, classes) for i in array])
    maxz = np.amax(classified)
    zone_indices = [[idx for idx, val in enumerate(classified) if zone + 1 == val] for zone in range(maxz)]
    sdam = np.sum((array - array.mean()) ** 2)
    array_sort = [np.array([array[index] for index in zone]) for zone in zone_indices]
    sdcm = np.sum([np.sum((cla - cla.mean()) ** 2) for cla in array_sort])
    gvf = (sdam - sdcm) / sdam
    return gvf
Example No. 23
    def test_json_ref(self):
        # Test it against break values computed using another library
        # implementing jenks natural breaks:
        res = jenks_breaks(self.data1, 5)
        self.assertEqual(len(self.res1), len(res))
        for break_values in zip(res, self.res1):
            self.assertAlmostEqual(break_values[0], break_values[1], places=6)

        # Test the result is the same using a python array as input:
        res_py_array = jenks_breaks(array('d', self.data1), 5)
        self.assertEqual(len(self.res1), len(res_py_array))
        for break_values in zip(res_py_array, self.res1):
            self.assertAlmostEqual(break_values[0], break_values[1], places=6)

        # Test the result is the same using a numpy array as input:
        if np:
            data_np = np.array(self.data1)
            res_np = jenks_breaks(data_np, 5)
            self.assertEqual(res_np, res)
Example No. 24
def calculateJenks(auth, args, es):
    #Build the view string from arguments
    view = "dsra_{eq_scenario}_{retrofit_prefix}_{dbview}".format(**{'eq_scenario':args.eqScenario, 'retrofit_prefix':args.retrofitPrefix, 'dbview':args.dbview})
    response = Search(using=es, index=view)
    #Create a dataframe containing the full series of values from the specified view and field
    df = pd.DataFrame([getattr(hit.properties, args.field) for hit in response.scan()], columns=[args.field])
    #Use jenkspy to create natural breaks
    breaks = jenkspy.jenks_breaks(df[args.field], nb_class=args.bins)
    
    return breaks
Example No. 25
def plot_jenks(image, gvt, interactive=False):
    """
    Given swe image, classify using jenks classification.

    Uses goodness of variance fit to optimize number of classes
    given a threshold to maximize to.

    Parameters
    ----------
    image: np.array
        2-d image of swe values to be classified
    gvt: float
        goodness of variance threshold value. Optimize
        gvf until this value is reached.

        Values between 0-1, generally around 0.8
    """
    list_colors = [
        "blue",
        "green",
        "orange",
        "magenta",
        "cyan",
        "gray",
        "red",
        "yellow",
    ]
    classes_jenk = jenkspy.jenks_breaks(
        image.ravel(), optimal_jenk(image.ravel(), gvt)[0]
    )
    classes = np.digitize(image, classes_jenk)
    nclasses = len(classes_jenk)
    fig, ax = plt.subplots(1, 1, figsize=(14, 5))
    xlabel = str(nclasses) + " Classes, Jenks Classification"
    ax.set_title("Jenks Classification with " + str(nclasses) + " Classes")
    ax.set_xlabel(xlabel)
    bounds = range(0, nclasses + 1)
    cmap = c.ListedColormap(list_colors[0:nclasses])
    kmp = ax.imshow(
        classes,
        interpolation="nearest",
        aspect="auto",
        cmap=cmap,
        origin="lower",
    )
    plt.colorbar(kmp, cmap=cmap, ticks=bounds, ax=ax, orientation="vertical")
    if interactive:
        plt.ion()
        plt.show()
        plt.pause(0.001)
        plt.close()
    else:
        plt.show()

    return fig
Example No. 26
    def create_weights(self):
        avg_prices = self.df.groupby(["brand"]).mean()["price"]
        prices_df = pd.DataFrame(data={"avg_prices": avg_prices})
        prices_df = prices_df.sort_values("avg_prices")

        breaks = jenkspy.jenks_breaks(avg_prices, nb_class=28)

        mapping_dict = {}
        for brand, avg_price in zip(prices_df.index, prices_df["avg_prices"]):
            mapping_dict[brand] = bisect.bisect_left(breaks, avg_price) + 1

        self.df["brand_weight"] = self.df["brand"].map(mapping_dict)

        brands_df = {}

        for brand in self.df["brand"]:
            brands_df[brand], _ = [
                x for _, x in self.df.groupby(self.df['brand'] != brand)
            ]

        mapping_dict = {}

        for brand, dataframe in brands_df.items():
            sub_brand_averages = pd.DataFrame(
                data={
                    "sub_brand_averages":
                    dataframe.groupby(["sub-brand"]).mean()["price"]
                })
            sub_brand_breaks = jenkspy.jenks_breaks(
                sub_brand_averages["sub_brand_averages"],
                nb_class=int(len(sub_brand_averages) - 1))
            for i, weight in zip(sub_brand_averages.index,
                                 sub_brand_averages["sub_brand_averages"]):
                mapping_dict[i] = bisect.bisect_left(sub_brand_breaks,
                                                     weight) + 1
            self.df["sub_brand_weight"] = self.df["sub-brand"].map(
                mapping_dict)

        self.df = self.df.drop(1095).dropna()
        self.df = self.df.drop(1728).dropna()
        self.df["no. of cylinders"] = self.df["no. of cylinders"].astype(float)
Example No. 27
 def _get_clfs_weights(self):
     gu = self.global_utilities
     if self.jenks:
         self.natural_breaks = jenkspy.jenks_breaks(gu, nb_class=5)
         gu = [
             i if i >= self.natural_breaks[-self.jenks_limit] else 0
             for i in gu
         ]
     gu_sum = sum(gu)
     for value in gu:
         self.weights.append(value / gu_sum)
Example No. 28
 def breaks(self):
     # todo: handle custom bucket counts
     if not self._breaks:
         values = [
             feat['properties']['value']
             for feat in self.as_geojson()['features']
             if feat['properties']['value'] is not None
         ]
         self._breaks = jenks_breaks(values, nb_class=min(len(values), 6))
     return self._breaks
Example No. 29
    def __determine_knots(self, X: np.array):
        """ Determine the locations of the knots for every feature using Jenks Natural Breaks.

        Arguments:
            X (np.array): The train input used to determine the location of the knots.
        """
        self.knots = []
        for column, num_curves in zip(X.T, self.num_curves):
            breaks = jenks_breaks(column, nb_class=num_curves)
            self.knots.append(breaks)
        self.knots = np.array(self.knots).T
Example No. 30
 def cluster_by_attention_weight(self):
     attention_weight = np.load("average_weight.npy")
     attention_weight_array = attention_weight.reshape(
         [-1, self._num_features])
     all_feature_breaks = []
     for nums in range(self._num_features):
         one_feature_breaks = jenkspy.jenks_breaks(
             attention_weight_array[:, nums], nb_class=5)
         print(one_feature_breaks)
         all_feature_breaks.append(one_feature_breaks)
     np.save("all_features_breaks_ave.npy", all_feature_breaks)