def Hartigans(k, data):
    # Hartigan's k-means on 2-D data: start from a random label assignment, then
    # reassign single points to whichever cluster minimizes the total
    # within-cluster sum of squares.
    start = tm.perf_counter()  # time.clock() was removed in Python 3.8
    Ct = np.hstack(
        (data,
         np.reshape(np.random.choice(range(0, k), data.shape[0], replace=True),
                    (data.shape[0], 1))))
    meu = (npi.group_by(Ct[:, 2]).mean(Ct))[1][:, 0:2]
    Converged = False
    while Converged is False:
        Converged = True
        for j in range(0, Ct.shape[0]):
            Cj = Ct[j, 2]
            dmin = []
            for i in range(0, k):
                Ct[j, 2] = i
                G = (npi.group_by(Ct[:, 2])).split(Ct)
                dist = 0
                #print(G)
                for p in range(0, k):
                    t = (G[p][:, 0:2])
                    mi = np.reshape(np.mean(t, axis=0), (1, 2))
                    t = np.sum((t - mi)**2, axis=1)
                    dist = dist + np.sum(t, axis=0)
                dmin.append(dist)
            Cw = np.argmin(dmin)
            if Cw != Cj:
                Converged = False
                Ct[j, 2] = Cw
                meu = (npi.group_by(Ct[:, 2]).mean(Ct))[1][:, 0:2]
            else:
                Ct[j, 2] = Cj
    end = tm.perf_counter()
    return Ct, np.hstack((meu, np.reshape(np.array(list(range(k))),
                                          (k, 1)))), end - start
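# Usage sketch (not from the original source): run the Hartigan k-means routine
# above on random 2-D data, assuming numpy is imported as np, numpy_indexed as
# npi and time as tm, as the function itself requires.
example_data = np.random.rand(100, 2)
labelled, centroids, elapsed = Hartigans(3, example_data)
# `labelled` is the input with a cluster label appended as a third column,
# `centroids` holds the per-cluster means with their labels, `elapsed` the runtime.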
def cascade_predict(X_out, detect, role, detect_label):

    # # refit both detect and role
    # detectparams = detect.best_estimator_.get_params()
    # roleparams=  role.best_estimator_.get_params()
    # re_detect = detect.best_estimator_.set_params(**detectparams)
    # re_role = role.best_estimator_.set_params(**roleparams)
    #
    # detect = re_detect.fit(X_in, y_in)
    # role = re_role.fit(X_in, y_in)

    # predict holdout for detection y_pred_detect
    y_pred_dtct = detect.predict(X_out).astype(int)

    # split the hold-out indices by predicted detection label (positives vs. negatives)
    unique_dtct, idx_groups_dtct = npi.group_by(y_pred_dtct,
                                                np.arange(len(y_pred_dtct)))
    pred_dtct_idc = dict(zip(unique_dtct, idx_groups_dtct))
    dtct_idc = pred_dtct_idc[detect_label]
    X_dtct = X_out[dtct_idc]

    # predict the role only for the samples flagged by the detector
    y_pred_role = role.predict(X_dtct).astype(int)
    unique_role, idx_groups_role = npi.group_by(y_pred_role,
                                                np.arange(len(y_pred_role)))
    pred_role_idc = dict(zip(unique_role, idx_groups_role))

    # reconstruct full y_pred
    y_pred = reconstruct_list([pred_dtct_idc, pred_role_idc],
                              filter_label=[detect_label])

    return y_pred
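# Sketch of the indexing idiom used above (illustrative data, not from the
# original): grouping np.arange(len(y)) by the predicted labels yields the index
# set of each class.
y_demo = np.array([1, 0, 1, 1, 0])
unique_demo, idx_groups_demo = npi.group_by(y_demo, np.arange(len(y_demo)))
dict(zip(unique_demo, idx_groups_demo))
# -> {0: array([1, 4]), 1: array([0, 2, 3])}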
Example #3
def grouper(x, y):
    '''Group y by x and return the per-group mean and standard deviation.

    Assumed sorted by speed.
    '''
    g = npi.group_by(x)
    result = g.mean(y)
    sdev = g.std(y)
    return result, sdev
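# Usage sketch (illustrative values): each speed value maps to the mean and
# standard deviation of its y samples.
speeds = np.array([10, 10, 20, 20, 20])
samples = np.array([1.0, 3.0, 2.0, 4.0, 6.0])
(unique_speeds, means), (_, stds) = grouper(speeds, samples)
# unique_speeds -> [10, 20], means -> [2.0, 4.0]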
Example #4
    def _group_sample_statistics(self):
        r"""
        Computes group summary statistics (mean, number of observations, and variance), for use when
        performing analysis of variance.

        Returns
        -------
        group_stats : dict
            Dictionary containing each group's mean, number of observations and variance.

        """
        group_means = npi.group_by(self.design_matrix[:, 0],
                                   self.design_matrix[:, 1], np.mean)
        group_obs = npi.group_by(self.design_matrix[:, 0],
                                 self.design_matrix[:, 1], len)
        group_variance = npi.group_by(self.design_matrix[:, 0],
                                      self.design_matrix[:, 1], var)

        groups = len(np.unique(self.design_matrix[:, 0]))

        group_stats = {
            'Group Means': group_means,
            'Group Observations': group_obs,
            'Group Variance': group_variance,
            'Number of Groups': groups
        }

        return group_stats
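# The three-argument form of npi.group_by used above applies the reduction per
# group and returns (key, reduced value) pairs; a standalone sketch with
# illustrative data (not the class's design_matrix):
demo_keys = np.array(['a', 'a', 'b'])
demo_vals = np.array([1.0, 3.0, 2.0])
npi.group_by(demo_keys, demo_vals, np.mean)
# -> [('a', 2.0), ('b', 2.0)]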
Example #5
    def _mse(self):
        r"""
        Calculates the Mean Square Error for use in computing Tukey's HSD.

        Returns
        -------
        mse : float
            The Mean Square Error of the design.

        Notes
        -----
        The MSE is found by dividing the sum of squared errors (SSE) of the design by the total
        number of observations minus the number of groups.

        .. math::

            MSE = \frac{SSE}{N - k}

        """
        group_variance = npi.group_by(self.design_matrix[:, 0],
                                      self.design_matrix[:, 1], var)
        group_n = npi.group_by(self.design_matrix[:, 0],
                               self.design_matrix[:, 1], len)

        sse = 0

        for i, j in zip(group_n, group_variance):
            sse += (i[1] - 1) * j[1]

        mse = sse / (self.n - self.k)

        return mse
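# Worked numeric sketch of the pooled-SSE computation above (illustrative
# numbers): two groups with (n, sample variance) of (5, 2.0) and (7, 3.0) give
# SSE = 4*2.0 + 6*3.0 = 26.0 and, with N = 12 and k = 2, MSE = 26.0 / 10 = 2.6.
demo_n = [('g1', 5), ('g2', 7)]
demo_var = [('g1', 2.0), ('g2', 3.0)]
demo_sse = sum((n - 1) * v for (_, n), (_, v) in zip(demo_n, demo_var))  # 26.0
demo_mse = demo_sse / (12 - 2)  # 2.6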
Example #6
 def double_crossover(parent_1, parent_2):   
     """This funcvtion create 2 childs by same sizes
        but reverses (len(p1) = len(ch2) and ...)
     """
     row1, col1 = parent_1.shape
     row2, col2 = parent_2.shape
     row = np.min([row1,row2])
     rowt1 = (random.randrange(1, row - 1 if row>2 else row))
     rowt2 = (random.randrange(1, row - 1 if row>2 else row))
     # keep the crossover points ordered so the middle slice is non-degenerate
     rowt1, rowt2 = sorted((rowt1, rowt2))
     #print(rowt1,rowt2)
     
     child_1 = np.concatenate((parent_1[:rowt1, :], 
                               parent_2[rowt1:rowt2, :],
                               parent_1[rowt2:, :]
                               ), axis = 0)    
     child_2 = np.concatenate((parent_2[:rowt1, :], 
                               parent_1[rowt1:rowt2, :],
                               parent_2[rowt2:, :]
                              ), axis = 0)
     """after create childs by composit of parents
        probably create duplicated columns then shoud be remove
        by "group by" of "numpy_indexed" 
     """
     
     _, child_1 = npi.group_by(child_1[:,0]).min(child_1)
     _, child_2 = npi.group_by(child_2[:,0]).max(child_2)
     
     return child_1, child_2
Example #7
def make_Xy_cat(X, y, task):

    X_cat, y_cat = npi.group_by(X).mean(y)
    if task == 'quantile':
        y_cat = npi.group_by(X).split(y)
        y_cat = np.array([np.quantile(yc, q=0.75) for yc in y_cat])
    return X_cat, y_cat
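# Usage sketch with illustrative arrays: for any task other than 'quantile' the
# per-group target is the mean; with task='quantile' it is the 0.75 quantile.
X_demo = np.array([0, 0, 1, 1, 1])
y_demo = np.array([1.0, 3.0, 2.0, 4.0, 6.0])
make_Xy_cat(X_demo, y_demo, task='mean')      # -> (array([0, 1]), array([2., 4.]))
make_Xy_cat(X_demo, y_demo, task='quantile')  # -> (array([0, 1]), array([2.5, 5. ]))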
    def mode(self, values, weights=None):
        """compute the mode within each group.

        Parameters
        ----------
        values : array_like, [keys, ...]
            values to compute the mode of per group
        weights : array_like, [keys], float, optional
            optional weight associated with each entry in values

        Returns
        -------
        unique: ndarray, [groups]
            unique keys
        reduced : ndarray, [groups, ...]
            value array, reduced over groups
        """
        if weights is None:
            unique, weights = npi.count(
                (self.index.sorted_group_rank_per_key, values))
        else:
            unique, weights = npi.group_by(
                (self.index.sorted_group_rank_per_key, values)).sum(weights)

        x, bin = npi.group_by(unique[0]).argmax(weights)
        return x, unique[1][bin]
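# The method above mirrors numpy_indexed's GroupBy.mode; a usage sketch through
# the public API (illustrative data):
demo_keys = np.array([0, 0, 0, 1, 1])
demo_vals = np.array([7, 7, 8, 9, 9])
npi.group_by(demo_keys).mode(demo_vals)
# -> (array([0, 1]), array([7, 9]))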
Example #9
def predict(df):
    for col in df:
        df[col] = norm(df[col])
    y = df['time']
    x = df.drop('time', axis=1)
    config_dictionary = json.load(
        open('/home/thanasis/PycharmProjects/dionePredict/analysis.json'))
    plotter = Plotter()
    test_sizes = config_dictionary['train_size']
    plotter.setup_plot(**config_dictionary)

    scores_per_technique = {}

    for model in models:

        if model not in scores_per_technique:
            scores_per_technique[model] = list()
        for size in range(len(test_sizes)):

            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=test_sizes[size])
            regressor = models[model]()
            regressor.fit(x_train, y_train)
            y_pred = regressor.predict(x_test)

            score = regressor.score(x_test, y_test)

            print(models[model])
            print('MSE: ' + str(mean_squared_error(y_test, y_pred)))
            print('R2_Score: ' + str(score))
            print('-----------------------------------------------------')

            scores_per_technique[model].append([test_sizes[size] * 100, score])

    bar_pos = -1.5 * config_dictionary['width']
    pos = 0
    for technique in scores_per_technique:
        print('Plotting results for ' + technique)
        results = scores_per_technique[technique]
        print(results)
        results_numpy = np.array(results)
        config_dictionary['label'] = technique
        config_dictionary['bar_position'] = bar_pos
        config_dictionary['color'] = config_dictionary['colors'][pos]
        x_unique, y_unique = npi.group_by(results_numpy[..., 0]).mean(results_numpy[..., 1])
        x_unique, y_std = npi.group_by(results_numpy[..., 0]).std(results_numpy[..., 1])
        plotter.plot_data_using_error_bars(x_unique, y_unique, y_std,
                                           config_dictionary)
        # plotting_tool.plot_data_using_bars(x_unique, y_unique, config_dictionary)
        bar_pos += config_dictionary['width']
        pos += 1

    plotter.store_and_show(**config_dictionary)
Example #10
    def _mse(self):
        group_variance = npi.group_by(self.ranked_matrix[:, 0],
                                      self.ranked_matrix[:, 2], var)
        group_n = npi.group_by(self.ranked_matrix[:, 0],
                               self.ranked_matrix[:, 2], len)

        sse = 0

        for i, j in zip(group_n, group_variance):
            sse += (i[1] - 1) * j[1]

        return sse / (self.n - self.k)
Example #11
    def _group_comparison(self):
        r"""
        Constructs a pandas DataFrame containing the test results and group comparisons as found by Tukey's HSD test.

        Returns
        -------
        groups : pandas.DataFrame
            DataFrame of group comparison results.

        """
        group_means = npi.group_by(self.design_matrix[:, 0],
                                   self.design_matrix[:, 1], np.mean)

        group_means = [i for _, i in group_means]

        group_mean_differences = np.array(list(combinations(group_means, 2)))[:, 0] - \
                                 np.array(list(combinations(group_means, 2)))[:, 1]

        group_sd = npi.group_by(self.design_matrix[:, 0],
                                self.design_matrix[:, 1], std_dev)
        group_sd = [i for _, i in group_sd]

        group_names = np.unique(self.design_matrix[:, 0])

        groups = pd.DataFrame(np.array(list(combinations(group_names, 2))))

        groups['groups'] = groups[0] + ' - ' + groups[1]
        groups['group means'] = group_means
        groups['mean difference'] = group_mean_differences

        groups['std_dev'] = group_sd

        groups['significant difference'] = np.where(
            np.abs(groups['mean difference']) >= self.hsd, True, False)

        groups['upper interval'] = groups[
            'mean difference'] + self.tukey_q_value * np.sqrt(
                self.mse / 2. * (2. / (self.n / self.k)))

        groups['lower interval'] = groups[
            'mean difference'] - self.tukey_q_value * np.sqrt(
                self.mse / 2. * (2. / (self.n / self.k)))

        q_values = groups['mean difference'] / group_sd

        groups['p_adjusted'] = psturng(np.absolute(q_values), self.n / self.k,
                                       self.dof)

        del groups[0]
        del groups[1]

        return groups
Example #12
    def _getPitParams(self, hl, nbpits):
        """
        Define depression global parameters:

        - volume of each depression
        - maximum filled depth

        :arg hl: numpy array of unfilled surface elevation
        :arg nbpits: number of depressions in the global mesh
        """

        # Get pit parameters (volume and maximum filled elevation)
        ids = self.inIDs == 1
        grp = npi.group_by(self.pitIDs[ids])
        uids = grp.unique
        _, vol = grp.sum((self.lFill[ids] - hl[ids]) * self.larea[ids])
        _, hh = grp.max(self.lFill[ids])
        _, dh = grp.max(self.lFill[ids] - hl[ids])
        totv = np.zeros(nbpits, dtype=np.float64)
        hmax = -np.ones(nbpits, dtype=np.float64) * 1.0e8
        diffh = np.zeros(nbpits, dtype=np.float64)
        ids = uids > -1
        totv[uids[ids]] = vol[ids]
        hmax[uids[ids]] = hh[ids]
        diffh[uids[ids]] = dh[ids]
        MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, totv, op=MPI.SUM)
        MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, hmax, op=MPI.MAX)
        MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, diffh, op=MPI.MAX)

        self.pitParams = np.empty((nbpits, 3), dtype=np.float64)
        self.pitParams[:, 0] = totv
        self.pitParams[:, 1] = hmax
        self.pitParams[:, 2] = diffh

        return
Example #13
def ATE_emp_IPW(Xg, yg, ys, dismiss):
    idx_cat = npi.group_by(Xg).split(np.arange(len(yg)))
    ATE = 0
    n_emp = 0
    cpt, cpt2 = 0, 0
    for ic in idx_cat:
        m1 = yg[ic] == 1
        m0 = yg[ic] == 0
        if dismiss:
            # Dismiss categories with only men/women
            if m1.any() and m0.any():
                ate_cat = (ys[ic][m1].mean() - ys[ic][m0].mean()) * len(ic)
                ATE += ate_cat
                n_emp += len(ic)
                cpt += 1
            else:
                cpt2 += len(ic)
        else:
            # Keep categories with only men/women
            if not m1.any():
                y1 = 0
            else:
                y1 = ys[ic][m1].mean()
            if not m0.any():
                y0 = 0
            else:
                y0 = ys[ic][m0].mean()
            ate_cat = (y1 - y0) * len(ic)
            ATE += ate_cat
            n_emp += len(ic)
    ATE = ATE / n_emp
    print(cpt2)
    return ATE
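# Worked sketch with illustrative arrays: two categories, each containing one
# treated (yg == 1) and one control (yg == 0) sample.
Xg_demo = np.array([0, 0, 1, 1])
yg_demo = np.array([1, 0, 1, 0])
ys_demo = np.array([3.0, 1.0, 5.0, 2.0])
ATE_emp_IPW(Xg_demo, yg_demo, ys_demo, dismiss=True)
# per-category effects (3-1) and (5-2), each weighted by 2 samples -> (4+6)/4 = 2.5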
Example #14
    def join_windows(list_grads):
        """Join windows by averaging overlapping regions of .czar.grad files.
        https://stackoverflow.com/questions/41821539/calculate-average-of-y-values-with-different-x-values

        Parameters
        ----------
        list_grads : list
            list of Grad objects to be combined

        Returns
        -------
        new_grad : Grad
            new Grad object with xdata and ydata of combined grads
        """

        # combine all xdata and all ydata
        x, grad, allfiles = Profile._decompose_list(list_grads)

        # average the values having same x gridpoint
        x_unique, grad_mean = npi.group_by(x).mean(grad)

        # create new grad instance for joined data
        new_grad = Grad(allfiles, x_unique.flatten(), grad_mean.flatten())

        # reorder data for ascending x, then return object
        new_grad._sort_by_x()
        return new_grad
Example #15
    def symmetrize(self):

        # average the values having same abs(x) gridpoint
        rhs_x, rhs_y = npi.group_by(np.abs(self.xdata)).mean(self.ydata)

        # regenerate -x data from averaged values (stored in +x side)
        full_x = np.concatenate((np.flip(-rhs_x), rhs_x))
        full_y = np.concatenate((np.flip( rhs_y), rhs_y))

        # remove the -0.0 entry if it exists
        first_neg_idx = len(rhs_x)-1
        if (rhs_x[0] == 0.0) and (full_y[first_neg_idx] == full_y[len(rhs_x)]):
            full_x = np.delete(full_x, first_neg_idx)
            full_y = np.delete(full_y, first_neg_idx)

        # compute difference before and after symmetrization
        if not np.array_equal(self.xdata, full_x):
            print("   error in subtracting pmfs before/after symmetrization" +
                "\n   the x-range differs here:\n   " +
                np.setdiff1d(self.xdata, full_x))
        else:
            subtracted = np.abs(self.ydata - full_y)
            self.errbar = subtracted

        # set data in object
        self.xdata = full_x
        self.ydata = full_y
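# Standalone sketch of the core symmetrization step above (illustrative data):
# values at +x and -x are averaged onto the positive side.
x_demo = np.array([-1.0, 0.0, 1.0])
y_demo = np.array([2.0, 5.0, 4.0])
rhs_x_demo, rhs_y_demo = npi.group_by(np.abs(x_demo)).mean(y_demo)
# rhs_x_demo -> [0., 1.], rhs_y_demo -> [5., 3.]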
Example #16
    def __init__(self, y1, y2=None, group=None, continuity=True):

        if group is None:
            self.y1 = y1
            self.y2 = y2
        else:
            if len(np.unique(group)) > 2:
                raise ValueError('there cannot be more than two groups')

            obs_matrix = npi.group_by(group, y1)
            self.y1 = obs_matrix[1][0]
            self.y2 = obs_matrix[1][1]

        self.n1 = len(self.y1)
        self.n2 = len(self.y2)
        self.n = self.n1 + self.n2

        self.continuity = continuity
        self.ranks = self._rank()
        self.u_statistic = self._u()
        self.meanrank = self._mu()
        self.sigma = self._sigma_val()
        self.z_value = self._z()
        self.p_value = self._p_val()
        self.effect_size = self._eff_size()
        self.test_summary = {
            'continuity': self.continuity,
            'U': self.u_statistic,
            'mu meanrank': self.meanrank,
            'sigma': self.sigma,
            'z-value': self.z_value,
            'effect size': self.effect_size,
            'p-value': self.p_value,
            'test description': 'Mann-Whitney U test'
        }
Example #17
 def remap_edges(self, field):
     """given a quantity computed on each triangle-edge, sum the contributions from each adjecent triangle"""
     edges = self.edges().reshape(-1, 3, 2)
     sorted_edges = np.sort(edges, axis=-1)
     _, field = npi.group_by(sorted_edges.reshape(-1,
                                                  2)).sum(field.flatten())
     return field
Example #18
    def update_intermediaries(self):
        # Count types
        self.n_i = np.zeros(self.n_cell_types)
        cell_types, counts = np.unique(self.Z, return_counts=True)
        self.n_i[cell_types] = counts

        # Reverse map each type l: list of cells having type l
        values, indices = npi.group_by(self.Z, np.arange(self.n_cells))
        self.Vi = [np.array([]) for _ in range(self.n_cell_types)]
        for i, v in enumerate(values):
            self.Vi[v] = indices[i]

        # For each cell v, number of neighbors of type k
        self.mvk = np.zeros((self.n_cells, self.n_cell_types))
        for i in range(self.n_cells):
            cell_types, counts = np.unique(self.Z[self.graph[i]],
                                           return_counts=True)
            self.mvk[i][cell_types] = counts

        # Count number of edge k->l (in l,k)
        self.ctype_ctype_counts = np.zeros(
            (self.n_cell_types, self.n_cell_types))
        for l in range(self.n_cell_types):
            if len(self.Vi[l]):
                self.ctype_ctype_counts[l] = self.mvk[self.Vi[l]].sum(axis=0)
Example #19
    def _get_bandgroups_with_unique_rpc_coeffs(self) -> List[List]:
        # combine RPC coefficients of all bands in a single numpy array
        band_inds = list(range(len(self.rpc_coeffs_per_band)))
        coeffs_first_band = list(self.rpc_coeffs_per_band.values())[0]
        keys_float = [
            k for k in coeffs_first_band
            if isinstance(coeffs_first_band[k], float)
        ]
        keys_npa = [
            k for k in coeffs_first_band
            if isinstance(coeffs_first_band[k], np.ndarray)
        ]

        coeffs_allbands = None
        for i, coeffdict in enumerate(self.rpc_coeffs_per_band.values()):
            coeffs_curband = np.hstack([[coeffdict[k] for k in keys_float],
                                        *(coeffdict[k] for k in keys_npa)])

            if coeffs_allbands is None:
                coeffs_allbands = np.zeros(
                    (len(band_inds), 1 + len(coeffs_curband)))
                coeffs_allbands[:, 0] = band_inds

            coeffs_allbands[i, 1:] = coeffs_curband

        # get groups of band indices where bands have the same RPC coefficients
        groups = npi.group_by(coeffs_allbands[:, 1:]).split(coeffs_allbands[:, 0])
        groups_bandinds = [group.astype(int).tolist() for group in groups]

        return groups_bandinds
Example #20
def recalculate_data(data, clusters):
    calc_average_change = partial(np.apply_along_axis, func1d=gmean, axis=0)
    clustered = np.empty((clusters.max(), data.shape[1]))
    groups = npi.group_by(clusters).split(data)
    for i, clust in enumerate(groups):
        clustered[i] = calc_average_change(arr=clust)
    return clustered
Example #21
def main(infile, outfile, column):

    # load data
    xdata, ydata = np.loadtxt(infile, usecols=(0, column), unpack=True)

    # round the xdata to four decimal places
    xdata = np.round_(xdata, 4)

    # average the values having same abs(x) gridpoint
    rhs_x, rhs_y = npi.group_by(np.abs(xdata)).mean(ydata)

    # regenerate -x data from averaged values (stored in +x side)
    full_x = np.concatenate((np.flip(-rhs_x), rhs_x))
    full_y = np.concatenate((np.flip(rhs_y), rhs_y))

    # remove the -0.0 entry if it exists
    first_neg_idx = len(rhs_x) - 1
    if (rhs_x[0] == 0.0) and (full_y[first_neg_idx] == full_y[len(rhs_x)]):
        full_x = np.delete(full_x, first_neg_idx)
        full_y = np.delete(full_y, first_neg_idx)

    # save to file
    np.savetxt(outfile,
               np.column_stack((full_x, full_y)),
               delimiter='\t',
               fmt='%10.4f %10.10f')
Example #22
    def _pitInformation(self, gZ, hFill):
        """

        Function to extract the volume of all depressions based on the current elevation, the depressionless
        one and the voronoi cell areas. It also stores the spillover vertex indices for each of the depressions. It is run over the global mesh.

        .. note::

            This function uses the **numpy-indexed** library which contains functionality for indexed
            operations on numpy ndarrays and provides efficient vectorized functionality such as
            grouping and set operations.

        :arg gZ: global elevation numpy array
        :arg hFill: global depressionless elevation numpy array
        """

        # Compute pit volumes
        groupPits = npi.group_by(self.pits[:, 0])
        pitNb, self.pitVol = groupPits.sum((hFill - gZ) * self.garea)
        _, outids, _ = np.intersect1d(self.pits[:, 0], pitNb, return_indices=True)
        self.outFlows = self.pits[outids, 1]

        del groupPits, pitNb, outids
        gc.collect()

        return
Example #23
 def best_individual(self):
     """Return the individual with the best fitness in the current
     generation.
     """
     best = self.current_generation[0] 
     _, genes = npi.group_by(best.genes[:,0]).max(best.genes)
     return (best.fitness, genes)
Example #24
 def add_swap(parent, meta_data):
     """This function creates a new child by adding
        rows and then swapping the last and a random row
     """
 
     child = parent
     points = meta_data[0]
     rq_time = meta_data[1]
     
     msk = np.isin(points, child[:,0])
     points_accpt = points[~msk]
     p = 1/len(points_accpt) if len(points_accpt)>0 else 1 
     #print(points_accpt)
     
     while p < 1:
         #print(p)
         new_row = np.array([[np.random.choice(points_accpt, 1, p)[0],60]])    
         child = np.append(child, new_row, axis=0)
         
         msk = np.isin(points, child[:,0])
         points_accpt = points[~msk]
         p = 1/len(points_accpt) if len(points_accpt)>0 else 1 
     
     row , col = child.shape
     rowt = random.randrange(1, row - 1 if row>2 else row)
     #print(rowt)
     child[rowt, 0] ,child[row-1, 0] = child[row-1, 0], child[rowt, 0]
     _, child = npi.group_by(child[:,0]).min(child)
     
     return child
Example #25
 def add_swap(parent, meta_data):
     """This function vreate new child with adding
        rows and then swaping last and random row
     """
 
     child = parent
     points = meta_data[:,0]
     rq_time = meta_data[:,1]
     
     msk = np.isin(meta_data[:,0], child[:,0])
     points_accpt = meta_data[~msk]
     row = len(points_accpt)
     p = 1/row if row>0 else 1 
     #print(points_accpt)
     
     while p < 1:
         #print(p)                  
         rowAcpt = random.randrange(1, row - 1 if row>2 else row)
         new_row = points_accpt[rowAcpt] 
         try:
             child = np.vstack((child, new_row))
         except:
             print('------------------')
         msk = np.isin(meta_data[:,0], child[:,0])
         points_accpt = meta_data[~msk]
         row = len(points_accpt)
         p = 1/row if row>0 else 1  
     
     row , col = child.shape
     rowt = random.randrange(1, row - 1 if row>2 else row)
     #print(rowt)
     child[rowt] ,child[row-1] = child[row-1], child[rowt]
     _, child = npi.group_by(child[:,0]).min(child)
     
     return child
Example #26
def fitness(individual, meta_data):    
    _, individual = npi.group_by(individual[:,0]).max(individual)
    
#    individual = set_const(individual, const)
    calc_starttime(individual)
    individual = set_const(individual, const)
    
    len_pln = len(individual)
    edge = len_pln - 1   
    pln_pnt = individual[:,0]
    len_points = len(points)
    all_duration = np.sum(individual[:,1])    
    end_plan = individual[edge,3]+individual[edge,1]
    all_dist = end_plan  - all_duration
    
    cost_fultm = cost_fulltime(individual, end_plan)
    cost_lntm  = cost_lentime(individual, all_dist, all_duration)
    cost_cnt   = cost_count(individual, meta_data)
    cost_vis_time, cost_rq_time = cost_diffTime(individual)
#    print('cost_fultm: '+str(cost_fultm))
#    print('cost_lntm: '+str(cost_lntm))
#    print('cost_cnt: '+str(cost_cnt))
#    print('cost_diff_rqTime: '+str(cost_diff_rqTime))   
    cost =((coh_fultm*cost_fultm) + 
           (coh_lntm*cost_lntm) + 
           (coh_cnt*cost_cnt) + 
           (coh_dffRqTime*cost_rq_time)+
           (coh_dffVisTime*cost_vis_time)
           )    
#    print(cost)
#    msk = np.isin(const[:,0], individual[:,0])
#    notUsed_const = const[~msk]
#    penalty = np.sum(notUsed_const[:,1]) / tot_lenTimeConst   
    
    return cost #*(1 + (coh_pnlty*penalty))
 def _update_beta(self, X):
     #print "b_gamma 0", self.gamma_b.shape
     #print "b_rho 0", self.rho_b.shape
     self.gamma_b = self.b + npi.group_by(self.row_index).sum(self.phi_var)[1]
     self.rho_b = self.b + np.sum(self.Et, axis=0, keepdims=True)
     #print "b_gamma 1", self.gamma_b.shape
     #print "b_rho 1", self.rho_b.shape
     self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
Example #28
def visualize_gridsearch(results, pipe_step, parameter):
    plt.figure(figsize=(13, 13))
    plt.title("Evaluation of the Parameter %s" % (parameter), fontsize=16)

    plt.xlabel(parameter)
    plt.ylabel("Average Score")
    plt.grid()

    ax = plt.axes()

    # Get the regular numpy array from the MaskedArray

    X_axis = np.array(results['param_%s' % (pipe_step)].data, dtype=float)

    for sample, style in (('train', '--'), ('test', '-')):
        x_unique, sample_score_mean = npi.group_by(X_axis).mean(
            results['mean_%s_score' % (sample)])
        x_unique, sample_score_std = npi.group_by(X_axis).mean(
            results['std_%s_score' % (sample)])
        ax.fill_between(x_unique,
                        sample_score_mean - sample_score_std,
                        sample_score_mean + sample_score_std,
                        alpha=0.1 if sample == 'test' else 0)
        ax.plot(x_unique,
                sample_score_mean,
                style,
                alpha=1 if sample == 'test' else 0.7,
                label=sample)

    best_index = np.nonzero(results['rank_test_score'] == 1)[0][0]
    best_score = results['mean_test_score'][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([
        X_axis[best_index],
    ] * 2, [0, best_score],
            linestyle='-.',
            marker='x',
            markeredgewidth=3,
            ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score, (X_axis[best_index], best_score + 0.005))

    plt.legend(loc="best")
    plt.show()
 def _update_theta(self, X):
     #print "t_gamma 0", self.gamma_t.shape
     #print "t_rho 0", self.rho_t.shape
     self.gamma_t = self.a + npi.group_by(self.cols_index).sum(self.phi_var)[1]
     self.rho_t = self.a  + np.sum(self.Eb, axis=0, keepdims=True)
     #print "t_gamma 1", self.gamma_t.shape
     #print "t_rho 1", self.rho_t.shape
     self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t)
Example #30
    def matheron(self, bin_type="auto", bins=10, var=False):
        #should add binning with constant number of values
        """
        Calculate the Matheron variogram for points and field values previously
        fed into the variogram. A few options for specifying binning exist.

        Parameters
        ----------
        bin_type : str
            Descriptor of the format of data passed into the bin parameter.
            Can be one of:
                * "auto" : select bounds to be (0, self.range[1]/2), bin
                    centers will be calculated accordingly based on user given
                    number of bins.
                * "lin" : bin boundaries will be linearly spaced based on a
                    user given minima, maxima and number of bins. Bin centers
                    will not fall on given maxima and minima.
                * "bound" : bounds will be completely given by the user as a
                    numpy array
        bins : int, list, array-like
            Description of how binning will be performed in Matheron
            variogram, specific formats given below for each bin type.
                * "auto" : int giving number of bins to use
                * "lin" : list containing three entries. First is minima of
                    bin boundaries, second is maxima of bin bounds and third
                    is the number of bins to use as int
                * "bound" : array-like object specifying boundaries of bins.
                    Number of bins is length of this array - 1.
        var : bool
            Set True for bin-wise variance to be calculated and returned

        Returns
        -------
        centers : numpy.ndarray
            Bin centers used for variogram
        n_bins : numpy.ndarray
            Number of point relations used to calculate each semivariance
        v : numpy.ndarray
            Estimated semivariance values at lags corresponding to bin centers
        v_var (optional) : numpy.ndarray
            Variance associated with squared difference values within a bin
        """
        bins = self.set_bins(bin_type, bins)
        centers = bins[:-1] + np.diff(bins, 1) / 2

        b_ind = np.digitize(self.lags, bins)
        n_bins = np.bincount(b_ind - 1)[:-1]

        gp = group_by(b_ind[np.where(b_ind != bins.size)])
        _, v = gp.mean(self.diffs[np.where(
            b_ind != bins.size)])  #account for lags bigger than bins

        if var:
            _, v_var = gp.var(self.diffs[np.where(
                b_ind != bins.size)])  #account for lags bigger than bins
            return centers, n_bins, v, v_var
        else:
            return centers, n_bins, v
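# Standalone sketch of the binned estimator core above, with illustrative `lags`
# and `diffs` arrays and explicit bin bounds (assumes `group_by` is imported
# from numpy_indexed, as in the method):
demo_lags = np.array([0.1, 0.4, 0.6, 1.2, 1.4])
demo_diffs = np.array([0.2, 0.3, 0.5, 0.9, 1.1])
demo_bins = np.array([0.0, 0.5, 1.0, 1.5])
demo_ind = np.digitize(demo_lags, demo_bins)   # bin index per lag
keep = demo_ind != demo_bins.size              # drop lags beyond the last bin
_, demo_v = group_by(demo_ind[keep]).mean(demo_diffs[keep])
# demo_v -> [0.25, 0.5, 1.0], one semivariance estimate per occupied bin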
    def boundary_edges(self):
        """

        Returns
        -------
        ndarray, [3, 2**level], int
        """
        r, c = np.nonzero(self.boundary_edges_chain)
        return npi.group_by(c).split_array_as_array(r)
    def boundary_vertices(self):
        """

        Returns
        -------
        ndarray : [3, 1], int
        """
        r, c = np.nonzero(self.boundary_vertices_chain)
        return npi.group_by(c).split_array_as_array(r)
Example #33
    def get_coset(self, orbit):
        """Compute the cosets, given a labeling of all elements describing the orbits

        Returns
        -------
        array_like, [n-index], of ndarray, [coset_size], int
            each array represents a set of elements that form a coset
        """
        n_tiles, labels = orbit
        cosets = npi.group_by(labels).split(np.arange(len(labels)))
        return cosets
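# Sketch of the grouping step above with an illustrative labeling: each coset is
# the set of element indices that share an orbit label.
demo_labels = np.array([0, 1, 0, 2, 1])
npi.group_by(demo_labels).split(np.arange(len(demo_labels)))
# -> [array([0, 2]), array([1, 4]), array([3])]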
    def boundary_info(self):
        """Return terms describing how the triangle boundary stitches together

        Returns
        -------
        vertices : ndarray, [n_terms], int
            the vertex index this boundary term applies to
            single number for edge vertices; multiple entries for corner vertices
        quotient : ndarray, [n_terms], int
            relative element in quotient group to reach opposing element
            how current index relates to other side of the term
        neighbor : ndarray, [n_terms], int
            relative element in quotient group to reach opposing element
            how current index relates to other side of the term
        sub : ndarray, [n_terms], int
            relative subgroup transform.
            only needed by normal transformation so far to get transformations
        """
        #
        vi = self.group.vertex_incidence            # [n_vertex_entries, 4]
        ei = self.group.edge_incidence              # [n_edge_entries, 4]

        # these are the vertex indices for all edges and corners of the triangle
        bv = self.triangle.boundary_vertices        # [3, 1]
        be = self.triangle.boundary_edge_vertices   # [3, n_boundary_edges]

        def broadcast(a, b):
            shape = len(b), len(a), 3
            a = np.broadcast_to(a[None], shape)
            b = np.broadcast_to(b[:, None], shape[:-1])
            return np.concatenate([b.reshape(-1, 1), a.reshape(-1, 3)], axis=1)

        v = [broadcast(a, b) for a, b in zip(npi.group_by(vi[:, 0]).split(vi[:, 1:]), bv)]
        e = [broadcast(a, b) for a, b in zip(npi.group_by(ei[:, 0]).split(ei[:, 1:]), be)]

        return np.concatenate(v + e, axis=0)
Example #35
    def self_intersect(self):
        """
        test curve of arc-segments for intersection
        raises exception in case of intersection
        alternatively, we might resolve intersections by point insertion
        but this is unlikely to have any practical utility, and more likely to be annoying
        """
        vertices = self.vertices
        faces = self.faces
        tree   = KDTree(vertices)
        # curve points per edge, [n, 2, 3]
        cp     = util.gather(faces, vertices)
        # normal rotating end unto start
        normal = util.normalize(np.cross(cp[:,0], cp[:,1]))
        # midpoints of edges; [n, 3]
        mid    = util.normalize(cp.sum(axis=1))
        # vector from end to start, [n, 3]
        diff   = np.diff(cp, axis=1)[:,0,:]
        # radius of sphere needed to contain edge, [n]
        radius = np.linalg.norm(diff, axis=1) / 2 * 1.01

        # FIXME: this can be vectorized by adapting pinv
        projector = [np.linalg.pinv(q) for q in np.swapaxes(cp, 1, 2)]

        # incident[vertex_index] gives a list of all incident edge indices
        incident = npi.group_by(faces.flatten(), np.arange(faces.size))

        def intersect(i,j):
            """test if spherical line segments intersect. bretty elegant"""
            intersection = np.cross(normal[i], normal[j])                               #intersection direction of two great circles; sign may go either way though!
            return all(np.prod(np.dot(projector[e], intersection)) > 0 for e in (i,j))  #this direction must lie within the cone spanned by both sets of endpoints
        for ei,(p,r,cidx) in enumerate(zip(mid, radius, faces)):  # itertools.izip is Python 2 only
            V = [v for v in tree.query_ball_point(p, r) if v not in cidx]
            edges = np.unique([ej for v in V for ej in incident[v]])
            for ej in edges:
                if len(np.intersect1d(faces[ei], faces[ej])) == 0:      #does not count if edges touch
                    if intersect(ei, ej):
                        raise Exception('The boundary curves intersect. Check your geometry and try again')
 def stitch_groups(self):
     info = self.boundary_info
     groups = npi.group_by(info[:, :2])
     return groups