def contour_plot(ax, g, pts, wmax, num_contours, my_colors, pts_levels):
    #### define input space for function and evaluate ####
    w1 = np.linspace(-wmax, wmax, 100)
    w2 = np.linspace(-wmax, wmax, 100)
    w1_vals, w2_vals = np.meshgrid(w1, w2)
    w1_vals.shape = (len(w1)**2, 1)
    w2_vals.shape = (len(w2)**2, 1)
    h = np.concatenate((w1_vals, w2_vals), axis=1)
    func_vals = np.asarray([g(s) for s in h])
    w1_vals.shape = (len(w1), len(w1))
    w2_vals.shape = (len(w2), len(w2))
    func_vals.shape = (len(w1), len(w2))

    ### make contour right plot - as well as horizontal and vertical axes ###
    # set level ridges
    levelmin = min(func_vals.flatten())
    levelmax = max(func_vals.flatten())
    cutoff = 0.3
    cutoff = (levelmax - levelmin) * cutoff
    numper = 3
    levels1 = np.linspace(cutoff, levelmax, numper)
    num_contours -= numper

    ##### plot filled contours with generic contour lines #####
    # produce generic contours
    levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
    levels = np.unique(np.append(levels1, levels2))
    num_contours -= numper
    while num_contours > 0:
        cutoff = levels[1]
        levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
        levels = np.unique(np.append(levels2, levels))
        num_contours -= numper

    # plot the contours
    ax.contour(w1_vals, w2_vals, func_vals, levels=levels[1:], colors='k')
    ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')

    ###### add contour curves based on input points #####
    # add to this list the contours passing through input points
    ax.contour(w1_vals,
               w2_vals,
               func_vals,
               levels=pts_levels,
               colors='k',
               linewidths=3)
    ax.contour(w1_vals,
               w2_vals,
               func_vals,
               levels=pts_levels,
               colors=my_colors,
               linewidths=2.5)

    ###### clean up plot ######
    ax.set_xlabel('$w_0$', fontsize=12)
    ax.set_ylabel('$w_1$', fontsize=12, rotation=0)
    ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
    ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
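A minimal usage sketch for the helper above (my assumed calling convention, not from the original project): the cost g is a simple quadratic and the highlighted levels pass through two hand-picked points.
import numpy as np
import matplotlib.pyplot as plt

g = lambda w: w[0]**2 + w[1]**2                # simple convex cost
pts = np.array([[1.0, 0.5], [2.0, -1.0]])      # points whose level curves get highlighted
pts_levels = sorted(g(p) for p in pts)         # contour() expects increasing levels
fig, ax = plt.subplots(figsize=(5, 5))
contour_plot(ax, g, pts, wmax=3, num_contours=15,
             my_colors=['magenta', 'lime'], pts_levels=pts_levels)
plt.show()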
Example #2
def reduce_grid(x):
    """
    Undoes expand_grid to take (nx, 2) array to two vectors containing unique values of each col.
    :param x: (nx, 2) points
    :return: x1, x2 each vectors
    """
    x1 = np.sort(np.unique(x[:, 0]))
    x2 = np.sort(np.unique(x[:, 1]))
    return x1, x2
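A quick round trip; the grid is built inline the way an expand_grid helper typically would (expand_grid itself is not shown on this page, so this construction is an assumption):
import numpy as np

x1 = np.array([0.0, 0.5, 1.0])
x2 = np.array([-1.0, 1.0])
grid = np.array(np.meshgrid(x1, x2)).T.reshape(-1, 2)  # all (x1, x2) pairs, shape (6, 2)
r1, r2 = reduce_grid(grid)
assert np.allclose(r1, x1) and np.allclose(r2, x2)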
Example #3
    def surface_plot(self,g,ax,wmax,view):
        ##### Produce cost function surface #####
        r = np.linspace(-wmax,wmax,300)

        # create grid from plotting range
        w1_vals,w2_vals = np.meshgrid(r,r)
        w1_vals.shape = (len(r)**2,1)
        w2_vals.shape = (len(r)**2,1)
        w_ = np.concatenate((w1_vals,w2_vals),axis = 1)
        g_vals = []
        for i in range(len(r)**2):
            g_vals.append(g(w_[i,:]))
        g_vals = np.asarray(g_vals)
        
        w1_vals.shape = (np.size(r),np.size(r))
        w2_vals.shape = (np.size(r),np.size(r))
        
        ### is this a counting cost?  if so plot it level by level ###
        levels = np.unique(g_vals)
        if np.size(levels) < 30:
            # plot each level of the counting cost
            for u in levels:
            for u in levels:
                # make copy of cost and nan out all non level entries
                z = g_vals.copy()
                ind = np.argwhere(z != u)
                ind = [v[0] for v in ind]
                z[ind] = np.nan

                # plot the current level
                z.shape = (len(r),len(r)) 
                ax.plot_surface(w1_vals,w2_vals,z,alpha = 1,color = '#696969',zorder = 0,shade = True,linewidth=0)

        else: # smooth cost function, plot usual
            # reshape and plot the surface, as well as where the zero-plane is
            g_vals.shape = (np.size(r),np.size(r))

            # plot cost surface
            ax.plot_surface(w1_vals,w2_vals,g_vals,alpha = 1,color = 'w',rstride=25, cstride=25,linewidth=1,edgecolor = 'k',zorder = 2)  
        
        ### clean up panel ###
        ax.xaxis.pane.fill = False
        ax.yaxis.pane.fill = False
        ax.zaxis.pane.fill = False

        ax.xaxis.pane.set_edgecolor('white')
        ax.yaxis.pane.set_edgecolor('white')
        ax.zaxis.pane.set_edgecolor('white')

        ax.xaxis._axinfo["grid"]['color'] =  (1,1,1,0)
        ax.yaxis._axinfo["grid"]['color'] =  (1,1,1,0)
        ax.zaxis._axinfo["grid"]['color'] =  (1,1,1,0)

        ax.view_init(view[0],view[1])
Example #4
    def predict(self, X, y=None):
        '''
        Run a forward pass and return predictions; when labels are
        supplied, also return an accuracy / error metric.
        '''
        AL = self.forward_prop(X, self.final_params)

        if y is None:
            # no labels supplied: return raw predictions only
            if self.activations[-1] == 'softmax':
                return AL.argmax(axis=0)
            return AL

        if self.activations[-1] == 'softmax':
            # multiclass: predicted class is the argmax over output units
            y_hat = AL.argmax(axis=0)
            y = np.argmax(y, axis=0)
            acc = (y_hat == y).mean()

        else:
            # Classification Problem
            if len(np.unique(y)) == self.layer_info[-1] or len(
                    np.unique(y)) == 2:
                # threshold the sigmoid outputs at 0.5
                y_hat = (AL > 0.5).astype(float)
                acc = (y_hat == y).mean()
                print("Accuracy:", acc)
                return y_hat, acc

            # Regression Problem
            else:
                y_hat = AL
                mse = np.mean((y[0] - y_hat[0])**2)
                rmse = np.sqrt(mse)
                print("RMSE:", rmse)
                return y_hat, rmse

        print("Test Accuracy:", acc)
        return y_hat, acc
Example #5
def rank_data(x):
    """ Ranks a set of observations, assigning the average of ranks to ties. 


    Arguments:

        x: `ndarray(nsamples)`. Vector of data to be compared

    Returns:

        ranks: `ndarray(nsamples)`. Ranks for each observation
    
    """
    x = x.flatten()
    nsamples = x.size

    # Sort in ascending order
    idx = np.argsort(x)
    ranks = np.empty(idx.size)
    ranks[idx] = np.arange(idx.size) + 1

    # Now average the ranks for ties
    unique_x = np.unique(x)
    if unique_x.size < nsamples:
        for i, xi in enumerate(unique_x):
            if x[x == xi].size > 1:
                ranks[x == xi] = np.mean(ranks[x == xi])

    return ranks
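For example, the two tied values below share the average of ranks 3 and 4:
import numpy as np

x = np.array([1.0, 3.0, 2.0, 3.0, 5.0])
print(rank_data(x))  # [1.  3.5 2.  3.5 5. ]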
Example #6
    def m_step(self,
               expectations,
               datas,
               inputs,
               masks,
               tags,
               optimizer="adam",
               num_iters=10,
               **kwargs):
        """
        Fit a logistic regression for the transitions.
        
        Technically, this is a stochastic M-step since the states 
        are sampled from their posterior marginals.
        """
        K, M, D = self.K, self.M, self.D

        zps, zns = [], []
        for Ez, _, _ in expectations:
            z = np.array([np.random.choice(K, p=p) for p in Ez])
            zps.append(z[:-1])
            zns.append(z[1:])

        X = np.vstack([
            np.hstack((input[1:], data[:-1]))
            for input, data in zip(inputs, datas)
        ])
        y = np.concatenate(zns)

        # Identify used states
        used = np.unique(y)
        K_used = len(used)
        unused = np.setdiff1d(np.arange(K), used)

        # Reset parameters before filling in
        self.Ws = np.zeros((K, M))
        self.Rs = np.zeros((K, D))
        self.r = np.zeros((K, ))

        if K_used == 1:
            warn(
                "RecurrentOnlyTransitions: Only using 1 state in expectation. "
                "M-step cannot proceed. Resetting transition parameters.")
            return

        # Fit the logistic regression
        self._lr.fit(X, y)

        # Extract the coefficients
        assert self._lr.coef_.shape[0] == (K_used if K_used > 2 else 1)
        if K_used == 2:
            # lr thought there were only two classes
            self.Ws[used[1]] = self._lr.coef_[0, :M]
            self.Rs[used[1]] = self._lr.coef_[0, M:]
        else:
            self.Ws[used] = self._lr.coef_[:, :M]
            self.Rs[used] = self._lr.coef_[:, M:]

        # Set the intercept
        self.r[used] = self._lr.intercept_
Example #7
def preprocessing(problem):

    path = os.path.join('datasets', '{}.csv'.format(problem))
    data = np.genfromtxt(path, delimiter=',')

    inputs = data[:, :-1]
    labels = data[:, -1]

    n_classes = len(np.unique(labels))
    n_dims = inputs.shape[1]

    # one-hot code targets
    if np.min(labels) != 0:
        labels -= 1  # need dummy code to start at zero for this to work
    labels = labels.astype(int)
    labels = np.eye(n_classes)[np.array(labels)]

    # norm data to be between -1 and 1
    if problem[:-1] != 'shj':
        inputs -= np.min(inputs, axis=0)
        inputs /= np.ptp(inputs, axis=0)
        inputs *= 2
        inputs -= 1
        full_set = np.append(inputs, labels, 1)
    else:
        full_set = np.append(inputs, labels, 1)
        full_set = np.concatenate((full_set, full_set),
                                  axis=0)  # to match Nosofsky+ '94

    return [full_set, n_classes, n_dims]
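The np.eye indexing used above for the one-hot coding is worth isolating; given zero-based integer labels it returns one row of the identity matrix per sample:
import numpy as np

labels = np.array([0, 2, 1, 2])
print(np.eye(3)[labels])
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]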
Example #8
    def _prox(self, beta, thresh):
        """Proximal operator for the group soft-thresholding step."""
        group_ids = np.unique(self.group)
        result = np.zeros(beta.shape, dtype=float)
        for gid in group_ids:
            idxs_to_update = np.where(self.group == gid)[0]
            group_norm = np.linalg.norm(beta[idxs_to_update])
            if group_norm > 0.:
                # shrink the whole group toward zero by a factor 1 - thresh/||beta_g||
                shrunk = beta[idxs_to_update] * (1. - thresh / group_norm)
                # clip so that no coefficient crosses zero: signs are preserved
                posind = np.where(beta[idxs_to_update] > 0.)[0]
                negind = np.where(beta[idxs_to_update] < 0.)[0]
                po = beta[idxs_to_update].copy()
                po[posind] = np.clip(shrunk[posind], a_min=0., a_max=None)
                po[negind] = np.clip(shrunk[negind], a_min=None, a_max=0.)
                result[idxs_to_update] = po
        return result
Example #9
    def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
        """
        Return the cumulative hazard rate of subjects in X at time points.

        Parameters
        ----------

        X: numpy array or DataFrame
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.
        times: iterable, optional
            an iterable of increasing times to predict the cumulative hazard at. Default
            is the set of all durations (observed and unobserved). Uses a linear interpolation if
            points in time are not in the index.
        ancillary_X: numpy array or DataFrame, optional
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.

        Returns
        -------
        cumulative_hazard_ : DataFrame
            the cumulative hazard of individuals over the timeline
        """

        times = coalesce(times, self.timeline, np.unique(self.durations))
        exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
        mu_ = np.log(exp_mu_)
        Z = np.subtract.outer(np.log(times), mu_) / sigma_
        return pd.DataFrame(-logsf(Z), columns=_get_index(X), index=times)
Example #10
    def getLevelClusters(self, rowNum):
        clusterLabels = np.unique(self.clusterAssignments[rowNum])
        clusters = {
            c: np.where(self.clusterAssignments[rowNum] == c)[0]
            for c in clusterLabels
        }
        return clusters
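The same np.unique/np.where pattern works on any flat label vector, e.g.:
import numpy as np

assignments = np.array([2, 0, 2, 1, 0, 2])
for c in np.unique(assignments):
    print(c, np.where(assignments == c)[0])
# 0 [1 4]
# 1 [3]
# 2 [0 2 5]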
Example #11
def handle_time_inds(times, h=None):
    """
    Takes a list of time vectors and returns the unique, potentially
    augmented, time vector along with indices into it for each input vector.
    """
    # get size of each vector
    t_sizes = [len(t) for t in times]

    # concatenate to single time vector
    tt = np.concatenate(times)

    # get the distinct times, and the indices
    tt_uni, inv_ind = np.unique(tt, return_inverse=True)

    # split inv_ind up
    ind_ti = util._unpack_vector(inv_ind, t_sizes)

    if h is None:
        return tt_uni, ind_ti

    elif isinstance(h, float) and h > 0:
        # augment the time vector so that diff is at most h
        ttc, inds_c = augment_times(tt_uni, h)
        data_inds = [inds_c[ind_i] for ind_i in ind_ti]
        return ttc, data_inds

    else:
        raise ValueError("h should be a float > 0")
Example #12
    def fit_k_class_regularised(self,
                                X,
                                y,
                                batch_size=None,
                                n_iter=200,
                                lr=0.01,
                                lr_type='constant'):

        if batch_size is None:
            batch_size = len(X)
        self.batch_size = batch_size
        self.n_iter = n_iter
        k = len(np.unique(y))
        column_length = len(X.columns)
        theta = np.zeros((k, column_length))
        self.coef_ = theta
        for i in range(1, n_iter):
            # recompute softmax probabilities with the current weights
            # (k_class_predict reads self.coef_, which aliases theta)
            soft = self.k_class_predict(X)
            current = theta.copy()
            for j in range(k):
                theta[j] = current[j] + lr * (np.sum(X * (np.tile(
                    np.where(y == j, 1, 0) - soft[:, j],
                    (len(current[0]), 1)).T),
                                                     axis=0))

        self.coef_ = theta
Example #13
    def plot_data(self, ax, special_class, special_size):
        # scatter points in both panels
        class_nums = np.unique(self.y)
        C = len(class_nums)
        for c in range(C):
            ind = np.argwhere(self.y == class_nums[c])
            ind = [v[1] for v in ind]
            s = 80
            if class_nums[c] == special_class:
                s = special_size
            ax.scatter(self.x[0, ind], self.x[1, ind], s=s, color=self.color_opts[c], edgecolor='k', linewidth=1.5)

        # control viewing limits
        minx = min(self.x[0, :])
        maxx = max(self.x[0, :])
        gapx = (maxx - minx) * 0.1
        minx -= gapx
        maxx += gapx

        miny = min(self.x[1, :])
        maxy = max(self.x[1, :])
        gapy = (maxy - miny) * 0.1
        miny -= gapy
        maxy += gapy

        ax.set_xlim([minx, maxx])
        ax.set_ylim([miny, maxy])
        ax.axis('off')
Example #14
    def fit_multiclass(self, X, y):
        """
        multiclass learning
        X: samples
        y: one-hot labels, shape (n_samples, n_classes)
        """
        self.labels = np.unique(y)
        self.X = X.copy()
        bias = np.ones((self.X.shape[0], 1))
        self.X = np.append(bias, self.X, axis=1)
        self.y = y
        self.nofFeatures = self.X.shape[1]
        self.samples = len(self.X)
        self.coef_ = np.ones((self.nofFeatures, y.shape[1]))
        for i in range(self.epoch):
            # softmax normalizer over all classes
            x = sum([
                np.exp(self.X.dot(self.coef_[:, j]))
                for j in range(self.y.shape[1])
            ])
            for j in range(self.y.shape[1]):
                # cross-entropy gradient: (softmax probability - target)
                err = np.exp(self.X.dot(self.coef_[:, j])) / x - self.y[:, j]
                self.coef_[:, j] = self.coef_[:, j] - self.lr * err.dot(
                    self.X) / self.samples
        return self.coef_
Example #15
    def fit_k_class_regularised_autograd(self,
                                         X,
                                         y,
                                         batch_size=None,
                                         n_iter=200,
                                         lr=0.01,
                                         lr_type='constant'):

        if batch_size is None:
            batch_size = len(X)
        self.batch_size = batch_size
        self.n_iter = n_iter
        k = len(np.unique(y))
        column_length = len(X.columns)
        theta = np.zeros((k, column_length))
        self.coef_ = theta
        # differentiate the objective with respect to theta (argument 2),
        # assuming self.kclass has signature kclass(X, y, theta, soft)
        kclass_grad = grad(self.kclass, argnum=2)
        for i in range(1, n_iter):
            soft = self.k_class_predict(X)
            theta = theta + lr * kclass_grad(X, y, theta, soft)

        self.coef_ = theta
Example #16
def organize_data_from_txt(data_filepath, delimiter=','):
    data = np.genfromtxt(data_filepath, delimiter=delimiter)

    data = {
        'inputs': data[:, :-1],
        'labels': data[:, -1],
        'categories': np.unique(data[:, -1]),
    }

    # map categories to label indices
    data['idx_map'] = {
        category: idx
        for idx, category in enumerate(data['categories'])
    }

    # map original labels to label indices
    data['labels_indexed'] = [
        data['idx_map'][label] for label in data['labels']
    ]

    # generate one hot targets
    data['one_hot_targets'] = np.eye(len(
        data['categories']))[data['labels_indexed']]

    return data
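A quick check; np.genfromtxt also accepts a file-like object, so a StringIO stands in for the csv file here:
import numpy as np
from io import StringIO

data = organize_data_from_txt(StringIO("0.1,0.2,10\n0.3,0.4,20\n0.5,0.6,10\n"))
print(data['categories'])       # [10. 20.]
print(data['labels_indexed'])   # [0, 1, 0]
print(data['one_hot_targets'])  # rows of np.eye(2): [[1. 0.], [0. 1.], [1. 0.]]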
Example #17
    def fit(self, X, y):
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        n_classes = (y.shape[1] if self.loss == CategoricalCrossEntropy
                     else np.unique(y).size)
        if self.loss in (SparseCategoricalCrossEntropy,
                         CategoricalCrossEntropy):
            if self.layers[-1].activation != softmax:
                raise ValueError(
                    f'NeuralNetworkClassifier with {type(self.loss).__name__} loss '
                    'function only works with softmax output layer')
            if self.layers[-1].fan_out != n_classes:
                raise ValueError(
                    'the number of neurons in the output layer must '
                    f'be equal to the number of classes, i.e., {n_classes}')
        elif self.loss in (MeanSquaredError, BinaryCrossEntropy):
            if n_classes > 2:
                raise ValueError(
                    f'NeuralNetworkClassifier with {type(self.loss).__name__} '
                    'loss function only works for binary classification')
            if self.layers[-1].activation != sigmoid:
                raise ValueError(
                    f'NeuralNetworkClassifier with {type(self.loss).__name__} '
                    'loss function only works with sigmoid output layer')
            if self.layers[-1].fan_out != 1:
                raise ValueError(
                    f'NeuralNetworkClassifier with {type(self.loss).__name__} loss '
                    'function only works with one neuron in the output layer')

        return super(NeuralNetworkClassifier, self).fit(X, y)
Example #18
def plot_2d(zl, classes):
    ''' Plot a representation of 2D latent variables 
    zl (numobs, M^{(l)} x r_l ndarray): The latent variable of layer l
    classes (numobs x n_clusters ndarray): The predicted or ground truth labels
    ---------------------------------------------------------------------------
    returns (None): The plot of the latent variables colorized by class
    '''

    n_clusters = len(np.unique(classes))

    colors = ['red', 'green', 'blue', 'silver', 'purple', 'black',\
              'gold', 'orange'] # default palette, one color per class

    if n_clusters > len(colors):
        raise ValueError('Too many classes for plotting,\
                         please add some color names above this line')

    fig = plt.figure(figsize=(16, 9))
    ax = plt.axes()

    ax.scatter(zl[:, 0], zl[:, 1] , c = classes,\
                    cmap=matplotlib.colors.ListedColormap(colors[:n_clusters]))

    plt.title("2D Latent space representation of the data")
    ax.set_xlabel('Latent dimension 1', fontweight='bold')
    ax.set_ylabel('Latent dimension 2', fontweight='bold')

    plt.show()
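A smoke test with two synthetic blobs (numpy and matplotlib are assumed imported as in the source module):
import numpy as np

rng = np.random.default_rng(0)
zl = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(4, 1, (50, 2))])
classes = np.repeat([0, 1], 50)
plot_2d(zl, classes)  # two well-separated clusters, one color each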
Example #19
    def random(self, size, X, *params):
        dist_params = np.array(params[0:self.k_dist])
        phi_params = np.array(params[self.k_dist:])

        x = []
        X_out = []
        if isinstance(X, tuple):
            # a tuple is taken as a (low, high) range to sample stresses from
            X = np.random.uniform(*X, size)
        for stress in np.unique(X, axis=0):
            life_param_mask = np.array(range(
                0, len(dist_params))) == self.param_map[self.life_parameter]
            dist_params = np.where(
                life_param_mask,
                self.param_transform(self.phi(stress, *phi_params)),
                dist_params)

            U = np.random.uniform(0, 1, size)
            x.append(self.dist.qf(U, *dist_params))
            if np.isscalar(stress):
                cols = 1
            else:
                cols = len(stress)
            X_out.append((np.ones((size, cols)) * stress))
        return np.array(x).flatten(), np.concatenate(X_out)
Example #20
def rank_grouped_data(x, g):
    """ Ranks observations taken across several groups

    Arguments:

        x: `ndarray(nsamples)`. Vector of data to be compared
        g: `ndarray(nsamples)`. Group IDs

    Returns:

        ranks: `ndarray(nsamples)`. Ranks for each observation
        G: `ndarray(nsamples, ngroups)`.  Matrix indicating whether sample i is in group j
        R: `ndarray(nsamples, ngroups)`. Matrix indicating the rank for sample i in group j
        lab: `ndarray(ngroups)`. Group labels
    """
    nsamples = x.size
    ngroups = np.unique(g).size

    # Sort in ascending order
    idx = np.argsort(x)
    G, lab = make_onehot(g[idx])

    ranks = rank_data(x[idx])  # ranks of the observations in sorted order

    R = np.tile(ranks.reshape(-1, 1), [1, ngroups]) * G
    return ranks, G, R, lab
Example #21
    def predict_cumulative_hazard(self, df, times=None):
        """
        Return the cumulative hazard rate of subjects in X at time points.

        Parameters
        ----------
        df: numpy array or DataFrame
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.
        times: iterable, optional
            an iterable of increasing times to predict the cumulative hazard at. Default
            is the set of all durations (observed and unobserved). Uses a linear interpolation if
            points in time are not in the index.

        Returns
        -------
        cumulative_hazard_ : DataFrame
            the cumulative hazard of individuals over the timeline
        """
        times = np.asarray(
            coalesce(times, self.timeline, np.unique(self.durations)))
        n = times.shape[0]
        times = times.reshape((n, 1))

        lambdas_ = self._prep_inputs_for_prediction_and_return_parameters(df)

        bp = self.breakpoints
        M = np.minimum(np.tile(bp, (n, 1)), times)
        M = np.hstack([M[:, [0]], np.diff(M, axis=1)])

        return pd.DataFrame(np.dot(M, (1 / lambdas_)),
                            columns=_get_index(df),
                            index=times[:, 0])
Example #22
0
    def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
        """
        Return the cumulative hazard rate of subjects in X at time points.

        Parameters
        ----------
        X: numpy array or DataFrame
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.
        times: iterable, optional
            an iterable of increasing times to predict the cumulative hazard at. Default
            is the set of all durations (observed and unobserved). Uses a linear interpolation if
            points in time are not in the index.
        ancillary_X: numpy array or DataFrame, optional
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.

        Returns
        -------
        cumulative_hazard_ : DataFrame
            the cumulative hazard of individuals over the timeline
        """
        times = coalesce(times, self.timeline, np.unique(self.durations))
        alpha_, beta_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
        return pd.DataFrame(np.log1p(np.outer(times, 1 / alpha_) ** beta_), columns=_get_index(X), index=times)
Example #23
File: base.py Project: as4529/gp3
    def __init__(self,
                 X,
                 y,
                 kernels,
                 likelihood=None,
                 mu=None,
                 obs_idx=None,
                 max_grad=10.,
                 noise=1e-6):
        """

        Args:
            X (): data (full grid)
            y (): response
            kernels (): list of kernel objects
            likelihood (): likelihood object
            mu (): prior mean
            obs_idx (): indices of observed points on grid
            max_grad (): for gradient clipping
            noise (): observation noise jitter
        """

        self.X = X
        self.y = y
        self.m = self.X.shape[0]
        self.d = self.X.shape[1]
        self.obs_idx = obs_idx
        self.n = len(self.obs_idx) if self.obs_idx is not None else self.m
        self.X_dims = [np.expand_dims(np.unique(X[:, i]), 1) for i in range(self.d)]
        self.mu = np.zeros(self.m) if mu is None else mu
        self.max_grad = max_grad
        self.init_Ks(kernels, noise)
        if likelihood is not None:
            self.likelihood = likelihood
            self.likelihood_grad = egrad(self.likelihood.log_like)
Example #24
def assign_to_modal_uparams(this_uparam, modal_uparam):
    try:
        mid_pts = 0.5 * (modal_uparam[1:] + modal_uparam[:-1])
        bins = np.concatenate(((-np.inf, ), mid_pts, (np.inf, )))
        inds_in_modal = np.digitize(this_uparam, bins) - 1
        numerical = True
    except TypeError:
        # non-numeric parameters cannot be binned
        print('non-numerical parameter')
        numerical = False
    if numerical:
        uinds = np.unique(inds_in_modal)
        inds_in_this = np.zeros((0, ), dtype='int')
        for uind in uinds:
            candidates = np.where(inds_in_modal == uind)[0]
            dist_from_modal = np.abs(this_uparam[candidates] -
                                     modal_uparam[uind])
            to_keep = candidates[np.argmin(dist_from_modal)]
            inds_in_this = np.concatenate((inds_in_this, (to_keep, )))
        inds_in_modal = inds_in_modal[inds_in_this]
        bool_in_this = np.zeros((len(this_uparam), ), dtype='bool')
        bool_in_modal = np.zeros((len(modal_uparam), ), dtype='bool')
        bool_in_this[inds_in_this] = True
        bool_in_modal[inds_in_modal] = True
    else:
        assert (np.all(this_uparam == modal_uparam))
        bool_in_this, bool_in_modal = [
            np.ones(this_uparam.shape, dtype='bool') for iparam in range(2)
        ]
    return bool_in_this, bool_in_modal
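A small numeric example; each modal value keeps only its nearest match in this_uparam, with ties going to the first candidate:
import numpy as np

this_u = np.array([0.9, 1.1, 2.05, 5.0])
modal_u = np.array([1.0, 2.0, 3.0])
in_this, in_modal = assign_to_modal_uparams(this_u, modal_u)
print(in_this)   # [ True False  True  True] -- 0.9 beats 1.1 for modal value 1.0
print(in_modal)  # [ True  True  True]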
Example #26
def xgb_train_pred(train, y):
    # Function outputs clfs, importances, and oof_preds
    xg_clfs = []
    xg_importances = pd.DataFrame()
    xgb_oof_train = np.zeros((len(train), np.unique(y).shape[0]))
    w = y.value_counts()
    weights = {i: np.sum(w) / w[i] for i in w.index}
    for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
        trn_x, trn_y = train.iloc[trn_], y.iloc[trn_]
        val_x, val_y = train.iloc[val_], y.iloc[val_]
        clf = xgb.XGBClassifier(**xgb_params)
        clf.fit(trn_x,
                trn_y,
                eval_set=[(trn_x, trn_y), (val_x, val_y)],
                eval_metric=xgb_multi_weighted_logloss,
                verbose=100,
                early_stopping_rounds=50,
                sample_weight=trn_y.map(weights))
        xgb_oof_train[val_, :] = clf.predict_proba(
            val_x, ntree_limit=clf.best_ntree_limit)
        print(multi_weighted_logloss(val_y, xgb_oof_train[val_, :]))
        imp_df = pd.DataFrame()
        imp_df['feature'] = train.columns
        imp_df['gain'] = clf.feature_importances_
        imp_df['fold'] = fold_ + 1
        xg_importances = pd.concat([xg_importances, imp_df], axis=0)
        xg_clfs.append(clf)
    return xg_clfs, xg_importances, xgb_oof_train
Example #27
def _subsfs_list(sfs, n_chunks, rnd):
    n_snps = int(sfs.n_snps())
    logger.debug("Splitting {} SNPs into {} minibatches".format(
        n_snps, n_chunks))

    logger.debug("Building list of length {}".format(n_snps))
    idxs = np.zeros(n_snps, dtype=int)
    total_counts = np.array(sfs._total_freqs, dtype=int)
    curr = 0
    for i, cnt in enumerate(total_counts):
        idxs[curr:(curr + cnt)] = i
        curr += cnt

    logger.debug("Permuting list of {} SNPs".format(n_snps))
    idxs = rnd.permutation(idxs)

    logger.debug(
        "Splitting permuted SNPs into {} minibatches".format(n_chunks))
    ret = []
    for chunk in range(n_chunks):
        chunk_idxs, chunk_cnts = np.unique(idxs[chunk::n_chunks],
                                           return_counts=True)
        sub_configs = _ConfigList_Subset(sfs.configs, chunk_idxs)
        ret.append(
            Sfs.from_matrix(np.array([chunk_cnts]).T,
                            sub_configs,
                            folded=sfs.folded,
                            length=None))
    return ret
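The chunking step above leans on np.unique(..., return_counts=True); slicing a permuted index list with a stride and re-counting gives each minibatch its own frequency counts:
import numpy as np

idxs = np.array([0, 2, 2, 1, 0, 2, 1, 0, 0])   # config index per SNP
chunk_idxs, chunk_cnts = np.unique(idxs[0::3], return_counts=True)
print(chunk_idxs, chunk_cnts)  # [0 1] [1 2] -- idxs[0::3] is [0, 1, 1]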
Example #28
    def plot_subproblem_data(self):
        C = len(np.unique(self.y))
        
        # construct figure
        fig = plt.figure(figsize=(9,2.5))

        # create subplot with 2 panels
        gs = gridspec.GridSpec(1, C) 
        
        # scatter points
        for c in range(C):
            # create subproblem data
            y_temp = copy.deepcopy(self.y)
            ind = np.argwhere(y_temp.astype(int) == (c))
            ind = ind[:,0]
            ind2 = np.argwhere(y_temp.astype(int) != (c))
            ind2 = ind2[:,0]
            y_temp[ind] = 1
            y_temp[ind2] = -1
        
            # create new axis to plot
            ax = plt.subplot(gs[c])
            xmin,xmax = self.scatter_pts(ax,self.x,y_temp)
            
            # pretty up panel
            title = 'class ' + str(c+1) + ' versus all'
            ax.set_title(title,fontsize = 14)
Example #29
    def contour_plot(self,ax,wmax,num_contours):
        
        #### define input space for function and evaluate ####
        w1 = np.linspace(-wmax,wmax,100)
        w2 = np.linspace(-wmax,wmax,100)
        w1_vals, w2_vals = np.meshgrid(w1,w2)
        w1_vals.shape = (len(w1)**2,1)
        w2_vals.shape = (len(w2)**2,1)
        h = np.concatenate((w1_vals,w2_vals),axis=1)
        func_vals = np.asarray([self.g(np.reshape(s, (2, 1))) for s in h])
        w1_vals.shape = (len(w1), len(w1))
        w2_vals.shape = (len(w2), len(w2))
        func_vals.shape = (len(w1), len(w2))

        ### make contour right plot - as well as horizontal and vertical axes ###
        # set level ridges
        levelmin = min(func_vals.flatten())
        levelmax = max(func_vals.flatten())
        cutoff = 0.5
        cutoff = (levelmax - levelmin)*cutoff
        numper = 3
        levels1 = np.linspace(cutoff,levelmax,numper)
        num_contours -= numper

        levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper))
        levels = np.unique(np.append(levels1,levels2))
        num_contours -= numper
        while num_contours > 0:
            cutoff = levels[1]
            levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper))
            levels = np.unique(np.append(levels2,levels))
            num_contours -= numper

        ax.contour(w1_vals, w2_vals, func_vals, levels=levels, colors='k')
        ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')
                
        # clean up panel
        ax.set_xlabel('$w_0$',fontsize = 12)
        ax.set_ylabel('$w_1$',fontsize = 12,rotation = 0)
        ax.set_title(r'$g\left(w_0,w_1\right)$',fontsize = 13)

        ax.axhline(y=0, color='k',zorder = 0,linewidth = 0.5)
        ax.axvline(x=0, color='k',zorder = 0,linewidth = 0.5)
        ax.set_xlim([-wmax,wmax])
        ax.set_ylim([-wmax,wmax])
Example #30
def train(x, y, feature_transforms, **kwargs):
    # get and run optimizer to solve the C two-class subproblems
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    normalize = 'standard'
    w = 0.1 * np.random.randn(N + 1, 1)
    
    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'normalize' in kwargs:
        normalize = kwargs['normalize']

    # loop over subproblems and solve
    weight_histories = []
    for c in range(0,C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)
        ind = ind[:,1]
        ind2 = np.argwhere(y_temp.astype(int) != c)
        ind2 = ind2[:,1]
        y_temp[0,ind] = 1
        y_temp[0,ind2] = -1
        
        # run on normalized data
        run = basic_runner.Setup(x,y_temp,feature_transforms,cost_name,normalize = normalize)
        run.fit(w=w,alpha_choice = alpha_choice,max_its = max_its)
        
        # store each weight history
        weight_histories.append(run.weight_history)
        
    # combine each individual classifier weights into single weight 
    # matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = []
        for c in range(C):
            a.append(weight_histories[c][r])
        a = np.array(a).T
        a = a[0,:,:]
        combined_weights.append(a)
        
    # run combined weight matrices through fusion rule to calculate
    # number of misclassifications per step
    counter = basic_runner.Setup(x,y,feature_transforms,'multiclass_counter',normalize = normalize).cost_func
    count_history = [counter(v) for v in combined_weights]
        
    return combined_weights, count_history
Example #31
    def __init__(self,
                 X,
                 y,
                 i_,
                 n_users=10,
                 n_items=5,
                 d=3,
                 lambda_=0.,
                 gamma=1.,
                 gamma_v=0.,
                 n_epoch=10,
                 df=None,
                 fair=False,
                 training='ll'):
        self.X = X
        self.y = y
        self.i_ = i_
        self.n_users = n_users
        self.n_items = n_items
        print(n_users, 'users', n_items, 'items')
        self.d = d
        self.GAMMA = gamma
        self.GAMMA_V = gamma_v
        self.LAMBDA = lambda_
        self.mu = 0.
        self.y_pred = []
        self.predictions = []
        self.item_bias = np.random.random(n_items)
        self.item_slopes = np.random.random(n_items)
        self.w = np.random.random(n_users)
        self.V = np.random.random((n_users, d))
        self.item_embed = np.random.random((n_items, d))
        self.users = np.random.random((n_users, 5))
        self.items = np.random.random((n_items, 5))
        self.fair = fair
        self.metrics = defaultdict(list)
        self.prepare_sets()
        # one row per attribute value, normalized to give a distribution
        # over the training samples carrying that attribute
        attr_ids = self.X_train[:, 2]
        n_attr = len(np.unique(attr_ids))
        self.n_samples = len(i_['train'])
        print(self.n_samples, 'samples')
        self.W_attr = np.zeros((n_attr, self.n_samples))
        self.W_attr[attr_ids, range(self.n_samples)] = 1
        self.W_attr /= self.W_attr.sum(axis=1)[:, None]  # normalize rows
        self.n_epoch = n_epoch
        self.batch_size = BATCH_SIZE
        self.n_batches = self.n_samples // self.batch_size
        print('n_iter will be', self.n_epoch, self.n_batches,
              self.n_epoch * self.n_batches)
        self.c = 0.
        self.training = training
        self.prepare_model()
Example #32
    def inf_up_loss_influence(
        self,
        X_test,
        y_test,
        include_reg=False,
        include_hessian=True,
    ):
        """
        Non-sklearn function.
        This is the influence of a training point on a testing point.
        """
        y_fit = self.y_
        assert set(np.unique(y_fit)).issubset(set([0, 1])), \
            "y values must be 0 or 1"

        # TODO put inside other functions
        X_fit = self.X_
        assert len(X_fit) == len(y_fit)

        L2_alpha = self.L2_alpha
        if not include_reg:
            L2_alpha = 1e-10

        # Precompute global hess
        curr_hess = self.hess_loss(
            X_fit,
            y_fit,
            L2_alpha=L2_alpha,
        )
        inv_emp_hess = slin.inv(curr_hess)  # invert

        curr_losses_train = np.zeros((len(X_fit), len(self.W_b)))
        for i, (X_i, y_i) in enumerate(zip(X_fit, y_fit)):
            curr_loss_i = self.grad_loss(
                X_i.reshape(1, -1),
                y_i,
                L2_alpha=L2_alpha,
            )
            curr_losses_train[i] = curr_loss_i

        curr_losses_test = np.zeros((len(X_test), len(self.W_b)))
        for i, (X_i, y_i) in enumerate(zip(X_test, y_test)):
            curr_loss_i = self.grad_loss(
                X_i.reshape(1, -1),
                y_i,
                L2_alpha=L2_alpha,
            )
            curr_losses_test[i] = curr_loss_i

        # Rows are test points
        LOO_infs = np.zeros((len(X_test), len(X_fit)))
        for i, curr_loss_i in enumerate(curr_losses_test):
            for j, curr_loss_j in enumerate(curr_losses_train):
                if include_hessian:
                    LOO_inf = -curr_loss_i.dot(inv_emp_hess).dot(curr_loss_j.T)
                else:
                    LOO_inf = -curr_loss_i.dot(curr_loss_j.T)
                LOO_infs[i, j] = LOO_inf
        return LOO_infs
Example #33
    def init_weight(self, x, y):
        self.classes_ = np.unique(y)
        if self.prob_func_ == "sigmoid" and len(self.classes_) > 2:
            raise ValueError("sigmoid output supports at most two classes")
        if self.prob_func_ is None:
            if len(self.classes_) == 2:
                self.prob_func_ = "sigmoid"
            else:
                self.prob_func_ = "softmax"
        if self.prob_func_ == "sigmoid":
            return np.array([self.eps_] * (x.shape[1] + 1))
        else:  # self.prob_func_ == "softmax"
            return np.array([[self.eps_] * len(self.classes_)
                             for i in range(x.shape[1] + 1)])
Example #34
    def compute_rotated_map(self, rotation):
        """
        Compute stellar maps projected on the plane of the sky for a given rotation of the star
        Args:
            rotation (float) : rotation around the star in degrees given as [longitude, latitude] in degrees
        
        Returns:
            pixel_unique (int) : vector with the "active" healpix pixels
            pixel_map (int) : map showing the healpix pixel projected on the plane of the sky
            mu_pixel (float): map of the astrocentric angle for each pixel on the plane of the sky (zero for pixels not in the star)
            T_pixel (float): map of temperatures for each pixel on the plane of the sky
        """
        mu_pixel = np.zeros_like(self.mu_angle)
        T_pixel = np.zeros_like(self.mu_angle)

        # Get the projection of the healpix pixel indices on the plane of the sky
        pixel_map = self.projector.projmap(self.indices, self.f_vec2pix, rot=rotation)[:, 0:int(self.npix / 2)]

        # Get the unique elements in the vector
        pixel_unique = np.unique(pixel_map)

        # Now loop over all unique pixels, filling up the array of the projected map with the mu and temperature values
        for j in range(len(pixel_unique)):
            ind = np.where(pixel_map == pixel_unique[j])            

            if (np.all(np.isfinite(self.mu_angle[ind[0],ind[1]]))):
                if (self.mu_angle[ind[0],ind[1]].size == 0):
                    value = 0.0
                else:                    
                    value = np.nanmean(self.mu_angle[ind[0],ind[1]])
                    mu_pixel[ind[0],ind[1]] = value

                    T_pixel[ind[0],ind[1]] = self.temperature_map[int(pixel_unique[j])]
            else:
                mu_pixel[ind[0],ind[1]] = 0.0
                T_pixel[ind[0],ind[1]] = 0.0

        return pixel_unique, pixel_map, mu_pixel, T_pixel
Example #35
File: dr.py Project: HelenLiGit/POT
def split_classes(X, y):
    """split samples in X by classes in y
    """
    lstsclass = np.unique(y)
    return [X[y == i, :].astype(np.float32) for i in lstsclass]
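For example:
import numpy as np

X = np.arange(8, dtype=float).reshape(4, 2)
y = np.array([0, 1, 0, 1])
for c, Xc in zip(np.unique(y), split_classes(X, y)):
    print(c, Xc.tolist())
# 0 [[0.0, 1.0], [4.0, 5.0]]
# 1 [[2.0, 3.0], [6.0, 7.0]]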
Example #36
    def precompute_rotation_maps(self, rotations=None):
        """
        Compute the averaged spectrum on the star for a given temperature map and for a given rotation
        Args:
            rotations (float) : [N_phases x 2] giving [longitude, latitude] in degrees for each phase
        
        Returns:
            None
        """
        if (rotations is None):
            print("Use some angles for the rotations")
            return

        self.n_phases = rotations.shape[0]

        self.avg_mu = [None] * self.n_phases
        self.avg_v = [None] * self.n_phases
        self.velocity = [None] * self.n_phases
        self.n_pixel_unique = [None] * self.n_phases
        self.n_pixels = [None] * self.n_phases
        self.pixel_unique = [None] * self.n_phases

        for loop in range(self.n_phases):
            mu_pixel = np.zeros_like(self.mu_angle)
            v_pixel = np.zeros_like(self.vel_projection)
        
            pixel_map = self.projector.projmap(self.indices, self.f_vec2pix, rot=rotations[loop,:])[:,0:int(self.npix/2)]
            pixel_unique = np.unique(pixel_map[np.isfinite(pixel_map)])

            for j in range(len(pixel_unique)):
                ind = np.where(pixel_map == pixel_unique[j])

                if (np.all(np.isfinite(self.mu_angle[ind[0],ind[1]]))):
                    if (self.mu_angle[ind[0],ind[1]].size == 0):
                        mu_pixel[ind[0],ind[1]] = 0.0
                        v_pixel[ind[0],ind[1]] = 0.0
                    else:                    
                        
                        if (self.clv):
                            value = np.nanmean(self.mu_angle[ind[0],ind[1]])
                        else:
                            value = 1.0

                        mu_pixel[ind[0],ind[1]] = value

                        value = np.nanmean(self.vel_projection[ind[0],ind[1]])
                        v_pixel[ind[0],ind[1]] = value
                else:
                    mu_pixel[ind[0],ind[1]] = 0.0
                    v_pixel[ind[0],ind[1]] = 0.0

            self.n_pixel_unique[loop] = len(pixel_unique)
            self.avg_mu[loop] = np.zeros(self.n_pixel_unique[loop])
            self.avg_v[loop] = np.zeros(self.n_pixel_unique[loop])
            self.velocity[loop] = np.zeros(self.n_pixel_unique[loop])
            self.n_pixels[loop] = np.zeros(self.n_pixel_unique[loop], dtype='int')
            self.pixel_unique[loop] = pixel_unique.astype('int')

            for i in range(len(pixel_unique)):
                ind = np.where(pixel_map == pixel_unique[i])
                self.n_pixels[loop][i] = len(ind[0])
                self.avg_mu[loop][i] = np.unique(mu_pixel[ind[0], ind[1]])
                self.avg_v[loop][i] = np.unique(v_pixel[ind[0], ind[1]])            
                self.velocity[loop][i] = self.avg_mu[loop][i] * self.avg_v[loop][i]