Example #1
def get_dimwise_prob_metrics(X_real: np.array,
                             X_fake: np.array,
                             y_real: np.array = None,
                             y_fake: np.array = None,
                             measure='mean',
                             n_num_cols: int = 0):
    if measure in ['mean', 'avg']:
        real = X_real.mean(axis=0)
        fake = X_fake.mean(axis=0)
    elif measure == 'std':
        real = X_real.std(axis=0)
        fake = X_fake.std(axis=0)
    else:
        raise ValueError(
            f'"measure" must be "mean" or "std" but "{measure}" was specified.'
        )

    corr_value = pearsonr(real, fake)[0]
    rmse_value = np.sqrt(mean_squared_error(real, fake))

    if n_num_cols > 0:
        num_corr_value = pearsonr(real[:n_num_cols], fake[:n_num_cols])[0]
        num_rmse_value = np.sqrt(
            mean_squared_error(real[:n_num_cols], fake[:n_num_cols]))
    else:
        num_rmse_value, num_corr_value = -1, -1

    if X_real.shape[1] - n_num_cols > 0:
        cat_corr_value = pearsonr(real[n_num_cols:], fake[n_num_cols:])[0]
        cat_rmse_value = np.sqrt(
            mean_squared_error(real[n_num_cols:], fake[n_num_cols:]))
    else:
        cat_rmse_value, cat_corr_value = -1, -1
    return rmse_value, corr_value, num_rmse_value, num_corr_value, cat_rmse_value, cat_corr_value
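A minimal usage sketch, assuming `pearsonr` comes from `scipy.stats` and `mean_squared_error` from `sklearn.metrics`:

import numpy as np
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
X_real = rng.normal(size=(500, 8))
X_fake = rng.normal(size=(500, 8))

# treat the first 5 columns as numeric and the rest as categorical
rmse, corr, num_rmse, num_corr, cat_rmse, cat_corr = get_dimwise_prob_metrics(
    X_real, X_fake, measure='mean', n_num_cols=5)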
Example #2
    def update_parameters(self, epsilons: np.array, r_plus: np.array,
                          r_minus: np.array):
        """
        This method updates the internal mu and sigma according to the evaluation results of the perturbed parameters.

        mus, sigmas, baseline, maximum_reward are updated.
        epsilons: parameter_number x sample_number
        r_plus: sample_number
        r_minus: sample_number
        """
        self.__mu += self.mu_delta(epsilons, r_plus, r_minus)
        self.__mu = np.minimum(
            self.__mu, self.__parameter_bound["upper"])  # prevent too large
        self.__mu = np.maximum(
            self.__mu, self.__parameter_bound["lower"])  # prevent too small

        self.__sigma += self.sigma_delta(epsilons, r_plus, r_minus)
        self.__sigma = np.minimum(
            self.__sigma, self.__sigma_upper_bound)  # prevent too large sigma
        assert np.all(self.__sigma > 0), "got negative sigma\n{}".format(
            self.__sigma)
        print("updated sigma", self.__sigma)

        self.__maximum_reward = max(self.__maximum_reward, r_plus.max(),
                                    r_minus.max())
        b = self.__baseline.add_new_value(
            (r_plus.mean() + r_minus.mean()) / 2.)
Example #3
    def fit(self, X: np.array, y: np.array):
        EPS = 1e-10
        if self.scale:
            X, self.X_offset, self.X_scale = scale(X)
        n_samples, n_features = X.shape
        _, self.n_classes = y.shape

        self.p_classes = np.zeros(self.n_classes)
        self.mean_class = np.zeros((self.n_classes, n_features))
        self.cov_class = np.zeros((n_features, n_features))
        # within

        for i in range(self.n_classes):
            self.p_classes[i] = sum(y[:, i]) / n_samples
            Xi = X[y[:, i] == 1]
            self.mean_class[i] = Xi.mean(0)
            self.cov_class += np.cov(Xi.T)

        self.S_within = self.cov_class
        # between
        self.S_between = (self.mean_class - X.mean(0)).T @ (self.mean_class -
                                                            X.mean(0))
        pinv = np.linalg.pinv(self.S_within)
        if self.solver == "svd":
            u, s, v = np.linalg.svd(pinv @ self.S_between)
            self.proj = v.T
            # self.bias = TODO
        elif self.solver == "eig":
            xc = pinv @ self.S_between
            evalue, evector = np.linalg.eigh(xc)
            # eigh returns eigenvalues in ascending order with eigenvectors as
            # columns, so reverse the columns to put the leading ones first
            self.proj = evector[:, ::-1].T
            # self.bias = TODO
        else:
            raise NotImplementedError
Example #4
def gpr_distance(x: np.array, y: np.array, theta: float) -> float:
    """
    Calculates the distance between two Gaussians under the Generic Parametric Representation (GPR) approach.

    According to the original work https://www.researchgate.net/publication/322714557 (p.70):
    "This is a fast and good proxy for distance d_theta when the first two moments ... predominate". But it's not
    a good metric for heavy-tailed distributions.

    Parameter theta defines what type of information dependency is being tested:
    - for theta = 0 the distribution information is tested
    - for theta = 1 the dependence information is tested
    - for theta = 0.5 a mix of both information types is tested

    With theta in [0, 1] the distance lies in range [0, 1] and is a metric. (See original work for proof, p.71)

    :param x: (np.array/pd.Series) X vector.
    :param y: (np.array/pd.Series) Y vector (same number of observations as X).
    :param theta: (float) Type of information being tested. Falls in range [0, 1].
    :return: (float) Distance under GPR approach.
    """

    # Calculating the GPR distance
    distance = theta * (1 - spearmans_rho(x, y)) / 2 + \
               (1 - theta) * (1 - ((2 * x.std() * y.std()) / (x.std()**2 + y.std()**2))**(1/2) *
                              np.exp(- (1 / 4) * (x.mean() - y.mean())**2 / (x.std()**2 + y.std()**2)))

    return distance**(1 / 2)
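A quick usage sketch, assuming `spearmans_rho(x, y)` returns Spearman's rho (here sketched as a hypothetical thin wrapper around `scipy.stats.spearmanr`; the original module presumably provides its own):

import numpy as np
from scipy.stats import spearmanr

def spearmans_rho(x, y):
    # hypothetical helper standing in for the module's own implementation
    return spearmanr(x, y)[0]

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = 0.5 * x + rng.normal(size=1000)

d_dist = gpr_distance(x, y, theta=0)    # distribution information only
d_dep = gpr_distance(x, y, theta=1)     # dependence information only
d_mix = gpr_distance(x, y, theta=0.5)   # a mix of both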
Example #5
    def _mean_learning_curve_profile(self, sampled_nlls: np.array,
                                     training_nlls: np.array):
        learning_curves = {
            # np.float is deprecated; a plain float() conversion suffices
            "sampled": float(sampled_nlls.mean()),
            "training": float(training_nlls.mean())
        }
        return learning_curves
Example #6
def b_formula(x_list: np.array, y_list: np.array, denominator: float):
    """TODO Docs"""
    b = (y_list.mean()
         * x_list.dot(x_list)
         - x_list.mean()
         * x_list.dot(y_list)) \
        / denominator
    return b
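A hedged cross-check against `np.polyfit`, under the denominator assumption stated in the docstring:

import numpy as np

x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([1.1, 2.9, 5.2, 7.1])

denominator = x.dot(x) - len(x) * x.mean() ** 2
intercept = b_formula(x, y, denominator)
# cross-check against numpy's least-squares line fit
slope_np, intercept_np = np.polyfit(x, y, 1)
assert np.isclose(intercept, intercept_np)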
Example #7
    def _correlation(x: np.array, vals: np.array):
        x = x[:, np.newaxis]
        mu_x = x.mean()  # cells
        mu_vals = vals.mean(axis=0)  # cells by gene --> cells by genes
        sigma_x = x.std()
        sigma_vals = vals.std(axis=0)

        return ((vals * x).mean(axis=0) - mu_vals * mu_x) / (sigma_vals * sigma_x)
Example #8
    def _compute_p_value(cls, serie_1: np.array, serie_2: np.array) -> float:

        total_std = cls._get_total_std(serie_1, serie_2)

        stat = (serie_1.mean() - serie_2.mean()) / (
            total_std * math.sqrt(1 / len(serie_1) + 1 / len(serie_2)))

        pvalue = stats.norm.cdf(stat)
        if np.isnan(pvalue):
            return 1.0
        return pvalue
Example #9
def periodogram_covar(x: np.array, y: np.array, tau: int, p: int):
    '''Takes in numpy arrays x and y, an int value tau for the lag and
    another int p for the maximum lag and returns the periodogram
    estimate of the covariance.
    '''
    assert np.allclose(x.mean(), 0), 'Signal x must be 0 mean!'
    assert np.allclose(y.mean(), 0), 'Signal y must be 0 mean!'
    T = len(x) - p
    if tau == 0:
        return (1 / T) * np.dot(x[p:], y[p:])
    else:
        return (1 / T) * np.dot(x[p:], y[p - tau:-tau])
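A small usage sketch; the leading asserts require both signals to be de-meaned first:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = rng.normal(size=1000)
x -= x.mean()  # satisfy the zero-mean asserts
y -= y.mean()
c0 = periodogram_covar(x, y, tau=0, p=10)   # lag-0 covariance estimate
c1 = periodogram_covar(x, y, tau=1, p=10)   # lag-1 covariance estimate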
Example #10
def pearson_similarity(x: np.array, y: np.array) -> float:
    """
    Calculate a Pearson correlation coefficient given 1-D data arrays x and y.
    Args:
        x, y: two points in n-space
    Returns:
        Pearson correlation between x and y
    """
    x = x - x.mean()
    y = y - y.mean()
    return (x * y).sum() / np.sqrt(np.square(x).sum()) / np.sqrt(
        np.square(y).sum())
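For 1-D inputs this agrees with `np.corrcoef`; a quick sanity check:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 2 * x + rng.normal(size=100)
assert np.isclose(pearson_similarity(x, y), np.corrcoef(x, y)[0, 1])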
Example #11
def preprocess_data(data: np.array):
    data_mean = data.mean()
    data_std = data.std()
    print("The data mean value is", data_mean)
    print("The data std value is", data_std)

    data -= data_mean
    data /= data_std
    # check again to double check
    print("After normalization the data has mean value", data.mean())
    print("After normalization the data has standard deviation", data.std())
    return data
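Note that the in-place operators modify the array that is passed in (and will not accept integer arrays); a usage sketch that hands over a float copy instead:

import numpy as np

raw = np.array([1, 2, 3, 4, 5])
normalized = preprocess_data(raw.astype(float))  # float copy leaves `raw` untouched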
Example #12
    def vector(x: np.array, y: np.array):
        """
        Correlate each column in y with a vector x

        :param x: np.ndarray vector of length n
        :param y: np.ndarray matrix of shape (n, k)
        :returns: vector of length k (one correlation per column of y)
        """
        # x = x[:, np.newaxis]  # for working with matrices
        mu_x = x.mean()  # cells
        mu_y = y.mean(axis=0)  # cells by gene --> cells by genes
        sigma_x = x.std()
        sigma_y = y.std(axis=0)

        return ((y * x).mean(axis=0) - mu_y * mu_x) / (sigma_y * sigma_x)
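Assuming `vector` is exposed as a static helper, each entry of the result matches the column-wise Pearson correlation:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=100)         # vector of length n
y = rng.normal(size=(100, 5))    # matrix of shape (n, k)
r = vector(x, y)                 # one correlation per column of y
for j in range(y.shape[1]):
    assert np.isclose(r[j], np.corrcoef(x, y[:, j])[0, 1])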
Example #13
    def CCC(self, predictions: np.array, labels: np.array):
        """ Concordance Correlation Coefficient (CCC) metric.
        
        Args:
            predictions (np.array): Model predictions.
            labels (np.array): Data labels.
        """

        predictions = np.concatenate(predictions).reshape(-1, )
        labels = np.concatenate(labels).reshape(-1, )

        mean_cent_prod = ((predictions - predictions.mean()) *
                          (labels - labels.mean())).mean()
        return (2 * mean_cent_prod) / (predictions.var() + labels.var() +
                                       (predictions.mean() - labels.mean())**2)
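A sanity check, assuming access to an instance (here `metric`, a hypothetical name); identical inputs give a CCC of exactly 1:

import numpy as np

preds = [np.array([0.1, 0.4]), np.array([0.9])]
labels = [np.array([0.1, 0.4]), np.array([0.9])]
assert np.isclose(metric.CCC(preds, labels), 1.0)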
Example #14
    def fit(self,
            X: np.array,
            y: Optional[np.array] = None,
            epochs: int = 50) -> None:
        """
        Fit the model to the data.
        
        Args:
            X (np.array): point cloud
            y (np.array): for compatibility
            epochs (int): the number of epochs
        
        Returns:
            None
        """
        self._reinit(X.mean(), X.std(), X.shape[1:])

        for epoch in range(1, epochs + 1):  # run the full number of epochs
            for point in X:
                # error is the distance from closest to the point
                closest_neuron, second_closest, error = self._get_winners(
                    point)
                # accumulating the local error
                self._errors[closest_neuron] += error**2
                # move neurons closer to the point
                self._move_neurons(closest_neuron, second_closest, point)
                # deleting inactive edges
                for dead_edge in [
                        edge for edge, age in self._edge_age.items()
                        if age > self.max_age
                ]:
                    self._remove_edge(dead_edge)
            if epoch % self.birth_period == 0:
                self._create_neuron()
Example #15
def calc_mean_color(img: np.array) -> np.array:
    """
    Average over both spatial axes to get the mean color of an image.

    :param img: image array of shape (height, width, channels)
    :return: per-channel mean color
    """
    return img.mean(axis=0).mean(axis=0)
Example #16
def return_high_pass_filtered_depth(z: np.array, max_period: float, numtaps: int = 101):
    """
    Take in a two-dim array of depth (pings, beams) and return a high-pass-filtered mean depth for each ping,
    following the JHC 'Dynamic Motion Residuals...' paper, which suggests using a 4 * max period cutoff.  I've
    found a 6 * max period seems to retain more of the signal that we want, but I don't really know what I'm
    doing here yet.

    Parameters
    ----------
    z
        numpy array (ping, beam) for depth
    max_period
        float, max period of the attitude arrays (roll, pitch, heave)
    numtaps
        filter length, must be odd

    Returns
    -------
    np.array
        HPF ping-wise mean depth
    """

    meandepth = z.mean(axis=1)
    zerocentered_meandepth = meandepth - meandepth.mean()

    # butterworth filter I never quite got to work in a way i understood
    # sos = butter(numtaps, 1 / max_period, btype='highpass', output='sos')
    # filt = sosfilt(sos, meandepth)

    coef = build_highpass_filter_coeff(1 / (max_period * 4), numtaps=numtaps)
    filt_depth = lfilter(coef, 1.0, zerocentered_meandepth)
    # trim the bad sections from the start of the filtered depth
    trimfilt_depth = filt_depth[numtaps // 2:]
    return trimfilt_depth
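`build_highpass_filter_coeff` is defined elsewhere; a minimal sketch of what it might look like, assuming a windowed-sinc FIR design via `scipy.signal.firwin` with the cutoff already normalized to the Nyquist frequency of the ping rate:

from scipy.signal import firwin

def build_highpass_filter_coeff(cutoff: float, numtaps: int = 101):
    # Hypothetical helper: windowed-sinc FIR high-pass design.
    # numtaps must be odd for a high-pass FIR filter, and `cutoff` is
    # assumed to be normalized to the Nyquist frequency.
    return firwin(numtaps, cutoff, pass_zero=False)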
Example #17
    def _compute_number_of_replicas(self,
                                    distance_to_center: np.array) -> np.array:
        mean_dist = distance_to_center.mean()

        def to_weight(d):
            if "single" in self.exploration_type:
                return np.exp(-(d / mean_dist)**2)
            elif "multi" in self.exploration_type:
                return expit((d / mean_dist)**2)
            else:
                raise ValueError("{} is not a valid exploration type".format(
                    self.exploration_type))

        weights = np.array([to_weight(d) for d in distance_to_center])
        n_replicas = np.zeros(self.swarm_size, dtype=int)
        replica = self.swarm_size
        res = []
        while replica > 0:
            replica = int(replica)
            weights[0:replica] /= weights[0:replica].sum()
            for weight_order, idx in enumerate(reversed(np.argsort(weights))):
                if weight_order >= replica:
                    break
                fractional_replicas = replica * weights[idx]
                # for rounding,
                # see https://stackoverflow.com/questions/28617841/rounding-to-nearest-int-with-numpy-rint-not-consistent-for-5
                n_replicas[idx] += int(np.floor(fractional_replicas) + 0.5)
                if n_replicas.sum() >= self.swarm_size:
                    n_replicas[idx] -= n_replicas.sum() - self.swarm_size
            replica = self.swarm_size - n_replicas.sum()
            res.append(replica)

        return n_replicas
Example #18
def normalize(array: np.array) -> np.array:
    '''
    Input:
        array: a numpy array of dimension m x n, where m := number of samples and n := vector size.
    '''
    mean = array.mean(axis=1, keepdims=True)
    std = array.std(axis=1, keepdims=True)
    return (array - mean) / std
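Each row of the result should then have (approximately) zero mean and unit standard deviation:

import numpy as np

rng = np.random.default_rng(0)
arr = rng.normal(loc=3.0, scale=2.0, size=(4, 100))
out = normalize(arr)
assert np.allclose(out.mean(axis=1), 0)
assert np.allclose(out.std(axis=1), 1)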
Example #19
def pca(X: array) -> array:
    """Principal Component Analysis

    input: X, matrix with training data stored as flattened arrays, in rows
    return: projection matrix (with important dimensions first), variance
    and mean."""

    # get dimensions
    num_data, dim = X.shape

    # centre data
    mean_X = X.mean(axis=0)
    X = X - mean_X

    if dim > num_data:
        # PCA - compact trick used
        M = dot(X, X.T) # covariance matrix
        e, EV = linalg.eigh(M) # eigenvalues and eigenvectors
        tmp = dot(X.T, EV).T # this is the compact trick
        V = tmp[::-1] # reverse since last eigenvectors are the ones we want
        S = sqrt(e)[::-1] # reverse since eigenvalues are in increasing order
        for i in range(V.shape[1]):
            V[:,i] /= S
    else:
        # PCA - SVD used
        U, S, V = linalg.svd(X)
        V = V[:num_data] # only makes sense to return the first num_data

    # return the projection matrix, the variance and the mean
    return V, S, mean_X
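A usage sketch on random data (the function relies on `dot`, `sqrt`, and `linalg` being available, e.g. via `from numpy import *` in the source module):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))          # 50 samples, 10 features
V, S, mean_X = pca(X)
projected = (X - mean_X) @ V[:2].T     # project onto the two leading components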
Example #20
def local_autoscale_ms(img: np.array) -> np.array:
    '''
    :return:
        Linearly normalized image.
        Output image will have 0 mean and 1 std.
    '''
    return (img - img.mean()) / img.std()
Example #21
def point_is_outlier(point: np.array, lastn: np.array, var_threshold: float,
                     length: int = -1,
                     output_current_ratio: bool = False) -> tuple:
    """
    Determines whether a data point is an outlier through variance and adds it
    to a fixed-size set.
    For the first point, the function expects lastn=None and length=desired set size.
    For the others, it expects lastn to be the set returned by the last iteration.
    If (point - mean)**2 / (len(set) * variance(set)) > var_threshold, returns
    (True, newset); otherwise, returns (False, newset).
    """
    if lastn is None:
        if output_current_ratio:
            return (False, np.array([point]), 0)
        return (False, np.array([point]))
    else:
        if len(lastn) < length:
            lastn = np.insert(lastn, 0, np.zeros(point.shape), 0)
            if output_current_ratio:
                return (False, lastn, 0)
            else:
                return (False, lastn)
        # Shift the current values left
        lastn[0:-1] = lastn[1:]
        # Put our last point in the rightmost position
        lastn[-1] = point
        # Calculate the variance
        var = lastn.var()
        if var == 0:
            # To avoid NaNs
            var = 1e-10
        # Mean
        mean = lastn.mean(0)
        ratio = ((point - mean) ** 2).sum() / (len(lastn) * var)
        if output_current_ratio:
            return (ratio > var_threshold, lastn, ratio)
        else:
            return (ratio > var_threshold, lastn)
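A usage sketch feeding a stream of 1-D points through the rolling window (threshold chosen purely for illustration):

import numpy as np

window = None
for value in [1.0, 1.1, 0.9, 1.0, 1.2, 9.0]:
    is_outlier, window = point_is_outlier(np.array([value]), window,
                                          var_threshold=0.5, length=5)
    if is_outlier:
        print("outlier:", value)   # flags 9.0 once the window is full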
Example #22
def calculate_metrics(scores: np.array, true_labels: np.array, score_name: str, verbose=True):
    # ROC-AUC & APS
    roc_auc = roc_auc_score(true_labels, scores)
    aps = average_precision_score(true_labels, scores)

    # Mean score on validation
    mean_score = scores.mean()

    # F1-score & optimal threshold
    # if opt_threshold is None:  # validation
    #     precision, recall, thresholds = precision_recall_curve(y_true=true_labels, probas_pred=scores)
    #     f1_scores = (2 * precision * recall / (precision + recall))
    #     f1 = np.nanmax(f1_scores)
    #     opt_threshold = thresholds[np.nanargmax(f1_scores)]
    # else:  # testing
    #     y_pred = (scores > opt_threshold).astype(int)
    #     f1 = f1_score(y_true=true_labels, y_pred=y_pred)

    if verbose:
        print(f'ROC-AUC on {score_name}: {roc_auc}. APS on {score_name}: {aps}. Mean {score_name}: {mean_score}')
    # print(f'F1-score on {type}: {f1}. Optimal threshold on {type}: {opt_threshold}')

    return {f"roc-auc_{score_name}": roc_auc,
            f"aps_{score_name}": aps,
            f"mean_{score_name}": mean_score}
Example #23
def transform_using_values(arr_in: np.array, values: list, cval=-1, cval_mean=False):
    '''
    Applies an affine transformation to `arr_in` using the parameter values in `values`.
    '''
    assert len(values) == 6
    scale_x = values[0]
    scale_y = values[1]
    shear_radians = values[2]
    rotate_radians = values[3]
    offset_x = values[4]
    offset_y = values[5]
    # Image must be shifted by minus half each dimension, then transformed, then shifted back.
    # This way, rotations and shears will be about the centre of the image rather than the top-left corner.
    shift_x = -0.5 * arr_in.shape[1]
    shift_y = -0.5 * arr_in.shape[0]
    a0 = scale_x * math.cos(rotate_radians)
    a1 = -scale_y * math.sin(rotate_radians + shear_radians)
    a2 = a0 * shift_x + a1 * shift_y + offset_x - shift_x
    b0 = scale_x * math.sin(rotate_radians)
    b1 = scale_y * math.cos(rotate_radians + shear_radians)
    b2 = b0 * shift_x + b1 * shift_y + offset_y - shift_y
    tform = skimage.transform.AffineTransform(matrix=np.array([[a0, a1, a2], [b0, b1, b2], [0, 0, 1]]))
    if cval_mean:
        cval = arr_in.mean()
    arr_out = skimage.transform.warp(arr_in.astype(float), tform.inverse, cval=cval)
    return arr_out
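With unit scales, no shear or rotation, and zero offsets, the transform is the identity; a quick check (assuming `math` and `skimage` are imported as in the source module):

import numpy as np

img = np.random.default_rng(0).random((32, 32))
out = transform_using_values(img, [1.0, 1.0, 0.0, 0.0, 0.0, 0.0])
assert np.allclose(out, img)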
Example #24
def x_radius(x: np.array) -> (int, np.array):
    """
    Return the mean radius for a matrix of radius distributions.

    Example: radius bins run 1..29; two rows of normalized radius histograms

        bins:   ...  5    6    7   ...
        row 0:  ...  0.4  0.2  0.5 ...
        row 1:  ...  0.2  0.0  0.8 ...

    column-wise mean:

        mean_x = ...  0.3  0.1  0.65 ...

    multiplied by the radius sizes [1 2 3 4 5 6 7 8 9 ...]:

        5 * 0.3 + 6 * 0.1 + 7 * 0.65 = 6.65

    which is the mean radius over the whole matrix of radius distributions.

    Args:
        x: np.array - matrix of normalized radius distributions of the train
        image, of shape (x, 29);
        cell [i, j] shows which share of all Hough circles on image #i
        have a radius of j pixels
    Returns:
        int: - mean radius in pixels
        np.array: - mean radius bin array (mean_x of shape (29, ))
    mean_x = x.mean(axis=0)
    return np.inner(mean_x, list(range(1, 30))), mean_x
Example #25
def calculate_p_value(samples: np.array):
    mean = samples.mean()
    std = samples.std()
    mu = 1 / 3

    z_score = (mean - mu) / std
    return stats.norm.sf(z_score)
Example #26
def gen_features_way_1(x: np.array):
    x = x / np.abs(x).sum()
    features = [x.mean(), (x**2).mean(), x.var()]
    features += [
        np.percentile(x, p) for p in np.linspace(0, 100, PERCENTILE_COUNT)
    ]
    return features
Example #27
def single_sample_t_test(observations: np.array, mean0, alpha):
    """
    This is an implementation of single-sided and single sample test of the mean of a normal distribution
    with unknown variance. In the context of this project, observations are some form of monthly returns or monthly return difference.
    
    NOTE: Requires that observations are pre-computed.
    """

    mean_obs = observations.mean()
    sample_std = math.sqrt(
        square(observations - mean_obs).sum() / (len(observations) - 1))

    t_statistic = (mean_obs - mean0) / (sample_std /
                                        math.sqrt(len(observations)))

    #  test if t_statistic > t(alpha, n-1)
    # ppf(q, df, loc=0, scale=1)	Percent point function (inverse of cdf — percentiles).
    critical_value = t.ppf(1 - alpha, df=len(observations) - 1)
    p_value = (1.0 - t.cdf(t_statistic, df=len(observations) - 1))

    if t_statistic > critical_value:
        return "Reject H0 (mean of observations is greater) with t_statistic={}, p-value={}, critical_value={} and alpha={}".format(
            t_statistic, p_value, critical_value, alpha)

    else:
        return "Failed to reject H0 (mean of observations is not greater) with t_statistic={}, p-value={}, critical_value={} and alpha={}".format(
            t_statistic, p_value, critical_value, alpha)
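A usage sketch on synthetic monthly returns, assuming the module-level imports (`math`, `numpy.square`, `scipy.stats.t`) from the source:

import math
import numpy as np
from numpy import square
from scipy.stats import t

rng = np.random.default_rng(0)
monthly_returns = rng.normal(loc=0.01, scale=0.02, size=36)
print(single_sample_t_test(monthly_returns, mean0=0.0, alpha=0.05))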
Example #28
def zoom_out(image: np.array, boxes: np.array):
    """
    Perform zooming out of an image by placing the image in a larger canvas
    of filler values.
    filler will be the mean of the image

    Helps learning smaller values

    :param image: np.array, Depth Image (h, w)
    :param boxes: np.array, bounding boxes of the objects
    :return: expanded image, updated coordinates of bounding box
    """
    h = image.shape[0]
    w = image.shape[1]
    max_scale = const.MAX_ZOOM_OUT
    scale = random.uniform(1, max_scale)
    new_h = int(h * scale)
    new_w = int(w * scale)

    filler = image.mean()
    # np.float is deprecated; use the builtin float
    new_image = np.ones((new_h, new_w), dtype=float) * filler

    left = random.randint(0, new_w - w)
    right = left + w
    top = random.randint(0, new_h - h)
    bottom = top + h
    new_image[top:bottom, left:right] = image

    new_boxes = boxes + np.array([left, top, left, top], dtype=np.float32)

    return new_image, new_boxes
Example #29
    def autocorr(x: np.array, lags: range) -> np.array:
        """Make an autocorrelation curve"""
        mean = x.mean()
        var = np.var(x)
        xp = x - mean
        corr = np.correlate(xp, xp, "full")[len(x) - 1 :] / var / len(x)

        return corr[: len(lags)]
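Assuming the method is exposed as a static helper, the zero-lag value is exactly 1:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=500)
curve = autocorr(x, range(20))
assert np.isclose(curve[0], 1.0)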
Example #30
def print_valurange_summary(onebigline: np.array, username: str):
    n = len(onebigline)
    nh = n // 2
    print('user: ', username, ' vals: ', n, ' mean: ', onebigline.mean(),
          ' std: ', onebigline.std(), ' min: ', onebigline.min(), ' max: ',
          onebigline.max())
    print(onebigline[:4], "...", onebigline[nh:nh + 4], "...", onebigline[-4:])
    print("")