Example #1
def test_memory_leak() -> None:
    import resource

    arr = np.arange(1).reshape((1, 1))

    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)

        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts
Example #2
def test_memory_leak():
    import resource

    arr = np.arange(1).reshape((1, 1))

    starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    for i in range(1000):
        for axis in [None, 0, 1]:
            bn.nansum(arr, axis=axis)
            bn.nanargmax(arr, axis=axis)
            bn.nanargmin(arr, axis=axis)
            bn.nanmedian(arr, axis=axis)
            bn.nansum(arr, axis=axis)
            bn.nanmean(arr, axis=axis)
            bn.nanmin(arr, axis=axis)
            bn.nanmax(arr, axis=axis)
            bn.nanvar(arr, axis=axis)

    ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    diff = ending - starting
    diff_bytes = diff * resource.getpagesize()
    print(diff_bytes)
    # For 1.3.0 release, this had value of ~100kB
    assert diff_bytes == 0
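A portability note on the two tests above: getrusage(...).ru_maxrss is reported in kilobytes on Linux but in bytes on macOS, so scaling the delta by the page size (as both tests do) does not match either convention. A minimal sketch of a platform-aware variant; the scale factors follow the documented getrusage conventions and are not part of the original tests:

import resource
import sys


def rss_delta_bytes(fn):
    """Run fn() and return the approximate increase in peak RSS, in bytes."""
    before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    fn()
    after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is documented in kilobytes on Linux and in bytes on macOS.
    scale = 1 if sys.platform == "darwin" else 1024
    return (after - before) * scale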
Example #3
    def fit(self, X, y):
        X_y = self._check_params(X, y)
        self.X = X_y[0]
        self.y = X_y[1].reshape((-1, 1))
        n, p = X.shape

        S = []    # list of selected features
        F = range(p)    # list of unselected features

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # Find the first feature
        k_min = 3
        range_k = 7
        xy_MI = np.empty((range_k, p))
        for i in range(range_k):
            xy_MI[i, :] = self._get_first_mi_vector(i + k_min)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        if self.verbose > 0:
            self._info_print(S, S_mi)

        # Find the next features
        if self.n_features == 'auto':
            n_features = np.inf
        else:
            n_features = self.n_features

        while len(S) < n_features:
            s = len(S) - 1
            feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1])
            fmm = feature_mi_matrix[:len(S), F]
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            if np.isnan(MRMR).all():
                break
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)
            if self.verbose > 0:
                self._info_print(S, S_mi)
            if self.n_features == 'auto' and len(S) > 10:
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break
        self.n_features_ = len(S)
        self.ranking_ = S
        self.mi_ = S_mi

        return self
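The selection step above scores each unselected feature by relevance minus mean redundancy (the MRMR criterion). A small standalone sketch of that arithmetic on made-up, precomputed MI values:

import numpy as np
import bottleneck as bn

relevance = np.array([0.8, 0.6, 0.4])    # I(x_f; y) for three unselected features
redundancy = np.array([[0.5, 0.1, 0.3],  # I(x_f; x_s) against two selected features
                       [0.4, 0.2, 0.2]])
mrmr = relevance - bn.nanmean(redundancy, axis=0)
print(bn.nanargmax(mrmr))  # 1: the best relevance/redundancy trade-off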
Example #4
    def get_first_maximum_index(self, wfm, peak_minimum_power):
        """
        Return the index of the first peak (first maximum) on
        the leading edge before the absolute power maximum.
        The first peak is only valid if its power exceeds a certain threshold
        """

        # Get the main maximum first
        absolute_maximum_index = bn.nanargmax(wfm)

        # Find relative maxima before the absolute maximum

        try:
            peaks = cytfmra_findpeaks(wfm[0:absolute_maximum_index])
        except Exception:
            return -1

        # Check which relative maxima are above the required threshold
        leading_maxima = np.where(wfm[peaks] >= peak_minimum_power)[0]

        # Identify the first maximum
        first_maximum_index = absolute_maximum_index
        if len(leading_maxima) > 0:
            # first_maximum_index = leading_maxima[0]
            first_maximum_index = peaks[leading_maxima[0]]

        return first_maximum_index
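cytfmra_findpeaks is a project-internal helper that is not shown here. As a rough stand-in, scipy.signal.find_peaks can express the same leading-edge logic; a hedged sketch, not the original implementation:

import numpy as np
from scipy.signal import find_peaks


def first_maximum_index(wfm, peak_minimum_power):
    # Index of the absolute power maximum (NaN-aware)
    absolute_maximum_index = int(np.nanargmax(wfm))
    # Relative maxima on the leading edge only
    peaks, _ = find_peaks(wfm[:absolute_maximum_index])
    # Keep peaks whose power clears the threshold
    valid = peaks[wfm[peaks] >= peak_minimum_power]
    # Fall back to the absolute maximum if no leading peak qualifies
    return int(valid[0]) if valid.size else absolute_maximum_index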
Example #5
    def compute_draw_info(self, x, ys):
        bs = self.compute_baseline(x, ys)
        im = bottleneck.nanargmax(ys - bs, axis=1)
        lines = ((x[im], bs[np.arange(bs.shape[0]), im]),
                 (x[im], ys[np.arange(ys.shape[0]), im]))
        return [("curve", (x, self.compute_baseline(x, ys), INTEGRATE_DRAW_BASELINE_PENARGS)),
                ("curve", (x, ys, INTEGRATE_DRAW_BASELINE_PENARGS)),
                ("line", lines)]
Example #6
def _most_likely_cp(xs, minnobs):
    N = len(xs)
    check_nobs(N, minnobs)
    start, end = compute_endpoints(N, minnobs)
    wstats = np.array([welch(xs[:i], xs[i:]) for i in range(start, end)])
    cp = bn.nanargmax(wstats)
    stat = wstats[cp]
    return cp + start, stat
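welch, check_nobs, and compute_endpoints are project helpers that are not shown. For orientation, a Welch-style two-sample statistic that such a change-point scan could maximize might look like the following; this is an assumption about its shape, not the project's definition:

import numpy as np


def welch(a, b):
    # Squared Welch t-statistic: mean difference scaled by the
    # unequal-variance standard error of the two samples
    ma, mb = np.nanmean(a), np.nanmean(b)
    va, vb = np.nanvar(a, ddof=1), np.nanvar(b, ddof=1)
    return (ma - mb) ** 2 / (va / len(a) + vb / len(b))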
Example #7
    def compute(self, today, assets, out, data):
        drawdowns = fmax.accumulate(data, axis=0) - data
        drawdowns[isnan(drawdowns)] = NINF
        drawdown_ends = nanargmax(drawdowns, axis=0)

        # TODO: Accelerate this loop in Cython or Numba.
        for i, end in enumerate(drawdown_ends):
            peak = nanmax(data[:end + 1, i])
            out[i] = (peak - data[end, i]) / data[end, i]
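A quick worked instance of the drawdown logic above on a single asset; fmax, nanargmax, and nanmax are the numpy functions the snippet imports, and the prices are illustrative. Note that, as in the code, the loss is measured relative to the trough rather than the peak:

import numpy as np

data = np.array([[100., 120., 90., 95., 80., 130.]]).T   # one asset, one column
drawdowns = np.fmax.accumulate(data, axis=0) - data      # running peak minus price
end = int(np.nanargmax(drawdowns, axis=0)[0])            # trough of the deepest drawdown
peak = np.nanmax(data[:end + 1, 0])
print((peak - data[end, 0]) / data[end, 0])              # (120 - 80) / 80 = 0.5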
Example #8
def reducepoints(x, y, n=2000, further=True):
    """
    Reduce the total number of x, y coordinates for plotting. The
    algorithm looks at windows roughly one pixel wide and keeps the
    minimum and maximum point within each window. NOTE: both the min
    and max of every window are kept, so the result has a length of
    approximately 2n. If further is True, nonessential points are
    then removed.
    """

    # Can only work on blocks
    if len(x) < n*3: return (x, y)

    # Calculate the block size to average over
    block = int(math.floor(float(len(x))/n))
    newn = int(math.ceil(float(len(x))/block))
    ox, oy = np.zeros(2*newn), np.zeros(2*newn)

    # Search over each block for the min and max y value, order
    # correctly, and add to the output
    for i in range(newn):
        # Avoid just adding NaN's for all NaN blocks
        try:
            pmn = nanargmin(y[i*block:(i + 1)*block])
        except ValueError:
            pmn = 0
        try:
            pmx = nanargmax(y[i*block:(i + 1)*block])
        except ValueError:
            pmx = 0

        if pmn < pmx:
            ox[2*i], oy[2*i] = x[i*block + pmn], y[i*block + pmn]
            ox[2*i + 1], oy[2*i + 1] = x[i*block + pmx], y[i*block + pmx]
        else:
            ox[2*i + 1], oy[2*i + 1] = x[i*block + pmn], y[i*block + pmn]
            ox[2*i], oy[2*i] = x[i*block + pmx], y[i*block + pmx]

    if further:
        last = -1
        match = 0

        # Mark the interior points of runs of >= 3 equal y values for removal
        for i in range(len(ox)):
            if oy[i] != last:
                last = oy[i]
                match = 0
            else:
                match += 1
                if match > 1:
                    ox[i - 1] = np.nan

        # Eliminate those positions where ox is nan
        if np.sum(np.isnan(ox)) > 0:
            oy = oy[np.isfinite(ox)]
            ox = ox[np.isfinite(ox)]

    return ox, oy
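A quick usage sketch on synthetic data, assuming the function above plus its module imports (math and the NaN-aware argmin/argmax) are in scope:

import numpy as np

x = np.linspace(0, 10, 100000)
y = np.sin(x) + 0.1 * np.random.randn(x.size)
ox, oy = reducepoints(x, y, n=2000)
print(len(ox))  # roughly 2 * n points, each window's min and max preserved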
Example #10
def least_square_method(dspt):
    npol = 6
    com = np.array([bn.nanmean(dspt.lon), bn.nanmean(dspt.lat)])
    timeseries = False
    ncc = dspt.lon.size
    dlon = []
    dlat = []
    for i in range(ncc):
        # haversine(p1,p2)
        dlon.append(
            haversine([dspt.lon[i], com[1]], com) * 1000 *
            np.sign(dspt.lon[i] - com[0]))
        dlat.append(
            haversine([com[0], dspt.lat[i]], com) * 1000 *
            np.sign(dspt.lat[i] - com[1]))

    dlon = np.array(dlon)
    dlat = np.array(dlat)
    if not timeseries:
        R = np.mat(np.vstack((np.ones((ncc)), dlon, dlat)).T)
        u0 = np.mat(dspt.u.values).T
        v0 = np.mat(dspt.v.values).T

        if (np.isnan(u0).sum() == 0) & (np.isnan(v0).sum()
                                        == 0) & (np.isnan(R).sum() == 0):
            A, _, _, _ = la.lstsq(R, u0)
            B, _, _, _ = la.lstsq(R, v0)
        else:
            A = np.nan * np.ones(ncc)
            B = np.nan * np.ones(ncc)

    points = np.vstack([dlon, dlat])
    if (np.isfinite(dlon).sum() == npol) and (np.isfinite(dlat).sum() == npol):
        # careful with nans
        cov = np.cov(points)
        w, v = np.linalg.eig(cov)
        aspect = bn.nanmin(w) / bn.nanmax(w)

        if aspect < 0.99:
            ind = bn.nanargmax(w)
            angle = np.arctan(v[ind, 1] / v[ind, 0]) * 180 / np.pi
            if (angle < 0):
                angle += 360.
        else:
            angle = np.nan
    else:
        aspect = np.nan
        angle = np.nan

    dspt['ux'] = float(A[1])
    dspt['uy'] = float(A[2])
    dspt['vx'] = float(B[1])
    dspt['vy'] = float(B[2])
    dspt['aspect'] = aspect
    dspt['angle'] = angle

    return dspt
Example #11
File: CRP.py Project: cbg-ethz/BnpC
    def _normalize_log_probs(probs):
        max_i = bn.nanargmax(probs)
        try:
            exp_probs = np.exp(
                probs[np.arange(probs.size) != max_i] - probs[max_i])
        except FloatingPointError:
            exp_probs = np.exp(
                np.clip(probs[np.arange(probs.size) != max_i] - probs[max_i],
                        log_EPSILON, 0))
        probs_norm = probs - probs[max_i] - np.log1p(bn.nansum(exp_probs))
        return np.exp(np.clip(probs_norm, log_EPSILON, 0))
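The function above is a guarded log-sum-exp normalization: subtract the maximum entry, exponentiate the rest, and renormalize through log1p so the result is a proper probability vector. For comparison, a minimal unguarded sketch of the same identity (log_EPSILON is a module constant of the original project):

import numpy as np


def normalize_log_probs(log_p):
    # Softmax in log space: log_p - logsumexp(log_p), then exponentiate
    m = np.nanmax(log_p)
    norm = log_p - m - np.log(np.sum(np.exp(log_p - m)))
    return np.exp(norm)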
Example #12
    def evaluate_rule(self, rule):
        # As an exception, when the target class is not set,
        # the majority class is chosen to stand against all others
        tc = rule.target_class
        dist = rule.curr_class_dist
        if tc is None:
            tc = bn.nanargmax(dist)
        target = dist[tc]
        p_dist = rule.prior_class_dist
        pa = p_dist[tc] / p_dist.sum()
        return (target + self.m * pa) / (dist.sum() + self.m)
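The return value is the m-estimate of rule precision, (s + m * p_a) / (N + m), which shrinks the observed class frequency toward the prior. A quick worked instance with made-up counts:

import numpy as np

dist = np.array([8., 2.])      # covered examples per class; target class 0
p_dist = np.array([50., 50.])  # prior class distribution
m = 2.0
pa = p_dist[0] / p_dist.sum()  # prior probability of the target class: 0.5
print((dist[0] + m * pa) / (dist.sum() + m))  # (8 + 1) / 12 = 0.75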
Example #13
    def compute_integral(self, x_s, y_s):
        y_s = y_s - self.compute_baseline(x_s, y_s)
        if len(x_s) == 0:
            return np.zeros((y_s.shape[0],)) * np.nan
        # avoid whole-NaN rows
        whole_nan_rows = np.isnan(y_s).all(axis=1)
        y_s[whole_nan_rows] = 0
        # select peak positions
        pos = x_s[bottleneck.nanargmax(y_s, axis=1)]
        # set unknown results back to NaN
        pos[whole_nan_rows] = np.nan
        return pos
Example #14
def rpredict(classifier, x, categories):
    """
    Same as predict, but outputs names only.

    :param classifier: The trained classifier.
    :param x: List of x to predict on.
    :param categories: List of all model names.
    :return: List of predicted names.
    """
    encoder = OnehotEncoder(categories)
    prediction = classifier.predict(fix_dims(x), verbose=1)
    index = [nanargmax(prob) for prob in prediction]
    label = encoder.label(index)
    return label.tolist()
Example #15
    def _normalize_log(probs):
        max_i = bn.nanargmax(probs, axis=0)
        try:
            log_probs_norm = probs - probs[max_i] - np.log1p(
                bn.nansum(
                    np.exp(probs[np.arange(probs.size) != max_i] -
                           probs[max_i])))
        except FloatingPointError:
            if probs[0] > probs[1]:
                log_probs_norm = np.array([0, log_EPSILON])
            else:
                log_probs_norm = np.array([log_EPSILON, 0])

        return log_probs_norm
Example #16
def confusion_matrix(classifier, x, y, categories):
    """
    Runs a Keras classifier to create a confusion matrix.

    :param classifier: The trained classifier.
    :param x: A list of x values to predict on.
    :param y: True labels corresponding to x.
    :param categories: List of all category names.
    :return: A confusion matrix, with proportions in [0, 1]
    """
    encoder = OnehotEncoder(categories)
    index = encoder.index(y)
    prediction = classifier.predict(fix_dims(x), verbose=1)
    n = len(categories)
    res = np.zeros((n, n))
    weight = np.zeros(n)
    for prob, row in zip(prediction, index):
        mle = nanargmax(prob)
        res[row][mle] += 1
        weight[row] += 1
    return res / weight[:, None]  # TODO: divide row or column?
Example #17
    def _calc_parameters(self, wfm_counts):
        # loop over the waveforms
        for i in np.arange(self._n): 
            y = wfm_counts[i, :].flatten().astype(np.float32)
            y -= bn.nanmean(y[0:11])  # Remove Noise
            y[np.where(y < 0.0)[0]] = 0.0  # Set negative counts to zero
            yp = np.nanmax(y)  # Waveform peak value
            ypi = bn.nanargmax(y)  # Waveform peak index

            # AMANDINE: implementation for wf of 256 bins (128 zero-padded)?
            onediv = float(1)/float(41)

            # AMANDINE: here i seems to be understood as gate index but it is wf index!?
            if i == 256: 
                break
            if (ypi + 100) < i < (ypi + 140):               # AMANDINE: syntax to be checked
                try:
                    self._ltpp[i] = (onediv*float(y[i]))/float(yp)  # AMANDINE: where is the sum in this formula?
                except ZeroDivisionError:
                    self._ltpp[i] = np.nan
Example #18
def find_nearest(array, value):
	"""
	Search array for value and return the index where the value is closest.

	Parameters:
		array (ndarray): Array to search.
		value: Value to search array for.

	Returns:
		int: Index of ``array`` closest to ``value``.

	Raises:
		ValueError: If ``value`` is NaN.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""
	if np.isnan(value):
		raise ValueError("Invalid search value")
	if np.isposinf(value):
		return nanargmax(array)
	if np.isneginf(value):
		return nanargmin(array)
	return nanargmin(np.abs(array - value))
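Quick usage of find_nearest; nanargmin and nanargmax here are the bottleneck functions the module imports:

import numpy as np

arr = np.array([1.0, np.nan, 3.5, 7.0])
print(find_nearest(arr, 3.0))     # 2: arr[2] = 3.5 is closest to 3.0
print(find_nearest(arr, np.inf))  # 3: +inf maps to the NaN-aware argmax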
Example #19
    def fit(self, num_workers=7, debug_plots=True, force=False):
        """
        Train A+1 classifiers (the first one is for no features observed.)
        Find order of A features to run in.
        """
        if not force and os.path.exists(self.filename):
            with open(self.filename, 'rb') as f:
                sc = pickle.load(f)
            self.action_inds = sc.action_inds
            self.clfs = sc.clfs
            self.has_been_fit = True
            return

        ds = self.ds
        instances = ds.X
        labels = ds.y
        A = len(ds.actions)

        # Train classifier on initially empty states.
        action_inds = []
        states = get_states(ds, instances, action_inds)
        clf, score, entropy = get_classifier(
            ds, states, labels, 1, num_workers)

        # We will collect values for visualization.
        scores = np.empty((A, A))
        scores.fill(np.nan)
        entropies = np.empty((A, A))
        entropies.fill(np.nan)
        infogains = np.empty((A, A))
        infogains.fill(np.nan)

        # While there are feasible actions, consider them.
        costs = ds.action_costs.copy()
        remaining_mask = np.ones(len(ds.actions), dtype=bool)
        selected_clfs = [clf]
        for iteration in range(A):
            print('-'*80)
            print('Iteration {}'.format(iteration))

            feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
            if len(feas_inds) == 0:
                break

            new_clfs = np.empty(A, dtype=object)
            for action_ind in feas_inds:
                print(ds.actions[action_ind]),
                new_action_inds = action_inds + [action_ind]
                states = get_states(ds, instances, new_action_inds)

                new_clf, new_score, new_entropy = get_classifier(
                    ds, states, labels, 1, num_workers)
                new_clfs[action_ind] = new_clf

                infogains[action_ind, iteration] = entropy - new_entropy
                scores[action_ind, iteration] = new_score
                entropies[action_ind, iteration] = new_entropy

            rewards = infogains[:, iteration] / ds.action_costs
            ind = bn.nanargmax(rewards)
            selected_clfs.append(new_clfs[ind])
            action_inds.append(ind)
            print('Selected {} with infogain {:.2f} and cost {:.2f}'.format(ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]))

            remaining_mask[ind] = False
            costs += ds.action_costs[ind]
            entropy = entropies[ind, iteration]

        actions = np.take(ds.actions, action_inds)
        print('Selected actions in order: {}'.format(actions))

        self.action_inds = action_inds
        self.clfs = selected_clfs
        assert(len(self.clfs) == len(self.action_inds) + 1)
        self.has_been_fit = True
        if debug_plots:
            self.plot_stuff(scores, entropies, infogains, rewards)
        self.save()
Example #20
def func1(tnlhf, tnlhf_curr, residual, y, e, o, a, _s_prev, p, indT):
    m, n = y.shape
    w = arange(m)

    if p.probType == 'IP':
        oc_modL, oc_modU = o[:, :n], o[:, n:]
        ac_modL, ac_modU = a[:, :n], a[:, n:]
        #            # TODO: handle nans
        mino = where(oc_modL < oc_modU, oc_modL, oc_modU)
        maxa = where(ac_modL < ac_modU, ac_modU, ac_modL)

        # Prev
        tmp = a[:, 0:n] - o[:, 0:n] + a[:, n:] - o[:, n:]
        t = nanargmin(tmp, 1)
        d = 0.5 * tmp[w, t]

        #New
        #        tmp = a - o
        #        t_ = nanargmin(tmp,1)
        #        t = t_% n
        #        d = tmp[w, t_]

        #        ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')
        ind = 2**(1.0 / n) * d >= _s_prev
        #new
        #        ind = 2**(1.0/n) * d >= nanmax(maxa-mino, 1)

        #ind = 2**(-n) >= (_s_prev - _s)/asarray(_s, 'float64')

        #s2 = nanmin(maxa - mino, 1)
        #print (abs(s2/_s))

        # Prev
        _s = nanmin(maxa - mino, 1)

        # New
        #_s = nanmax(maxa - mino, 1)
        #        _s = nanmax(a - o, 1)

        #ind = _s_prev  <= _s + ((2**-n / log(2)) if n > 15 else log2(1+2**-n))
        indD = logical_not(ind)
        indD = ind
        indD = None
        #print len(where(indD)[0]), len(where(logical_not(indD))[0])
#    elif p.probType == 'MOP':
#
#        raise 'unimplemented'
    else:
        if p.solver.dataHandling == 'sorted':
            _s = func13(o, a)

            t = nanargmin(a, 1) % n

            d = nanmax([a[w, t] - o[w, t], a[w, n + t] - o[w, n + t]], 0)

            ## !!!! Don't replace it by (_s_prev /d- 1) to omit rounding errors ###
            #ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')

            #NEW
            ind = d >= _s_prev / 2**(1.0e-12 / n)
            #ind = d  >=  _s_prev / 2 ** (1.0/n)
            indD = empty(m, bool)
            indD.fill(True)
            #ind.fill(False)
            ###################################################
        elif p.solver.dataHandling == 'raw':
            if p.probType == 'MOP':
                t = p._t[:m]
                p._t = p._t[m:]
                d = _s = p.__s[:m]
                p.__s = p.__s[m:]
            else:
                #                tnlh_1, tnlh_2 = tnlhf[:, 0:n], tnlhf[:, n:]
                #                TNHLF_min =  where(logical_or(tnlh_1 > tnlh_2, isnan(tnlh_1)), tnlh_2, tnlh_1)
                #               # Set _s
                #                _s = nanmin(TNHLF_min, 1)
                T = tnlhf_curr
                tnlh_curr_1, tnlh_curr_2 = T[:, 0:n], T[:, n:]
                TNHL_curr_min = where(
                    logical_or(tnlh_curr_1 < tnlh_curr_2, isnan(tnlh_curr_2)),
                    tnlh_curr_1, tnlh_curr_2)
                t = nanargmin(TNHL_curr_min, 1)
                T = tnlhf
                d = nanmin(vstack(([T[w, t], T[w, n + t]])), 0)
                _s = d

            #OLD
            #!#!#!#! Don't replace it by _s_prev - d <= ... to omit inf-inf = nan !#!#!#
            #ind = _s_prev  <= d + ((2**-n / log(2)) if n > 15 else log2(1+2**-n))
            #ind = _s_prev - d <= ((2**-n / log(2)) if n > 15 else log2(1+2**-n))

            #NEW
            if any(_s_prev < d):
                pass
            ind = _s_prev <= d + 1.0 / n
            #            T = TNHL_curr_min
            #ind2 = nanmin(TNHL_curr_min, 0)

            indQ = d >= _s_prev - 1.0 / n
            #indQ = logical_and(indQ, False)
            indD = logical_or(indQ, logical_not(indT))


#            print '------'
#            print indQ[:10]
#            print indD[:10]
#            print _s_prev[:2], d[:2]
#print len(where(indD)[0]), len(where(indQ)[0]), len(where(indT)[0])
#print _s_prev - d
###################################################
#d = ((tnlh[w, t]* tnlh[w, n+t])**0.5)
        else:
            assert 0

    if any(ind):
        r10 = where(ind)[0]
        #print('r10:', r10)
        #        print _s_prev
        #        print ((_s_prev -d)*n)[r10]
        #        print('ind length: %d' % len(where(ind)[0]))
        #        print where(ind)[0].size
        #bs = e[ind] - y[ind]
        #t[ind] = nanargmax(bs, 1) # ordinary numpy.argmax can be used as well
        bs = e[r10] - y[r10]
        t[r10] = nanargmax(bs, 1)  # ordinary numpy.argmax can be used as well

    return t, _s, indD
Example #21
    def _fit(self, X, y):
        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features
        F = range(p)

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ----------------------------------------------------------------------
        # FIND FIRST FEATURE
        # ----------------------------------------------------------------------

        # check a range of ks (3-10), and choose the one with the max median MI
        k_min = 3
        k_max = 11
        xy_MI = np.zeros((k_max-k_min, p))
        xy_MI[:] = np.nan
        for i, k in enumerate(range(k_min, k_max)):
            xy_MI[i, :] = mi.get_first_mi_vector(self, k)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ----------------------------------------------------------------------
        # FIND SUBSEQUENT FEATURES
        # ----------------------------------------------------------------------

        n_features = np.inf if self.n_features == 'auto' else self.n_features
        while len(S) < n_features:
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            elif self.method == 'JMIM':
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            elif self.method == 'MRMR':
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]

            # record the JMIM of the newly selected feature and add it to S
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of previously sel
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ----------------------------------------------------------------------
        # SAVE RESULTS
        # ----------------------------------------------------------------------
        
        self.n_features_ = len(S)
        self.support_ = np.zeros(p, dtype=bool)
        self.support_[S] = 1
        self.ranking_ = S
        self.mi_ = S_mi

        return self
Example #22
File: mifs.py Project: bacalfa/mifs
    def _fit(self, X, y):
        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features
        F = [v for v in range(p)]

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ---------------------------------------------------------------------
        # FIND FIRST FEATURE
        # ---------------------------------------------------------------------

        # check a range of ks (3-10), and choose the one with the max median MI
        k_min = 3
        k_max = 11
        xy_MI = np.zeros((k_max - k_min, p))
        xy_MI[:] = np.nan
        for i, k in enumerate(range(k_min, k_max)):
            xy_MI[i, :] = mi.get_first_mi_vector(self, k)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ---------------------------------------------------------------------
        # FIND SUBSEQUENT FEATURES
        # ---------------------------------------------------------------------

        while ((len(S) < self.n_features)
               if not isinstance(self.n_features, str) else True):
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            elif self.method == 'JMIM':
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            elif self.method == 'MRMR':
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]

            # record the JMIM of the newly selected feature and add it to S
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of previously sel
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ---------------------------------------------------------------------
        # SAVE RESULTS
        # ---------------------------------------------------------------------

        self.n_features_ = len(S)
        self.support_ = np.zeros(p, dtype=bool)
        self.support_[S] = 1
        self.ranking_ = S
        self.mi_ = S_mi

        return self
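Examples #21 and #22 are two revisions of the same selector from the mifs package. A hedged usage sketch against that package's public class; the constructor arguments are inferred from the snippets above, so treat the exact API as an assumption:

import numpy as np
import mifs

X = np.random.randn(200, 20)
y = (X[:, 0] + 0.5 * X[:, 3] > 0).astype(int)

selector = mifs.MutualInformationFeatureSelector(method='JMIM', n_features=5)
selector.fit(X, y)
print(selector.ranking_)  # selected feature indices, in order of selection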
Example #23
def func1(tnlhf, tnlhf_curr, residual, y, e, o, a, _s_prev, p, indT):
    m, n = y.shape
    w = arange(m)
    
    if p.probType == 'IP':
        oc_modL, oc_modU = o[:, :n], o[:, n:]
        ac_modL, ac_modU = a[:, :n], a[:, n:]
#            # TODO: handle nans
        mino = where(oc_modL < oc_modU, oc_modL, oc_modU)
        maxa = where(ac_modL < ac_modU, ac_modU, ac_modL)
    
        # Prev
        tmp = a[:, 0:n]-o[:, 0:n]+a[:, n:]-o[:, n:]
        t = nanargmin(tmp,1)
        d = 0.5*tmp[w, t]
        
        
        #New
#        tmp = a - o
#        t_ = nanargmin(tmp,1)
#        t = t_% n
#        d = tmp[w, t_]

#        ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')
        ind = 2**(1.0/n) * d >= _s_prev
        #new
#        ind = 2**(1.0/n) * d >= nanmax(maxa-mino, 1)
        
        #ind = 2**(-n) >= (_s_prev - _s)/asarray(_s, 'float64')
    
        #s2 = nanmin(maxa - mino, 1)
        #print (abs(s2/_s))
        
        # Prev
        _s = nanmin(maxa - mino, 1)
        
        # New
        #_s = nanmax(maxa - mino, 1)
#        _s = nanmax(a - o, 1)
        
        #ind = _s_prev  <= _s + ((2**-n / log(2)) if n > 15 else log2(1+2**-n)) 
        indD = logical_not(ind)
        indD = ind
        indD = None
        #print len(where(indD)[0]), len(where(logical_not(indD))[0])
#    elif p.probType == 'MOP':
#
#        raise 'unimplemented'
    else:
        if p.solver.dataHandling == 'sorted':
            _s = func13(o, a)
            t = nanargmin(a, 1) % n
            d = nanmax([a[w, t] - o[w, t], 
                    a[w, n+t] - o[w, n+t]], 0)
            
            ## !!!! Don't replace it by (_s_prev /d- 1) to omit rounding errors ###
            #ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')
            
            #NEW
            ind = d  >=  _s_prev / 2 ** (1.0e-12/n)
            #ind = d  >=  _s_prev / 2 ** (1.0/n)
            indD = empty(m, bool)
            indD.fill(True)
            #ind.fill(False)
            ###################################################
        elif p.solver.dataHandling == 'raw':
            if p.probType == 'MOP':
                t = p._t[:m]
                p._t = p._t[m:]
                d = _s = p.__s[:m]
                p.__s = p.__s[m:]
            else:
#                tnlh_1, tnlh_2 = tnlhf[:, 0:n], tnlhf[:, n:]
#                TNHLF_min =  where(logical_or(tnlh_1 > tnlh_2, isnan(tnlh_1)), tnlh_2, tnlh_1)
#               # Set _s
#                _s = nanmin(TNHLF_min, 1)
                T = tnlhf_curr
                tnlh_curr_1, tnlh_curr_2 = T[:, 0:n], T[:, n:]
                TNHL_curr_min =  where(logical_or(tnlh_curr_1 < tnlh_curr_2, isnan(tnlh_curr_2)), tnlh_curr_1, tnlh_curr_2)
                t = nanargmin(TNHL_curr_min, 1)
                T = tnlhf
                d = nanmin(vstack(([T[w, t], T[w, n+t]])), 0)
                _s = d

            #OLD
            #!#!#!#! Don't replace it by _s_prev - d <= ... to omit inf-inf = nan !#!#!#
            #ind = _s_prev  <= d + ((2**-n / log(2)) if n > 15 else log2(1+2**-n)) 
            #ind = _s_prev - d <= ((2**-n / log(2)) if n > 15 else log2(1+2**-n)) 
            
            #NEW
            if any(_s_prev < d):
                pass
            ind = _s_prev  <= d + 1.0/n
#            T = TNHL_curr_min
            #ind2 = nanmin(TNHL_curr_min, 0)
            
            indQ = d >= _s_prev - 1.0/n 
            #indQ = logical_and(indQ, False)
            indD = logical_or(indQ, logical_not(indT))
#            print _s_prev[:2], d[:2]
            #print len(where(indD)[0]), len(where(indQ)[0]), len(where(indT)[0])
            #print _s_prev - d
            ###################################################
            #d = ((tnlh[w, t]* tnlh[w, n+t])**0.5)
        else:
            assert 0

    if any(ind):
        r10 = where(ind)[0]
        #print('r10:', r10)
#        print _s_prev
#        print ((_s_prev -d)*n)[r10]
#        print('ind length: %d' % len(where(ind)[0]))
#        print where(ind)[0].size
        #bs = e[ind] - y[ind]
        #t[ind] = nanargmax(bs, 1) # ordinary numpy.argmax can be used as well
        bs = e[r10] - y[r10]
        t[r10] = nanargmax(bs, 1) # ordinary numpy.argmax can be used as well

    return t, _s, indD
Example #24
    def fit(self, num_workers=7, debug_plots=True, force=False):
        """
        Train A+1 classifiers (the first one is for no features observed.)
        Find order of A features to run in.
        """
        if not force and os.path.exists(self.filename):
            with open(self.filename, 'rb') as f:
                sc = pickle.load(f)
            self.action_inds = sc.action_inds
            self.clfs = sc.clfs
            self.has_been_fit = True
            return

        ds = self.ds
        instances = ds.X
        labels = ds.y
        A = len(ds.actions)

        # Train classifier on initially empty states.
        action_inds = []
        states = get_states(ds, instances, action_inds)
        clf, score, entropy = get_classifier(ds, states, labels, 1, num_workers)

        # We will collect values for visualization.
        scores = np.empty((A, A))
        scores.fill(np.nan)
        entropies = np.empty((A, A))
        entropies.fill(np.nan)
        infogains = np.empty((A, A))
        infogains.fill(np.nan)

        # While there are feasible actions, consider them.
        costs = ds.action_costs.copy()
        remaining_mask = np.ones(len(ds.actions), dtype=bool)
        selected_clfs = [clf]
        for iteration in range(A):
            print ("-" * 80)
            print ("Iteration {}".format(iteration))

            feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
            if len(feas_inds) == 0:
                break

            new_clfs = np.empty(A, dtype=object)
            for action_ind in feas_inds:
                print (ds.actions[action_ind]),
                new_action_inds = action_inds + [action_ind]
                states = get_states(ds, instances, new_action_inds)

                new_clf, new_score, new_entropy = get_classifier(ds, states, labels, 1, num_workers)
                new_clfs[action_ind] = new_clf

                infogains[action_ind, iteration] = entropy - new_entropy
                scores[action_ind, iteration] = new_score
                entropies[action_ind, iteration] = new_entropy

            rewards = infogains[:, iteration] / ds.action_costs
            ind = bn.nanargmax(rewards)
            selected_clfs.append(new_clfs[ind])
            action_inds.append(ind)
            print (
                "Selected {} with infogain {:.2f} and cost {:.2f}".format(
                    ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]
                )
            )

            remaining_mask[ind] = False
            costs += ds.action_costs[ind]
            entropy = entropies[ind, iteration]

        actions = np.take(ds.actions, action_inds)
        print ("Selected actions in order: {}".format(actions))

        self.action_inds = action_inds
        self.clfs = selected_clfs
        assert len(self.clfs) == len(self.action_inds) + 1
        self.has_been_fit = True
        if debug_plots:
            self.plot_stuff(scores, entropies, infogains, rewards)
        self.save()
Example #25
    def fit(self, num_workers=1, debug_plots=True, force=False):
        if not force and os.path.exists(self.filename):
            with open(self.filename, 'rb') as f:
                sc = pickle.load(f)
            self.__dict__.update(sc.__dict__)
            return

        instances = ds.X
        labels = ds.y

        instances_train, instances_val, labels_train, labels_val = train_test_split(
            instances, labels, test_size=1 / 3.0
        )
        A = len(ds.actions)
        N_train = instances_train.shape[0]

        # Initialize imputation mechanism
        if self.impute_method == "mean":
            mi = tc.MeanImputer(ds.action_dims).fit(instances_train)
        else:
            mi = tc.GaussianImputer(ds.action_dims).fit(instances_train)

        # Train classifier on initially empty states.
        action_inds = []
        states_train = get_states(ds, instances_train, action_inds, mi)
        states_val = get_states(ds, instances_val, action_inds, mi)

        if self.clf_method == "logreg":
            clf, score_train, entropy_train = get_classifier(ds, states_train, labels_train, self.num_clf, num_workers)
        else:
            clf = tc.StateClassifierImagenet(ds)
        score_val, entropy_val = eval_classifier(clf, states_val, labels_val)

        # We collect values for visualization.
        scores = np.empty((A, A))
        scores.fill(np.nan)
        entropies = np.empty((A, A))
        entropies.fill(np.nan)
        infogains = np.empty((A, A))
        infogains.fill(np.nan)

        # While there are feasible actions, consider them.
        costs = ds.action_costs.copy()
        remaining_mask = np.ones(len(ds.actions), dtype=bool)
        policy_masks = [remaining_mask.copy()]
        for iteration in range(A):
            print ("-" * 80)
            print ("Iteration {}".format(iteration))

            feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
            if len(feas_inds) == 0:
                break

            # Train classifier with new mask distribution.
            if self.clf_method != "imagenet":
                new_masks = []
                for action_ind in feas_inds:
                    mask = remaining_mask.copy()
                    mask[action_ind] = False
                    new_masks.append(mask)
                md = tc.MaskDistribution()
                md.update(np.array(policy_masks + new_masks))
                N = N_train * ((iteration + 1) + len(feas_inds))
                states_, labels_ = get_states_from_mask_distribution(ds, md, instances_train, labels_train, N, mi)
                clf, score_train, entropy_train = get_classifier(ds, states_, labels_, self.num_clf, num_workers)

            # Evaluate the infogain of individual features.
            for action_ind in feas_inds:
                print (ds.actions[action_ind]),
                states_val = get_states(ds, instances_val, action_inds + [action_ind], mi)
                new_score_val, new_entropy_val = eval_classifier(clf, states_val, labels_val)

                infogains[action_ind, iteration] = entropy_val - new_entropy_val
                scores[action_ind, iteration] = new_score_val
                entropies[action_ind, iteration] = new_entropy_val

            rewards = infogains[:, iteration] / ds.action_costs
            ind = bn.nanargmax(rewards)
            action_inds.append(ind)

            entropy_val = entropies[ind, iteration]
            print (
                "Selected {} with infogain {:.2f} and cost {:.2f}".format(
                    ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]
                )
            )

            remaining_mask[ind] = False
            costs += ds.action_costs[ind]
            policy_masks += [remaining_mask.copy()]

        # Fit imputer with all data
        self.mi = mi.fit(instances)

        # Train final classifier, with the final masks and on full data
        if self.clf_method != "imagenet":
            md = tc.MaskDistribution()
            md.update(np.array(policy_masks))
            N = N_train * len(policy_masks)
            states_, labels_ = get_states_from_mask_distribution(ds, md, instances, labels, N, self.mi)
            clf, score, entropy = get_classifier(ds, states_, labels_, self.num_clf, num_workers)

        actions = np.take(ds.actions, action_inds)
        print ("Selected actions in order: {}".format(actions))

        self.clf = clf
        self.action_inds = action_inds
        self.has_been_fit = True
        if debug_plots:
            self.plot_stuff(scores, entropies, infogains, rewards)
        self.save()
Example #26
        tit = time.time()
    # Compute responsibilities
    for i in range(tb, te, ll):
        il = i - tb
        Ss.select_hyperslab((il, 0), (ll, N))
        S.id.read(ms, Ss, tS)
        Rs.select_hyperslab((il, 0), (ll, N))
        R.id.read(ms, Rs, tRold)

        As.select_hyperslab((i, 0), (ll, N))
        A.id.read(ms, As, tAS)
        #tAS = A[i, :]
        tAS += tS
        #tRold = R[i, :]

        tI = bn.nanargmax(tAS, axis=1)
        tY = tAS[ind, tI]
        tAS[ind, tI[ind]] = z
        tY2 = bn.nanmax(tAS, axis=1)

        tR = tS - tY[:, np.newaxis]
        tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind]
        tR = (1 - damping) * tR + damping * tRold

        tRp = np.maximum(tR, 0)

        for il in range(ll):
            tRp[il, i + il] = tR[il, i + il]
            tdR[i - tb + il] = tR[il, i + il]

        if disk is True:
Example #27
def filter_phase(t, x, Plist, smooth_factor=1000):
	"""
	Filter out specific periods by smoothing the phase-curve.

	Parameters:
		t (ndarray): Time vector (days).
		x (ndarray): Flux vector.
		Plist (list): List of periods to remove.
		smooth_factor (float, optional): Factor of phase to use as smooth width.

	Returns:
		Filter flux vector that can be removed from the timeseries.

	Note:
		Does not require time to be sorted.
		Can handle NaN in flux vector.
	"""

	# Prepare arrays:
	Plist = np.atleast_1d(Plist) # Hack to handle 0-dim input
	Np = len(Plist)
	Nt = len(t)
	phase = zeros((Np,Nt), dtype='float64')
	indx = zeros((Np,Nt), dtype='int')
	indx_inv = zeros((Np,Nt), dtype='int')
	phase_tot = zeros(Nt, dtype='float64')
	phase_smooth_t = zeros((Np,Nt), dtype='float64')
	dphase = zeros(Np, dtype='float64')

	# Loop through periods to be removed:
	for k in range(Np):
		# Calculate the phase and sort it:
		phase[k] = mod(t, Plist[k])
		indx[k] = argsort(phase[k])
		indx_inv[k] = argsort(indx[k])
		dphase[k] = median(diff( phase[k,indx[k]] ))

		# Calculate smooth version of the phase curve:
		phase_smooth = _filter_single_phase(phase[k,indx[k]], x[indx[k]]-phase_tot[indx[k]], Plist[k]/smooth_factor, dphase[k])
		# Un-sort phase_smooth back to time-sorted order:
		phase_smooth_t[k] = phase_smooth[indx_inv[k]]
		# Add to the total phase filter:
		phase_tot += phase_smooth_t[k,:]

		# If removing multiple periods perform iterative procedure where
		# phase curves are added and removed to avoid cross-talk between periods:
		if k != 0:
			for j in range(k):
				# Add the transit back into to the timeseries (by subtracting it from the filter):
				phase_tot -= phase_smooth_t[j,:]
				# Re-calculate the phase curve of the transit:
				phase_smooth = _filter_single_phase(phase[j,indx[j]], x[indx[j]]-phase_tot[indx[j]], Plist[j]/smooth_factor, dphase[j])
				phase_smooth_t[j] = phase_smooth[indx_inv[j]]
				# Remove the transit again:
				phase_tot += phase_smooth_t[j,:]

	# Make plots of phase curves:
	if _output_folder is not None:
		# Find the point on the smoothed curve that deviates the most from zero:
		imax = nanargmax(np.abs(phase_smooth_t), axis=1)

		s = nanstd(x)
		fig = plt.figure()
		fig.canvas.set_window_title('phasecurve')
		fig.subplots_adjust(hspace=0.05)
		for k,P in enumerate(Plist):
			# Plot phasecurve for this period:
			ax = plt.subplot(Np, 1, k+1)
			ax.plot(phase[k]/P, x, 'k.', markersize=2) # No need to sort if we only plot points
			ax.plot(phase[k,indx[k]]/P, phase_smooth_t[k,indx[k]], 'r-')
			ax.axvline(phase[k,imax[k]]/P, color='b', linestyle='--') # Line indicating the (likely) planet transit
			ax.set_xlim(0, 1)
			ax.set_ylim(-6*s, 6*s)
			ax.text(0.02, 0.97, 'P = %f d'%(P), horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, backgroundcolor='w', color='k')
			if k!=Np-1: plt.setp(ax.get_xticklabels(), visible=False)
		ax.set_xlabel('Phase')
		fig.text(0.03, 0.5, u'Flux (counts/s)', ha='center', va='center', rotation='vertical', transform=fig.transFigure)
		if _output_format != 'native':
			fig.savefig(os.path.join(_output_folder, _output_prefix+'phasecurve.'+_output_format), format=_output_format, bbox_inches='tight')
			plt.close(fig)

	# Return the total time-sorted phase curve:
	return phase_tot
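The phase-folding bookkeeping above reduces to mod(t, P), an argsort, and its inverse permutation. A tiny standalone illustration with made-up times:

import numpy as np

t = np.array([0.1, 1.3, 2.6, 3.9, 5.2])
P = 1.25
phase = np.mod(t, P)       # fold times onto [0, P)
order = np.argsort(phase)  # phase-sorted view
inv = np.argsort(order)    # inverse permutation, back to time order
print(phase[order])                        # monotone phase curve
print(np.all(phase[order][inv] == phase))  # True: un-sorting recovers time order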
Example #28
    def fit(self, num_workers=1, debug_plots=True, force=False):
        if not force and os.path.exists(self.filename):
            with open(self.filename, 'rb') as f:
                sc = pickle.load(f)
            self.__dict__.update(sc.__dict__)
            return

        instances = ds.X
        labels = ds.y

        instances_train, instances_val, labels_train, labels_val = \
            train_test_split(instances, labels, test_size=1/3.)
        A = len(ds.actions)
        N_train = instances_train.shape[0]

        # Initialize imputation mechanism
        if self.impute_method == 'mean':
            mi = tc.MeanImputer(ds.action_dims).fit(instances_train)
        else:
            mi = tc.GaussianImputer(ds.action_dims).fit(instances_train)

        # Train classifier on initially empty states.
        action_inds = []
        states_train = get_states(ds, instances_train, action_inds, mi)
        states_val = get_states(ds, instances_val, action_inds, mi)

        if self.clf_method == 'logreg':
            clf, score_train, entropy_train = get_classifier(
                ds, states_train, labels_train, self.num_clf, num_workers)
        else:
            clf = tc.StateClassifierImagenet(ds)
        score_val, entropy_val = eval_classifier(clf, states_val, labels_val)

        # We collect values for visualization.
        scores = np.empty((A, A))
        scores.fill(np.nan)
        entropies = np.empty((A, A))
        entropies.fill(np.nan)
        infogains = np.empty((A, A))
        infogains.fill(np.nan)

        # While there are feasible actions, consider them.
        costs = ds.action_costs.copy()
        remaining_mask = np.ones(len(ds.actions), dtype=bool)
        policy_masks = [remaining_mask.copy()]
        for iteration in range(A):
            print('-'*80)
            print('Iteration {}'.format(iteration))

            feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
            if len(feas_inds) == 0:
                break

            # Train classifier with new mask distribution.
            if self.clf_method != 'imagenet':
                new_masks = []
                for action_ind in feas_inds:
                    mask = remaining_mask.copy()
                    mask[action_ind] = False
                    new_masks.append(mask)
                md = tc.MaskDistribution()
                md.update(np.array(policy_masks + new_masks))
                N = N_train * ((iteration + 1) + len(feas_inds))
                states_, labels_ = get_states_from_mask_distribution(
                    ds, md, instances_train, labels_train, N, mi)
                clf, score_train, entropy_train = get_classifier(
                    ds, states_, labels_, self.num_clf, num_workers)

            # Evaluate the infogain of individual features.
            for action_ind in feas_inds:
                print(ds.actions[action_ind]),
                states_val = get_states(
                    ds, instances_val, action_inds + [action_ind], mi)
                new_score_val, new_entropy_val = eval_classifier(
                    clf, states_val, labels_val)

                infogains[action_ind, iteration] = entropy_val - new_entropy_val
                scores[action_ind, iteration] = new_score_val
                entropies[action_ind, iteration] = new_entropy_val

            rewards = infogains[:, iteration] / ds.action_costs
            ind = bn.nanargmax(rewards)
            action_inds.append(ind)

            entropy_val = entropies[ind, iteration]
            print('Selected {} with infogain {:.2f} and cost {:.2f}'.format(ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]))

            remaining_mask[ind] = False
            costs += ds.action_costs[ind]
            policy_masks += [remaining_mask.copy()]

        # Fit imputer with all data
        self.mi = mi.fit(instances)

        # Train final classifier, with the final masks and on full data
        if self.clf_method != 'imagenet':
            md = tc.MaskDistribution()
            md.update(np.array(policy_masks))
            N = N_train * len(policy_masks)
            states_, labels_ = get_states_from_mask_distribution(
                ds, md, instances, labels, N, self.mi)
            clf, score, entropy = get_classifier(
                ds, states_, labels_, self.num_clf, num_workers)

        actions = np.take(ds.actions, action_inds)
        print('Selected actions in order: {}'.format(actions))

        self.clf = clf
        self.action_inds = action_inds
        self.has_been_fit = True
        if debug_plots:
            self.plot_stuff(scores, entropies, infogains, rewards)
        self.save()
Example #29
    def time_nanargmax(self, dtype, shape):
        bn.nanargmax(self.arr)
Example #30
    def argf(self, *args, **kwargs):
        return bn.nanargmax(*args, **kwargs)

class Extremum(ch.Ch):
Example #31
def aff_cluster(Sfn,
                conv_iter=15,
                max_iter=2000,
                damping=0.95,
                mpi=None,
                verbose=False,
                debug=False,
                *args,
                **kwargs):

    comm, NPROCS, rank = mpi

    NPROCS_LOCAL = int(os.environ['OMPI_COMM_WORLD_LOCAL_SIZE'])

    #Init storage for matrices
    #Get file name
    #Open matrix file in parallel mode
    SSf = h5py.File(Sfn, 'r+', driver='mpio', comm=comm)
    SSf.atomic = True
    #Open table with data for clusterization
    SS = SSf['cluster']
    SSs = SS.id.get_space()

    params = {
        'N': 0,
        'l': 0,
        'll': 0,
        'TMfn': '',
        'disk': False,
        'preference': 0.0
    }

    P = Bunch(params)

    ft = np.float32

    if rank == 0:

        N, N1 = SS.shape

        if N != N1:
            raise ValueError("S must be a square array \
                (shape=%s)" % repr((N, N1)))
        else:
            P.N = N

        try:
            preference = SS.attrs['preference']
        except KeyError:
            raise ValueError('Unable to get preference from cluster matrix')

        if max_iter < 0:
            raise ValueError('max_iter must be > 0')

        if not 0 < conv_iter < max_iter:
            raise ValueError('conv_iter must lie in the '
                             'interval between 0 and max_iter')

        if damping < 0.5 or damping >= 1:
            raise ValueError('damping must lie in interval between 0.5 and 1')

        print '#' * 10, 'Main params', '#' * 10
        print 'preference: %.3f' % preference
        print 'damping: %.3f' % damping
        print 'conv_iter: %d' % conv_iter
        print 'max_iter: %d' % max_iter
        print '#' * 31

        P.TMbfn = str(uuid.uuid1())
        P.TMfn = P.TMbfn + '.hdf5'

        # Magic 4 to fit MPI.Gather
        r = N % (NPROCS * 4)
        N -= r
        l = N // NPROCS
        if r > 0:
            print 'Truncating matrix to %sx%s to fit on %d procs' \
                % (N, N, NPROCS)
        P.N = N

        # Fit to memory
        MEM = psutil.virtual_memory().available / NPROCS_LOCAL
        # MEM = 500 * 10 ** 6
        ts = np.dtype(ft).itemsize * N  # itemsize is in bytes
        ts *= 8 * 1.1  # Allocate memory for e, tE, and ...
        # MEM -= ts  # ----
        tl = int(MEM // ts)  # Allocate memory for tS, tA, tR....

        def adjust_cache(tl, l):
            while float(l) % float(tl) > 0:
                tl -= 1
            return tl

        if tl < l:
            P.disk = True
            try:
                cache = 0
                #                cache = int(sys.argv[1])
                #                print sys.argv[1]
                assert cache < l
            except Exception:
                cache = tl
                #print 'Wrong cache settings, set cache to %d' % tl
            tl = adjust_cache(tl, l)
            P.l = l
            P.ll = tl
        else:
            P.l = l
            P.ll = l

        if verbose:
            print "Available memory per process: %.2fG" % (MEM / 10.0**9)
            print "Memory per row: %.2fM" % (ts / 10.0**6)
            print "Estimated memory per process: %.2fG" \
                % (ts * P.ll / 10.0 ** 9)
            print 'Cache size is %d of %d' % (P.ll, P.l)

    P = comm.bcast(P)

    N = P.N
    l = P.l
    ll = P.ll

    ms = h5s.create_simple((ll, N))
    ms_l = h5s.create_simple((N, ))

    tb, te = task(N, NPROCS, rank)

    tS = np.ndarray((ll, N), dtype=ft)
    tSl = np.ndarray((N, ), dtype=ft)

    disk = P.disk

    if disk is True:
        TMLfd = tempfile.mkdtemp()
        TMLfn = osp(TMLfd, P.TMbfn + '_' + str(rank) + '.hdf5')
        TMLf = h5py.File(TMLfn, 'w')
        TMLf.atomic = True

        S = TMLf.create_dataset('S', (l, N), dtype=ft)
        Ss = S.id.get_space()

    #Copy input data and
    #place preference on diagonal
    z = -np.finfo(ft).max

    for i in range(tb, te, ll):
        SSs.select_hyperslab((i, 0), (ll, N))
        SS.id.read(ms, SSs, tS)

        if disk is True:
            Ss.select_hyperslab((i - tb, 0), (ll, N))
            S.id.write(ms, Ss, tS)

    if disk is True:
        R = TMLf.create_dataset('R', (l, N), dtype=ft)
        Rs = R.id.get_space()

    tRold = np.zeros((ll, N), dtype=ft)
    tR = np.zeros((ll, N), dtype=ft)
    tdR = np.zeros((l, ), dtype=ft)

    #Shared storage
    TMf = h5py.File(P.TMfn, 'w', driver='mpio', comm=comm)
    TMf.atomic = True

    Rp = TMf.create_dataset('Rp', (N, N), dtype=ft)
    Rps = Rp.id.get_space()

    tRp = np.ndarray((ll, N), dtype=ft)
    tRpa = np.ndarray((N, ll), dtype=ft)

    A = TMf.create_dataset('A', (N, N), dtype=ft)
    As = A.id.get_space()

    tAS = np.ndarray((ll, N), dtype=ft)
    tAold = np.ndarray((N, ll), dtype=ft)
    tA = np.ndarray((N, ll), dtype=ft)
    tdA = np.ndarray((l, ), dtype=ft)

    e = np.ndarray((N, conv_iter), dtype=np.int8)
    tE = np.ndarray((N, ), dtype=np.int8)
    ttE = np.ndarray((l, ), dtype=np.int8)

    converged = False
    cK = 0
    K = 0
    ind = np.arange(ll)

    for it in range(max_iter):
        if rank == 0:
            if verbose is True:
                print '=' * 10 + 'It %d' % (it) + '=' * 10
                tit = time.time()
        # Compute responsibilities
        for i in range(tb, te, ll):
            if disk is True:
                il = i - tb
                Ss.select_hyperslab((il, 0), (ll, N))
                S.id.read(ms, Ss, tS)
                #tS = S[i, :]
                Rs.select_hyperslab((il, 0), (ll, N))
                R.id.read(ms, Rs, tRold)
            else:
                tRold = tR.copy()

            As.select_hyperslab((i, 0), (ll, N))
            A.id.read(ms, As, tAS)
            #tAS = A[i, :]
            tAS += tS
            #tRold = R[i, :]

            tI = bn.nanargmax(tAS, axis=1)
            tY = tAS[ind, tI]
            tAS[ind, tI[ind]] = z
            tY2 = bn.nanmax(tAS, axis=1)

            tR = tS - tY[:, np.newaxis]
            tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind]
            tR = (1 - damping) * tR + damping * tRold

            tRp = np.maximum(tR, 0)

            for il in range(ll):
                tRp[il, i + il] = tR[il, i + il]
                tdR[i - tb + il] = tR[il, i + il]

            if disk is True:
                R.id.write(ms, Rs, tR)
                #R[i, :] = tR

            Rps.select_hyperslab((i, 0), (ll, N))
            Rp.id.write(ms, Rps, tRp)

            #Rp[i, :] = tRp
        if rank == 0:
            if verbose is True:
                teit1 = time.time()
                print 'R T %s' % (teit1 - tit)

        comm.Barrier()

        # Compute availabilities
        for j in range(tb, te, ll):

            As.select_hyperslab((0, j), (N, ll))

            if disk is True:
                A.id.read(ms, As, tAold)
            else:
                tAold = tA.copy()

            Rps.select_hyperslab((0, j), (N, ll))
            Rp.id.read(ms, Rps, tRpa)
            #tRp = Rp[:, j]

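            # Availability update: a(i, k) = min(0, r(k, k) +
            # sum_{i' not in {i, k}} max(0, r(i', k))), computed from
            # column sums of the clipped responsibilities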
            tA = bn.nansum(tRpa, axis=0)[np.newaxis, :] - tRpa
            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            tA = np.minimum(tA, 0)

            for jl in range(ll):
                tA[j + jl, jl] = tdA[j - tb + jl]

            tA *= (1 - damping)
            tA += damping * tAold

            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            A.id.write(ms, As, tA)

        if rank == 0:
            if verbose is True:
                teit2 = time.time()
                print('A T %s' % (teit2 - teit1))

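        # Point k is an exemplar when a(k, k) + r(k, k) > 0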
        ttE = np.array(((tdA + tdR) > 0), dtype=np.int8)

        if NPROCS > 1:
            # e/tE/ttE are int8 arrays, so use the matching MPI datatype
            comm.Gather([ttE, MPI.SIGNED_CHAR], [tE, MPI.SIGNED_CHAR])
            comm.Bcast([tE, MPI.SIGNED_CHAR])
        else:
            tE = ttE
        e[:, it % conv_iter] = tE
        pK = K
        K = bn.nansum(tE)

        if rank == 0:
            if verbose is True:
                teit = time.time()
                cc = ''
                if K == pK:
                    cK += 1
                    if cK > 1:
                        cc = ' Conv %d of %d' % (cK, conv_iter)
                else:
                    cK = 0

                print('Total K %d T %s%s' % (K, teit - tit, cc))

        if it >= conv_iter:

            if rank == 0:
                se = bn.nansum(e, axis=1)
                converged = (bn.nansum((se == conv_iter) + (se == 0)) == N)

                if converged and (K > 0):
                    if verbose is True:
                        print("Converged after %d iterations." % it)
                else:
                    converged = False

            converged = comm.bcast(converged, root=0)

        if converged is True:
            break

    if not converged and verbose and rank == 0:
        print("Failed to converge after %d iterations." % (max_iter))

    if K > 0:

        I = np.nonzero(e[:, 0])[0].astype(np.int32)
        # int32 matches the MPI.INT datatype used in the collectives below;
        # np.int was removed in NumPy 1.24
        C = np.zeros((N, ), dtype=np.int32)
        tC = np.zeros((l, ), dtype=np.int32)

        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            C[I] = np.arange(K)

        comm.Bcast([C, MPI.INT])

        for k in range(K):
            ii = np.where(C == k)[0]
            tN = ii.shape[0]

            tI = np.zeros((tN, ), dtype=np.float32)
            ttI = np.zeros((tN, ), dtype=np.float32)
            tttI = np.zeros((tN, ), dtype=np.float32)
            ms_k = h5s.create_simple((tN, ))

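            # Refine the cluster's exemplar: choose the member with the
            # largest total similarity to all cluster members, splitting
            # the candidate columns across MPI ranks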
            j = rank
            while j < tN:
                ind = [(ii[i], ii[j]) for i in range(tN)]
                SSs.select_elements(ind)
                SS.id.read(ms_k, SSs, tttI)

                ttI[j] = bn.nansum(tttI)
                j += NPROCS

            comm.Reduce([ttI, MPI.FLOAT], [tI, MPI.FLOAT])

            if rank == 0:
                I[k] = ii[bn.nanargmax(tI)]

        I.sort()
        comm.Bcast([I, MPI.INT])

        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            C[I] = np.arange(K)

    else:
        if rank == 0:
            # Empty arrays, so the I.size/C.size guard below skips saving
            I = np.zeros(0, dtype=np.int32)
            C = np.zeros(0, dtype=np.int32)

    #Cleanup
    SSf.close()
    TMf.close()

    if disk is True:
        TMLf.close()
        shutil.rmtree(TMLfd)

    comm.Barrier()

    if rank == 0:

        os.remove(P.TMfn)

        if verbose:
            print('APN: %d' % K)

        if I.size and C.size:

            Sf = h5py.File(Sfn, 'r+', driver='sec2')

            if 'aff_labels' in Sf:
                del Sf['aff_labels']

            LM = Sf.require_dataset('aff_labels', shape=C.shape, dtype=np.int32)
            LM[:] = C[:]

            if 'aff_centers' in Sf:
                del Sf['aff_centers']

            CM = Sf.require_dataset('aff_centers', shape=I.shape, dtype=np.int32)
            CM[:] = I[:]
            Sf.close()
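
For reference, the loop above is a blockwise, HDF5-backed version of the standard affinity propagation message passing. Below is a minimal single-process NumPy sketch of the same responsibility/availability updates (names and defaults here are illustrative, not taken from the code above):

import numpy as np

def ap_dense(S, damping=0.9, max_iter=200):
    # S: (N, N) similarity matrix with preferences already on the diagonal
    N = S.shape[0]
    R = np.zeros((N, N))
    A = np.zeros((N, N))
    ind = np.arange(N)
    for _ in range(max_iter):
        # r(i, k) = s(i, k) - max_{k' != k} [a(i, k') + s(i, k')]
        AS = A + S
        I = np.argmax(AS, axis=1)
        Y = AS[ind, I]
        AS[ind, I] = -np.inf
        Y2 = np.max(AS, axis=1)
        Rnew = S - Y[:, np.newaxis]
        Rnew[ind, I] = S[ind, I] - Y2
        R = (1 - damping) * Rnew + damping * R
        # a(i, k) = min(0, r(k, k) + sum_{i' not in {i, k}} max(0, r(i', k)))
        Rp = np.maximum(R, 0)
        Rp[ind, ind] = R[ind, ind]
        Anew = Rp.sum(axis=0)[np.newaxis, :] - Rp
        dA = np.diag(Anew).copy()
        Anew = np.minimum(Anew, 0)
        Anew[ind, ind] = dA
        A = (1 - damping) * Anew + damping * A
    # Exemplars are the points with a(k, k) + r(k, k) > 0
    return np.flatnonzero(np.diag(A) + np.diag(R) > 0)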
Ejemplo n.º 32
0
def _normalize_log_probs(probs):
    # Log-sum-exp trick: subtract the max log-probability before
    # exponentiating to avoid overflow, then renormalize to sum to 1
    max_i = bn.nanargmax(probs)
    probs_norm = probs - probs[max_i] - np.log1p(
        bn.nansum(
            np.exp(probs[np.arange(probs.size) != max_i] - probs[max_i])))
    return np.exp(probs_norm)
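
A quick check with made-up values shows why the maximum is subtracted first: a naive exp-then-normalize underflows for very negative log-probabilities, while the log1p form above stays finite.

import numpy as np

log_p = np.array([-1000.0, -1001.0, -1002.0])
# np.exp(log_p) underflows to [0., 0., 0.], so naive normalization fails;
# _normalize_log_probs(log_p) returns ~[0.665, 0.245, 0.090], summing to 1.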
Ejemplo n.º 33
0
    def time_nanargmax(self, dtype, shape, order, axis):
        bn.nanargmax(self.arr, axis=axis)
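
This reads like an airspeed-velocity (asv) benchmark method; the enclosing class, its parameter grid, and the setup() method are not shown, so the sketch below fills them in with assumed values:

import numpy as np
import bottleneck as bn

class TimeNanargmax:
    # Assumed parameter grid; asv runs setup() once per combination
    params = [["float32", "float64"], [(1000, 1000)], ["C", "F"], [0, 1]]
    param_names = ["dtype", "shape", "order", "axis"]

    def setup(self, dtype, shape, order, axis):
        self.arr = np.asarray(np.random.rand(*shape), dtype=dtype, order=order)

    def time_nanargmax(self, dtype, shape, order, axis):
        bn.nanargmax(self.arr, axis=axis)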
Ejemplo n.º 35
0
    def fit(self, X, y):
        """
        Fits the MI_FS feature selection with the chosen MI_FS method.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.
        """

        # Map negative n_jobs to a core count following the sklearn
        # convention (-1 -> all cores, -2 -> all but one, ...)
        if self.n_jobs < 0:
            self.n_jobs = max(1, NUM_CORES + 1 + self.n_jobs)

        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features
        F = list(range(p))

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ---------------------------------------------------------------------
        # FIND FIRST FEATURE
        # ---------------------------------------------------------------------
        xy_MI = np.array(mimy.get_first_mi_vector(self, self.k))

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ---------------------------------------------------------------------
        # FIND SUBSEQUENT FEATURES
        # ---------------------------------------------------------------------
        if self.n_features == 'auto':
            n_features = np.inf
        else:
            n_features = self.n_features

        while len(S) < n_features:
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            # Calculate s-th row of feature_mi_matrix which contains the JMI score of the last element in S
            # with all remaining features in F
            feature_mi_matrix[s, F] = mimy.get_mi_vector(self, F, S[-1])

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                # Pick the feature in F with the largest cumulative JMI,
                # \sum_{s \in S} I(X_f, X_s; y)
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
                # Report which already-selected feature attains the JMIM
                # for the chosen candidate
                if self.verbose > 0:
                    jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                    jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                    jmi_idx = np.where(jmi_vals == jmim)[0]
                    print(jmim, S[jmi_idx[0]], selected)
            elif self.method == 'JMIM':
                if bn.allnan(bn.nanmin(fmm, axis=0)):
                    break
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
                # Report which already-selected feature attains the JMIM
                # for the chosen candidate
                if self.verbose > 0:
                    jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                    jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                    jmi_idx = np.where(jmi_vals == jmim)[0]
                    print(jmim, S[jmi_idx[0]], selected)
            elif self.method == 'MRMR':
                if bn.allnan(bn.nanmean(fmm, axis=0)):
                    break
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]
                S_mi.append(bn.nanmax(MRMR))

            # record the JMIM of the newly selected feature and add it to S
            if self.method != 'MRMR':
                S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of the
                # previously selected features
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ---------------------------------------------------------------------
        # SAVE RESULTS
        # ---------------------------------------------------------------------

        self.n_features_ = len(S)
        self._support_mask = np.zeros(p, dtype=bool)
        self._support_mask[S] = True
        self.ranking_ = S
        self.mi_ = S_mi

        return self
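
To make the JMI/JMIM selection rules above concrete, a toy example on a hand-made fmm matrix (values are illustrative only):

import numpy as np
import bottleneck as bn

# Rows: already-selected features s in S; columns: candidate features in F
fmm = np.array([[0.30, 0.10, np.nan],
                [0.20, 0.45, 0.05]])

# JMIM: pick the candidate whose weakest pairing with S is strongest
print(bn.nanargmax(bn.nanmin(fmm, axis=0)))  # 0  (column minima: 0.20, 0.10, 0.05)

# JMI: pick the candidate with the largest sum over S
print(bn.nanargmax(bn.nansum(fmm, axis=0)))  # 1  (column sums: 0.50, 0.55, 0.05)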
Ejemplo n.º 36
0
    def argf(self, *args, **kwargs):
        return bn.nanargmax(*args, **kwargs)
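
Presumably a dispatch hook so the same class can swap between nanargmax and nanargmin (e.g. max- and min-flavored variants of one routine); a usage sketch under that assumption:

import numpy as np
import bottleneck as bn

arr = np.array([1.0, np.nan, 3.0, 2.0])
print(bn.nanargmax(arr))  # 2 -- what self.argf(arr) would return; NaN ignored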