Example #1
def compute_cvm(predictions, masses, n_neighbours=200, step=50):
    """
    Compute the Cramer-von Mises (CvM) metric on background events: the average of the CvM values calculated for each mass bin.
    In each mass bin, the global CDF of the predictions is compared to the CDF of the predictions inside that bin.

    :param predictions: array-like, predictions
    :param masses: array-like, in case of Kaggle tau23mu this is reconstructed mass
    :param n_neighbours: number of neighbouring events used to define each mass bin
    :param step: step taken through the sorted mass array between consecutive bin centers
    :return: average cvm value
    """
    predictions = numpy.array(predictions)
    masses = numpy.array(masses)
    assert len(predictions) == len(masses)

    # First, reorder by masses
    predictions = predictions[numpy.argsort(masses)]

    # Second, replace probabilities with order of probability among other events
    predictions = numpy.argsort(numpy.argsort(predictions))

    # Now, each window forms a group, and we can compute contribution of each group to CvM
    cvms = []
    for window in __rolling_window(predictions, window_size=n_neighbours)[::step]:
        cvms.append(__cvm(subindices=window, total_events=len(predictions)))
    return numpy.mean(cvms)
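
The double argsort in the snippet above is a rank transform: each prediction is replaced by its 0-based position in the sorted order. A minimal self-contained sketch of just that step (the sample values here are made up):

import numpy

predictions = numpy.array([0.9, 0.1, 0.5, 0.7])
ranks = numpy.argsort(numpy.argsort(predictions))
print(ranks)  # [3 0 1 2] -> 0.9 gets rank 3 (largest), 0.1 gets rank 0 (smallest)
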
Example #2
def test_template():
    size = 100
    # Float prefactors ensure that image range is between 0 and 1
    image = np.full((400, 400), 0.5)
    target = 0.1 * (np.tri(size) + np.tri(size)[::-1])
    target_positions = [(50, 50), (200, 200)]
    for x, y in target_positions:
        image[x:x + size, y:y + size] = target
    np.random.seed(1)
    image += 0.1 * np.random.uniform(size=(400, 400))

    result = match_template(image, target)
    delta = 5

    positions = peak_local_max(result, min_distance=delta)

    if len(positions) > 2:
        # Keep the two maximum peaks.
        intensities = result[tuple(positions.T)]
        i_maxsort = np.argsort(intensities)[::-1]
        positions = positions[i_maxsort][:2]

    # Sort so that order matches `target_positions`.
    positions = positions[np.argsort(positions[:, 0])]

    for xy_target, xy in zip(target_positions, positions):
        assert_almost_equal(xy, xy_target)
Example #3
def fitting(d0, d1):
    idx_list=[]
    pos_list=[]
    for tp in ['beta', 'sw']:
        e=0
        for net, sl in zip(['Net_0', 'Net_1'], [slice(2,4), slice(0,4)]):
            z=d0[tp][net]['mean_rates'][:,sl]
            target=d1[tp][net]['mean_rates'][sl]
            target=numpy.array([target]*z.shape[0])
            if e == 0:
                e=z-target
            else:
                e=numpy.concatenate((z-target, e),axis=1)
        
        e**=2
        e=numpy.sqrt(numpy.mean(e, axis=1))
        idx=numpy.argsort(e)
#         idx_list.append(idx)
#         l=[]
#         for i, _id in enumerate(idx_list[-2]):
#             j=list(idx_list[-1]).index(_id)
#             l.append([i,j])
#         l=numpy.array(l)
#         pos_list.append(l)
#         e=numpy.mean(l,axis=1)
        idx=numpy.argsort(e)
        
#         pp(list(l[idx,:]))
            
#         print idx
#         print e[idx]
        print(tp)
        for _id in idx[:100]:
            print(d0[tp]['Net_0']['ylabels'][_id], d1[tp]['Net_0']['mean_rates'][:], numpy.round(d0[tp]['Net_0']['mean_rates'][_id,:],1), e[_id])
            print(d0[tp]['Net_1']['ylabels'][_id], d1[tp]['Net_1']['mean_rates'][:], numpy.round(d0[tp]['Net_1']['mean_rates'][_id,:],1))
Example #4
def calcFreqs(X, timeStep, minFreq=0, maxFreq=np.inf):
    if (X.ndim > 1):
        freqs = scipy.fftpack.fftfreq(X.shape[1], timeStep)
        idx1 = np.argsort(freqs)
        freqs = freqs[idx1]
        idx2 = np.where((freqs >= minFreq) & (freqs <= maxFreq))[0]
        freqs = freqs[idx2]
        return freqs, idx1, idx2, 0
    else:
        # sometimes not all the time steps are the same
        allFreqs, lengths = [], []
        idx1s, idx2s = [], []
        if (isinstance(timeStep, float)):
            timeStep = np.ones((len(X))) * timeStep
        for x, dt in zip(X, timeStep):
            freqs = scipy.fftpack.fftfreq(x.shape[0], dt)
            idx1 = np.argsort(freqs)
            freqs = freqs[idx1]
            idx2 = np.where((freqs > minFreq) & (freqs < maxFreq))[0]
            freqs = freqs[idx2]
            allFreqs.append(freqs)
            lengths.append(len(freqs))
            idx1s.append(idx1)
            idx2s.append(idx2)
        maxLenInd = np.argmax(lengths)
        return allFreqs, idx1s, idx2s, maxLenInd
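
scipy.fftpack.fftfreq returns frequencies in FFT order (zero, then positive, then negative), so the argsort above is what puts them into ascending order. A small self-contained illustration with made-up parameters:

import numpy as np
import scipy.fftpack

freqs = scipy.fftpack.fftfreq(8, d=0.1)  # FFT order: zero, positive, then negative frequencies
idx1 = np.argsort(freqs)                 # indices that put the frequencies in ascending order
print(freqs[idx1])                       # [-5. -3.75 -2.5 -1.25 0. 1.25 2.5 3.75]
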
Example #5
def _test_corr(old_func, new_func, sel_item):
    from nose.tools import assert_equal, assert_raises
    n_obs = 20
    n_dims = 10
    np.random.seed(0)
    y = np.random.rand(n_obs) * n_obs
    X = np.tile(y, [n_dims, 1]).T + np.random.randn(n_obs, n_dims)
    rho_fast = new_func(X, y)
    # test dimensionality
    assert_equal(rho_fast.ndim, 1)
    assert_equal(rho_fast.shape[0], n_dims)
    # test data
    rho_slow = np.ones(n_dims)
    for dim in range(n_dims):
        rho_slow[dim] = np.array(old_func(X[:, dim], y)).item(sel_item)
    np.testing.assert_array_equal(rho_fast.shape, rho_slow.shape)
    np.testing.assert_array_almost_equal(rho_fast, rho_slow)
    # test errors
    new_func(np.squeeze(X[:, 0]), y)
    assert_raises(ValueError, new_func, y, X)
    assert_raises(ValueError, new_func, X, y[1:])
    # test dtype
    X = np.argsort(X, axis=0) * 2  # ensure no bug at normalization
    y = np.argsort(y, axis=0) * 2
    rho_fast = new_func(X, y, dtype=int)
    rho_slow = np.ones(n_dims)
    for dim in range(n_dims):
        rho_slow[dim] = np.array(old_func(X[:, dim], y)).item(sel_item)
    np.testing.assert_array_almost_equal(rho_fast, rho_slow)
Example #6
def _sort_neurons(sort, gids, network):
    max_nest_gid = network.nest_gid.max() + 1
    sorting = np.zeros(max_nest_gid)
    if isinstance(sort, str):
        sorted_ids = None
        if "degree" in sort:
            deg_type = sort[:sort.find("-")]
            degrees = network.get_degrees(deg_type)
            sorted_ids = np.argsort(degrees)
        elif sort == "betweenness":
            betw = network.get_betweenness(btype="node")
            sorted_ids = np.argsort(betw)
        else:
            raise InvalidArgument(
                '''Unknown sorting parameter {}; choose among "in-degree",
                "out-degree", "total-degree" or "betweenness".'''.format(sort))
        num_sorted = 1
        _, sorted_groups = _sort_groups(network.population)
        for group in sorted_groups:
            gids = network.nest_gid[group.id_list]
            order = np.argsort(sorted_ids[group.id_list])
            sorting[gids] = num_sorted + order
            num_sorted += len(group.id_list)
    else:
        sorting[network.nest_gid[sort]] = sort
    return sorting
Example #7
def carbonylorcarboxyl(allligand,index,bond_dist):

	allligandcoods = allligand.positions
	ocoods = np.zeros((1,3), dtype = float)
	ocoods[0,:] = allligandcoods[index,:]
	ocoods = np.float32(ocoods)

	tempdist = MDAnalysis.lib.distances.distance_array(ocoods,allligandcoods)
	A = np.argsort(tempdist)
	temp = int(A[0,1])

	Omatecood = np.zeros((1,3), dtype = float)
	Omatecood[0,:] = allligandcoods[temp,:]
	Omatecood = np.float32(Omatecood)

	tempdist2 = MDAnalysis.lib.distances.distance_array(Omatecood, allligandcoods)
	B = np.argsort(tempdist2)
	B = np.delete(B,0,axis = 1)
	for i in xrange(0,B.size):
		if B[0,i] == index:
			C = np.delete(B,i,axis = 1)
			break

	base1 = int(C[0,0])
	base2 = int(C[0,1])
	type1 = allligand[base1].type
	type2 = allligand[base2].type

	if type1 == 'O' or type2 == 'O':
		atype = 'carboxyl'
	else:
		atype = 'carbonyl'

	return atype
Example #8
    def downsize(self, coefs, cut=None, verbose=True):
        """
        Given a set of coefs, sort the coefs and get rid of the bottom cut
        percent of variables with lowest cut coefs. Return the new coefs.
        """


        downsized_coefs = np.squeeze(np.array(coefs))

        if cut is None:
            cut = self.cut

        n_trash = int(floor(cut * self.n_features))

        if verbose:
            print("Downsampling...")
            print("Current shape:", self.Xview.shape)
            print("Removing {} columns... ".format(n_trash))


        self.tail_start -= n_trash

        if self.tail_start <= 0:
            raise ValueError("Trying to downsize more variables than present")

        # get sorted order of coefs
        csort = np.squeeze(np.argsort(np.argsort(np.absolute(coefs))))
        keep_feature = np.squeeze(csort >= n_trash)

        tail_start = self.tail_start

        # columns in the tail we want to keep
        keep_idx = np.squeeze(
            np.where(keep_feature[tail_start:tail_start+n_trash]))
        keep_idx += tail_start

        # columns we want to move to the tail
        trash_idx = np.squeeze(np.where(keep_feature[0:tail_start] == False))
        if len(trash_idx) != len(keep_idx):
            raise ValueError("trash_idx and keep_idx not the same length")

        # swap the columns
        for trash, keep in zip(trash_idx, keep_idx):
            #print(keep, trash)
            keep_col = self.X[:, keep].copy()
            self.X[:, keep] = self.X[:, trash]
            self.X[:, trash] = keep_col
            self.orig_feature_index[trash], self.orig_feature_index[keep] = self.orig_feature_index[keep], self.orig_feature_index[trash]
            downsized_coefs[trash], downsized_coefs[keep] = downsized_coefs[keep], downsized_coefs[trash]
            if self.test_subj is not None:
                self.X_test[:, (trash, keep)] = self.X_test[:, (keep, trash)]

        self.n_features -= n_trash
        self.Xview = self.X.view()[:, :self.n_features]
        if self.test_subj is not None:
            self.X_testview = self.X_test.view()[:, :self.n_features]

        print("New Xview shape:", self.Xview.shape)

        return downsized_coefs[:-n_trash]
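
The selection step above ranks coefficients by absolute value with a double argsort and keeps everything whose rank clears the cut. The same idea in isolation, with illustrative names and values:

import numpy as np

coefs = np.array([0.05, -0.8, 0.3, -0.01, 0.6])
n_trash = 2                                          # drop the two weakest features
csort = np.argsort(np.argsort(np.absolute(coefs)))   # rank of each coef: 0 = weakest
keep_feature = csort >= n_trash
print(np.where(keep_feature)[0])                     # [1 2 4] -> only the largest |coefs| survive
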
Example #9
def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
    """Expresses to what extent the local structure is retained.

    The trustworthiness is within [0, 1]. It is defined as

    .. math::

        T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
            \sum_{j \in U^{(k)}_i} (r(i, j) - k)

    where :math:`r(i, j)` is the rank of the embedded datapoint j
    according to the pairwise distances between the embedded datapoints,
    :math:`U^{(k)}_i` is the set of points that are in the k nearest
    neighbors in the embedded space but not in the original space.

    * "Neighborhood Preservation in Nonlinear Projection Methods: An
      Experimental Study"
      J. Venna, S. Kaski
    * "Learning a Parametric Embedding by Preserving Local Structure"
      L.J.P. van der Maaten

    Parameters
    ----------
    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
        If the metric is 'precomputed' X must be a square distance
        matrix. Otherwise it contains a sample per row.

    X_embedded : array, shape (n_samples, n_components)
        Embedding of the training data in low-dimensional space.

    n_neighbors : int, optional (default: 5)
        Number of neighbors k that will be considered.

    precomputed : bool, optional (default: False)
        Set this flag if X is a precomputed square distance matrix.

    Returns
    -------
    trustworthiness : float
        Trustworthiness of the low-dimensional embedding.
    """
    if precomputed:
        dist_X = X
    else:
        dist_X = pairwise_distances(X, squared=True)
    dist_X_embedded = pairwise_distances(X_embedded, squared=True)
    ind_X = np.argsort(dist_X, axis=1)
    ind_X_embedded = np.argsort(dist_X_embedded, axis=1)[:, 1:n_neighbors + 1]

    n_samples = X.shape[0]
    t = 0.0
    ranks = np.zeros(n_neighbors)
    for i in range(n_samples):
        for j in range(n_neighbors):
            ranks[j] = np.where(ind_X[i] == ind_X_embedded[i, j])[0][0]
        ranks -= n_neighbors
        t += np.sum(ranks[ranks > 0])
    t = 1.0 - t * (2.0 / (n_samples * n_neighbors *
                          (2.0 * n_samples - 3.0 * n_neighbors - 1.0)))
    return t
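
A quick usage sketch for the function above, assuming it sits in a module that already imports numpy as np and pairwise_distances from sklearn.metrics; the "embedding" here is just a toy slice of the data, not a real projection:

import numpy as np
from sklearn.metrics import pairwise_distances  # required by trustworthiness()

np.random.seed(0)
X = np.random.rand(50, 20)
X_embedded = X[:, :2]                                  # toy "embedding": the first two features
print(trustworthiness(X, X_embedded, n_neighbors=5))   # a score in [0, 1]
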
Example #10
def generate_misclassifications(top_words):
    log("Generating artificial misclassification rate ..")
    from numpy.random import normal
    w = len(top_words)
    mis = np.zeros((w, w))
    for i in range(w):
        for j in range(i+1):
            distance = edit_distance(top_words[i], top_words[j])
            mis[i][j] = max(0.0, normal(0.4 ** distance, 0.05))
            mis[j][i] = max(0.0, normal(0.4 ** distance, 0.05))
    normalize_matrix(mis)
    mostly_wrong = list(sorted([(mis[i][i], i) for i in range(w)]))
    log("Top 10 words likely to be wrong:")
    for prob, idx in mostly_wrong[:10]:
        log("    %s (%.3lf%%) => %s", top_words[idx], prob*100.0,
                " ".join(["%s (%.3lf%%)" % (top_words[cand],
                    mis[idx][cand]*100.0)
                    for cand in reversed(np.argsort(mis[idx])[-4:])]))
    log("Top 10 words likely to be right:")
    for prob, idx in mostly_wrong[-10:]:
        log("    %s (%.3lf%%) => %s", top_words[idx], prob*100.0,
                " ".join(["%s (%.3lf%%)" % (top_words[cand],
                    mis[idx][cand]*100.0)
                    for cand in reversed(np.argsort(mis[idx])[-4:])]))
    return mis
Example #11
def rankImages( imdists, query_id, dist_type ):
    # PRE [DO NOT TOUCH]
    ranking = []

    # WRITE YOUR CODE HERE
    related_img = []
    related_img = imdists[query_id,:]
    
    # smaller, order asc
    if dist_type == 'euclidean':
        ranking = np.argsort(related_img)
        
    # larger, order desc
    elif dist_type == 'l2':
        ranking = np.argsort(-related_img)
    
    # larger, order desc
    elif dist_type == 'intersect' or dist_type == 'l1':
        ranking = np.argsort(-related_img)
        
    # smaller, order asc
    elif dist_type == 'chi2':
        ranking = np.argsort(related_img)

    # larger, order desc
    elif dist_type == 'hellinger':
        ranking = np.argsort(-related_img)
    
    
    # RETURN [DO NOT TOUCH]
    return ranking
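
The branches above all rely on np.argsort being ascending by default; negating the array is the usual way to rank in descending order. A tiny self-contained illustration:

import numpy as np

related_img = np.array([0.7, 0.2, 0.9, 0.4])
print(np.argsort(related_img))    # [1 3 0 2] -> ascending: smallest distance first
print(np.argsort(-related_img))   # [2 0 3 1] -> descending: largest similarity first
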
Example #12
 def predict_scores(self, test_data, N):
     dinx = np.array(list(self.train_drugs))
     DS = self.dsMat[:, dinx]
    # print DS drug-drug sim with 0 diagonal entries
     tinx = np.array(list(self.train_targets))
     TS = self.tsMat[:, tinx]
    # print TS target-target sim with 0 diagonal entries
     scores = []
     for d, t in test_data:
         if d in self.train_drugs: 
             if t in self.train_targets:
                 val = np.sum(self.U[d, :]*self.V[t, :])
             else:
                 jj = np.argsort(TS[t, :])[::-1][:N]
                 val = np.sum(self.U[d, :]*np.dot(TS[t, jj], self.V[tinx[jj], :]))/np.sum(TS[t, jj])
         else:
             if t in self.train_targets:
                 ii = np.argsort(DS[d, :])[::-1][:N]
                 val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :])*self.V[t, :])/np.sum(DS[d, ii])
             else:
                 ii = np.argsort(DS[d, :])[::-1][:N]
                 jj = np.argsort(TS[t, :])[::-1][:N]
                 v1 = DS[d, ii].dot(self.U[dinx[ii], :])/np.sum(DS[d, ii])
                 v2 = TS[t, jj].dot(self.V[tinx[jj], :])/np.sum(TS[t, jj])
                 val = np.sum(v1*v2)
         if np.isnan(val):
             scores.append(0)
         else:
             scores.append(np.exp(val)/(1+np.exp(val)))
    # print smat #whole prediction matrix
     return np.array(scores) #from original code
Example #13
 def evaluation(self, test_data, test_label):
     dinx = np.array(list(self.train_drugs))
     DS = self.dsMat[:, dinx]
     tinx = np.array(list(self.train_targets))
     TS = self.tsMat[:, tinx]
     scores = []
     if self.K2 > 0:
         for d, t in test_data:
             if d in self.train_drugs:
                 if t in self.train_targets:
                     val = np.sum(self.U[d, :]*self.V[t, :])
                 else:
                     jj = np.argsort(TS[t, :])[::-1][:self.K2]
                     val = np.sum(self.U[d, :]*np.dot(TS[t, jj], self.V[tinx[jj], :]))/np.sum(TS[t, jj])
             else:
                 if t in self.train_targets:
                     ii = np.argsort(DS[d, :])[::-1][:self.K2]
                     val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :])*self.V[t, :])/np.sum(DS[d, ii])
                 else:
                     ii = np.argsort(DS[d, :])[::-1][:self.K2]
                     jj = np.argsort(TS[t, :])[::-1][:self.K2]
                     v1 = DS[d, ii].dot(self.U[dinx[ii], :])/np.sum(DS[d, ii])
                     v2 = TS[t, jj].dot(self.V[tinx[jj], :])/np.sum(TS[t, jj])
                     val = np.sum(v1*v2)
             scores.append(np.exp(val)/(1+np.exp(val)))
     elif self.K2 == 0:
         for d, t in test_data:
             val = np.sum(self.U[d, :]*self.V[t, :])
             scores.append(np.exp(val)/(1+np.exp(val)))
     prec, rec, thr = precision_recall_curve(test_label, np.array(scores))
     aupr_val = auc(rec, prec)
     fpr, tpr, thr = roc_curve(test_label, np.array(scores))
     auc_val = auc(fpr, tpr)
     return aupr_val, auc_val
Example #14
def get_heatmap(data_mat, name_for_saving_files, pp, stimulus_on_time, stimulus_off_time, delta_ff, f0_start, f0_end):
    
    #Plot heatmap for validation 
    A1 = np.reshape(data_mat, (np.size(data_mat,0)*np.size(data_mat,1), np.size(data_mat,2)))
    if delta_ff == 1:
        delta_ff_A1 = np.zeros(np.shape(A1))
        for ii in range(0, np.size(A1, 0)):
            delta_ff_A1[ii,:] = (A1[ii,:]-np.mean(A1[ii,f0_start:f0_end]))/(np.std(A1[ii,f0_start:f0_end])+0.1)
        B = np.argsort(np.mean(delta_ff_A1, axis=1))  
        print(np.max(delta_ff_A1))
    else:
        B = np.argsort(np.mean(A1, axis=1)) 
        print(np.max(A1))

    with sns.axes_style("white"):
        C = A1[B,:][-2000:,:]

        fig2 = plt.imshow(C,aspect='auto', cmap='jet', vmin = np.min(C), vmax = np.max(C))
        
        plot_vertical_lines_onset(stimulus_on_time)
        plot_vertical_lines_offset(stimulus_off_time)
        plt.title(name_for_saving_files)
        plt.colorbar()
        fig2 = plt.gcf()
        pp.savefig(fig2)
        plt.close()
Example #15
    def make_plot(self):
        #plot gets arguments
        dates, prices = self.cmod.arguments_plot(buyerField=self.argCH_plot())
        print(dates)
        print(prices)
        #creating plot
        dates = np.array(dates)#converting list
        prices = np.array(prices)#converting list
        fig, self.plotTK = plt.subplots()
        s = np.argsort(dates)  # order points chronologically, keeping (date, price) pairs together
        self.plotTK.plot_date(dates[s], prices[s], 'bo-')

        self.plotTK.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
        self.plotTK.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
        fig.autofmt_xdate()
        #merge plot and tkinter
        self.canvas = FigureCanvasTkAgg(fig, self.cview.frames[view.AboutPage].leftFrame)
        self.canvas.show()
        self.canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)
        #creating toolbar
        toolbar = NavigationToolbar2TkAgg(self.canvas, self.cview.frames[view.AboutPage].leftFrame)
        toolbar.update()
        #packing plot
        self.canvas._tkcanvas.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
Example #16
def get_informative_features(vectorizers, clf, class_labels, N):
    """
    Return text with features with the highest absolute coefficient
    values, per class.
    """
    feature_names = []
    for vec_name, vec in vectorizers:
        feature_names += ["%30s  %s" % (vec_name, name) for name in vec.get_feature_names()]
    features_by_class = []
    for i, class_label in enumerate(class_labels):
        topN = np.argsort(clf.coef_[i])[-N:]
        bottomN = np.argsort(clf.coef_[i])[:N]
        res = []

        for j in reversed(topN):
            coef = clf.coef_[i][j]
            if coef > 0:
                res.append("+%0.4f: %s" % (coef, feature_names[j]))

        if (len(topN) >= N) or (len(bottomN) >= N):
            res.append("   ...")

        for j in reversed(bottomN):
            coef = clf.coef_[i][j]
            if coef < 0:
                res.append("%0.4f: %s" % (coef, feature_names[j]))
        features_by_class.append((class_label, "\n".join(res)))
    return features_by_class
Example #17
def argsort(x, topn=None, reverse=False):
    """Get indices of the `topn` smallest elements in array `x`.

    Parameters
    ----------
    x : array_like
        Array to sort.
    topn : int, optional
        Number of indices of the smallest(greatest) elements to be returned if given,
        otherwise - indices of all elements will be returned in ascending(descending) order.
    reverse : bool, optional
        If True - return the `topn` greatest elements, in descending order.

    Returns
    -------
    numpy.ndarray
        Array of `topn` indices that sort the array in the required order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
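
A short usage sketch for the helper above (it assumes the function is in scope together with numpy imported as np):

import numpy as np

scores = np.array([0.1, 0.9, 0.3, 0.7, 0.5])
print(argsort(scores, topn=2))                # [0 2] -> indices of the two smallest values
print(argsort(scores, topn=2, reverse=True))  # [1 3] -> indices of the two largest values
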
Example #18
    def get_filtered_intersections(self, sort_by, inters_size_bounds, inters_degree_bounds):
        """
        Filter the intersection data according to the user's directives and return it.

        :param sort_by: 'degree'|'size'. Whether to sort intersections by degree or size.
        :param inters_size_bounds: tuple. Specifies the size interval of the intersections that will be plotted.
        :param inters_degree_bounds: tuple. Specifies the degree interval of the intersections that will be plotted.
        :return: Array of int (sizes), array of tuples (sets included in intersection), array of tuples (sets
        excluded from intersection), all filtered and sorted.
        """
        inters_sizes = np.array([self.inters_df_dict[x].shape[0] for x in self.in_sets_list])
        inters_degrees = np.array(self.inters_degrees)

        size_clip = (inters_sizes <= inters_size_bounds[1]) & (inters_sizes >= inters_size_bounds[0]) & (
            inters_degrees >= inters_degree_bounds[0]) & (inters_degrees <= inters_degree_bounds[1])

        in_sets_list = np.array(self.in_sets_list)[size_clip]
        out_sets_list = np.array(self.out_sets_list)[size_clip]
        inters_sizes = inters_sizes[size_clip]
        inters_degrees = inters_degrees[size_clip]

        # sort as requested
        if sort_by == 'size':
            order = np.argsort(inters_sizes)[::-1]
        elif sort_by == 'degree':
            order = np.argsort(inters_degrees)

        # store ordered data
        self.filtered_inters_sizes = inters_sizes[order]
        self.filtered_in_sets = in_sets_list[order]
        self.filtered_out_sets = out_sets_list[order]

        return self.filtered_inters_sizes, self.filtered_in_sets, self.filtered_out_sets
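
The filter-then-sort pattern above (a boolean clip mask followed by a reversed argsort for descending order) looks like this on toy data; the names mirror the method but the values are made up:

import numpy as np

inters_sizes = np.array([12, 3, 40, 7, 25])
labels = np.array(['a', 'b', 'c', 'd', 'e'])
size_clip = (inters_sizes >= 5) & (inters_sizes <= 30)  # keep sizes within the bounds
order = np.argsort(inters_sizes[size_clip])[::-1]       # largest intersections first
print(labels[size_clip][order])                         # ['e' 'a' 'd']
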
Example #19
def sort_by_pause_length(file_name):
    pause_before, pause_after = [], []
    conn = sqlite3.connect('./alignment_data/{}.db'.format(file_name))
    cur = conn.cursor()

    cur.execute("SELECT word, word_index, count_before, count_after, avg_pause_before, avg_pause_after FROM words")
    vals = cur.fetchall()

    for val in vals:
        entry_before = (val[0], val[4], val[2])
        entry_after  = (val[0], val[5], val[3])

        pause_before.append(entry_before)
        pause_after.append(entry_after)

    pause_before = np.asarray(pause_before)
    pause_after  = np.asarray(pause_after)

    # np.asarray on tuples of mixed types yields a string array, so cast the
    # numeric columns back to float before sorting (a plain argsort on strings
    # would order them lexicographically)
    idx_before = np.argsort(pause_before[:, 1].astype(float))[::-1]
    idx_after  = np.argsort(pause_after[:, 1].astype(float))[::-1]

    idx_count_before = np.argsort(pause_before[:, 2].astype(float))[::-1]
    idx_count_after = np.argsort(pause_after[:, 2].astype(float))[::-1]

    return pause_before[idx_before], pause_after[idx_after], pause_before[idx_count_before], pause_after[idx_count_after]
Example #20
def plotres(psr,deleted=False,group=None,**kwargs):
    """Plot residuals, compute unweighted rms residual."""

    res, t, errs = psr.residuals(), psr.toas(), psr.toaerrs
    
    if (not deleted) and N.any(psr.deleted != 0):
        res, t, errs = res[psr.deleted == 0], t[psr.deleted == 0], errs[psr.deleted == 0]
        print("Plotting {0}/{1} nondeleted points.".format(len(res),psr.nobs))

    meanres = math.sqrt(N.mean(res**2)) / 1e-6
    
    if group is None:
        i = N.argsort(t)
        P.errorbar(t[i],res[i]/1e-6,yerr=errs[i],fmt='x',**kwargs)
    else:
        if (not deleted) and N.any(psr.deleted):
            flagmask = psr.flagvals(group)[~psr.deleted]
        else:
            flagmask = psr.flagvals(group)

        unique = list(set(flagmask))
            
        for flagval in unique:
            f = (flagmask == flagval)
            flagres, flagt, flagerrs = res[f], t[f], errs[f]
            i = N.argsort(flagt)
            P.errorbar(flagt[i],flagres[i]/1e-6,yerr=flagerrs[i],fmt='x',**kwargs)
        
        P.legend(unique,numpoints=1,bbox_to_anchor=(1.1,1.1))

    P.xlabel('MJD'); P.ylabel('res [us]')
    P.title("{0} - rms res = {1:.2f} us".format(psr.name,meanres))
Example #21
def spike_find(input_array, t, max_spike_width):
    """
    Find the spikes in the input_array.
    Inputs:
        input_array              : a numpy array (1-dimensional) holding 
                                   floats.
        t                        : threshold for spike detection
        max_spike_width          : crossings further apart than this will 
                                   disqualify the spike
    Returns:
        spikes                   : a numpy array (1-dimensional) holding
                                   integers (spike index values)
    """
    crossings = fast_thresh_detect(input_array, threshold=t)
    spikes = []
    if len(crossings) > 1:
        if t > 0.0:
            # find first positive crossing then pair up crossings
            first_p = numpy.argwhere(input_array[crossings] < t)[0]
            for p, n in zip(crossings[first_p::2], crossings[first_p + 1 :: 2]):
                if abs(p - n) <= max_spike_width:
                    peak_index = numpy.argsort(input_array[p : n + 1])[-1] + p
                    spikes.append(peak_index)
        else:
            # find first negative crossing then pair up crossings
            first_n = numpy.argwhere(input_array[crossings] > t)[0]
            for n, p in zip(crossings[first_n::2], crossings[first_n + 1 :: 2]):
                if abs(p - n) <= max_spike_width:
                    peak_index = numpy.argsort(input_array[n : p + 1])[0] + n
                    spikes.append(peak_index)
    return numpy.array(spikes)
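
Within each crossing pair the peak is located by argsort-ing the slice; taking the last sorted index and adding the slice offset is equivalent to an argmax in original coordinates. A minimal sketch with hypothetical crossing indices:

import numpy

signal = numpy.array([0.0, 0.2, 1.5, 0.9, 0.1])
p, n = 1, 4                                          # hypothetical threshold-crossing indices
peak_index = numpy.argsort(signal[p:n + 1])[-1] + p  # argmax of the slice, in original coordinates
print(peak_index)                                    # 2 -> position of the 1.5 peak
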
Example #22
    def SNfunc(self,data,sig,significancefloor=0.5):
        D=data.ravel()
        S=sig.ravel()

        args=numpy.argsort(-D/S)
        D=numpy.take(D,args)
        S=numpy.take(S,args)
        Dsum=numpy.cumsum(D)
        Ssum=numpy.cumsum(S**2)**0.5
        SN=(Dsum/Ssum).max()

        #regional SN
        import scipy.ndimage as  ndimage
        data[data/sig<significancefloor]=0
        masks, multiplicity = ndimage.measurements.label(data)
        labels=numpy.arange(1, multiplicity+1)
        SNs=numpy.zeros(multiplicity+1)
        SNs[0]=SN
        for i in range(multiplicity):
            D=data[masks==i+1].ravel()
            S=sig[masks==i+1].ravel()
            args=numpy.argsort(-D/S)
            D=numpy.take(D,args)
            S=numpy.take(S,args)
            Dsum=numpy.cumsum(D)
            Ssum=numpy.cumsum(S**2)**0.5
            SNi=(Dsum/Ssum).max()
            SNs[i+1]=SNi
        SNs=-numpy.sort(-SNs)
        return SNs
Example #23
def scale_score(x, kind="quicksort", kind2="quicksort"):
    y = x.copy()
    order = np.argsort(x.flat, kind=kind)
    # Black magic ;-) Probably the smartest thing I came up with today.
    order_order = np.argsort(order, kind=kind2)
    y.flat[:] = order_order.astype(y.dtype)
    return y
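
A quick example of the function above: the output holds, for every position, that element's 0-based rank in the sorted order (ties are broken arbitrarily by the sort):

import numpy as np

x = np.array([30.0, 10.0, 20.0])
print(scale_score(x))   # [2. 0. 1.] -> 30 has rank 2 (largest), 10 has rank 0 (smallest)
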
Example #24
 def _get_sorted_theta(self):
     '''sorts the integral points by bond in descending order'''
     depsf_arr = np.array([])
     V_f_arr = np.array([])
     E_f_arr = np.array([])
     xi_arr = np.array([])
     stat_weights_arr = np.array([])
     nu_r_arr = np.array([])
     r_arr = np.array([])
     for reinf in self.cont_reinf_lst:
         n_int = len(np.hstack((np.array([]), reinf.depsf_arr)))
         depsf_arr = np.hstack((depsf_arr, reinf.depsf_arr))
         V_f_arr = np.hstack((V_f_arr, np.repeat(reinf.V_f, n_int)))
         E_f_arr = np.hstack((E_f_arr, np.repeat(reinf.E_f, n_int)))
         xi_arr = np.hstack((xi_arr, np.repeat(reinf.xi, n_int)))
         stat_weights_arr = np.hstack((stat_weights_arr, reinf.stat_weights))
         nu_r_arr = np.hstack((nu_r_arr, reinf.nu_r))
         r_arr = np.hstack((r_arr, reinf.r_arr))
     argsort = np.argsort(depsf_arr)[::-1]
     # sorting the masks for the evaluation of F
     idxs = np.array([])
     for i, reinf in enumerate(self.cont_reinf_lst):
         idxs = np.hstack((idxs, i * np.ones_like(reinf.depsf_arr)))
     masks = []
     for i, reinf in enumerate(self.cont_reinf_lst):
         masks.append((idxs == i)[argsort])
     max_depsf = [np.max(reinf.depsf_arr) for reinf in self.cont_reinf_lst]
     masks = [masks[i] for i in np.argsort(max_depsf)[::-1]]
     return depsf_arr[argsort], V_f_arr[argsort], E_f_arr[argsort], \
             xi_arr[argsort], stat_weights_arr[argsort], \
             nu_r_arr[argsort], masks, r_arr[argsort]
Example #25
def rforests(trainx, trainy, test, n_estimators=100, k=5):
	trainy = np.ravel(trainy)

	forest = RandomForestClassifier(n_estimators)
	forest.fit(trainx, trainy)


	prob_train = forest.predict_proba(trainx)
	prob_test = forest.predict_proba(test)

	# Since each column index corresponds to a country, argsort on the
	# probability matrix gives the top k (here 5) classes per row; this is
	# done for the whole matrix at once.
	sort_train = np.argsort(prob_train)[:,-k:]
	sort_test = np.argsort(prob_test)[:,-k:]

	# Now we need to transform these back to countries, but to map I need to
	# have a dataframe.
	col_names = []

	for i in range(k):
		name = "country_destination_" + str(i+1)
		col_names.append(name)

	pred_train = pd.DataFrame(sort_train, columns=col_names)
	pred_test = pd.DataFrame(sort_test, columns=col_names)

	for name in col_names:
		pred_train[name] = pred_train[name].map(dicts.country)
		pred_test[name] = pred_test[name].map(dicts.country)

	pred_train = np.fliplr(pred_train)
	pred_test = np.fliplr(pred_test)

	return forest, pred_train, pred_test
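
The per-row top-k selection above in isolation: argsort sorts each row ascending, the last k columns are the k most probable classes, and fliplr puts the best class first. A toy sketch with made-up probabilities:

import numpy as np

prob_test = np.array([[0.1, 0.6, 0.3],
                      [0.5, 0.2, 0.3]])
k = 2
sort_test = np.argsort(prob_test)[:, -k:]  # ascending, so the best class ends up in the last column
print(np.fliplr(sort_test))                # [[1 2] [0 2]] -> most probable class first in each row
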
Example #26
def regenerate_dim(x):
    """ assume x in ns since epoch from the current time """
    msg = None  # msg allows us to see which shot/diag was at fault
    diffs = np.diff(x)
    # bincount needs non-negative input and allocates an array with N elements, where N is the largest value in the input
    small = (diffs > 0) & (diffs < 1000000)
    sorted_diffs = np.sort(diffs[np.where(small)[0]])
    counts = np.bincount(sorted_diffs)
    bigcounts, bigvals = myhist(diffs[np.where(~small)[0]])

    if pyfusion.VERBOSE>0:
        print('[[diff, count],....]')
        print('small:', [[argc, counts[argc]] for argc in np.argsort(counts)[::-1][0:5]])
        print('big or negative:', [[bigvals[argc], bigcounts[argc]] for argc in np.argsort(bigcounts)[::-1][0:10]])

    dtns = 1 + np.argmax(counts[1:])  # skip the first position - it is 0
    # wgt0 = np.where(sorted_diffs > 0)[0]  # we are in ns, so no worry about rounding
    histo = plt.hist if pyfusion.DBG() > 1 else np.histogram
    cnts, vals = histo(x, bins=200)[0:2]
    # ignore the two end bins - hopefully there will be very few there
    wmin = np.where(cnts[1:-1] < np.max(cnts[1:-1]))[0]
    if len(wmin)>0:
        print('**********\n*********** Gap in data > {p:.2f}%'.format(p=100*len(wmin)/float(len(cnts))))
    x01111 = np.ones(len(x))  # x01111 will be all 1s except for the first elt.
    x01111[0] = 0
    errcnt = np.sum(bigcounts) + np.sum(np.sort(counts)[::-1][1:])
    if errcnt>0 or (pyfusion.VERBOSE > 0): 
        msg = str('** repaired length of {l:,}, dtns={dtns:,}, {e} erroneous utcs'
              .format(l=len(x01111), dtns=dtns, e=errcnt))

    fixedx = np.cumsum(x01111)*dtns
    wbad = np.where((x - fixedx)>1e8)[0]
    fixedx[wbad] = np.nan
    debug_(pyfusion.DEBUG, 3, key="repair", msg="repair of W7-X scrambled Langmuir timebase") 
    return(fixedx, msg)
Example #27
    def show_heatmap(self, order_by = None,
                     order_by_row = None, order_by_col = None):
        if order_by:
            title = 'Network ordered by node covariate\n"%s"' % order_by
            o = np.argsort(self.node_covariates[order_by][:])
        elif order_by_row:
            title = 'Network ordered by row covariate\n"%s"' % order_by_row
            o = np.argsort(self.row_covariates[order_by_row][:])
        elif order_by_col:
            title = 'Network ordered by column covariate\n"%s"' % order_by_col
            o = np.argsort(self.col_covariates[order_by_col][:])
        else:
            title, o = 'Unordered adjacency matrix', np.arange(self.N)

        f, (ax_im, ax_ord) = plt.subplots(2, sharex = True)
        f.set_figwidth(3)
        f.set_figheight(6)
        A = self.adjacency_matrix()
        ax_im.imshow(A[o][:,o]).set_cmap('binary')
        ax_im.set_ylim(0, self.N - 1)
        ax_im.set_xticks([])
        ax_im.set_yticks([])
        ax_im.set_title(title)
        #plt.setp([ax_im.get_xticklabels(), ax_im.get_yticklabels()],
        #         visible = False)
        if order_by:
            ax_ord.scatter(np.arange(self.N), self.node_covariates[order_by][o])
            ax_ord.set_xlim(0, self.N - 1)
            ax_ord.set_ylim(self.node_covariates[order_by][o[0]],
                            self.node_covariates[order_by][o[-1]])
        plt.show()
Example #28
def rowwise_rank(array, mask=None):
    """
    Take a 2D array and return the 0-indexed sorted position of each element in
    the array for each row.

    Example
    -------
    In [5]: data
    Out[5]:
    array([[-0.141, -1.103, -1.0171,  0.7812,  0.07  ],
           [ 0.926,  0.235, -0.7698,  1.4552,  0.2061],
           [ 1.579,  0.929, -0.557 ,  0.7896, -1.6279],
           [-1.362, -2.411, -1.4604,  1.4468, -0.1885],
           [ 1.272,  1.199, -3.2312, -0.5511, -1.9794]])

    In [7]: argsort(argsort(data))
    Out[7]:
    array([[2, 0, 1, 4, 3],
           [3, 2, 0, 4, 1],
           [4, 3, 1, 2, 0],
           [2, 0, 1, 4, 3],
           [4, 3, 0, 2, 1]])
    """
    # note that unlike scipy.stats.rankdata, the output here is 0-indexed, not
    # 1-indexed.
    return argsort(argsort(array))
Example #29
	def target_neurons(self,nConnectPerInput,network,strCorr,bAntiCorr=False):
		numInput = self.dicProperties["IODim"]
		numNodesReservoir = self.dicProperties["ReservoirDim"]
		matTargetNeurons = np.zeros((numInput,nConnectPerInput))
		if strCorr == "Betweenness":
			self.lstBetweenness = betweenness_list(network)[0].a #get edge betweenness array
			lstSortedNodes = np.argsort(self.lstBetweenness)
			if not bAntiCorr:
				lstSortedNodes = lstSortedNodes[::-1]
			for i in range(numInput):
				lstRandIdx = rand_int_trunc_exp(0,numNodesReservoir,0.2,nConnectPerInput) # characteristic exponential decay is a fifth of the reservoir's size
				matTargetNeurons[i,:] = lstSortedNodes[lstRandIdx]
		elif "degree" in strCorr:
			# get the degree type
			idxDash = strCorr.find("-")
			strDegType = strCorr[:idxDash].lower()
			lstDegrees = degree_list(network,strDegType)
			# sort the nodes by their importance
			lstSortedNodes = np.argsort(lstDegrees)
			if not bAntiCorr:
				lstSortedNodes = lstSortedNodes[::-1]
			for i in range(numInput):
				lstRandIdx = rand_int_trunc_exp(0,numNodesReservoir,0.2,nConnectPerInput) # characteristic exponential decay is a fifth of the reservoir's size
				matTargetNeurons[i,:] = lstSortedNodes[lstRandIdx]
		else:
			matTargetNeurons = np.random.randint(0,numNodesReservoir,(numInput,nConnectPerInput))
		return matTargetNeurons.astype(int)
Example #30
    def __call__(self, filt, mask=None):
        '''
        Provide the iterator over the levels.
        '''
        self._check_filter(filt, mask)
        # This cover method is only for one-dimensional filter functions.
        assert(self.dim==1)
        # The interval length measures indices, not filter values
        # in this case.
        self.interval_length = 1. / \
            ( self.intervals[0] - (self.intervals[0]-1)*self.fract_overlap )
        self.step_size = self.interval_length*(1-self.fract_overlap)

        if mask is None:
            self.n = len(self.filt)
            self.sortorder = np.argsort(np.ravel(self.filt))
        else:
            idx = np.flatnonzero(mask)
            self.n = len(idx)
            sortorder = np.argsort(np.ravel(self.filt[mask]))
            self.sortorder = idx[sortorder]

        assert len(self.sortorder)==self.n

        self.iter = range(self.intervals[0]).__iter__()
        return self
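
The masked branch above sorts only the selected entries and then maps the sort order back to positions in the full array via the indices from np.flatnonzero. The same trick on toy data:

import numpy as np

filt = np.array([0.8, 0.1, 0.5, 0.3])
mask = np.array([True, False, True, True])
idx = np.flatnonzero(mask)            # [0 2 3] -> positions of the unmasked entries
sortorder = np.argsort(filt[mask])    # sort order within the masked subset
print(idx[sortorder])                 # [3 2 0] -> original indices, ascending by filter value
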
Example #31
def shi_malik(st_mat,eig_thresh=0.95,cut=0,group=None):
    """
    Given a stochastic matrix describing the strength
    of the relationship between pairs of items,
    determines an aggregation of the items using
    the spectral approach of Shi and Malik.

    A column-stochastic matrix T will always have a leading
    eigenvalue of 1 and a leading uniform right-eigenvector, 
    u=(1,...,1), which is a fixed point of the map:

    T u = u

    If T has no disconnected components then u is the
    unique fixed point (up to a constant scaling) 
    and the sub-leading eigenvalue
    is strictly less than one; otherwise, the eigenvalue
    1 is degenerate. In the first case, if the sub-leading
    eigenvalue is close to 1, then the sub-leading
    right-eigenvector y may be used to partition the indices into
    two slowly-decaying communities.

    The Shi-Malik algorithm is recursive, taking
    the sub-leading eigenvector of T (as long as the
    corresponding eigenvalue is above a threshold),
    using it to bipartition the indices, and then
    repeating these steps on the partitions with a reweighted
    matrix. This implementation cuts the vector y by value,
    by default into components y>0 and y<=0, because of the
    orthogonality relationship

    <y>_pi = y . pi = 0

    which indicates that the mean value of y
    under the stationary distribution pi 
    (left-eigenvector of T)
    must always be zero, making this a value of significance.

    The algorithm halts when no community has a sub-leading
    eigenvector above the threshold, and the final partitioning
    is returned as an Aggregation.

    Arguments
    ---------
    st_mat :        A square stochastic matrix describing a Markov dynamic.

    Keyword Arguments
    -----------------
    eig_thresh :    The smallest value the subleading eigenvalue may have to continue the recursion.

    cut :           The value used to "cut" the subleading eigenvector into two clusters.

    group :         The group which labels the indices of st_mat, and which will be the item set of the returned Aggregation.

    Output
    ------
    Aggregation of the indices of st_mat
    """
    if group is None:
        group = _Group(_np.arange(st_mat.shape[0]))

    num_items = group.size
    clusts = _Aggregation(
        group,
        _Group(_np.array([0])),
        {0:_np.arange(len(group))}
    )
    change = True
    while change:
        new_clusts = []
        change = False
        for k,c in clusts:
            if len(c)>1:
                T = _utils.stoch(st_mat[
                    _np.ix_(c.in_superset,c.in_superset)
                ])
                eigs,evecs = _la.eig(T)
                einds = _np.flip(_np.argsort(_np.abs(eigs)))
                if eigs[einds[1]]>eig_thresh:
                    y = _np.real(evecs[:,einds[1]])
                    ind_agg = split_by_vals(y/_np.sum(y),group=c,cuts=cut)
                    if ind_agg.clusters.size>1:
                        new_clusts.append(c.in_superset[ind_agg[0].in_superset])
                        new_clusts.append(c.in_superset[ind_agg[1].in_superset])
                    else:
                        ind_agg = split_by_gaps(y,group=c)
                        new_clusts.append(c.in_superset[ind_agg[0].in_superset])
                        new_clusts.append(c.in_superset[ind_agg[1].in_superset])
                    change = True
                else:
                    new_clusts.append(c.in_superset)
            else:
                new_clusts.append(c.in_superset)

        new_agg = {j:new_clusts[j] for j in range(len(new_clusts))}
        clusts = _Aggregation(
            group,
            _Group(_np.arange(len(new_clusts))),
            new_agg
        )
    return clusts
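
The numerical heart of the loop above is ordering the eigenvalues by magnitude and taking the eigenvector of the second-largest one. A self-contained sketch of just that step, using numpy's eig in place of the module's _la.eig and a toy stochastic matrix:

import numpy as np

T = np.array([[0.9, 0.1, 0.0],
              [0.1, 0.8, 0.1],
              [0.0, 0.1, 0.9]])               # toy stochastic matrix
eigs, evecs = np.linalg.eig(T)
einds = np.flip(np.argsort(np.abs(eigs)))     # eigenvalue indices, largest magnitude first
subleading = np.real(evecs[:, einds[1]])      # eigenvector of the second-largest eigenvalue
print(np.round(np.real(eigs[einds]), 3))      # [1.  0.9 0.7]
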
Example #32
    def classify_boxes(self, images, image_names, detection_json,
                       classification):
        classification_graph = self.models[classification]
        class_names = self.class_names[classification]
        # json_with_classes = self.add_classification_categories(detection_json, class_names)
        classification_predictions = {}

        with classification_graph.as_default():
            with tf.Session(graph=classification_graph) as sess:
                # Get input and output tensors of classification model
                image_tensor = classification_graph.get_tensor_by_name(
                    'input:0')
                predictions_tensor = classification_graph.get_tensor_by_name(
                    'output:0')
                predictions_tensor = tf.squeeze(predictions_tensor, [0])

                # For each image
                n_images = len(images)
                for i_image in tqdm.tqdm(list(range(0, n_images))):
                    images = [np.asarray(image, np.uint8) for image in images]
                    image_data = images[i_image]

                    # Scale pixel values to [0,1]
                    image_data = image_data / 255
                    image_height, image_width, _ = image_data.shape

                    image_description = detection_json[image_names[i_image]]
                    classification_predictions[image_names[i_image]] = list()
                    # For each box
                    n_detections = len(image_description)
                    for i_box in range(n_detections):

                        cur_detection = image_description[i_box]

                        # Skip detections with low confidence
                        if cur_detection[4] < self.default_confidence_threshold:
                            continue

                        # Skip if detection category is not in whitelist
                        if not str(cur_detection[5]
                                   ) in self.detection_category_whitelist:
                            continue

                        # box ymin, xmin, ymax, xmax
                        x_min = cur_detection[1]
                        y_min = cur_detection[0]
                        width_of_box = cur_detection[1] + cur_detection[3]
                        height_of_box = cur_detection[0] + cur_detection[2]

                        # Get current box in relative coordinates and format [x_min, y_min, width_of_box, height_of_box]
                        box_orig = [x_min, y_min, width_of_box, height_of_box]
                        # Convert to [ymin, xmin, ymax, xmax] and
                        # store it as 1x4 numpy array so we can re-use the generic multi-box padding code
                        box_coords = np.array([[
                            box_orig[1], box_orig[0],
                            box_orig[1] + box_orig[3],
                            box_orig[0] + box_orig[2]
                        ]])
                        # Convert normalized coordinates to pixel coordinates
                        box_coords_abs = (box_coords *
                                          np.tile([image_height, image_width],
                                                  (1, 2)))
                        # Pad the detected animal to a square box and additionally by PADDING_FACTOR; the result will be in crop_boxes.
                        # However, we need to make sure that the box coordinates are still within the image.
                        bbox_sizes = np.vstack([
                            box_coords_abs[:, 2] - box_coords_abs[:, 0],
                            box_coords_abs[:, 3] - box_coords_abs[:, 1]
                        ]).T
                        offsets = (self.padding_factor *
                                   np.max(bbox_sizes, axis=1, keepdims=True) -
                                   bbox_sizes) / 2
                        crop_boxes = box_coords_abs + np.hstack(
                            [-offsets, offsets])
                        crop_boxes = np.maximum(0, crop_boxes).astype(int)
                        # Get the first (and only) row as our bbox to classify
                        crop_box = crop_boxes[0]
                        # Get the image data for that box
                        cropped_img = image_data[crop_box[0]:crop_box[2],
                                                 crop_box[1]:crop_box[3]]

                        # Run inference
                        predictions = sess.run(
                            predictions_tensor,
                            feed_dict={image_tensor: cropped_img})
                        current_predictions = []
                        # Add the *num_annotated_classes* top scoring classes
                        for class_idx in np.argsort(
                                -predictions)[:self.num_annotated_classes]:
                            class_conf = self.truncate_float(
                                predictions[class_idx].item())
                            for idx, name in enumerate(class_names):
                                if class_idx == idx:
                                    current_predictions.append(
                                        [f'{name}', class_conf])

                        classification_predictions[
                            image_names[i_image]].append(current_predictions)

        return classification_predictions
Example #33
    def convert(self,
                mol,  # type: ignore
                state_attributes: List = None,
                full_pair_matrix: bool = True) -> Dict:
        """
        Compute the representation for a molecule

        Args:
            mol (pybel.Molecule): Molecule to generate features for
            state_attributes (list): State attributes. Uses average mass and number of bonds per atom as default
            full_pair_matrix (bool): Whether to generate info for all atom pairs, not just bonded ones
        Returns:
            (dict): Dictionary of features
        """

        # Get the features for all atoms and bonds
        atom_features = []
        atom_pairs = []
        for idx, atom in enumerate(mol.atoms):
            f = self.get_atom_feature(mol, atom)
            atom_features.append(f)
        atom_features = sorted(atom_features, key=lambda x: x["coordid"])
        num_atoms = mol.OBMol.NumAtoms()
        for i, j in itertools.combinations(range(0, num_atoms), 2):
            bond_feature = self.get_pair_feature(mol, i, j, full_pair_matrix)
            if bond_feature:
                atom_pairs.append(bond_feature)
            else:
                continue

        # Compute the graph distance, if desired
        if 'graph_distance' in self.bond_features:
            graph_dist = self._dijkstra_distance(atom_pairs)
            for i in atom_pairs:
                i.update({'graph_distance': graph_dist[i['a_idx'], i['b_idx']]})

        # Generate the state attributes (that describe the whole network)
        state_attributes = state_attributes or [
            [mol.molwt / num_atoms,
             len([i for i in atom_pairs if i['bond_type'] > 0]) / num_atoms]
        ]

        # Get the atom features in the order they are requested by the user as a 2D array
        atoms = []
        for atom in atom_features:
            atoms.append(self._create_atom_feature_vector(atom))

        # Get the bond features in the order requested by the user
        bonds = []
        index1_temp = []
        index2_temp = []
        for bond in atom_pairs:
            # Store the index of each bond
            index1_temp.append(bond.pop('a_idx'))
            index2_temp.append(bond.pop('b_idx'))

            # Get the desired bond features
            bonds.append(self._create_pair_feature_vector(bond))

        # Given the bonds (i,j), make it so (i,j) == (j, i)
        index1 = index1_temp + index2_temp
        index2 = index2_temp + index1_temp
        bonds = bonds + bonds

        # Sort the arrays by the beginning index
        sorted_arg = np.argsort(index1)
        index1 = np.array(index1)[sorted_arg].tolist()
        index2 = np.array(index2)[sorted_arg].tolist()
        bonds = np.array(bonds)[sorted_arg].tolist()

        return {'atom': atoms,
                'bond': bonds,
                'state': state_attributes,
                'index1': index1,
                'index2': index2}
Example #34
    def accumulate(self, p=None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        '''
        print('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.recThrs)
        K = len(p.catIds) if p.useCats else 1
        A = len(p.areaRng)
        M = len(p.maxDets)
        # -1 for the precision of absent categories
        precision = -np.ones((T, R, K, A, M))
        recall = -np.ones((T, K, A, M))
        scores = -np.ones((T, R, K, A, M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [
            n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng))
            if a in setA
        ]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0 * A0 * I0
            for a, a0 in enumerate(a_list):
                Na = a0 * I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if e is not None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate(
                        [e['dtScores'][0:maxDet] for e in E])

                    # different sorting methods generate slightly different results;
                    # mergesort is used to stay consistent with the Matlab
                    # implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')
                    dtScoresSorted = dtScores[inds]

                    dtm = np.concatenate(
                        [e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                          inds]
                    dtIg = np.concatenate(
                        [e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                         inds]
                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
                    npig = np.count_nonzero(gtIg == 0)
                    if npig == 0:
                        continue
                    tps = np.logical_and(dtm, np.logical_not(dtIg))
                    fps = np.logical_and(np.logical_not(dtm),
                                         np.logical_not(dtIg))

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp + tp + np.spacing(1))
                        q = np.zeros((R, ))
                        ss = np.zeros((R, ))

                        if nd:
                            recall[t, k, a, m] = rc[-1]
                        else:
                            recall[t, k, a, m] = 0

                        # numpy is slow without cython optimization for accessing elements;
                        # using python lists gives a significant speed improvement
                        pr = pr.tolist()
                        q = q.tolist()

                        for i in range(nd - 1, 0, -1):
                            if pr[i] > pr[i - 1]:
                                pr[i - 1] = pr[i]

                        inds = np.searchsorted(rc, p.recThrs, side='left')
                        try:
                            for ri, pi in enumerate(inds):
                                q[ri] = pr[pi]
                                ss[ri] = dtScoresSorted[pi]
                        except BaseException:
                            pass
                        precision[t, :, k, a, m] = np.array(q)
                        scores[t, :, k, a, m] = np.array(ss)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,
            'recall': recall,
            'scores': scores,
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc - tic))
Example #35
    def evaluateImg(self, imgId, catId, aRng, maxDet):
        '''
        perform evaluation for single category and image
        :return: dict (single image results)
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return None

        for g in gt:
            if g['ignore'] or (g['area'] < aRng[0] or g['area'] > aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

        # sort dt highest score first, sort gt ignore last
        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious
        ious = self.ious[imgId, catId][:, gtind] if len(
            self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm = np.zeros((T, G))
        dtm = np.zeros((T, D))
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T, D))
        if not len(ious) == 0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t, 1 - 1e-10])
                    m = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind, gind] > 0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind, gind] < iou:
                            continue
                        # if match successful and best so far, store
                        # appropriately
                        iou = ious[dind, gind]
                        m = gind
                    # if match made store id of match for both dt and gt
                    if m == -1:
                        continue
                    dtIg[tind, dind] = gtIg[m]
                    dtm[tind, dind] = gt[m]['id']
                    gtm[tind, m] = d['id']
        # set unmatched detections outside of area range to ignore
        a = np.array([d['area'] < aRng[0] or d['area'] > aRng[1]
                      for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T,
                                                                      0)))
        # store results for given image and category
        return {
            'image_id': imgId,
            'category_id': catId,
            'aRng': aRng,
            'maxDet': maxDet,
            'dtIds': [d['id'] for d in dt],
            'gtIds': [g['id'] for g in gt],
            'dtMatches': dtm,
            'gtMatches': gtm,
            'dtScores': [d['score'] for d in dt],
            'gtIgnore': gtIg,
            'dtIgnore': dtIg,
        }
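
# A small, hedged sketch of the stable score ordering used in evaluateImg(): negating the
# scores and using mergesort keeps tied detections in their original order. The detection
# dicts below are invented for illustration.
import numpy as np

dt = [{'id': 1, 'score': 0.9}, {'id': 2, 'score': 0.7}, {'id': 3, 'score': 0.9}]
maxDet = 2
dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
dt = [dt[i] for i in dtind[0:maxDet]]
print([d['id'] for d in dt])  # -> [1, 3]: highest scores first, ties keep input order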
Example #36
0
def show_dets_gt_boxes(im,
                       dets,
                       classes,
                       gt_boxes,
                       gt_classes,
                       scale=1.0,
                       FS=22,
                       LW=3.5,
                       save_file_path='temp_det_gt.png'):
    import matplotlib.pyplot as plt
    import numpy as np
    from random import random as rand
    from random import randint
    fig = plt.figure(1)
    fig.set_size_inches((2 * 8.5, 1 * 11), forward=False)

    plt.subplot(121)
    plt.cla()
    plt.axis("off")
    plt.imshow(im)

    for cls_dets, cls_name in zip(dets, classes):
        scores = []
        if len(cls_dets) == 0:
            continue
        for det in cls_dets:
            scores += [det[-1]]
        ord = np.argsort(scores)
        cls_dets = cls_dets[ord]
        for det in cls_dets:
            bbox = det[:4] * scale
            color = (rand(), rand(), rand())
            rect = plt.Rectangle((bbox[0], bbox[1]),
                                 bbox[2] - bbox[0],
                                 bbox[3] - bbox[1],
                                 fill=False,
                                 edgecolor=color,
                                 linewidth=LW)
            plt.gca().add_patch(rect)
            score = det[-1]
            corner = randint(1, 2)
            if corner == 1:
                x0 = bbox[0]
                y0 = bbox[1]
            if corner == 2:
                x0 = bbox[0]
                y0 = bbox[3]
            # if corner == 3:
            #   x0 = bbox[2]; y0 = bbox[1]
            # if corner == 4:
            #     x0 = bbox[2]; y0 = bbox[3]

            plt.gca().text(x0,
                           y0,
                           '{:s} {:.3f}'.format(cls_name, score),
                           bbox=dict(facecolor=color, alpha=0.6),
                           fontsize=FS,
                           color='white')

    plt.subplot(122)
    plt.cla()
    plt.axis("off")
    plt.imshow(im)

    for cls_idx, cls_name in enumerate(gt_classes):
        bbox = gt_boxes[cls_idx]
        color = (rand(), rand(), rand())
        rect = plt.Rectangle((bbox[0], bbox[1]),
                             bbox[2] - bbox[0],
                             bbox[3] - bbox[1],
                             fill=False,
                             edgecolor=color,
                             linewidth=LW)
        plt.gca().add_patch(rect)

        plt.gca().text(bbox[0],
                       bbox[1],
                       '{:s}'.format(cls_name),
                       bbox=dict(facecolor=color, alpha=0.6),
                       fontsize=FS,
                       color='white')

    plt.gca().set_axis_off()
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    fig.savefig(save_file_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)
Example #37
0
def param_effects_fig(placeholder=True, supervised=True):
    sb.set_context('talk')
    # sb.set_context('poster')
    # sb.set_context('notebook')
    # fig, axes = plt.subplots(2, figsize=(6, 8))
    # fig, axes = plt.subplots(1, 2, figsize=(10, 6))
    if supervised:
        fig, axes = plt.subplots(1, 2, figsize=(10, 5.2))
    else:
        fig, axes = plt.subplots(1, 2, figsize=(10, 5.72))  # 10% taller

    KEEP_HOW_MANY = 10  # plotting too many makes fig hideous

    if supervised:
        df_fc = pd.read_csv(NET_SIZE_PATH_SUPERVISED)
        df_pool = pd.read_csv(POOL_SIZE_PATH_SUPERVISED)
    else:
        df_fc = pd.read_csv(NET_SIZE_PATH_UNSUPERVISED)
        df_pool = pd.read_csv(POOL_SIZE_PATH_UNSUPERVISED)

    # print "df fc:"
    # print df_fc
    # print "df_pool"
    # print df_pool
    # return

    # make sure both use the same datasets, because otherwise the
    # legend will break / be wrong
    dsets_fc = sorted(df_fc[DATASET_COL].unique())
    dsets_pool = sorted(df_pool[DATASET_COL].unique())
    assert np.array_equal(dsets_fc, dsets_pool)
    dsets = dsets_fc

    dset_names_lens = np.array([len(name) for name in dsets])
    sort_idxs = np.argsort(dset_names_lens)
    dsets = [dsets[i] for i in sort_idxs[:KEEP_HOW_MANY]]

    # print "param_effects_fig: using datasets: ", dsets
    # return

    # ------------------------ top plot: fc layer size

    fc_params = (df_fc, NET_SIZE_COL, axes[0])
    pool_params = (df_pool, POOL_SIZE_COL, axes[1])
    for (df, xcol, ax) in (fc_params, pool_params):
        for dset in dsets:
            sub_df = df[df[DATASET_COL] == dset]
            sub_df = sub_df.sort_values(xcol)
            xvals, yvals = sub_df[xcol], sub_df[ACC_COL]
            yvals /= yvals.max()
            # name = dset.replace('_', ' ').replace('-', ' ').capitalize()
            name = dset.replace('_', ' ').replace('-', ' ')
            ax.plot(xvals, yvals, label=name)

    leg_lines, leg_labels = ax.get_legend_handles_labels()
    plt.figlegend(leg_lines,
                  leg_labels,
                  loc='lower center',
                  ncol=5,
                  labelspacing=0)

    ax = axes[0]
    ax.set_title("Effect of Fully Connected Layer Size", y=1.03)
    if supervised:
        ax.set_xlabel("Neurons in Each Fully Connected Layer")
    else:
        ax.set_xlabel("Neurons in Each Fully Connected Layer\n"
                      "(Fraction of # of classes)")
    ax.set_ylabel("Normalized Accuracy")
    ax = axes[1]
    ax.set_title("Effect of Max Pool Size", y=1.03)
    ax.set_xlabel("Fraction of Mean Time Series Length")
    # ax.set_xlabel("Max Pool Size\n(Fraction of Mean Time Series Length)")
    ax.set_ylabel("Normalized Accuracy")

    # plt.tight_layout(w_pad=.02)
    # plt.tight_layout(h_pad=2.0)
    plt.tight_layout(h_pad=1.8)
    # plt.tight_layout()
    # plt.subplots_adjust(bottom=.32)  # this one with horz but 2 legend cols
    # plt.subplots_adjust(bottom=.23)  # this one for vertical subplots
    if supervised:
        plt.subplots_adjust(bottom=.25)
    else:
        plt.subplots_adjust(bottom=.27)

    # plt.show()
    figname = 'param_effects'
    if not supervised:
        figname += '_unsupervised'
    save_fig_png(figname)
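
# A minimal sketch of the dataset-selection step above: argsort over the name lengths gives
# indices from shortest to longest name, and only the first KEEP_HOW_MANY are kept.
# The dataset names here are placeholders, not the ones used in the figure.
import numpy as np

dsets = ['ecg', 'gesture_phase', 'wafer', 'electric_devices']
KEEP_HOW_MANY = 2
name_lens = np.array([len(name) for name in dsets])
sort_idxs = np.argsort(name_lens)
dsets = [dsets[i] for i in sort_idxs[:KEEP_HOW_MANY]]
print(dsets)  # -> ['ecg', 'wafer']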
Example #38
0
    def __init__(self, track, params=visualizerParams()):
        self.track = track

        self.visualizer_params = params
        plot_subplots = params.plot_subplots
        parking_spot_width = params.parking_spot_width
        num_parking_spots = params.num_parking_spots

        self.fsm_state_ids = list(state_num_dict.values())
        self.fsm_state_names = list(state_num_dict.keys())
        sort_idxs = np.argsort(self.fsm_state_ids)

        self.fsm_state_ids = [self.fsm_state_ids[i] for i in sort_idxs]
        self.fsm_state_names = [self.fsm_state_names[i] for i in sort_idxs]

        # Initialize figure
        figsize = (14, 7) if plot_subplots else (7, 7)
        self.fig = plt.figure(figsize=figsize)
        self.fig.suptitle("BARC OBCA Plotter", fontsize=16)
        plt.ion()

        self.axs = dict()

        if params.trajectory_file is not None:
            trajectory_scaling = params.trajectory_scaling
            trajectory_init = params.trajectory_init
            trajectory = load_vehicle_trajectory(params.trajectory_file)
            trajectory -= np.array([trajectory[0, 0], trajectory[0, 1], 0, 0])
            trajectory = np.multiply(
                trajectory,
                np.array([
                    trajectory_scaling['x'], trajectory_scaling['y'], 1,
                    trajectory_scaling['v']
                ]))
            trajectory += np.array(
                [trajectory_init['x'], trajectory_init['y'], 0, 0])

            # waypoints, next_ref_start = get_trajectory_waypoints(trajectory, 20, 0.1)
            waypoints = np.array([])

        ################ Trajectory Subplot ################
        if plot_subplots:
            axtr = self.fig.add_subplot(3, 2, 1)
            axtr.set_title("Trajectories")
            axtr.set_xlabel("X")
            axtr.set_ylabel("Y")
        else:
            axtr = self.fig.add_subplot(3, 1, 1)

        if params.trajectory_file is not None:
            axtr.plot(trajectory[:, 0], trajectory[:, 1])
            if waypoints.size > 0:
                axtr.plot(waypoints[:, 0], waypoints[:, 1], 'ro')
                axtr.plot(trajectory[next_ref_start, 0],
                          trajectory[next_ref_start, 1], 'bx')

        # User Defined map plotting
        parking_spot_length = 0.6
        track_length = self.track.track_length
        track_width = self.track.track_width

        # Plot lanes
        self.track.plot_map(axtr)

        # Plot parking spots
        axtr.plot([0, track_length], [
            track_width / 2 + parking_spot_length,
            track_width / 2 + parking_spot_length
        ],
                  color='#908E8E',
                  linewidth=1.5)
        axtr.plot([0, track_length], [
            -track_width / 2 - parking_spot_length,
            -track_width / 2 - parking_spot_length
        ],
                  color='#908E8E',
                  linewidth=1.5)
        axtr.plot([0, 0],
                  [track_width / 2, track_width / 2 + parking_spot_length],
                  color='#908E8E',
                  linewidth=1.5)
        axtr.plot([0, 0],
                  [-track_width / 2, -track_width / 2 - parking_spot_length],
                  color='#908E8E',
                  linewidth=1.5)
        for i in range(num_parking_spots):
            axtr.plot([(i + 1) * parking_spot_width,
                       (i + 1) * parking_spot_width],
                      [track_width / 2, track_width / 2 + parking_spot_length],
                      color='#908E8E',
                      linewidth=1.5)
            axtr.plot(
                [(i + 1) * parking_spot_width, (i + 1) * parking_spot_width],
                [-track_width / 2, -track_width / 2 - parking_spot_length],
                color='#908E8E',
                linewidth=1.5)

        axtr.set_aspect('equal')
        self.axs['track'] = axtr

        if plot_subplots:
            ################ Speed Subplot ################
            axv = self.fig.add_subplot(4, 2, 2)
            axv.set_ylabel("vel")
            axv.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            self.axs['vel'] = axv

            ################ PsiDot Subplot ################
            axpsiDot = self.fig.add_subplot(4, 2, 4)
            axpsiDot.set_ylabel("yaw rate")
            axpsiDot.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            self.axs['yaw_rate'] = axpsiDot

            ################ u_a Subplot ################
            axua = self.fig.add_subplot(4, 2, 6)
            axua.set_ylabel("motor")
            axua.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            self.axs['throttle'] = axua

            ################ u_df Subplot ################
            axudf = self.fig.add_subplot(4, 2, 8)
            axudf.set_ylabel("servo")
            axudf.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            self.axs['steering'] = axudf

        plt.tight_layout()
        plt.show()

        self.plotters = []
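
# A minimal sketch of the FSM bookkeeping in __init__ above: two parallel lists (state ids
# and state names) are kept aligned by indexing both with the argsort of one of them.
# The dictionary below stands in for state_num_dict.
import numpy as np

state_num_dict = {'IDLE': 2, 'PARKING': 0, 'DRIVING': 1}
ids = list(state_num_dict.values())
names = list(state_num_dict.keys())
order = np.argsort(ids)
ids = [ids[i] for i in order]
names = [names[i] for i in order]
print(ids, names)  # -> [0, 1, 2] ['PARKING', 'DRIVING', 'IDLE']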

# Feature importances of tree-based classifiers (only the first four, which expose feature_importances_, are plotted)
names_classifiers = [('RandomForest', best_rf_clf), ('AdaBoost', best_ada_clf), 
                     ('GradientBoosting', best_gb_clf), ('ExtraTrees', best_extra_clf), 
                     ('LogisticRegression', best_logreg)]

fig, ax = plt.subplots(2, 2, figsize=(15, 15))

index = 0
for row in range(2):
    for col in range(2):
        name = names_classifiers[index][0]
        clf = names_classifiers[index][1]
        feature_importances = clf.feature_importances_
        indices = np.argsort(feature_importances)[::-1]  # Most to least important features
        graph = sns.barplot(y=X_train.columns[indices], x=feature_importances[indices],
                           ax=ax[row][col])
        graph.set_xlabel('Relative Importance')
        graph.set_ylabel('Features')
        graph.set_title(name + ' feature importances')
        index += 1
# Title, sex, age, fare, and family size are most important features
# Might remove series of cabin features and create a 'HasCabin' feature
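
# A minimal sketch (with invented names and numbers) of the ordering used in the plot above:
# argsort()[::-1] turns the importances into a most-to-least-important ranking that can be
# fed straight to a bar plot.
import numpy as np

feature_names = np.array(['Title', 'Sex', 'Age', 'Fare', 'FamilySize'])
feature_importances = np.array([0.30, 0.25, 0.20, 0.15, 0.10])
indices = np.argsort(feature_importances)[::-1]
for name, imp in zip(feature_names[indices], feature_importances[indices]):
    print(name, imp)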


# In[171]:


test_survived_rf_clf = pd.Series(best_rf_clf.predict(test), name='Rf')
test_survived_ada_clf = pd.Series(best_ada_clf.predict(test), name='Ada')
Example #40
0
            if detector not in trigfile:
                continue

            grp = trigfile[detector]

            # show gates
            if 'gates' in grp:
                for gate in grp['gates'][:]:
                    plot_gate(ax[detector], gate)

            # show triggers
            if 'end_time' not in grp or len(grp['end_time']) == 0:
                continue
            ar_time.update(grp['end_time'])
            ar_dur.update(grp['template_duration'])
            sorter = np.argsort(grp['snr'][:])
            sc = ax[detector].scatter(grp['end_time'][:][sorter], grp['template_duration'][:][sorter],
                                      c=grp['snr'][:][sorter], cmap='plasma_r', vmin=4.5, vmax=10)

ax[detectors[-1]].set_xlabel('GPS time')

for detector in detectors:
    ax[detector].set_xlim(ar_time.low, ar_time.high)
    ax[detector].set_ylim(ar_dur.low, ar_dur.high)
    ax[detector].set_yscale('log')
    for ht in (args.highlight_times or []):
        ax[detector].axvline(ht, ls='--', color='green')
    for g in (args.gates or []):
        gate = g.split(',')
        if gate[0] != detector:
            continue
Example #41
0
                            cosmean, coserr = bootstrap(cosamp)
                            sinmean, sinerr = bootstrap(sinamp)
                        else:
                            cosmean = np.mean(cosamp)
                            sinmean = np.mean(sinamp)
                        coserr = np.std(cosamp) / np.sqrt(ngood)
                        sinerr = np.std(sinamp) / np.sqrt(ngood)
                        cosamps[iline] = cosmean
                        sinamps[iline] = sinmean
                        coserrs[iline] = coserr
                        sinerrs[iline] = sinerr
                        ffac = 2 * np.pi * fline * t
                        template += cosmean*np.cos(ffac) + sinmean*np.sin(ffac)
                        iline += 1

                ind = np.argsort(linefreq)

                if col == '':
                    lw = 5
                else:
                    lw = 2

                plt.subplot(3, 1, 1)
                plt.errorbar(linefreq[ind], cosamps[ind]*scale, coserrs[ind]*scale,
                             fmt='-o', label=det+col, lw=lw)
                plt.subplot(3, 1, 2)
                plt.errorbar(linefreq[ind], sinamps[ind]*scale, sinerrs[ind]*scale,
                             fmt='-o', label=det+col, lw=lw)
                plt.subplot(3, 1, 3)
                plt.plot(t, template*scale, label=det+col, lw=lw)
Example #42
0
 def update(self, experiment):
     result = DataReader.get_episodes_success_counts(experiment)
     self.result_permutation = np.argsort(result)
     result = np.sort(result)
     self.episode_grid_heat_map.color = result.reshape(11, 51)
     self.color_result = result
def diffArea(nest, outlier = 0, data = 0, kinds = 'all', axis = 'probability', ROI = 20 , mu = 0, sigma = 1, weight = False, interpolator = 'linear', distribuition = 'normal',seed = None, plot = True):
    
    """
    Return the error area between an analytic function and an estimated discretization of a distribution.

    Parameters
    ----------
    nest: int
        The number of estimation points.
    outlier: int, optional
        The location of an outlier event, e.g. outlier = 50 will put an event at -50 and +50 if mu = 0.
        Default is 0.
    data: int, optional
        If data > 0, random data will be used instead of analytic data.
        Default is 0.
    kinds: str or array, optional
        Specifies the kind(s) of discretization to analyze
        ('Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2', 'all').
        Default is 'all'.
    axis: str, optional
        Specifies the x axis to analyze
        ('probability', 'derivative', '2nd_derivative', 'X').
        Default is 'probability'.
    ROI: int, optional
        Specifies the number of regions of interest.
        Default is 20.
    mu: int, optional
        Specifies the mean of the distribution.
        Default is 0.
    sigma: int, optional
        Specifies the standard deviation of the distribution.
        Default is 1.
    weight: bool, optional
        If True, each ROI is given a different weight in the analysis.
        Default is False.
    interpolator: str, optional
        Specifies the kind of interpolation as a string
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
        where 'zero', 'slinear', 'quadratic' and 'cubic' refer to a spline
        interpolation of zeroth, first, second or third order) or as an
        integer specifying the order of the spline interpolator to use.
        Default is 'linear'.
    distribuition: str, optional
        Selects the distribution to analyze
        ('normal', 'lognormal').
        Default is 'normal'.
    seed: optional
        Random state passed to numpy.random.set_state for reproducible data generation.
        Default is None.
    plot: bool, optional
        If True, a plot of the analysis is shown.
        Default is True.

    Returns
    -------
    area, [probROIord, areaROIord]: float and list of dict
        The total error area and, per kind, the ordered 'x' (probability) and 'y' (error area) values.

    """
    import numpy as np
    from scipy.stats import norm, lognorm
    from scipy.interpolate import interp1d
    from numpy import  exp
    import matplotlib.pyplot as plt
    from statsmodels.distributions import ECDF
    from distAnalyze import pdf, dpdf, ddpdf, PDF, dPDF, ddPDF

    area = []
    n = []
    data = int(data)
    if distribuition == 'normal': 
        outlier_inf = outlier_sup = outlier
    elif distribuition == 'lognormal': 
        outlier_inf = 0
        outlier_sup = outlier

    ngrid = int(1e6)
    truth = pdf
        
    if axis == 'probability':
        truth1 = pdf
    elif axis == 'derivative':
        truth1 = dpdf
    elif axis == '2nd_derivative':
        truth1 = ddpdf
    elif axis == 'X':
        truth1 = lambda x,mu,sigma,distribuition: x
    #else: return 'No valid axis'
            
    probROIord = {}
    areaROIord = {}
    div = {}
    if seed is not None:
          np.random.set_state(seed)
    if data:
          if distribuition == 'normal':
                d = np.random.normal(mu,sigma,data)
          elif distribuition == 'lognormal':
                d = np.random.lognormal(mu, sigma, data)
          
                
          
    if kinds == 'all': 
        kinds = ['Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2']
    elif type(kinds) == str:
        kinds = [kinds]

    for kind in kinds:
        if distribuition == 'normal':
              inf, sup = norm.interval(0.9999, loc = mu, scale = sigma)
            
        elif distribuition == 'lognormal':
              inf, sup = lognorm.interval(0.9999, sigma, loc = 0, scale = exp(mu))
            

        xgrid = np.linspace(inf,sup,ngrid)
        xgridROI = xgrid.reshape([ROI,ngrid//ROI])
        
        dx = np.diff(xgrid)[0]
        
        if kind == 'Linspace':
            if not data:  
                  xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest)
            else:
                  if distribuition == 'normal':
                        #d = np.random.normal(loc = mu, scale = sigma, size = data)
                        inf,sup = min(d),max(d)
                        xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest)
                  elif distribuition == 'lognormal':
                        #d = np.random.lognormal(mean = mu, sigma = sigma, size = data)
                        inf,sup = min(d),max(d)
                        xest = np.linspace(inf-outlier_inf,sup+outlier_sup,nest)
                        
            yest = pdf(xest,mu,sigma,distribuition)
            
        elif kind == 'CDFm':
            eps = 5e-5
            yest = np.linspace(0+eps,1-eps,nest)
            if distribuition == 'normal':
                if not data:
                      xest = norm.ppf(yest, loc = mu, scale = sigma)
                      yest = pdf(xest,mu,sigma,distribuition)
                else:
                      #d = np.random.normal(loc = mu, scale = sigma, size = data)
                      ecdf = ECDF(d)
                      inf,sup = min(d),max(d)
                      xest = np.linspace(inf,sup,data)
                      yest = ecdf(xest)
                      interp = interp1d(yest,xest,fill_value = 'extrapolate', kind = 'nearest')
                      yest = np.linspace(eps,1-eps,nest)
                      xest = interp(yest)
                
            elif distribuition == 'lognormal':
                if not data:
                      xest = lognorm.ppf(yest, sigma, loc = 0, scale = exp(mu))
                      yest = pdf(xest,mu,sigma,distribuition)
                else:
                      #d = np.random.lognormal(mean = mu, sigma = sigma, size = data)
                      ecdf = ECDF(d)
                      inf,sup = min(d),max(d)
                      xest = np.linspace(inf,sup,nest)
                      yest = ecdf(xest)
                      interp = interp1d(yest,xest,fill_value = 'extrapolate', kind = 'nearest')
                      yest = np.linspace(eps,1-eps,nest)
                      xest = interp(yest)
            
            
        elif kind == 'PDFm':
            xest, yest = PDF(nest,mu,sigma, distribuition, outlier, data, seed)
        elif kind == 'iPDF1':
            xest, yest = dPDF(nest,mu,sigma, distribuition, outlier, data, 10, seed)
        elif kind == 'iPDF2':
            xest, yest = ddPDF(nest,mu,sigma, distribuition, outlier, data, 10, seed)      
       
        YY = pdf(xest,mu, sigma,distribuition)
        fest = interp1d(xest,YY,kind = interpolator, bounds_error = False, fill_value = (YY[0],YY[-1]))
        
        #fest = lambda x: np.concatenate([fest1(x)[fest1(x) != -1],np.ones(len(fest1(x)[fest1(x) == -1]))*fest1(x)[fest1(x) != -1][-1]])
            
        yestGrid = []
        ytruthGrid = []
        ytruthGrid2 = []
        divi = []
        
        for i in range(ROI):
            yestGrid.append([fest(xgridROI[i])])
            ytruthGrid.append([truth(xgridROI[i],mu,sigma,distribuition)])
            ytruthGrid2.append([truth1(xgridROI[i],mu,sigma,distribuition)])
            divi.append(len(np.intersect1d(np.where(xest >= min(xgridROI[i]))[0], np.where(xest < max(xgridROI[i]))[0])))

        diff2 = np.concatenate(abs((np.array(yestGrid) - np.array(ytruthGrid))*dx))
        #diff2[np.isnan(diff2)] = 0
        areaROI = np.sum(diff2,1)
        
        divi = np.array(divi)   
        divi[divi == 0] = 1
        
        try:
            probROI = np.mean(np.sum(ytruthGrid2,1),1)
        except:
            probROI = np.mean(ytruthGrid2,1)
        
        
        probROIord[kind] = np.sort(probROI)
        index = np.argsort(probROI)
        
        areaROIord[kind] = areaROI[index]
        #deletes = ~np.isnan(areaROIord[kind])
        #areaROIord[kind] = areaROIord[kind][deletes]
        #probROIord[kind] = probROIord[kind][deletes]
        
        area = np.append(area,np.sum(areaROIord[kind]))
        n = np.append(n,len(probROIord[kind]))
        div[kind] = divi[index]
        if plot:
            if weight:
                plt.semilogy(probROIord[kind],areaROIord[kind]*div[kind],'-o',label = kind, ms = 3)
            else: plt.plot(probROIord[kind],areaROIord[kind],'-o',label = kind, ms = 3)
            

            plt.yscale('log')
            plt.xlabel(axis)
            plt.ylabel('Error')
            plt.legend()
        
        #plt.title('%s - Pontos = %d, div = %s - %s' %(j,nest, divs,interpolator))
        
    return area,[probROIord,areaROIord]
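
# A minimal sketch of the ROI ordering at the heart of diffArea(): the per-ROI error is
# reindexed with the argsort of the probability axis so both arrays stay aligned.
# The numbers below are arbitrary.
import numpy as np

probROI = np.array([0.4, 0.1, 0.3, 0.2])
areaROI = np.array([1e-3, 5e-3, 2e-3, 4e-3])
index = np.argsort(probROI)
probROIord = probROI[index]
areaROIord = areaROI[index]
print(probROIord, areaROIord)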
Example #44
0
def slip_distribution_profile(ox, t_step, t_step_subseismic, t_step_seismic, slip_ref, depth=15e3):
    """
    Helper routine to plot the snapshot data (slip contours)
    """
    mask = np.isfinite(ox["x"])
    x = ox["x"][mask].unique()
    x_order = np.argsort(x)
    t_vals = np.sort(ox["t"].unique())

    z0 = 0
    z_max = depth*1e-3
    z = np.linspace(0, z_max, len(x)) + z0

    Nx = len(x)
    Nt = len(t_vals)
    t_vals = t_vals[:-1]
    slip = ox["slip"][:Nx*(Nt-1)].values.reshape((Nt-1, Nx))
    slip = slip.T[x_order].T
    v = ox["v"][:Nx*(Nt-1)].values.reshape((Nt-1, Nx))
    v = v.T[x_order].T

    v_max = np.array([np.nanmax(v[i]) for i in range(Nt-1)])

    v_subseismic = 1e-7
    v_seismic = 1e-3

    t_prev = 0
    inds_seismic = (v_max >= v_seismic)
    inds_subseismic = (v_max >= v_subseismic) & (v_max < v_seismic)

    ref_ind = np.where(slip[:,0] > slip_ref)[0][0]
    slip_ref = slip[ref_ind,:]

    fig = plt.figure(figsize=(15,8), facecolor="white")
    colours = seaborn.color_palette("deep", 5)
    colours[0] = "b"
    colours[1] = "r"
    colours[2] = "b"

    ax = fig.add_subplot(111)

    for i in range(Nt-1):
        if inds_seismic[i]:
            if t_vals[i] > t_prev + t_step_seismic:
                plt.plot(slip[i]-slip_ref, z, ls="--", c=colours[1], lw=0.8)
                t_prev = t_vals[i]
        elif inds_subseismic[i]:
            if t_vals[i] > t_prev + t_step_subseismic:
                plt.plot(slip[i]-slip_ref, z, ls="-", c=colours[2], lw=1.0)
                t_prev = t_vals[i]
        else:
            if t_vals[i] > t_prev + t_step:
                plt.plot(slip[i]-slip_ref, z, ls="-", c=colours[0], lw=1.5)
                t_prev = t_vals[i]

    t_day = 24*3600.0
    t_yr = 365*t_day
    plt.plot([np.nan]*2, [np.nan]*2, "-", c=colours[0], label="Interseismic (%.0f yr)" % (t_step/t_yr))
    plt.plot([np.nan]*2, [np.nan]*2, "-", c=colours[2], label="Subseismic (%.1f day)" % (t_step_subseismic/t_day))
    plt.plot([np.nan]*2, [np.nan]*2, "--", c=colours[1], label="Coseismic (%.1f sec)" % (t_step_seismic))
    plt.legend(bbox_to_anchor=(0.0, 1.1, 1.0, .102), loc="center", ncol=3, borderaxespad=0.0)

    plt.ylim((np.min(z), np.max(z)))
    plt.xlim((0, np.max(slip)-np.max(slip_ref)))
    plt.ylabel("depth [km]")
    plt.xlabel("accumulated slip [m]")
    plt.gca().invert_yaxis()
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')
    plt.tight_layout()
    plt.subplots_adjust(top=0.85)
    plt.show()
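
# A minimal sketch of the column reordering used above: snapshot rows come in file order,
# so every row is permuted with the argsort of the fault coordinate x.
# Shapes and values below are illustrative only.
import numpy as np

x = np.array([3.0, 1.0, 2.0])
slip = np.array([[0.3, 0.1, 0.2],
                 [0.6, 0.4, 0.5]])   # (Nt, Nx) in file order
x_order = np.argsort(x)
slip_sorted = slip.T[x_order].T      # columns now follow increasing x
print(slip_sorted)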
Example #45
0
    def kllucb(self, anchors: list, init_stats: dict, epsilon: float, delta: float, batch_size: int, top_n: int,
               verbose: bool = False, verbose_every: int = 1) -> np.ndarray:
        """
        Implements the KL-LUCB algorithm (Kaufmann and Kalyanakrishnan, 2013).

        Parameters
        ----------
        anchors:
            A list of anchors from which two critical anchors are selected (see Kaufmann and Kalyanakrishnan, 2013).
        init_stats
            Dictionary with lists containing nb of samples used and where sample predictions equal the desired label.
        epsilon
            Precision bound tolerance for convergence.
        delta
            Used to compute beta.
        batch_size
            Number of samples.
        top_n
            Min of beam width size or number of candidate anchors.
        verbose
            Whether to print intermediate output.
        verbose_every
            Whether to print intermediate output every verbose_every steps.

        Returns
        -------
            Indices of best result options. Number of indices equals min of beam width or nb of candidate anchors.
        """

        # n_features equals the number of candidate anchors
        n_features = len(anchors)

        # arrays for total number of samples & positives (# samples where prediction equals desired label)
        n_samples, positives = init_stats['n_samples'], init_stats['positives']
        anchors_to_sample, anchors_idx = [], []
        for f in np.where(n_samples == 0)[0]:
            anchors_to_sample.append(anchors[f])
            anchors_idx.append(f)

        if anchors_idx:
            pos, total = self.draw_samples(anchors_to_sample, 1)
            positives[anchors_idx] += pos
            n_samples[anchors_idx] += total

        if n_features == top_n:  # return all options b/c of beam search width
            return np.arange(n_features)

        # update the upper and lower precision bounds until the difference between the best upper ...
        # ... precision bound of the low precision anchors and the worst lower precision bound of the high ...
        # ... precision anchors is smaller than eps
        means = positives / n_samples  # fraction sample predictions equal to desired label
        ub, lb = np.zeros(n_samples.shape), np.zeros(n_samples.shape)
        t = 1
        crit_a_idx = self.select_critical_arms(means, ub, lb, n_samples, delta, top_n, t)
        B = ub[crit_a_idx.ut] - lb[crit_a_idx.lt]
        verbose_count = 0

        while B > epsilon:

            verbose_count += 1
            if verbose and verbose_count % verbose_every == 0:
                ut, lt = crit_a_idx
                print('Best: %d (mean:%.10f, n: %d, lb:%.4f)' %
                      (lt, means[lt], n_samples[lt], lb[lt]), end=' ')
                print('Worst: %d (mean:%.4f, n: %d, ub:%.4f)' %
                      (ut, means[ut], n_samples[ut], ub[ut]), end=' ')
                print('B = %.2f' % B)

            # draw samples for each critical result, update anchors' mean, upper and lower
            # bound precision estimate
            selected_anchors = [anchors[idx] for idx in crit_a_idx]
            pos, total = self.draw_samples(selected_anchors, batch_size)
            idx = list(crit_a_idx)
            positives[idx] += pos
            n_samples[idx] += total
            means = positives / n_samples
            t += 1
            crit_a_idx = self.select_critical_arms(means, ub, lb, n_samples, delta, top_n, t)
            B = ub[crit_a_idx.ut] - lb[crit_a_idx.lt]
        sorted_means = np.argsort(means)

        return sorted_means[-top_n:]
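
# A minimal sketch of the final selection in kllucb(): the indices of the top_n highest
# empirical precisions are simply the last top_n entries of an ascending argsort.
# The means below are made up.
import numpy as np

means = np.array([0.55, 0.91, 0.13, 0.78])
top_n = 2
best = np.argsort(means)[-top_n:]
print(best)  # -> [3 1], the two largest means in ascending order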
Example #46
0
    def fit(self, X, y, overwrite_X=False, overwrite_y=False, verbose=False):
        """ Fit an OASIS model. """

        if not overwrite_X:
            X = X.copy()
        if not overwrite_y:
            y = y.copy()

        n_samples, n_features = X.shape

        self.init = np.random.RandomState(self.random_seed)

        # Parameter initialization
        self._weights = np.eye(n_features).flatten()
        # self._weights = np.random.randn(n_features,n_features).flatten()
        W = self._weights.view()
        W.shape = (n_features, n_features)

        ind = np.argsort(y)

        y = y[ind]
        X = X[ind, :]

        classes = np.unique(y)
        classes.sort()

        n_classes = len(classes)

        # Translate class labels to serial integers 0, 1, ...
        y_new = np.empty((n_samples, ), dtype='int')

        for ii in range(n_classes):
            y_new[y == classes[ii]] = ii

        y = y_new
        class_sizes = [None] * n_classes
        class_start = [None] * n_classes

        for ii in range(n_classes):
            class_sizes[ii] = np.sum(y == ii)
            # This finds the first occurrence of that class
            class_start[ii] = np.flatnonzero(y == ii)[0]

        loss_steps = np.empty((self.n_iter, ), dtype='bool')
        n_batches = int(np.ceil(self.n_iter / self.save_every))
        steps_vec = np.ones((n_batches, ), dtype='int') * self.save_every
        steps_vec[-1] = self.n_iter - (n_batches - 1) * self.save_every

        if verbose:
            print('n_batches = %d, total n_iter = %d' % (n_batches,
                                                         self.n_iter))

        for bb in range(n_batches):
            if verbose:
                print('run batch %d/%d, for %d steps ("." = 100 steps)\n'
                      % (bb + 1, n_batches, self.save_every))

            W, loss_steps_batch = self._fit_batch(W,
                                                  X,
                                                  y,
                                                  class_start,
                                                  class_sizes,
                                                  steps_vec[bb],
                                                  verbose=verbose)

            # print "loss_steps_batch = %d" % sum(loss_steps_batch)
            loss_steps[bb * self.save_every:min(
                (bb + 1) * self.save_every, self.n_iter)] = loss_steps_batch

            if self.do_sym:
                if np.mod(bb + 1, self.sym_every) == 0 or bb == n_batches - 1:
                    if verbose:
                        print "Symmetrizing"
                    symmetrize(W)

            if self.do_psd:
                if np.mod(bb + 1, self.psd_every) == 0 or bb == n_batches - 1:
                    if verbose:
                        print "PSD"
                    make_psd(W)

            if self.save_path is not None:
                self._save(bb + 1)  # back up model state

        return self
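
# A minimal sketch of the label preprocessing in fit() above: samples are grouped by class
# via argsort and the labels remapped to consecutive integers 0..n_classes-1.
# X and y below are toy data.
import numpy as np

X = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([7, 3, 7, 3])

ind = np.argsort(y)
y, X = y[ind], X[ind, :]

classes = np.unique(y)            # sorted unique labels
y_new = np.empty(len(y), dtype=int)
for ii in range(len(classes)):
    y_new[y == classes[ii]] = ii
print(y_new)                      # -> [0 0 1 1]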
Example #47
0
    def _fit(self, x, y, sample_weight, check_input):
        time_init = time.perf_counter()

        if self.verbose:
            self._logger.info("Optimal binning started.")
            self._logger.info("Options: check parameters.")

        _check_parameters(**self.get_params())

        # Pre-processing
        if self.verbose:
            self._logger.info("Pre-processing started.")

        self._n_samples = len(x)

        if self.verbose:
            self._logger.info("Pre-processing: number of samples: {}"
                              .format(self._n_samples))

        time_preprocessing = time.perf_counter()

        [x_clean, y_clean, x_missing, y_missing, x_special, y_special,
         y_others, categories, cat_others, sw_clean, sw_missing,
         sw_special, sw_others] = split_data(
            self.dtype, x, y, self.special_codes, self.cat_cutoff,
            self.user_splits, check_input, self.outlier_detector,
            self.outlier_params, None, None, self.class_weight, sample_weight)

        self._time_preprocessing = time.perf_counter() - time_preprocessing

        if self.verbose:
            n_clean = len(x_clean)
            n_missing = len(x_missing)
            n_special = len(x_special)

            self._logger.info("Pre-processing: number of clean samples: {}"
                              .format(n_clean))

            self._logger.info("Pre-processing: number of missing samples: {}"
                              .format(n_missing))

            self._logger.info("Pre-processing: number of special samples: {}"
                              .format(n_special))

            if self.outlier_detector is not None:
                n_outlier = self._n_samples-(n_clean + n_missing + n_special)
                self._logger.info("Pre-processing: number of outlier samples: "
                                  "{}".format(n_outlier))

            if self.dtype == "categorical":
                n_categories = len(categories)
                n_categories_others = len(cat_others)
                n_others = len(y_others)

                self._logger.info("Pre-processing: number of others samples: "
                                  "{}".format(n_others))

                self._logger.info("Pre-processing: number of categories: {}"
                                  .format(n_categories))

                self._logger.info("Pre-processing: number of categories "
                                  "others: {}".format(n_categories_others))

            self._logger.info("Pre-processing terminated. Time: {:.4f}s"
                              .format(self._time_preprocessing))

        # Pre-binning
        if self.verbose:
            self._logger.info("Pre-binning started.")

        time_prebinning = time.perf_counter()

        if self.user_splits is not None:
            n_splits = len(self.user_splits)

            if self.verbose:
                self._logger.info("Pre-binning: user splits supplied: {}"
                                  .format(n_splits))

            if not n_splits:
                splits = self.user_splits
                n_nonevent = np.array([])
                n_event = np.array([])
            else:
                if self.dtype == "numerical":
                    user_splits = check_array(
                        self.user_splits, ensure_2d=False, dtype=None,
                        force_all_finite=True)

                    if len(set(user_splits)) != len(user_splits):
                        raise ValueError("User splits are not unique.")

                    sorted_idx = np.argsort(user_splits)
                    user_splits = user_splits[sorted_idx]
                else:
                    [categories, user_splits, x_clean, y_clean, y_others,
                     cat_others, sw_clean, sw_others, sorted_idx,
                     ] = preprocessing_user_splits_categorical(
                        self.user_splits, x_clean, y_clean, sw_clean)

                if self.user_splits_fixed is not None:
                    self.user_splits_fixed = np.asarray(
                        self.user_splits_fixed)[sorted_idx]

                splits, n_nonevent, n_event = self._prebinning_refinement(
                    user_splits, x_clean, y_clean, y_missing, y_special,
                    y_others, sw_clean, sw_missing, sw_special, sw_others)
        else:
            splits, n_nonevent, n_event = self._fit_prebinning(
                x_clean, y_clean, y_missing, y_special, y_others,
                self.class_weight, sw_clean, sw_missing, sw_special, sw_others)

        self._n_prebins = len(n_nonevent)

        self._categories = categories
        self._cat_others = cat_others

        self._time_prebinning = time.perf_counter() - time_prebinning

        if self.verbose:
            self._logger.info("Pre-binning: number of prebins: {}"
                              .format(self._n_prebins))
            self._logger.info("Pre-binning: number of refinements: {}"
                              .format(self._n_refinements))

            self._logger.info("Pre-binning terminated. Time: {:.4f}s"
                              .format(self._time_prebinning))

        # Optimization
        self._fit_optimizer(splits, n_nonevent, n_event)

        # Post-processing
        if self.verbose:
            self._logger.info("Post-processing started.")
            self._logger.info("Post-processing: compute binning information.")

        time_postprocessing = time.perf_counter()

        if not len(splits):
            t_info = target_info_samples(y_clean, sw_clean)
            n_nonevent = np.array([t_info[0]])
            n_event = np.array([t_info[1]])

        self._n_nonevent, self._n_event = bin_info(
            self._solution, n_nonevent, n_event, self._n_nonevent_missing,
            self._n_event_missing, self._n_nonevent_special,
            self._n_event_special, self._n_nonevent_cat_others,
            self._n_event_cat_others, cat_others)

        if self.dtype == "numerical":
            min_x = x_clean.min()
            max_x = x_clean.max()
        else:
            min_x = None
            max_x = None

        self._binning_table = BinningTable(
            self.name, self.dtype, self._splits_optimal, self._n_nonevent,
            self._n_event, min_x, max_x, self._categories,
            self._cat_others, self.user_splits)

        self._time_postprocessing = time.perf_counter() - time_postprocessing

        if self.verbose:
            self._logger.info("Post-processing terminated. Time: {:.4f}s"
                              .format(self._time_postprocessing))

        self._time_total = time.perf_counter() - time_init

        if self.verbose:
            self._logger.info("Optimal binning terminated. Status: {}. "
                              "Time: {:.4f}s"
                              .format(self._status, self._time_total))

        # Completed successfully
        self._class_logger.close()
        self._is_fitted = True

        return self
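
# A minimal sketch of the user-split handling above: splits must be unique and are sorted
# before pre-binning, with any companion array (here user_splits_fixed) reindexed by the
# same permutation. Values are illustrative.
import numpy as np

user_splits = np.array([0.7, 0.2, 0.5])
user_splits_fixed = np.array([False, True, False])

if len(set(user_splits)) != len(user_splits):
    raise ValueError("User splits are not unique.")

sorted_idx = np.argsort(user_splits)
user_splits = user_splits[sorted_idx]
user_splits_fixed = user_splits_fixed[sorted_idx]
print(user_splits, user_splits_fixed)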
Example #48
0
def greengrass_classification_sample_run():
    client.publish(topic=PARAM_TOPIC_NAME, payload="OpenVINO: Initializing...")
    model_bin = os.path.splitext(PARAM_MODEL_XML)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    plugin = IEPlugin(device=PARAM_DEVICE, plugin_dirs="")
    if "CPU" in PARAM_DEVICE:
        plugin.add_cpu_extension(PARAM_CPU_EXTENSION_PATH)
    # Read IR
    net = IENetwork.from_ir(model=PARAM_MODEL_XML, weights=model_bin)
    assert len(
        net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(
        net.outputs) == 1, "Sample supports only single output topologies"
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape
    cap = cv2.VideoCapture(PARAM_INPUT_SOURCE)
    exec_net = plugin.load(network=net)
    del net
    client.publish(topic=PARAM_TOPIC_NAME,
                   payload="Starting inference on %s" % PARAM_INPUT_SOURCE)
    start_time = timeit.default_timer()
    inf_seconds = 0.0
    frame_count = 0
    res_json = []
    labeldata = None
    if PARAM_LABELMAP_FILE is not None:
        with open(PARAM_LABELMAP_FILE) as labelmap_file:
            labeldata = json.load(labelmap_file)

    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        frameid = cap.get(cv2.CAP_PROP_POS_FRAMES)
        initial_w = cap.get(3)
        initial_h = cap.get(4)
        in_frame = cv2.resize(frame, (w, h))
        in_frame = in_frame.transpose(
            (2, 0, 1))  # Change data layout from HWC to CHW
        in_frame = in_frame.reshape((n, c, h, w))
        # Start synchronous inference
        inf_start_time = timeit.default_timer()
        res = exec_net.infer(inputs={input_blob: in_frame})
        inf_seconds += timeit.default_timer() - inf_start_time
        top_ind = np.argsort(res[out_blob],
                             axis=1)[0, -PARAM_NUM_TOP_RESULTS:][::-1]
        # Parse detection results of the current request
        res_json = OrderedDict()
        res_json["Candidates"] = OrderedDict()
        frame_timestamp = datetime.datetime.now()

        for i in top_ind:
            classlabel = labeldata[str(i)] if labeldata else str(i)
            res_json["Candidates"][classlabel] = round(res[out_blob][0, i], 2)

        frame_count += 1
        # Measure elapsed seconds since the last report
        seconds_elapsed = timeit.default_timer() - start_time
        if seconds_elapsed >= reporting_interval:
            res_json["timestamp"] = frame_timestamp.isoformat()
            res_json["frame_id"] = int(frameid)
            res_json["inference_fps"] = frame_count / inf_seconds
            start_time = timeit.default_timer()
            report(res_json, frame)
            frame_count = 0
            inf_seconds = 0.0

    client.publish(topic=PARAM_TOPIC_NAME,
                   payload="End of the input, exiting...")
    del exec_net
    del plugin
    '''
       for Toeplitz Matrices 
    '''
    params = [0.01*np.random.randn(numInput+numHidden1),0.01*np.random.randn(numHidden1+numHidden2),0.01*np.random.randn(numHidden2+numOutput)] # Adding +1 is to counter the bias in the corresponding layer
    #params = [np.zeros(numInput+numHidden1),np.zeros(numHidden1+numHidden2),np.zeros(numHidden2+numOutput)] # Adding +1 is to counter the bias in the corresponding layer
     
    reward_episode=[]
    
    for episode in range (num_episodes):
        print('episode : ',episode)
    
        seeds = np.random.randint(10000,size=num_workers)
        reward_workers,epsilon_wi,epsilon_wh,epsilon_wo =  [list(x) for x in  zip(*main(seeds,params))]
        reward_episode.append([np.mean(reward_workers),np.median(reward_workers)])

        index_sort = np.argsort(reward_workers)
        reward_workers = np.sort(reward_workers)
        fitness = fitness_shaping_paper(reward_workers)
        
        
        print("moy reward:")
        print(np.mean(reward_workers))
        
        print("median reward:")
        print(np.median(reward_workers))
        
        print("max reward:")
        print(np.max(reward_workers))
        
        print("min reward:")
        print(np.min(reward_workers))

# Get 20CR data for the grid metadata
ic=twcr.load('prate',datetime.datetime(1969,3,12,6),
                           version='2c')
ic=ic.extract(iris.Constraint(member=1))

# Get the  autoencoder
model_save_file=("%s/Machine-Learning-experiments/"+
           "simple_autoencoder_variables/prate/"+
           "saved_models/Epoch_%04d") % (
                 os.getenv('SCRATCH'),100)
autoencoder=tf.keras.models.load_model(model_save_file)

# Get the order of the hidden weights - most to least important
order=numpy.argsort(numpy.abs(autoencoder.get_weights()[1]))[::-1]

# Make a comparison plot - Input, hidden, and output weights
fig=Figure(figsize=(10,12),  # Width, Height (inches)
           dpi=100,
           facecolor=(0.88,0.88,0.88,1),
           edgecolor=None,
           linewidth=0.0,
           frameon=False,
           subplotpars=None,
           tight_layout=None)
canvas=FigureCanvas(fig)

# Hidden layer
plot_hidden(autoencoder.get_weights()[1])
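
# A minimal sketch (with a made-up weight vector) of the ordering above: the hidden-layer
# weights are ranked from largest to smallest absolute value, most to least important.
import numpy
hidden_weights = numpy.array([0.02, -1.3, 0.7, -0.1])
order = numpy.argsort(numpy.abs(hidden_weights))[::-1]
print(order)                   # -> [1 2 3 0]
print(hidden_weights[order])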
def power_diagram(face, uv, h=None, dh=None):
    if h is None:
        h = np.zeros((uv.shape[0], 1))

    if dh is None:
        dh = h * 0

    nf = face.shape[0]
    c = 1

    while True:
        h = h - c * dh
        pl = np.concatenate((uv, np.reshape(np.square(norm(uv, axis=1)), (-1, 1)) - h), axis=1)
        hull = ConvexHull(pl, qhull_options='Qt')
        face = hull.simplices
        # fix-ups for the convex hull, as the orientation may be inverted
        fn_from_hull = hull.equations[:,2]
        fn = calculate_face_normal(face, pl)
        for i in range(face.shape[0]):
            if fn[i,2] * fn_from_hull[i] < 0:  # orientation differs
                face[i,:] = face[i,[0, 2, 1]]



        for i in range(face.shape[0]):
            mif = np.argmin(face[i,:])
            face[i, :] = face[i, np.mod(np.arange(mif,mif+3),3)]
        face = face[np.argsort(face[:, 0] * np.max(face) + face[:, 1]), :]
        fn = calculate_face_normal(face, pl)
        ind = fn[:, 2] < 0

        if np.sum(ind) < nf:
            h = h + c * dh
            c = c / 2
        else:
            break

        if np.max(abs(dh)) == 0:
            break

    fn = calculate_face_normal(face, pl)
    ind = fn[:, 2] < 0
    face = face[ind, :]
    pd = dict()
    pd['face'] = face
    vr = compute_vertex_ring(face, uv, ordered=True)
    pd['uv'] = uv
    pd['dp'] = np.zeros((face.shape[0], 2))
    pd['cell'] = [[] for i in range(pl.shape[0])]

    for i in range(face.shape[0]):
        dp = face_dual_uv(pl[face[i,:],:])
        pd['dp'][i,:] = dp

    K =  ConvexHull(uv, qhull_options='Qt').vertices
    ks = np.argmin(K)
    K = np.concatenate((K[ks::],  K[0:ks]), axis=0)
    K = np.append(K,K[0])
    vb = np.zeros((K.shape[0] - 1, 2))
    mindp = np.min(pd["dp"], axis=0) - 1
    maxdp = np.max(pd["dp"], axis=0) + 1
    minx = mindp[0]
    miny = mindp[1]
    maxx = maxdp[0]
    maxy = maxdp[1]
    box = np.array([minx, miny, maxx, miny, maxx, maxy, minx, maxy, minx, miny]).reshape((-1,2))

    for i in range(K.shape[0]- 1):
        i1 = K[i]
        i2 = K[i + 1]
        vec = uv[i2,:] - uv[i1,:]
        vec = np.array([vec[1], -vec[0]])
        mid = (uv[i2,:] + uv[i1,:]) / 2.0
        intersect = intersectRayPolygon(mid, vec, box)
        vb[i,:] = intersect

    pd["dpe"] = np.concatenate((pd["dp"], vb), axis=0)

    vvif, _, _= compute_connectivity(face)

    for i in range(uv.shape[0]):
        vri = vr[i]
        pb = np.argwhere(K==i)
        if pb.size > 0 :
            pb = pb[0][0]
            fr = np.zeros((len(vri) + 1,)).astype(int)
            fr[-1] = face.shape[0] + pb
            if pb == 0:
                fr[0] = face.shape[0] + K.shape[0]-2
            else:
                fr[0] = face.shape[0] + pb - 1
            for j in range(len(vri) - 1):
                fr[j+1] = vvif[i, vri[j]]
        else:
            fr = np.zeros((len(vri),)).astype(int)
            for j in range(len(vri)):
                fr[j] = vvif[i, vri[j]]
        pd["cell"][i] = np.flip(fr)

    return pd, h
Example #52
0
    def plda(
        self,
        stat_server=None,
        output_file_name=None,
        whiten=False,
        w_stat_server=None,
    ):
        """Trains PLDA model with no within class covariance matrix but full residual covariance matrix.

        Arguments
        ---------
        stat_server : speechbrain.processing.PLDA_LDA.StatObject_SB
            Contains vectors and meta-information to perform PLDA
        rank_f : int
            Rank of the between-class covariance matrix.
        nb_iter : int
            Number of iterations to run.
        scaling_factor : float
            Scaling factor to downscale statistics (value between 0 and 1).
        output_file_name : str
            Name of the output file where to store PLDA model.
        """

        # Dimension of the vector (x-vectors stored in stat1)
        vect_size = stat_server.stat1.shape[1]  # noqa F841

        # Whitening (Optional)
        if whiten is True:
            w_mean = w_stat_server.get_mean_stat1()
            w_Sigma = w_stat_server.get_total_covariance_stat1()
            stat_server.whiten_stat1(w_mean, w_Sigma)

        # Initialize mean and residual covariance from the training data
        self.mean = stat_server.get_mean_stat1()
        self.Sigma = stat_server.get_total_covariance_stat1()

        # Sum stat0 and stat1 for each speaker model
        model_shifted_stat, session_per_model = stat_server.sum_stat_per_model(
        )

        # Number of speakers (classes) in training set
        class_nb = model_shifted_stat.modelset.shape[0]

        # Multiply statistics by scaling_factor
        model_shifted_stat.stat0 *= self.scaling_factor
        model_shifted_stat.stat1 *= self.scaling_factor
        session_per_model *= self.scaling_factor

        # Covariance for stat1
        sigma_obs = stat_server.get_total_covariance_stat1()
        evals, evecs = linalg.eigh(sigma_obs)

        # Initial F (eigen voice matrix) from rank
        idx = numpy.argsort(evals)[::-1]
        evecs = evecs.real[:, idx[:self.rank_f]]
        self.F = evecs[:, :self.rank_f]

        # Estimate PLDA model by iterating the EM algorithm
        for it in range(self.nb_iter):

            # E-step
            # print(
            #    f"E-step: Estimate between class covariance, it {it+1} / {nb_iter}"
            # )

            # Copy stats as they will be whitened with a different Sigma for each iteration
            local_stat = copy.deepcopy(model_shifted_stat)

            # Whiten statistics (with the new mean and Sigma)
            local_stat.whiten_stat1(self.mean, self.Sigma)

            # Whiten the EigenVoice matrix
            eigen_values, eigen_vectors = linalg.eigh(self.Sigma)
            ind = eigen_values.real.argsort()[::-1]
            eigen_values = eigen_values.real[ind]
            eigen_vectors = eigen_vectors.real[:, ind]
            sqr_inv_eval_sigma = 1 / numpy.sqrt(eigen_values.real)
            sqr_inv_sigma = numpy.dot(eigen_vectors,
                                      numpy.diag(sqr_inv_eval_sigma))
            self.F = sqr_inv_sigma.T.dot(self.F)

            # Replicate self.stat0
            index_map = numpy.zeros(vect_size, dtype=int)
            _stat0 = local_stat.stat0[:, index_map]

            e_h = numpy.zeros((class_nb, self.rank_f))
            e_hh = numpy.zeros((class_nb, self.rank_f, self.rank_f))

            # loop on model id's
            fa_model_loop(
                batch_start=0,
                mini_batch_indices=numpy.arange(class_nb),
                factor_analyser=self,
                stat0=_stat0,
                stat1=local_stat.stat1,
                e_h=e_h,
                e_hh=e_hh,
            )

            # Accumulate for minimum divergence step
            _R = numpy.sum(e_hh, axis=0) / session_per_model.shape[0]

            _C = e_h.T.dot(local_stat.stat1).dot(linalg.inv(sqr_inv_sigma))
            _A = numpy.einsum("ijk,i->jk", e_hh, local_stat.stat0.squeeze())

            # M-step
            # print("M-step")
            self.F = linalg.solve(_A, _C).T

            # Update the residual covariance
            self.Sigma = sigma_obs - self.F.dot(_C) / session_per_model.sum()

            # Minimum Divergence step
            self.F = self.F.dot(linalg.cholesky(_R))
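
# A minimal sketch of the eigenvoice initialisation above: eigenvalues from linalg.eigh come
# in ascending order, so argsort()[::-1] reorders the eigenvectors from largest to smallest
# eigenvalue before keeping the leading rank_f of them. The covariance and rank are toy values.
import numpy
from scipy import linalg

sigma_obs = numpy.array([[2.0, 0.5], [0.5, 1.0]])
rank_f = 1
evals, evecs = linalg.eigh(sigma_obs)
idx = numpy.argsort(evals)[::-1]
F = evecs.real[:, idx[:rank_f]]
print(F.shape)  # -> (2, 1), the dominant eigenvector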
Example #53
0
    def __initasteroid(self):
        """Initialize the asteroid properties
        """
        # define the mass properties of the asteroid
        if self.name == 'castalia':
            self.M = 1.4091e12
            self.sigma = 2.1  # g/cm^3
            self.axes = np.array([1.6130, 0.9810, 0.8260]) / 2.0
            self.omega = 2 * np.pi / 4.07 / 3600

            # self.C20 = -7.275e-2
            # self.C22 = 2.984e-2
        elif self.name == 'itokawa':
            self.M = 3.51e10
            self.sigma = 1.9  # g/cm^3
            self.axes = np.array([535, 294, 209]) / 2 / 1.0e3  # size in meters
            self.omega = 2 * np.pi / 12.132 / 3600

        elif self.name == 'eros':
            self.M = 4.463e-4 / self.G
            self.sigma = 2.67 # g/cm^3
            self.axes = np.array([34.4, 11.7, 11.7])  # size in kilometers
            self.omega = 2 * np.pi / 5.27 / 3600
        elif self.name == 'cube':
            self.M = 1
            self.sigma = 1
            self.axes=np.array([0.9, 1.0, 1.1])
            self.omega = 1
        elif self.name == 'tetrahedron':
            self.M = 1
            self.sigma = 1
            self.axes = np.array([0.9, 1.0, 1.1])
            self.omega = 1
        else:
            self.logger.error('Unknown asteroid name : {}'.format(self.name))
            self.logger.error('Just assuming default values')
            self.M = 1
            self.sigma = 1
            self.axes = np.array([0.9, 1.0, 1.1])
            self.omega = 1

        self.mu = self.G * self.M
        self.sigma = self.sigma / 1000 * \
            (100 / 1)**3 * (1000 / 1)**3  # kg/km^3

        # Compute some inertia properties
        self.Ixx = self.M / 5 * (self.axes[1]**2 + self.axes[2]**2)
        self.Iyy = self.M / 5 * (self.axes[0]**2 + self.axes[2]**2)
        self.Izz = self.M / 5 * (self.axes[0]**2 + self.axes[1]**2)

        self.mass_param = (self.Iyy - self.Ixx) / (self.Izz - self.Ixx)
        self.res_radius = (self.mu / self.omega**2)**(1.0 / 3)
        self.dist_scale = self.res_radius
        self.time_scale = self.omega
        self.C20 = -1.0 / 2 * (self.Izz - self.Ixx) * \
            (2 - self.mass_param) / self.dist_scale**2 / self.M
        self.C22 = 1.0 / 4 * (self.Izz - self.Ixx) * \
            self.mass_param / self.dist_scale**2 / self.M
        # calculate the distance
        self.r = np.sqrt(self.V[:, 0]**2 + self.V[:, 1]**2 + self.V[:, 2]**2)
        self.long = np.arctan2(self.V[:, 1], self.V[:, 0]) * 180 / np.pi
        self.lat = np.arcsin(self.V[:, 2] / self.r) * 180 / np.pi

        # sort in order of increasing radius
        index = np.argsort(self.r)
        self.r = self.r[index]
        self.long = self.long[index]
        self.lat = self.lat[index]
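# The reordering above applies one argsort index to several parallel arrays so
# that radius, longitude and latitude stay aligned; a self-contained sketch of
# that pattern with toy values:
import numpy as np

r = np.array([3.0, 1.0, 2.0])
lon = np.array([10.0, 20.0, 30.0])
lat = np.array([-5.0, 0.0, 5.0])

index = np.argsort(r)                 # indices that sort r ascending
r, lon, lat = r[index], lon[index], lat[index]
print(r)    # [1. 2. 3.]
print(lon)  # [20. 30. 10.]
print(lat)  # [ 0.  5. -5.]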
Example #54
0
               col].set_title("Predicted label :{}\nTrue label :{}".format(
                   pred_errors[error], obs_errors[error]))
            n += 1


# Probabilities of the wrong predicted numbers
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)

# Predicted probabilities of the true values in the error set
true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))

# Difference between the probability of the predicted label and the true label
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Indices of the errors sorted by ascending delta between predicted-label and true-label probability
sorted_delta_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors (the largest deltas sit at the end of the ascending sort)
most_important_errors = sorted_delta_errors[-6:]

# Show the top 6 errors
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors,
               Y_true_errors)

plt.show()

results = model.predict(x_test)

# select the index with the maximum probability
results = np.argmax(results, axis=1)
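# A small sketch of the "most important errors" selection above: argsort is
# ascending, so the last entries of the sorted delta array are the predictions
# that were most confidently wrong (toy probabilities, size 2 instead of 6):
import numpy as np

pred_prob = np.array([0.9, 0.6, 0.8, 0.7])   # probability of the predicted (wrong) label
true_prob = np.array([0.1, 0.5, 0.2, 0.65])  # probability of the true label
delta = pred_prob - true_prob                # [0.8, 0.1, 0.6, 0.05]

worst = np.argsort(delta)[-2:]
print(worst)  # [2 0]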
Example #55
0
#######################################################       CREATE GIFS       ########################################################
max_gif_steps = 200
delay = 20
for i, exp_folder in enumerate(onlyfolders):
    print('\nCreating gifs for experiment: ', exp_folder)
    result_paths = ''
    for subdir in [
            'dataset_plotter_data_only', 'dataset_plotter_data_real',
            'plot2D_dist', 'plot2D_dist_b_labeled', 'barplot', 'Train/Fixed/0'
    ]:
        subdir_path = all_experiments_dir + exp_folder + '/Visualization/' + subdir + '/*.png'
        files = glob.glob(subdir_path)
        try:
            order = list(
                np.argsort(
                    [int(filename.split('_')[-3]) for filename in files]))
        except (ValueError, IndexError):
            order = list(
                np.argsort([
                    int(filename.split('_')[-1][:-len('.png')])
                    for filename in files
                ]))
        ordered_files = [files[ind] for ind in order]
        ordered_files = ordered_files[:max_gif_steps]
        ordered_files_str = ''
        for f in ordered_files:
            ordered_files_str = ordered_files_str + ' ' + f

        print('Creating gif for', subdir, '(number of images:',
              len(ordered_files), ')')
        # os.system('convert -resize 800x800 -delay '+str(delay)+' -loop 0 '+ordered_files_str+' '+all_experiments_dir+exp_folder+'/Visualization/'+subdir+'.gif')
Example #56
0
                                       verbose=1)

    with open('predicts.pkl', 'wb') as f:
        pickle.dump(predicts, f)


with open('predicts.pkl', 'rb') as f:
    predicts = pickle.load(f)

print('predicts', predicts.shape)

# Loop through all images
for p in tqdm(predicts):
    # Predict Top N Image Classes
    # print('p', p)
    topn_preds = np.argsort(p)[::-1][0:topn]
    # print(topn_preds)

    p0.append(topn_preds[0])
    p1.append(topn_preds[1])
    p2.append(topn_preds[2])

# Create dataframe for later usage
topn_df = pd.DataFrame()
topn_df['filename'] = np.array(all_images)
topn_df['p0'] = np.array(p0)
topn_df['p1'] = np.array(p1)
topn_df['p2'] = np.array(p2)
topn_df.to_csv('topn_class_numbers.csv', index=False)

# Summary
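# The top-N selection above boils down to reversing an ascending argsort; a
# toy sketch (hypothetical probability vector and topn):
import numpy as np

p = np.array([0.05, 0.60, 0.10, 0.25])
topn = 3
topn_preds = np.argsort(p)[::-1][:topn]   # class indices by descending probability
print(topn_preds)                         # [1 3 2]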
Example #57
0
subcode = 'sub-01'

for o in origdirs:
    for r in range(1, 3):
        rundata = []
        rundir = os.path.join(o, 'run%d' % r)
        infiles = glob.glob(os.path.join(rundir, '*txt'))
        cond = []
        for i in infiles:
            condition = os.path.basename(i).replace('.txt', '')
            for l in open(i).readlines():
                l_s = [float(j) for j in l.strip().split('\t')]
                rundata.append(l_s[:2])
                cond.append(condition)
        rundata_array = numpy.array(rundata)
        idx = numpy.argsort(rundata_array[:, 0])  # sort events by onset time
        sesscode = os.path.basename(o).replace('sess', 'ses-')
        odir = os.path.join(outdir, '%s/%s/func' % (subcode, sesscode))
        if not os.path.exists(odir):
            os.makedirs(odir)
        outfile = os.path.join(
            odir,
            '%s_%s_task-objects_run-%03d_events.tsv' % (subcode, sesscode, r))

        f = open(outfile, 'w')
        f.write('onset\tduration\tcondition\n')
        for i in idx:
            f.write('%s\t%s\n' % ('\t'.join(['%f' % j
                                             for j in rundata[i]]), cond[i]))
        f.close()
Example #58
0
    def local_(self):
        bin_var = self.parameters.y[0]
        control_variable = self.parameters.x[0]
        outcome_pos = self.parameters.outcome_pos
        outcome_neg = self.parameters.outcome_neg
        total_duration = self.parameters.total_duration

        data = self.data.db.read_longitudinal_data_from_db(self._args)
        data.replace("", np.nan, inplace=True)
        data = data.dropna()
        data = data[(data[bin_var] == outcome_pos) | (data[bin_var] == outcome_neg)]
        if len(data) < PRIVACY_MAGIC_NUMBER:
            raise PrivacyError("Query results in illegal number of datapoints.")
        levels = list(set(data[control_variable]))
        data_dict = {level: data[data[control_variable] == level] for level in levels}

        timelines_dict = {
            k: build_timelines(d, time_axis="subjectvisitdate", var=bin_var)
            for k, d in data_dict.items()
        }
        # Remove patients who tested positive on first visit
        for key, timelines in timelines_dict.items():
            timelines = [tl for tl in timelines if tl[1][0] != outcome_pos]
            timelines_dict[key] = timelines
        durations_dict = {}
        events_dict = {}
        for k, tl in timelines_dict.items():
            durations_dict[k], events_dict[k] = convert_timelines_to_events(
                total_duration, outcome_pos, tl
            )

        grouped_durations_observed_dict = {}
        grouped_durations_non_observed_dict = {}
        for key, events in events_dict.items():
            durations = durations_dict[key]
            # Sort events by ascending duration
            idx = np.argsort(durations)
            events = events[idx]
            durations = durations[idx]

            # Split events into observed and non_observed groups
            durations_observed = np.array(
                [d for d, e in zip(durations, events) if e == 1]
            )
            durations_non_observed = np.array(
                [total_duration for e in events if e == 0]
            )

            # Remove some observations at random to allow grouping (see below)
            n_rem_o = len(durations_observed) % PRIVACY_MAGIC_NUMBER
            if n_rem_o:
                idx_rem = np.random.permutation(len(durations_observed))[:n_rem_o]
                durations_observed = np.delete(durations_observed, idx_rem)

            n_rem_n = len(durations_non_observed) % PRIVACY_MAGIC_NUMBER
            if n_rem_n:
                idx_rem = np.random.permutation(len(durations_non_observed))[:n_rem_n]
                durations_non_observed = np.delete(durations_non_observed, idx_rem)

            # Group observations by multiples of PRIVACY_MAGIC_NUMBER
            grouped_durations_observed_dict[key] = []
            for group in durations_observed.reshape(-1, PRIVACY_MAGIC_NUMBER):
                grouped_durations_observed_dict[key] += [group[-1]]
            grouped_durations_non_observed_dict[key] = []
            for group in durations_non_observed.reshape(-1, PRIVACY_MAGIC_NUMBER):
                grouped_durations_non_observed_dict[key] += [group[-1]]

        if all(not val for val in grouped_durations_observed_dict.values()):
            msg = (
                "There are not enough transitions from {neg} to {pos} in the data. "
                "Please try with different values".format(
                    pos=outcome_pos, neg=outcome_neg
                )
            )
            raise ExaremeError(msg)

        self.push_and_concat(
            grouped_durations_observed_dict=grouped_durations_observed_dict
        )
        self.push_and_concat(
            grouped_durations_non_observed_dict=grouped_durations_non_observed_dict
        )
        self.push_and_agree(control_variable=control_variable)
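# The grouping step above first sorts durations, trims the remainder at random,
# then reshapes into rows of PRIVACY_MAGIC_NUMBER and keeps the last (largest)
# duration of each row; an isolated sketch with a toy value of the constant:
import numpy as np

PRIVACY_MAGIC_NUMBER = 3                                   # toy value for illustration
durations = np.sort(np.array([5, 1, 9, 2, 7, 4, 8]))       # ascending, as in the code above

n_rem = len(durations) % PRIVACY_MAGIC_NUMBER
if n_rem:
    idx_rem = np.random.permutation(len(durations))[:n_rem]
    durations = np.delete(durations, idx_rem)              # still sorted after deletion

grouped = [group[-1] for group in durations.reshape(-1, PRIVACY_MAGIC_NUMBER)]
print(grouped)  # one representative duration per group of 3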
Example #59
0
    print('kb_rel_basis_pred:', kb_rel_basis_pred.shape)
    print('kb_rel_mask:', kb_rel_mask.shape)
    # kb_rel_mask = (torch.arange(kb_rel_basis_pred.shape[1]) < args.n_basis_kb)\
    # .to(dtype=kb_rel_basis_pred.dtype, device=kb_rel_basis_pred.device).view(1, -1, 1).expand(kb_rel_basis_pred.shape)

    kb_rel_basis_pred = kb_rel_basis_pred * kb_rel_mask
    kb_rel_basis_pred_norm = kb_rel_basis_pred / (
        1e-12 + kb_rel_basis_pred.norm(dim=2, keepdim=True))
    kb_rel_output_emb_norm = kb_rel_output_emb / (
        1e-12 + kb_rel_output_emb.norm(dim=2, keepdim=True))

    coeff_sum = coeff_pred.cpu().detach().numpy()
    coeff_sum_diff = coeff_pred[:, :, 0] - coeff_pred[:, :, 1]
    coeff_sum_diff_pos = coeff_sum_diff.clamp(min=0)
    coeff_sum_diff_cpu = coeff_sum_diff.cpu().detach().numpy()
    coeff_order = np.argsort(coeff_sum_diff_cpu, axis=1)
    coeff_order = np.flip(coeff_order, axis=1)

    log.info("Basis preds and relation embs obtained.")
    # Convert basis pred and emb to numpy ndarrays
    kb_rel_basis_pred_np = kb_rel_basis_pred_norm.cpu().detach().numpy()
    kb_rel_output_emb_np = kb_rel_output_emb_norm.cpu().detach().numpy()

    # Find nearest entity pairs for each dimension
    basis_norm_pred = kb_rel_basis_pred_norm.permute(0, 2, 1)
    top_values = []
    top_indices = []
    for basis_norm_pred_batch in basis_norm_pred:
        sim_pairwise = torch.matmul(target_norm_emb,
                                    basis_norm_pred_batch).unsqueeze(0)
        top_value, top_index = torch.topk(sim_pairwise,
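# The call above is truncated; it appears to take, for every basis vector, the
# k embeddings with the highest cosine similarity. A self-contained sketch under
# that assumption (all shapes and names here are hypothetical):
import torch

vocab, dim, n_basis, k = 100, 16, 4, 5
target_norm_emb = torch.nn.functional.normalize(torch.randn(vocab, dim), dim=1)
basis_norm_pred_batch = torch.nn.functional.normalize(torch.randn(dim, n_basis), dim=0)

# (vocab, dim) @ (dim, n_basis) -> cosine similarity of every embedding to every basis
sim_pairwise = torch.matmul(target_norm_emb, basis_norm_pred_batch).unsqueeze(0)
top_value, top_index = torch.topk(sim_pairwise, k, dim=1)   # k closest embeddings per basis
print(top_index.shape)  # torch.Size([1, 5, 4])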
Example #60
0
    def prioritise(self):
        # reorder the queue so that it follows ascending self.order
        index = np.argsort(self.order)
        self.order = self.order[index]
        self.queue = self.queue[index]
        for i, node in enumerate(self.queue):
            node.q_pos = i - 1