Example #1
def test_reductions_2D_int():
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.argmax(a, axis=1), np.argmax(x, axis=1))
    assert eq(da.argmin(a, axis=1), np.argmin(x, axis=1))
    assert eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1))
    assert eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
Example #2
def dynamic(quality_matrix):
  size = quality_matrix.shape[0]
  optimal_score = np.empty(size)
  optimal_score.fill(-np.inf)
  optimal_score[0] = 0
  previous_end = np.empty(size)
  previous_end.fill(-1)
  domain_defining = np.empty(size)
  np.set_printoptions(threshold=np.inf)  # np.nan is not a valid threshold in recent NumPy
  for i in range(size):
    cand_nodomain = np.nanargmax(optimal_score)
    with_domain = optimal_score + quality_matrix[:, i]
    cand_domain = np.nanargmax(with_domain)
    if optimal_score[cand_nodomain] > with_domain[cand_domain]:
      domain_defining[i] = 0
      previous_end[i] = cand_nodomain
      optimal_score[i] = optimal_score[cand_nodomain]
    else:
      domain_defining[i] = 1
      previous_end[i] = cand_domain
      optimal_score[i] = with_domain[cand_domain]
  current_end = size - 2 
  result = []
  while current_end > 0:
    if domain_defining[current_end] == 1:
      result.append(Domain(Bin(previous_end[current_end]), Bin(current_end), 0))
    current_end = int(previous_end[current_end])  # cast: previous_end holds floats
  return result[::-1]
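
A minimal, self-contained sketch of the np.nanargmax call used in the loop above (the score values here are made up): NaN entries are skipped when locating the maximum.

import numpy as np

scores = np.array([0.4, np.nan, 0.9, np.nan])
best = np.nanargmax(scores)   # NaNs are ignored when finding the maximum
print(best, scores[best])     # -> 2 0.9
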
Example #3
    def get_max_social_welfare(self, by_role=False):
        """Returns the maximum social welfare over the known profiles.

        If by_role is specified, then max social welfare applies to each role
        independently."""
        if by_role:
            if self.num_profiles:
                welfares = self.role_reduce(self.profiles * self.payoffs)
                prof_inds = np.nanargmax(welfares, 0)
                return (welfares[prof_inds, np.arange(self.num_roles)],
                        self.profiles[prof_inds])
            else:
                welfares = np.empty(self.num_roles)
                welfares.fill(np.nan)
                profiles = np.empty(self.num_roles, dtype=object)
                profiles.fill(None)
                return welfares, profiles

        else:
            if self.num_profiles:
                welfares = np.sum(self.profiles * self.payoffs, 1)
                prof_ind = np.nanargmax(welfares)
                return welfares[prof_ind], self.profiles[prof_ind]
            else:
                return np.nan, None
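
For reference, np.nanargmax with axis=0 picks the best row per column while skipping NaNs, which is how the per-role profile indices above are obtained; a small sketch with made-up welfare values:

import numpy as np

welfares = np.array([[1.0, np.nan],
                     [2.5, 0.5],
                     [np.nan, 3.0]])
prof_inds = np.nanargmax(welfares, 0)   # best profile index per role (column)
print(prof_inds)                        # -> [1 2]
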
Example #4
def test_reductions_1D(dtype):
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))

    assert eq(da.argmax(a, axis=0, split_every=2), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0, split_every=2), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0, split_every=2), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0, split_every=2), np.nanargmin(x, axis=0))
Example #5
def predict_ana( model, a, a2, b, realb2 ):
    questWordIndices = [ model.word2id[x] for x in (a,a2,b) ]
    # b2 is effectively iterating through the vocab. The row is all the cosine values
    b2a2 = model.sim_row(a2)
    b2a  = model.sim_row(a)
    b2b  = model.sim_row(b)
    addsims = b2a2 - b2a + b2b

    addsims[questWordIndices] = -10000

    iadd = np.nanargmax(addsims)
    b2add  = model.vocab[iadd]

    # For debugging purposes
    ia = model.word2id[a]
    ia2 = model.word2id[a2]
    ib = model.word2id[b]
    ib2 = model.word2id[realb2]
    realaddsim = addsims[ib2]

    mulsims = ( b2a2 + 1 ) * ( b2b + 1 ) / ( b2a + 1.001 )
    mulsims[questWordIndices] = -10000
    imul = np.nanargmax(mulsims)
    b2mul  = model.vocab[imul]

    return b2add, b2mul
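
The mask-then-nanargmax pattern used for the analogy prediction above, shown in isolation; the similarity values and excluded index below are hypothetical:

import numpy as np

addsims = np.array([0.1, 0.7, np.nan, 0.9, 0.3])  # hypothetical similarity scores
quest_word_indices = [3]                          # exclude the query words themselves
addsims[quest_word_indices] = -10000
print(np.nanargmax(addsims))                      # -> 1
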
def extract_stamp(im, xy, box_size):
    """ Extracts stamp centered on star/spot in image based on initial guess
    Args:
        image - a slice of the original data cube
        xy - initial xy coordinate guess to center of spot
        box_size - size of stamp to be extracted (actually, size of radial mask, box is 4 pixels bigger)
    Return:
        output - box cutout of spot with optimized center 
    """
    
    box_size = float(box_size)
    xguess = float(xy[0])
    yguess = float(xy[1])

    #Extracts a 10px stamp centered on the guess and refines based on maximum pixel location
    for i in range(0, 2):
        x,y = gen_xy(10.0)
        x += (xguess-10/2.)
        y += (yguess-10/2.)
        output = pixel_map(im,x,y)
        xguess = x[np.unravel_index(np.nanargmax(output), np.shape(output))]
        yguess = y[np.unravel_index(np.nanargmax(output), np.shape(output))]

    #Fits location of star/spot
    xc,yc = return_pos(output, (xguess,yguess), x,y)
    
    #Extracts a box_size + 4 width stamp centered on exact position
    x,y = gen_xy(box_size+4)
    x += (xc-np.round((box_size+4)/2.))
    y += (yc-np.round((box_size+4)/2.))
    output = pixel_map(im,x,y)

    return output
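
A minimal sketch of the np.unravel_index(np.nanargmax(...)) idiom used above to locate the brightest finite pixel in a 2-D stamp (pixel values are made up):

import numpy as np

stamp = np.array([[0.1, np.nan, 0.3],
                  [0.8, 0.2,    np.nan]])
row, col = np.unravel_index(np.nanargmax(stamp), stamp.shape)
print(row, col)   # -> 1 0, the brightest non-NaN pixel
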
Example #7
def max_pure_social_welfare(game, *, by_role=False):
    """Returns the maximum social welfare over the known profiles.

    If by_role is specified, then max social welfare applies to each role
    independently. If there are no profiles with full payoff data for a role,
    an arbitrary profile will be returned."""
    if by_role: # pylint: disable=no-else-return
        if game.num_profiles: # pylint: disable=no-else-return
            welfares = np.add.reduceat(
                game.profiles() * game.payoffs(), game.role_starts, 1)
            prof_inds = np.nanargmax(welfares, 0)
            return (welfares[prof_inds, np.arange(game.num_roles)],
                    game.profiles()[prof_inds])
        else:
            welfares = np.full(game.num_roles, np.nan)
            profiles = np.full(game.num_roles, None)
            return welfares, profiles

    else:
        if game.num_complete_profiles: # pylint: disable=no-else-return
            welfares = np.einsum('ij,ij->i', game.profiles(), game.payoffs())
            prof_ind = np.nanargmax(welfares)
            return welfares[prof_ind], game.profiles()[prof_ind]
        else:
            return np.nan, None
Example #8
    def _single_node_deletion(self, chroms, genes, samples):
        """
        The single node deletion routine of the algorithm.

        Parameters
        ----------
        chroms : ndarray
            Contains 1 for a chromosome pair that belongs to the tricluster
            currently examined, 0 otherwise.
        genes : ndarray
            Contains 1 for a gene that belongs to the tricluster currently
            examined, 0 otherwise.
        samples : ndarray
            Contains 1 for a sample that belongs to the tricluster currently
            examined, 0 otherwise.

        Returns
        -------
        chroms : ndarray
            Contains 1 for a chromosome pair that belongs to the tricluster
            examined, 0 otherwise.
        genes : ndarray
            Contains 1 for a gene that belongs to the tricluster examined,
            0 otherwise.
        samples : ndarray
            Contains 1 for a sample that belongs to the tricluster examined,
            0 otherwise.
        """
        self._compute_MSR(chroms, genes, samples)

        while (self.MSR > self.delta):
            chrom_idx = np.nanargmax(self.MSR_chrom)
            gene_idx = np.nanargmax(self.MSR_gene)
            sample_idx = np.nanargmax(self.MSR_sample)

            with warnings.catch_warnings():  # We expect mean of NaNs here
                warnings.simplefilter("ignore", category=RuntimeWarning)
                if (self.MSR_chrom[chrom_idx] > self.MSR_gene[gene_idx]):
                    if (self.MSR_chrom[chrom_idx] > self.MSR_sample[sample_idx]):
                        # Delete chrom
                        nonz_idx = chroms.nonzero()[0]
                        chroms.put(nonz_idx[chrom_idx], 0)
                    else:
                        # Delete sample
                        nonz_idx = samples.nonzero()[0]
                        samples.put(nonz_idx[sample_idx], 0)
                else:
                    if (self.MSR_gene[gene_idx] > self.MSR_sample[sample_idx]):
                        # Delete gene
                        nonz_idx = genes.nonzero()[0]
                        genes.put(nonz_idx[gene_idx], 0)
                    else:
                        # Delete sample
                        nonz_idx = samples.nonzero()[0]
                        samples.put(nonz_idx[sample_idx], 0)

            self._compute_MSR(chroms, genes, samples)

        return chroms, genes, samples
def get_best_threshold(y_ref, y_pred_score, plot=True):
    """ Get threshold on scores that maximizes f1 score.

    Parameters
    ----------
    y_ref : array
        Reference labels (binary).
    y_pred_score : array
        Predicted scores.
    plot : bool
        If true, plot ROC curve

    Returns
    -------
    best_threshold : float
        threshold on score that maximized f1 score
    max_fscore : float
        f1 score achieved at best_threshold
    """
    pos_weight = 1.0 - float(len(y_ref[y_ref == 1]))/float(len(y_ref))
    neg_weight = 1.0 - float(len(y_ref[y_ref == 0]))/float(len(y_ref))
    sample_weight = np.zeros(y_ref.shape)
    sample_weight[y_ref == 1] = pos_weight
    sample_weight[y_ref == 0] = neg_weight

    print "max prediction value = %s" % np.max(y_pred_score)
    print "min prediction value = %s" % np.min(y_pred_score)

    precision, recall, thresholds = \
            metrics.precision_recall_curve(y_ref, y_pred_score, pos_label=1,
                                           sample_weight=sample_weight)
    beta = 1.0
    btasq = beta**2.0
    fbeta_scores = (1.0 + btasq)*(precision*recall)/((btasq*precision)+recall)

    max_fscore = fbeta_scores[np.nanargmax(fbeta_scores)]
    best_threshold = thresholds[np.nanargmax(fbeta_scores)]

    if plot:
        plt.figure(1)
        plt.subplot(1, 2, 1)
        plt.plot(recall, precision, '.b', label='PR curve')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.legend(loc="lower right", frameon=True)
        plt.subplot(1, 2, 2)
        plt.plot(thresholds, fbeta_scores[:-1], '.r', label='f1-score')
        plt.xlabel('Probability Threshold')
        plt.ylabel('F1 score')
        plt.show()

    plot_data = (recall, precision, thresholds, fbeta_scores[:-1])

    return best_threshold, max_fscore, plot_data
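
The F-score curve above can contain NaNs where precision and recall are both zero; np.nanargmax still returns the index of the best finite score. A standalone sketch with made-up precision/recall values:

import numpy as np

precision = np.array([1.0, 0.8, 0.0])
recall = np.array([0.0, 0.5, 0.0])
with np.errstate(invalid='ignore'):
    fbeta = 2.0 * precision * recall / (precision + recall)  # last entry is 0/0 -> NaN
print(np.nanargmax(fbeta))   # -> 1, the NaN entry is ignored
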
def mapmean(tempDF, meta, name = '', option = 0): 
    import cartopy.crs as ccrs
    from cartopy.io.img_tiles import MapQuestOSM
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    #fig  = plt.figure(figsize=(30, 30))
    x = meta['location:Longitude'].values
    y = meta['location:Latitude'].values
    c = tempDF[meta.index].mean()
    marker_size = 350 
    imagery = MapQuestOSM()
    fig = plt.figure(figsize=[15,15])
    ax = plt.axes(projection=imagery.crs)
    
    ax.set_extent(( meta['location:Longitude'].min()-.005, 
                   meta['location:Longitude'].max()+.005 , 
                   meta['location:Latitude'].min()-.005,
                   meta['location:Latitude'].max()+.005))
    ax.add_image(imagery, 14)

    cmap = matplotlib.cm.OrRd
    bounds = np.linspace(round((c.mean()-3)),round((c.mean()+3)),13)
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
    plotHandle = ax.scatter(x,y,c = c, s = marker_size, transform=ccrs.Geodetic(), 
                 cmap = cmap,
                 norm = norm)
    
    if option ==0 : 
        cbar1 = plt.colorbar(plotHandle, label = 'Temperature in $^\circ $C')
    else : 
        cbar1 = plt.colorbar(plotHandle, label = option)

    lon = x[np.nanargmax(c)]
    lat = y[np.nanargmax(c)]
    at_x, at_y = ax.projection.transform_point(lon, lat,
                                               src_crs=ccrs.Geodetic())
    plt.annotate(
        '%2.1f'%np.nanmax(c.values), xy=(at_x, at_y), #xytext=(30, 20), textcoords='offset points',
        color='black', backgroundcolor='none', size=22,
        )

    lon = x[np.nanargmin(c)]
    lat = y[np.nanargmin(c)]
    at_x, at_y = ax.projection.transform_point(lon, lat,
                                               src_crs=ccrs.Geodetic())
    plt.annotate(
        '%2.1f'%np.nanmin(c.values), xy=(at_x, at_y), #xytext=(30, 20), textcoords='offset points',
        color='black', size = 22, backgroundcolor='none')

    plt.annotate(
        '$\mu = $ %2.1f, $\sigma = $ %2.1f'%(np.nanmean(c.values), np.nanstd(c.values)), (0.01,0.01), xycoords ='axes fraction', #xytext=(30, 20), textcoords='offset points',
        color='black', size = 22, backgroundcolor='none')
    
    plt.title('Mean Temperature %s'%name)
    filename = './plots/meantempmap%s.eps'%name
    plt.savefig(filename, format = 'eps', dpi = 600)
	def predict(self, X):
		"""
		Predict class
		"""
		n_frame = len(X)
		n_label = len(le.classes_)
		self.labels_predicted = np.empty(n_frame, dtype=int)

		# matrix for storing likelihoods
		matP = np.empty((n_frame, n_label))
		# initial probabilities: class 0 is 0.99, the others are equally likely
		matP[0, 0] = 0.99
		for i in xrange(1,n_label):
			matP[0, i] = (1 - 0.99) / (n_label - 1)

		# matrix for storing labels (back-pointers)
		matL = np.empty((n_frame, n_label))

		# compute the Viterbi path
		for j in xrange(1, n_frame):
			for yj in xrange(n_label):
				prob = np.empty(n_label)
				for yk in xrange(n_label):
					# if the emission or transition probability is 0, set it to None (NaN)
					if (self.emit_prob[X[j], yj] == 0.) or (self.trans_prob[yk, yj] == 0.):
						prob[yk] = None
					else:
						prob[yk] = self.emit_prob[X[j], yj] * self.trans_prob[yk, yj] * matP[j-1, yk]

				# if all candidate probabilities are NaN, the result is NaN
				count = 0
				for i in prob:
					if np.isnan(i) == True:
						count += 1

				if count == len(prob):
					matP[j, yj] = None
					matL[j, yj] = None
				else:
					matP[j, yj] = np.nanmax(prob)
					matL[j, yj] = np.nanargmax(prob)

			# normalize so that the per-class probabilities sum to 1
			matP[j, :] = matP[j, :] / np.sum(matP[j, :])

		self.likelihoods = matP

		# determine the estimated label sequence
		self.labels_predicted[n_frame-1] = np.nanargmax(matP[n_frame-1, :])
		for j in reversed(xrange(n_frame-1)):
			self.labels_predicted[j] = matL[j+1, self.labels_predicted[j+1]]

		return self.labels_predicted
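
A reduced sketch of the Viterbi step above, where impossible transitions are stored as NaN and np.nanmax / np.nanargmax skip them (the probabilities are made up):

import numpy as np

prob = np.array([0.02, np.nan, 0.05, np.nan])  # NaN marks impossible predecessors
best_prev = np.nanargmax(prob)                 # index of the best previous state
best_score = np.nanmax(prob)
print(best_prev, best_score)                   # -> 2 0.05
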
    def _ifws_peak_bins(self, ws):
        '''
        Gives the bin indices of the first and last peaks (of spectrum 0) in the IFWS
        @param ws :: input workspace
        return    :: [xmin,xmax]
        '''

        y = mtd[ws].readY(0)
        size = len(y)
        mid = int(size / 2)
        imin = np.nanargmax(y[0:mid])
        imax = np.nanargmax(y[mid:size]) + mid
        return imin, imax
    def _monitor_max_range(self, ws):
        """
        Gives the bin indices of the first and last peaks in the monitor
        @param ws :: input workspace name
        return    :: [xmin,xmax]
        """

        y = mtd[ws].readY(0)
        size = len(y)
        mid = int(size / 2)
        imin = np.nanargmax(y[0:mid])
        imax = np.nanargmax(y[mid:size]) + mid
        return imin, imax
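
Both helpers above apply the same half-array peak search; a self-contained sketch with made-up counts:

import numpy as np

y = np.array([0., 2., 9., 1., 0., 1., 7., 3.])
mid = int(len(y) / 2)
imin = np.nanargmax(y[0:mid])         # peak in the first half  -> 2
imax = np.nanargmax(y[mid:]) + mid    # peak in the second half -> 6
print(imin, imax)
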
def get3MaxDerivatives(eda,num_max=3):
    deriv, second_deriv = getDerivatives(eda)
    d = copy.deepcopy(deriv)
    d2 = copy.deepcopy(second_deriv)
    max_indices = []
    for i in range(num_max):
        maxd_idx = np.nanargmax(abs(d))
        max_indices.append(maxd_idx)
        d[maxd_idx] = 0
        max2d_idx = np.nanargmax(abs(d2))
        max_indices.append(max2d_idx)
        d2[max2d_idx] = 0
    
    return max_indices, abs(deriv), abs(second_deriv)
Example #15
 def sim_print(self, input_word, corpus_word, sim_matrix, number=5):
     for input_sent, sim_vector in zip(input_word, sim_matrix):
         print("input=", input_sent)
         for count in range(0, number):  # print the top n matches (note: arrays shorter than n are not handled)
             ans_sim = [np.nanmax(sim_vector), np.nanargmax(sim_vector)]
             print('array index:', np.nanargmax(sim_vector), 'No.', count, 'sim=', ans_sim[0])
             print('output=', corpus_word[ans_sim[1]])
             src_set = set(input_sent.split())
             tag_set = set(corpus_word[ans_sim[1]].split())
             print('common words:', list(src_set & tag_set))
             print()
             sim_vector[np.nanargmax(sim_vector)] = -1
         print()
     return 0
Example #16
def findpeaks(f, fft, f_cent, f_span, points, fig, ax, line1, line2):
    center = round(points/2.)
    region = round(points/8.)
    lc = center - region
    rc = center + region
    region1 = round(points/6.)
    l = lc - region1
    r = rc + region1
    mu1 = nanargmax(fft[l:lc]) + l
    mu2 = nanargmax(fft[lc:rc]) + lc
    mu3 = nanargmax(fft[rc:r]) + rc
    args = [mu1, mu2, mu3]
    line1[0].set_data(f[args], fft[args])
    return args
def clustercoordsbymax1d(arr, pkind, critsepind):
    # Results will be sorted. Wherever peak indices are too close together, the
    # peak index adjacent to the one with the highest arr value gets removed.
    pkind.sort()
    indindslow=numpy.where((pkind[1:]-pkind[:-1])<critsepind)[0]
    indindshigh=indindslow+1
    while indindslow.size>0:
        maxindindindlow=numpy.nanargmax(arr[pkind[(indindslow,)]])
        maxindindindhigh=numpy.nanargmax(arr[pkind[(indindshigh,)]])
        if arr[pkind[indindslow[maxindindindlow]]]>arr[pkind[indindshigh[maxindindindhigh]]]:
            pkind=numpy.delete(pkind, indindshigh[maxindindindlow])
        else:
            pkind=numpy.delete(pkind, indindslow[maxindindindhigh])

        indindslow=numpy.where((pkind[1:]-pkind[:-1])<critsepind)[0]
        indindshigh=indindslow+1
    return pkind
Example #18
def test_reductions_2D_nans():
    # chunks are a mix of some/all/no NaNs
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    reduction_2d_test(da.sum, a, np.sum, x, False, False)
    reduction_2d_test(da.prod, a, np.prod, x, False, False)
    reduction_2d_test(da.mean, a, np.mean, x, False, False)
    reduction_2d_test(da.var, a, np.var, x, False, False)
    reduction_2d_test(da.std, a, np.std, x, False, False)
    reduction_2d_test(da.min, a, np.min, x, False, False)
    reduction_2d_test(da.max, a, np.max, x, False, False)
    reduction_2d_test(da.any, a, np.any, x, False, False)
    reduction_2d_test(da.all, a, np.all, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    reduction_2d_test(da.nanprod, a, nanprod, x, False, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False)

    assert_eq(da.argmax(a), np.argmax(x))
    assert_eq(da.argmin(a), np.argmin(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmax(a), np.nanargmax(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmin(a), np.nanargmin(x))
    assert_eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert_eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert_eq(da.argmax(a, axis=1), np.argmax(x, axis=1))
    assert_eq(da.argmin(a, axis=1), np.argmin(x, axis=1))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
def quick_analyze(sp, freq_name_mapping, minvelo, maxvelo):
    """ get peak of spectrum, subtract continuum, etc. """
    argmax = np.nanargmax(sp.data)

    # can have empty spectra passed to this, apparently
    if not np.isfinite(sp.data[argmax]):
        return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, False, '',
                np.nan)

    cont = np.nanpercentile(sp.data, 20)

    shift = (minvelo+maxvelo)/2. / constants.c

    sp.data -= cont
    sp.xarr.convert_to_unit(u.GHz)
    peak = np.nanmax(sp.data)
    peakfreq = sp.xarr[argmax]
    assert sp.data[argmax] == peak
    peakfreq_shifted = peakfreq * (1+shift)
    freqlist = list(freq_name_mapping.keys())
    #reverse_freq_name_mapping = {v:k for k,v in freq_name_mapping.items()}
    bestmatch = np.argmin(np.abs(peakfreq_shifted - u.Quantity(freqlist)))
    closest_freq = freqlist[bestmatch]
    peakvelo = ((closest_freq-peakfreq)/closest_freq *
                constants.c).to(u.km/u.s)
    velo_OK = (minvelo < peakvelo) and (peakvelo < maxvelo)
    peakspecies = (freq_name_mapping[closest_freq] if velo_OK else 'none')

    return (cont, peak, peakfreq, peakfreq_shifted, bestmatch, peakvelo,
            velo_OK, peakspecies, argmax)
Example #20
    def _create_new_neuron(self):
        '''
        create new neuron if t mod \lambda = 0 and |K| < \theta
            a. find neuron q with the greatest counter: q := arg max_{n \in K} e_n
            b. find neighbor f of q with f := arg max_{n \in N_q} e_n
            c. initialize new neuron l
                K := K \cup l
                w_l := 1/2 * (w_q + w_f)
                c_l := 1/2 * (c_q + c_f)
                e_l := \delta * (e_f + e_q)
            d. adapt connections: E := (E \ {(q, f)}) \cup {(q, n), (n, f)}
            e. decrease counter of q and f by the factor \delta
                e_q := (1 - \delta) * e_q
                e_f := (1 - \delta) * e_f
        '''
        q = np.nanargmax(self.errors)
        N_q = None
        if q:
            N_q = self.model.neighbors(q)
        if N_q:
            f = max(N_q, key=lambda n: self.errors[n])
            l = self._add_node(e=self.delta*(self.errors[q] + self.errors[f]),
                               w=(self.weights[q] + self.weights[f]) / 2,
                               c=(self.contexts[q] + self.contexts[f]) / 2)
            self.model.remove_edge(q, f)
            self._add_edge(q, l)
            self._add_edge(f, l)
            self.errors[q] *= (1 - self.delta)
            self.errors[f] *= (1 - self.delta)

            return l
Example #21
def rank_worms(complete_df, a_variable, a_time, return_all = False, egg_mode = True):
	'''
	Rank worms according to their measured value of a_variable at a_time.
	'''
	if a_time != None:
		my_data = complete_df.mloc(measures = [a_variable], times = [a_time])[:, 0, 0]
		my_index = list(complete_df.worms)
		for i in range(0, len(my_index)):
			a_worm = my_index[i]
			my_time = closest_real_time(complete_df, a_worm, a_time, egg_mode = egg_mode)
			my_index[i] += ' ' + my_time
		my_data = pd.Series(my_data, index = my_index).dropna()
		my_data.sort()
	else:
		my_data = complete_df.mloc(measures = [a_variable])[:, 0, :].copy()
		flat_data = np.ndarray.flatten(my_data)
		true_max = np.nanargmax(flat_data)
		sorted_arguments = np.argsort(flat_data)
		sorted_arguments = sorted_arguments[:np.where(sorted_arguments == true_max)[0] + 1]
		sorted_indices = np.array(np.unravel_index(sorted_arguments, my_data.shape)).transpose()

		the_lowest = [complete_df.worms[sorted_indices[i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[i][0]], complete_df.times[sorted_indices[i][1]]) for i in range(0, 20)]
		the_highest = [complete_df.worms[sorted_indices[-i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[-i][0]], complete_df.times[sorted_indices[-i][1]]) for i in range(20, 0, -1)]
		together_list = list(the_lowest)
		together_list.extend(the_highest)
		
		together_data = np.concatenate((flat_data[sorted_arguments[:20]], flat_data[sorted_arguments[-20:]]))
		my_data = pd.Series(together_data, index = together_list)
		if return_all:
			the_full = [complete_df.worms[sorted_indices[i][0]] + ' ' + closest_real_time(complete_df, complete_df.worms[sorted_indices[i][0]], complete_df.times[sorted_indices[i][1]]) for i in range(0, len(sorted_indices))]
			return (my_data, the_full)
	return my_data
Example #22
    def lookup_max(self, region=None):
        """
        Find position of maximum in a image.

        Parameters
        ----------
        region : `~regions.SkyRegion` (optional)
            Limit lookup of maximum to that given sky region.

        Returns
        -------
        (position, value): `~astropy.coordinates.SkyCoord`, float
            Position and value of the maximum.
        """
        if region:
            region_pix = region.to_pixel(self.wcs)
            coords_pix = self.coordinates_pix()
            mask = region_pix.contains(coords_pix)
        else:
            mask = np.ones_like(self.data)

        idx = np.nanargmax(self.data * mask)
        y, x = np.unravel_index(idx, self.data.shape)
        pos = self.wcs_pixel_to_skycoord(xp=x, yp=y)
        return pos, self.data[y, x]
Example #23
    def plot(self):
        
        
        if self.y is not None:
            pp.subplot(2, 1, 1)    
            pp.plot(self.var_grid, self.y)
        
        if hasattr(self.optimizer.chooser, 'ei'):
            
            pp.subplot(2, 1, 1)
            
            func_m = self.optimizer.chooser.func_m[self.sort_idx]
            func_s = np.sqrt(self.optimizer.chooser.func_v[self.sort_idx])
            pp.plot(self.var_grid, func_m)
            pp.plot(self.var_grid, func_m + func_s)
            pp.plot(self.var_grid, func_m - func_s)

            
            pp.subplot(2, 1, 2)
            
            ei = self.optimizer.chooser.ei[self.sort_idx]
            pp.plot(self.var_grid, ei)
            
            best_idx = np.nanargmax(ei)
            pp.plot(self.var_grid[best_idx], ei[best_idx], '.', markersize=10)
            
            
        pp.show()
Example #24
    def scan_callback(self, scan):
        # Calculate angles.
        angles = scan.angle_min + np.arange(scan.ranges.shape[0]) * scan.angle_increment

        # Blur ranges.
        blur_width_angle = np.deg2rad(30)
        blur_width = blur_width_angle / scan.angle_increment
        use_ranges = scipy.ndimage.filters.gaussian_filter(scan.ranges, blur_width)

        # Nan angles which are out of steering range.
        # Angles go from -2.something to +2.something.
        min_angle = np.deg2rad(-60)
        max_angle = np.deg2rad(60)
        use_ranges[np.where((angles < min_angle) | (angles > max_angle))] = np.nan

        index = np.nanargmax(use_ranges)
        dist = scan.ranges[index]
        too_close = dist < self.close_thresh
        theta = angles[index]

        view_dist = 1.
        self.pub_point.publish(
            PointStamped(
                scan.header,
                Point(
                    np.cos(theta) * view_dist,
                    np.sin(theta) * view_dist,
                    0
                )
            )
        )
        self.pub_angle.publish(Float32(theta))
        self.pub_blur.publish(Int32(blur_width))
        self.pub_too_close.publish(Bool(too_close))
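
The NaN-masking pattern used above (invalidate out-of-range angles, then take np.nanargmax) in isolation, with made-up ranges and angles:

import numpy as np

ranges = np.array([1.2, 3.4, 0.8, 2.9])
angles = np.deg2rad([-90.0, -30.0, 20.0, 75.0])
ranges[(angles < np.deg2rad(-60)) | (angles > np.deg2rad(60))] = np.nan
print(np.nanargmax(ranges))   # -> 1, directions outside the steering range are ignored
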
	def decide_migration_migrationlikelihood_woi(self):
		migrate_me_maybe = (self.window_overload_index > self.relocation_thresholds)[0]
		if np.sum(migrate_me_maybe) > 0:
			indexes = np.array(np.where(migrate_me_maybe)).tolist()[0] # potential migration sources
			set_of_vms = list()
			for i in indexes:
				partial = (self.location[:, i] == 1).transpose()
				newly_found = np.array(np.where(partial)).tolist()
				set_of_vms += newly_found[0]
			set_of_vms = sorted(set_of_vms)
			pms = [x.get_pm() for x in self.vms]
			pm_volumes = np.array([x.get_volume() for x in self.pms])
			vm_volumes = np.array([x.get_volume_actual() for x in self.vms])
			vm_migrations = np.array([x.get_migrations() for x in self.vms])
			available_volume_per_pm = pm_volumes - self.physical_volume_vector
			available_capacity = [available_volume_per_pm[x.get_pm()] for x in self.vms]
			plan_coefficients = np.array([x.plan.get_coefficient() for x in self.vms])
			minimize_me = -1.0/plan_coefficients * (vm_volumes + available_capacity) + plan_coefficients * vm_migrations
			vm_migrate = np.nanargmin(minimize_me)
			pm_source = self.vms[vm_migrate].get_pm()
			# avoiding to select the source machine as destination by using nan
			available_volume_per_pm[pm_source] = np.nan
			pm_destination = np.nanargmax(available_volume_per_pm)
			self.migrate(vm_migrate, pm_source, pm_destination)
			self.integrated_overload_index[0,pm_source] = 0
			
Example #26
def decode_location(likelihood, pos_centers, time_centers):
    """Finds the decoded location based on the centers of the position bins.

    Parameters
    ----------
    likelihood : np.array
        With shape(n_timebins, n_positionbins)
    pos_centers : np.array
    time_centers : np.array

    Returns
    -------
    decoded : nept.Position
        Estimate of decoded position.

    """
    keep_idx = np.sum(np.isnan(likelihood), axis=1) < likelihood.shape[1]
    likelihood = likelihood[keep_idx]

    max_decoded_idx = np.nanargmax(likelihood, axis=1)

    decoded_data = pos_centers[max_decoded_idx]

    decoded_time = time_centers[keep_idx]

    return nept.Position(decoded_data, decoded_time)
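
A minimal sketch of the row-wise np.nanargmax decode used above (likelihoods and bin centers are made up):

import numpy as np

likelihood = np.array([[0.1, 0.7, 0.2],
                       [np.nan, 0.3, 0.6]])
pos_centers = np.array([-1.0, 0.0, 1.0])
idx = np.nanargmax(likelihood, axis=1)   # best position bin per time bin
print(pos_centers[idx])                  # -> [0. 1.]
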
def rngWorker(inputEdges, queue):
    """
    Work done by each processor
    :param inputEdges: set of edges (p, q)
    :param queue: shared Queue to place results
    """
    edges = set()
    for p, q in inputEdges:
        relationPQ = _globalRelationalMatrix[p, q]
        row = _globalRelationalMatrix[p]
        # maxJRow = getBestScore(relationMatrix[q])
        # non-numerical distances/similarities will not be counted as edges
        if np.isnan(relationPQ):
            isEdge = False
        # if there is a numeric value
        else:
            isEdge = True   # assume edge until proven wrong
            # loop through all columns in the ith row
            # relationPR is weight of edge p,r ***************************************************** (N^3)/2
            for r, relationPR in enumerate(row):
                # skip rows p and q and any points for which there is no distance value
                if p != r != q and (not np.isnan(relationPR)) and (not np.isnan(_globalRelationalMatrix[q, r])):
                    # for triangle prq, if pq is the longest distance, then p and q are not neighbors
                    lengths = [relationPR, _globalRelationalMatrix[q, r]]
                    if lengths[np.nanargmax(lengths)] < relationPQ:
                        isEdge = False      # not an edge!
                        break               # break to next q
        # if p and q are neighbors
        if isEdge:
            edges.add(frozenset((p, q)))    # add (p,q) tuple to edges set
    queue.put(edges)
def original_ensemble_selection(predictions, labels, ensemble_size, task_type,
                                metric, do_pruning=False):
    """Rich Caruana's ensemble selection method."""

    ensemble = []
    trajectory = []
    order = []

    if do_pruning:
        n_best = 20
        indices = pruning(predictions, labels, n_best, task_type, metric)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                labels, ensemble_, task_type, metric, ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            scores[j] = calculate_score(labels, ensemble_prediction,
                                        task_type, metric,
                                        ensemble_prediction.shape[1])
            ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

    return np.array(order), np.array(trajectory)
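
The greedy selection step above tolerates NaN scores (e.g. from a failed candidate) because np.nanargmax skips them; a tiny sketch with hypothetical scores:

import numpy as np

scores = np.array([0.71, np.nan, 0.74, 0.69])  # hypothetical candidate scores
print(np.nanargmax(scores))                    # -> 2, the NaN candidate is skipped
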
def model_schreiben(datum,data_original,name):
	global fill_typ
	gemittelt,fill_typ=30,np.nan
	fill_zw=np.empty(data_original.shape[0])
	fill_zw[:]=np.nan
	fill=mittelung(fill_zw[:],gemittelt)	
	data=mittelung(datetime.datetime.strptime(datum,'%Y%m%d').timetuple().tm_yday+data_original[:,0]/(24*60*60),gemittelt)
	data=np.hstack((data,mittelung(data_original[:,0],gemittelt)))
	data=np.hstack((data,fill[:]))
	data=np.hstack((data,fill[:]))
	for i in range(len(data)):	
		for j in range(2):
			a=data_original[i*gemittelt:(i+1)*gemittelt,45+j]
			if np.isnan(a[a.argsort()][0])==True:
				data[i,2+j]=fill_typ
			else:
				anzahl_non_nans= np.argmin(abs(np.nanargmax(a)-a.argsort()[:]))+1
				pos_non_nans=a.argsort()[0:anzahl_non_nans]
				data[i,2+j]=a[pos_non_nans[np.argmin(abs(14-pos_non_nans[:]))]]
	index_list=[47,40,41,42,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,5,23,24,26,28,-99,-99,-99,-99]
	for i in range(len(index_list)):
		if index_list[i]==-99:	data=np.hstack((data,fill[:]))
		else: 	data=np.hstack((data,mittelung(	data_original[:,index_list[i]]	,gemittelt)))
	np.savetxt(name,data,fmt='%.5f',delimiter="\t")
	punkt=open(name,'r').read()
	komma=open(name,'w')
	komma.write(punkt.replace(".",",").replace("nan","-999,9"))
	komma.close()
	def decide_migration_loadaware_woi(self):
		migrate_me_maybe = (self.window_overload_index > self.relocation_thresholds)[0]
		if np.sum(migrate_me_maybe) > 0:
			indexes = np.array(np.where(migrate_me_maybe)).tolist()[0] # potential migration sources
			pm_source = random.choice(indexes)
			set_of_vms = (self.location[:, pm_source] == 1).transpose()
			vm_set_migration = np.array(np.where(set_of_vms)).tolist()[0]

			volumes = np.array([x.get_volume() for x in self.pms])
			available_volume_per_pm = volumes - self.physical_volume_vector
			aware_matrix = np.zeros((self.num_vms, self.num_pms))
			for col in range(0,self.num_pms):
				aware_matrix[:, col] = available_volume_per_pm[col]
			for row in range(0,self.num_vms):
				if row in vm_set_migration:
					vol_to_remove = self.volumes[row]
				else:
					vol_to_remove = np.inf
				aware_matrix[row, :] = aware_matrix[row, :] - vol_to_remove
			aware_matrix[:, pm_source] = np.nan
			aware_matrix[aware_matrix<0] = np.nan

			if not np.isnan(aware_matrix).all():
				argmaxidx = np.nanargmax(aware_matrix)
				coordinates = np.unravel_index(argmaxidx, (self.num_vms, self.num_pms))
				vm_migrate = coordinates[0]
				pm_destination = coordinates[1]
				self.migrate(vm_migrate, pm_source, pm_destination)
				self.integrated_overload_index[0,pm_source] = 0
def compute_features(frameManager, featureExtractor, grasp_begin, grasp_end,
                     pmax, max_matrix_1, max_matrix_5):
    # Values computed in "calibrate_impression_depth.py"
    max_val_matrix_1 = 3554.0
    max_val_matrix_5 = 2493.0
    impression_depth = 1.0  # Just an estimate of the maximal impression in [mm]
    impression_factor_1 = impression_depth / max_val_matrix_1
    impression_factor_5 = impression_depth / max_val_matrix_5

    # Determine more robust frames of interest (begin and end frame of the grasp)
    # by taking the object's diameter into account

    # head + tail <= thresh_sequence
    head_elem = 10
    tail_elem = 10

    miniballs = np.empty([grasp_end - grasp_begin + 1, 4])
    miniballs.fill(None)
    #for i, frameID in enumerate(range(grasp_end-tail_elem+1, grasp_end+1)):
    for i, frameID in enumerate(range(grasp_begin, grasp_end + 1)):
        theta = frameManager.get_corresponding_jointangles(frameID)
        miniballs[
            i] = featureExtractor.compute_minimal_bounding_sphere_centroid(
                frameID, theta)

    # Compensate for force dependent sensor matrix impression
    diameter = (2 * miniballs[:, 3] +
                max_matrix_1[grasp_begin:grasp_end + 1] * impression_factor_1 +
                max_matrix_5[grasp_begin:grasp_end + 1] * impression_factor_5)

    slice_tail = diameter[-tail_elem:]
    end_position = (grasp_end - tail_elem) + find_nearest_idx(
        slice_tail, np.median(slice_tail))

    # Problem:
    # The object's initial size cannot be measured accurately enough if the grasp applies torque.
    # In that case, the contact surface between object and both sensor matrices is tilted leading to an
    # overestimation of the real diameter. This asymmetry disappears when all forces reach an equilibrium state.
    # In order to get more robust object size features, the profile's centroids of the end position frame
    # are used to recalculate the diameter during each step of the grasp.
    centroid_matrix_1 = featureExtractor.compute_centroid(end_position, 1)
    centroid_matrix_5 = featureExtractor.compute_centroid(end_position, 5)
    points = np.array([[1.0, centroid_matrix_1[0], centroid_matrix_1[1]],
                       [5.0, centroid_matrix_5[0], centroid_matrix_5[1]]],
                      dtype=np.float64)
    miniballs_refined = np.empty([grasp_end - grasp_begin + 1, 4])
    miniballs_refined.fill(None)
    for i, frameID in enumerate(range(grasp_begin, grasp_end + 1)):
        theta = frameManager.get_corresponding_jointangles(frameID)
        miniballs_refined[
            i] = featureExtractor.compute_minimal_bounding_sphere_points(
                points, theta)

    # Compensate for force dependent sensor matrix impression
    diameter_refined = (
        2 * miniballs_refined[:, 3] +
        max_matrix_1[grasp_begin:grasp_end + 1] * impression_factor_1 +
        max_matrix_5[grasp_begin:grasp_end + 1] * impression_factor_5)

    # Initial position: max diameter of minimal bounding sphere
    slice_head = diameter_refined[0:head_elem]
    initial_position = grasp_begin + np.nanargmax(slice_head)

    # Local indices
    initial_position_grasp = initial_position - grasp_begin
    end_position_grasp = end_position - grasp_begin

    # Compute features
    #grasp_diameter = diameter_refined[initial_position]
    #grasp_diameter = np.median(diameter_refined)
    #grasp_diameter = stats.mode(diameter_refined)[0][0]
    grasp_diameter = stats.mode(diameter)[0][0]
    compressibility = diameter_refined[initial_position_grasp] - diameter_refined[
        end_position_grasp]  # Change of minimal bounding sphere's size during grasp
    std_dev_matrix_1 = featureExtractor.compute_standard_deviation(
        end_position,
        1)  # Standard deviation of intensity values (not 2D image moments)
    std_dev_matrix_5 = featureExtractor.compute_standard_deviation(
        end_position, 5)
    moments_matrix_1 = featureExtractor.compute_chebyshev_moments(
        end_position, 1, pmax).reshape(-1)  # frameID, matrixID, pmax
    moments_matrix_5 = featureExtractor.compute_chebyshev_moments(
        end_position, 5, pmax).reshape(-1)

    return grasp_diameter, compressibility, std_dev_matrix_1, std_dev_matrix_5, moments_matrix_1, moments_matrix_5
    def addImg(self, img, roi=None):
        '''
        img - background, flat field, ste corrected image
        roi - [(x1,y1),...,(x4,y4)] -  boundaries where points are
        '''
        self.img = imread(img, 'gray')
        s0, s1 = self.img.shape

        if roi is None:
            roi = ((0, 0), (s0, 0), (s0, s1), (0, s1))

        k = self.kernel_size
        hk = k // 2

        # mask image
        img2 = self.img.copy()  # .astype(int)

        mask = np.zeros(self.img.shape)
        cv2.fillConvexPoly(mask, np.asarray(roi, dtype=np.int32), color=1)
        mask = mask.astype(bool)
        im = img2[mask]

        bg = im.mean()  # assume image average with in roi == background
        mask = ~mask
        img2[mask] = -1

        # find points from local maxima:
        self.points = np.zeros(shape=(self.max_points, 2), dtype=int)
        thresh = 0.8 * bg + 0.2 * im.max()

        _findPoints(img2, thresh, self.min_dist, self.points)
        self.points = self.points[:np.argmin(self.points, axis=0)[0]]

        # correct point position, to that every point is over max value:
        for n, p in enumerate(self.points):
            sub = self.img[p[1] - hk:p[1] + hk + 1, p[0] - hk:p[0] + hk + 1]
            i, j = np.unravel_index(np.nanargmax(sub), sub.shape)
            self.points[n] += [j - hk, i - hk]

        # remove points that are too close to their neighbour or the border
        mask = maximum_filter(mask, hk)
        i = np.ones(self.points.shape[0], dtype=bool)
        for n, p in enumerate(self.points):
            if mask[p[1], p[0]]:  # too close to border
                i[n] = False
            else:
                # too close to other points
                for pp in self.points[n + 1:]:
                    if norm(p - pp) < hk + 1:
                        i[n] = False
        isum = i.sum()
        ll = len(i) - isum
        print('found %s points' % isum)
        if ll:
            print('removed %s points (too close to border or other points)' %
                  ll)
            self.points = self.points[i]

#         self.n_points += len(self.points)

# for finding best peak position:
#         def fn(xy,cx,cy):#par
#             (x,y) = xy
#             return 1-(((x-cx)**2 + (y-cy)**2)*(1/8)).flatten()

#         x,y = np.mgrid[-2:3,-2:3]
#         x = x.flatten()
#         y = y.flatten()
# for shifting peak:
        xx, yy = np.mgrid[0:k, 0:k]
        xx = xx.astype(float)
        yy = yy.astype(float)

        self.subs = []

        #         import pylab as plt
        #         plt.figure(20)
        #         img = self.drawPoints()
        #         plt.imshow(img, interpolation='none')
        # #                 plt.figure(21)
        # #                 plt.imshow(sub2, interpolation='none')
        #         plt.show()

        #thresh = 0.8*bg + 0.1*im.max()
        for i, p in enumerate(self.points):
            sub = self.img[p[1] - hk:p[1] + hk + 1,
                           p[0] - hk:p[0] + hk + 1].astype(float)
            sub2 = sub.copy()

            mean = sub2.mean()
            mx = sub2.max()
            sub2[sub2 < 0.5 * (mean + mx)] = 0  # only select peak
            try:
                # SHIFT SUB ARRAY to align peak maximum exactly in middle:
                # only eval a 5x5 array in middle of sub:
                # peak = sub[hk-3:hk+4,hk-3:hk+4]#.copy()

                #                 peak -= peak.min()
                #                 peak/=peak.max()
                #                 peak = peak.flatten()
                # fit paraboloid to get shift in x,y:
                #                 p, _ = curve_fit(fn, (x,y), peak, (0,0))
                c0, c1 = center_of_mass(sub2)

                #                 print (p,c0,c1,hk)

                #coords = np.array([xx+p[0],yy+p[1]])
                coords = np.array([xx + (c0 - hk), yy + (c1 - hk)])

                #print (c0,c1)

                #import pylab as plt
                #plt.imshow(sub2, interpolation='none')

                # shift array:
                sub = map_coordinates(sub, coords,
                                      mode='nearest').reshape(k, k)
                # plt.figure(2)
                #plt.imshow(sub, interpolation='none')
                # plt.show()

                #normalize:
                bg = 0.25 * (sub[0].mean() + sub[-1].mean() +
                             sub[:, 0].mean() + sub[:, -1].mean())

                sub -= bg
                sub /= sub.max()

                #                 import pylab as plt
                #                 plt.figure(20)
                #                 plt.imshow(sub, interpolation='none')
                # #                 plt.figure(21)
                # #                 plt.imshow(sub2, interpolation='none')
                #                 plt.show()

                self._psf += sub

                if self.calc_std:
                    self.subs.append(sub)
            except ValueError:
                pass  #sub.shape == (0,0)
Example #33
 def test_nanargmax(self):
     tgt = np.argmax(self.mat)
     for mat in self.integer_arrays():
         assert_equal(np.nanargmax(mat), tgt)
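
A standalone illustration of the property this test checks, with a made-up integer matrix: integer arrays cannot hold NaN, so nanargmax and argmax agree.

import numpy as np

mat = np.array([[3, 7], [5, 1]], dtype=int)
assert np.nanargmax(mat) == np.argmax(mat) == 1   # flattened index of the maximum
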
def key_points(face,
               d_nose_x1=30,
               d_nose_x2=5,
               d_nose_y=5,
               d_lip_y1=25,
               d_lip_y2=70,
               d_lip_y3=4,
               d_lip_x1=50,
               d_chin_x=3,
               d_chin_y1=50,
               d_chin_y2=75,
               d_eye_x=2,
               d_eye_y=50):
    """
    Rotate and zoom the face to create a full frame face. This is based on the
    fact that the nose is the highest point of the picture
    """

    # We apply surfature to calculate the first and second derivates
    K, H, Pmax, Pmin = surfature(face)

    # Remove all key points
    face.key_points.clear()

    #
    # Nose
    #
    nose_x, nose_y = max_xy(face.Z)
    face.key_points["nose"] = (nose_x, nose_y)

    #
    # Nose left and right
    #
    nose_left = Pmin[(nose_y - d_nose_y):(nose_y + d_nose_y),
                     (nose_x - d_nose_x1):(nose_x - d_nose_x2)]
    nose_right = Pmin[(nose_y - d_nose_y):(nose_y + d_nose_y),
                      (nose_x + d_nose_x2):(nose_x + d_nose_x1)]

    nose_left_x, nose_left_y = min_xy(nose_left,
                                      offset_x=(nose_x - d_nose_x1),
                                      offset_y=(nose_y - d_nose_y))
    nose_right_x, nose_right_y = min_xy(nose_right,
                                        offset_x=(nose_x + d_nose_x2),
                                        offset_y=(nose_y - d_nose_y))

    face.key_points["nose_left"] = (nose_left_x, nose_left_y)
    face.key_points["nose_right"] = (nose_right_x, nose_right_y)

    #
    # Upper, lower, left right lip
    #
    lip_y = numpy.nanargmax(Pmax[(nose_y + d_lip_y1):(nose_y + d_lip_y2),
                                 nose_x]) + (nose_y + d_lip_y1)
    lip_left = Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3),
                    (nose_x - d_lip_x1):nose_x]
    lip_right = Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3),
                     nose_x:(nose_x + d_lip_x1)]

    lip_left_x = find_peak_start(numpy.sum(lip_left,
                                           axis=0)) + (nose_x - d_lip_x1)
    lip_left_y = numpy.nanargmax(Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3),
                                      lip_left_x]) + (lip_y - d_lip_y3)

    lip_right_x = find_peak_stop(numpy.sum(lip_right, axis=0)) + nose_x
    lip_right_y = numpy.nanargmax(Pmax[(lip_y - d_lip_y3):(lip_y + d_lip_y3),
                                       lip_right_x]) + (lip_y - d_lip_y3)

    face.key_points['lip'] = (nose_x, lip_y)
    face.key_points['lip_left'] = (lip_left_x, lip_left_y)
    face.key_points['lip_right'] = (lip_right_x, lip_right_y)

    #
    # Chin
    #
    chin = numpy.gradient(
        signal.bspline(face.Z[(lip_y + d_chin_y1):, nose_x], 25))
    chin_x, chin_y = nose_x, numpy.nanargmin(chin) + (lip_y + d_chin_y1)

    face.key_points["chin"] = (chin_x, chin_y)

    #
    # Eyes
    #
    eye_left = Pmax[d_eye_y:nose_left_y - d_eye_y,
                    nose_left_x - d_eye_x:nose_left_x + d_eye_x]
    eye_right = Pmax[d_eye_y:nose_right_y - d_eye_y,
                     nose_right_x - d_eye_x:nose_right_x + d_eye_x]

    eye_left_x, eye_left_y = max_xy(eye_left, nose_left_x - d_eye_x, d_eye_y)
    eye_right_x, eye_right_y = max_xy(eye_right, nose_right_x - d_eye_x,
                                      d_eye_y)

    face.key_points["eye_left"] = (eye_left_x, eye_left_y)
    face.key_points["eye_right"] = (eye_right_x, eye_right_y)

    #
    # Nose face border
    #
    nose_line = numpy.gradient(face.Z[nose_y, :])
    border_nose_left_x, border_nose_left_y = numpy.nanargmax(
        nose_line[:lip_left_x - 10]), nose_y
    border_nose_right_x, border_nose_right_y = numpy.nanargmin(
        nose_line[lip_right_x + 10:]) + lip_right_x + 10, nose_y

    face.key_points["border_nose_left"] = (border_nose_left_x,
                                           border_nose_left_y)
    face.key_points["border_nose_right"] = (border_nose_right_x,
                                            border_nose_right_y)

    #
    # Lip face border
    #
    lip_line = numpy.gradient(face.Z[lip_y, :])
    border_lip_left_x, border_lip_left_y = numpy.nanargmax(
        lip_line[:lip_left_x - 10]), lip_y
    border_lip_right_x, border_lip_right_y = numpy.nanargmin(
        lip_line[lip_right_x + 10:]) + lip_right_x + 10, lip_y

    face.key_points["border_lip_left"] = (border_lip_left_x, border_lip_left_y)
    face.key_points["border_lip_right"] = (border_lip_right_x,
                                           border_lip_right_y)

    #
    # Forehead border
    #
    forehead_line = numpy.gradient(face.Z[nose_y - (chin_y - nose_y), :])
    border_forehead_left_x, border_forehead_left_y = numpy.nanargmax(
        forehead_line[:lip_left_x - 10]), nose_y - (chin_y - nose_y)
    border_forehead_right_x, border_forehead_right_y = numpy.nanargmin(
        forehead_line[lip_right_x +
                      10:]) + lip_right_x + 10, nose_y - (chin_y - nose_y)

    face.key_points["border_forehead_left"] = (border_forehead_left_x,
                                               border_forehead_left_y)
    face.key_points["border_forehead_right"] = (border_forehead_right_x,
                                                border_forehead_right_y)
Example #35
def Strong_RRQR(A, k, f):

    #   Strong Rank Revealing QR with fixed rank 'k'
    #
    #       A(:, p) = Q * R = Q [R11, R12;
    #                              0, R22]
    #   where R11 and R12 satisfy that the matrix (inv(R11) * R12) has entries
    #   bounded by a pre-specified constant which should be not less than 1.
    #
    #   Input:
    #       A, matrix,  target matrix that is approximated.
    #       f, scalar,  constant that bounds the entries of the calculated (inv(R11) * R12)
    #       k, integer, dimension of R11.
    #
    #   Output:
    #       A(:, p) = [Q1, Q2] * [R11, R12;
    #                               0, R22]
    #               approx Q1 * [R11, R12];
    #       Only the truncated QR decomposition is returned as
    #           Q = Q1,
    #           R = [R11, R12];
    #       where Q is an m * k matrix and R is a k * n matrix
    #
    #   Reference:
    #       Gu, Ming, and Stanley C. Eisenstat. "Efficient algorithms for
    #       computing a strong rank-revealing QR factorization." SIAM Journal
    #       on Scientific Computing 17.4 (1996): 848-869.
    #
    #   Note:
    #       Algorithm 4 in the above ref. is implemented.
    #

    #   dimension of the given matrix
    m, n = np.shape(A)

    Q, R, p = linalg.qr(A, mode="full", pivoting=True)
    print(p)
    #print(R[0,0])
    print(p[0:10])
    #print(np.shape(Q))
    s_R = np.sign(np.diag(R))
    #print(s_R[0])
    for i in list(range(n)):
        R[i, i] = s_R[i] * R[i, i]
        Q[i, i] = s_R[i] * Q[i, i]

    #   Initialization of A^{-1}B ( A refers to R11, B refers to R12)
    R11 = deepcopy(R[0:k, 0:k])
    R12 = deepcopy(R[0:k, k:])
    R22 = deepcopy(R[k:, k:])
    AB = deepcopy(np.dot(np.linalg.inv(R11), R12))
    #AB = solve_triangular(R11,R12)
    #print("AB")
    #print(np.amax(AB))
    #print(np.shape(AB))
    #print("ga11")
    #print(R[k-1,k-1])
    #   Initialization of gamma, i.e., norm of C's columns (C refers to R22)
    gamma = np.transpose(np.sqrt(np.diag(np.dot(np.transpose(R22), R22))))
    #print("gamma")
    #print(np.shape(gamma))
    #   Initialization of omega, i.e., reciprocal of inv(A)'s row norm
    tmp = np.linalg.pinv(R11)

    omega = 1. / np.sqrt(np.diag(np.dot(tmp, np.transpose(tmp))))
    #print("omega")
    #print(np.shape(omega))
    #print(omega)

    ##   "while" loop for interchanging the columns from first k columns and
    ##   the remaining (n-k) columns.
    #
    counter = 0
    while 1:
        tmp2 = np.power(np.outer(1. / omega, np.transpose(gamma)),
                        2) + np.power(AB, 2)
        #print("tmp2")
        #print(np.shape(tmp2))
        #print(p[0:k])
        i_, j_ = np.where(tmp2 > np.power(f, 2))
        print("size")
        print(i_.size)
        ind = np.unravel_index(np.nanargmax(tmp2, axis=None), tmp2.shape)
        i = ind[0]
        j = ind[1]
        print("max tmp2")
        print(tmp2[i, j])
        #if tmp2[i,j] <= np.power(f,2):
        #    break
        #if i_.size>0 and j_.size>0:
        #    print("yes")
        #    i = i_[0]
        #    j = j_[0]
        #else:
        #    break
        counter = counter + 1
        #print("counter")
        #print(counter)
        print("AB")
        print(np.amax(AB))

        #    Interchange the i th and (k+j) th column of target matrix A and
        #    update QR decomposition (Q, R, p), AB, gamma, and omega.
        ##   First step : interchanging the k+1 and k+j th columns

        if j > 0:
            #AB[:, [0, j]] = AB[:, [j, 0]]
            AB_d_0, _ = np.shape(AB)
            perm_AB_0 = get_transposition_list(AB_d_0, 0, j)
            AB = AB[perm_AB_0]
            #gamma[[0, j]] = gamma[[j, 0]]
            gamma_tmp = gamma[0]
            gamma[0] = gamma[j]
            gamma[j] = gamma_tmp
            _, R_d_2 = np.shape(R)
            perm_R_1 = get_transposition_list(R_d_2, k, k + j - 1)
            R = R[:, perm_R_1]
            #print("ga22")
            #print(R[k-1,k-1])
            #R[:, [k, k+j-1]] =R[:, [k+j-1, k]]
            #p[[k, k+j-1]] = p[[k+j-1, k]]
            p_tmp = p[k + j - 1]
            p[k + j - 1] = p[k]
            p[k] = p_tmp
        ##   Second step : interchange the i th and k th columns
        if i < k:
            _, R_d_2 = np.shape(R)
            perm_R_2 = get_cyclic_permutation_list(R_d_2, i, k - 1)
            p = p[perm_R_2]
            R = R[:, perm_R_2]

            omega_d_1 = np.shape(omega)[0]
            perm_omega_3 = get_cyclic_permutation_list(omega_d_1, i, k - 1)
            omega = omega[perm_omega_3]

            AB_d_1, _ = np.shape(AB)
            perm_AB_3 = get_transposition_list(AB_d_1, i, k - 1)
            AB = AB[perm_AB_3, :]

            #   Givens rotations to re-triangularize R(1:k, 1:k)
            for ii in range(i, k):
                G = givens_rotation_matrix_2(R[ii, ii], R[ii + 1, ii])
                if np.dot(G[0, :], R[ii:ii + 2, ii]) < 0:
                    G = -G  #  guarantee R(ii, ii) > 0
                R[ii:ii + 2, :] = np.dot(G, R[ii:ii + 2, :])
                Q[:, ii:ii + 2] = np.dot(Q[:, ii:ii + 2], np.transpose(G))

            if R[k - 1, k - 1] < 0:
                R[k - 1, :] = -R[k - 1, :]
                Q[:, k - 1] = -Q[:, k - 1]

        ##   Third step : zero out the below-diagonal entries of the (k+1) th column
        R_m, R_n = np.shape(R)

        if k < R_m:
            for ii in range(k + 1, R_m):
                G = givens_rotation_matrix_2(R[k, k], R[ii, k])
                R_pair = np.asarray([R[k, k], R[ii, k]])
                if np.dot(G[0, :], R_pair) < 0:
                    G = -G  #  guarantee R(k+1, k+1) > 0
                R[[k, ii], :] = np.dot(G, R[[k, ii], :])
                Q[:, [k, ii]] = np.dot(Q[:, [k, ii]], np.transpose(G))

        ##   Fourth step : interchange the k th and (k+1) th columns
        p[[k - 1, k]] = p[[k, k - 1]]
        ga = deepcopy(R[k - 1, k - 1])
        mu = deepcopy(R[k - 1, k]) / ga
        if k < R_m:
            nu = deepcopy(R[k, k]) / ga
        else:
            nu = 0
        rho = np.sqrt(mu * mu + nu * nu)
        ga_bar = ga * rho
        #   Copy these slices: basic slicing returns views into R, and the
        #   underlying entries are overwritten below before they are re-used.
        b1 = R[0:k - 1, k - 1].copy()
        b2 = R[0:k - 1, k].copy()
        c1T = R[k - 1, k + 1:].copy()
        c2T = R[k, k + 1:].copy()
        c1T_bar = (mu * c1T + nu * c2T) / rho
        c2T_bar = (nu * c1T - mu * c2T) / rho

        R[0:k - 1, k - 1] = b2
        R[0:k - 1, k] = b1
        R[k - 1, k - 1] = ga_bar
        R[k - 1, k] = ga * mu / rho
        R[k, k] = ga * nu / rho
        R[k - 1, k + 1:] = c1T_bar
        R[k, k + 1:] = c2T_bar

        R_submatrix_tmp = deepcopy(R[0:k - 1, 0:k - 1])
        u = np.dot(np.linalg.pinv(R_submatrix_tmp), b1)
        u1 = AB[0:k - 1, 0].copy()  # copy before column 0 of AB is overwritten
        AB[0:k - 1, 0] = ((mu * mu) * u - mu * u1) / (rho * rho)
        AB[k - 1, 0] = mu / (rho * rho)
        AB[k - 1, 1:] = c1T_bar / ga_bar
        AB[0:k - 1, 1:] = AB[0:k - 1, 1:] + (nu * np.outer(u, c2T_bar) -
                                             np.outer(u1, c1T_bar)) / ga_bar
        gamma[0] = ga * nu / rho
        gamma[1:] = np.power(
            (np.power(gamma[1:], 2) + np.power(np.transpose(c2T_bar), 2) -
             np.power(np.transpose(c2T), 2)), 1 / 2)

        u_bar = u1 + mu * u
        omega[k - 1] = ga_bar
        if counter == 0:
            omega[0:k - 1] = np.power(
                np.abs((np.power(omega[0:k - 1], (-2)) + np.power(u_bar, 2) /
                        (ga_bar * ga_bar) - np.power(u, 2) / (ga * ga))),
                (-1 / 2))
        else:
            omega[0:k - 1] = np.power(
                (np.power(omega[0:k - 1], (-2)) + np.power(u_bar, 2) /
                 (ga_bar * ga_bar) - np.power(u, 2) / (ga * ga)), (-1 / 2))
        #   Eliminate the new R(k+1, k) entry by an orthogonal transformation.
        Gk = np.asarray([[mu / rho, nu / rho], [nu / rho, -mu / rho]])
        R_d_final, _ = np.shape(R)
        if k < R_d_final:
            Q[:, [k, k + 1]] = np.dot(Q[:, [k, k + 1]], np.transpose(Gk))

    return p[0:k]
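# The routine above relies on three small helpers that are not shown in this
# snippet. A minimal sketch of what they are assumed to return (index lists and
# a 2x2 rotation matrix, usable exactly as in the calls above) could look like:
import numpy as np

def get_transposition_list(n, a, b):
    # identity permutation of length n with positions a and b swapped
    perm = list(range(n))
    perm[a], perm[b] = perm[b], perm[a]
    return perm

def get_cyclic_permutation_list(n, a, b):
    # identity permutation with the block a..b rotated left by one position,
    # i.e. element a moves to position b and a+1..b each shift down by one
    perm = list(range(n))
    perm[a:b + 1] = perm[a + 1:b + 1] + [perm[a]]
    return perm

def givens_rotation_matrix_2(x, y):
    # 2x2 Givens rotation G with G @ [x, y]^T = [r, 0]^T
    r = np.hypot(x, y)
    if r == 0.0:
        return np.eye(2)
    c, s = x / r, y / r
    return np.asarray([[c, s], [-s, c]])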
Beispiel #36
0
def file_loop(f):
    print('Doing file: ' + f)

    dic = xr.open_dataset(f)
    edate = pd.Timestamp(dic.time.values)

    out = dictionary()
    res = []
    outt = dic['tc_lag0'].values
    outp = dic['p'].values

    out['lon'] = dic['lon'].values
    out['lat'] = dic['lat'].values
    out['hour'] = dic['time.hour'].item()
    out['month'] = dic['time.month'].item()
    out['year'] = dic['time.year'].item()
    out['date'] = dic['time'].values

    if np.nanmin(dic['tc_lag0'].values) > -53:
        return
    #ipdb.set_trace()
    out['clat'] = np.min(out['lat'])+((np.max(out['lat'])-np.min(out['lat']))*0.5)
    out['clon'] = np.min(out['lon']) + ((np.max(out['lon']) - np.min(out['lon'])) * 0.5)

    if (out['clat']<9) | (out['clon']<-15) | (out['clon']>15):
        print('MCS out of box')
        return


    # if edate.hour < 17:
    #     return

    try:
        era_pl = xr.open_dataset(cnst.ERA5_HOURLY_PL+'ERA5_'+str(dic['time.year'].values)+'_'+str(dic['time.month'].values).zfill(2)+'_pl.nc')
    except:
        print('ERA5 missing')
        return
    try:
        era_srfc = xr.open_dataset(cnst.ERA5_HOURLY_SRFC+'ERA5_'+str(dic['time.year'].values)+'_'+str(dic['time.month'].values).zfill(2)+'_srfc.nc')
    except:
        print('ERA5 srfc missing')
        return
    era_pl = uda.flip_lat(era_pl)
    era_srfc = uda.flip_lat(era_srfc)

    edate = edate.replace(hour=12, minute=0)

    era_pl_day = era_pl.sel(time=edate, longitude=slice(-16,17), latitude=slice(4,26))
    era_srfc_day = era_srfc.sel(time=edate, longitude=slice(-16,17), latitude=slice(4,26))


    tminpos = np.where(dic['tc_lag0'].values == np.nanmin(dic['tc_lag0'].values)) # era position close to min temp
    if len(tminpos[0])>1:
        ptmax = np.nanmax((dic['p'].values)[tminpos])
        if ptmax > 0:
            prpos = np.where((dic['p'].values)[tminpos] == ptmax)
            tminpos = ((tminpos[0])[prpos], (tminpos[1])[prpos] )
        else:
            tminpos = ((tminpos[0])[0], (tminpos[1])[0])

    elon = dic['lon'].values[tminpos]
    elat = dic['lat'].values[tminpos]

    era_day = era_pl_day.sel(latitude=elat, longitude=elon , method='nearest') # take point of minimum T
    era_day_srfc = era_srfc_day.sel(latitude=elat, longitude=elon , method='nearest') # take point of minimum T

    del era_srfc_day

    e925 = era_day.sel(level=925).mean()

    e850 = era_pl_day['t'].sel(level=850)
    elow = era_day.sel(level=slice(925,850)).mean('level').mean()
    e650 = era_day.sel(level=650).mean()
    emid = era_day.sel(level=slice(600,700)).mean('level').mean()
    srfc = era_day_srfc.mean()


    t_thresh = -50  # -40C ~ 167 W m-2
    mask = np.isfinite(outp) & (outt<=t_thresh) & np.isfinite(outt)
    mask_area = (outt<=t_thresh) & np.isfinite(outt)
    mask70 = (outt<=-70) & np.isfinite(outt)

    if np.sum(mask) < 3:
        return

    print(np.nanmax(outt[mask]))   # can be bigger than cutout threshold because of interpolation to 5km grid after cutout

    out['area'] = np.sum(mask_area)
    out['area70'] = np.sum(mask70)

    out['tmin'] = np.min(outt[mask])
    out['tmean'] = np.mean(outt[mask])

    maxpos = np.unravel_index(np.nanargmax(outp), outp.shape)
    out['pmax'] = np.nanmean(ua.cut_kernel(outp,maxpos[1], maxpos[0],1)) #np.max(outp[mask])
    out['pmean'] = np.mean(outp[mask])

    dbox = e850.copy(deep=True)
    minlon = era_pl_day.sel(latitude=8, longitude=np.min(out['lon']), method='nearest')
    maxlon = era_pl_day.sel(latitude=8, longitude=np.max(out['lon']), method='nearest')

    del era_pl_day

    tgrad = dbox.sel(longitude=slice(minlon.longitude.values, maxlon.longitude.values)).mean('longitude')

    tmin = np.nanargmin(tgrad.values)
    tmax = np.nanargmax(tgrad.values)
    tgrad = tgrad.isel(latitude=slice(tmin, tmax))

    lingress = uda.linear_trend_lingress(tgrad)

    out['tgrad'] = lingress['slope'].values

    tgrad2 = dbox.sel(longitude=slice(np.min(out['lon']), np.max(out['lon'])), latitude=slice(10, 20)).mean(
        ['longitude', 'latitude']) - \
             dbox.sel(longitude=slice(np.min(out['lon']), np.max(out['lon'])), latitude=slice(5, 7)).mean(['longitude', 'latitude'])
    out['tbox'] = tgrad2.values

    try:
        out['q925'] =float(e925['q'])
    except TypeError:
        return

    out['q650'] = float(e650['q'])
    out['v925'] = float(e925['v'])
    out['v650'] = float(e650['v'])
    out['u925'] = float(e925['u'])
    out['u650'] = float(e650['u'])
    out['w925'] = float(e925['w'])
    out['w650'] = float(e650['w'])
    out['rh925'] = float(e925['r'])
    out['rh650'] = float(e650['r'])
    out['t925'] = float(e925['t'])
    out['t650'] = float(e650['t'])
    out['pv925'] = float(e925['pv'])
    out['pv650'] = float(e650['pv'])
    out['div925'] = float(e925['d'])
    out['div650'] = float(e650['d'])
    out['q_low'] = float(elow['q'])
    out['q_mid'] = float(emid['q'])
    out['tcwv'] = float(srfc['tcwv'])

    out['shear'] = float(e650['u']-e925['u'])

    theta_down = u_met.theta_e(925,e925['t']-273.15, e925['q'])
    theta_up = u_met.theta_e(650,e650['t']-273.15, e650['q'])

    out['dtheta'] =  (theta_down-theta_up).values
    out['thetaup'] = theta_up.values
    out['thetadown'] = theta_down.values

    out['pgt30'] = np.sum(outp[mask]>=30)
    out['isvalid'] = np.sum(mask)
    out['pgt01'] = np.sum(outp[mask]>=0.1)
    #
    out['p'] = outp[mask]
    out['t'] = outt[mask]
    #ipdb.set_trace()
    dic.close()

    return out
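# Hypothetical driver for file_loop (not part of the original snippet): the
# function takes a single netCDF path and returns a dict or None, so a typical
# pattern is to glob the storm files and map file_loop over them in parallel.
# The directory layout and the use of multiprocessing here are assumptions.
import glob
import multiprocessing

def run_all(indir, nproc=4):
    files = sorted(glob.glob(indir + '/*.nc'))
    with multiprocessing.Pool(nproc) as pool:
        results = pool.map(file_loop, files)
    return [r for r in results if r is not None]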
Beispiel #37
0
def x2dspec(x2dfile, traceloc='max', extrsize='stsci', bksize='stsci', bkoff='stsci', x1dfile=None, fitsout=None,
            overwrite=True, bkmask=0):
    """
    Creates a spectrum from HST STIS (or maybe also COS?) data using the x2d
    file provided by the default STScI pipeline.

    Parameters
    ----------
    x2dfile : str
        Path of the x2d file.
    traceloc : {int|'max'|'lya'}, optional
        Location of the spectral trace.
        int : the midpoint pixel
        'max' : use the mean y-location of the pixel with highest S/N
        'lya' : locate the trace using the flux just redward of the Ly-alpha line
    extrsize, bksize, bkoff : {int|'stsci'}, optional
        The height of the signal extraction region, the height of the
        background extraction regions, and the offset above and below the
        spectral trace at which to center the background extraction regions.
        'stsci' : use the value used by STScI in making the x1d (requires
            x1dfile)
        int : user specified value in pixels
    x1dfile : str, optional if 'stsci' is not specified for any other keyword
        Path of the x1d file.
    fitsout : str, optional
        Path for saving a FITS file version of the spectrum.
    overwrite : {True|False}, optional
        Whether to overwrite the existing FITS file.
    bkmask : int, optional
        Data quality flags to mask the background. Background pixels that have
        at least one of these flags will be discarded.

    Returns
    -------
    spectbl : astropy table
        The wavelength, flux, error, and data quality flag values of the extracted
        spectrum.

    Cautions
    --------
    Using a non-stsci extraction size will cause a systematic error because a
    flux correction factor is applied that assumes the STScI extraction
    ribbon was used.

    This still isn't as good as an x1d, mainly because the wavelength dependency
    of the slit losses is not accounted for.
    """

    x2d = _fits.open(x2dfile)

    # get the flux and error from the x2d
    f, e, q = x2d['sci'].data, x2d['err'].data, x2d['dq'].data

    inst = x2d[0].header['instrume']
    if inst != 'STIS':
        raise NotImplementedError("This function cannot handle {} data at "
                                  "present.".format(inst))

    # make sure x1d is available if 'stsci' is specified for anything
    if 'stsci' in [traceloc, extrsize, bksize, bkoff]:
        try:
            x1d = _fits.open(x1dfile)
            xd = x1d[1].data
        except:
            raise ValueError("An open x1d file is needed if 'stsci' is "
                             "specified for any of the keywords.")

    # get the ribbon values
    if extrsize == 'stsci': extrsize = _np.mean(xd['extrsize'])
    if bksize == 'stsci': bksize = _np.mean([xd['bk1size'], xd['bk2size']])
    if bkoff == 'stsci':
        bkoff = _np.mean(_np.abs([xd['bk1offst'], xd['bk2offst']]))

    # select the trace location
    if traceloc == 'max':
        sn = f / e
        sn[q > 0] = 0.0
        sn[e <= 0.0] = 0.0
        maxpixel = _np.nanargmax(sn)
        traceloc = _np.unravel_index(maxpixel, f.shape)[0]
    if traceloc == 'lya':
        xmx = _np.nanmedian(_np.argmax(f, 1))
        redsum = _np.nansum(f[:, xmx+4:xmx+14], 1)
        smoothsum = data_structures._smooth_sum(redsum, extrsize) / float(extrsize)
        traceloc = _np.argmax(smoothsum) + extrsize/2

    # convert everything to integers so we can make slices
    try:
        intrnd = lambda x: int(round(x))
        traceloc, extrsize, bksize, bkoff = map(intrnd, [traceloc, extrsize, bksize, bkoff])
    except ValueError:
        raise ValueError("Invalid input for either traceloc, extrsize, bksize, "
                         "or bkoff. See docstring.")

    # convert intensity to flux
    fluxfac = x2d['sci'].header['diff2pt']
    f, e = f * fluxfac, e * fluxfac

    # get slices for the ribbons
    sigslice = slice(traceloc - extrsize // 2, traceloc + extrsize // 2 + 1)
    bk0slice = slice(traceloc - bkoff - bksize // 2, traceloc - bkoff + bksize // 2 + 1)
    bk1slice = slice(traceloc + bkoff - bksize // 2, traceloc + bkoff + bksize // 2 + 1)
    slices = [sigslice, bk0slice, bk1slice]

    # mask bad values in background regions
    if bkmask:
        badpix = (q & bkmask) > 0
        badpix[sigslice] = False  # but don't modify the signal region
        f[badpix], e[badpix], q[badpix] = 0.0, 0.0, 0
        # make a background area vector to account for masked pixels
        goodpix = ~badpix
        bkareas = [_np.sum(goodpix[slc, :], 0) for slc in slices[1:]]
        bkarea = sum(bkareas)
    else:
        bkarea = bksize * 2

    # sum fluxes in each ribbon
    fsig, fbk0, fbk1 = [_np.sum(f[slc, :], 0) for slc in slices]

    # sum errors in each ribbon
    esig, ebk0, ebk1 = [_np.sqrt(_np.sum(e[slc, :]**2, 0)) for slc in slices]

    # condense dq flags in each ribbon
    bitor = lambda a: reduce(lambda x, y: x | y, a)
    qsig, qbk0, qbk1 = [bitor(q[slc, :]) for slc in slices]

    # subtract the background
    area_ratio = float(extrsize) / bkarea
    f1d = fsig - area_ratio * (fbk0 + fbk1)
    e1d = _np.sqrt(esig**2 + (area_ratio * ebk0)**2 + (area_ratio * ebk1)**2)

    # make sure no zero errors
    e1d[e1d == 0] = e1d.min()

    # propagate the data quality flags
    q1d = qsig | qbk0 | qbk1

    # construct wavelength array
    wedges = _get_x2d_waveedges(x2d)
    w0, w1 = wedges[:-1], wedges[1:]

    # construct exposure time array
    expt = _np.ones(f.shape[0]) * x2d['sci'].header['exptime']

    #region PUT INTO TABLE
    # make data columns
    colnames = ['w0', 'w1', 'w', 'flux', 'error', 'dq', 'exptime']
    units = ['Angstrom'] * 3 + ['ergs/s/cm2/Angstrom'] * 2 + ['s']
    descriptions = ['left (short,blue) edge of the wavelength bin',
                    'right (long,red) edge of the wavelength bin',
                    'midpoint of the wavelength bin',
                    'average flux over the bin',
                    'error on the flux',
                    'data quality flags',
                    'cumulative exposure time for the bin']
    dataset = [w0, w1, (w0+w1)/2., f1d, e1d, q1d, expt]
    cols = [_tbl.Column(d, n, unit=u, description=dn) for d, n, u, dn in
            zip(dataset, colnames, units, descriptions)]

    # make metadata dictionary
    descriptions = {'rootname': 'STScI identifier for the dataset used to '
                                'create this spectrum.'}
    meta = {'descriptions': descriptions,
            'rootname': x2d[1].header['rootname'],
            'traceloc': traceloc,
            'extrsize': extrsize,
            'bkoff': bkoff,
            'bksize': bksize}

    # put into table
    tbl = _tbl.Table(cols, meta=meta)
    #endregion

    #region PUT INTO FITS
    if fitsout is not None:
        # spectrum hdu
        fmts = ['E'] * 5 + ['I', 'E']
        cols = [_fits.Column(n, fm, u, array=d) for n, fm, u, d in
                zip(colnames, fmts, units, dataset)]
        del meta['descriptions']
        spechdr = _fits.Header(meta.items())
        spechdu = _fits.BinTableHDU.from_columns(cols, header=spechdr,
                                                name='spectrum')

        # make primary header
        prihdr = _fits.Header()
        prihdr['comment'] = ('Spectrum generated from an x2d file produced by '
                             'STScI. The dataset is identified with the header '
                             'keyword rootname. All pixel locations refer to '
                             'the x2d and are indexed from 0. '
                             'Created with spectralPhoton software '
                             'http://github.com/parkus/spectralPhoton')
        prihdr['date'] = _strftime('%c')
        prihdr['rootname'] = x2d[1].header['rootname']
        prihdu = _fits.PrimaryHDU(header=prihdr)

        hdulist = _fits.HDUList([prihdu, spechdu])
        hdulist.writeto(fitsout, overwrite=overwrite)  # 'clobber' was renamed to 'overwrite' in astropy
    #endregion

    return tbl
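# Hypothetical call, following the docstring above; the file names are
# placeholders, and the 'stsci' defaults for the ribbon sizes require the
# matching x1d file to be supplied.
spec = x2dspec('mytarget_x2d.fits', traceloc='max',
               x1dfile='mytarget_x1d.fits',
               fitsout='mytarget_custom_x1d.fits', overwrite=True)
print(spec.colnames)  # ['w0', 'w1', 'w', 'flux', 'error', 'dq', 'exptime']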
Beispiel #38
0
def county_facility_x_correlation(facility, county, start_date, end_date,
                                  facility_name, county_pop):
    county_name = county.head(1)['county'].values[0]
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    facility['Date'] = pd.to_datetime(facility['Date'])
    facility_mask = (facility['Date'] > start_date) & (facility['Date'] <=
                                                       end_date)
    facility = facility.loc[facility_mask]

    county['date'] = pd.to_datetime(county['date'])
    county_mask = (county['date'] > start_date) & (county['date'] <= end_date)
    county = county.loc[county_mask]

    plt.plot(facility['Residents.Confirmed'])
    plt.xlabel('Days')
    plt.ylabel('Cumulative case count')
    plt.show()

    plt.plot(county['cases'])
    plt.xlabel('Days')
    plt.ylabel('Cumulative case count')
    plt.show()

    # plt.figure('facility')
    # facility['Rolling_diff'] = moving_average(np.array(facility['Residents.Confirmed'].diff(1))[1:])
    # # plt.plot(facility['Date'], facility['Residents.Confirmed'].diff(1), color='blue')
    # plt.plot(facility['Date'], facility['Rolling_diff'], color='blue')
    # plt.xticks(rotation=45)
    # plt.xlabel('Days')
    # plt.ylabel('Cumulative case count')
    # plt.title(f'7 Day Rolling Avg - {facility_name}')
    # plt.show()
    #
    # plt.figure('county')
    # county['Rolling_diff'] = moving_average(np.array(county['cases'].diff(1))[1:])
    # # plt.plot(county['date'], county['cases'].diff(1), color='orange')
    # plt.plot(county['date'], county['Rolling_diff'], color='orange')
    # plt.xticks(rotation=45)
    # plt.xlabel('Days')
    # plt.ylabel('Cumulative case count')
    # plt.title(f'7 Day Rolling Avg - County {county_name}')
    # plt.show()

    joined_df = county.join(facility.set_index('Date'), on='date', how='left')

    ## TODO before doing the correlation, need to join on the date column to get the same date values for NYT and ICE data
    ## basically need to build up some more of my data tools first
    # Compute rolling window synchrony
    d1 = joined_df['Residents.Active'].fillna(
        method='ffill').dropna()[1:] / 338 * 10000
    d2 = joined_df['cases'].fillna(method='ffill').dropna()[1:] / 30000 * 10000
    rs = np.array([
        crosscorr(d1, d2, lag)
        for lag in range(-min(len(joined_df), 21), min(len(joined_df), 21))
    ])  #21 days
    rs_not_nan = rs[~np.isnan(rs)]
    offset = np.floor(len(rs) / 2) - np.nanargmax(rs)
    f, ax = plt.subplots(figsize=(14, 5))
    ax.plot(rs)
    ax.axvline(np.ceil(len(rs) / 2), color='k', linestyle='--', label='Center')
    ax.axvline(np.nanargmax(rs),
               color='r',
               linestyle='--',
               label='Peak synchrony')
    ax.set(
        title=
        f'{facility_name} cross correlation with {county_name} county \n Date Offset = {offset} frames',
        xlabel='Offset',
        ylabel='Pearson r')
    # ax.set_xticks(np.arange(len(joined_df)))
    # ax.set_xticklabels(joined_df['Date'])
    plt.legend()

    plt.figure('compare case rates')
    avg_difference_in_rates = np.average(
        joined_df['Residents.Active'].fillna(method='ffill')[1:] /
        facility.head(1)['Population.Feb20'].values[0] * 10000) / np.average(
            joined_df['cases'].diff(10).fillna(method='bfill') / county_pop *
            10000)
    plt.ylabel('Active case rate per 10,000 people')
    plt.title(
        f'Active case rates for {facility_name} and surrounding county\n'
        f'Avg rate of detention facility is {np.round(avg_difference_in_rates,1)}X higher than county rate'
    )
    plt.plot(joined_df['date'],
             joined_df['Residents.Active'].fillna(method='ffill') /
             facility.head(1)['Population.Feb20'].values[0] * 10000,
             label=f'{facility_name} Detainee Rate')
    plt.plot(joined_df['date'],
             joined_df['cases'].diff(10).fillna(method='bfill') / county_pop *
             10000,
             label='County rate')
    plt.xticks(rotation=45)
    plt.ylim(1, 100000)
    plt.semilogy()
    plt.yticks([10, 100, 1000, 10000], labels=['10', '100', '1000', '10000'])
    plt.legend(loc='upper left')
    plt.show()
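# Sketch of the crosscorr helper used above (its definition is not part of this
# snippet). It is assumed to be a lagged Pearson correlation between two pandas
# Series, with the second series shifted by `lag` samples.
def crosscorr(datax, datay, lag=0):
    return datax.corr(datay.shift(lag))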
Beispiel #39
0
plt.figure(figsize=(12, 12))
fig = sns.heatmap(sq_dists,
                  cmap=plt.get_cmap('viridis'),
                  square=True,
                  mask=mask)
figx = fig.get_figure()
figx.savefig('/home/u3749/result/matrix.png')

# upper triangle of matrix set to np.nan
sq_dists[np.triu_indices_from(mask)] = np.nan
sq_dists[0, 0] = np.nan

fig = plt.figure(figsize=(12, 8))
# maximally dissimilar image
ax = fig.add_subplot(1, 3, 1)
maximally_dissimilar_image_idx = np.nanargmax(np.nanmean(sq_dists, axis=1))
plt.imshow(all_images[maximally_dissimilar_image_idx])
plt.title('maximally dissimilar')

# maximally similar image
ax = fig.add_subplot(1, 3, 2)
maximally_similar_image_idx = np.nanargmin(np.nanmean(sq_dists, axis=1))
plt.imshow(all_images[maximally_similar_image_idx])
plt.title('maximally similar')

# now compute the mean image
ax = fig.add_subplot(1, 3, 3)
mean_img = gray_imgs_mat.mean(axis=0).reshape(rescaled_dim, rescaled_dim, 3)
plt.imshow(cv2.normalize(mean_img, None, 0.0, 1.0, cv2.NORM_MINMAX))
plt.title('mean image')
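# For reference, the distance matrix and mask consumed above could have been
# built along these lines (an assumption; gray_imgs_mat is taken to hold one
# flattened image per row):
from scipy.spatial.distance import pdist, squareform

sq_dists = squareform(pdist(gray_imgs_mat))
mask = np.zeros_like(sq_dists, dtype=bool)
mask[np.triu_indices_from(mask)] = True  # hide the redundant upper triangle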
Beispiel #40
0
def train_the_model(train_R_indices, train_R, cv_R_indices, cv_R,
                    test_R_indices, test_R, BATCH_SIZE, NUM_EPOCHS, LAMBDA, lr,
                    train_op, loss_op, y_pred_op, X, y, n_investors, n_input,
                    threshold):

    start = time.time()
    n_batches = len(train_R) // BATCH_SIZE
    init = tf.global_variables_initializer()
    batch = Batch(train_R_indices,
                  train_R,
                  n_investors,
                  n_input,
                  BATCH_SIZE=BATCH_SIZE)
    epoch_loss_train, _reg = 0, 0
    loss_cv_arr, loss_train_arr = [], []

    best_save_score = -np.inf

    print('NUM_EPOCHS: {}\nLAMBDA: {}\nlr: {}\nn_batches: {}\nBATCH_SIZE: {}\nthreshold: {}'.format(\
                                            NUM_EPOCHS, LAMBDA, lr, n_batches, BATCH_SIZE, threshold))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    print('start SGD iterations...', end='\r')

    with tf.Session() as sess:
        sess.run(init)
        epoch_end = time.time()
        while not (batch.epoch == NUM_EPOCHS and batch.last_batch == True):
            batch_X, batch_y = batch.next()
            if batch.i0 == 0:
                print('Epoch %d %s' % (batch.epoch, '_' * 62))

            _, _batch_loss_train = sess.run([train_op, loss_op],
                                            feed_dict={
                                                X: batch_X,
                                                y: batch_y
                                            })

            epoch_loss_train += _batch_loss_train * (batch.i1 - batch.i0)

            print("batch_no:{}/{}, loss_train:{:6.4f}, t={:0.1f} sec".format(
                batch.batch_no, n_batches, epoch_loss_train / batch.i1,
                time.time() - epoch_end),
                  end='\r')

            if batch.last_batch:
                # collect some statistics for printing the loss function etc
                # fetch the losses

                print('\nEvaluating loss_cv and preds_cv on cv set... ',
                      end='\r')
                preds_cv, _loss_cv = evaluate_preds_and_loss(
                    sess, cv_R, cv_R_indices, loss_op, y_pred_op, X, y,
                    BATCH_SIZE, n_investors, n_input)

                # threshold ~ 0.7 <== an important parameter !!!
                print('Calculating ROC curve, threshold: {:3.1f}         '.
                      format(threshold),
                      end='\r')
                #_, _, _precision_cv, _recall_cv, _f1_score_cv, _ = ROC_statistics(preds_cv, cv_R, threshold=threshold)

                _, _, _, _, f1_score_cv, _ = ROC_statistics(preds_cv, cv_R)
                # f1_score_cv is an array.
                idx = np.nanargmax(f1_score_cv)
                _f1_score_cv = f1_score_cv[idx]
                # retrieve the threshold value where f1_score_cv reaches a maximum.
                threshold = np.linspace(0, 1, len(f1_score_cv))[idx]

                loss_cv_arr.append(_loss_cv)
                loss_train_arr.append(epoch_loss_train / (batch.i1))

                # resetting some iteration variables....
                epoch_loss_train, _reg = 0, 0
                epoch_end = time.time()

                # Save the model if _f1_score_cv has reached a new maximum.
                if (_f1_score_cv > best_save_score):
                    best_save_score = _f1_score_cv
                    save_path = saver.save(
                        sess, 'saved_models/DL_models/best_model.ckpt')
                    ckpt = '  !! CHECKPOINT!!'
                    batch.ckpt_epoch = batch.epoch
                else:
                    ckpt = ''
                # printing....
                print(
                    'loss_train: {0:6.4f}, **loss_cv: {1:6.4f}**, f1_score_cv: {2:6.4f} @threshold:{3:1.2f} {4:3s}'
                    .format(loss_train_arr[-1], loss_cv_arr[-1], _f1_score_cv,
                            threshold, ckpt))

                with open('saved_models/loss_train_and_cv.pkl', 'wb') as f:
                    pickle.dump((loss_train_arr, loss_cv_arr, batch), f)

    print()
    return loss_cv_arr, loss_train_arr, batch
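# Small self-contained illustration of the threshold selection above: the F1
# score is evaluated on a uniform grid of thresholds and np.nanargmax picks the
# best grid point while ignoring NaN entries (values here are made up).
import numpy as np
f1_grid = np.array([np.nan, 0.41, 0.55, 0.52, np.nan])
thresholds = np.linspace(0, 1, len(f1_grid))
idx_best = np.nanargmax(f1_grid)
print(thresholds[idx_best], f1_grid[idx_best])  # 0.5 0.55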
Beispiel #41
0
    def compare(self,
                predicted,
                matchingFunc,
                output_fn,
                error_file=None,
                binary=False):
        ''' Compare gold against predicted using a specified matching function.
            Outputs PR curve to output_fn '''

        y_true = []
        y_scores = []
        errors = []
        correct = 0
        incorrect = 0

        correctTotal = 0
        unmatchedCount = 0
        predicted = Benchmark.normalizeDict(predicted)
        gold = Benchmark.normalizeDict(self.gold)
        if binary:
            predicted = Benchmark.binarize(predicted)
            gold = Benchmark.binarize(gold)
        #gold = self.gold

        # taking all distinct values of confidences as thresholds
        confidence_thresholds = set()
        for sent in predicted:
            for predicted_ex in predicted[sent]:
                confidence_thresholds.add(predicted_ex.confidence)

        confidence_thresholds = sorted(list(confidence_thresholds))
        num_conf = len(confidence_thresholds)

        results = {}
        p = np.zeros(num_conf)
        pl = np.zeros(num_conf)
        r = np.zeros(num_conf)
        rl = np.zeros(num_conf)

        for sent, goldExtractions in gold.items():

            if sent in predicted:
                predictedExtractions = predicted[sent]
            else:
                predictedExtractions = []

            scores = [[None for _ in predictedExtractions]
                      for __ in goldExtractions]

            # print("***Gold Extractions***")
            # print("\n".join([goldExtractions[i].pred + ' ' + " ".join(goldExtractions[i].args) for i in range(len(goldExtractions))]))
            # print("***Predicted Extractions***")
            # print("\n".join([predictedExtractions[i].pred+ " ".join(predictedExtractions[i].args) for i in range(len(predictedExtractions))]))

            for i, goldEx in enumerate(goldExtractions):
                for j, predictedEx in enumerate(predictedExtractions):
                    score = matchingFunc(goldEx,
                                         predictedEx,
                                         ignoreStopwords=True,
                                         ignoreCase=True)
                    scores[i][j] = score

            # OPTIMISED GLOBAL MATCH
            sent_confidences = [
                extraction.confidence for extraction in predictedExtractions
            ]
            sent_confidences.sort()
            prev_c = 0
            for conf in sent_confidences:
                c = confidence_thresholds.index(conf)
                ext_indices = []
                for ext_indx, extraction in enumerate(predictedExtractions):
                    if extraction.confidence >= conf:
                        ext_indices.append(ext_indx)

                recall_numerator = 0
                for i, row in enumerate(scores):
                    max_recall_row = max(
                        [row[ext_indx][1] for ext_indx in ext_indices],
                        default=0)
                    recall_numerator += max_recall_row

                precision_numerator = 0

                selected_rows = []
                selected_cols = []
                num_precision_matches = min(len(scores), len(ext_indices))
                for t in range(num_precision_matches):
                    matched_row = -1
                    matched_col = -1
                    matched_precision = -1  # initialised to <0 so that it updates whenever precision is 0 as well
                    for i in range(len(scores)):
                        if i in selected_rows:
                            continue
                        for ext_indx in ext_indices:
                            if ext_indx in selected_cols:
                                continue
                            if scores[i][ext_indx][0] > matched_precision:
                                matched_precision = scores[i][ext_indx][0]
                                matched_row = i
                                matched_col = ext_indx

                    selected_rows.append(matched_row)
                    selected_cols.append(matched_col)
                    precision_numerator += scores[matched_row][matched_col][0]

                p[prev_c:c + 1] += precision_numerator
                pl[prev_c:c + 1] += len(ext_indices)
                r[prev_c:c + 1] += recall_numerator
                rl[prev_c:c + 1] += len(scores)

                prev_c = c + 1

            # for indices beyond the maximum sentence confidence, len(scores) has to be added to the denominator of recall
            rl[prev_c:] += len(scores)

        prec_scores = [a / b if b > 0 else 1 for a, b in zip(p, pl)]
        rec_scores = [a / b if b > 0 else 0 for a, b in zip(r, rl)]

        f1s = [Benchmark.f1(p, r) for p, r in zip(prec_scores, rec_scores)]
        try:
            optimal_idx = np.nanargmax(f1s)
            optimal = (prec_scores[optimal_idx], rec_scores[optimal_idx],
                       f1s[optimal_idx])
        except ValueError:
            # When there is no prediction
            optimal = (0, 0, 0)

        # In order to calculate auc, we need to add the point corresponding to precision=1 , recall=0 to the PR-curve
        temp_rec_scores = rec_scores.copy()
        temp_prec_scores = prec_scores.copy()
        temp_rec_scores.append(0)
        temp_prec_scores.append(1)
        # print("AUC: {}\t Optimal (precision, recall, F1): {}".format( np.round(auc(temp_rec_scores, temp_prec_scores),3), np.round(optimal,3) ))

        with open(output_fn, 'w') as fout:
            fout.write('{0}\t{1}\t{2}\n'.format("Precision", "Recall",
                                                "Confidence"))
            for cur_p, cur_r, cur_conf in sorted(zip(prec_scores, rec_scores,
                                                     confidence_thresholds),
                                                 key=lambda cur: cur[1]):
                fout.write('{0}\t{1}\t{2}\n'.format(cur_p, cur_r, cur_conf))

        if len(f1s) > 0:
            return np.round(auc(temp_rec_scores, temp_prec_scores),
                            3), np.round(optimal, 3)
        else:
            # When there is no prediction
            return 0, (0, 0, 0)
Beispiel #42
0
def getErrors(true_behavior, logpos, grid):
    decoded_behavior = grid[np.nanargmax(logpos, axis=1)].flatten()
    assert (len(true_behavior) == len(decoded_behavior))
    errors = np.array( [np.linalg.norm(pred_i - true_behav_i) \
                for pred_i, true_behav_i in zip(decoded_behavior, true_behavior)])
    return errors
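# Minimal usage sketch for getErrors: `grid` holds the candidate positions and
# `logpos` the per-sample log-posterior over those positions, so each decoded
# value is the grid entry with the highest posterior in its row (toy numbers).
grid_demo = np.array([0.0, 1.0, 2.0, 3.0])
logpos_demo = np.array([[-5.0, -1.0, -2.0, -4.0],
                        [-3.0, -2.0, -0.5, -6.0]])
true_demo = np.array([1.0, 2.0])
print(getErrors(true_demo, logpos_demo, grid_demo))  # [0. 0.]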
Beispiel #43
0
    def addEvidence(self, dataX, dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add
        @param dataY: the Y training values
        """

        if dataX.shape[0] == 1:
            self.tree = np.array([[-1, dataY[0], -1, -1]], dtype=float)

            return self.tree
        if np.isclose(dataY, dataY[0]).all():
            self.tree = np.array([[-1, dataY[0], -1, -1]], dtype=float)

            return self.tree
        if dataX.shape[0] <= self.leaf_size:
            self.tree = np.array([[-1, np.mean(dataY), -1, -1]])

            return self.tree
        else:

            corr = []
            for i in range(0, dataX.shape[1]):
                c = np.corrcoef(dataX[:, i], dataY)
                corr.append(c[0, 1])

            corrArray = np.array(corr)
            max = np.nanmax(corrArray)
            index = np.nanargmax(corrArray)

            while True:

                SplitVal = np.median(dataX[:, index], axis=0)
                if np.isclose(dataX[:, index], dataX[0, index]).all():
                    corrArray = corrArray[corrArray < max]
                    max = np.nanmax(corrArray)
                    index = np.nanargmax(corrArray)
                    continue
                elif SplitVal >= np.nanmax(dataX[:, index]):

                    SplitVal = (dataX[:, index].max() +
                                dataX[:, index].min()) / 2
                    break

                else:
                    break

            lefttree = np.array(
                self.addEvidence(dataX[dataX[:, index] <= SplitVal],
                                 dataY[dataX[:, index] <= SplitVal]))
            righttree = np.array(
                self.addEvidence(dataX[dataX[:, index] > SplitVal],
                                 dataY[dataX[:, index] > SplitVal]))

            root = np.array([[index, SplitVal, 1, lefttree.shape[0] + 1]],
                            dtype=float)

            self.tree = np.vstack((root, lefttree))
            self.tree = np.vstack((self.tree, righttree))

            return self.tree
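# For reference, each row of the tree array built above encodes one node as
# [feature_index, split_value, left_offset, right_offset], with both offsets
# relative to that row, and leaves stored as [-1, value, -1, -1]. A single
# split on feature 0 at 0.5 with two leaf predictions would therefore be:
example_tree = np.array([[0., 0.5, 1., 2.],
                         [-1., 0.2, -1., -1.],
                         [-1., 0.8, -1., -1.]])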
Beispiel #44
0
    N_star = ts.tabu_active(sMem, sMemVal, N, f_curr[0], sol_values)

    logger.info("length of sub-nbrhd: {} \n".format(len(N_star)))

    #Selecting a candidate
    #if all the solutions are tabu:
    if len(N_star) == 0:
        all_val = [cf.evaluate(s)[0] for s in N]
        s = ts.aspiration_criteria(neighborhood=N, values=all_val)
        f_s = cf.evaluate(s)
    else:
        #otherwise -
        #Pick the solution with the best value
        #from non-tabu members even if they are non-improving
        s_values = [cf.evaluate(s)[0] for s in N_star]
        s = N_star[np.nanargmax(s_values)]
        f_s = cf.evaluate(s)

    logger.info("candiddate solution: {} {} \n".format(f_s, s))

    #Finding where the flip occurred
    tabu_ind = ts.tabu_criteria(s, x_curr)

    logger.info("tabooed element index: {} \n".format(tabu_ind))

    #updating all variables
    sMem, sMemVal = ts.st_memory(update_ind=tabu_ind,
                                 mem=sMem,
                                 memValue=sMemVal,
                                 solution=s)
    logger.info("short term memory and value: {} {}".format(sMem, sMemVal))
Beispiel #45
0
    if os.path.isfile(avepath):
        print('Averaged data found at: %s' % avepath)
        avedict = np.load(
            avepath, allow_pickle=True).item()  # load here the above pickle
    else:
        sys.exit(
            'ERROR: no averaged data found - run prep_plumes.py via submit_interp.sh first on Cedar!'
        )

    #extract locations of max pm, w, temp ------------------------------
    PMmax_profile = np.nanmax(avedict['pm25'], 1)  #get max smoke profile
    top_threshold = max(
        PMmax_profile
    ) * 0.001  #variable threshold (based on near-surface high concentrations!!!!)
    PMmax_profile[PMmax_profile < top_threshold] = np.nan
    PMmax_idx = np.nanargmax(avedict['pm25'][np.isfinite(PMmax_profile)],
                             1)  #get downwind location
    PMmax_meters = PMmax_idx * plume.dx

    wave_plume = avedict['w'].copy()
    wave_plume[avedict['pm25'] <
               top_threshold] = np.nan  #mask where there is no plume
    wmax_profile = np.nanmax(wave_plume, 1)  #get the profiles
    wmax_idx = np.nanargmax(wave_plume[np.isfinite(wmax_profile)],
                            1)  #get downwind location (index)
    watPM_profile = np.array([
        avedict['w'][ni, i] for ni, i in enumerate(PMmax_idx)
    ])  #get the profiles

    tmax_profile = np.nanmax(avedict['temp'], 1)
    tmax_profile[np.isnan(PMmax_profile)] = np.nan
    tmax_idx = np.nanargmax(avedict['temp'][np.isfinite(tmax_profile)], 1)
Beispiel #46
0
# Section 3.1: Difference, correlation and BIAS for scatter plot
# Difference
dif_pr = (prns - pr).round(2)

# Pearson correlation coefficient
pearson_pr = round(stats.pearsonr(prns, pr)[0], 2)

# Bias = mean error
bias_pr = round(dif_pr.mean(), 2)

# Section 3.2: Maximum height storm top and max bin
maxh = (round(np.nanmax(hst), 2))
maxbin = int(176 - (maxh / 125))

# Max Lon, Lat and beam
lon_maxh = round(lon[np.nanargmax(hst)], 2)
lat_maxh = round(lat[np.nanargmax(hst)], 2)
beam_maxh = np.argmax(np.amax(ds_hst, axis=0))

# %%
# Section 3.3: Prepare the reflectivity profile (cross track)
# using the "zFactorCorrected" variable.
# through the maximum height:
dbz_hmax = ds['NS']['SLV']['zFactorCorrected'][2555:2780, beam_maxh, :]
dbz_hmax_c = np.where(dbz_hmax[:, :] <= -9999, np.nan, dbz_hmax[:, :])
lat_hmax = ds_lat[:, beam_maxh]
lon_hmax = ds_lon[:, beam_maxh]

# through the hurricane eye:
dbz_eye = ds['NS']['SLV']['zFactorCorrected'][2555:2780, 48, :]
dbz_eye_c = np.where(dbz_eye[:, :] <= -9999, np.nan, dbz_eye[:, :])
Beispiel #47
0
    def resp_newton(self, response, responsef, iterations, ky, kx, use_sz):
        n_scale = response.shape[2]
        index_max_in_row = np.argmax(response, 0)
        max_resp_in_row = np.max(response, 0)
        index_max_in_col = np.argmax(max_resp_in_row, 0)
        init_max_response = np.max(max_resp_in_row, 0)
        col = index_max_in_col.flatten(order="F")

        max_row_perm = index_max_in_row
        row = max_row_perm[col, np.arange(n_scale)]

        trans_row = (row - 1 + np.floor((use_sz[1] - 1) / 2)) % use_sz[1] \
                    - np.floor((use_sz[1] - 1) / 2) + 1
        trans_col = (col - 1 + np.floor((use_sz[0] - 1) / 2)) % use_sz[0] \
                    - np.floor((use_sz[0] - 1) / 2) + 1
        init_pos_y = np.reshape(2 * np.pi * trans_row / use_sz[1],
                                (1, 1, n_scale))
        init_pos_x = np.reshape(2 * np.pi * trans_col / use_sz[0],
                                (1, 1, n_scale))
        max_pos_y = init_pos_y
        max_pos_x = init_pos_x

        # pre-compute complex exponential
        iky = 1j * ky
        exp_iky = np.tile(iky[np.newaxis, :, np.newaxis], (1, 1, n_scale)) * \
                  np.tile(max_pos_y, (1, ky.shape[0], 1))
        exp_iky = np.exp(exp_iky)

        ikx = 1j * kx
        exp_ikx = np.tile(ikx[:, np.newaxis, np.newaxis], (1, 1, n_scale)) * \
                  np.tile(max_pos_x, (kx.shape[0], 1, 1))
        exp_ikx = np.exp(exp_ikx)

        # gradient_step_size = gradient_step_size / prod(use_sz)

        ky2 = ky * ky
        kx2 = kx * kx

        iter = 1
        while iter <= iterations:
            # Compute gradient
            ky_exp_ky = np.tile(ky[np.newaxis, :, np.newaxis],
                                (1, 1, exp_iky.shape[2])) * exp_iky
            kx_exp_kx = np.tile(kx[:, np.newaxis, np.newaxis],
                                (1, 1, exp_ikx.shape[2])) * exp_ikx
            y_resp = np.einsum('ilk,ljk->ijk', exp_iky, responsef)
            resp_x = np.einsum('ilk,ljk->ijk', responsef, exp_ikx)
            grad_y = -np.imag(np.einsum('ilk,ljk->ijk', ky_exp_ky, resp_x))
            grad_x = -np.imag(np.einsum('ilk,ljk->ijk', y_resp, kx_exp_kx))
            ival = 1j * np.einsum('ilk,ljk->ijk', exp_iky, resp_x)
            H_yy = np.tile(ky2[np.newaxis, :, np.newaxis],
                           (1, 1, n_scale)) * exp_iky
            H_yy = np.real(-np.einsum('ilk,ljk->ijk', H_yy, resp_x) + ival)

            H_xx = np.tile(kx2[:, np.newaxis, np.newaxis],
                           (1, 1, n_scale)) * exp_ikx
            H_xx = np.real(-np.einsum('ilk,ljk->ijk', y_resp, H_xx) + ival)
            H_xy = np.real(
                -np.einsum('ilk,ljk->ijk', ky_exp_ky,
                           np.einsum('ilk,ljk->ijk', responsef, kx_exp_kx)))
            det_H = H_yy * H_xx - H_xy * H_xy

            # Compute new position using newtons method
            diff_y = (H_xx * grad_y - H_xy * grad_x) / det_H
            diff_x = (H_yy * grad_x - H_xy * grad_y) / det_H
            max_pos_y = max_pos_y - diff_y
            max_pos_x = max_pos_x - diff_x

            # Evaluate maximum
            exp_iky = np.tile(iky[np.newaxis, :, np.newaxis], (1, 1, n_scale)) * \
                      np.tile(max_pos_y, (1, ky.shape[0], 1))
            exp_iky = np.exp(exp_iky)

            exp_ikx = np.tile(ikx[:, np.newaxis, np.newaxis], (1, 1, n_scale)) * \
                      np.tile(max_pos_x, (kx.shape[0], 1, 1))
            exp_ikx = np.exp(exp_ikx)

            iter = iter + 1

        max_response = 1 / np.prod(use_sz) * \
                       np.real(np.einsum('ilk,ljk->ijk',
                                         np.einsum('ilk,ljk->ijk', exp_iky, responsef),
                                         exp_ikx))

        # check for scales that have not increased in score
        ind = max_response < init_max_response
        max_response[0, 0, ind.flatten()] = init_max_response[ind.flatten()]
        max_pos_y[0, 0, ind.flatten()] = init_pos_y[0, 0, ind.flatten()]
        max_pos_x[0, 0, ind.flatten()] = init_pos_x[0, 0, ind.flatten()]

        sind = int(np.nanargmax(max_response, 2))
        disp_row = (np.mod(max_pos_y[0, 0, sind] + np.pi, 2 * np.pi) -
                    np.pi) / (2 * np.pi) * use_sz[1]
        disp_col = (np.mod(max_pos_x[0, 0, sind] + np.pi, 2 * np.pi) -
                    np.pi) / (2 * np.pi) * use_sz[0]

        return disp_row, disp_col, sind
Beispiel #48
0
def train_epochs(args, model, optimizer, params, dicts, struc_feats,
                 struc_labels):
    """
        Main loop: does training and testing.
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None

    print("\n\ntest_only: " + str(test_only))

    # Converting to csr sparse matrix form
    X = struc_feats.tocsr()

    print(X.shape[0])

    # Splitting into train, val and test --> need idx values passed as args
    X_train = X[:args.len_train]
    y_train = struc_labels[:args.len_train]

    X_val = X[args.len_train:args.len_train + args.len_val]
    X_test = X[args.len_train + args.len_val:args.len_train + args.len_val +
               args.len_test]

    # Standardizing features
    scaler = MaxAbsScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_val_std = scaler.transform(X_val)
    X_test_std = scaler.transform(X_test)
    ################################

    opt_thresh = None  # Placeholder, only needed when predicting on test set, updated below

    #train for n_epochs unless criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):

        #only test on train/test set on very last epoch
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model, args.desc,
                    time.strftime('%b_%d_%H:%M', time.gmtime())
                ]))
            os.mkdir(model_dir)

        elif args.test_model:

            model_dir = os.getcwd(
            )  #just save things to where this script was called

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs,
                                args.batch_size, args.data_path, test_only,
                                dicts, model_dir, args.gpu, args.quiet,
                                X_train_std, X_val_std, X_test_std, y_train,
                                args.train_frac, args.test_frac, opt_thresh,
                                args.struc_aux_loss_wt, args.conv_aux_loss_wt)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        #save metrics, model, params
        persistence.save_everything(args, metrics_hist_all, model, model_dir,
                                    params, args.criterion)

        if test_only:
            break

        if (epoch == args.n_epochs - 2):
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if args.criterion in metrics_hist.keys():
            if (early_stop(metrics_hist, args.criterion, args.patience)):
                #stop training, do tests on test and train sets, and then stop the script
                print(
                    "%s hasn't improved in %d epochs, early stopping or just completed last epoch"
                    % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                    metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                model = torch.load(
                    '%s/model_best_%s.pth' %
                    (model_dir,
                     args.criterion))  # LOADING BEST MODEL FOR FINAL TEST

    return epoch + 1
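# Sketch of the early_stop helper referenced above (its definition is not shown
# here). The assumed behaviour: stop once the criterion metric has not improved
# for `patience` consecutive epochs, treating larger values as better.
def early_stop(metrics_hist, criterion, patience):
    vals = metrics_hist[criterion]
    if len(vals) <= patience:
        return False
    best_idx = np.nanargmax(vals)
    return best_idx < len(vals) - patience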
Beispiel #49
0
def significant_figures(n, unc=None, max_sf=20, rtol=1e-20):
    """
    Iterative method to determine the number of significant digits for a given float,
    optionally providing an uncertainty.

    Parameters
    ----------
    n : :class:`float`
        Number from which to ascertain the significance level.
    unc : :class:`float`, :code:`None`
        Uncertainty, which if provided is used to derive the number of significant
        digits.
    max_sf : :class:`int`
        An upper limit to the number of significant digits suggested.
    rtol : :class:`float`
        Relative tolerance to determine similarity of numbers, used in calculations.

    Returns
    -------
    :class:`int`
        Number of significant digits.
    """
    if not hasattr(n, "__len__"):
        if np.isfinite(n):
            if unc is not None:
                mag_n = np.floor(np.log10(np.abs(n)))
                mag_u = np.floor(np.log10(unc))
                if not np.isfinite(mag_u) or not np.isfinite(mag_n):
                    return np.nan
                sf = int(max(0, int(1.0 + mag_n - mag_u)))
            else:
                sf = min(
                    [
                        ix
                        for ix in range(max_sf)
                        if np.isclose(round_sig(n, ix), n, rtol=rtol)
                    ]
                )
            return sf
        else:
            return 0
    else:  # this isn't working
        n = np.array(n)
        _n = n.copy()
        mask = np.isclose(n, 0.0)  # can't process zeros
        _n[mask] = np.nan
        if unc is not None:
            mag_n = np.floor(np.log10(np.abs(_n)))
            mag_u = np.floor(np.log10(unc))
            sfs = np.nanmax(
                np.vstack(
                    [np.zeros(mag_n.shape), (1.0 + mag_n - mag_u).astype(int)]
                ),
                axis=0,
            ).astype(int)
        else:
            rounded = np.vstack([_n] * max_sf).reshape(max_sf, *_n.shape)
            indx = np.indices(rounded.shape)[0]  # get the row indexes for no. sig figs
            rounded = round_sig(rounded, indx)
            sfs = np.nanargmax(np.isclose(rounded, _n, rtol=rtol), axis=0)
        sfs[np.isnan(sfs)] = 0
        return sfs
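# Worked example for the scalar branch above: with n = 3.142 and unc = 0.01,
# mag_n = floor(log10(3.142)) = 0 and mag_u = floor(log10(0.01)) = -2, so
# 1 + mag_n - mag_u = 3 significant digits are suggested.
print(significant_figures(3.142, unc=0.01))  # -> 3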
Beispiel #50
0
def test_hull_construction():
    # test case 1
    vals = np.array([[50, 60], [20, 40], [-74, 50], [-95, +10], [20, 60]])
    bh = BoundingConvexHull(vals)
    mask = bh.mask 
    assert mask.shape == (np.max(vals[:, 1]) - np.min(vals[:, 1]) + 1, np.max(vals[:, 0]) - np.min(vals[:, 0]) + 1)
    assert np.abs(mask.sum() - bh.area) / bh.area < 0.05 # integral mask area needs to be close to true area
    normalized_normals = bh.rnormals / np.linalg.norm(bh.rnormals, axis=1)[:, None]
    # test case 2
    for e, n in zip(bh.edges, normalized_normals):
        edge_vec = e[1] - e[0]
        assert np.all(np.abs(np.dot(edge_vec, n)) < 1.0e-8)

    # test case 3
    valsextract = np.array([[-10, 120], [90, 268], [293, 110],[40, -30]])
    bh_extract = BoundingConvexHull(valsextract)
    sinc_npx = 255
    sinc = np.sinc(np.linspace(-7, 7, sinc_npx))
    sinc2d = np.outer(sinc, sinc).reshape((1, 1, sinc_npx, sinc_npx))
    extracted_data, extracted_window_extents = BoundingConvexHull.regional_data(bh_extract, sinc2d, oob_value=np.nan)
    assert extracted_window_extents == [-10, 293, -30, 268]
    sparse_mask = np.array(bh_extract.sparse_mask)
    lines = np.hstack([bh_extract.corners, np.roll(bh_extract.corners, -1, axis=0)])
    minx = np.min(lines[:, 0:4:2]); maxx = np.max(lines[:, 0:4:2])
    miny = np.min(lines[:, 1:4:2]); maxy = np.max(lines[:, 1:4:2])
    sel = np.logical_and(np.logical_and(sparse_mask[:, 1] >= 0,
                                        sparse_mask[:, 1] < 255),
                         np.logical_and(sparse_mask[:, 0] >= 0,
                                        sparse_mask[:, 0] < 255))
    
    flat_index = (sparse_mask[sel][:, 0])*sinc_npx + (sparse_mask[sel][:, 1])
    sinc_integral = np.sum(sinc2d.ravel()[flat_index]) 
    assert np.abs(sinc_integral - np.nansum(extracted_data.ravel())) < 1.0e-8
    v = np.nanargmax(extracted_data)
    vx = v % extracted_data.shape[3]; vy = v // extracted_data.shape[3]
    cextracted = (extracted_window_extents[0] + vx,
                  extracted_window_extents[2] + vy)
    v = np.nanargmax(sinc2d)
    sincvx = v % sinc_npx; sincvy = v // sinc_npx
    csinc = tuple([sincvx, sincvy]) 
    assert csinc == cextracted
    
    # test case 4
    vals2 = np.array([[-20, -120], [0, 60], [40, -60]])
    vals3 = np.array([[-20, 58], [-40, 80], [20, 100]])
    bh2 = BoundingConvexHull(vals2)
    bh3 = BoundingConvexHull(vals3)
    assert bh.overlaps_with(bh2)
    assert not bh.overlaps_with(bh3)
    assert not bh2.overlaps_with(bh3)

    # test case 5
    assert (-1000, -1000) not in bh
    assert (30, 0) not in bh
    assert (0, 0) not in bh
    assert (-40, 30) in bh

    # test case 6
    bb = BoundingBox(-14, 20, 30, 49)
    assert bb.centre == [3, 39]
    assert bb.box_npx == (35, 20)
    assert bb.mask.shape == bb.box_npx[::-1]
    assert bb.area == 35 * 20
    
    assert np.sum(bb.mask) == bb.area
    assert (-15, 35) not in bb
    assert (0, 35) in bb

    bb2 = BoundingBoxFactory.AxisAlignedBoundingBox(bb) #enforce odd
    assert bb2.box_npx == (35, 21)
    assert bb2.area == 35 * 21
    assert (bb.sparse_mask == bb2.sparse_mask).all()
    assert (-15, 35) not in bb2
    assert (0, 35) in bb2

    bb3 = BoundingBoxFactory.AxisAlignedBoundingBox(bb, square=True) #enforce odd
    assert bb3.box_npx[0] == bb3.box_npx[1]
    assert bb3.box_npx[0] % 2 == 1 #enforce odd
    assert bb3.area == bb3.box_npx[0]**2
    assert (bb.sparse_mask == bb3.sparse_mask).all()
    assert (-15, 35) not in bb2
    assert (0, 35) in bb2

    # test case 7
    bb4s = BoundingBoxFactory.SplitBox(bb, nsubboxes=3)
    assert len(bb4s) == 9
    xlims = [(np.min(c.corners[:, 0]), np.max(c.corners[:, 0])) for c in bb4s][0:3]
    ylims = [(np.min(c.corners[:, 1]), np.max(c.corners[:, 1])) for c in bb4s][0::3]
    assert np.all(xlims == np.array([(-14, -3), (-2, 9), (10, 20)]))
    assert np.all(ylims == np.array([(30, 36), (37, 43), (44, 49)]))
    assert np.sum([b.area for b in bb4s]) == bb.area

    for bb4 in bb4s:
        assert bb4.area == np.sum(bb4.mask)

    # test case 8
    bb5 = BoundingBox(-14, 20, 30, 50)
    assert bb5.box_npx == (35, 21)
    bb6 = BoundingBoxFactory.PadBox(bb5, 41, 27)
    assert bb6.box_npx == (41, 27)
    assert bb5.centre == bb6.centre
    assert np.sum(bb5.mask) == np.sum(bb6.mask)
    bb7s = list(map(lambda x: BoundingBoxFactory.PadBox(x, 17, 11), bb4s))
    assert all([b.box_npx == (17, 11) for b in bb7s])
    assert np.sum([np.sum(b.mask) for b in bb7s]) == np.sum([np.sum(b.mask) for b in bb4s])

    # test case 9
    facet_regions = list(map(lambda f: BoundingBoxFactory.PadBox(f, 63, 63), 
                             BoundingBoxFactory.SplitBox(BoundingBoxFactory.AxisAlignedBoundingBox(bh_extract), nsubboxes=5)))
    facets = list(map(lambda pf: BoundingConvexHull.regional_data(pf, sinc2d, oob_value=np.nan),
                      facet_regions))
    stitched_image, stitched_region = BoundingBox.project_regions([f[0] for f in facets], facet_regions)
    assert np.abs(sinc_integral - np.nansum([np.nansum(f[0]) for f in facets])) < 1.0e-8
    assert np.abs(sinc_integral - np.sum(stitched_image)) < 1.0e-8
    v = np.argmax(stitched_image)
    vx = v % stitched_image.shape[3]; vy = v // stitched_image.shape[3]
    cstitched = (np.min(stitched_region.corners[:, 0]) + vx, np.min(stitched_region.corners[:, 1]) + vy)
    assert cstitched == csinc

    # test case 10
    olap_box1 = BoundingBox(110, 138, 110, 135)
    olap_box2 = BoundingBox(115, 150, 109, 150)
    olap_box3 = BoundingBox(125, 130, 125, 130)
    BoundingConvexHull.normalize_masks([olap_box1, olap_box2, olap_box3])
    ext1 = BoundingConvexHull.regional_data(olap_box1, sinc2d)[0]
    ext2 = BoundingConvexHull.regional_data(olap_box2, sinc2d)[0]
    ext3 = BoundingConvexHull.regional_data(olap_box3, sinc2d)[0]
    olaps_stitched_image, olaps_stitched_region = BoundingBox.project_regions([ext1, ext2, ext3], 
                                                                              [olap_box1, olap_box2, olap_box3])
    v = np.nanargmax(olaps_stitched_image)
    vx = v % olaps_stitched_image.shape[3]; vy = v // olaps_stitched_image.shape[3]
    cstitched_olap = (np.min(olaps_stitched_region.corners[:, 0]) + vx, 
                      np.min(olaps_stitched_region.corners[:, 1]) + vy)
    assert cstitched_olap == csinc
    assert np.abs(1.0 - np.nanmax(olaps_stitched_image)) < 1.0e-8
    
    # visual inspection
    if DEBUG:
        from matplotlib import pyplot as plt
        plt.figure(figsize=(7, 2.5))
        plt.title("Winding, normals and masking check")
        for h in [bh, bh2, bh3]:
            for ei, e in enumerate(h.edges):
                plt.plot(e[:, 0], e[:, 1], "r--")
                plt.text(e[0, 0], e[0, 1], str(ei))
        
        plt.plot(bh.edge_midpoints[:, 0], bh.edge_midpoints[:, 1], "ko")
        for e, n in zip(bh.edge_midpoints, normalized_normals):
            p0 = e 
            p = e + n*6
            plt.plot([p0[0], p[0]], [p0[1], p[1]], "b--", lw=2)
        
        plt.scatter(vals[:, 0], vals[:, 1])    
        plt.imshow(mask, extent=[np.min(vals[:, 0]), np.max(vals[:, 0]), np.max(vals[:, 1]), np.min(vals[:, 1])])

        plt.grid(True)
        plt.figure(figsize=(7, 2.5))
        plt.title("Data extraction check (global)")
        for h in [bh_extract]:
            for ei, e in enumerate(h.edges):
                plt.plot(e[:, 0], e[:, 1], "r--")
        plt.imshow(sinc2d[0, 0, :, :], extent=[0, sinc_npx, sinc_npx, 0])
        plt.grid(True)

        plt.figure(figsize=(7, 2.5))
        plt.title("Data extraction check (local)")
        for h in [bh_extract]:
            for ei, e in enumerate(h.edges):
                plt.plot(e[:, 0], e[:, 1], "r--")
        plt.imshow(extracted_data[0, 0, :, :],
            extent=[extracted_window_extents[0], extracted_window_extents[1],
                   extracted_window_extents[3], extracted_window_extents[2]])
        
        plt.figure(figsize=(7, 2.5))
        plt.title("Faceting check")
        for h in [bh_extract]:
            for ei, e in enumerate(h.edges):
                plt.plot(e[:, 0], e[:, 1], "r--")
        for f in facet_regions:
            for ei, e in enumerate(f.edges):
                plt.plot(e[:, 0], e[:, 1], "co--")
        

        plt.imshow(stitched_image[0, 0, :, :], 
            extent=[np.min(stitched_region.corners[:, 0]), np.max(stitched_region.corners[:, 0]),
                    np.max(stitched_region.corners[:, 1]), np.min(stitched_region.corners[:, 1])])

        plt.figure(figsize=(7, 2.5))
        plt.title("Overlapping faceting check")
        for f in [olap_box1, olap_box2, olap_box3]:
            for ei, e in enumerate(f.edges):
                plt.plot(e[:, 0], e[:, 1], "co--")
        

        plt.imshow(olaps_stitched_image[0, 0, :, :], 
            extent=[np.min(olaps_stitched_region.corners[:, 0]), np.max(olaps_stitched_region.corners[:, 0]),
                    np.max(olaps_stitched_region.corners[:, 1]), np.min(olaps_stitched_region.corners[:, 1])])
        plt.xlim((np.min(olaps_stitched_region.corners[:, 0]) - 15, 
                  np.max(olaps_stitched_region.corners[:, 0]) + 15))
        plt.ylim((np.min(olaps_stitched_region.corners[:, 1]) - 15, 
                  np.max(olaps_stitched_region.corners[:, 1]) + 15))
        plt.show(block=True)
Beispiel #51
0
 def ref_impl(a):
     return np.nanargmax(a)
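Since every snippet on this page hinges on np.nanargmax, a minimal standalone sketch of its semantics may help: it returns the index of the maximum while skipping NaNs, whereas plain np.argmax propagates them, and an all-NaN input raises ValueError. The values below are illustrative only.

import numpy as np

a = np.array([1.0, np.nan, 3.0, 2.0])
print(np.nanargmax(a))   # 2 -- the NaN at index 1 is skipped
print(np.argmax(a))      # 1 -- plain argmax propagates the NaN instead

try:
    np.nanargmax(np.array([np.nan, np.nan]))
except ValueError as err:
    print(err)           # all-NaN slices cannot be reduced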
Beispiel #52
0
    def argf(self, *args, **kwargs): return np.nanargmax(*args, **kwargs)
    

class Extremum(Ch):
Beispiel #53
0
    def plot_projection(self,
                        data,
                        ax=None,
                        index=None,
                        sample_cov=0,
                        **kwargs):

        if ax is None:
            fig, ax = plt.subplots()
        else:
            fig = ax.figure

        obs_dispersion, obs_flux, obs_ivar = self._slice_spectrum(data)

        # Apply masks.
        mask = _generate_mask(obs_dispersion, self.metadata["mask"]) \
             * np.isfinite(obs_flux * obs_ivar)

        if 0 in (obs_dispersion.size, mask.sum()):
            raise ValueError("no overlapping spectra with finite flux/ivar")

        #obs_dispersion = obs_dispersion[mask]
        #obs_flux, obs_ivar = obs_flux[mask], obs_ivar[mask]

        _ = np.where(mask)[0]
        si, ei = _[0], _[-1]

        # Show uncertainties.
        obs_sigma = np.sqrt(1.0 / obs_ivar)
        fill_between_steps(ax,
                           obs_dispersion[si:ei],
                           obs_flux[si:ei] - obs_sigma[si:ei],
                           obs_flux[si:ei] + obs_sigma[si:ei],
                           facecolor="#AAAAAA",
                           edgecolor="none",
                           alpha=1)

        # Limit to the edge of what is OK.
        ax.plot(obs_dispersion[si:ei],
                obs_flux[si:ei],
                c="#444444",
                drawstyle="steps-mid")

        obs_flux[~mask] = np.nan
        ax.plot(obs_dispersion, obs_flux, c='k', drawstyle="steps-mid")

        # Get the MAP value.
        if index is None:
            index = np.nanargmax(self._inference_result[1])
        op_theta = self._inference_result[3][index]

        model_disp, model_flux = utils.parse_spectrum(self.paths[index]).T

        y = self(obs_dispersion, model_disp, model_flux, *op_theta)
        y[~mask] = np.nan

        c = kwargs.pop("c", "r")
        ax.plot(obs_dispersion, y, c=c, **kwargs)

        # Get the covariance matrix?
        if sample_cov > 0:
            cov = self._inference_result[4][index]
            print(np.sqrt(np.diag(cov)))

            # Sample values from the cov matrix and project them.
            draws = np.random.multivariate_normal(
                self._inference_result[3][index],
                self._inference_result[4][index],
                size=sample_cov)

            for draw in draws:
                y_draw = self(obs_dispersion, model_disp, model_flux, *draw)
                y_draw[~mask] = np.nan

                ax.plot(obs_dispersion, y_draw, c=c, alpha=10.0 / sample_cov)

        # Draw fill_between in y?
        ax.set_title("Index {}: {}".format(index,
                                           self.stellar_parameters[index]))

        return fig
Beispiel #54
0
X[nan_locations] = np.take(means, nan_locations[1])

# Normalize X
X = preprocessing.scale(X)

regressor = linear_model.Ridge()
alpha_range = np.logspace(-5, 3, 20)
train_scores, valid_scores = validation_curve(regressor,
                                              X,
                                              y,
                                              "alpha",
                                              alpha_range,
                                              scoring="neg_mean_squared_error",
                                              n_jobs=-1)
train_scores = [np.mean(s) for s in train_scores]
valid_scores = [np.mean(s) for s in valid_scores]

plt.plot(alpha_range, train_scores)
plt.plot(alpha_range, valid_scores)
plt.xscale("log")
plt.show()

# Take the alpha giving the highest validation score, and test it on the test set
best_alpha = alpha_range[np.nanargmax(valid_scores)]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
regressor.set_params(alpha=best_alpha)
regressor.fit(X_train, y_train)
print("best alpha =", alpha_range[np.nanargmax(valid_scores)])
print("RMSLE =", root_mean_squared_log_error(y_test,
                                             regressor.predict(X_test)))
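A side note on the validation_curve call above: the positional "alpha"/alpha_range form works on older scikit-learn, but in recent releases param_name and param_range are keyword-only. A self-contained sketch of the same alpha selection, using synthetic data and names chosen here purely for illustration:

import numpy as np
from sklearn import linear_model
from sklearn.model_selection import validation_curve

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = X @ rng.normal(size=5) + rng.normal(scale=0.1, size=200)

alpha_range = np.logspace(-5, 3, 20)
train_scores, valid_scores = validation_curve(
    linear_model.Ridge(), X, y,
    param_name="alpha",
    param_range=alpha_range,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
)
# Scores have shape (n_alphas, n_folds); average over folds, then pick the
# alpha with the best (least negative) mean score. nanargmax skips alphas
# whose folds failed and were scored as NaN.
best_alpha = alpha_range[np.nanargmax(valid_scores.mean(axis=1))]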
Beispiel #55
0
def train_epochs(args, model, optimizer, params, dicts):
    """
        Main loop: runs training and testing.
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None

    print("\n\ntest_only: " + str(test_only))

    opt_thresh = None  # Placeholder, only needed when predicting on test set, updated below

    #train for n_epochs, stopping early if the criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):

        #only test on train/test set on very last epoch
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model, args.desc,
                    time.strftime('%b_%d_%H:%M', time.gmtime())
                ]))
            os.mkdir(model_dir)

        elif args.test_model:

            model_dir = os.getcwd(
            )  #just save things to where this script was called

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs,
                                args.batch_size, args.data_path, test_only,
                                dicts, model_dir, args.gpu, args.quiet,
                                opt_thresh, args.obs_limit)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])

        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])

        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])

        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        #save metrics, model, params
        persistence.save_everything(
            args, metrics_hist_all, model, model_dir, params, args.criterion
        )  # SHOULD SAVE MODEL PARAMS AT EACH EPOCH, BELIEVE IS HAPPENING

        if test_only:
            break

        if (epoch == args.n_epochs - 2):
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if (args.criterion in metrics_hist.keys()):
            if (early_stop(metrics_hist, args.criterion, args.patience)):
                #stop training, do tests on test and train sets, and then stop the script
                print(
                    "%s hasn't improved in %d epochs, early stopping or just completed last epoch"
                    % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                    metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                model = torch.load(
                    '%s/model_best_%s.pth' %
                    (model_dir,
                     args.criterion))  # LOADING BEST MODEL FOR FINAL TEST

    return epoch + 1
Beispiel #56
0
                                             w=weights_k_dist)
                corr_pear_r[d, ] = wpearsonr(target_k,
                                             curr_train_k[:, d],
                                             w=weights_k_rank)
                corr_pear_n[d, ] = wpearsonr(target_k, curr_train_k[:, d])[0]

            corr_list = [
                corr_dist_d, corr_dist_r, corr_dist_n, corr_pear_d,
                corr_pear_r, corr_pear_n
            ]

            for j in range(len(m_list)):
                corr_k = corr_list[j]

                # pick the best one
                best_clf_ind = np.nanargmax(corr_k)
                pred_scores_best[i, j] = test_scores_norm[i, best_clf_ind]

                # pick the p dynamically
                threshold = corr_k.max() - corr_k.std() * alpha
                p = (corr_k >= threshold).sum()
                if p == 0:  # guard against extreme cases [NaN and all -1's]
                    p = 1
                pred_scores_ens[i, j] = np.max(
                    test_scores_norm[i, argmaxp(corr_k, p)])

        for m in range(len(m_list)):
            test_target_list.extend(
                [pred_scores_best[:, m], pred_scores_ens[:, m]])
            method_list.extend(
                ['DCSO_a_' + m_list[m], 'DCSO_moa_' + m_list[m]])
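The selection logic above combines two strategies: take the single most competent detector via np.nanargmax, or keep every detector within alpha standard deviations of the best. A small sketch of that idea, with made-up correlations, and with the NaN-aware nanmax/nanstd standing in for the plain max()/std() used above:

import numpy as np

corr_k = np.array([0.62, np.nan, 0.80, 0.78, 0.31])  # per-detector competence
alpha = 0.2

best = np.nanargmax(corr_k)                        # single best detector
threshold = np.nanmax(corr_k) - np.nanstd(corr_k) * alpha
selected = np.flatnonzero(corr_k >= threshold)     # dynamic ensemble; NaNs compare False
if selected.size == 0:                             # degenerate case: fall back to the best
    selected = np.array([best])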
Beispiel #57
0
 def _get_ellipsoid_parameters_basic(self):
     np.seterr(all="ignore")
     # check if there are 4 particles to form an ellipsoid
     # neglecting to check whether the 4 particles are in the same plane;
     # that is almost certain never to occur,
     # so deal with it later if it ever comes up
     if np.size(self["particle_position_x"]) < 4:
         mylog.warning("Too few particles for ellipsoid parameters.")
         return (0, 0, 0, 0, 0, 0, 0)
     # Calculate the parameters that describe the ellipsoid of
     # the particles that constitute the halo. This function returns
     # all the parameters except for the center of mass.
     com = self.center_of_mass()
     position = [
         self["particle_position_x"],
         self["particle_position_y"],
         self["particle_position_z"],
     ]
     # Locate the furthest particle from com, its vector length and index
     DW = np.array([self.gridsize[0], self.gridsize[1], self.gridsize[2]])
     position = [position[0] - com[0], position[1] - com[1], position[2] - com[2]]
     # different cases of particles being on other side of boundary
     for axis in range(np.size(DW)):
         cases = np.array(
             [position[axis], position[axis] + DW[axis], position[axis] - DW[axis]]
         )
         # pick out the smallest absolute distance from com
         position[axis] = np.choose(np.abs(cases).argmin(axis=0), cases)
     # find the furthest particle's index
     r = np.sqrt(position[0] ** 2 + position[1] ** 2 + position[2] ** 2)
     A_index = r.argmax()
     mag_A = r.max()
     # designate the A vector
     A_vector = (position[0][A_index], position[1][A_index], position[2][A_index])
     # designate the e0 unit vector
     e0_vector = A_vector / mag_A
     # locate the tB particle position by finding the max B
     e0_vector_copy = np.empty((np.size(position[0]), 3), dtype="float64")
     for i in range(3):
         e0_vector_copy[:, i] = e0_vector[i]
     rr = np.array(
         [position[0], position[1], position[2]]
     ).T  # Similar to tB_vector in old code.
     tC_vector = np.cross(e0_vector_copy, rr)
     te2 = tC_vector.copy()
     for dim in range(3):
         te2[:, dim] *= np.sum(tC_vector**2.0, axis=1) ** (-0.5)
     te1 = np.cross(te2, e0_vector_copy)
     length = np.abs(
         -np.sum(rr * te1, axis=1)
         * (1.0 - np.sum(rr * e0_vector_copy, axis=1) ** 2.0 * mag_A**-2.0)
         ** (-0.5)
     )
      # Apparently this sometimes happens: the NaNs are turned into infs,
      # which messes up the nanargmax below.
     length[length == np.inf] = 0.0
     tB_index = np.nanargmax(length)  # ignores NaNs created above.
     mag_B = length[tB_index]
     e1_vector = te1[tB_index]
     e2_vector = te2[tB_index]
     temp_e0 = rr.copy()
     temp_e1 = rr.copy()
     temp_e2 = rr.copy()
     for dim in range(3):
         temp_e0[:, dim] = e0_vector[dim]
         temp_e1[:, dim] = e1_vector[dim]
         temp_e2[:, dim] = e2_vector[dim]
     length = np.abs(
         np.sum(rr * temp_e2, axis=1)
         * (
             1
             - np.sum(rr * temp_e0, axis=1) ** 2.0 * mag_A**-2.0
             - np.sum(rr * temp_e1, axis=1) ** 2.0 * mag_B**-2.0
         )
         ** (-0.5)
     )
     length[length == np.inf] = 0.0
     tC_index = np.nanargmax(length)
     mag_C = length[tC_index]
     # tilt is calculated from the rotation about x axis
     # needed to align e1 vector with the y axis
     # after e0 is aligned with x axis
     # find the t1 angle needed to rotate about z axis to align e0 onto x-z plane
     t1 = np.arctan(-e0_vector[1] / e0_vector[0])
     RZ = get_rotation_matrix(t1, (0, 0, 1))
     r1 = np.dot(RZ, e0_vector)
     # find the t2 angle needed to rotate about y axis to align e0 to x
     t2 = np.arctan(r1[2] / r1[0])
     RY = get_rotation_matrix(t2, (0, 1, 0))
     r2 = np.dot(RY, np.dot(RZ, e1_vector))
     # find the tilt angle needed to rotate about x axis to align e1 to y and e2 to z
     tilt = np.arctan(-r2[2] / r2[1])
     return (mag_A, mag_B, mag_C, e0_vector[0], e0_vector[1], e0_vector[2], tilt)
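One detail in the function above is worth isolating: np.nanargmax ignores NaNs but happily treats +inf as the maximum, so the degenerate lengths that became infinite are zeroed out before the reduction. A tiny sketch of that pattern (values made up):

import numpy as np

length = np.array([2.0, np.inf, np.nan, 5.0])
length[length == np.inf] = 0.0   # drop degenerate (infinite) candidates
best = np.nanargmax(length)      # -> 3; the NaN at index 2 is ignored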
Beispiel #58
0
n = inputData['n']
R = inputData['R']
epsilon = inputData['epsilon']
x0 = inputData['x']
y0 = inputData['y']

phi = data['phi']
phiCount = phi.size
Tphi = data['Tphi']
frequencies = data['frequencies']
countf = frequencies.size
Rk = data['Rk']

S = 2 * pi / phiCount * Rk * np.sum(Tphi, axis=1)
print(np.nanargmax(S))  # = 840
print(np.nanargmin(S[100:]) + 100)
print(np.nanargmax(S[900:]) + 900)

mpl.rcParams['mathtext.fontset'] = 'stix'
mpl.rcParams['font.family'] = 'STIXGeneral'
mpl.rcParams['legend.fontsize'] = 'medium'
mpl.rcParams['axes.labelsize'] = 'large'

plt.figure(figsize=(7, 3))
plt.plot(frequencies, S)
plt.ylim(top=10, bottom=5e-4)
plt.yscale("log")
plt.xlabel(r"$f$")
plt.ylabel(r"$S$")
plt.title(r"1 hexagon, $R=0.45$, $\epsilon = (1.1 \pm 0.1\mathrm{i})^2$")
Beispiel #59
0
def add_stat_annotation(ax,
                        plot='boxplot',
                        data=None,
                        x=None,
                        y=None,
                        hue=None,
                        units=None,
                        order=None,
                        hue_order=None,
                        box_pairs=None,
                        width=0.8,
                        perform_stat_test=True,
                        pvalues=None,
                        test_short_name=None,
                        test=None,
                        text_format='star',
                        pvalue_format_string=DEFAULT,
                        text_annot_custom=None,
                        loc='inside',
                        show_test_name=True,
                        pvalue_thresholds=DEFAULT,
                        stats_params=dict(),
                        comparisons_correction='bonferroni',
                        use_fixed_offset=False,
                        line_offset_to_box=None,
                        line_offset=None,
                        line_height=0.02,
                        text_offset=1,
                        color='0.2',
                        linewidth=1.5,
                        fontsize='medium',
                        verbose=1):
    """
    Optionally computes a statistical test between pairs of data series, and adds statistical annotations on top
    of the boxes/bars. The exact same arguments `data`, `x`, `y`, `hue`, `order`, `width`,
    `hue_order` (and `units`) as in the seaborn boxplot/barplot function must be passed to this function.

    This function works in one of the two following modes:
    a) `perform_stat_test` is True: statistical test as given by argument `test` is performed.
    b) `perform_stat_test` is False: no statistical test is performed, list of custom p-values `pvalues` are
       used for each pair of boxes. The `test_short_name` argument is then used as the name of the
       custom statistical test.

    :param plot: type of the plot, one of 'boxplot' or 'barplot'.
    :param line_height: in axes fraction coordinates
    :param text_offset: in points
    :param box_pairs: can be of either form: For non-grouped boxplot: `[(cat1, cat2), (cat3, cat4)]`. For boxplot grouped by hue: `[((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]`
    :param pvalue_format_string: defaults to `"{.3e}"`
    :param pvalue_thresholds: list of lists, or tuples. Default is: For "star" text_format: `[[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"], [1, "ns"]]`. For "simple" text_format : `[[1e-5, "1e-5"], [1e-4, "1e-4"], [1e-3, "0.001"], [1e-2, "0.01"]]`
    :param pvalues: list or array of p-values for each box pair comparison.
    :param comparisons_correction: Method for multiple comparisons correction. `bonferroni` or None.
    """
    def find_x_position_box(box_plotter, boxName):
        """
        boxName can be either a name "cat" or a tuple ("cat", "hue")
        """
        if box_plotter.plot_hues is None:
            cat = boxName
            hue_offset = 0
        else:
            cat = boxName[0]
            hue = boxName[1]
            hue_offset = box_plotter.hue_offsets[box_plotter.hue_names.index(
                hue)]

        group_pos = box_plotter.group_names.index(cat)
        box_pos = group_pos + hue_offset
        return box_pos

    def get_box_data(box_plotter, boxName):
        """
        boxName can be either a name "cat" or a tuple ("cat", "hue")

        Here we really have to duplicate seaborn code, because there is no
        direct access to the box_data in the BoxPlotter class.
        """
        cat = boxName if box_plotter.plot_hues is None else boxName[0]

        index = box_plotter.group_names.index(cat)
        group_data = box_plotter.plot_data[index]

        if box_plotter.plot_hues is None:
            # Draw a single box or a set of boxes
            # with a single level of grouping
            box_data = remove_na(group_data)
        else:
            hue_level = boxName[1]
            hue_mask = box_plotter.plot_hues[index] == hue_level
            box_data = remove_na(group_data[hue_mask])

        return box_data

    # Set default values if necessary
    if pvalue_format_string is DEFAULT:
        pvalue_format_string = '{:.3e}'
        simple_format_string = '{:.2f}'
    else:
        simple_format_string = pvalue_format_string

    if pvalue_thresholds is DEFAULT:
        if text_format == "star":
            pvalue_thresholds = [[1e-4, "****"], [1e-3, "***"], [1e-2, "**"],
                                 [0.05, "*"], [1, "ns"]]
        else:
            pvalue_thresholds = [[1e-5, "1e-5"], [1e-4, "1e-4"],
                                 [1e-3, "0.001"], [1e-2, "0.01"]]

    fig = plt.gcf()

    # Validate arguments
    if perform_stat_test:
        if test is None:
            raise ValueError(
                "If `perform_stat_test` is True, `test` must be specified.")
        if pvalues is not None or test_short_name is not None:
            raise ValueError(
                "If `perform_stat_test` is True, custom `pvalues` "
                "or `test_short_name` must be `None`.")
        valid_list = [
            't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney',
            'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon',
            'Kruskal'
        ]
        if test not in valid_list:
            raise ValueError(
                "test value should be one of the following: {}.".format(
                    ', '.join(valid_list)))
    else:
        if pvalues is None:
            raise ValueError(
                "If `perform_stat_test` is False, custom `pvalues` must be specified."
            )
        if test is not None:
            raise ValueError(
                "If `perform_stat_test` is False, `test` must be None.")
        if len(pvalues) != len(box_pairs):
            raise ValueError(
                "`pvalues` should be of the same length as `box_pairs`.")

    if text_annot_custom is not None and len(text_annot_custom) != len(
            box_pairs):
        raise ValueError(
            "`text_annot_custom` should be of same length as `box_pairs`.")

    assert_is_in(loc, ['inside', 'outside'], label='argument `loc`')
    assert_is_in(text_format, ['full', 'simple', 'star', 'custom'],
                 label='argument `text_format`')
    assert_is_in(comparisons_correction, ['bonferroni', None],
                 label='argument `comparisons_correction`')

    if verbose >= 1 and text_format == 'star':
        print("p-value annotation legend:")
        pvalue_thresholds = pd.DataFrame(pvalue_thresholds).sort_values(
            by=0, ascending=False).values
        for i in range(0, len(pvalue_thresholds)):
            if i < len(pvalue_thresholds) - 1:
                print('{}: {:.2e} < p <= {:.2e}'.format(
                    pvalue_thresholds[i][1], pvalue_thresholds[i + 1][0],
                    pvalue_thresholds[i][0]))
            else:
                print('{}: p <= {:.2e}'.format(pvalue_thresholds[i][1],
                                               pvalue_thresholds[i][0]))
        print()

    ylim = ax.get_ylim()
    yrange = ylim[1] - ylim[0]

    if line_offset is None:
        if loc == 'inside':
            line_offset = 0.05
            if line_offset_to_box is None:
                line_offset_to_box = 0.06
        # 'outside', see valid_list
        else:
            line_offset = 0.03
            if line_offset_to_box is None:
                line_offset_to_box = line_offset
    else:
        if loc == 'inside':
            if line_offset_to_box is None:
                line_offset_to_box = 0.06
        elif loc == 'outside':
            line_offset_to_box = line_offset
    y_offset = line_offset * yrange
    y_offset_to_box = line_offset_to_box * yrange

    if plot == 'boxplot':
        # Create the same plotter object as seaborn's boxplot
        box_plotter = sns.categorical._BoxPlotter(x,
                                                  y,
                                                  hue,
                                                  data,
                                                  order,
                                                  hue_order,
                                                  orient=None,
                                                  width=width,
                                                  color=None,
                                                  palette=None,
                                                  saturation=.75,
                                                  dodge=True,
                                                  fliersize=5,
                                                  linewidth=None)
    elif plot == 'barplot':
        # Create the same plotter object as seaborn's barplot
        box_plotter = sns.categorical._BarPlotter(x,
                                                  y,
                                                  hue,
                                                  data,
                                                  order,
                                                  hue_order,
                                                  estimator=np.mean,
                                                  ci=95,
                                                  n_boot=1000,
                                                  units=None,
                                                  orient=None,
                                                  color=None,
                                                  palette=None,
                                                  saturation=.75,
                                                  errcolor=".26",
                                                  errwidth=None,
                                                  capsize=None,
                                                  dodge=True)

    # Build the list of box data structures with the x and ymax positions
    group_names = box_plotter.group_names
    hue_names = box_plotter.hue_names
    if box_plotter.plot_hues is None:
        box_names = group_names
        labels = box_names
    else:
        box_names = [(group_name, hue_name) for group_name in group_names
                     for hue_name in hue_names]
        labels = [
            '{}_{}'.format(group_name, hue_name)
            for (group_name, hue_name) in box_names
        ]

    box_structs = [{
        'box':
        box_names[i],
        'label':
        labels[i],
        'x':
        find_x_position_box(box_plotter, box_names[i]),
        'box_data':
        get_box_data(box_plotter, box_names[i]),
        'ymax':
        np.amax(get_box_data(box_plotter, box_names[i]))
        if len(get_box_data(box_plotter, box_names[i])) > 0 else np.nan
    } for i in range(len(box_names))]
    # Sort the box data structures by position along the x axis
    box_structs = sorted(box_structs, key=lambda x: x['x'])
    # Add the index position in the list of boxes along the x axis
    box_structs = [
        dict(box_struct, xi=i) for i, box_struct in enumerate(box_structs)
    ]
    # Same data structure list with access key by box name
    box_structs_dic = {
        box_struct['box']: box_struct
        for box_struct in box_structs
    }

    # Build the list of box data structure pairs
    box_struct_pairs = []
    for i_box_pair, (box1, box2) in enumerate(box_pairs):
        valid = box1 in box_names and box2 in box_names
        if not valid:
            raise ValueError("box_pairs contains an invalid box pair.")
        # i_box_pair will keep track of the original order of the box pairs.
        box_struct1 = dict(box_structs_dic[box1], i_box_pair=i_box_pair)
        box_struct2 = dict(box_structs_dic[box2], i_box_pair=i_box_pair)
        if box_struct1['x'] <= box_struct2['x']:
            pair = (box_struct1, box_struct2)
        else:
            pair = (box_struct2, box_struct1)
        box_struct_pairs.append(pair)

    # Draw first the annotations with the shortest between-boxes distance, in order to reduce
    # overlapping between annotations.
    box_struct_pairs = sorted(box_struct_pairs,
                              key=lambda x: abs(x[1]['x'] - x[0]['x']))

    # Build array that contains the x and y_max position of the highest annotation or box data at
    # a given x position, and also keeps track of the number of stacked annotations.
    # This array will be updated when a new annotation is drawn.
    y_stack_arr = np.array([[box_struct['x'] for box_struct in box_structs],
                            [box_struct['ymax'] for box_struct in box_structs],
                            [0 for i in range(len(box_structs))]])
    if loc == 'outside':
        y_stack_arr[1, :] = ylim[1]
    ann_list = []
    test_result_list = []
    ymaxs = []
    y_stack = []

    for box_struct1, box_struct2 in box_struct_pairs:

        box1 = box_struct1['box']
        box2 = box_struct2['box']
        label1 = box_struct1['label']
        label2 = box_struct2['label']
        box_data1 = box_struct1['box_data']
        box_data2 = box_struct2['box_data']
        x1 = box_struct1['x']
        x2 = box_struct2['x']
        xi1 = box_struct1['xi']
        xi2 = box_struct2['xi']
        ymax1 = box_struct1['ymax']
        ymax2 = box_struct2['ymax']
        i_box_pair = box_struct1['i_box_pair']

        # Find y maximum for all the y_stacks *in between* the box1 and the box2
        i_ymax_in_range_x1_x2 = xi1 + np.nanargmax(
            y_stack_arr[1,
                        np.where((x1 <= y_stack_arr[0, :])
                                 & (y_stack_arr[0, :] <= x2))])
        ymax_in_range_x1_x2 = y_stack_arr[1, i_ymax_in_range_x1_x2]

        if perform_stat_test:
            result = stat_test(box_data1, box_data2,
                               test, comparisons_correction,
                               len(box_struct_pairs), **stats_params)
        else:
            test_short_name = test_short_name if test_short_name is not None else ''
            result = StatResult('Custom statistical test', test_short_name,
                                None, None, pvalues[i_box_pair])

        result.box1 = box1
        result.box2 = box2
        test_result_list.append(result)

        if verbose >= 1:
            print("{} v.s. {}: {}".format(label1, label2,
                                          result.formatted_output))

        if text_annot_custom is not None:
            text = text_annot_custom[i_box_pair]
        else:
            if text_format == 'full':
                text = "{} p = {}".format('{}', pvalue_format_string).format(
                    result.test_short_name, result.pval)
            elif text_format is None:
                text = None
            elif text_format == 'star':
                text = pval_annotation_text(result.pval, pvalue_thresholds)
            elif text_format == 'simple':
                test_short_name = show_test_name and test_short_name or ""
                text = simple_text(result.pval, simple_format_string,
                                   pvalue_thresholds, test_short_name)
            elif text_format == 'custom':
                text = "%0.2f" % result.pval

        yref = ymax_in_range_x1_x2
        yref2 = yref

        # Choose the best offset depending on whether there is an annotation below
        # at the x position in the range [x1, x2] where the stack is the highest
        if y_stack_arr[2, i_ymax_in_range_x1_x2] == 0:
            # there is only a box below
            offset = y_offset_to_box
        else:
            # there is an annotation below
            offset = y_offset
        y = yref2 + offset
        h = line_height * yrange
        line_x, line_y = [x1, x1, x2, x2], [y, y + h, y + h, y]
        if loc == 'inside':
            ax.plot(line_x, line_y, lw=linewidth, c=color)
        elif loc == 'outside':
            line = lines.Line2D(line_x,
                                line_y,
                                lw=linewidth,
                                c=color,
                                transform=ax.transData)
            line.set_clip_on(False)
            ax.add_line(line)

        # Why change the ylim here if it is set to the correct range at the very end?
        # ax.set_ylim((ylim[0], 1.1*(y + h)))

        if text is not None:
            ann = ax.annotate(text,
                              xy=(np.mean([x1, x2]), y + h),
                              xytext=(0, text_offset),
                              textcoords='offset points',
                              xycoords='data',
                              ha='center',
                              va='bottom',
                              fontsize=fontsize,
                              clip_on=False,
                              annotation_clip=False)
            ann_list.append(ann)

            plt.draw()
            y_top_annot = None
            got_mpl_error = False
            if not use_fixed_offset:
                try:
                    bbox = ann.get_window_extent()
                    bbox_data = bbox.transformed(ax.transData.inverted())
                    y_top_annot = bbox_data.ymax
                except RuntimeError:
                    got_mpl_error = True

            if use_fixed_offset or got_mpl_error:
                if verbose >= 1:
                    print(
                        "Warning: cannot get the text bounding box. Falling back to a fixed"
                        " y offset. Layout may be not optimal.")
                # We will apply a fixed offset in points,
                # based on the font size of the annotation.
                fontsize_points = FontProperties(
                    size='medium').get_size_in_points()
                offset_trans = mtransforms.offset_copy(
                    ax.transData,
                    fig=fig,
                    x=0,
                    y=1.0 * fontsize_points + text_offset,
                    units='points')
                y_top_display = offset_trans.transform((0, y + h))
                y_top_annot = ax.transData.inverted().transform(
                    y_top_display)[1]
        else:
            y_top_annot = y + h

        y_stack.append(
            y_top_annot
        )  # remark: y_stack is not really necessary if we have the stack_array
        ymaxs.append(max(y_stack))
        # Fill the highest y position of the annotation into the y_stack array
        # for all positions in the range x1 to x2
        y_stack_arr[1, (x1 <= y_stack_arr[0, :]) &
                    (y_stack_arr[0, :] <= x2)] = y_top_annot
        # Increment the counter of annotations in the y_stack array
        y_stack_arr[2, xi1:xi2 + 1] = y_stack_arr[2, xi1:xi2 + 1] + 1

    y_stack_max = max(ymaxs)
    if loc == 'inside':
        ax.set_ylim((ylim[0], max(1.03 * y_stack_max, ylim[1])))
    elif loc == 'outside':
        ax.set_ylim((ylim[0], ylim[1]))

    return ax, test_result_list
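For orientation, a minimal usage sketch of the function above in its mode (a) (perform_stat_test=True). It assumes add_stat_annotation is importable, e.g. from the statannot package this snippet appears to come from, and uses seaborn's bundled tips dataset; the column names and box pairs are only an example:

import seaborn as sns
import matplotlib.pyplot as plt
from statannot import add_stat_annotation   # assumed source of the function above

df = sns.load_dataset("tips")
ax = sns.boxplot(data=df, x="day", y="total_bill")
ax, results = add_stat_annotation(
    ax, plot='boxplot', data=df, x="day", y="total_bill",
    box_pairs=[("Thur", "Fri"), ("Sat", "Sun")],
    test='Mann-Whitney', text_format='star', loc='inside', verbose=1,
)
plt.show()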
Beispiel #60
0
    def ndimage_alg(self, img, opts):
        """Island detection using scipy.ndimage

        Use scipy.ndimage.label to detect islands of emission in the image.
        An island is defined as a group of tightly connected pixels
        (8-connectivity for 2D images) with emission.

        The following cuts are applied:
         - a pixel is considered to have emission if it is 'thresh_isl' times
           higher than the RMS.
         - an island should have at least 'minsize' active pixels.
         - there should be at least 1 pixel in the island which is 'thresh_pix'
           times higher than the noise (peak clip).

        Parameters:
        image, mask: arrays with image data and mask
        mean, rms: arrays with mean & rms maps
        thresh_isl: threshold for 'active pixels'
        thresh_pix: threshold for peak
        minsize: minimal acceptable island size

        Function returns a list of Island objects.
        """
        ### islands detection
        mylog = mylogger.logging.getLogger("PyBDSM."+img.log+"Islands")

        image = img.ch0_arr
        mask = img.mask_arr
        rms = img.rms_arr
        mean = img.mean_arr
        thresh_isl = opts.thresh_isl
        thresh_pix = img.thresh_pix
        clipped_mean = img.clipped_mean
        saverank = opts.savefits_rankim

                        # act_pixels is true if significant emission
        if img.masked:
            act_pixels = ~(mask.copy())
            act_pixels[~mask] = (image[~mask]-mean[~mask])/thresh_isl >= rms[~mask]
        else:
            act_pixels = (image-mean)/thresh_isl >= rms

                        # dimension of image
        rank = len(image.shape)
                        # generates matrix for connectivity, in this case, 8-conn
        connectivity = nd.generate_binary_structure(rank, rank)
                        # labels = matrix with value = (initial) island number
        labels, count = nd.label(act_pixels, connectivity)
                        # slices has limits of bounding box of each such island
        slices = nd.find_objects(labels)
        img.island_labels = labels

        ### apply cuts on island size and peak value
        pyrank = N.zeros(image.shape, dtype=N.int32)
        res = []
        islid = 0
        for idx, s in enumerate(slices):
            idx += 1 # nd.labels indices are counted from 1
                        # number of pixels inside bounding box which are in island
            isl_size = (labels[s] == idx).sum()
            isl_peak = nd.maximum(image[s], labels[s], idx)
            isl_maxposn = tuple(N.array(N.unravel_index(N.nanargmax(image[s]), image[s].shape))+\
                          N.array((s[0].start, s[1].start)))
            if (isl_size >= img.minpix_isl) and (isl_size <= img.maxpix_isl) and (isl_peak - mean[isl_maxposn])/thresh_pix > rms[isl_maxposn]:
                isl = Island(image, mask, mean, rms, labels, s, idx, img.pixel_beamarea())
                res.append(isl)
                pyrank[tuple(isl.bbox)] += N.invert(isl.mask_active)*idx // idx

        return res
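The docstring above summarizes the island cuts; a stripped-down, standalone sketch of the same idea (thresholding, 8-connected labelling, then size and peak cuts) follows. All thresholds and the test image are made up, and scalar mean/rms stand in for the per-pixel maps the real method uses:

import numpy as np
import scipy.ndimage as nd

image = np.random.default_rng(1).normal(size=(64, 64))
image[20:24, 30:34] += 10.0                  # inject one bright "source"
mean, rms = image.mean(), image.std()
thresh_isl, thresh_pix, minsize = 3.0, 5.0, 4

act_pixels = (image - mean) / thresh_isl >= rms       # significant emission
connectivity = nd.generate_binary_structure(2, 2)     # 8-connectivity
labels, count = nd.label(act_pixels, connectivity)

islands = []
for idx, s in enumerate(nd.find_objects(labels), start=1):
    isl_size = (labels[s] == idx).sum()
    # peak position within the island only (non-island pixels set to NaN)
    flat_peak = np.nanargmax(np.where(labels[s] == idx, image[s], np.nan))
    peak_pos = np.unravel_index(flat_peak, image[s].shape)
    isl_peak = image[s][peak_pos]
    if isl_size >= minsize and (isl_peak - mean) / thresh_pix > rms:
        islands.append((idx, isl_size, isl_peak))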