def interpolate_to_grid(self, time_new: np.ndarray, height_new: np.ndarray) -> list:
    """Interpolate beta using nearest neighbor."""
    max_height = 100.0  # m
    max_time = 1.0  # min
    # Remove completely masked profiles from the interpolation
    beta = self.data["beta"][:]
    indices = []
    for ind, b in enumerate(beta):
        if ma.all(b) is not ma.masked:
            indices.append(ind)
    assert self.height is not None
    beta_interpolated = interpolate_2d_nearest(
        self.time[indices], self.height, beta[indices, :], time_new, height_new
    )
    # Mask profiles and range gates that fall inside a data gap
    max_time /= 60  # min -> fraction of an hour
    bad_time_indices = _get_bad_indices(self.time[indices], time_new, max_time)
    bad_height_indices = _get_bad_indices(self.height, height_new, max_height)
    if bad_time_indices:
        logging.warning(f"Unable to interpolate lidar for {len(bad_time_indices)} time steps")
        beta_interpolated[bad_time_indices, :] = ma.masked
    if bad_height_indices:
        logging.warning(f"Unable to interpolate lidar for {len(bad_height_indices)} altitudes")
        beta_interpolated[:, bad_height_indices] = ma.masked
    self.data["beta"].data = beta_interpolated
    return bad_time_indices
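# A minimal sketch (toy values, not from the source) of the ma.all idiom the
# profile filter above relies on: reducing a fully masked row with ma.all
# yields the ma.masked singleton, so an identity check cleanly detects and
# drops empty profiles.
import numpy.ma as ma

row = ma.masked_all(3)                           # every element masked
assert ma.all(row) is ma.masked                  # profile would be dropped

row = ma.array([1.0, 0.5, 2.0], mask=[0, 1, 0])  # partially masked
assert ma.all(row) is not ma.masked              # profile would be kept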
def test_old_style_process_class(mp_tmpdir, cleantopo_tl, old_style_process_py):
    """Test correct processing using MapcheteProcess class."""
    config = cleantopo_tl.dict
    config.update(process_file=old_style_process_py)
    with mapchete.open(config) as mp:
        for zoom in range(6):
            tiles = []
            for tile in mp.get_process_tiles(zoom):
                output = mp.execute(tile)
                tiles.append((tile, output))
                assert isinstance(output, ma.MaskedArray)
                assert output.shape == tile.shape  # output must match the process tile's shape
                assert not ma.all(output.mask)
                mp.write(tile, output)
            mosaic, mosaic_affine = create_mosaic(tiles)
            try:
                temp_vrt = os.path.join(mp_tmpdir, str(zoom) + ".vrt")
                gdalbuildvrt = "gdalbuildvrt %s %s/%s/*/*.tif > /dev/null" % (
                    temp_vrt, mp_tmpdir, zoom)
                os.system(gdalbuildvrt)
                with rasterio.open(temp_vrt, "r") as testfile:
                    for file_item, mosaic_item in zip(
                        testfile.meta["transform"], mosaic_affine
                    ):
                        assert file_item == mosaic_item
                    band = testfile.read(1, masked=True)
                    assert band.shape == mosaic.shape
                    assert ma.allclose(band, mosaic)
                    assert ma.allclose(band.mask, mosaic.mask)
            finally:
                shutil.rmtree(mp_tmpdir, ignore_errors=True)
def check_sum(self):
    """
    Check that the probabilities add up to 1 across choices.

    Inputs:
    None
    """
    self.num_agents = self.probabilities.shape[0]
    self.num_choices = self.probabilities.shape[-1]
    cumsum_across_rows = self.probabilities.sum(-1)
    diff_from_unity = abs(cumsum_across_rows - 1)
    if not ma.all(diff_from_unity < 1e-6):
        raise ProbabilityError("probability values do not add up "
                               "to one across rows")
def test_processing():
    """Test correct processing (read and write) outputs."""
    for cleantopo_process in [
        "testdata/cleantopo_tl.mapchete", "testdata/cleantopo_br.mapchete"
    ]:
        mp = mapchete.open(os.path.join(SCRIPTDIR, cleantopo_process))
        for zoom in range(6):
            tiles = []
            for tile in mp.get_process_tiles(zoom):
                output = mp.execute(tile)
                tiles.append(output)
                assert isinstance(output, BufferedTile)
                assert isinstance(output.data, ma.MaskedArray)
                assert output.data.shape == output.shape
                assert not ma.all(output.data.mask)
                mp.write(output)
            mosaic, mosaic_affine = create_mosaic(tiles)
            try:
                temp_vrt = os.path.join(OUT_DIR, str(zoom) + ".vrt")
                gdalbuildvrt = "gdalbuildvrt %s %s/%s/*/*.tif > /dev/null" % (
                    temp_vrt, OUT_DIR, zoom)
                os.system(gdalbuildvrt)
                with rasterio.open(temp_vrt, "r") as testfile:
                    for file_item, mosaic_item in zip(
                            testfile.meta["transform"], mosaic_affine):
                        assert file_item == mosaic_item
                    band = testfile.read(1, masked=True)
                    assert band.shape == mosaic.shape
                    assert ma.allclose(band, mosaic)
                    assert ma.allclose(band.mask, mosaic.mask)
            finally:
                shutil.rmtree(OUT_DIR, ignore_errors=True)
def test_processing(mp_tmpdir, cleantopo_br, cleantopo_tl):
    """Test correct processing (read and write) outputs."""
    for cleantopo_process in [cleantopo_br.path, cleantopo_tl.path]:
        with mapchete.open(cleantopo_process) as mp:
            for zoom in range(6):
                tiles = []
                for tile in mp.get_process_tiles(zoom):
                    output = mp.execute(tile)
                    tiles.append((tile, output))
                    assert isinstance(output, ma.MaskedArray)
                    assert output.shape == tile.shape  # output must match the process tile's shape
                    assert not ma.all(output.mask)
                    mp.write(tile, output)
                mosaic = create_mosaic(tiles)
                try:
                    temp_vrt = os.path.join(mp_tmpdir, str(zoom) + ".vrt")
                    gdalbuildvrt = "gdalbuildvrt %s %s/%s/*/*.tif > /dev/null" % (
                        temp_vrt, mp.config.output.path, zoom)
                    os.system(gdalbuildvrt)
                    with rasterio.open(temp_vrt, "r") as testfile:
                        for file_item, mosaic_item in zip(
                            testfile.meta["transform"], mosaic.affine
                        ):
                            assert file_item == mosaic_item
                        band = testfile.read(1, masked=True)
                        assert band.shape == mosaic.data.shape
                        assert ma.allclose(band, mosaic.data)
                        assert ma.allclose(band.mask, mosaic.data.mask)
                finally:
                    shutil.rmtree(mp_tmpdir, ignore_errors=True)
def format_and_clean_data_main(self):
    """
    Main function to format and clean data based on choices by the user.
    """
    # Mask whole rows where more than `init_perc_remove` of the values are missing
    too_many_missing = self.has_too_many_missing(self.init_perc_remove)
    if ma.any(too_many_missing):
        idx, = ma.where(too_many_missing)
        self.xs[idx] = ma.mask_rows(self.xs[idx])

    # Stop if the array is already completely filled or completely empty
    if ma.all(self.check_for_all()):
        return self.xs

    # Clean outliers
    self.clean_outliers()

    # Average neighboring values to fill gaps up to `max_gap_length`
    self.clean_gaps_w_linspace(fill_gap_length=self.max_gap_length)
    if ma.all(ma.count_masked(self.xs[:, :-self.keep_n_values], axis=1)[np.newaxis, :] == 0):
        # No masked values remain before the most recent `keep_n_values` entries
        return self.xs

    # Remove values that start the array and are then followed by too many masked values
    start_idx = self.find_new_starting_value()

    # If over `second_perc_remove` of the original values are still blank,
    # check whether `perc_remove_after_start_idx` of the blanks fall after the new start index
    too_many_missing = self.has_too_many_missing(self.second_perc_remove)  # boolean array
    if ma.any(too_many_missing):
        n_masked = np.array([
            ma.count_masked(self.xs[i, s_idx:])
            for i, s_idx in enumerate(start_idx)
        ]) / self.N > self.perc_remove_after_start_idx
        if ma.any(n_masked):
            idx, = ma.where(n_masked)
            self.xs[idx] = ma.mask_rows(self.xs[idx])

    # To fill in remaining values, run linear regression on non-zero values
    self.clean_gaps_w_lin_regress(start_idx)

    # If linear regression left negative or zero values, fall back to linear
    # interpolation for the middle gaps
    if ma.any(ma.masked_less_equal(self.xs, 0.)):
        self.clean_gaps_w_linspace()
def ComputeFobsSqPink(refl, iref):
    yp = np.zeros(len(x))  # not masked
    refl8im = 0
    Wd, fmin, fmax = G2pwd.getWidthsTOF(
        refl[5 + im], refl[12 + im], refl[13 + im],
        refl[6 + im] / 1.e4, refl[7 + im] / 100.)
    iBeg = max(xB, np.searchsorted(x, refl[5 + im] - fmin))
    iFin = max(xB, min(np.searchsorted(x, refl[5 + im] + fmax), xF))
    if not iBeg + iFin:  # peak below low limit - skip peak
        return 0
    if ma.all(xMask[iBeg:iFin]):  # peak entirely masked - skip peak
        return -1
    elif not iBeg - iFin:  # peak above high limit - done
        return -2
    if iBeg < iFin:
        yp[iBeg:iFin] = refl[11 + im] * refl[9 + im] * G2pwd.getEpsVoigt(
            refl[5 + im], refl[12 + im], refl[13 + im],
            refl[6 + im] / 1.e4, refl[7 + im] / 100., x[iBeg:iFin])
    refl8im = np.sum(
        np.where(ratio[iBeg:iFin] > 0.,
                 yp[iBeg:iFin] * ratio[iBeg:iFin] / refl[11 + im], 0.0))
    return refl8im, refl[11 + im] * refl[9 + im]
def test_dereference(full_testfile):
    fh, (a, b, c) = full_testfile
    sel = slice(0, 10)
    ref = fh[f'{a}/ref/{b}/ref']
    dset = fh[f'{b}/data']
    region = fh[f'{a}/ref/{b}/ref_region']

    data_no_reg = dereference(sel, ref, dset)
    data_reg = dereference(sel, ref, dset, region=region)
    data_idx = dereference(sel, ref, dset, region=region, indices_only=True)
    data_list = dereference(sel, ref, dset, region=region, as_masked=False)

    assert ma.all(data_no_reg == data_reg)
    assert data_reg.shape == (10, 10)
    assert data_reg.shape == data_idx.shape
    assert np.sum(data_reg.mask) == 0
    assert len(data_list) == 10
    assert isinstance(data_list, list)
    assert all(len(entry) == 10 for entry in data_list)  # avoid shadowing `a`
def test_dereference_chain(full_testfile):
    fh, (a, b, c) = full_testfile
    sel = slice(0, 10)
    refs = [fh[f'{a}/ref/{b}/ref'], fh[f'{c}/ref/{b}/ref']]
    dset = fh[f'{c}/data']
    regions = [fh[f'{a}/ref/{b}/ref_region'], fh[f'{b}/ref/{c}/ref_region']]
    ref_dir = [(0, 1), (1, 0)]

    data_no_reg = dereference_chain(sel, refs, dset)
    data_reg = dereference_chain(sel, refs, dset, regions=regions)
    data_idx = dereference_chain(sel, refs, dset, regions=regions, indices_only=True)

    assert ma.all(data_no_reg == data_reg)
    assert data_reg.shape == (10, 10, 1)
    assert data_reg.shape == data_idx.shape
    assert np.sum(data_reg.mask) == 0
def _mark_gaps(time: np.ndarray,
               data: ma.MaskedArray,
               max_allowed_gap: float = 1) -> tuple:
    assert time[0] >= 0
    assert time[-1] <= 24
    max_gap = max_allowed_gap / 60
    if not ma.is_masked(data):
        mask_new = np.zeros(data.shape)
    elif ma.all(data.mask):  # everything masked (`ma.all(...) is ma.masked` could never be true for a plain boolean mask)
        mask_new = np.ones(data.shape)
    else:
        mask_new = np.copy(data.mask)
    data_new = ma.copy(data)
    time_new = np.copy(time)
    gap_indices = np.where(np.diff(time) > max_gap)[0]
    temp_array = np.zeros((2, data.shape[1]))
    temp_mask = np.ones((2, data.shape[1]))
    time_delta = 0.001
    # Insert two masked placeholder profiles into every gap, walking the gaps
    # backwards so earlier indices stay valid after each insertion.
    for ind in np.sort(gap_indices)[::-1]:
        ind += 1
        data_new = np.insert(data_new, ind, temp_array, axis=0)
        mask_new = np.insert(mask_new, ind, temp_mask, axis=0)
        time_new = np.insert(time_new, ind, time[ind] - time_delta)
        time_new = np.insert(time_new, ind, time[ind - 1] + time_delta)
    if (time[0] - 0) > max_gap:
        data_new = np.insert(data_new, 0, temp_array, axis=0)
        mask_new = np.insert(mask_new, 0, temp_mask, axis=0)
        time_new = np.insert(time_new, 0, time[0] - time_delta)
        time_new = np.insert(time_new, 0, time_delta)
    if (24 - time[-1]) > max_gap:
        ind = mask_new.shape[0]
        data_new = np.insert(data_new, ind, temp_array, axis=0)
        mask_new = np.insert(mask_new, ind, temp_mask, axis=0)
        time_new = np.insert(time_new, ind, 24 - time_delta)
        time_new = np.insert(time_new, ind, time[-1] + time_delta)
    data_new.mask = mask_new
    return time_new, data_new
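# A minimal, self-contained sketch (toy values, not from the source) of the
# gap-marking idiom above: two masked placeholder profiles are inserted just
# inside each time gap so later interpolation or plotting cannot bridge it.
import numpy as np
import numpy.ma as ma

time = np.array([0.5, 1.0, 3.0, 3.5])        # hours; 2 h gap between 1.0 and 3.0
data = np.ones((4, 2))
mask = np.zeros((4, 2), dtype=bool)

max_gap = 30 / 60                            # 30 min as a fraction of an hour
time_delta = 0.001
filler = np.zeros((2, 2))                    # two placeholder profiles per gap
filler_mask = np.ones((2, 2), dtype=bool)

# Walk the gaps backwards so earlier indices stay valid after each insertion.
for ind in np.sort(np.where(np.diff(time) > max_gap)[0])[::-1]:
    ind += 1
    data = np.insert(data, ind, filler, axis=0)
    mask = np.insert(mask, ind, filler_mask, axis=0)
    time = np.insert(time, ind, time[ind] - time_delta)
    time = np.insert(time, ind, time[ind - 1] + time_delta)

marked = ma.array(data, mask=mask)
print(time)       # [0.5   1.    1.001 2.999 3.    3.5  ]
print(marked[2])  # [-- --]: the gap rows are fully masked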
def ComputeFobsSqCW(refl, iref):
    yp = np.zeros(len(x))  # not masked
    sInt = 0
    refl8im = 0
    Wd, fmin, fmax = G2pwd.getWidthsCW(refl[5 + im], refl[6 + im], refl[7 + im], shl)
    iBeg = max(xB, np.searchsorted(x, refl[5 + im] - fmin))
    iFin = max(xB, min(np.searchsorted(x, refl[5 + im] + fmax), xF))
    iFin2 = iFin
    if not iBeg + iFin:  # peak below low limit - skip peak
        return 0
    if ma.all(xMask[iBeg:iFin]):  # peak entirely masked - skip peak
        return -1
    elif not iBeg - iFin:  # peak above high limit - done
        return -2
    elif iBeg < iFin:
        yp[iBeg:iFin] = refl[11 + im] * refl[9 + im] * G2pwd.getFCJVoigt3(
            refl[5 + im], refl[6 + im], refl[7 + im], shl, x[iBeg:iFin])
        sInt = refl[11 + im] * refl[9 + im]
        if Ka2:
            pos2 = refl[5 + im] + lamRatio * tand(refl[5 + im] / 2.0)  # + 360/pi * Dlam/lam * tan(th)
            Wd, fmin, fmax = G2pwd.getWidthsCW(pos2, refl[6 + im], refl[7 + im], shl)
            iBeg2 = max(xB, np.searchsorted(x, pos2 - fmin))
            iFin2 = min(np.searchsorted(x, pos2 + fmax), xF)
            if iFin2 > iBeg2:
                yp[iBeg2:iFin2] += refl[11 + im] * refl[9 + im] * kRatio * G2pwd.getFCJVoigt3(
                    pos2, refl[6 + im], refl[7 + im], shl, x[iBeg2:iFin2])
                sInt *= 1. + kRatio
    refl8im = np.sum(
        np.where(ratio[iBeg:iFin2] > 0.,
                 yp[iBeg:iFin2] * ratio[iBeg:iFin2] / (refl[11 + im] * (1. + kRatio)),
                 0.0))
    return refl8im, sInt
def test_testMaPut(self):
    (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
    m = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]
    i = np.nonzero(m)[0]
    put(ym, i, zm)
    assert_(all(take(ym, i, axis=0) == zm))
def _calc_z_power_min() -> float:
    if ma.all(z_power.mask):
        return 0
    return np.percentile(z_power.compressed(), 0.1)
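# A minimal sketch (made-up values; z_power stands in for the masked power
# array the helper above closes over): compressed() drops masked entries, so
# the percentile is taken over valid samples only.
import numpy as np
import numpy.ma as ma

z_power = ma.array([10., 20., 30., 40.], mask=[True, False, False, False])
z_power_min = 0 if ma.all(z_power.mask) else np.percentile(z_power.compressed(), 0.1)
print(z_power_min)  # 20.02: the 0.1th percentile of the three unmasked values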
if args.exzp:
    with open(args.exzp) as f:
        lista2 = f.read().splitlines()
    standards = get_image_data(lista2)
    standards = standards.group_by(
        ['dayobs', 'shortname', 'instrument', 'filter', 'zcol1', 'zcol2'])
    targets[['zcol1', 'z1', 'dz1', 'c1', 'dc1',
             'zcol2', 'z2', 'dz2', 'c2', 'dc2']].mask = True
    for group in standards.groups:
        matches_in_targets = ((targets['dayobs'] == group['dayobs'][0])
                              & (targets['shortname'] == group['shortname'][0])
                              & (targets['instrument'] == group['instrument'][0])
                              & (targets['filter'] == group['filter'][0]))
        if not np.any(matches_in_targets):
            continue
        targets['zcol1'][matches_in_targets] = group['zcol1'][0]
        targets['zcol2'][matches_in_targets] = group['zcol2'][0]
        targets['z1'][matches_in_targets], targets['dz1'][matches_in_targets] = \
            average_in_flux(group['z1'], group['dz1'])
        targets['z2'][matches_in_targets], targets['dz2'][matches_in_targets] = \
            average_in_flux(group['z2'], group['dz2'])
        if np.all(group['dc1']):
            # Inverse-variance weighted mean of the color terms
            dc1 = np.sum(group['dc1']**-2)**-0.5
            targets['c1'][matches_in_targets] = np.sum(group['c1'] * group['dc1']**-2) * dc1**2
            targets['dc1'][matches_in_targets] = dc1
        else:
            targets['c1'][matches_in_targets] = np.mean(group['c1'])
            targets['dc1'][matches_in_targets] = 0.  # only the matched rows, not the whole column
        if np.all(group['dc2']):
            dc2 = np.sum(group['dc2']**-2)**-0.5
            targets['c2'][matches_in_targets] = np.sum(group['c2'] * group['dc2']**-2) * dc2**2
            targets['dc2'][matches_in_targets] = dc2
        else:
            targets['c2'][matches_in_targets] = np.mean(group['c2'])
            targets['dc2'][matches_in_targets] = 0.  # only the matched rows, not the whole column

# generate average colors for each night at each site
# (fragment: the opening of this contour call is not included in the snippet)
            Tavlat, Q2_levs, colors='k', linewidths=2)
q.levels = [nf(val) for val in q.levels]
plt.clabel(q, q.levels[::2], inline=1, fmt=fmt, fontsize=25)

# Add diabatic layer depth
PI = c.mnc('PSI.nc', "LaPs1TH").mean(axis=2)
PI = ma.masked_array(PI, PI < 0.95)

# Depths
th = c.mnc('PSI.nc', "LaHs1TH").mean(axis=2)
depths = np.cumsum(th[::-1], axis=0)[::-1]
DDL = np.zeros(len(c.yc))
psi = c.get_psi_iso()
for jj in range(len(c.yc)):
    # Skip columns that are fully mixed, have zero streamfunction, or are fully masked
    if ma.all(PI[:, jj] == 1) or np.all(psi[:, jj] == -0) or PI[:, jj].mask.all():
        continue
    indx = ma.nonzero(PI[:, jj] < 0.9999999999)[0]
    a = indx[np.nonzero(indx > 3)[0]][0]
    if a < 41 and depths[a - 1, jj] - depths[a, jj] > 150:
        DDL[jj] = (depths[a - 1, jj] + depths[a, jj]) / 2
    else:
        DDL[jj] = depths[a, jj]
r = ax.plot(c.yc / 1000, SG.savitzky_golay(-DDL / 1000, 21, 1),
            color='0.75', linewidth=4)

# Labels
ax.set_title(str(Figletter[Runs[i]]) + str(tau[Runs[i]]) + 'day',
def condorcet_irv(ratings_list, ids):
    """
    Rank games by the Condorcet method with an IRV tiebreaker.

    Takes a list of dicts with user-rating as key-value pairs and a list of
    game ids. Returns the game ids as strings, ranked from best to worst.
    """
    num_games = len(ratings_list)
    users = list({user for u in ratings_list for user in list(u.keys())})
    ranks = []

    # Get info for IRV tiebreaker.
    irv = np.zeros((num_games,), dtype=[("top_rating", "<i4"), ("votes", "<i4"),
                                        ("year", "<i4"), ("id", "<i4")])
    game_info = get_game_info(ids)
    for i, info in enumerate(game_info):
        irv[i]["year"] = info.find("yearpublished")["value"]
        irv[i]["id"] = int(ids[i])

    # Create matrix of all games/users.
    user_game_mat = np.zeros((num_games, len(users)))
    for i, ratings in enumerate(ratings_list):
        irv[i]["votes"] = len(ratings)
        for j, user in enumerate(users):
            user_game_mat[i, j] = ratings.get(user, np.nan)

    # Generate matrix showing how many times each game was favored in pairwise comparison.
    cond_mat = np.apply_along_axis(
        lambda x: np.apply_along_axis(np.sum, 1, x > user_game_mat),
        1, user_game_mat)

    # For IRV tiebreaker, count how many times each game was a user's top ranked game.
    top_ratings = np.max(user_game_mat, 0)
    irv[:]["top_rating"] = np.apply_along_axis(
        lambda x: np.sum(top_ratings == x), 1, user_game_mat)

    # Subtract columns from rows to show pairwise difference between games.
    diff_mat = cond_mat - cond_mat.T
    np.fill_diagonal(diff_mat, 1)

    # Get tiebreak order, inverting years and ids for uniform sort order.
    max_year = np.max(irv[:]["year"])
    irv[:]["year"] = max_year - irv[:]["year"]
    max_id = np.max(irv[:]["id"])
    irv[:]["id"] = max_id - irv[:]["id"]

    # Rank by Condorcet-IRV method.
    while len(irv):
        # Create masked array copies to hide tiebreak losers.
        masked_mat = ma.masked_array(diff_mat)
        masked_irv = ma.masked_array(irv)
        # Find Condorcet winner.
        winners = []
        while not len(winners):
            # Winning rows are all positive.
            winners = ma.where(ma.all(masked_mat > 0, axis=1))[0]
            assert len(winners) <= 1, "multiple winners found"
            if len(winners):
                temp_id = irv[winners]["id"][0]
                game_id = -(temp_id - max_id)  # Convert back to original id.
                ranks.append(str(game_id))
                diff_mat = np.delete(diff_mat, winners, axis=0)
                diff_mat = np.delete(diff_mat, winners, axis=1)
                irv = np.delete(irv, winners)
            else:
                # Remove plurality loser.
                tiebreak = np.argsort(masked_irv,
                                      order=("top_rating", "votes", "year", "id"))
                loser = tiebreak[0]
                masked_irv[loser] = ma.masked
                masked_mat[loser, :] = ma.masked
                masked_mat[:, loser] = ma.masked
    return ranks
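# A minimal sketch (toy margins, not from the source) of the Condorcet-winner
# test used in the loop above: with the diagonal set to 1, a row that is
# positive everywhere belongs to a game that beats every other game head-to-head.
import numpy as np
import numpy.ma as ma

diff_mat = np.array([[ 1,  2,  3],    # game 0 beats games 1 and 2
                     [-2,  1,  4],    # game 1 loses to 0, beats 2
                     [-3, -4,  1]])   # game 2 loses to both
winners = ma.where(ma.all(ma.masked_array(diff_mat) > 0, axis=1))[0]
print(winners)  # [0]: game 0 is the Condorcet winner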