def assertDataAlmostEqual(self, data, reference_filename, **kwargs):
    reference_path = self.get_result_path(reference_filename)
    if self._check_reference_file(reference_path):
        kwargs.setdefault('err_msg', 'Reference file %s' % reference_path)
        with open(reference_path, 'r') as reference_file:
            stats = json.load(reference_file)
            self.assertEqual(stats.get('shape', []), list(data.shape))
            self.assertEqual(stats.get('masked', False), ma.is_masked(data))
            nstats = np.array((stats.get('mean', 0.), stats.get('std', 0.),
                               stats.get('max', 0.), stats.get('min', 0.)),
                              dtype=np.float_)
            if math.isnan(stats.get('mean', 0.)):
                self.assertTrue(math.isnan(data.mean()))
            else:
                data_stats = np.array((data.mean(), data.std(),
                                       data.max(), data.min()),
                                      dtype=np.float_)
                self.assertArrayAllClose(nstats, data_stats, **kwargs)
    else:
        self._ensure_folder(reference_path)
        stats = collections.OrderedDict([
            ('std', np.float_(data.std())),
            ('min', np.float_(data.min())),
            ('max', np.float_(data.max())),
            ('shape', data.shape),
            ('masked', ma.is_masked(data)),
            ('mean', np.float_(data.mean()))])
        with open(reference_path, 'w') as reference_file:
            reference_file.write(json.dumps(stats))
def sam2mat_main(args):
    region_pattern = r'^[^:]+(?::\d+-\d+)?(?:,[^:]+(?::\d+-\d+)?)?$'
    if args.region is not None and re.search(region_pattern, args.region):
        regions = args.region
    elif args.reglist is not None:
        with open(args.reglist) as f:
            regions = [line.rstrip() for line in f]
    else:
        regions = None
    if args.insam is None:
        sam_fh = sys.stdin
    else:
        sam_fh = open(args.insam, 'r')
    bdata = BinnedData(args.fai, regions=regions, resolution=args.resolution)
    bdata.read_sam(sam_fh)
    sam_fh.close()
    if args.clean:
        bdata.clean()
    if args.ice:
        bdata.iterative_correction()
    margins = bdata.dat.sum(axis=0)
    #print(margins)
    #sys.exit()
    try:
        os.makedirs(args.outdir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise(e)
    bin_outfile = os.path.join(args.outdir, 'bins.txt.gz')
    contact_outfile = os.path.join(args.outdir, 'contacts.txt.gz')
    matrix_outfile = os.path.join(args.outdir, 'matrix.txt.gz')
    bin_f = gzip.open(bin_outfile, 'wb')
    contact_f = gzip.open(contact_outfile, 'wb')
    matrix_f = gzip.open(matrix_outfile, 'wb')
    for i, chrom1, b1 in bdata.iter_bins():
        bin_mid1 = (b1[0] + b1[1]) / 2
        if ma.is_masked(margins[i]):
            margin = 0
        else:
            margin = int(margins[i])
        print('{}\t{}\t{}\t{}\t{}'.format(chrom1, 0, bin_mid1, margin,
                                          int(margin > 0)), file=bin_f)
        if bdata.cleaned:
            print('\t'.join(bdata.dat.data[i].astype(str)), file=matrix_f)
        else:
            print('\t'.join(bdata.dat[i].astype(str)), file=matrix_f)
        for j, chrom2, b2 in bdata.iter_bins():
            bin_mid2 = (b2[0] + b2[1]) / 2
            contact = bdata.dat[i, j]
            if j > i and not ma.is_masked(contact) and contact > 0:
                print('{}\t{}\t{}\t{}\t{}'.format(chrom1, bin_mid1, chrom2,
                                                  bin_mid2, int(contact)),
                      file=contact_f)
    bin_f.close()
    contact_f.close()
    matrix_f.close()
def test_ephemerides_query(patch_request): # check values of Ceres for a given epoch # orbital uncertainty of Ceres is basically zero res = jplhorizons.Horizons(id='Ceres', location='500', epochs=2451545.5).ephemerides()[0] assert res['targetname'] == "1 Ceres" assert res['datetime_str'] == "2000-Jan-01 00:00:00.000" assert res['solar_presence'] == "" assert res['flags'] == "" assert res['elongFlag'] == '/L' assert is_masked(res['AZ']) assert is_masked(res['EL']) assert is_masked(res['airmass']) assert is_masked(res['magextinct']) npt.assert_allclose( [2451544.5, 188.70280, 9.09829, 34.40955, -2.68358, 8.27, 6.83, 96.171, 161.3828, 10.4528, 2.551099014238, 0.1744491, 2.26315116146176, -21.9390511, 18.822054, 95.3996, 22.5698, 292.551, 296.850, 184.3426220, 11.7996521, 289.864329, 71.545655, 0, 0], [res['datetime_jd'], res['RA'], res['DEC'], res['RA_rate'], res['DEC_rate'], res['V'], res['surfbright'], res['illumination'], res['EclLon'], res['EclLat'], res['r'], res['r_rate'], res['delta'], res['delta_rate'], res['lighttime'], res['elong'], res['alpha'], res['sunTargetPA'], res['velocityPA'], res['ObsEclLon'], res['ObsEclLat'], res['GlxLon'], res['GlxLat'], res['RA_3sigma'], res['DEC_3sigma']])
def test_addTraceWithGap(self):
    """
    Tests __add__ method of the Trace class.
    """
    # set up
    tr1 = Trace(data=np.arange(1000))
    tr1.stats.sampling_rate = 200
    start = UTCDateTime(2000, 1, 1, 0, 0, 0, 0)
    tr1.stats.starttime = start
    tr2 = Trace(data=np.arange(0, 1000)[::-1])
    tr2.stats.sampling_rate = 200
    tr2.stats.starttime = start + 10
    # verify
    tr1.verify()
    tr2.verify()
    # add
    trace = tr1 + tr2
    # stats (assertEqual: the assertEquals alias is deprecated and removed in
    # Python 3.12)
    self.assertEqual(trace.stats.starttime, start)
    self.assertEqual(trace.stats.endtime, start + 14.995)
    self.assertEqual(trace.stats.sampling_rate, 200)
    self.assertEqual(trace.stats.npts, 3000)
    # data
    self.assertEqual(len(trace), 3000)
    self.assertEqual(trace[0], 0)
    self.assertEqual(trace[999], 999)
    self.assertTrue(is_masked(trace[1000]))
    self.assertTrue(is_masked(trace[1999]))
    self.assertEqual(trace[2000], 999)
    self.assertEqual(trace[2999], 0)
    # verify
    trace.verify()
def ham6_nearest(pixel, palette, last_color=None): if pixel is None or ma.is_masked(pixel): return ma.masked, ma.masked min_dist = None best_index = ma.masked best_color = ma.masked for i, c in enumerate(palette[:16]): d = color_distance(pixel, c) if min_dist is None or d < min_dist: if d == 0: return i, c min_dist = d best_index = i best_color = c if last_color is None or ma.is_masked(last_color): return best_index, best_color c = last_color.copy() for i in range(16): c[2] = i * 0x11 d = color_distance(pixel, c) if d < min_dist: if d == 0: return i + 0x10, c min_dist = d best_index = i + 0x10 best_color = c.copy() c = last_color.copy() for i in range(16): c[0] = i * 0x11 d = color_distance(pixel, c) if d < min_dist: if d == 0: return i + 0x20, c min_dist = d best_index = i + 0x20 best_color = c.copy() c = last_color.copy() for i in range(16): c[1] = i * 0x11 d = color_distance(pixel, c) if d < min_dist: if d == 0: return i + 0x30, c min_dist = d best_index = i + 0x30 best_color = c.copy() return best_index, best_color
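# Note on the index convention assumed by ham6_nearest above: codes 0x00-0x0F
# select a palette colour directly, while 0x10-0x1F, 0x20-0x2F and 0x30-0x3F
# keep the previous pixel's colour and replace only its blue, red or green
# channel (respectively) with (code & 0xF) * 0x11.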
def add_average_field(self, field_to_avg, average_func=np.ma.mean, valid_range=None #[20, 300] ): """ Will run an function over the elements of a field to reduce them to a single metric for each element, and add this reduced data (e.g. the mean value) as a new field to the DataMat. Obvious example is to compute the average pupil size in a single trial. Will honor span start and end indices if they are not masked. Parameters: field_to_avg : string the name of the field to process average_func : function pointer a pointer to the function to use for each element valid_range : 2-element sequence (tuple or list) if not None, then minimum and maximum dictating the range, outside of which the data will be ignored. The data outside this range will be masked prior to the averaging. """ for fieldname in [field_to_avg]: if fieldname not in self.fieldnames(): raise ValueError("Required field '%s' not in Datamat." % ( fieldname)) avg = [] for dmi in self: dat = dmi.field(field_to_avg)[0] if dat is not None: sidx = dmi.span_start_idx[0] if ma.is_masked(sidx): sidx = 0 eidx = dmi.span_end_idx[0] if ma.is_masked(eidx): eidx = -1 spandat = dat[sidx:eidx] if valid_range is not None: valdat = spandat[(spandat > valid_range[0]) & (spandat < valid_range[1])] else: valdat = spandat datavg = average_func(valdat) if len(valdat) > 0 else np.NaN avg.append(datavg) else: avg.append(np.NaN) avg = ma.masked_invalid(avg) avg.fill_value = np.NaN fname = get_short_function_name(average_func) new_field = (fname) + "_" + field_to_avg self.add_field(new_field, avg)
def _next_non_masked_element(a, idx):
    """Return the next non masked element of a masked array.

    If an array is masked, return the next non-masked element (if the given
    index is masked). If no other unmasked points are after the given masked
    point, returns none.

    Parameters
    ----------
    a : array-like
        1-dimensional array of numeric values
    idx : integer
        index of requested element

    Returns
    -------
        Index of next non-masked element and next non-masked element
    """
    try:
        next_idx = idx + a[idx:].mask.argmin()
        if ma.is_masked(a[next_idx]):
            return None, None
        else:
            return next_idx, a[next_idx]
    except (AttributeError, TypeError, IndexError):
        return idx, a[idx]
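# Hedged usage sketch (not from the source): behaviour of the helper above on a
# small masked array and on a plain ndarray.
import numpy as np
import numpy.ma as ma

a = ma.array([1, 2, 3, 4], mask=[False, True, True, False])
_next_non_masked_element(a, 1)              # -> (3, 4): skips the masked run
_next_non_masked_element(np.arange(4), 1)   # -> (1, 1): plain arrays fall through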
def array_masked_to_nans(array):
    """
    Convert a masked array to a NumPy `ndarray` filled with NaN values. Input
    NumPy arrays with no mask are returned unchanged.

    This is used for dask integration, as dask does not support masked arrays.

    Args:

    * array:
        A NumPy `ndarray` or masked array.

    Returns:
        A NumPy `ndarray`. This is the input array if unmasked, or an array of
        floating-point values with NaN values where the mask was `True` if the
        input array is masked.

    .. note::
        The fill value and mask of the input masked array will be lost.

    .. note::
        Integer masked arrays are cast to 8-byte floats because NaN is a
        floating-point value.

    """
    if not ma.isMaskedArray(array):
        result = array
    else:
        if ma.is_masked(array):
            mask = array.mask
            new_dtype = nan_array_type(array.data.dtype)
            result = array.data.astype(new_dtype)
            result[mask] = np.nan
        else:
            result = array.data
    return result
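# Hedged usage sketch (nan_array_type is assumed to be the module's own helper):
# a masked array comes back as a plain ndarray with NaN under the mask.
import numpy as np
import numpy.ma as ma

masked = ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
array_masked_to_nans(masked)    # -> array([ 1., nan,  3.])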
def _math_op_common(cube, operation_function, new_unit, new_dtype=None, in_place=False): _assert_is_cube(cube) if in_place: new_cube = cube if cube.has_lazy_data(): new_cube.data = operation_function(cube.lazy_data()) else: try: operation_function(cube.data, out=cube.data) except TypeError: # Non ufunc function operation_function(cube.data) else: new_cube = cube.copy(data=operation_function(cube.core_data())) # If the result of the operation is scalar and masked, we need to fix up # the dtype if new_dtype is not None \ and not new_cube.has_lazy_data() \ and new_cube.data.shape == () \ and ma.is_masked(new_cube.data): new_cube.data = ma.masked_array(0, 1, dtype=new_dtype) iris.analysis.clear_phenomenon_identity(new_cube) new_cube.units = new_unit return new_cube
def from_ham6(ham6, palette, background=None): if background is None: background = ma.masked elif isinstance(background, numbers.Integral): background = palette[background] if ma.is_masked(background) or ma.isMaskedArray(ham6): rgb8 = ma.empty(ham6.shape[:2] + (3,), dtype=np.uint8) else: rgb8 = np.empty(ham6.shape[:2] + (3,), dtype=np.uint8) for y in range(rgb8.shape[0]): c = background for x in range(rgb8.shape[1]): i = ham6[y, x] if i is ma.masked: ham6[y, x] = ma.masked continue if i < 0x10: c = palette[i] else: c = c.copy() c[(None, 2, 0, 1)[i >> 4]] = (i & 0xF) * 0x11 rgb8[y, x] = c return rgb8
def to_ham6(img, palette, background=None, out=None): _debug_array(img) if background is None: background = ma.masked elif isinstance(background, numbers.Integral): background = palette[background] if not ma.is_masked(background) and ma.isMaskedArray(img): img = img.filled(background) if ma.isMaskedArray(img): ham6 = ma.empty(img.shape[:2], dtype=np.uint8) else: ham6 = np.empty(img.shape[:2], dtype=np.uint8) for y in range(img.shape[0]): c = background for x in range(img.shape[1]): i, c = ham6_nearest(img[y, x], palette, c) ham6[y, x] = i if out is not None: out[y, x] = c _debug_array(ham6) return ham6
def make_gene_map_2(self): """ The method that takes the attributes from the array and uses them to create a gene map for the array. The gene map is a dictionary which has a binary string as a key. The binary string is created by creating a binary bit string of an appropriate length. The length is calculated """ count = 0 self.iterator_one = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len)) ### Assign a binary string a location and a value from the data print "\n" print "Creating gene-map dictionary... \n" print "Assigning valid locations to binary strings! \n" for x_valid in self.iterator_one: binary_string = bin(count)[2:] while len(binary_string) < self.string_length: # removed minus one (-1) NB binary_string = "0" + binary_string self.gene_map[binary_string] = {} if ma.is_masked(self.array[x_valid]): pass else: self.gene_map[binary_string]["coordinate"] = tuple(x_valid) self.gene_map[binary_string]["value"] = self.array[x_valid] self.location_dict[x_valid[1:3]] = [] self.location_dict_stdevs[x_valid[1:3]] = 0 count += 1 self.last_valid_binary_string = binary_string binary_string_old = binary_string not_valid_first = int(binary_string, 2) + 1 not_valid_last = int("1" * (self.string_length), 2) # added minus one just for nonmasked version NB self.count = count if self.count == self.count_non_masked: print "The counter corresponds with the non-masked count! \n" ### Pad the dictionary to give binary strings some value print "Assigning left over binary strings to non-existant locations! \n" self.iterator_two = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len)) # ~ for x_not_valid in range(not_valid_first, not_valid_last+1): count_2 = not_valid_first for x_not_valid in self.iterator_two: # ~ binary_string = bin(x_not_valid)[2:] # DOES IT NEED TO BE PADDED binary_string = bin(count_2)[2:] while len(binary_string) < self.string_length: # removed minus one (-1) NB binary_string = "0" + binary_string self.gene_map[binary_string] = {} self.gene_map[binary_string]["coordinate"] = (999, 999, 999) # x_not_valid self.gene_map[binary_string]["value"] = 1e09 # self.array[x_valid] if count_2 == not_valid_last: break else: count_2 += 1 print "There are %d valid locations. \n" % count print "The last binary string is: ", binary_string print "The last binary string assigned to a valid locations is :", binary_string_old print "The length of binary string is: ", self.string_length print "The non-valid locations fall between %d and %d. \n" % (not_valid_first, not_valid_last) print "Is the array masked?: \n", ma.isMA(self.array) print "The gene-map has been created! \n"
def err(coef):
    if ma.is_masked(v):
        res = v.flatten() - EllipticGaussian(x, y, coef).flatten()
        return res.data[res.mask == False]
    else:
        res = v.flatten() - EllipticGaussian(x, y, coef).flatten()
        return res
def err(coef):
    if ma.is_masked(v):
        res = v.flatten() - Gaussian1d(x, coef).flatten()
        return res.data[res.mask == False]
    else:
        res = v.flatten() - Gaussian1d(x, coef).flatten()
        return res
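# Hedged sketch (not from the source): how a masked-residual err() like the two
# above is typically handed to scipy.optimize.leastsq, using a trivial linear
# model in place of Gaussian1d / EllipticGaussian.
import numpy as np
import numpy.ma as ma
from scipy.optimize import leastsq

x = np.arange(10, dtype=float)
v = ma.masked_greater(2.0 * x + 1.0, 15.0)      # observations with some points masked

def _err(coef):
    res = v - (coef[0] * x + coef[1])
    if ma.is_masked(v):
        return res.data[res.mask == False]      # fit only the unmasked residuals
    return res

coef, _ = leastsq(_err, [1.0, 0.0])             # coef -> approximately [2.0, 1.0]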
def createMinMaxList(self): """ Creates the minmax list. The method used is not fully accurate but the results should be ok. """ pixel = self.win.detail data = self.stream.slice(self.starttime, self.endtime)[0].data # Reshape and calculate point to point differences. per_pixel = int(len(data)//pixel) ptp = data[:pixel * per_pixel].reshape(pixel, per_pixel).ptp(axis=1) # Last pixel. last_pixel = data[pixel * per_pixel:] if len(last_pixel): last_pixel = last_pixel.ptp() if ptp[-1] < last_pixel: ptp[-1] = last_pixel self.ptp = ptp.astype('float32') # Create a logarithmic axis. if self.win.log_scale: self.ptp += 1 self.ptp = np.log(self.ptp)/np.log(self.win.log_scale) # Make it go from 0 to 100. self.ptp *= 100.0/self.ptp.max() # Set masked arrays to zero. if is_masked(self.ptp): self.ptp.fill_value = 0.0 self.ptp = self.ptp.filled() # Assure that very small values are also visible. Only true gaps are 0 # and will stay 0. self.ptp[(self.ptp > 0) & (self.ptp < 0.5)] = 0.5
def load_ham6(fname, palette, background=None, mtimes=None, format=None): stem, __ = os.path.splitext(fname) fname_cache = stem + '.cache' masked = background is None or ma.is_masked(background) if mtimes is None: mtimes = () elif not isinstance(mtimes, collections.Iterable): mtimes = (mtimes,) try: if os.path.exists(fname): mtimes = itertools.chain(mtimes, (os.path.getmtime(fname),)) for mtime in mtimes: if os.path.getmtime(fname_cache) < mtime: raise OSError("cache file out of date") ham6 = np.genfromtxt(fname_cache, dtype=np.uint8, missing_values=masked and '--', usemask=masked, loose=False, invalid_raise=True) except OSError: rgb = load_image(fname, masked=masked, format=format) ham6 = to_ham6(rgb, palette, background=background) out = np.array(ham6, dtype=np.str_) if masked: out[ma.getmaskarray(ham6)] = '--' np.savetxt(fname_cache, out, fmt='%2s', delimiter=' ') return ham6
def __call__(self, array):
    masked = ma.is_masked(array)
    if self.method == 'basemap':   # '==', not 'is': identity checks on string literals are unreliable
        return basemap.interp(array, self.xin, self.yin,
                              self.xout, self.yout,
                              checkbounds=False, masked=masked, order=1)
    elif self.method == 'scipy':
        import scipy.interpolate
        interp = scipy.interpolate.interp2d(self.xin, self.yin, array,
                                            kind='linear')
        a1d = interp(self.xout[0, :], self.yout[:, 0])
        return npy.reshape(a1d, self.yout.shape)
def feature_from_tile(self, tile, out):
    if ma.is_masked(tile):
        tile = tile.compressed()
    tile = tile.reshape(-1)
    out[:] = np.bincount(tile, minlength=self.bin_count)
    out /= np.sum(out)
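# Minimal sketch of the idea above (toy data, names illustrative): masked
# pixels are dropped via compressed() before the normalized bincount.
import numpy as np
import numpy.ma as ma

tile = ma.masked_array([0, 1, 2, 2], mask=[False, True, False, False])
counts = np.bincount(tile.compressed(), minlength=4).astype(float)
counts /= counts.sum()     # -> [1/3, 0, 2/3, 0]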
def unwrap_py(inph,in_p=(), uv=2*pi): """Return the input matrix unwraped the valu given in uv The same as unwrapv, but using for-s, written in python """ if not is_masked(inph): fasei=MaskedArray(inph, isnan(inph)) else: fasei=inph nx, ny=(fasei.shape[0],fasei.shape[1]) # If the initial unwraping point is not given, take the center of the image # as initial coordinate if in_p==(): in_p=(int(nx/2),int(ny/2)) # Create a temporal space to mark if the points are already unwrapped # 0 the point has not been unwrapped # 1 the point has not been unwrapped, but it is in the unwrapping list # 2 the point was already unwrapped fl=zeros((nx, ny)) # List containing the points to unwrap l_un=[in_p] fl[in_p]=1 # unwrapped values faseo=fasei.copy() while len(l_un)>0: # remove the first value from the list cx, cy=l_un.pop(0) # Put the coordinates of unwrapped the neigbors in the list # And check for wrapping nv=0 wv=0 for i in range(cx-1, cx+2): for j in range(cy-1, cy+2): if (i>-1) and (i<nx) and (j>-1) and (j<ny): if (fl[i, j]==0)&(faseo.mask[i, j]==False): fl[i, j]=1 l_un.append((i, j)) elif fl[i, j]==2: wv=wv+rint((faseo[i, j]-faseo[cx, cy])/uv) nv=nv+1 if nv!=0: wv=wv/nv fl[cx, cy]=2 faseo[cx, cy]=faseo[cx, cy]+wv*uv return faseo
def _numpy_interpolation(self, point_num, eval_points): """ Parameters ---------- point_num: int Index of class position in values list eval_points: ndarray Inputs used to evaluate class member function Returns ------- ndarray: output from member function """ is_masked = ma.is_masked(eval_points) shape = point_num.shape ev_shape = eval_points.shape vals = self.values[point_num.ravel()] eval_points = np.repeat(eval_points, shape[1], axis=0) it = np.arange(eval_points.shape[0]) it = np.repeat(it, eval_points.shape[1], axis=0) eval_points = eval_points.reshape( eval_points.shape[0] * eval_points.shape[1], eval_points.shape[-1] ) scaled_points = eval_points.T if is_masked: mask = np.invert(ma.getmask(scaled_points[0])) else: mask = np.ones_like(scaled_points[0], dtype=bool) it = ma.masked_array(it, mask) scaled_points[0] = ( (scaled_points[0] - (self._bounds[0][0])) / (self._bounds[0][1] - self._bounds[0][0]) ) * (vals.shape[-2] - 1) scaled_points[1] += ( (scaled_points[1] - (self._bounds[1][0])) / (self._bounds[1][1] - self._bounds[1][0]) ) * (vals.shape[-1] - 1) scaled_points = np.vstack((it, scaled_points)) output = np.zeros(scaled_points.T.shape[:-1]) output[mask] = map_coordinates(vals, scaled_points.T[mask].T, order=1) new_shape = (*shape, ev_shape[-2]) output = output.reshape(new_shape) return ma.masked_array(output, mask=mask)
def __call__(self, obj, base_encoder):
    if isinstance(obj, np.ndarray):
        if obj.ndim == 1:
            return [base_encoder.default(x) for x in obj]
        else:
            return [base_encoder.default(obj[i]) for i in range(obj.shape[0])]
    if isinstance(obj, np.generic):
        a = obj.item()  # np.asscalar() was removed in NumPy 1.23; item() is its documented replacement
        if (isinstance(a, float) and np.isnan(a)) or ma.is_masked(a):
            return None
        return a
    return None
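# Hedged sketch (assumed wiring, not the project's own encoder class): a hook
# like the one above is usually reached through a json.JSONEncoder subclass
# whose default() handles NumPy scalars and arrays.
import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.generic):
            return obj.item()                  # NumPy scalar -> native Python scalar
        if isinstance(obj, np.ndarray):
            return obj.tolist()                # array -> (nested) list
        return super().default(obj)

json.dumps({"n": np.int64(5), "y": np.arange(3)}, cls=NumpyEncoder)
# -> '{"n": 5, "y": [0, 1, 2]}'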
def ssh2psi(lon, lat, ssh):
    from numpy import pi, sin, ndim, ma
    from pylab import meshgrid
    g = 9.8
    #r = 6371.e3
    omega = 0.729e-4
    if ndim(lon) == 1:
        lon, lat = meshgrid(lon, lat)
    f = 2.0 * omega * sin(lat * pi / 180.0)
    psi = g / f * ssh
    if ma.is_masked(ssh):
        psi = ma.array(psi, mask=ssh.mask)
    return psi
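# Quick numeric check of the relation psi = g * ssh / f above (illustrative
# values only): at 30 N, f = 2 * omega * sin(30 deg) ~ 7.29e-5 s^-1, so 0.1 m
# of SSH corresponds to psi ~ 9.8 * 0.1 / 7.29e-5 ~ 1.3e4 m^2/s.
import numpy as np
f_30 = 2.0 * 0.729e-4 * np.sin(np.deg2rad(30.0))
psi_check = 9.8 * 0.1 / f_30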
def kdtree_sample2d(xin, yin, z2d, xout, yout, distance=2.,method='linear'): """ bin random data points to grids loc_points: 2 x dpoints, lon, lat loc_grids: 2 x dgrids, x.ravel, y.ravel """ from scipy import spatial, ma from numpy import meshgrid, exp, array,c_, where xip,xin = find_overlap(xin, xout) yip,yin = find_overlap(yin, yout) z2ds = z2d[where(yip==True)[0],:][:,where(xip==True)[0]] ismask = ma.is_masked(z2ds) xin2d,yin2d = meshgrid(xin, yin) if ismask: xin1d, yin1d = xin2d[z2ds.mask==False].ravel(), yin2d[z2ds.mask==False].ravel() z1d = z2ds[z2ds.mask==False] locs = c_[xin1d, yin1d] else: xin1d, yin1d = xin2d.ravel(), yin2d.ravel() z1d = z2ds.ravel() locs = c_[xin1d, yin1d] tree = spatial.cKDTree(locs) grids = zip(xout, yout) index = tree.query_ball_point(grids, distance) Tmis=[] sample_size=[] for i in range(xout.size): ip = index[i] if len(ip) == 0: Tmis.append(999999) sample_size.append(0) else: dis = ((xin1d[ip]-xout[i])**2+(yin1d[ip]-yout[i])**2) if method=='linear': dis = ma.masked_greater(dis**0.5, distance) weight = distance - dis**0.5 else: weight = exp(-(dis/distance**2)) weight = weight/weight.sum() Tmis.append((weight*z1d[ip]).sum()) sample_size.append(len(ip)) zout = ma.masked_greater(array(Tmis),1e5) sample_size = ma.masked_equal(array(sample_size),0) return zout, sample_size
def test_extract_overlimit():
    """ Test a request over the limits of the database """
    db = WOA()

    t = db['TEMP'].extract(var='t_mn', doy=136.875,
                           depth=5502, lat=17.5, lon=-37.5)
    assert ma.is_masked(t['t_mn'])

    t = db['TEMP'].extract(var='t_mn', doy=136.875,
                           depth=[10, 5502], lat=17.5, lon=-37.5)
    assert np.all(t['t_mn'].mask == [False, True])
    assert ma.allclose(t['t_mn'],
                       ma.masked_array([24.62145996, 0], mask=[False, True]))
def inline_data_asarray(inline, dtype=None): # np.asarray doesn't handle structured arrays unless the innermost # elements are tuples. To do that, we drill down the first # element of each level until we find a single item that # successfully converts to a scalar of the expected structured # dtype. Then we go through and convert everything at that level # to a tuple. This probably breaks for nested structured dtypes, # but it's probably good enough for now. It also won't work with # object dtypes, but ASDF explicitly excludes those, so we're ok # there. if dtype is not None and dtype.fields is not None: def find_innermost_match(l, depth=0): if not isinstance(l, list) or not len(l): raise ValueError( "data can not be converted to structured array") try: np.asarray(tuple(l), dtype=dtype) except ValueError: return find_innermost_match(l[0], depth + 1) else: return depth depth = find_innermost_match(inline) def convert_to_tuples(l, data_depth, depth=0): if data_depth == depth: return tuple(l) else: return [convert_to_tuples(x, data_depth, depth+1) for x in l] inline = convert_to_tuples(inline, depth) return np.asarray(inline, dtype=dtype) else: def handle_mask(inline): if isinstance(inline, list): if None in inline: inline_array = np.asarray(inline) nones = np.equal(inline_array, None) return np.ma.array(np.where(nones, 0, inline), mask=nones) else: return [handle_mask(x) for x in inline] return inline inline = handle_mask(inline) inline = np.ma.asarray(inline, dtype=dtype) if not ma.is_masked(inline): return inline.data else: return inline
def wet_spell_analysis(reference_array, threshold=0.1, nyear=1, dt=3.): ''' Characterize wet spells using sub-daily (hourly) data :param reference_array: an array to be analyzed :type reference_array: :class:'numpy.ma.core.MaskedArray' :param threshold: the minimum amount of rainfall [mm/hour] :type threshold: 'float' :param nyear: the number of discontinous periods :type nyear: 'int' :param dt: the temporal resolution of reference_array :type dt: 'float' ''' nt = reference_array.shape[0] if reference_array.ndim == 3: reshaped_array = reference_array.reshape([nt, reference_array.size / nt]) else: reshaped_array = reference_array if ma.count_masked(reshaped_array[0,:]) != 0: xy_indices = numpy.where(reshaped_array.mask[0, :] == False)[0] else: xy_indices = numpy.arange(reshaped_array.shape[1]) nt_each_year = nt / nyear spell_duration = [] peak_rainfall = [] total_rainfall = [] for index in xy_indices: for iyear in numpy.arange(nyear): data0_temp = reshaped_array[nt_each_year * iyear:nt_each_year * (iyear + 1), index] # time indices when precipitation rate is smaller than the # threshold [mm/hr] t_index = numpy.where((data0_temp <= threshold) & (data0_temp.mask == False))[0] t_index = numpy.insert(t_index, 0, 0) t_index = t_index + nt_each_year * iyear for it in numpy.arange(t_index.size - 1): if t_index[it + 1] - t_index[it] > 1: data1_temp = data0_temp[t_index[it] + 1:t_index[it + 1]] if not ma.is_masked(data1_temp): spell_duration.append( (t_index[it + 1] - t_index[it] - 1) * dt) peak_rainfall.append(data1_temp.max()) total_rainfall.append(data1_temp.sum()) return numpy.array(spell_duration), numpy.array(peak_rainfall), numpy.array(total_rainfall)
def _percentile(data, axis, percent, **kwargs):
    # NB. scipy.stats.mstats.scoreatpercentile always works across just the
    # first dimension of its input data, and returns a result that has one
    # fewer dimension than the input.
    # So shape=(3, 4, 5) -> shape(4, 5)
    data = np.rollaxis(data, axis)
    shape = data.shape[1:]
    if shape:
        data = data.reshape([data.shape[0], np.prod(shape)])
    result = scipy.stats.mstats.scoreatpercentile(data, percent, **kwargs)
    if not ma.isMaskedArray(data) and not ma.is_masked(result):
        result = np.asarray(result)
    if shape:
        result = result.reshape(shape)
    return result
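# Hedged usage sketch (the helper above is private to its module and assumes
# scipy.stats is already imported there): collapsing axis 0 of a (3, 4, 5)
# array at the 50th percentile yields a (4, 5) result.
import numpy as np
data = np.arange(60, dtype=float).reshape(3, 4, 5)
result = _percentile(data, axis=0, percent=50)   # result.shape -> (4, 5)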
def test_set_window(self):
    window_data = np.ones((self.lat_size_win, self.lon_size_win))
    x = self.w.set_window(window_data)

    # check output geometry
    self.assertEqual(x.shape[0], 360)
    self.assertEqual(x.shape[1], 720)

    # check output is masked
    self.assertTrue(ma.is_masked(x))

    # check that the window is the only thing in the returned array
    win_masked = ma.count_masked(x)
    win = ma.count(x)
    self.assertEqual(win, window_data.size)
    self.assertEqual(win_masked, x.size - window_data.size)
    self.assertTrue(np.all(x[self.w._window] == window_data))
def __setitem__(self, key, value):
    """
    Change the value of a tile based on a key (which is the row/col)
    """
    if isinstance(key, list):
        key = tuple(map(np.concatenate, zip(*key)))
    if ma.is_masked(value):
        (r_loc, c_loc) = key
        r_min = np.min(r_loc)
        c_min = np.min(c_loc)
        key_offset = (r_loc - r_min, c_loc - c_min)
        self._image[key] = value[key_offset]
    elif np.isscalar(value):
        self._image[key] = value
    else:
        self._image[key] = np.reshape(value, (-1, self._image.shape[2]))
def feature_from_tile(self,tile,out): fs = tile.shape[-1] out = out.view(); out.shape = (self._number_of_locations,fs) def build_tree_vector(points_r,points_c,levels_left,local_out_array): tile_rs = tile[points_r,points_c].reshape( -1,fs); local_out_array[0,:] = ma.mean(tile_rs,axis=0) #plt.plot(points_r,points_c,'o') if levels_left > 1: remaining_out_array = local_out_array[1:,:] mean_r = np.mean(points_r); mean_c = np.mean(points_c) offset_size = remaining_out_array.shape[0]/4 top = points_r < mean_r bottom = np.logical_not(top) left = points_c < mean_c right = np.logical_not(left) quadrents = [ (top,right),(top,left),(bottom,left),(bottom,right) ] #Fill the solution for all 4 quadrents for idx,quadrent in enumerate(quadrents): q = np.logical_and(quadrent[0],quadrent[1]) q_out = remaining_out_array[ idx*offset_size : (idx+1)*offset_size, : ] build_tree_vector(points_r[q],points_c[q],levels_left - 1,q_out) #renormilize remaining_out_array *= .25 if ma.is_masked(tile): points_r,points_c = np.nonzero(np.logical_not(tile.mask[:,:,0])) else: grid = np.mgrid[0:tile.shape[0], 0:tile.shape[1]] points_r = grid[0,:,:].ravel() points_c = grid[1,:,:].ravel() build_tree_vector(points_r,points_c,self._number_of_levels,out)
def calculate_joint_estimate(self): # do not use SSNV based estimate if it exceeds 0.3 (this estimate can be unreliable at high TiNs due to # germline events) if self.ssnv_based_model.TiN <= 0.3 and ~np.isnan( self.ascna_based_model.TiN): if len(self.ascna_based_model.centroids) > 1: reselect_cluster = np.argmin( np.abs(self.ascna_based_model.centroids / 100 - self.ssnv_based_model.TiN)) self.ascna_based_model.TiN_likelihood = self.ascna_based_model.cluster_TiN_likelihoods[ reselect_cluster] print('reselected cluster based on SSNVs') # combine independent likelihoods self.joint_log_likelihood = self.ascna_based_model.TiN_likelihood + self.ssnv_based_model.TiN_likelihood # normalize likelihood to calculate posterior self.joint_posterior = np.exp( self.ascna_based_model.TiN_likelihood + self.ssnv_based_model.TiN_likelihood - np.nanmax(self.ascna_based_model.TiN_likelihood + self.ssnv_based_model.TiN_likelihood)) self.joint_posterior = np.true_divide( self.joint_posterior, np.nansum(self.joint_posterior)) self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.025)] self.CI_tin_high = self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.975)] self.TiN_int = np.nanargmax(self.joint_posterior) self.TiN = self.TiN_range[self.TiN_int] zero_tin_ssnv_model = copy.deepcopy(self.ssnv_based_model) zero_tin_ssnv_model.TiN = 0 zero_tin_ssnv_model.expectation_of_z_given_TiN() zero_tin_ssnv_model.maximize_TiN_likelihood() zero_total_l = zero_tin_ssnv_model.TiN_likelihood + self.ascna_based_model.TiN_likelihood zero_total_l = np.exp(zero_total_l - np.nanmax(zero_total_l)) self.p_null = np.true_divide(zero_total_l, np.nansum(zero_total_l))[0] print('joint TiN estimate = ' + str(self.TiN)) # use only ssnv based model elif ~np.isnan(self.ascna_based_model.TiN): # otherwise TiN estimate is = to aSCNA estimate print( 'SSNV based TiN estimate exceed 0.3 using only aSCNA based estimate' ) self.joint_log_likelihood = self.ascna_based_model.TiN_likelihood self.joint_posterior = np.exp( self.ascna_based_model.TiN_likelihood - np.nanmax(self.ascna_based_model.TiN_likelihood)) self.joint_posterior = np.true_divide( self.joint_posterior, np.nansum(self.joint_posterior)) self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.025)] self.CI_tin_high = self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.975)] self.TiN_int = np.nanargmax(self.joint_posterior) self.TiN = self.TiN_range[self.TiN_int] self.p_null = self.joint_posterior[0] # use only aSCNA based estimate elif ~np.isnan(self.ssnv_based_model.TiN ) and self.ssnv_based_model.TiN <= 0.3: print('No aSCNAs only using SSNV based model') self.joint_log_likelihood = self.ssnv_based_model.TiN_likelihood self.joint_posterior = np.exp( self.ssnv_based_model.TiN_likelihood - np.nanmax(self.ssnv_based_model.TiN_likelihood)) self.joint_posterior = np.true_divide( self.joint_posterior, np.nansum(self.joint_posterior)) self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.025)] self.CI_tin_high = 
self.TiN_range[next(x[0] for x in enumerate( np.cumsum( np.ma.masked_array( np.true_divide(self.joint_posterior, np.nansum(self.joint_posterior))))) if x[1] > 0.975)] self.TiN_int = np.nanargmax(self.joint_posterior) self.TiN = self.TiN_range[self.TiN_int] zero_tin_ssnv_model = copy.deepcopy(self.ssnv_based_model) zero_tin_ssnv_model.TiN = 0 zero_tin_ssnv_model.expectation_of_z_given_TiN() zero_tin_ssnv_model.maximize_TiN_likelihood() zero_total_l = zero_tin_ssnv_model.TiN_likelihood zero_total_l = np.exp(zero_total_l - np.nanmax(zero_total_l)) self.p_null = np.true_divide(zero_total_l, np.nansum(zero_total_l))[0] else: print('insuffcient data to generate TiN estimate.') self.CI_tin_high = 0 self.CI_tin_low = 0 self.joint_posterior = np.zeros([self.input.resolution, 1]) self.joint_posterior[0] = 1 self.TiN_int = 0 self.TiN = 0 self.p_null = 1 pH1 = self.joint_posterior[self.TiN_int] #print(self.joint_posterior) #print(self.p_null) # code to deal with underflows if ma.is_masked(self.p_null): self.p_null = 0 pH0 = self.p_null p_model = np.true_divide(self.input.TiN_prior * pH1, (self.input.TiN_prior * pH1) + ((1 - self.input.TiN_prior) * pH0)) if p_model < 0.5 or ~np.isfinite(p_model): print('insufficient evidence to justify TiN > 0') self.joint_posterior = np.zeros([self.input.resolution, 1]) self.joint_posterior[0] = 1 self.TiN_int = 0 self.TiN = 0 self.CI_tin_high = 0 self.CI_tin_low = 0
def plot_rgb( arr, rgb=(0, 1, 2), figsize=(10, 10), str_clip=2, ax=None, extent=None, title="", stretch=None, ): """Plot three bands in a numpy array as a composite RGB image. Parameters ---------- arr : numpy array An n-dimensional array in rasterio band order (bands, rows, columns) containing the layers to plot. rgb : list (default = (0, 1, 2)) Indices of the three bands to be plotted. figsize : tuple (default = (10, 10) The x and y integer dimensions of the output plot. str_clip: int (default = 2) The percentage of clip to apply to the stretch. Default = 2 (2 and 98). ax : object (optional) The axes object where the ax element should be plotted. extent : tuple (optional) The extent object that matplotlib expects (left, right, bottom, top). title : string (optional) The intended title of the plot. stretch : Boolean (optional) Application of a linear stretch. If set to True, a linear stretch will be applied. Returns ---------- ax : axes object The axes object associated with the 3 band image. Example ------- .. plot:: >>> import matplotlib.pyplot as plt >>> import rasterio as rio >>> import earthpy.plot as ep >>> from earthpy.io import path_to_example >>> with rio.open(path_to_example('rmnp-rgb.tif')) as src: ... img_array = src.read() >>> # Ensure the input array doesn't have nodata values like -9999 >>> ep.plot_rgb(img_array) <matplotlib.axes._subplots.AxesSubplot object at 0x... """ if len(arr.shape) != 3: raise ValueError("Input needs to be 3 dimensions and in rasterio " "order with bands first") # Index bands for plotting and clean up data for matplotlib rgb_bands = arr[rgb, :, :] if stretch: rgb_bands = _stretch_im(rgb_bands, str_clip) # If type is masked array - add alpha channel for plotting if ma.is_masked(rgb_bands): # Build alpha channel mask = ~(np.ma.getmask(rgb_bands[0])) * 255 # Add the mask to the array & swap the axes order from (bands, # rows, columns) to (rows, columns, bands) for plotting rgb_bands = np.vstack((es.bytescale(rgb_bands), np.expand_dims(mask, axis=0))).transpose([1, 2, 0]) else: # Index bands for plotting and clean up data for matplotlib rgb_bands = es.bytescale(rgb_bands).transpose([1, 2, 0]) # Then plot. Define ax if it's undefined show = False if ax is None: fig, ax = plt.subplots(figsize=figsize) show = True ax.imshow(rgb_bands, extent=extent) ax.set_title(title) ax.set(xticks=[], yticks=[]) # Multipanel won't work if plt.show is called prior to second plot def if show: plt.show() return ax
def create_var_from_data(self, var_name, data, dims, datatype=DATATYPE_AUXILIARIES, attributes=None, replace_dims=REPLACE_DIMS): """ Create a new variable in the netcdf file starting from a numpy array and some metadata (like the name of the dimensions and the attributes). The dimensions that do not already exists (in the root group) will be created. If a dimension is unlimited, it will be saved as a fixed length dimension and the unlimited attributed will be lost. :param var_name: A string that will be used as the name of the new var :param data: A numpy array with the data that must be saved into the variable :param dims: A list of couples (tuples with two element). The first element of the tuple is the name of the dimension and the second one is an integer with its length. The order of the list must be such that [i[1] for i in dimensions] is the shape of data :param datatype: A string that represent the type of data of the variable. For example, "f8" means double precision :param attributes: A dictionary-like item :param replace_dims: A dictionary. If the name of a dimension is inside this dictionary, it will be replaced with its corresponding one. This is useful because some dimensions in RegCM are saved with another name in the CORDEX files (for example, jx becomes x). """ LOGGER.debug('Saving variable %s', var_name) if replace_dims is None: replace_dims = {} if attributes is None: attributes = {} # Copy dims dim_name_list = [] for dim_name, dim_len in dims: LOGGER.debug( 'A dimension named "%s" is required to create the variable %s', dim_name, var_name) if dim_name in replace_dims: LOGGER.debug( 'The name "%s" for a dimension is in the replace dict. It ' 'will be called "%s" instead', dim_name, replace_dims[dim_name], ) dim_name = replace_dims[dim_name] dim_name_list.append(dim_name) if dim_name in self.dimensions: LOGGER.debug( 'Dimension "%s" will not be created because it has already ' 'been created on the file', dim_name) dim_current_len = len(self.dimensions[dim_name]) if dim_current_len != dim_len: raise ValueError( 'The length of dimension {} is already set to {}. ' 'To save variable "{}", it should be {}!'.format( dim_name, dim_len, var_name, dim_current_len)) else: LOGGER.debug('Creating dimension %s of length %s', dim_name, dim_len) if dim_name == 'time': self.createDimension(dim_name, None) else: self.createDimension(dim_name, dim_len) # Finding the fill value if '_FillValue' in attributes: fill_value = attributes['_FillValue'] LOGGER.debug('Using %s as fill value (as requested)', fill_value) else: if is_masked(data): if datatype in NETCDF_DEFAULT_FILL_VALUES: fill_value = NETCDF_DEFAULT_FILL_VALUES[datatype] LOGGER.debug('Using %s as fill value (default value)', fill_value) else: raise ValueError( 'Data is masked but not appropriated fill_value has ' 'been found for datatype %s', datatype) else: LOGGER.debug( 'No _FillValue specified and data is unmasked. 
Setting ' 'fill_value flag as False') fill_value = False # Create the variable LOGGER.debug( 'Creating variable %s, with datatype "%s" and dimensions %s', var_name, datatype, tuple(dim_name_list)) ncdf_variable = self.createVariable(var_name, datatype, tuple(dim_name_list), fill_value=fill_value, zlib=COMPRESSION, complevel=COMPRESSION_LEVEL, shuffle=SHUFFLE, fletcher32=FLETCHER32) # Copy attributes for attr, attr_val in attributes.items(): if attr == '_FillValue' or attr == 'missing_value': ncdf_variable.setncattr('missing_value', attr_val) continue if attr in EXCLUDED_ATTRIBUTES: LOGGER.debug( 'Avoiding to copy the attribute %s because it is in the ' 'EXCLUDED_ATTRIBUTES list (file globals.py)', attr) continue LOGGER.debug( 'Adding attribute "%s" with value "%s" for variable "%s"', attr, attr_val, var_name, ) ncdf_variable.setncattr(attr, attr_val) if is_masked(data): LOGGER.debug('Data is masked') else: LOGGER.debug('Data is not masked') # Copy the values into the variable LOGGER.debug('Copying data inside the variable') ncdf_variable[:] = data return ncdf_variable
trackStartYear = years[trackStartDate] trackStartDate = Dates[trackStartDate] print(len(trackLen)) #for x in range(0,np.int(0.01*len(trackLen))): for x in range(0, len(trackLen)): print("On point %d of %d" % (x, len(trackLen))) if x % 10000 == 0: print("On track {0}/{1}".format(x, len(trackLen))) if trackLen[ x] < ntracks_min: # checking to make sure TPV track was longer than two days continue lat = data.variables['latExtr'][x, :] lon = data.variables['lonExtr'][x, :] if not ma.is_masked(lat): per_life_in_polar = float(np.where(lat <= -60)[0].shape[0]) / float( lat.shape[0]) # checking if TPV spent 60% of lifetime in Antarctic else: per_life_in_polar = float( np.where((lat.data <= -60) & (lat.mask != True))[0].shape[0]) / float( np.where((lat.mask != True))[0].shape[0]) if per_life_in_polar < 0.6: istpv = False else: istpv = True if (make_tpv == True): if istpv == True: perc = 0.0
fsm.magtoflux(fm.SPITZER, fm.IRAC3, i['5.8mag_Get']).value * (i['e_5.8mag_Get'] if i['e_5.8mag_Get'] else 0.01), fsm.magtoflux(fm.SPITZER, fm.IRAC4, i['8.0mag_Get']).value, fsm.magtoflux(fm.SPITZER, fm.IRAC4, i['8.0mag_Get']).value * (i['e_8.0mag_Get'] if i['e_8.0mag_Get'] else 0.01)) k = 6 else: k = -1 use_filts = [1, 1, 1, 1, 1, 1, 1, 1] nfilts = 3 + 5 if allfilts: #fm.GAIA k += 1 val = fsm.magtoflux(fm.GAIA, fm.GAIA_G, i['phot_g_mean_mag_Ga']).value if ma.is_masked(val): photline += " -999 -999 " use_filts[k] = 0 else: photline += " %5.4e %5.4e " % (val, val * i['phot_g_mean_flux_error_Ga'] / i['phot_g_mean_flux_Ga']) use_filts[k] = 1 k += 1 val = fsm.magtoflux(fm.GAIA, fm.GAIA_B, i['phot_bp_mean_mag_Ga']).value if ma.is_masked(val): photline += " -999 -999 " use_filts[k] = 0 else: photline += " %5.4e %5.4e " % (val, val * i['phot_bp_mean_flux_error_Ga'] /
def build_dimension_coordinate(engine, cf_coord_var, coord_name=None, coord_system=None): """Create a dimension coordinate (DimCoord) and add it to the cube.""" cf_var = engine.cf_var cube = engine.cube attributes = {} attr_units = get_attr_units(cf_coord_var, attributes) points_data = cf_coord_var[:] # Gracefully fill points masked array. if ma.is_masked(points_data): points_data = ma.filled(points_data) msg = "Gracefully filling {!r} dimension coordinate masked points" warnings.warn(msg.format(str(cf_coord_var.cf_name))) # Get any coordinate bounds. cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var) if cf_bounds_var is not None: bounds_data = cf_bounds_var[:] # Gracefully fill bounds masked array. if ma.is_masked(bounds_data): bounds_data = ma.filled(bounds_data) msg = "Gracefully filling {!r} dimension coordinate masked bounds" warnings.warn(msg.format(str(cf_coord_var.cf_name))) # Handle transposed bounds where the vertex dimension is not # the last one. Test based on shape to support different # dimension names. if cf_bounds_var.shape[:-1] != cf_coord_var.shape: bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var) else: bounds_data = None # Determine whether the coordinate is circular. circular = False if (points_data.ndim == 1 and coord_name in [CF_VALUE_STD_NAME_LON, CF_VALUE_STD_NAME_GRID_LON] and cf_units.Unit(attr_units) in [cf_units.Unit("radians"), cf_units.Unit("degrees")]): modulus_value = cf_units.Unit(attr_units).modulus circular = iris.util._is_circular(points_data, modulus_value, bounds=bounds_data) # Determine the name of the dimension/s shared between the CF-netCDF data variable # and the coordinate being built. common_dims = [ dim for dim in cf_coord_var.dimensions if dim in cf_var.dimensions ] data_dims = None if common_dims: # Calculate the offset of each common dimension. data_dims = [cf_var.dimensions.index(dim) for dim in common_dims] # Determine the standard_name, long_name and var_name standard_name, long_name, var_name = get_names(cf_coord_var, coord_name, attributes) # Create the coordinate. try: coord = iris.coords.DimCoord( points_data, standard_name=standard_name, long_name=long_name, var_name=var_name, units=attr_units, bounds=bounds_data, attributes=attributes, coord_system=coord_system, circular=circular, climatological=climatological, ) except ValueError as e_msg: # Attempt graceful loading. coord = iris.coords.AuxCoord( points_data, standard_name=standard_name, long_name=long_name, var_name=var_name, units=attr_units, bounds=bounds_data, attributes=attributes, coord_system=coord_system, climatological=climatological, ) cube.add_aux_coord(coord, data_dims) msg = ("Failed to create {name!r} dimension coordinate: {error}\n" "Gracefully creating {name!r} auxiliary coordinate instead.") warnings.warn(msg.format(name=str(cf_coord_var.cf_name), error=e_msg)) else: # Add the dimension coordinate to the cube. if data_dims: cube.add_dim_coord(coord, data_dims) else: # Scalar coords are placed in the aux_coords container. cube.add_aux_coord(coord, data_dims) # Update the coordinate to CF-netCDF variable mapping. engine.cube_parts["coordinates"].append((coord, cf_coord_var.cf_name))
from obspy.core import read, Stream, Trace
from glob import iglob
import numpy as np
from numpy.ma import is_masked

folder = \
    '/Users/lion/Documents/workspace/TestFiles/archive/RJOB/EHE.D/output/*_index.mseed'

st = read(folder)

#XXX: This fix is just for wrong index files. Remove the next time around.
for trace in st:
    trace.stats.sampling_rate = 1000.0 / (24 * 60 * 60)

st.merge()

# Set masked arrays to zero.
if is_masked(st[0].data):
    st[0].data.fill_value = 0.0
    st[0].data = st[0].data.filled()

st.write('BW.RJOB..EHE.2009.index', format='MSEED')
def plot_rgb(arr, rgb=(0, 1, 2), ax=None, extent=None, title="", figsize=(10, 10), stretch=None, str_clip=2): """Plot three bands in a numpy array as a composite RGB image. Parameters ---------- arr: numpy array An n dimension numpy array in rasterio band order (bands, x, y) rgb: list Indices of the three bands to be plotted (default = 0,1,2) extent: tuple The extent object that matplotlib expects (left, right, bottom, top) title: string (optional) String representing the title of the plot ax: object The axes object where the ax element should be plotted. Default = none figsize: tuple (optional) The x and y integer dimensions of the output plot if preferred to set. stretch: Boolean If True a linear stretch will be applied str_clip: int (optional) The % of clip to apply to the stretch. Default = 2 (2 and 98) Returns ---------- fig, ax : figure object, axes object The figure and axes object associated with the 3 band image. If the ax keyword is specified, the figure return will be None. """ if len(arr.shape) != 3: raise Exception("""Input needs to be 3 dimensions and in rasterio order with bands first""") # Index bands for plotting and clean up data for matplotlib rgb_bands = arr[rgb] if stretch: s_min = str_clip s_max = 100 - str_clip arr_rescaled = np.zeros_like(rgb_bands) for ii, band in enumerate(rgb_bands): lower, upper = np.percentile(band, (s_min, s_max)) arr_rescaled[ii] = exposure.rescale_intensity(band, in_range=(lower, upper)) rgb_bands = arr_rescaled.copy() # If type is masked array - add alpha channel for plotting if ma.is_masked(rgb_bands): # Build alpha channel mask = ~(np.ma.getmask(rgb_bands[0])) * 255 # Add the mask to the array & swap the axes order from (bands, # rows, columns) to (rows, columns, bands) for plotting rgb_bands = np.vstack((bytescale(rgb_bands), np.expand_dims(mask, axis=0))).\ transpose([1, 2, 0]) else: # Index bands for plotting and clean up data for matplotlib rgb_bands = bytescale(rgb_bands).transpose([1, 2, 0]) # Then plot. Define ax if it's default to none if ax is None: fig, ax = plt.subplots(figsize=figsize) else: fig = None ax.imshow(rgb_bands, extent=extent) ax.set_title(title) ax.set(xticks=[], yticks=[]) return fig, ax
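# Minimal sketch of the alpha-channel trick used by plot_rgb above (toy data,
# not from the source): the mask of the first band becomes a 0/255 alpha plane.
import numpy as np
import numpy.ma as ma

rgb = ma.masked_array(np.zeros((3, 2, 2), dtype=np.uint8),
                      mask=np.zeros((3, 2, 2), dtype=bool))
rgb.mask[:, 0, 0] = True                       # one fully masked pixel
alpha = ~np.ma.getmask(rgb[0]) * 255           # 0 where masked, 255 elsewhere
rgba = np.vstack((rgb.filled(0), alpha[np.newaxis])).transpose([1, 2, 0])
# rgba.shape -> (2, 2, 4); the masked pixel ends up fully transparent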
def make_page(self): success = True # check all models.tab files and existence of all therein t = ModelSet.all_sets() failed = list() for n, z, md, m in zip(list(t["name"]), list(t["z"]), list(t["medium"]), list(t["mass"])): print(n, z, md, m) mdict = dict() ms = ModelSet(name=n, z=z, medium=md, mass=m) mp = ModelPlot(ms) # stop complaining about too many figures mp._plt.rcParams.update({'figure.max_open_warning': 0}) print(f'Making page for {n,z,md,m}') if m is None or ma.is_masked(m): dir = f'{n}_{z}_{md}' else: dir = f'{n}_{z}_{md}_{m}' dir = dir.replace(' ', '_') os.mkdir(f'/tmp/mpound/{dir}') index = open(f'/tmp/mpound/{dir}/index.html', 'w') index.write( f'<html><head> <meta charset="utf-8">\n <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">\n <meta name="description" content="Tools to analyze observations of photodissociation regions">\n <meta name="author" content="Marc W. Pound">\n <title>PhotoDissociation Region Toolbox {dir}</title>\n <!-- Font Awesome icons (free version)-->\n <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.13.0/js/all.min.js" crossorigin="anonymous"></script>\n <!-- Font Awesome accessibility options -->\n <script src="https://use.fontawesome.com/824d9b17ca.js"></script>\n <link href="http://dustem.astro.umd.edu/freelancer/css/styles.css" rel="stylesheet">\n <!-- from https://startbootstrap.com/themes/freelancer/-->\n <link rel="stylesheet" href="http://dustem.astro.umd.edu/freelancer/css/heading.css">\n <link rel="stylesheet" href="http://dustem.astro.umd.edu/freelancer/css/body.css">\n \n <!-- PDRT specific CSS -->\n <link href="http://dustem.astro.umd.edu/css/pdrt.css" rel="stylesheet">\n </head><body><br>' ) index.write( '<table class="table mytable table-striped table-striped table-bordered" bgcolor="white" >\n<tr>' ) i = 0 numcols = 4 for r in ms.table["ratio"]: if i != 0 and i % numcols == 0: index.write("</tr>\n<tr>") try: model = ms.get_model(r) modelfile = ms.table.loc[r]["filename"] if "/" in model._title: model._title += " Intensity Ratio" else: if "FIR" not in model._title and "Surface" not in model._title and "A_V" not in model._title: model._title += " Intensity" model._title = model._title.replace( "$\mu$", "µ").replace("$_{FIR}$", "<sub>FIR</sub>").replace( "$_2$", "<sub>2</sub>").replace( "$A_V$", "A<sub>V</sub>") #.replace("$T_S$","T<sub>S</sub>") #.replace("$^{13}$","<sup>13</sup>") #print(f"doing {r} = {modelfile}.png title={model._title}") if "$" in model._title: print( f"############ OOPS missed some latex {model._title}" ) fig_out = f'{dir}/{modelfile}.png' fig_html = f'{dir}/{modelfile}.html' f_html = f'{modelfile}.html' index.write( f'<td><a href="{f_html}">{model._title}</a></td>') mdict[r] = fig_html i = i + 1 if False: if model.header["CTYPE1"] == "T_e": # Iron line ratios are function of electron temperature and electron density # not H2 density and radiation field. mp.plot(r, label=True, norm="log", cmap='plasma') else: mp.plot(r, yaxis_unit="Habing", label=True, norm="log", cmap='plasma') mp.savefig(f'/tmp/mpound/{fig_out}') # This is supposed to stop complaints about # too many figures, but actually does not! mp._plt.close(mp.figure) except Exception as e: success = False failed.append(f'{r} {modelfile} : {str(e)}\n') if not success: print("Couldn't open these models:", failed) index.write('</tr></table></body></html>') index.close()
def unwrapv(inph, in_p=(), uv=2 * pi): """Return the input matrix unwrapped the value given in uv This is a vectorized routine, but is not as fast as it should """ if not is_masked(inph): fasei = MaskedArray(inph, isnan(inph)) else: fasei = inph.copy() size = fasei.shape nx, ny = size # If the initial unwraping point is not given, take the center of the image # as initial coordinate if in_p == (): in_p = (int(size[0] / 2), int(size[1] / 2)) # Create a temporal space to mark if the points are already unwrapped # 0 the point has not been unwrapped # 1 the point has not been unwrapped, but it is in the unwrapping list # 2 the point was already unwrapped fl = N.zeros(size) # List containing the points to unwrap l_un = [in_p] fl[in_p] = 1 # unwrapped values faseo = fasei.copy() XI_, YI_ = meshgrid(range(-1, 2), range(-1, 2)) XI_ = XI_.flatten() YI_ = YI_.flatten() while len(l_un) > 0: # remove the first value from the list unp = l_un.pop(0) #l_un[0:1]=[] XI = XI_ + unp[0] YI = YI_ + unp[1] #Remove from the list the values where XI is negative nxi = XI > -1 nyi = YI > -1 nxf = XI < nx nyf = YI < ny n = nonzero(nxi & nyi & nxf & nyf) lco = zip(XI[n], YI[n]) # Put the coordinates of unwrapped the neigbors in the list # And check for wrapping nv = 0 wv = 0 for co in lco: if (fl[co] == 0) & (faseo.mask[co] == False): fl[co] = 1 l_un.append(co) elif fl[co] == 2: wv = wv + rint((faseo[co] - faseo[unp]) / uv) nv = nv + 1 if nv != 0: wv = wv / nv #if wv>=0: wv=int(wv+0.5) #else: wv=int(wv-0.5) fl[unp] = 2 faseo[unp] = faseo[unp] + wv * uv return faseo
def min_max_clip_version_5(cls, file_data: ndarray, number_dropped_values: int, console: Console, session_controller: SessionController): console.push_level() console.message( f"Using min-max clip with {number_dropped_values} iterations", +1) masked_array = ma.MaskedArray(file_data) drop_counter = 1 while drop_counter <= number_dropped_values: cls.check_cancellation(session_controller) console.push_level() console.message( f"Iteration {drop_counter} of {number_dropped_values}.", +1) drop_counter += 1 # Find the minimums in all columns. This will give a 2d matrix the same size as the images # with the column-minimum in each position minimum_values = masked_array.min(axis=0) cls.check_cancellation(session_controller) # Now compare that matrix of minimums down the layers, so we get Trues where # each minimum exists in its column (minimums might exist more than once, and # we want to find all of them) masked_array = ma.masked_where(masked_array == minimum_values, masked_array) cls.check_cancellation(session_controller) console.message("Masked minimums.", +1, temp=True) # Now find and mask the maximums, same approach maximum_values = masked_array.max(axis=0) masked_array = ma.masked_where(masked_array == maximum_values, masked_array) cls.check_cancellation(session_controller) console.message("Masked maximums.", +1, temp=True) console.pop_level() console.message(f"Calculating mean of remaining data.", 0) masked_means = numpy.mean(masked_array, axis=0) cls.check_cancellation(session_controller) # If the means matrix contains any masked values, that means that in that column the clipping # eliminated *all* the data. We will find the offending columns and re-calculate those with # fewer dropped extremes. This should exactly reproduce the results of the cell-by-cell methods if ma.is_masked(masked_means): console.message( "Some columns lost all their values; reducing drops for those columns.", 0) # Get the mask, and get a 2D matrix showing which columns were entirely masked the_mask = masked_array.mask eliminated_columns_map = ndarray.all(the_mask, axis=0) masked_coordinates = numpy.where(eliminated_columns_map) cls.check_cancellation(session_controller) x_coordinates = masked_coordinates[0] y_coordinates = masked_coordinates[1] assert len(x_coordinates) == len(y_coordinates) repairs = len(x_coordinates) cp = "s" if repairs > 1 else "" np = "" if repairs > 1 else "s" console.message(f"{repairs} column{cp} need{np} repair.", +1) for index in range(repairs): cls.check_cancellation(session_controller) # print(".", end="\n" if (index > 0) and (index % 50 == 0) else "") column_x = x_coordinates[index] column_y = y_coordinates[index] column = file_data[:, column_x, column_y] min_max_clipped_mean: int = round( cls.calc_mm_clipped_mean(column, number_dropped_values - 1, console, session_controller)) masked_means[column_x, column_y] = min_max_clipped_mean # We've replaced the problematic columns, now the mean should calculate cleanly assert not ma.is_masked(masked_means) console.pop_level() return masked_means.round()
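# Minimal standalone sketch of the min/max clipping step above (toy 3x2 stack,
# not the project's data): mask one minimum and one maximum per column, then
# average what remains.
import numpy as np
import numpy.ma as ma

stack = ma.MaskedArray([[1.0, 9.0], [5.0, 5.0], [3.0, 7.0]])
stack = ma.masked_where(stack == stack.min(axis=0), stack)   # drop column minimums
stack = ma.masked_where(stack == stack.max(axis=0), stack)   # drop column maximums
np.mean(stack, axis=0)                                       # -> [3.0, 7.0]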
def pearsonr(cube_a, cube_b, corr_coords=None, weights=None, mdtol=1., common_mask=False): """ Calculate the Pearson's r correlation coefficient over specified dimensions. Args: * cube_a, cube_b (cubes): Cubes between which the correlation will be calculated. The cubes should either be the same shape and have the same dimension coordinates or one cube should be broadcastable to the other. * corr_coords (str or list of str): The cube coordinate name(s) over which to calculate correlations. If no names are provided then correlation will be calculated over all common cube dimensions. * weights (numpy.ndarray, optional): Weights array of same shape as (the smaller of) cube_a and cube_b. Note that latitude/longitude area weights can be calculated using :func:`iris.analysis.cartography.area_weights`. * mdtol (float, optional): Tolerance of missing data. The missing data fraction is calculated based on the number of grid cells masked in both cube_a and cube_b. If this fraction exceed mdtol, the returned value in the corresponding cell is masked. mdtol=0 means no missing data is tolerated while mdtol=1 means the resulting element will be masked if and only if all contributing elements are masked in cube_a or cube_b. Defaults to 1. * common_mask (bool): If True, applies a common mask to cube_a and cube_b so only cells which are unmasked in both cubes contribute to the calculation. If False, the variance for each cube is calculated from all available cells. Defaults to False. Returns: A cube of the correlation between the two input cubes along the specified dimensions, at each point in the remaining dimensions of the cubes. For example providing two time/altitude/latitude/longitude cubes and corr_coords of 'latitude' and 'longitude' will result in a time/altitude cube describing the latitude/longitude (i.e. pattern) correlation at each time/altitude point. Reference: http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation This operation is non-lazy. """ # Assign larger cube to cube_1 if cube_b.ndim > cube_a.ndim: cube_1 = cube_b cube_2 = cube_a else: cube_1 = cube_a cube_2 = cube_b dim_coords_1 = [coord.name() for coord in cube_1.dim_coords] dim_coords_2 = [coord.name() for coord in cube_2.dim_coords] common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2)) # If no coords passed then set to all common dimcoords of cubes. if corr_coords is None: corr_coords = common_dim_coords smaller_shape = cube_2.shape # Match up data masks if required. if common_mask: # Create a cube of 1's with a common mask. if ma.is_masked(cube_2.data): mask_cube = _ones_like(cube_2) else: mask_cube = 1. if ma.is_masked(cube_1.data): # Take a slice to avoid unnecessary broadcasting of cube_2. slice_coords = [ dim_coords_1[i] for i in range(cube_1.ndim) if dim_coords_1[i] not in common_dim_coords and np.array_equal( cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i)) ] cube_1_slice = next(cube_1.slices_over(slice_coords)) mask_cube = _ones_like(cube_1_slice) * mask_cube # Apply common mask to data. if isinstance(mask_cube, iris.cube.Cube): cube_1 = cube_1 * mask_cube cube_2 = mask_cube * cube_2 dim_coords_2 = [coord.name() for coord in cube_2.dim_coords] # Broadcast weights to shape of cubes if necessary. 
if weights is None or cube_1.shape == smaller_shape: weights_1 = weights weights_2 = weights else: if weights.shape != smaller_shape: raise ValueError("weights array should have dimensions {}".format( smaller_shape)) dims_1_common = [ i for i in range(cube_1.ndim) if dim_coords_1[i] in common_dim_coords ] weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common) if cube_2.shape != smaller_shape: dims_2_common = [ i for i in range(cube_2.ndim) if dim_coords_2[i] in common_dim_coords ] weights_2 = broadcast_to_shape(weights, cube_2.shape, dims_2_common) else: weights_2 = weights # Calculate correlations. s1 = cube_1 - cube_1.collapsed( corr_coords, iris.analysis.MEAN, weights=weights_1) s2 = cube_2 - cube_2.collapsed( corr_coords, iris.analysis.MEAN, weights=weights_2) covar = (s1 * s2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol) var_1 = (s1**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1) var_2 = (s2**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_2) denom = iris.analysis.maths.apply_ufunc(np.sqrt, var_1 * var_2, new_unit=covar.units) corr_cube = covar / denom corr_cube.rename("Pearson's r") return corr_cube
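A short usage sketch for the correlation function above, assuming it is available as iris.analysis.stats.pearsonr; the file and variable names below are illustrative:

import iris
import iris.analysis.stats

# Hypothetical time/latitude/longitude cubes on the same grid.
cube_a = iris.load_cube('model_a.nc', 'air_temperature')
cube_b = iris.load_cube('model_b.nc', 'air_temperature')

# Pattern correlation over the horizontal dimensions, one value per time.
r_cube = iris.analysis.stats.pearsonr(
    cube_a, cube_b,
    corr_coords=['latitude', 'longitude'],
    common_mask=True)
print(r_cube)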
from osgeo import gdal, osr
from numpy.ma import is_masked
# get_gdal_type is a helper defined elsewhere in this module.


def write_llh_to_gdal(llh_data, lon_min, dlon, lat_min, dlat, gdal_format,
                      dst_filename, origin_up=True, options=None,
                      nodata_value=None, vflip_data=False):
    """Write an LLH layer to a GIS file in a gdal supported format.

    vflip_data: if True llh_data => llh_data[::-1,:]. Use in case the data
    is not aligned with the desired geotransform.
    """
    gdal_type = get_gdal_type(llh_data.dtype)

    # Get the driver and open the output file
    driver = gdal.GetDriverByName(gdal_format)
    if driver is None:
        raise Exception('Unimplemented gdal driver: %s' % gdal_format)

    dst_ds = driver.Create(dst_filename, llh_data.shape[1], llh_data.shape[0],
                           bands=1, eType=gdal_type)  # , options=options)

    # Flip the data if needed to be consistent with the geotransform
    if vflip_data:
        llh_data = llh_data[::-1, :]

    # Set all of the transform information
    if origin_up:
        nlat = llh_data.shape[0]
        lat_max = lat_min + (nlat - 1) * dlat
        dst_ds.SetGeoTransform([lon_min, dlon, 0, lat_max, 0, -dlat])
    else:
        dst_ds.SetGeoTransform([lon_min, dlon, 0, lat_min, 0, dlat])
    srs = osr.SpatialReference()
    srs.SetWellKnownGeogCS('WGS84')
    dst_ds.SetProjection(srs.ExportToWkt())

    # Now write the raster, replacing masked cells with the nodata value
    band = dst_ds.GetRasterBand(1)
    if nodata_value is not None:
        band.SetNoDataValue(nodata_value)
    if is_masked(llh_data):
        if nodata_value is not None:
            llh_data.data[llh_data.mask] = nodata_value
        band.WriteArray(llh_data.data)
    else:
        band.WriteArray(llh_data)

    # Clean up by closing the dataset
    dst_ds = None
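A minimal usage sketch for write_llh_to_gdal, assuming the GDAL Python bindings are installed; the grid, resolution, and output filename are illustrative:

import numpy as np
import numpy.ma as ma

# A small 0.1-degree grid with one masked cell.
grid = ma.masked_invalid(np.arange(12, dtype=np.float32).reshape(3, 4))
grid[1, 2] = ma.masked

write_llh_to_gdal(grid,
                  lon_min=10.0, dlon=0.1,
                  lat_min=45.0, dlat=0.1,
                  gdal_format='GTiff',
                  dst_filename='llh_example.tif',
                  nodata_value=-9999.0)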
#...
# main cycle of averaging to bins
for i, x in enumerate(Xbins[:-1]):
    for j, y in enumerate(Ybins[:-1]):
        SUM = []
        for line in np.arange(len(LINES_subset)):
            LONindices = (LON_subset[line] > x) & (LON_subset[line] <= Xbins[i + 1])
            LATindices = (LAT_subset[line] > y) & (LAT_subset[line] <= Ybins[j + 1])
            DATAindices = np.logical_and(LONindices, LATindices)
            if np.any(DATAindices):
                local_mean = ma.mean(DATA_subset[line][DATAindices])
                if not ma.is_masked(local_mean):
                    SUM.append(float(local_mean))
        if len(SUM) == 0:
            AVG[i, j] = 1e+20
        else:
            AVG[i, j] = np.mean(SUM)
#...
# set mask
AVG = ma.masked_where(AVG >= 1e+19, AVG)
#XC = ma.masked_where(AVG>=1e+19,XC)
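The nested loops above compute the per-bin mean by brute force. As an aside, the same binned mean can be obtained in one call with SciPy's binned_statistic_2d; this is a swapped-in, vectorised alternative with illustrative sample data, not the original script's approach:

import numpy as np
from scipy.stats import binned_statistic_2d

# Illustrative scattered samples.
rng = np.random.default_rng(1)
lon, lat = rng.uniform(0, 10, 1000), rng.uniform(40, 50, 1000)
data = np.sin(lon) + lat

Xbins = np.linspace(0, 10, 21)
Ybins = np.linspace(40, 50, 21)

stat, _, _, _ = binned_statistic_2d(lon, lat, data,
                                    statistic='mean', bins=[Xbins, Ybins])
AVG = np.ma.masked_invalid(stat)  # empty bins come back as NaN and get masked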
def export_cbc(model, cbcfile, otfolder, precision='single', nanval=-1e+20, kstpkper=None, text=None, smooth=False, point_scalars=False, binary=False): """ Exports cell by cell file to vtk Parameters ---------- model : flopy model instance the flopy model instance cbcfile : str the cell by cell file otfolder : str output folder to write the data precision : str: binary file precision, default is 'single' nanval : scalar no data value kstpkper : tuple of ints or list of tuple of ints A tuple containing the time step and stress period (kstp, kper). The kstp and kper values are zero based. text : str or list of str The text identifier for the record. Examples include 'RIVER LEAKAGE', 'STORAGE', 'FLOW RIGHT FACE', etc. smooth : bool If true a smooth surface will be output, default is False point_scalars : bool If True point scalar values will be written, default is False binary : bool if True the output .vtu file will be binary, default is False. """ mg = model.modelgrid shape = (mg.nlay, mg.nrow, mg.ncol) if not os.path.exists(otfolder): os.mkdir(otfolder) # set up the pvd file to make the output files time enabled pvdfile = open(os.path.join(otfolder, '{}_Heads.pvd'.format(model.name)), 'w') pvdfile.write("""<?xml version="1.0"?> <VTKFile type="Collection" version="0.1" byte_order="LittleEndian" compressor="vtkZLibDataCompressor"> <Collection>\n""") # load cbc cbb = bf.CellBudgetFile(cbcfile, precision=precision) # totim_dict = dict(zip(cbb.get_kstpkper(), model.dis.get_totim())) # get records records = _get_names(cbb.get_unique_record_names()) # build imeth lookup imeth_dict = { record: imeth for (record, imeth) in zip(records, cbb.imethlist) } # get list of packages to export if text is not None: # build keylist if isinstance(text, str): keylist = [text] elif isinstance(text, list): keylist = text else: raise Exception('text must be type str or list of str') else: keylist = records if kstpkper is not None: if isinstance(kstpkper, tuple): kstplist = [kstpkper[0]] kperlist = [kstpkper[1]] elif isinstance(kstpkper, list): kstpkper_list = list(map(list, zip(*kstpkper))) kstplist = kstpkper_list[0] kperlist = kstpkper_list[1] else: raise Exception('kstpkper must be tuple of (kstp, kper) or list ' 'of tuples') else: kperlist = list(set([x[1] for x in cbb.get_kstpkper() if x[1] > -1])) kstplist = list(set([x[0] for x in cbb.get_kstpkper() if x[0] > -1])) # get model name model_name = model.name vtk = Vtk(model, nanval=nanval, smooth=smooth, point_scalars=point_scalars) # export data addarray = False count = 1 for kper in kperlist: for kstp in kstplist: ot_base = '{}_CBC_KPER{}_KSTP{}.vtu'.format( model_name, kper + 1, kstp + 1) otfile = os.path.join(otfolder, ot_base) pvdfile.write("""<DataSet timestep="{}" group="" part="0" file="{}"/>\n""".format(count, ot_base)) for name in keylist: try: rec = cbb.get_data(kstpkper=(kstp, kper), text=name, full3D=True) if len(rec) > 0: array = rec[0] # need to fix for multiple pak addarray = True except ValueError: rec = cbb.get_data(kstpkper=(kstp, kper), text=name)[0] if imeth_dict[name] == 6: array = np.full(shape, nanval) # rec array for [node, q] in zip(rec['node'], rec['q']): lyr, row, col = np.unravel_index(node - 1, shape) array[lyr, row, col] = q addarray = True else: raise Exception('Data type not currently supported ' 'for cbc output') # print('Data type not currently supported ' # 'for cbc output') if addarray: # set the data to no data value if ma.is_masked(array): array = np.where(array.mask, nanval, array) # add array to vtk 
vtk.add_array(name.strip(), array) # need to adjust for # write the vtk data to the output file if binary: vtk.write_binary(otfile) else: vtk.write(otfile) count += 1 # finish writing the pvd file pvdfile.write(""" </Collection> </VTKFile>""") pvdfile.close() return
def ingest_sources(db, sources, references=None, ras=None, decs=None, comments=None, epochs=None, equinoxes=None, raise_error=True, search_db=True): """ Script to ingest sources TODO: better support references=None Parameters ---------- db: astrodbkit2.astrodb.Database Database object created by astrodbkit2 sources: list[str] Names of sources references: str or list[strings] Discovery references of sources ras: list[floats], optional Right ascensions of sources. Decimal degrees. decs: list[floats], optional Declinations of sources. Decimal degrees. comments: list[strings], optional Comments epochs: str or list[str], optional Epochs of coordinates equinoxes: str or list[string], optional Equinoxes of coordinates raise_error: bool, optional True (default): Raise an error if a source cannot be ingested False: Log a warning but skip sources which cannot be ingested search_db: bool, optional True (default): Search database to see if source is already ingested False: Ingest source without searching the database Returns ------- None """ # TODO: add example # SETUP INPUTS if ras is None and decs is None: coords = False else: coords = True if isinstance(sources, str): n_sources = 1 else: n_sources = len(sources) # Convert single element input values into lists input_values = [sources, references, epochs, equinoxes, comments] for i, input_value in enumerate(input_values): if input_value is None: input_values[i] = [None] * n_sources elif isinstance(input_value, str): input_values[i] = [input_value] * n_sources sources, references, epochs, equinoxes, comments = input_values n_added = 0 n_existing = 0 n_names = 0 n_alt_names = 0 n_skipped = 0 n_multiples = 0 logger.info(f"Trying to add {n_sources} sources") # Loop over each source and decide to ingest, skip, or add alt name for i, source in enumerate(sources): # Find out if source is already in database or not if coords and search_db: name_matches = find_source_in_db(db, source, ra=ras[i], dec=decs[i]) elif search_db: name_matches = find_source_in_db(db, source) elif not search_db: name_matches = [] else: name_matches = None if len(name_matches ) == 1 and search_db: # Source is already in database n_existing += 1 msg1 = f"{i}: Skipping {source}. Already in database. \n " msg2 = f"{i}: Match found for {source}: {name_matches[0]}" logger.debug(msg1 + msg2) # Figure out if ingest name is an alternate name and add db_matches = db.search_object(source, output_table='Sources', fuzzy_search=False) if len(db_matches) == 0: alt_names_data = [{ 'source': name_matches[0], 'other_name': source }] try: db.Names.insert().execute(alt_names_data) logger.debug( f"{i}: Name added to database: {alt_names_data}\n") n_alt_names += 1 except sqlalchemy.exc.IntegrityError as e: msg = f"{i}: Could not add {alt_names_data} to database" logger.warning(msg) if raise_error: raise SimpleError(msg + '\n' + str(e)) else: continue continue # Source is already in database, nothing new to ingest elif len( name_matches ) > 1 and search_db: # Multiple source matches in the database n_multiples += 1 msg1 = f"{i} Skipping {source} " msg = f"{i} More than one match for {source}\n {name_matches}\n" logger.warning(msg1 + msg) if raise_error: raise SimpleError(msg) else: continue elif len(name_matches ) == 0 or not search_db: # No match in the database, INGEST! 
if coords: # Coordinates were provided as input ra = ras[i] dec = decs[i] epoch = None if ma.is_masked(epochs[i]) else epochs[i] equinox = None if ma.is_masked(equinoxes[i]) else equinoxes[i] else: # Try to get coordinates from SIMBAD simbad_result_table = Simbad.query_object(source) if simbad_result_table is None: n_skipped += 1 msg = f"{i}: Skipping: {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n" logger.warning(msg) if raise_error: raise SimpleError(msg) else: continue elif len(simbad_result_table) == 1: simbad_coords = simbad_result_table['RA'][ 0] + ' ' + simbad_result_table['DEC'][0] simbad_skycoord = SkyCoord(simbad_coords, unit=(u.hourangle, u.deg)) ra = simbad_skycoord.to_string(style='decimal').split()[0] dec = simbad_skycoord.to_string(style='decimal').split()[1] epoch = '2000' # Default coordinates from SIMBAD are epoch 2000. equinox = 'J2000' # Default frame from SIMBAD is IRCS and J2000. msg = f"Coordinates retrieved from SIMBAD {ra}, {dec}" logger.debug(msg) else: n_skipped += 1 msg = f"{i}: Skipping: {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n" logger.warning(msg) if raise_error: raise SimpleError(msg) else: continue logger.debug(f"{i}: Ingesting {source}. Not already in database. ") else: msg = f"{i}: unexpected condition encountered ingesting {source}" logger.error(msg) raise SimpleError(msg) # Construct data to be added source_data = [{ 'source': source, 'ra': ra, 'dec': dec, 'reference': references[i], 'epoch': epoch, 'equinox': equinox, 'comments': None if ma.is_masked(comments[i]) else comments[i] }] names_data = [{'source': source, 'other_name': source}] # Try to add the source to the database try: db.Sources.insert().execute(source_data) n_added += 1 msg = f"Added {str(source_data)}" logger.debug(msg) except sqlalchemy.exc.IntegrityError: if ma.is_masked(source_data[0] ['reference']): # check if reference is blank msg = f"{i}: Skipping: {source}. Discovery reference is blank. \n" msg2 = f"\n {str(source_data)}\n" logger.warning(msg) logger.debug(msg2) n_skipped += 1 if raise_error: raise SimpleError(msg + msg2) else: continue elif db.query(db.Publications).filter( db.Publications.c.name == references[i]).count() == 0: # check if reference is in Publications table msg = f"{i}: Skipping: {source}. Discovery reference {references[i]} is not in Publications table. \n" \ f"(Add it with add_publication function.) \n " msg2 = f"\n {str(source_data)}\n" logger.warning(msg) logger.debug(msg2) n_skipped += 1 if raise_error: raise SimpleError(msg + msg2) else: continue else: msg = f"{i}: Skipping: {source}. Not sure why." 
msg2 = f"\n {str(source_data)} " logger.warning(msg) logger.debug(msg2) n_skipped += 1 if raise_error: raise SimpleError(msg + msg2) else: continue # Try to add the source name to the Names table try: db.Names.insert().execute(names_data) logger.debug(f"Name added to database: {names_data}\n") n_names += 1 except sqlalchemy.exc.IntegrityError: msg = f"{i}: Could not add {names_data} to database" logger.warning(msg) if raise_error: raise SimpleError(msg) else: continue logger.info(f"Sources added to database: {n_added}") logger.info(f"Names added to database: {n_names} \n") logger.info(f"Sources already in database: {n_existing}") logger.info(f"Alt Names added to database: {n_alt_names}") logger.info( f"Sources NOT added to database because multiple matches: {n_multiples}" ) logger.info(f"Sources NOT added to database: {n_skipped} \n") if n_added != n_names: msg = f"Number added should equal names added." raise SimpleError(msg) if n_added + n_existing + n_multiples + n_skipped != n_sources: msg = f"Number added + Number skipped doesn't add up to total sources" raise SimpleError(msg) return
plt.tight_layout() plt.show() sys.exit() dataH, dataW = deltaValues.shape pixelData = np.zeros((dataH, dataW, 4), dtype=np.uint8) pixelDataGain = np.zeros((dataH, dataW, 4), dtype=np.uint8) pixelDataLoss = np.zeros((dataH, dataW, 4), dtype=np.uint8) total = dataH * dataW print("Converting data to colors...") for i in range(dataH): for j in range(dataW): delta = deltaValues[i, j] color = gainColor = lossColor = [0,0,0,0] if not ma.is_masked(delta): nvalue = norm(delta, DATA_RANGE, limit=True) color = getColor(COLOR_GRADIENT, nvalue) if nvalue > 0.5: gainColor = color else: lossColor = color pixelData[i, j] = np.array(color, dtype=np.uint8) pixelDataGain[i, j] = np.array(gainColor, dtype=np.uint8) pixelDataLoss[i, j] = np.array(lossColor, dtype=np.uint8) printProgress(i*dataW+j, total) def makeImage(filename, pixelData): print("Writing data to image...") dataIm = Image.fromarray(pixelData, mode="RGBA") baseIm = Image.new(mode="RGBA", size=dataIm.size, color=(0, 0, 0, 255))
try: s_reference_stream.trim(starttime=s_times[s_stations.index(values[parameters.index( 'reference_station')])] - datetime.timedelta(seconds=5), endtime=s_times[s_stations.index(values[parameters.index( 'reference_station')])] + datetime.timedelta(seconds=5)) except ValueError: # When there is no s data for the station, use the minimum and maximum from the other sites s_reference_stream.trim(starttime=min(s_times) - datetime.timedelta(seconds=5), endtime=max(s_times) + datetime.timedelta(seconds=5)) lag_time = find_lag_time(s_station_stream, s_reference_stream) shift_idx = int(abs(lag_time * streams[m][0].stats.sampling_rate)) downsampled_shift_idx = int(abs(lag_time * downsampled_streams[m][0].stats.sampling_rate)) for n in range(len(streams[m])): # Ensure all data are masked arrays if not ma.is_masked(streams[m][n].data): streams[m][n].data = ma.masked_array(streams[m][n].data) if not ma.is_masked(downsampled_streams[m][n].data): downsampled_streams[m][n].data = ma.masked_array(downsampled_streams[m][n].data) # Apply shift if lag_time > 0: nandices[0] = downsampled_shift_idx shifted_streams[m][n].data = np.asarray([float('nan')] * shift_idx + streams[m][n].data[:-shift_idx].filled( float('nan')).tolist()) shifted_downsampled_streams[m][n].data = np.asarray([float('nan')] * downsampled_shift_idx + downsampled_streams[m][n].data[ :-downsampled_shift_idx].filled(float('nan')).tolist()) else: nandices[1] = len(downsampled_streams[m][n].data) - downsampled_shift_idx
def chunk_shape_nD(varShape, valSize=4, chunkSize=4096, minDim=1): """ Return a 'good shape' for an nD variable, assuming balanced 1D, 2D access varShape -- list of variable dimension sizes chunkSize -- minimum chunksize desired, in bytes (default 4096) valSize -- size of each data value, in bytes (default 4) minDim -- mimimum chunk dimension (if var dimension larger than this value, otherwise it is just var dimension) Returns integer chunk lengths of a chunk shape that provides balanced access of 1D subsets and 2D subsets of a netCDF or HDF5 variable var. 'Good shape' for chunks means that the number of chunks accessed to read any kind of 1D or 2D subset is approximately equal, and the size of each chunk (uncompressed) is at least chunkSize, which is often a disk block size. """ varShapema = ma.array(varShape) chunkVals = min(chunkSize / float(valSize), numVals(varShapema)) # ideal number of values in a chunk # Make an ideal chunk shape array chunkShape = ma.array(calcChunkShape(chunkVals, varShapema), dtype=int) # Short circuit for 1D arrays. Logic below unecessary & can have divide by zero if len(varShapema) == 1: return chunkShape.filled(fill_value=1) # And a copy where we'll store our final values chunkShapeFinal = ma.masked_all(chunkShape.shape, dtype=int) if chunkVals < numVals(np.minimum(varShapema, minDim)): while chunkVals < numVals(np.minimum(varShapema, minDim)): minDim -= 1 sys.stderr.write('Mindim too large for variable, reduced to : %d\n' % minDim) lastChunkCount = -1 while True: # Loop over the axes in chunkShape, making sure they are at # least minDim in length. for i in range(len(chunkShape)): if ma.is_masked(chunkShape[i]): continue if (chunkShape[i] < minDim): # Set the final chunk shape for this dimension chunkShapeFinal[i] = min(minDim, varShapema[i]) # mask it out of the array of possible chunkShapes chunkShape[i] = ma.masked # Have we fixed any dimensions and filled them in chunkShapeFinal? if chunkShapeFinal.count() > 0: chunkCount = numVals(chunkShapeFinal[~chunkShapeFinal.mask]) else: if (lastChunkCount == -1): # Haven't modified initial guess, break out of # this loop and accept chunkShape break if chunkCount != lastChunkCount and len( varShapema[~chunkShape.mask]) > 0: # Recalculate chunkShape array, with reduced dimensions chunkShape[~chunkShape.mask] = calcChunkShape( chunkVals / chunkCount, varShapema[~chunkShape.mask]) lastChunkCount = chunkCount else: break # This doesn't work when chunkShape has no masked values. Weird. # chunkShapeFinal[chunkShapeFinal.mask] = chunkShape[~chunkShape.mask] for i in range(len(chunkShapeFinal)): if ma.is_masked(chunkShapeFinal[i]): chunkShapeFinal[i] = chunkShape[i] return chunkShapeFinal.filled(fill_value=1)
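A brief usage sketch for chunk_shape_nD, assuming the helpers it relies on (numVals and calcChunkShape) are defined in the same module; the variable shape is illustrative:

# Chunk a float32 variable shaped (time, lat, lon) = (8760, 1024, 2048) so that
# 1D (time-series) and 2D (map) reads touch a similar number of ~4 KiB chunks.
var_shape = (8760, 1024, 2048)
chunk_lengths = chunk_shape_nD(var_shape, valSize=4, chunkSize=4096)
print(chunk_lengths, int(chunk_lengths.prod()) * 4, 'bytes per uncompressed chunk')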
def find_xcorr_window(shifted_downsampled_streams, downsampled_rss, nandices, phase, parameters, values): """ Find the time window of each event for which the correlation of the vertical waveforms at the sensors is highest: This window is that for which the normalised cross-correlation of the total energy traces of each sensor pair is highest. """ xcorr_window = [] # Calculate horizontal total energy for the reference stream reference_total_energy_waveform = calculate_total_energy( downsampled_rss, phase) reference_total_energy_waveform = np.asarray( smooth_data( reference_total_energy_waveform, int(2 * round( 1 / float(values[parameters.index('lower_frequency')]))))) for m in range(len(shifted_downsampled_streams)): # Calculate horizontal total energy for the station stream downsampled_sss = shifted_downsampled_streams[m].copy() shifted_stream_total_energy_waveform = calculate_total_energy( downsampled_sss, phase) shifted_stream_total_energy_waveform = np.asarray( smooth_data( shifted_stream_total_energy_waveform, int(2 * round( 1 / float(values[parameters.index('lower_frequency')]))))) # Initiate one loop to work through each possible start time in the waveform normalised_xcorr_values = [ ([0] * len(reference_total_energy_waveform)) for y in range(len(shifted_stream_total_energy_waveform)) ] for n in range(nandices[0], len(shifted_stream_total_energy_waveform)): if nandices[1] and n > nandices[ 1]: # Don't do cross-correlation past the data break # Initiate a second loop to work through each possible end time in the waveform, # so that all possible windows are tested, BUT require that windows are at least 1 wavelength of the # lowest frequency wavelet in length. for o in range( n + int( round(1 / float( values[parameters.index('lower_frequency')]))) * int(shifted_downsampled_streams[m][0].stats.sampling_rate) + 1, len(reference_total_energy_waveform)): if nandices[1] and o > nandices[ 1]: # Don't do cross-correlation past the data break # Calculate mean, variance for data in the given window x_mean = np.nanmean(shifted_stream_total_energy_waveform[n:o]) y_mean = np.nanmean(reference_total_energy_waveform[n:o]) x_var = np.nanvar(shifted_stream_total_energy_waveform[n:o]) y_var = np.nanvar(reference_total_energy_waveform[n:o]) if x_var == 0 or y_var == 0: continue if np.isnan(x_mean) or np.isnan(y_mean) or np.isnan( x_var) or np.isnan(y_var): continue # Iterate through all values in the given window sum = 0 for p in range(n, o): # Skip this window if there are any nan values if (np.isnan(shifted_stream_total_energy_waveform[p]) or ma.is_masked( shifted_stream_total_energy_waveform[p]) or np.isnan(reference_total_energy_waveform[p]) or ma.is_masked(reference_total_energy_waveform[p])): break sum += ( (shifted_stream_total_energy_waveform[p] - x_mean) * (reference_total_energy_waveform[p] - y_mean)) normalised_xcorr_value = ( 1 / len(shifted_stream_total_energy_waveform[n:o]) * sum / math.sqrt(x_var * y_var)) # Store normalised cross-correlation values in a nested list where the first index is the window # start index and the second index is the window end index. 
normalised_xcorr_values[n][o] = normalised_xcorr_value if np.nanmax(normalised_xcorr_values) == 0: print( 'Seismograms failed to find any suitable correlation window!') return np.nan normalised_xcorr_values = np.asarray(normalised_xcorr_values) max_normalised_xcorr_value_idx = np.unravel_index( np.nanargmax(normalised_xcorr_values), normalised_xcorr_values.shape) print( 'For station ' + shifted_downsampled_streams[m][0].stats.station + ' maximum normalised cross-correlation value occurs between times ' + str(downsampled_sss[0].times( type='utcdatetime')[max_normalised_xcorr_value_idx[0]]) + ' (index ' + str(max_normalised_xcorr_value_idx[0]) + ' in downsampled data) - ' + str(downsampled_sss[0].times( type='utcdatetime')[max_normalised_xcorr_value_idx[1]]) + ' in the aligned data (index ' + str(max_normalised_xcorr_value_idx[1]) + ' in the downsampled data)') print('Maximum cross-correlation value is: ' + str(normalised_xcorr_values[max_normalised_xcorr_value_idx])) if nandices[0] > max_normalised_xcorr_value_idx[0]: print( 'There are ' + str(nandices[0] - max_normalised_xcorr_value_idx[0]) + ' NaN values at the front of the cross-correlation window in the downsampled aligned data' ) if nandices[1] and nandices[1] < max_normalised_xcorr_value_idx[1]: print( 'There are ' + str(max_normalised_xcorr_value_idx[1] - nandices[1]) + ' NaN values at the end of the cross-correlation window in the downsampled aligned data' ) xcorr_window.append([ downsampled_sss[0].times( type='utcdatetime')[max_normalised_xcorr_value_idx[0]], downsampled_sss[0].times( type='utcdatetime')[max_normalised_xcorr_value_idx[1]] ]) return xcorr_window
trackLen_polar_nh = []
trackLen_polar_sh = []
for x in range(0, niters):
    #for x in range(0,60000):
    if x % 10000 == 0:
        print("On track {0}/{1}".format(x, niters))
    if x < len_nh:
        if trackLen_nh[x] < 8:  # checking to make sure TPV track was longer than two days
            continue
        lat_nh = data_nh.variables['latExtr'][x, :]
        if not ma.is_masked(lat_nh):
            # checking if TPV spent 60% of lifetime in Arctic
            per_life_in_arctic = float(np.where(lat_nh >= 60)[0].shape[0]) / float(lat_nh.shape[0])
        else:
            per_life_in_arctic = float(np.where((lat_nh.data >= 60) & (lat_nh.mask != True))[0].shape[0]) / \
                float(np.where(lat_nh.mask != True)[0].shape[0])
        if per_life_in_arctic * 100 >= 60.:
            lifetimes_polar_nh.append(data_nh.variables['lenTrack'][x] / hinc)
            months_polar_nh.append(trackStartMon_nh[x])
            trackLen_polar_nh.append(data_nh.variables['lenTrack'][x])
def find_lag_time(stream, reference_stream, phase): """ Find the lag time between a given stream and a reference stream using cross-correlation of the total energy, which is independent of component alignment. :param stream: obspy stream object of the seismogram to calculate lag time for :param reference_stream: object stream object of the seismogram to use as reference for lag time calculation :param phase: whether to use total energy on vertical channel (P phase) or horizontal channels (S phase) :return: lag time between the two sensors in seconds relative to the reference stream """ # Create normalised amplitude envelopes of the data using horizontal total energy stream_envelope = calculate_total_energy(stream, phase) max_se = ma.max(stream_envelope) stream_envelope /= max_se ref_envelope = calculate_total_energy(reference_stream, phase) max_re = ma.max(ref_envelope) ref_envelope /= max_re # Ensure all data are masked arrays if not ma.is_masked(stream_envelope): stream_envelope = ma.masked_array(stream_envelope) if not ma.is_masked(ref_envelope): ref_envelope = ma.masked_array(ref_envelope) # Find the lag time from the maximum cross-correlation value between the two waveforms xcorr_values = [] ref_envelope = ref_envelope.filled(0).tolist() + len(stream_envelope) * [0] ref_envelope = np.asarray( smooth_data( ref_envelope, int(2 * round( 1 / float(values[parameters.index('lower_frequency')]))))) for m in range(2 * len(stream_envelope)): # Shift the stream if m <= len(stream_envelope): shifted_stream_envelope = (len(stream_envelope) - m) * [ 0 ] + stream_envelope.filled(0).tolist() + m * [0] else: shifted_stream_envelope = max(0, len(stream_envelope) - m) * [0] + \ stream_envelope[:len(stream_envelope) - m].filled(0).tolist() + m * [0] shifted_stream_envelope = np.asarray( smooth_data( shifted_stream_envelope, int(2 * round( 1 / float(values[parameters.index('lower_frequency')]))))) # Perform cross-correlation try: xcorr_value = np.corrcoef(shifted_stream_envelope, ref_envelope)[0][1] except ValueError: print( 'Cross-correlation failed! Perhaps one stream is a data point different to the other? ' 'This can occur for certain corner frequency and data length combinations... It is a bug.' ) xcorr_values.append(xcorr_value) # Find lag time from highest cross-correlation value max_xcorr_value = max(xcorr_values) print( 'Correlation value at best alignment of total energy waveforms is: ' + str(max_xcorr_value)) lag_time = (1 / stream[0].stats.sampling_rate) * ( len(stream_envelope) - xcorr_values.index(max_xcorr_value)) return lag_time
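find_lag_time above estimates the lag by shifting one smoothed energy envelope against the other and keeping the shift with the highest correlation coefficient. The underlying idea can be illustrated with np.correlate on two synthetic envelopes; this is a simplified stand-in, not the function's exact shifting and smoothing procedure:

import numpy as np

# Two illustrative envelopes sampled at 100 Hz, one delayed by 0.5 s.
fs = 100.0
t = np.arange(0, 10, 1 / fs)
ref_env = np.exp(-0.5 * ((t - 4.0) / 0.3) ** 2)
sta_env = np.exp(-0.5 * ((t - 4.5) / 0.3) ** 2)

# Full cross-correlation; the offset of the peak gives the lag in samples.
xcorr = np.correlate(sta_env - sta_env.mean(), ref_env - ref_env.mean(), mode='full')
lag_samples = xcorr.argmax() - (len(ref_env) - 1)
lag_time = lag_samples / fs
print(lag_time)  # ~ +0.5 s: the station envelope lags the reference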
def fit_histogram_gaussian_function(data_array, bin_width): """ This function fits a Gaussian function to a specific set of data. Gaussian fitting is hard, this function exists as a port so that all fitting functions use the same algorithm and said algorithm is easy to change. This applies it to the histogram of the data. Parameters ---------- data_array : ndarray The data that the histogram Gaussian function is fitting. bin_width : float The width of the bins to use for the histogram fitting function. Returns ------- gaussian_function : function A callable function that when provided an X value, it will return the value of the function. gaussian_parameters : dictionary A compiled dictionary of all of the parameters of the Gaussian fit. """ # Be able to accept both masked arrays and standard arrays and # be able to tell. if (np_ma.is_masked(data_array)): flat_data = data_array.compressed() else: flat_data = data_array.flatten() # Numpy does not support histogram bin widths, instead using # bins defined by values in an array. Converting equal bin # widths to this array. hist_bins = core.math.generate_numpy_bin_width_array(data_array=flat_data, bin_width=bin_width) # Extract histogram data from the data. The x locations are in # the middle of the bin. hist_data = np.histogram(flat_data, bins=hist_bins) hist_x = (hist_data[1][0:-1] + hist_data[1][1:]) / 2 hist_y = hist_data[0] # Determine the initial guesses of the Gaussian histogram # fit. So far magic is the best way. guess_mean, guess_stddev, guess_amplitude = \ core.magic.magic_inital_gaussian_parameters(x_data=hist_x, y_data=hist_y) # Do the Gaussian fit. inital_guesses = { 'mean': guess_mean, 'stddev': guess_stddev, 'amplitude': guess_amplitude } gauss_funct, gauss_param = fit_gaussian_function(hist_x, hist_y, inital_guesses) # For naming convention. gaussian_function = gauss_funct gaussian_parameters = gauss_param return gaussian_function, gaussian_parameters
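The function above depends on project-internal helpers (core.math.generate_numpy_bin_width_array, core.magic.magic_inital_gaussian_parameters, fit_gaussian_function). A self-contained sketch of the same histogram-then-fit idea, with scipy.optimize.curve_fit and simple moment-based initial guesses standing in for those helpers; the data and bin width are illustrative:

import numpy as np
import numpy.ma as ma
from scipy.optimize import curve_fit


def gaussian(x, amplitude, mean, stddev):
    return amplitude * np.exp(-0.5 * ((x - mean) / stddev) ** 2)


data = ma.masked_greater(np.random.default_rng(2).normal(5.0, 2.0, 10_000), 12.0)
flat = data.compressed() if ma.is_masked(data) else np.asarray(data).flatten()

# Equal-width bins; x locations are taken at the bin centres.
bin_width = 0.25
bins = np.arange(flat.min(), flat.max() + bin_width, bin_width)
hist_y, edges = np.histogram(flat, bins=bins)
hist_x = 0.5 * (edges[:-1] + edges[1:])

p0 = [hist_y.max(), flat.mean(), flat.std()]  # simple initial guesses
(amplitude, mean, stddev), _ = curve_fit(gaussian, hist_x, hist_y, p0=p0)
print(mean, stddev)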
def combine_sigma_clip( cls, file_names: [str], sigma_threshold: float, calibrator: Calibrator, console: Console, session_controller: SessionController) -> Optional[ndarray]: console.push_level() console.message( f"Combine by sigma-clipped mean, z-score threshold {sigma_threshold}", +1) sample_file = RmFitsUtil.make_file_descriptor(file_names[0]) file_data = numpy.asarray(RmFitsUtil.read_all_files_data(file_names)) cls.check_cancellation(session_controller) file_data = calibrator.calibrate_images(file_data, sample_file, console, session_controller) cls.check_cancellation(session_controller) console.message("Calculating unclipped means", +1) column_means = numpy.mean(file_data, axis=0) cls.check_cancellation(session_controller) console.message("Calculating standard deviations", 0) column_stdevs = numpy.std(file_data, axis=0) cls.check_cancellation(session_controller) console.message("Calculating z-scores", 0) # Now what we'd like to do is just: # z_scores = abs(file_data - column_means) / column_stdevs # Unfortunately, standard deviations can be zero, so that simplistic # statement would generate division-by-zero errors. # Std for a column would be zero if all the values in the column were identical. # In that case we wouldn't want to eliminate any anyway, so we'll set the # zero stdevs to a large number, which causes the z-scores to be small, which # causes no values to be eliminated. column_stdevs[column_stdevs == 0.0] = sys.float_info.max z_scores = abs(file_data - column_means) / column_stdevs cls.check_cancellation(session_controller) console.message("Eliminated data outside threshold", 0) exceeds_threshold = z_scores > sigma_threshold cls.check_cancellation(session_controller) # Calculate and display how much data we are ignoring dimensions = exceeds_threshold.shape total_pixels = dimensions[0] * dimensions[1] * dimensions[2] number_masked = numpy.count_nonzero(exceeds_threshold) percentage_masked = 100.0 * number_masked / total_pixels console.message( f"Discarded {number_masked:,} pixels of {total_pixels:,} " f"({percentage_masked:.3f}% of data)", +1) masked_array = ma.masked_array(file_data, exceeds_threshold) cls.check_cancellation(session_controller) console.message("Calculating adjusted means", -1) masked_means = ma.mean(masked_array, axis=0) cls.check_cancellation(session_controller) # If the means matrix contains any masked values, that means that in that column the clipping # eliminated *all* the data. We will find the offending columns and re-calculate those using # simple min-max clipping. 
if ma.is_masked(masked_means): console.message( "Some columns lost all their values; min-max clipping those columns.", 0) # Get the mask, and get a 2D matrix showing which columns were entirely masked eliminated_columns_map = ndarray.all(exceeds_threshold, axis=0) masked_coordinates = numpy.where(eliminated_columns_map) x_coordinates = masked_coordinates[0] y_coordinates = masked_coordinates[1] assert len(x_coordinates) == len(y_coordinates) for index in range(len(x_coordinates)): cls.check_cancellation(session_controller) column_x = x_coordinates[index] column_y = y_coordinates[index] column = file_data[:, column_x, column_y] min_max_clipped_mean: int = round( cls.calc_mm_clipped_mean(column, 2, console, session_controller)) masked_means[column_x, column_y] = min_max_clipped_mean # We've replaced the problematic columns, now the mean should calculate cleanly assert not ma.is_masked(masked_means) cls.check_cancellation(session_controller) console.pop_level() result = masked_means.round().filled() return result
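Stripped of the calibration and console plumbing, the sigma-clipped combine above reduces to a few masked-array operations. A minimal standalone sketch with an illustrative 20-frame stack and a z-score threshold of 3:

import sys
import numpy as np
import numpy.ma as ma

stack = np.random.default_rng(3).normal(100.0, 5.0, size=(20, 8, 8))
stack[4, 2, 2] = 10_000.0                      # an obvious outlier

means = stack.mean(axis=0)
stdevs = stack.std(axis=0)
stdevs[stdevs == 0.0] = sys.float_info.max     # avoid divide-by-zero; nothing gets clipped there
z_scores = np.abs(stack - means) / stdevs

clipped = ma.masked_array(stack, z_scores > 3.0)
combined = ma.mean(clipped, axis=0)
print(ma.is_masked(combined))                  # True only if some pixel lost every layer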
def ingest_spectra(db, sources, spectra, regimes, telescopes, instruments, modes, obs_dates, references, wavelength_units=None, flux_units=None, wavelength_order=None, comments=None, other_references=None, raise_error=True): """ Parameters ---------- db: astrodbkit2.astrodb.Database sources: list[str] List of source names spectra: list[str] List of filenames corresponding to spectra files regimes: str or list[str] List or string telescopes: str or list[str] List or string instruments: str or list[str] List or string modes: str or list[str] List or string obs_dates: str or datetime List of strings or datetime objects references: list[str] List or string wavelength_units: list[str] or Quantity, optional List or string flux_units: list[str] or Quantity, optional List or string wavelength_order: list[int], optional comments: list[str], optional List of strings other_references: list[str], optional List of strings raise_error: bool """ # Convert single value input values to lists input_values = [ regimes, telescopes, instruments, modes, wavelength_order, wavelength_units, flux_units, references ] for i, input_value in enumerate(input_values): if isinstance(input_value, str): print, input_value input_values[i] = [input_value] * len(sources) elif isinstance(input_value, type(None)): print, input_value input_values[i] = [None] * len(sources) regimes, telescopes, instruments, modes, wavelength_order, wavelength_units, flux_units, references = input_values n_spectra = len(spectra) n_skipped = 0 n_dupes = 0 n_missing_instrument = 0 n_added = 0 n_blank = 0 msg = f'Trying to add {n_spectra} spectra' logger.info(msg) for i, source in enumerate(sources): # TODO: check that spectrum can be read by astrodbkit # Get source name as it appears in the database db_name = find_source_in_db(db, source) if len(db_name) != 1: msg = f"No unique source match for {source} in the database" raise SimpleError(msg) else: db_name = db_name[0] # Check if spectrum file is accessible # First check for internet internet = check_internet_connection() if internet: request_response = requests.head(spectra[i]) status_code = request_response.status_code # The website is up if the status code is 200 if status_code != 200: n_skipped += 1 msg = "The spectrum location does not appear to be valid: \n" \ f'spectrum: {spectra[i]} \n' \ f'status code: {status_code}' logger.error(msg) if raise_error: raise SimpleError(msg) else: continue else: msg = f"The spectrum location appears up: {spectra[i]}" logger.debug(msg) else: msg = "No internet connection. Internet is needed to check spectrum files." raise SimpleError(msg) # Find what spectra already exists in database for this source source_spec_data = db.query( db.Spectra).filter(db.Spectra.c.source == db_name).table() # SKIP if observation date is blank # TODO: try to populate obs date from meta data in spectrum file if ma.is_masked(obs_dates[i]) or obs_dates[i] == '': obs_date = None missing_obs_msg = f"Skipping spectrum with missing observation date: {source} \n" missing_row_spe = f"{source, obs_dates[i], references[i]} \n" logger.info(missing_obs_msg) logger.debug(missing_row_spe) n_blank += 1 continue else: try: obs_date = pd.to_datetime( obs_dates[i] ) # TODO: Another method that doesn't require pandas? 
except dateutil.parser._parser.ParserError: n_skipped += 1 if raise_error: msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}" logger.error(msg) raise SimpleError else: msg = f"Skipping {source} Can't convert obs date to Date Time object: {obs_dates[i]}" logger.warning(msg) continue # TODO: make it possible to ingest units and order row_data = [{ 'source': db_name, 'spectrum': spectra[i], 'local_spectrum': None, # if ma.is_masked(local_spectra[i]) else local_spectra[i], 'regime': regimes[i], 'telescope': telescopes[i], 'instrument': None if ma.is_masked(instruments[i]) else instruments[i], 'mode': None if ma.is_masked(modes[i]) else modes[i], 'observation_date': obs_date, 'wavelength_units': None if ma.is_masked(wavelength_units[i]) else wavelength_units[i], 'flux_units': None if ma.is_masked(flux_units[i]) else flux_units[i], 'wavelength_order': None if ma.is_masked(wavelength_order[i]) else wavelength_order[i], 'comments': None if ma.is_masked(comments[i]) else comments[i], 'reference': references[i], 'other_references': None if ma.is_masked(other_references[i]) else other_references[i] }] logger.debug(row_data) try: db.Spectra.insert().execute(row_data) n_added += 1 except sqlalchemy.exc.IntegrityError as e: # TODO: add elif to check if reference is in Publications Table if "CHECK constraint failed: regime" in str(e): msg = f"Regime provided is not in schema: {regimes[i]}" logger.error(msg) if raise_error: raise SimpleError(msg) else: continue # check telescope, instrument, mode exists telescope = db.query(db.Telescopes).filter( db.Telescopes.c.name == row_data[0]['telescope']).table() instrument = db.query(db.Instruments).filter( db.Instruments.c.name == row_data[0]['instrument']).table() mode = db.query(db.Modes).filter( db.Modes.c.name == row_data[0]['mode']).table() if len(source_spec_data) > 0: # Spectra data already exists # check for duplicate measurement ref_dupe_ind = source_spec_data['reference'] == references[i] date_dupe_ind = source_spec_data[ 'observation_date'] == obs_date instrument_dupe_ind = source_spec_data[ 'instrument'] == instruments[i] mode_dupe_ind = source_spec_data['mode'] == modes[i] if sum(ref_dupe_ind) and sum(date_dupe_ind) and sum( instrument_dupe_ind) and sum(mode_dupe_ind): msg = f"Skipping suspected duplicate measurement\n{source}\n" msg2 = f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference']}" msg3 = f"{instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" logger.warning(msg) logger.debug(msg2 + msg3 + str(e)) n_dupes += 1 if raise_error: raise SimpleError else: continue # Skip duplicate measurement # else: # msg = f'Spectrum could not be added to the database (other data exist): \n ' \ # f"{source, instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" # msg2 = f"Existing Data: \n " # # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum']}" # msg3 = f"Data not able to add: \n {row_data} \n " # logger.warning(msg + msg2) # source_spec_data[ref_dupe_ind][ # 'source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum'].pprint_all() # logger.debug(msg3) # n_skipped += 1 # continue if len(instrument) == 0 or len(mode) == 0 or len(telescope) == 0: msg = f'Spectrum for {source} could not be added to the database. \n' \ f' Telescope, Instrument, and/or Mode need to be added to the appropriate table. 
\n' \ f" Trying to find telescope: {row_data[0]['telescope']}, instrument: {row_data[0]['instrument']}, " \ f" mode: {row_data[0]['mode']} \n" \ f" Telescope: {telescope}, Instrument: {instrument}, Mode: {mode} \n" logger.error(msg) n_missing_instrument += 1 if raise_error: raise SimpleError else: continue else: msg = f'Spectrum for {source} could not be added to the database for unknown reason: \n {row_data} \n ' logger.error(msg) raise SimpleError(msg) msg = f"SPECTRA ADDED: {n_added} \n" \ f" Spectra with blank obs_date: {n_blank} \n" \ f" Suspected duplicates skipped: {n_dupes}\n" \ f" Missing Telescope/Instrument/Mode: {n_missing_instrument} \n" \ f" Spectra skipped for unknown reason: {n_skipped} \n" logger.info(msg) if n_added + n_dupes + n_blank + n_skipped + n_missing_instrument != n_spectra: msg = "Numbers don't add up: " logger.error(msg) raise SimpleError(msg) spec_count = db.query(Spectra.regime, func.count(Spectra.regime)).group_by( Spectra.regime).all() spec_ref_count = db.query(Spectra.reference, func.count(Spectra.reference)). \ group_by(Spectra.reference).order_by(func.count(Spectra.reference).desc()).limit(20).all() telescope_spec_count = db.query(Spectra.telescope, func.count(Spectra.telescope)). \ group_by(Spectra.telescope).order_by(func.count(Spectra.telescope).desc()).limit(20).all() logger.info( f'Spectra in the database: \n {spec_count} \n {spec_ref_count} \n {telescope_spec_count}' ) return
def align_seismograms(stations, arrival_times, streams, downsampled_streams, reference_station_stream, downsampled_rss, phase, parameters, values): """ Align all seismograms for each event to facilitate cross-correlation: Use the lag time that produces the maximum cross-correlation value between each sensor and the reference sensor's energy traces. In this process, convert all numpy masked arrays to numpy arrays with nan values as mask fill values. """ nandices = [0, None] shifted_streams = streams shifted_downsampled_streams = downsampled_streams for m in range(len(streams)): # Cut the lag time streams to 5 seconds before and after the arrival at each site station_stream = downsampled_streams[m].copy() try: station_stream.trim( starttime=UTCDateTime( arrival_times[stations.index(stations[m])] - datetime.timedelta(seconds=5)), endtime=UTCDateTime( arrival_times[stations.index(stations[m])] + datetime.timedelta(seconds=5))) except ValueError: # When there is no data for the station, use the minimum and maximum from the other sites station_stream.trim(starttime=UTCDateTime( min(arrival_times) - datetime.timedelta(seconds=5)), endtime=UTCDateTime( max(arrival_times) + datetime.timedelta(seconds=5))) reference_stream = downsampled_rss.copy() try: reference_stream.trim( starttime=UTCDateTime(arrival_times[stations.index(values[ parameters.index('reference_station')])] - datetime.timedelta(seconds=5)), endtime=UTCDateTime(arrival_times[stations.index(values[ parameters.index('reference_station')])] + datetime.timedelta(seconds=5))) except ValueError: # When there is no data for the station, use the minimum and maximum from the other sites reference_stream.trim(starttime=UTCDateTime( min(arrival_times) - datetime.timedelta(seconds=5)), endtime=UTCDateTime( max(arrival_times) + datetime.timedelta(seconds=5))) lag_time = find_lag_time(station_stream, reference_stream, phase) shift_idx = int(abs(lag_time * streams[m][0].stats.sampling_rate)) downsampled_shift_idx = int( abs(lag_time * downsampled_streams[m][0].stats.sampling_rate)) for n in range(len(streams[m])): # Ensure all data are masked arrays if not ma.is_masked(streams[m][n].data): streams[m][n].data = ma.masked_array(streams[m][n].data) if not ma.is_masked(downsampled_streams[m][n].data): downsampled_streams[m][n].data = ma.masked_array( downsampled_streams[m][n].data) # Apply shift if lag_time > 0: nandices[0] = downsampled_shift_idx shifted_streams[m][n].data = np.asarray( [float('nan')] * shift_idx + streams[m] [n].data[:-shift_idx].filled(float('nan')).tolist()) shifted_downsampled_streams[m][n].data = np.asarray( [float('nan')] * downsampled_shift_idx + downsampled_streams[m][n].data[:-downsampled_shift_idx]. filled(float('nan')).tolist()) else: nandices[1] = len( downsampled_streams[m][n].data) - downsampled_shift_idx shifted_streams[m][n].data = np.asarray( streams[m][n].data[shift_idx:].filled(float( 'nan')).tolist() + [float('nan')] * shift_idx) shifted_downsampled_streams[m][n].data = np.asarray( downsampled_streams[m][n].data[downsampled_shift_idx:]. 
filled(float('nan')).tolist() + [float('nan')] * downsampled_shift_idx) print(shifted_streams[m][0].stats.station + ' seismograms have been aligned to the reference station by ' 'applying a shift of ' + str(lag_time) + ' seconds') for m in range(len(reference_station_stream)): if ma.is_masked(reference_station_stream[m].data): reference_station_stream[m].data = reference_station_stream[m].data.filled(float('nan')) downsampled_rss[m].data = downsampled_rss[m].data.filled(float('nan')) return nandices, reference_station_stream, downsampled_rss, shifted_streams, shifted_downsampled_streams
from osgeo import gdal
from numpy.ma import is_masked
# get_gdal_type is a helper defined elsewhere in this module.


def write_numpy_to_gdal(data, geotransform, wkt_proj, dst_filename,
                        gdal_format='GTiff', origin_up=True, options=None,
                        nodata_value=None):
    """Given numpy data and projection information, write to a gdal file.

    Parameters
    ----------
    data : a 2D numpy array
    geotransform : a list containing the affine transformation
        (e.g., the result of gdal data_set.GetGeoTransform())
    wkt_proj : well known text projection information
        (e.g., the data_set.GetProjection())
    dst_filename : str
        destination file name
    origin_up : bool
        if origin_up == True, the data is reversed in its first axis
    options : options to pass to gdal.
    nodata_value : nodata value. If None, no nodata value is set.
    """
    gdal_type = get_gdal_type(data.dtype)

    # Get the driver and open the output file
    driver = gdal.GetDriverByName(gdal_format)
    if driver is None:
        raise Exception('Unimplemented gdal driver: %s' % gdal_format)

    dst_ds = driver.Create(dst_filename, data.shape[1], data.shape[0],
                           bands=1, eType=gdal_type)  # , options=options)

    # Set all of the transform information
    if origin_up:
        data = data[::-1, :]
    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(wkt_proj)

    # Now write the raster, replacing masked cells with the nodata value
    band = dst_ds.GetRasterBand(1)
    if nodata_value is not None:
        band.SetNoDataValue(nodata_value)
    if is_masked(data):
        if nodata_value is not None:
            data.data[data.mask] = nodata_value
        band.WriteArray(data.data)
    else:
        band.WriteArray(data)

    # Clean up by closing the dataset
    dst_ds = None
ans = dat[:][ii]
print(ii + 1, ans[2])
#print(ans[2], ans[9], ans[10])
# import pdb ; pdb.set_trace()
outfile.write(r'{\large {\bf ' + ans[2] + r'}}\\' + '\n')
# outfile.write(r'\vspace*{1mm}'+'\n')
outfile.write(r'Email: {}\\'.format(ans[5]) + '\n')
outfile.write(r'Cell: {}\\'.format(ans[6]) + '\n')
outfile.write(r'Class: {}\\'.format(ans[3]) + '\n')
outfile.write(r'Anticipated Graduation Date: {}\\'.format(ans[4]) + '\n')
outfile.write(r'Major(s): {}\\'.format(ans[7]) + '\n')
if ma.is_masked(ans[8]):
    outfile.write(r'Minor(s): None\\' + '\n')
else:
    outfile.write(r'Minor(s): {}\\'.format(ans[8]) + '\n')
outfile.write(r'Overall GPA: {:.5}\\'.format(str(ans[9])) + '\n')
outfile.write(r'Major GPA: {:.5}\\'.format(str(ans[10])) + '\n')
outfile.write('\n ')
outfile.write(r'\vspace*{1mm}' + '\n')
for jj in range(11, 20):
    #print(ii, jj, ans[jj])
    outfile.write(r'{\bf ' + head[jj] + r'}\\' + '\n')
    if ma.is_masked(ans[jj]):
        outfile.write(r'\vspace*{3mm}' + '\n')
    else:
        outfile.write(ans[jj].replace('&', ',') + '\n')
# for jj in range(len(indx)):
def plot_data_ax(fig, ax, matrix, times, wavelengths, symlog=True, t_unit='ps', z_unit=dA_unit, cmap='diverging', z_lim=(None, None), t_lim=(None, None), w_lim=(None, None), linthresh=1, linscale=1, D_mul_factor=1e3, n_lin_bins=10, n_log_bins=10, plot_tilts=True, y_major_formatter=ScalarFormatter(), x_minor_locator=AutoMinorLocator(10), x_major_locator=None, n_levels=30, plot_countours=True, colorbar_locator=MultipleLocator(50), colorbarpad=0.04, diverging_white_cmap_tr=0.98, hatch='/////', colorbar_aspect=35, add_wn_axis=True, x_label="Wavelength / nm"): """data is individual dataset""" # assert type(data) == Data t_lim = (times[0] if t_lim[0] is None else t_lim[0], times[-1] if t_lim[1] is None else t_lim[1]) w_lim = (wavelengths[0] if w_lim[0] is None else w_lim[0], wavelengths[-1] if w_lim[1] is None else w_lim[1]) D = matrix.copy() * D_mul_factor zmin = np.min(D) if z_lim[0] is None else z_lim[0] zmax = np.max(D) if z_lim[1] is None else z_lim[1] if z_lim[0] is not None: D[D < zmin] = zmin if z_lim[1] is not None: D[D > zmax] = zmax register_div_cmap(zmin, zmax) register_div_white_cmap(zmin, zmax, diverging_white_cmap_tr) x, y = np.meshgrid( wavelengths, times) # needed for pcolormesh to correctly scale the image # plot data matrix D set_main_axis(ax, xlim=w_lim, ylim=t_lim, x_label=x_label, y_label=f'Time delay / {t_unit}', x_minor_locator=x_minor_locator, x_major_locator=x_major_locator, y_minor_locator=None) if add_wn_axis: w_ax = setup_wavenumber_axis(ax, x_major_locator=MultipleLocator(0.5)) w_ax.tick_params(which='minor', direction='out') w_ax.tick_params(which='major', direction='out') # ax.set_facecolor((0.8, 0.8, 0.8, 1)) if ma.is_masked( D ): # https://stackoverflow.com/questions/41664850/hatch-area-using-pcolormesh-in-basemap m_idxs = np.argwhere(D.mask[0] > 0).squeeze() wl_range = [wavelengths[m_idxs[0] - 1], wavelengths[m_idxs[-1] + 1]] ax.fill_between(wl_range, [t_lim[0], t_lim[0]], [t_lim[1], t_lim[1]], facecolor="none", hatch=hatch, edgecolor="k", linewidth=0.0) # mappable = ax.pcolormesh(x, y, D, cmap=cmap, vmin=zmin, vmax=zmax) levels = get_sym_space(zmin, zmax, n_levels) mappable = ax.contourf(x, y, D, cmap=cmap, vmin=zmin, vmax=zmax, levels=levels, antialiased=True) if plot_countours: cmap_colors = cm.get_cmap(cmap) colors = cmap_colors(np.linspace(0, 1, n_levels + 1)) colors *= 0.45 # plot contours as darkens colors of colormap, blue -> darkblue, white -> gray ... ax.contour(x, y, D, colors=colors, levels=levels, antialiased=True, linewidths=0.1, alpha=1, linestyles='-') ax.invert_yaxis() ax.tick_params(which='major', direction='out') ax.tick_params(which='minor', direction='out') ax.yaxis.set_ticks_position('both') ax.set_axisbelow(False) fig.colorbar(mappable, ax=ax, label=z_unit, orientation='vertical', aspect=colorbar_aspect, pad=colorbarpad, ticks=colorbar_locator) if symlog: ax.set_yscale('symlog', subs=[2, 3, 4, 5, 6, 7, 8, 9], linscale=linscale, linthresh=linthresh) ax.yaxis.set_major_locator( MajorSymLogLocator(base=10, linthresh=linthresh)) ax.yaxis.set_minor_locator( MinorSymLogLocator(linthresh, n_lin_ints=n_lin_bins, n_log_ints=n_log_bins, base=10)) if plot_tilts: norm = c.SymLogNorm(vmin=t_lim[0], vmax=t_lim[1], linscale=linscale, linthresh=linthresh, base=10, clip=True) _plot_tilts(ax, norm, linthresh, 'y', inverted_axis=True) if y_major_formatter: ax.yaxis.set_major_formatter(y_major_formatter)
def on_campus_arr(filename): schedule = [] with open(filename) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for line in csv_reader: schedule.append(line[1]) schedule.append(line[-1]) schedule = np.asarray(schedule) schedule = schedule.reshape((-1, 2)) masked = ma.masked_where(schedule == 'to be arranged', schedule) masked = ma.masked_where(masked == '0', masked) unmasked = [] index = 0 for item in masked: if ma.is_masked(item) == False: unmasked.append(schedule[index]) index += 1 unmasked = np.asarray(unmasked) final_schedule = np.empty((unmasked.shape[0], 4), dtype='U14') for i in range(len(unmasked)): final_schedule[i, 3] = unmasked[i, 1] day_time = unmasked[i, 0].split(' ') unmasked[i, 0] = day_time[0] unmasked[i, 1] = day_time[1] final_schedule[i, 0] = unmasked[i, 0] hour = unmasked[i, 1].split('-') unmasked[i, 0] = hour[0] unmasked[i, 1] = hour[1] final_schedule[i, 1] = unmasked[i, 0] final_schedule[i, 2] = unmasked[i, 1] if final_schedule[i, 2][-1] == 'P': final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200 final_schedule[i, 2] = int(final_schedule[i, 2][0:-1]) + 1200 if int(final_schedule[i, 1]) > int(final_schedule[i, 2]): final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200 if int(final_schedule[i, 2]) < 800: final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200 final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200 if int(final_schedule[i, 1]) < 800 and int(final_schedule[i, 2]): final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200 final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200 if (final_schedule[i, 2][-2:] == '05' or final_schedule[i, 2][-2:] == '10'): if r.randint(2) == 0: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '00' else: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '15' if (final_schedule[i, 2][-2:] == '20' or final_schedule[i, 2][-2:] == '25'): if r.randint(2) == 0: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '15' else: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '30' if (final_schedule[i, 2][-2:] == '35' or final_schedule[i, 2][-2:] == '40'): if r.randint(2) == 0: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '30' else: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '45' if (final_schedule[i, 2][-2:] == '50' or final_schedule[i, 2][-2:] == '55'): if r.randint(2) == 0: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '45' else: final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '00' temp_str = int(final_schedule[i, 2]) + 100 final_schedule[i, 2] = str(int(temp_str)) if (final_schedule[i, 1][-2:] != '00'): convert_to_quarter = int(final_schedule[i, 1][-2:]) * 100 / 60 convert_to_quarter = str(int(convert_to_quarter)) time_str = final_schedule[i, 1][0:-2] time_str += convert_to_quarter final_schedule[i, 1] = time_str if (final_schedule[i, 2][-2:] != '00'): convert_to_quarter = int(final_schedule[i, 2][-2:]) * 100 / 60 convert_to_quarter = str(int(convert_to_quarter)) time_str = final_schedule[i, 2][0:-2] time_str += convert_to_quarter final_schedule[i, 2] = time_str #- from 0 - 24 = 24hrs = 2400 #- 25 mins a timestep #- 2400 / 25 = 96 steps #- 800 - 8:00, 825 = 8:15, 850 = 8:30, 875 = 8:45 #- 1600 - 4:00, 1625 = 4:15, 1650 = 4:30, 1675 = 4:45 STEPS = 25 BEGINNING = 0 # Can change the beginning value to remove all uncessary # zeros in the 2D array on_campus = np.zeros((96, 5)) def save2arr(day, in_time, out_time, destination, value): start = int(in_time) end = int(out_time) time1 = int((start - BEGINNING) / STEPS) time2 = int((end - BEGINNING) / STEPS) # print(start,end,time1,time2) time = 
range(time1, time2) # print(time1,time2) for i in time: # print(i) destination[i][day] += int(value) #M = [0], T = [1], W = [2], TH = [3], F = [4] for row in final_schedule: # print(row[0]) for day in range(len(row[0])): if row[0][day] == 'M': save2arr(0, row[1], row[2], on_campus, row[-1]) # print('M') elif row[0][day] == 'T': if day < len(row[0]) - 1 and row[0][day + 1] == 'h': # if row[0][day+1] == 'h': save2arr(3, row[1], row[2], on_campus, row[-1]) else: save2arr(1, row[1], row[2], on_campus, row[-1]) elif row[0][day] == 'W': save2arr(2, row[1], row[2], on_campus, row[-1]) elif row[0][day] == 'F': save2arr(4, row[1], row[2], on_campus, row[-1]) else: continue on_campus = on_campus.astype(int) ''' with open("2DonCampus.csv","w+") as my_csv: csvWriter = csv.writer(my_csv,delimiter=',') csvWriter.writerows(on_campus) with open("fileReadableSchedule.csv","w+") as my_csv: csvWriter = csv.writer(my_csv,delimiter=',') csvWriter.writerows(final_schedule) ''' return on_campus